diff -Naurp ../xeno-unstable.bk/xen/arch/x86/apic.c xen/arch/x86/apic.c --- ../xeno-unstable.bk/xen/arch/x86/apic.c 2005-04-14 14:56:31.000000000 -0500 +++ xen/arch/x86/apic.c 2005-04-15 08:33:29.000000000 -0500 @@ -722,10 +722,19 @@ int reprogram_ac_timer(s_time_t timeout) return 1; } +extern int nmi_profiling_started; +extern int nmi_sanity_check(struct xen_regs * regs, int cpu); + void smp_apic_timer_interrupt(struct xen_regs * regs) { ack_APIC_irq(); perfc_incrc(apic_timer); + /* we may lose NMI samples for several reasons. + * Here, we protect against that by doing an nmi sanity + * check every timer interrupt on every processor + */ + if (nmi_profiling_started) + nmi_sanity_check(regs, smp_processor_id()); raise_softirq(AC_TIMER_SOFTIRQ); } diff -Naurp ../xeno-unstable.bk/xen/arch/x86/Makefile xen/arch/x86/Makefile --- ../xeno-unstable.bk/xen/arch/x86/Makefile 2005-04-14 14:56:31.000000000 -0500 +++ xen/arch/x86/Makefile 2005-04-15 08:33:52.000000000 -0500 @@ -15,7 +15,10 @@ ifneq ($(crash_debug),y) OBJS := $(patsubst cdb%.o,,$(OBJS)) endif +OBJS += oprofile/oprofile.o + default: $(TARGET) + make -C oprofile $(TARGET): $(TARGET)-syms boot/mkelf32 ./boot/mkelf32 $(TARGET)-syms $(TARGET) 0x100000 @@ -33,11 +36,15 @@ asm-offsets.s: $(TARGET_SUBARCH)/asm-off boot/mkelf32: boot/mkelf32.c $(HOSTCC) $(HOSTCFLAGS) -o $@ $< +oprofile/oprofile.o: + $(MAKE) -C oprofile + clean: rm -f *.o *.s *~ core boot/*.o boot/*~ boot/core boot/mkelf32 rm -f x86_32/*.o x86_32/*~ x86_32/core rm -f x86_64/*.o x86_64/*~ x86_64/core rm -f mtrr/*.o mtrr/*~ mtrr/core + rm -f oprofile/*.o delete-unfresh-files: # nothing diff -Naurp ../xeno-unstable.bk/xen/arch/x86/microcode.c xen/arch/x86/microcode.c --- ../xeno-unstable.bk/xen/arch/x86/microcode.c 2005-04-14 14:56:31.000000000 -0500 +++ xen/arch/x86/microcode.c 2005-04-15 08:33:55.000000000 -0500 @@ -87,13 +87,6 @@ #define vmalloc(_s) xmalloc_bytes(_s) #define vfree(_p) xfree(_p) #define num_online_cpus() smp_num_cpus -static inline int on_each_cpu( - void (*func) (void *info), void *info, int retry, int wait) -{ - int ret = smp_call_function(func, info, retry, wait); - func(info); - return ret; -} #if 0 MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver"); diff -Naurp ../xeno-unstable.bk/xen/arch/x86/nmi.c xen/arch/x86/nmi.c --- ../xeno-unstable.bk/xen/arch/x86/nmi.c 2005-04-14 14:56:32.000000000 -0500 +++ xen/arch/x86/nmi.c 2005-04-15 08:33:57.000000000 -0500 @@ -5,6 +5,10 @@ * * Started by Ingo Molnar * + * Modified by Aravind Menon for supporting oprofile + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. + * * Fixes: * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog. * Mikael Pettersson : Power Management for local APIC NMI watchdog. @@ -34,6 +38,28 @@ unsigned int nmi_perfctr_msr; /* the MSR extern int logical_proc_id[]; +/* + * lapic_nmi_owner tracks the ownership of the lapic NMI hardware: + * - it may be reserved by some other driver, or not + * - when not reserved by some other driver, it may be used for + * the NMI watchdog, or not + * + * This is maintained separately from nmi_active because the NMI + * watchdog may also be driven from the I/O APIC timer. 
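+ * The Xen oprofile code (arch/x86/oprofile) takes ownership through
+ * reserve_lapic_nmi()/release_lapic_nmi() below, temporarily disabling
+ * the watchdog for the duration of a profiling session.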
+ */ +static spinlock_t lapic_nmi_owner_lock = SPIN_LOCK_UNLOCKED; +static unsigned int lapic_nmi_owner; +#define LAPIC_NMI_WATCHDOG (1<<0) +#define LAPIC_NMI_RESERVED (1<<1) + +/* nmi_active: + * +1: the lapic NMI watchdog is active, but can be disabled + * 0: the lapic NMI watchdog has not been set up, and cannot + * be enabled + * -1: the lapic NMI watchdog is disabled, but can be enabled + */ +int nmi_active; + #define K7_EVNTSEL_ENABLE (1 << 22) #define K7_EVNTSEL_INT (1 << 20) #define K7_EVNTSEL_OS (1 << 17) @@ -70,9 +96,9 @@ extern int logical_proc_id[]; */ #define MSR_P4_IQ_COUNTER0 0x30C #define MSR_P4_IQ_COUNTER1 0x30D -#define MSR_P4_IQ_CCCR0 0x36C -#define MSR_P4_IQ_CCCR1 0x36D -#define MSR_P4_CRU_ESCR0 0x3B8 /* ESCR no. 4 */ +//#define MSR_P4_IQ_CCCR0 0x36C +//#define MSR_P4_IQ_CCCR1 0x36D +//#define MSR_P4_CRU_ESCR0 0x3B8 /* ESCR no. 4 */ #define P4_NMI_CRU_ESCR0 \ (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS0|P4_ESCR_USR0| \ P4_ESCR_OS1|P4_ESCR_USR1) @@ -128,6 +154,69 @@ static inline void nmi_pm_init(void) { } * Original code written by Keith Owens. */ +static void disable_lapic_nmi_watchdog(void) +{ + if (nmi_active <= 0) + return; + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + wrmsr(MSR_K7_EVNTSEL0, 0, 0); + break; + case X86_VENDOR_INTEL: + switch (boot_cpu_data.x86) { + case 6: + wrmsr(MSR_P6_EVNTSEL0, 0, 0); + break; + case 15: + if (logical_proc_id[smp_processor_id()] == 0) + { + wrmsr(MSR_P4_IQ_CCCR0, 0, 0); + wrmsr(MSR_P4_CRU_ESCR0, 0, 0); + } else { + wrmsr(MSR_P4_IQ_CCCR1, 0, 0); + } + break; + } + break; + } + nmi_active = -1; + /* tell do_nmi() and others that we're not active any more */ + nmi_watchdog = 0; +} + +static void enable_lapic_nmi_watchdog(void) +{ + if (nmi_active < 0) { + nmi_watchdog = NMI_LOCAL_APIC; + setup_apic_nmi_watchdog(); + } +} + +int reserve_lapic_nmi(void) +{ + unsigned int old_owner; + spin_lock(&lapic_nmi_owner_lock); + old_owner = lapic_nmi_owner; + lapic_nmi_owner |= LAPIC_NMI_RESERVED; + spin_unlock(&lapic_nmi_owner_lock); + if (old_owner & LAPIC_NMI_RESERVED) + return -EBUSY; + if (old_owner & LAPIC_NMI_WATCHDOG) + disable_lapic_nmi_watchdog(); + return 0; +} + +void release_lapic_nmi(void) +{ + unsigned int new_owner; + spin_lock(&lapic_nmi_owner_lock); + new_owner = lapic_nmi_owner & ~LAPIC_NMI_RESERVED; + lapic_nmi_owner = new_owner; + spin_unlock(&lapic_nmi_owner_lock); + if (new_owner & LAPIC_NMI_WATCHDOG) + enable_lapic_nmi_watchdog(); +} + static void __pminit clear_msr_range(unsigned int base, unsigned int n) { unsigned int i; @@ -252,6 +341,8 @@ void __pminit setup_apic_nmi_watchdog(vo default: return; } + lapic_nmi_owner = LAPIC_NMI_WATCHDOG; + nmi_active = 1; nmi_pm_init(); } @@ -316,3 +407,7 @@ void nmi_watchdog_tick (struct xen_regs } } } + +EXPORT_SYMBOL(reserve_lapic_nmi); +EXPORT_SYMBOL(release_lapic_nmi); + diff -Naurp ../xeno-unstable.bk/xen/arch/x86/oprofile/Makefile xen/arch/x86/oprofile/Makefile --- ../xeno-unstable.bk/xen/arch/x86/oprofile/Makefile 1969-12-31 18:00:00.000000000 -0600 +++ xen/arch/x86/oprofile/Makefile 2005-04-15 08:33:57.000000000 -0500 @@ -0,0 +1,34 @@ + +include $(BASEDIR)/Rules.mk + + +# FLAGS is identical to CFLAGS except for -Werror +FLAGS := -nostdinc -fno-builtin -fno-common -fno-strict-aliasing +FLAGS += -iwithprefix include -Wall -pipe +FLAGS += -I$(BASEDIR)/include -Wno-pointer-arith -Wredundant-decls + +ifeq ($(optimize),y) +FLAGS += -O3 -fomit-frame-pointer +else +x86_32/usercopy.o: CFLAGS += -O1 +endif + +# Prevent floating-point variables from creeping into Xen. 
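+# (-msoft-float below keeps gcc from emitting hardware floating-point
+#  instructions, matching the flags used for the rest of the hypervisor)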
+FLAGS += -msoft-float + +ifeq ($(TARGET_SUBARCH),x86_32) +FLAGS += -m32 -march=i686 +LDFLAGS := --oformat elf32-i386 +endif + +ifeq ($(TARGET_SUBARCH),x86_64) +FLAGS += -m64 -mno-red-zone -fpic -fno-reorder-blocks +FLAGS += -fno-asynchronous-unwind-tables +endif + +default: $(OBJS) + $(LD) $(LDFLAGS) -r -o oprofile.o $(OBJS) + +%.o: %.c $(HDRS) Makefile + $(CC) $(FLAGS) -c $< -o $@ + diff -Naurp ../xeno-unstable.bk/xen/arch/x86/oprofile/nmi_int.c xen/arch/x86/oprofile/nmi_int.c --- ../xeno-unstable.bk/xen/arch/x86/oprofile/nmi_int.c 1969-12-31 18:00:00.000000000 -0600 +++ xen/arch/x86/oprofile/nmi_int.c 2005-04-15 08:33:57.000000000 -0500 @@ -0,0 +1,430 @@ +/** + * @file nmi_int.c + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon + * + * Modified by Aravind Menon for Xen + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "op_counter.h" +#include "op_x86_model.h" + +static struct op_x86_model_spec const * model; +static struct op_msrs cpu_msrs[NR_CPUS]; +static unsigned long saved_lvtpc[NR_CPUS]; + +#define VIRQ_BITMASK_SIZE (MAX_OPROF_DOMAINS/32 + 1) + +extern int active_domains[MAX_OPROF_DOMAINS]; +extern unsigned int adomains; + +extern struct domain * primary_profiler; +extern struct domain * adomain_ptrs[MAX_OPROF_DOMAINS]; +extern unsigned long virq_ovf_pending[VIRQ_BITMASK_SIZE]; + +extern int is_active(struct domain *d); +extern int active_id(struct domain *d); +extern int is_passive(struct domain *d); +extern int is_profiled(struct domain *d); + + +int nmi_profiling_started = 0; + +int active_virq_count = 0; +int passive_virq_count = 0; +int other_virq_count = 0; +int other_id = -1; +int xen_count = 0; +int dom_count = 0; +int ovf = 0; + +int nmi_callback(struct xen_regs * regs, int cpu) +{ + int xen_mode = 0; + + ovf = model->check_ctrs(cpu, &cpu_msrs[cpu], regs); + xen_mode = RING_0(regs); + if (ovf) { + if (xen_mode) + xen_count++; + else + dom_count++; + + if (is_active(current->domain)) { + /* This is lightly incorrect. If we do not deliver + OVF virtual interrupts in a synchronous + manner, a process switch may happen in the domain + between the point the sample was collected and + the point at which a VIRQ was delivered. However, + it is not safe to call send_guest_virq from this + NMI context, it may lead to a deadlock since NMIs are + unmaskable. One optimization that we can do is + that if the sample occurs while domain code is + runnng, we know that it is safe to call + send_guest_virq, since we know no Xen code + is running at that time. 
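+           Samples taken while Xen code was running are therefore only
+           flagged in virq_ovf_pending[] here; the pending VIRQ_PMC_OVF
+           is delivered later from nmi_sanity_check(), which runs in
+           timer-interrupt context.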
+ However, this may distort the sample distribution, + because we may lose more Xen mode samples.*/ + active_virq_count++; + if (!xen_mode) { + send_guest_virq(current, VIRQ_PMC_OVF); + clear_bit(active_id(current->domain), &virq_ovf_pending[0]); + } else + set_bit(active_id(current->domain), &virq_ovf_pending[0]); + primary_profiler->shared_info->active_samples++; + } + else if (is_passive(current->domain)) { + set_bit(active_id(primary_profiler), &virq_ovf_pending[0]); + passive_virq_count++; + primary_profiler->shared_info->passive_samples++; + } + else { + other_virq_count++; + other_id = current->domain->id; + primary_profiler->shared_info->other_samples++; + } + } + return 1; +} + +static void free_msrs(void) +{ + int i; + for (i = 0; i < NR_CPUS; ++i) { + xfree(cpu_msrs[i].counters); + cpu_msrs[i].counters = NULL; + xfree(cpu_msrs[i].controls); + cpu_msrs[i].controls = NULL; + } +} + +static int allocate_msrs(void) +{ + int success = 1; + + int i; + for (i = 0; i < NR_CPUS; ++i) { + //if (!cpu_online(i)) + if (!test_bit(i, &cpu_online_map)) + continue; + + cpu_msrs[i].counters = xmalloc(struct op_msr); + if (!cpu_msrs[i].counters) { + success = 0; + break; + } + cpu_msrs[i].controls = xmalloc(struct op_msr); + if (!cpu_msrs[i].controls) { + success = 0; + break; + } + } + if (!success) + free_msrs(); + + return success; +} + +static void nmi_cpu_save_registers(struct op_msrs * msrs) +{ + unsigned int const nr_ctrs = model->num_counters; + unsigned int const nr_ctrls = model->num_controls; + struct op_msr * counters = msrs->counters; + struct op_msr * controls = msrs->controls; + unsigned int i; + + for (i = 0; i < nr_ctrs; ++i) { + rdmsr(counters[i].addr, + counters[i].saved.low, + counters[i].saved.high); + } + + for (i = 0; i < nr_ctrls; ++i) { + rdmsr(controls[i].addr, + controls[i].saved.low, + controls[i].saved.high); + } +} + +static void nmi_save_registers(void * dummy) +{ + int cpu = smp_processor_id(); + struct op_msrs * msrs = &cpu_msrs[cpu]; + model->fill_in_addresses(msrs); + nmi_cpu_save_registers(msrs); +} + +int nmi_reserve_counters(void) +{ + if (!allocate_msrs()) + return -ENOMEM; + + /* We walk a thin line between law and rape here. + * We need to be careful to install our NMI handler + * without actually triggering any NMIs as this will + * break the core code horrifically. 
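+	 * The NMI callback itself is only installed later, from
+	 * nmi_enable_virq().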
+ */ + /* Don't we need to do this on all CPUs?*/ + if (reserve_lapic_nmi() < 0) { + free_msrs(); + return -EBUSY; + } + /* We need to serialize save and setup for HT because the subset + * of msrs are distinct for save and setup operations + */ + on_each_cpu(nmi_save_registers, NULL, 0, 1); + return 0; +} + +static void nmi_cpu_setup(void * dummy) +{ + int cpu = smp_processor_id(); + struct op_msrs * msrs = &cpu_msrs[cpu]; + model->setup_ctrs(msrs); +} + +int nmi_setup_events(void) +{ + on_each_cpu(nmi_cpu_setup, NULL, 0, 1); + return 0; +} + +int nmi_enable_virq() +{ + set_nmi_callback(nmi_callback); + return 0; +} + +static void nmi_cpu_start(void * dummy) +{ + int cpu = smp_processor_id(); + struct op_msrs const * msrs = &cpu_msrs[cpu]; + saved_lvtpc[cpu] = apic_read(APIC_LVTPC); + apic_write(APIC_LVTPC, APIC_DM_NMI); + model->start(msrs); +} + +int nmi_start(void) +{ + on_each_cpu(nmi_cpu_start, NULL, 0, 1); + nmi_profiling_started = 1; + return 0; +} + +static void nmi_cpu_stop(void * dummy) +{ + unsigned int v; + int cpu = smp_processor_id(); + struct op_msrs const * msrs = &cpu_msrs[cpu]; + model->stop(msrs); + + /* restoring APIC_LVTPC can trigger an apic error because the delivery + * mode and vector nr combination can be illegal. That's by design: on + * power on apic lvt contain a zero vector nr which are legal only for + * NMI delivery mode. So inhibit apic err before restoring lvtpc + */ + if (!(apic_read(APIC_LVTPC) & APIC_DM_NMI) + || (apic_read(APIC_LVTPC) & APIC_LVT_MASKED)) { + printk("nmi_stop: APIC not good %p\n", apic_read(APIC_LVTPC)); + mdelay(5000); + } + v = apic_read(APIC_LVTERR); + apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); + apic_write(APIC_LVTPC, saved_lvtpc[cpu]); + apic_write(APIC_LVTERR, v); +} + +void nmi_stop(void) +{ + nmi_profiling_started = 0; + on_each_cpu(nmi_cpu_stop, NULL, 0, 1); + active_virq_count = 0; + passive_virq_count = 0; + other_virq_count = 0; + xen_count = 0; + dom_count = 0; +} + +extern unsigned int read_ctr(struct op_msrs const * const msrs, int ctr); + +void nmi_sanity_check(struct xen_regs *regs, int cpu) +{ + int i; + int masked = 0; + + /* We may have missed some NMI interrupts if we were already + in an NMI context at that time. If this happens, then + the counters are not reset and in the case of P4, the + APIC LVT disable mask is set. In both cases we end up + losing samples. On P4, this condition can be detected + by checking the APIC LVT mask. But in P6, we need to + examine the counters for overflow. 
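+	   (nmi_callback() already performs that overflow check via
+	   model->check_ctrs(), so we simply invoke it again from here.)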
So, every timer + interrupt, we check that everything is OK */ + + if (apic_read(APIC_LVTPC) & APIC_LVT_MASKED) + masked = 1; + + nmi_callback(regs, cpu); + + if (ovf && masked) { + if (is_active(current->domain)) + current->domain->shared_info->nmi_restarts++; + else if (is_passive(current->domain)) + primary_profiler->shared_info->nmi_restarts++; + } + + /*if (jiffies %1000 == 0) { + printk("cpu %d: sample count %d %d %d at %u\n", cpu, active_virq_count, passive_virq_count, other_virq_count, jiffies); + printk("other task id %d\n", other_id); + printk("%d in xen, %d in domain\n", xen_count, dom_count); + printk("counters %p %p\n", read_ctr(&cpu_msrs[cpu], 0), read_ctr(&cpu_msrs[cpu], 1)); + }*/ + + + for (i = 0; i < adomains; i++) + if (test_and_clear_bit(i, &virq_ovf_pending[0])) { + send_guest_virq(adomain_ptrs[i], VIRQ_PMC_OVF); + } +} + +void nmi_disable_virq(void) +{ + unset_nmi_callback(); +} + +static void nmi_restore_registers(struct op_msrs * msrs) +{ + unsigned int const nr_ctrs = model->num_counters; + unsigned int const nr_ctrls = model->num_controls; + struct op_msr * counters = msrs->counters; + struct op_msr * controls = msrs->controls; + unsigned int i; + + for (i = 0; i < nr_ctrls; ++i) { + wrmsr(controls[i].addr, + controls[i].saved.low, + controls[i].saved.high); + } + + for (i = 0; i < nr_ctrs; ++i) { + wrmsr(counters[i].addr, + counters[i].saved.low, + counters[i].saved.high); + } +} + +static void nmi_cpu_shutdown(void * dummy) +{ + int cpu = smp_processor_id(); + struct op_msrs * msrs = &cpu_msrs[cpu]; + nmi_restore_registers(msrs); +} + +void nmi_release_counters(void) +{ + on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1); + release_lapic_nmi(); + free_msrs(); +} + +struct op_counter_config counter_config[OP_MAX_COUNTER]; + +static int __init p4_init(void) +{ + __u8 cpu_model = current_cpu_data.x86_model; + + if (cpu_model > 3) + return 0; + +#ifndef CONFIG_SMP + model = &op_p4_spec; + return 1; +#else + //switch (smp_num_siblings) { + if (cpu_has_ht) + { + model = &op_p4_ht2_spec; + return 1; + } + else + { + model = &op_p4_spec; + return 1; + } +#endif + return 0; +} + + +static int __init ppro_init(void) +{ + __u8 cpu_model = current_cpu_data.x86_model; + + if (cpu_model > 0xd) + return 0; + + model = &op_ppro_spec; + return 1; +} + +int nmi_init(int *num_events, int *is_primary) +{ + __u8 vendor = current_cpu_data.x86_vendor; + __u8 family = current_cpu_data.x86; + int prim = 0; + + if (!cpu_has_apic) + return -ENODEV; + + if (primary_profiler == NULL) { + primary_profiler = current->domain; + prim = 1; + } + + if (primary_profiler != current->domain) + goto out; + + switch (vendor) { + case X86_VENDOR_INTEL: + switch (family) { + /* Pentium IV */ + case 0xf: + if (!p4_init()) + return -ENODEV; + break; + /* A P6-class processor */ + case 6: + if (!ppro_init()) + return -ENODEV; + break; + default: + return -ENODEV; + } + break; + default: + return -ENODEV; + } +out: + if (copy_to_user((void *)num_events, (void *)&model->num_counters, sizeof(int))) + return -EFAULT; + if (copy_to_user((void *)is_primary, (void *)&prim, sizeof(int))) + return -EFAULT; + + return 0; +} + diff -Naurp ../xeno-unstable.bk/xen/arch/x86/oprofile/op_counter.h xen/arch/x86/oprofile/op_counter.h --- ../xeno-unstable.bk/xen/arch/x86/oprofile/op_counter.h 1969-12-31 18:00:00.000000000 -0600 +++ xen/arch/x86/oprofile/op_counter.h 2005-04-15 08:33:57.000000000 -0500 @@ -0,0 +1,33 @@ +/** + * @file op_counter.h + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + 
* @author John Levon + * + * Modified by Aravind Menon for Xen + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. + */ + +#ifndef OP_COUNTER_H +#define OP_COUNTER_H + +#define OP_MAX_COUNTER 8 + +/* Per-perfctr configuration as set via + * oprofilefs. + */ +struct op_counter_config { + unsigned long count; + unsigned long enabled; + unsigned long event; + unsigned long kernel; + unsigned long user; + unsigned long unit_mask; +}; + +extern struct op_counter_config counter_config[]; + +#endif /* OP_COUNTER_H */ diff -Naurp ../xeno-unstable.bk/xen/arch/x86/oprofile/op_model_p4.c xen/arch/x86/oprofile/op_model_p4.c --- ../xeno-unstable.bk/xen/arch/x86/oprofile/op_model_p4.c 1969-12-31 18:00:00.000000000 -0600 +++ xen/arch/x86/oprofile/op_model_p4.c 2005-04-15 08:33:57.000000000 -0500 @@ -0,0 +1,744 @@ +/** + * @file op_model_p4.c + * P4 model-specific MSR operations + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author Graydon Hoare + * + * Modified by Aravind Menon for Xen + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. + */ + +#include +#include +#include +#include +#include +#include + +#include "op_x86_model.h" +#include "op_counter.h" + +#define NUM_EVENTS 39 + +#define NUM_COUNTERS_NON_HT 8 +#define NUM_ESCRS_NON_HT 45 +#define NUM_CCCRS_NON_HT 18 +#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT) + +#define NUM_COUNTERS_HT2 4 +#define NUM_ESCRS_HT2 23 +#define NUM_CCCRS_HT2 9 +#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2) + +static unsigned int num_counters = NUM_COUNTERS_NON_HT; + + +/* this has to be checked dynamically since the + hyper-threadedness of a chip is discovered at + kernel boot-time. */ +static inline void setup_num_counters(void) +{ +#ifdef CONFIG_SMP + if (cpu_has_ht) + num_counters = NUM_COUNTERS_HT2; +#endif +} + +static int inline addr_increment(void) +{ +#ifdef CONFIG_SMP + return cpu_has_ht ? 2 : 1; +#else + return 1; +#endif +} + + +/* tables to simulate simplified hardware view of p4 registers */ +struct p4_counter_binding { + int virt_counter; + int counter_address; + int cccr_address; +}; + +struct p4_event_binding { + int escr_select; /* value to put in CCCR */ + int event_select; /* value to put in ESCR */ + struct { + int virt_counter; /* for this counter... */ + int escr_address; /* use this ESCR */ + } bindings[2]; +}; + +/* nb: these CTR_* defines are a duplicate of defines in + event/i386.p4*events. */ + + +#define CTR_BPU_0 (1 << 0) +#define CTR_MS_0 (1 << 1) +#define CTR_FLAME_0 (1 << 2) +#define CTR_IQ_4 (1 << 3) +#define CTR_BPU_2 (1 << 4) +#define CTR_MS_2 (1 << 5) +#define CTR_FLAME_2 (1 << 6) +#define CTR_IQ_5 (1 << 7) + +static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = { + { CTR_BPU_0, MSR_P4_BPU_PERFCTR0, MSR_P4_BPU_CCCR0 }, + { CTR_MS_0, MSR_P4_MS_PERFCTR0, MSR_P4_MS_CCCR0 }, + { CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 }, + { CTR_IQ_4, MSR_P4_IQ_PERFCTR4, MSR_P4_IQ_CCCR4 }, + { CTR_BPU_2, MSR_P4_BPU_PERFCTR2, MSR_P4_BPU_CCCR2 }, + { CTR_MS_2, MSR_P4_MS_PERFCTR2, MSR_P4_MS_CCCR2 }, + { CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 }, + { CTR_IQ_5, MSR_P4_IQ_PERFCTR5, MSR_P4_IQ_CCCR5 } +}; + +#define NUM_UNUSED_CCCRS NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT + +/* All cccr we don't use. 
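+   (p4_setup_ctrs() still clears these -- see "clear cccrs outside our
+   concern" below -- so stale CCCR state cannot raise stray PMIs.)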
*/ +static int p4_unused_cccr[NUM_UNUSED_CCCRS] = { + MSR_P4_BPU_CCCR1, MSR_P4_BPU_CCCR3, + MSR_P4_MS_CCCR1, MSR_P4_MS_CCCR3, + MSR_P4_FLAME_CCCR1, MSR_P4_FLAME_CCCR3, + MSR_P4_IQ_CCCR0, MSR_P4_IQ_CCCR1, + MSR_P4_IQ_CCCR2, MSR_P4_IQ_CCCR3 +}; + +/* p4 event codes in libop/op_event.h are indices into this table. */ + +static struct p4_event_binding p4_events[NUM_EVENTS] = { + + { /* BRANCH_RETIRED */ + 0x05, 0x06, + { {CTR_IQ_4, MSR_P4_CRU_ESCR2}, + {CTR_IQ_5, MSR_P4_CRU_ESCR3} } + }, + + { /* MISPRED_BRANCH_RETIRED */ + 0x04, 0x03, + { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, + { CTR_IQ_5, MSR_P4_CRU_ESCR1} } + }, + + { /* TC_DELIVER_MODE */ + 0x01, 0x01, + { { CTR_MS_0, MSR_P4_TC_ESCR0}, + { CTR_MS_2, MSR_P4_TC_ESCR1} } + }, + + { /* BPU_FETCH_REQUEST */ + 0x00, 0x03, + { { CTR_BPU_0, MSR_P4_BPU_ESCR0}, + { CTR_BPU_2, MSR_P4_BPU_ESCR1} } + }, + + { /* ITLB_REFERENCE */ + 0x03, 0x18, + { { CTR_BPU_0, MSR_P4_ITLB_ESCR0}, + { CTR_BPU_2, MSR_P4_ITLB_ESCR1} } + }, + + { /* MEMORY_CANCEL */ + 0x05, 0x02, + { { CTR_FLAME_0, MSR_P4_DAC_ESCR0}, + { CTR_FLAME_2, MSR_P4_DAC_ESCR1} } + }, + + { /* MEMORY_COMPLETE */ + 0x02, 0x08, + { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0}, + { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} } + }, + + { /* LOAD_PORT_REPLAY */ + 0x02, 0x04, + { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0}, + { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} } + }, + + { /* STORE_PORT_REPLAY */ + 0x02, 0x05, + { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0}, + { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} } + }, + + { /* MOB_LOAD_REPLAY */ + 0x02, 0x03, + { { CTR_BPU_0, MSR_P4_MOB_ESCR0}, + { CTR_BPU_2, MSR_P4_MOB_ESCR1} } + }, + + { /* PAGE_WALK_TYPE */ + 0x04, 0x01, + { { CTR_BPU_0, MSR_P4_PMH_ESCR0}, + { CTR_BPU_2, MSR_P4_PMH_ESCR1} } + }, + + { /* BSQ_CACHE_REFERENCE */ + 0x07, 0x0c, + { { CTR_BPU_0, MSR_P4_BSU_ESCR0}, + { CTR_BPU_2, MSR_P4_BSU_ESCR1} } + }, + + { /* IOQ_ALLOCATION */ + 0x06, 0x03, + { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, + { 0, 0 } } + }, + + { /* IOQ_ACTIVE_ENTRIES */ + 0x06, 0x1a, + { { CTR_BPU_2, MSR_P4_FSB_ESCR1}, + { 0, 0 } } + }, + + { /* FSB_DATA_ACTIVITY */ + 0x06, 0x17, + { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, + { CTR_BPU_2, MSR_P4_FSB_ESCR1} } + }, + + { /* BSQ_ALLOCATION */ + 0x07, 0x05, + { { CTR_BPU_0, MSR_P4_BSU_ESCR0}, + { 0, 0 } } + }, + + { /* BSQ_ACTIVE_ENTRIES */ + 0x07, 0x06, + { { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */}, + { 0, 0 } } + }, + + { /* X87_ASSIST */ + 0x05, 0x03, + { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, + { CTR_IQ_5, MSR_P4_CRU_ESCR3} } + }, + + { /* SSE_INPUT_ASSIST */ + 0x01, 0x34, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* PACKED_SP_UOP */ + 0x01, 0x08, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* PACKED_DP_UOP */ + 0x01, 0x0c, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* SCALAR_SP_UOP */ + 0x01, 0x0a, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* SCALAR_DP_UOP */ + 0x01, 0x0e, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* 64BIT_MMX_UOP */ + 0x01, 0x02, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* 128BIT_MMX_UOP */ + 0x01, 0x1a, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* X87_FP_UOP */ + 0x01, 0x04, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* X87_SIMD_MOVES_UOP */ + 0x01, 0x2e, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + 
+ { /* MACHINE_CLEAR */ + 0x05, 0x02, + { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, + { CTR_IQ_5, MSR_P4_CRU_ESCR3} } + }, + + { /* GLOBAL_POWER_EVENTS */ + 0x06, 0x13 /* older manual says 0x05, newer 0x13 */, + { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, + { CTR_BPU_2, MSR_P4_FSB_ESCR1} } + }, + + { /* TC_MS_XFER */ + 0x00, 0x05, + { { CTR_MS_0, MSR_P4_MS_ESCR0}, + { CTR_MS_2, MSR_P4_MS_ESCR1} } + }, + + { /* UOP_QUEUE_WRITES */ + 0x00, 0x09, + { { CTR_MS_0, MSR_P4_MS_ESCR0}, + { CTR_MS_2, MSR_P4_MS_ESCR1} } + }, + + { /* FRONT_END_EVENT */ + 0x05, 0x08, + { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, + { CTR_IQ_5, MSR_P4_CRU_ESCR3} } + }, + + { /* EXECUTION_EVENT */ + 0x05, 0x0c, + { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, + { CTR_IQ_5, MSR_P4_CRU_ESCR3} } + }, + + { /* REPLAY_EVENT */ + 0x05, 0x09, + { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, + { CTR_IQ_5, MSR_P4_CRU_ESCR3} } + }, + + { /* INSTR_RETIRED */ + 0x04, 0x02, + { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, + { CTR_IQ_5, MSR_P4_CRU_ESCR1} } + }, + + { /* UOPS_RETIRED */ + 0x04, 0x01, + { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, + { CTR_IQ_5, MSR_P4_CRU_ESCR1} } + }, + + { /* UOP_TYPE */ + 0x02, 0x02, + { { CTR_IQ_4, MSR_P4_RAT_ESCR0}, + { CTR_IQ_5, MSR_P4_RAT_ESCR1} } + }, + + { /* RETIRED_MISPRED_BRANCH_TYPE */ + 0x02, 0x05, + { { CTR_MS_0, MSR_P4_TBPU_ESCR0}, + { CTR_MS_2, MSR_P4_TBPU_ESCR1} } + }, + + { /* RETIRED_BRANCH_TYPE */ + 0x02, 0x04, + { { CTR_MS_0, MSR_P4_TBPU_ESCR0}, + { CTR_MS_2, MSR_P4_TBPU_ESCR1} } + } +}; + + +#define MISC_PMC_ENABLED_P(x) ((x) & 1 << 7) + +#define ESCR_RESERVED_BITS 0x80000003 +#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS) +#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2)) +#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3)) +#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1))) +#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1)) +#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25)) +#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9)) +#define ESCR_READ(escr,high,ev,i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0) +#define ESCR_WRITE(escr,high,ev,i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0) + +#define CCCR_RESERVED_BITS 0x38030FFF +#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS) +#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000) +#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13)) +#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1<<26)) +#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27)) +#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12)) +#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12)) +#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0) +#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0) +#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31)) +#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31))) + +#define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0) +#define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0) +#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000)) + + +/* this assigns a "stagger" to the current CPU, which is used throughout + the code in this module as an extra array offset, to select the "even" + or "odd" part of all the divided resources. 
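+   In this Xen port get_stagger() below is hard-wired to return 0, so
+   both hyperthreads of a package currently address the same set of
+   counters.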
*/ +static unsigned int get_stagger(void) +{ +#ifdef CONFIG_SMP + /*int cpu = smp_processor_id(); + return (cpu != first_cpu(cpu_sibling_map[cpu]));*/ + /* We want the two logical cpus of a physical cpu to use + disjoint set of counters. The following code is wrong. */ + return 0; +#endif + return 0; +} + + +/* finally, mediate access to a real hardware counter + by passing a "virtual" counter numer to this macro, + along with your stagger setting. */ +#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger))) + +static unsigned long reset_value[NUM_COUNTERS_NON_HT]; + + +static void p4_fill_in_addresses(struct op_msrs * const msrs) +{ + unsigned int i; + unsigned int addr, stag; + + setup_num_counters(); + stag = get_stagger(); + + /* the counter registers we pay attention to */ + for (i = 0; i < num_counters; ++i) { + msrs->counters[i].addr = + p4_counters[VIRT_CTR(stag, i)].counter_address; + } + + /* FIXME: bad feeling, we don't save the 10 counters we don't use. */ + + /* 18 CCCR registers */ + for (i = 0, addr = MSR_P4_BPU_CCCR0 + stag; + addr <= MSR_P4_IQ_CCCR5; ++i, addr += addr_increment()) { + msrs->controls[i].addr = addr; + } + + /* 43 ESCR registers in three or four discontiguous group */ + for (addr = MSR_P4_BSU_ESCR0 + stag; + addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) { + msrs->controls[i].addr = addr; + } + + /* no IQ_ESCR0/1 on some models, we save a seconde time BSU_ESCR0/1 + * to avoid special case in nmi_{save|restore}_registers() */ + if (boot_cpu_data.x86_model >= 0x3) { + for (addr = MSR_P4_BSU_ESCR0 + stag; + addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) { + msrs->controls[i].addr = addr; + } + } else { + for (addr = MSR_P4_IQ_ESCR0 + stag; + addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) { + msrs->controls[i].addr = addr; + } + } + + for (addr = MSR_P4_RAT_ESCR0 + stag; + addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) { + msrs->controls[i].addr = addr; + } + + for (addr = MSR_P4_MS_ESCR0 + stag; + addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) { + msrs->controls[i].addr = addr; + } + + for (addr = MSR_P4_IX_ESCR0 + stag; + addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) { + msrs->controls[i].addr = addr; + } + + /* there are 2 remaining non-contiguously located ESCRs */ + + if (num_counters == NUM_COUNTERS_NON_HT) { + /* standard non-HT CPUs handle both remaining ESCRs*/ + msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; + msrs->controls[i++].addr = MSR_P4_CRU_ESCR4; + + } else if (stag == 0) { + /* HT CPUs give the first remainder to the even thread, as + the 32nd control register */ + msrs->controls[i++].addr = MSR_P4_CRU_ESCR4; + + } else { + /* and two copies of the second to the odd thread, + for the 22st and 23nd control registers */ + msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; + msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; + } +} + + +static void pmc_setup_one_p4_counter(unsigned int ctr) +{ + int i; + int const maxbind = 2; + unsigned int cccr = 0; + unsigned int escr = 0; + unsigned int high = 0; + unsigned int counter_bit; + struct p4_event_binding *ev = NULL; + unsigned int stag; + + stag = get_stagger(); + + /* convert from counter *number* to counter *bit* */ + counter_bit = 1 << VIRT_CTR(stag, ctr); + + /* find our event binding structure. 
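+	   (counter_config[ctr].event is a 1-based index into p4_events[],
+	   range-checked just below.)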
*/ + if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) { + printk(KERN_ERR + "oprofile: P4 event code 0x%lx out of range\n", + counter_config[ctr].event); + return; + } + + ev = &(p4_events[counter_config[ctr].event - 1]); + + for (i = 0; i < maxbind; i++) { + if (ev->bindings[i].virt_counter & counter_bit) { + + /* modify ESCR */ + ESCR_READ(escr, high, ev, i); + ESCR_CLEAR(escr); + if (stag == 0) { + ESCR_SET_USR_0(escr, counter_config[ctr].user); + ESCR_SET_OS_0(escr, counter_config[ctr].kernel); + } else { + ESCR_SET_USR_1(escr, counter_config[ctr].user); + ESCR_SET_OS_1(escr, counter_config[ctr].kernel); + } + ESCR_SET_EVENT_SELECT(escr, ev->event_select); + ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask); + ESCR_WRITE(escr, high, ev, i); + + /* modify CCCR */ + CCCR_READ(cccr, high, VIRT_CTR(stag, ctr)); + CCCR_CLEAR(cccr); + CCCR_SET_REQUIRED_BITS(cccr); + CCCR_SET_ESCR_SELECT(cccr, ev->escr_select); + if (stag == 0) { + CCCR_SET_PMI_OVF_0(cccr); + } else { + CCCR_SET_PMI_OVF_1(cccr); + } + CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr)); + return; + } + } + + printk(KERN_ERR + "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n", + counter_config[ctr].event, stag, ctr); +} + + +static void p4_setup_ctrs(struct op_msrs const * const msrs) +{ + unsigned int i; + unsigned int low, high; + unsigned int addr; + unsigned int stag; + + stag = get_stagger(); + + rdmsr(MSR_IA32_MISC_ENABLE, low, high); + if (! MISC_PMC_ENABLED_P(low)) { + printk(KERN_ERR "oprofile: P4 PMC not available\n"); + return; + } + + /* clear the cccrs we will use */ + for (i = 0 ; i < num_counters ; i++) { + rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); + CCCR_CLEAR(low); + CCCR_SET_REQUIRED_BITS(low); + wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); + } + + /* clear cccrs outside our concern */ + for (i = stag ; i < NUM_UNUSED_CCCRS ; i += addr_increment()) { + rdmsr(p4_unused_cccr[i], low, high); + CCCR_CLEAR(low); + CCCR_SET_REQUIRED_BITS(low); + wrmsr(p4_unused_cccr[i], low, high); + } + + /* clear all escrs (including those outside our concern) */ + for (addr = MSR_P4_BSU_ESCR0 + stag; + addr < MSR_P4_IQ_ESCR0; addr += addr_increment()) { + wrmsr(addr, 0, 0); + } + + /* On older models clear also MSR_P4_IQ_ESCR0/1 */ + if (boot_cpu_data.x86_model < 0x3) { + wrmsr(MSR_P4_IQ_ESCR0, 0, 0); + wrmsr(MSR_P4_IQ_ESCR1, 0, 0); + } + + for (addr = MSR_P4_RAT_ESCR0 + stag; + addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) { + wrmsr(addr, 0, 0); + } + + for (addr = MSR_P4_MS_ESCR0 + stag; + addr <= MSR_P4_TC_ESCR1; addr += addr_increment()){ + wrmsr(addr, 0, 0); + } + + for (addr = MSR_P4_IX_ESCR0 + stag; + addr <= MSR_P4_CRU_ESCR3; addr += addr_increment()){ + wrmsr(addr, 0, 0); + } + + if (num_counters == NUM_COUNTERS_NON_HT) { + wrmsr(MSR_P4_CRU_ESCR4, 0, 0); + wrmsr(MSR_P4_CRU_ESCR5, 0, 0); + } else if (stag == 0) { + wrmsr(MSR_P4_CRU_ESCR4, 0, 0); + } else { + wrmsr(MSR_P4_CRU_ESCR5, 0, 0); + } + + /* setup all counters */ + for (i = 0 ; i < num_counters ; ++i) { + if (counter_config[i].enabled) { + reset_value[i] = counter_config[i].count; + pmc_setup_one_p4_counter(i); + CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i)); + } else { + reset_value[i] = 0; + } + } +} + + +extern void pmc_log_event(struct domain *d, unsigned int eip, int mode, int event); +extern int is_profiled(struct domain * d); +extern struct domain * primary_profiler; + +static int p4_check_ctrs(unsigned int const cpu, + struct op_msrs const * const msrs, + 
struct xen_regs * const regs) +{ + unsigned long ctr, low, high, stag, real; + int i, ovf = 0; + unsigned long eip = regs->eip; + int mode = 0; + + if (RING_1(regs)) + mode = 1; + else if (RING_0(regs)) + mode = 2; + + stag = get_stagger(); + + for (i = 0; i < num_counters; ++i) { + if (!reset_value[i]) + continue; + + /* + * there is some eccentricity in the hardware which + * requires that we perform 2 extra corrections: + * + * - check both the CCCR:OVF flag for overflow and the + * counter high bit for un-flagged overflows. + * + * - write the counter back twice to ensure it gets + * updated properly. + * + * the former seems to be related to extra NMIs happening + * during the current NMI; the latter is reported as errata + * N15 in intel doc 249199-029, pentium 4 specification + * update, though their suggested work-around does not + * appear to solve the problem. + */ + + real = VIRT_CTR(stag, i); + + CCCR_READ(low, high, real); + CTR_READ(ctr, high, real); + if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) { + pmc_log_event(current, eip, mode, i); + CTR_WRITE(reset_value[i], real); + CCCR_CLEAR_OVF(low); + CCCR_WRITE(low, high, real); + CTR_WRITE(reset_value[i], real); + ovf = 1; + } + } + + /* P4 quirk: you have to re-unmask the apic vector */ + apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); + + /* See op_model_ppro.c */ + return ovf; +} + + +static void p4_start(struct op_msrs const * const msrs) +{ + unsigned int low, high, stag; + int i; + + stag = get_stagger(); + + for (i = 0; i < num_counters; ++i) { + if (!reset_value[i]) + continue; + CCCR_READ(low, high, VIRT_CTR(stag, i)); + CCCR_SET_ENABLE(low); + CCCR_WRITE(low, high, VIRT_CTR(stag, i)); + } +} + + +static void p4_stop(struct op_msrs const * const msrs) +{ + unsigned int low, high, stag; + int i; + + stag = get_stagger(); + + for (i = 0; i < num_counters; ++i) { + CCCR_READ(low, high, VIRT_CTR(stag, i)); + CCCR_SET_DISABLE(low); + CCCR_WRITE(low, high, VIRT_CTR(stag, i)); + } +} + + +#ifdef CONFIG_SMP +struct op_x86_model_spec const op_p4_ht2_spec = { + .num_counters = NUM_COUNTERS_HT2, + .num_controls = NUM_CONTROLS_HT2, + .fill_in_addresses = &p4_fill_in_addresses, + .setup_ctrs = &p4_setup_ctrs, + .check_ctrs = &p4_check_ctrs, + .start = &p4_start, + .stop = &p4_stop +}; +#endif + +struct op_x86_model_spec const op_p4_spec = { + .num_counters = NUM_COUNTERS_NON_HT, + .num_controls = NUM_CONTROLS_NON_HT, + .fill_in_addresses = &p4_fill_in_addresses, + .setup_ctrs = &p4_setup_ctrs, + .check_ctrs = &p4_check_ctrs, + .start = &p4_start, + .stop = &p4_stop +}; diff -Naurp ../xeno-unstable.bk/xen/arch/x86/oprofile/op_model_ppro.c xen/arch/x86/oprofile/op_model_ppro.c --- ../xeno-unstable.bk/xen/arch/x86/oprofile/op_model_ppro.c 1969-12-31 18:00:00.000000000 -0600 +++ xen/arch/x86/oprofile/op_model_ppro.c 2005-04-15 08:33:57.000000000 -0500 @@ -0,0 +1,166 @@ +/** + * @file op_model_ppro.h + * pentium pro / P6 model-specific MSR operations + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon + * @author Philippe Elie + * @author Graydon Hoare + * + * Modified by Aravind Menon for Xen + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include "op_x86_model.h" +#include "op_counter.h" + +#define NUM_COUNTERS 2 +#define NUM_CONTROLS 2 + +#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0) +#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), -1);} while (0) +#define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) + +#define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls[(c)].addr), (l), (h));} while (0) +#define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls[(c)].addr), (l), (h));} while (0) +#define CTRL_SET_ACTIVE(n) (n |= (1<<22)) +#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22)) +#define CTRL_CLEAR(x) (x &= (1<<21)) +#define CTRL_SET_ENABLE(val) (val |= 1<<20) +#define CTRL_SET_USR(val,u) (val |= ((u & 1) << 16)) +#define CTRL_SET_KERN(val,k) (val |= ((k & 1) << 17)) +#define CTRL_SET_UM(val, m) (val |= (m << 8)) +#define CTRL_SET_EVENT(val, e) (val |= e) + +static unsigned long reset_value[NUM_COUNTERS]; + +static void ppro_fill_in_addresses(struct op_msrs * const msrs) +{ + msrs->counters[0].addr = MSR_P6_PERFCTR0; + msrs->counters[1].addr = MSR_P6_PERFCTR1; + + msrs->controls[0].addr = MSR_P6_EVNTSEL0; + msrs->controls[1].addr = MSR_P6_EVNTSEL1; +} + + +static void ppro_setup_ctrs(struct op_msrs const * const msrs) +{ + unsigned int low, high; + int i; + + /* clear all counters */ + for (i = 0 ; i < NUM_CONTROLS; ++i) { + CTRL_READ(low, high, msrs, i); + CTRL_CLEAR(low); + CTRL_WRITE(low, high, msrs, i); + } + + /* avoid a false detection of ctr overflows in NMI handler */ + for (i = 0; i < NUM_COUNTERS; ++i) { + CTR_WRITE(1, msrs, i); + } + + /* enable active counters */ + for (i = 0; i < NUM_COUNTERS; ++i) { + if (counter_config[i].enabled) { + reset_value[i] = counter_config[i].count; + + CTR_WRITE(counter_config[i].count, msrs, i); + + CTRL_READ(low, high, msrs, i); + CTRL_CLEAR(low); + CTRL_SET_ENABLE(low); + CTRL_SET_USR(low, counter_config[i].user); + CTRL_SET_KERN(low, counter_config[i].kernel); + CTRL_SET_UM(low, counter_config[i].unit_mask); + CTRL_SET_EVENT(low, counter_config[i].event); + CTRL_WRITE(low, high, msrs, i); + } + } +} + +extern void pmc_log_event(struct domain *d, unsigned int eip, int mode, int event); +extern int is_profiled(struct domain * d); +extern struct domain * primary_profiler; + +static int ppro_check_ctrs(unsigned int const cpu, + struct op_msrs const * const msrs, + struct xen_regs * const regs) +{ + unsigned int low, high; + int i, ovf = 0; + unsigned long eip = regs->eip; + int mode = 0; + + if (RING_1(regs)) + mode = 1; + else if (RING_0(regs)) + mode = 2; + + for (i = 0 ; i < NUM_COUNTERS; ++i) { + CTR_READ(low, high, msrs, i); + if (CTR_OVERFLOWED(low)) { + pmc_log_event(current, eip, mode, i); + CTR_WRITE(reset_value[i], msrs, i); + ovf = 1; + } + } + + /* Only P6 based Pentium M need to re-unmask the apic vector but it + * doesn't hurt other P6 variant */ + apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); + + /* We can't work out if we really handled an interrupt. We + * might have caught a *second* counter just after overflowing + * the interrupt for this counter then arrives + * and we don't find a counter that's overflowed, so we + * would return 0 and get dazed + confused. Instead we always + * assume we found an overflow. This sucks. 
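+	 * (In this Xen port the return value is ovf, i.e. an overflow is
+	 * only reported when one of the counters really wrapped.)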
+ */ + return ovf; +} + + +static void ppro_start(struct op_msrs const * const msrs) +{ + unsigned int low,high; + CTRL_READ(low, high, msrs, 0); + CTRL_SET_ACTIVE(low); + CTRL_WRITE(low, high, msrs, 0); +} + +static void ppro_stop(struct op_msrs const * const msrs) +{ + unsigned int low,high; + CTRL_READ(low, high, msrs, 0); + CTRL_SET_INACTIVE(low); + CTRL_WRITE(low, high, msrs, 0); +} + +unsigned int read_ctr(struct op_msrs const * const msrs, int i) +{ + unsigned int low, high; + CTR_READ(low, high, msrs, i); + return low; +} + +struct op_x86_model_spec const op_ppro_spec = { + .num_counters = NUM_COUNTERS, + .num_controls = NUM_CONTROLS, + .fill_in_addresses = &ppro_fill_in_addresses, + .setup_ctrs = &ppro_setup_ctrs, + .check_ctrs = &ppro_check_ctrs, + .start = &ppro_start, + .stop = &ppro_stop +}; diff -Naurp ../xeno-unstable.bk/xen/arch/x86/oprofile/op_x86_model.h xen/arch/x86/oprofile/op_x86_model.h --- ../xeno-unstable.bk/xen/arch/x86/oprofile/op_x86_model.h 1969-12-31 18:00:00.000000000 -0600 +++ xen/arch/x86/oprofile/op_x86_model.h 2005-04-15 08:33:57.000000000 -0500 @@ -0,0 +1,55 @@ +/** + * @file op_x86_model.h + * interface to x86 model-specific MSR operations + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author Graydon Hoare + * + * Modified by Aravind Menon for Xen + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. + */ + +#ifndef OP_X86_MODEL_H +#define OP_X86_MODEL_H + +struct op_saved_msr { + unsigned int high; + unsigned int low; +}; + +struct op_msr { + unsigned long addr; + struct op_saved_msr saved; +}; + +struct op_msrs { + struct op_msr * counters; + struct op_msr * controls; +}; + +struct pt_regs; + +/* The model vtable abstracts the differences between + * various x86 CPU model's perfctr support. + */ +struct op_x86_model_spec { + unsigned int const num_counters; + unsigned int const num_controls; + void (*fill_in_addresses)(struct op_msrs * const msrs); + void (*setup_ctrs)(struct op_msrs const * const msrs); + int (*check_ctrs)(unsigned int const cpu, + struct op_msrs const * const msrs, + struct xen_regs * const regs); + void (*start)(struct op_msrs const * const msrs); + void (*stop)(struct op_msrs const * const msrs); +}; + +extern struct op_x86_model_spec const op_ppro_spec; +extern struct op_x86_model_spec const op_p4_spec; +extern struct op_x86_model_spec const op_p4_ht2_spec; +extern struct op_x86_model_spec const op_athlon_spec; + +#endif /* OP_X86_MODEL_H */ diff -Naurp ../xeno-unstable.bk/xen/arch/x86/oprofile/pmc.c xen/arch/x86/oprofile/pmc.c --- ../xeno-unstable.bk/xen/arch/x86/oprofile/pmc.c 1969-12-31 18:00:00.000000000 -0600 +++ xen/arch/x86/oprofile/pmc.c 2005-04-15 08:33:57.000000000 -0500 @@ -0,0 +1,281 @@ +/* + * Copyright (C) 2005 Hewlett-Packard Co. 
+ * written by Aravind Menon, email: xenoprof@xxxxxxxxxxxxx + */ + +#include + +#include "op_counter.h" + +int active_domains[MAX_OPROF_DOMAINS]; +int passive_domains[MAX_OPROF_DOMAINS]; +unsigned int adomains = 0; +unsigned int pdomains = 0; +unsigned int activated = 0; + +#define VIRQ_BITMASK_SIZE (MAX_OPROF_DOMAINS/32 + 1) + +struct domain * primary_profiler = NULL; +struct domain * adomain_ptrs[MAX_OPROF_DOMAINS]; +unsigned int virq_ovf_pending[VIRQ_BITMASK_SIZE]; + +int is_active(struct domain *d) +{ + int i; + for (i = 0; i < adomains; i++) + if (d->id == active_domains[i]) + return 1; + return 0; +} + +int active_id(struct domain *d) +{ + int i; + for (i = 0; i < adomains; i++) + if (d == adomain_ptrs[i]) + return i; + return -1; +} + +void free_adomain_ptrs() +{ + int i; + int num = adomains; + + adomains = 0; + for (i = 0; i < VIRQ_BITMASK_SIZE; i++) + virq_ovf_pending[i] = 0; + + for (i = 0; i < num; i++) { + put_domain(adomain_ptrs[i]); + adomain_ptrs[i] = NULL; + } +} + +int set_adomain_ptrs(int num) +{ + int i; + struct domain *d; + + for (i = 0; i < VIRQ_BITMASK_SIZE; i++) + virq_ovf_pending[i] = 0; + + for (i = 0; i < num; i++) { + d = find_domain_by_id(active_domains[i]); + if (!d) { + free_adomain_ptrs(); + return -EFAULT; + } + adomain_ptrs[i] = d; + adomains++; + } + return 0; +} + +int set_active(struct domain *d) +{ + if (is_active(d)) + return 0; + /* hack if we run out of space */ + if (adomains >= MAX_OPROF_DOMAINS) { + adomains--; + put_domain(adomain_ptrs[adomains]); + } + active_domains[adomains] = d->id; + if (get_domain(d)) + adomain_ptrs[adomains++] = d; + else { + free_adomain_ptrs(); + return -EFAULT; + } + return 0; +} + +int is_passive(struct domain *d) +{ + int i; + for (i = 0; i < pdomains; i++) + if (d->id == passive_domains[i]) + return 1; + return 0; +} + +int is_profiled(struct domain *d) +{ + if (is_active(d) || is_passive(d)) + return 1; + return 0; +} + +void pmc_log_event(struct domain *d, unsigned int eip, int mode, int event) +{ + shared_info_t *s = NULL; + struct domain *dest = d; + int head = 0; + + if (!is_profiled(d)) + return; + + if (is_passive(d)) { + dest = primary_profiler; + goto log_passive; + } + +log_active: + s = dest->shared_info; + + head = s->event_head; + if (head >= MAX_OPROF_EVENTS) + head = 0; + + if (s->losing_samples) + s->samples_lost++; + if (head == s->event_tail - 1 || (head == MAX_OPROF_EVENTS - 1 && s->event_tail == 0)) + s->losing_samples = 1; + + s->event_log[head].eip = eip; + s->event_log[head].mode = mode; + s->event_log[head].event = event; + head++; + s->event_head = head; + return; + +log_passive: + /* We use the following inefficient format for logging events from other + domains. We put a special record indicating that the next record is + for another domain. 
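+	   The escape record is written with eip == ~1UL and mode == ~0,
+	   and its event field carries the passive domain's id; the real
+	   sample is then appended via the log_active path.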
This is done for each sample from another + domain */ + s = dest->shared_info; + + head = s->event_head; + if (head >= MAX_OPROF_EVENTS) + head = 0; + + if (s->losing_samples) + s->samples_lost++; + if (head == s->event_tail - 1 || (head == MAX_OPROF_EVENTS - 1 && s->event_tail == 0)) + s->losing_samples = 1; + + s->event_log[head].eip = ~1UL; + s->event_log[head].mode = ~0; + s->event_log[head].event = d->id; + head++; + s->event_head = head; + goto log_active; +} + +static void pmc_event_init(struct domain *d) +{ + shared_info_t *s = d->shared_info; + s->event_head = 0; + s->event_tail = 0; + s->losing_samples = 0; + s->samples_lost = 0; + s->nmi_restarts = 0; + s->active_samples = 0; + s->passive_samples = 0; + s->other_samples = 0; +} + +extern int nmi_init(int *num_events, int *is_primary); +extern int nmi_reserve_counters(void); +extern int nmi_setup_events(void); +extern int nmi_enable_virq(void); +extern int nmi_start(void); +extern void nmi_stop(void); +extern void nmi_disable_virq(void); +extern void nmi_release_counters(void); + +#define PRIV_OP(op) ((op == PMC_SET_ACTIVE) || (op == PMC_SET_PASSIVE) || (op == PMC_RESERVE_COUNTERS) \ + || (op == PMC_SETUP_EVENTS) || (op == PMC_START) || (op == PMC_STOP) \ + || (op == PMC_RELEASE_COUNTERS) || (op == PMC_SHUTDOWN)) + +int do_pmc_op(int op, unsigned int arg1, unsigned int arg2) +{ + int ret = 0; + + if (PRIV_OP(op) && current->domain != primary_profiler) + return -EPERM; + + switch (op) { + case PMC_INIT: + ret = nmi_init((int *)arg1, (int *)arg2); + break; + + case PMC_SET_ACTIVE: + if (adomains != 0) + return -EPERM; + if (copy_from_user((void *)&active_domains, + (void *)arg1, arg2*sizeof(int))) + return -EFAULT; + if (set_adomain_ptrs(arg2)) + return -EFAULT; + if (set_active(current->domain)) + return -EFAULT; + break; + + case PMC_SET_PASSIVE: + if (pdomains != 0) + return -EPERM; + if (copy_from_user((void *)&passive_domains, + (void *)arg1, arg2*sizeof(int))) + return -EFAULT; + pdomains = arg2; + break; + + case PMC_RESERVE_COUNTERS: + ret = nmi_reserve_counters(); + break; + + case PMC_SETUP_EVENTS: + if (copy_from_user((void *)&counter_config, + (void *)arg1, arg2*sizeof(struct op_counter_config))) + return -EFAULT; + ret = nmi_setup_events(); + break; + + case PMC_ENABLE_VIRQ: + if (!is_active(current->domain)) { + if (current->domain != primary_profiler) + return -EPERM; + else + set_active(current->domain); + } + ret = nmi_enable_virq(); + pmc_event_init(current->domain); + activated++; + break; + + case PMC_START: + if (activated < adomains) + return -EPERM; + ret = nmi_start(); + break; + + case PMC_STOP: + nmi_stop(); + break; + + case PMC_DISABLE_VIRQ: + if (!is_active(current->domain)) + return -EPERM; + nmi_disable_virq(); + activated--; + break; + + case PMC_RELEASE_COUNTERS: + nmi_release_counters(); + break; + + case PMC_SHUTDOWN: + free_adomain_ptrs(); + pdomains = 0; + activated = 0; + primary_profiler = NULL; + break; + + default: + ret = -EINVAL; + } + return ret; +} diff -Naurp ../xeno-unstable.bk/xen/arch/x86/traps.c xen/arch/x86/traps.c --- ../xeno-unstable.bk/xen/arch/x86/traps.c 2005-04-15 08:27:45.000000000 -0500 +++ xen/arch/x86/traps.c 2005-04-15 08:33:59.000000000 -0500 @@ -2,6 +2,10 @@ * arch/x86/traps.c * * Modifications to Linux original are copyright (c) 2002-2004, K A Fraser + * + * Modified by Aravind Menon for supporting oprofile + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -51,6 +55,7 @@ #include #include #include +#include #include #include @@ -898,13 +903,12 @@ static void unknown_nmi_error(unsigned c printk("Do you have a strange power saving mode enabled?\n"); } -asmlinkage void do_nmi(struct xen_regs *regs, unsigned long reason) +static void default_do_nmi(struct xen_regs * regs, unsigned long reason) { ++nmi_count(smp_processor_id()); if ( nmi_watchdog ) nmi_watchdog_tick(regs); - if ( reason & 0x80 ) mem_parity_error(regs); else if ( reason & 0x40 ) @@ -913,6 +917,36 @@ asmlinkage void do_nmi(struct xen_regs * unknown_nmi_error((unsigned char)(reason&0xff)); } +static int dummy_nmi_callback(struct xen_regs * regs, int cpu) +{ + return 0; +} + +static nmi_callback_t nmi_callback = dummy_nmi_callback; + +asmlinkage void do_nmi(struct xen_regs * regs, unsigned long reason) +{ + int cpu; + cpu = smp_processor_id(); + + if (!nmi_callback(regs, cpu)) + default_do_nmi(regs, reason); +} + +void set_nmi_callback(nmi_callback_t callback) +{ + nmi_callback = callback; +} + +void unset_nmi_callback(void) +{ + nmi_callback = dummy_nmi_callback; +} + +EXPORT_SYMBOL(set_nmi_callback); +EXPORT_SYMBOL(unset_nmi_callback); + + asmlinkage int math_state_restore(struct xen_regs *regs) { /* Prevent recursion. */ diff -Naurp ../xeno-unstable.bk/xen/arch/x86/traps.c.orig xen/arch/x86/traps.c.orig --- ../xeno-unstable.bk/xen/arch/x86/traps.c.orig 1969-12-31 18:00:00.000000000 -0600 +++ xen/arch/x86/traps.c.orig 2005-04-15 08:33:16.000000000 -0500 @@ -0,0 +1,1194 @@ +/****************************************************************************** + * arch/x86/traps.c + * + * Modifications to Linux original are copyright (c) 2002-2004, K A Fraser + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* + * Copyright (C) 1991, 1992 Linus Torvalds + * + * Pentium III FXSR, SSE support + * Gareth Hughes , May 2000 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * opt_nmi: one of 'ignore', 'dom0', or 'fatal'. + * fatal: Xen prints diagnostic message and then hangs. + * dom0: The NMI is virtualised to DOM0. + * ignore: The NMI error is cleared and ignored. 
+ */ +#ifdef NDEBUG +char opt_nmi[10] = "dom0"; +#else +char opt_nmi[10] = "fatal"; +#endif +string_param("nmi", opt_nmi); + +/* Master table, used by all CPUs on x86/64, and by CPU0 on x86/32.*/ +idt_entry_t idt_table[IDT_ENTRIES]; + +asmlinkage void divide_error(void); +asmlinkage void debug(void); +asmlinkage void nmi(void); +asmlinkage void int3(void); +asmlinkage void overflow(void); +asmlinkage void bounds(void); +asmlinkage void invalid_op(void); +asmlinkage void device_not_available(void); +asmlinkage void coprocessor_segment_overrun(void); +asmlinkage void invalid_TSS(void); +asmlinkage void segment_not_present(void); +asmlinkage void stack_segment(void); +asmlinkage void general_protection(void); +asmlinkage void page_fault(void); +asmlinkage void coprocessor_error(void); +asmlinkage void simd_coprocessor_error(void); +asmlinkage void alignment_check(void); +asmlinkage void spurious_interrupt_bug(void); +asmlinkage void machine_check(void); + +/* + * This is called for faults at very unexpected times (e.g., when interrupts + * are disabled). In such situations we can't do much that is safe. We try to + * print out some tracing and then we just spin. + */ +asmlinkage void fatal_trap(int trapnr, struct xen_regs *regs) +{ + int cpu = smp_processor_id(); + unsigned long cr2; + static char *trapstr[] = { + "divide error", "debug", "nmi", "bkpt", "overflow", "bounds", + "invalid operation", "device not available", "double fault", + "coprocessor segment", "invalid tss", "segment not found", + "stack error", "general protection fault", "page fault", + "spurious interrupt", "coprocessor error", "alignment check", + "machine check", "simd error" + }; + + watchdog_on = 0; + + show_registers(regs); + + if ( trapnr == TRAP_page_fault ) + { + __asm__ __volatile__ ("mov %%cr2,%0" : "=r" (cr2) : ); + printk("Faulting linear address might be %p\n", cr2); + } + + printk("************************************\n"); + printk("CPU%d FATAL TRAP %d (%s), ERROR_CODE %04x%s.\n", + cpu, trapnr, trapstr[trapnr], regs->error_code, + (regs->eflags & X86_EFLAGS_IF) ? "" : ", IN INTERRUPT CONTEXT"); + printk("System shutting down -- need manual reset.\n"); + printk("************************************\n"); + + (void)debugger_trap_fatal(trapnr, regs); + + /* Lock up the console to prevent spurious output from other CPUs. */ + console_force_lock(); + + /* Wait for manual reset. 
*/ + for ( ; ; ) + __asm__ __volatile__ ( "hlt" ); +} + +static inline int do_trap(int trapnr, char *str, + struct xen_regs *regs, + int use_error_code) +{ + struct exec_domain *ed = current; + struct trap_bounce *tb = &ed->arch.trap_bounce; + trap_info_t *ti; + unsigned long fixup; + + DEBUGGER_trap_entry(trapnr, regs); + + if ( !GUEST_MODE(regs) ) + goto xen_fault; + +#ifndef NDEBUG + if ( (ed->arch.traps[trapnr].address == 0) && (ed->domain->id == 0) ) + goto xen_fault; +#endif + + ti = current->arch.traps + trapnr; + tb->flags = TBF_EXCEPTION; + tb->cs = ti->cs; + tb->eip = ti->address; + if ( use_error_code ) + { + tb->flags |= TBF_EXCEPTION_ERRCODE; + tb->error_code = regs->error_code; + } + if ( TI_GET_IF(ti) ) + ed->vcpu_info->evtchn_upcall_mask = 1; + return 0; + + xen_fault: + + if ( likely((fixup = search_exception_table(regs->eip)) != 0) ) + { + DPRINTK("Trap %d: %p -> %p\n", trapnr, regs->eip, fixup); + regs->eip = fixup; + return 0; + } + + DEBUGGER_trap_fatal(trapnr, regs); + + show_registers(regs); + panic("CPU%d FATAL TRAP: vector = %d (%s)\n" + "[error_code=%04x]\n", + smp_processor_id(), trapnr, str, regs->error_code); + return 0; +} + +#define DO_ERROR_NOCODE(trapnr, str, name) \ +asmlinkage int do_##name(struct xen_regs *regs) \ +{ \ + return do_trap(trapnr, str, regs, 0); \ +} + +#define DO_ERROR(trapnr, str, name) \ +asmlinkage int do_##name(struct xen_regs *regs) \ +{ \ + return do_trap(trapnr, str, regs, 1); \ +} + +DO_ERROR_NOCODE( 0, "divide error", divide_error) +DO_ERROR_NOCODE( 4, "overflow", overflow) +DO_ERROR_NOCODE( 5, "bounds", bounds) +DO_ERROR_NOCODE( 6, "invalid operand", invalid_op) +DO_ERROR_NOCODE( 9, "coprocessor segment overrun", coprocessor_segment_overrun) +DO_ERROR(10, "invalid TSS", invalid_TSS) +DO_ERROR(11, "segment not present", segment_not_present) +DO_ERROR(12, "stack segment", stack_segment) +DO_ERROR_NOCODE(16, "fpu error", coprocessor_error) +DO_ERROR(17, "alignment check", alignment_check) +DO_ERROR_NOCODE(19, "simd error", simd_coprocessor_error) + +asmlinkage int do_int3(struct xen_regs *regs) +{ + struct exec_domain *ed = current; + struct trap_bounce *tb = &ed->arch.trap_bounce; + trap_info_t *ti; + + DEBUGGER_trap_entry(TRAP_int3, regs); + + if ( !GUEST_MODE(regs) ) + { + DEBUGGER_trap_fatal(TRAP_int3, regs); + show_registers(regs); + panic("CPU%d FATAL TRAP: vector = 3 (Int3)\n", smp_processor_id()); + } + + ti = current->arch.traps + 3; + tb->flags = TBF_EXCEPTION; + tb->cs = ti->cs; + tb->eip = ti->address; + if ( TI_GET_IF(ti) ) + ed->vcpu_info->evtchn_upcall_mask = 1; + + return 0; +} + +asmlinkage void do_machine_check(struct xen_regs *regs) +{ + fatal_trap(TRAP_machine_check, regs); +} + +void propagate_page_fault(unsigned long addr, u16 error_code) +{ + trap_info_t *ti; + struct exec_domain *ed = current; + struct trap_bounce *tb = &ed->arch.trap_bounce; + + ti = ed->arch.traps + 14; + tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE | TBF_EXCEPTION_CR2; + tb->cr2 = addr; + tb->error_code = error_code; + tb->cs = ti->cs; + tb->eip = ti->address; + if ( TI_GET_IF(ti) ) + ed->vcpu_info->evtchn_upcall_mask = 1; + + ed->arch.guest_cr2 = addr; +} + +asmlinkage int do_page_fault(struct xen_regs *regs) +{ + unsigned long off, addr, fixup; + struct exec_domain *ed = current; + struct domain *d = ed->domain; + int ret; + + __asm__ __volatile__ ("mov %%cr2,%0" : "=r" (addr) : ); + + DEBUGGER_trap_entry(TRAP_page_fault, regs); + + //printk("do_page_fault(eip=%p, va=%p, code=%d)\n", regs->eip, addr, regs->error_code); + + 
perfc_incrc(page_faults); + + if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) ) + { + LOCK_BIGLOCK(d); + if ( unlikely(d->arch.ptwr[PTWR_PT_ACTIVE].l1va) && + unlikely((addr >> L2_PAGETABLE_SHIFT) == + d->arch.ptwr[PTWR_PT_ACTIVE].l2_idx) ) + { + ptwr_flush(d, PTWR_PT_ACTIVE); + UNLOCK_BIGLOCK(d); + return EXCRET_fault_fixed; + } + + if ( (addr < PAGE_OFFSET) && + ((regs->error_code & 3) == 3) && /* write-protection fault */ + ptwr_do_page_fault(d, addr) ) + { + if ( unlikely(shadow_mode_enabled(d)) ) + (void)shadow_fault(addr, regs); + UNLOCK_BIGLOCK(d); + return EXCRET_fault_fixed; + } + UNLOCK_BIGLOCK(d); + } + + if ( unlikely(shadow_mode_enabled(d)) && + ((addr < HYPERVISOR_VIRT_START) || + (shadow_mode_external(d) && GUEST_CONTEXT(ed, regs))) && + shadow_fault(addr, regs) ) + { + return EXCRET_fault_fixed; + } + + if ( unlikely(addr >= LDT_VIRT_START(ed)) && + (addr < (LDT_VIRT_START(ed) + (ed->arch.ldt_ents*LDT_ENTRY_SIZE))) ) + { + /* + * Copy a mapping from the guest's LDT, if it is valid. Otherwise we + * send the fault up to the guest OS to be handled. + */ + extern int map_ldt_shadow_page(unsigned int); + LOCK_BIGLOCK(d); + off = addr - LDT_VIRT_START(ed); + addr = ed->arch.ldt_base + off; + ret = map_ldt_shadow_page(off >> PAGE_SHIFT); + UNLOCK_BIGLOCK(d); + if ( likely(ret) ) + return EXCRET_fault_fixed; /* successfully copied the mapping */ + } + + if ( !GUEST_MODE(regs) ) + goto xen_fault; + +#ifndef NDEBUG + if ( (ed->arch.traps[TRAP_page_fault].address == 0) && (d->id == 0) ) + goto xen_fault; +#endif + + propagate_page_fault(addr, regs->error_code); + return 0; + + xen_fault: + + if ( likely((fixup = search_exception_table(regs->eip)) != 0) ) + { + perfc_incrc(copy_user_faults); + if ( !shadow_mode_enabled(d) ) + DPRINTK("Page fault: %p -> %p\n", regs->eip, fixup); + regs->eip = fixup; + return 0; + } + + DEBUGGER_trap_fatal(TRAP_page_fault, regs); + + show_registers(regs); + show_page_walk(addr); + panic("CPU%d FATAL PAGE FAULT\n" + "[error_code=%04x]\n" + "Faulting linear address might be %p\n", + smp_processor_id(), regs->error_code, addr); + return 0; +} + +long do_fpu_taskswitch(int set) +{ + struct exec_domain *ed = current; + + if ( set ) + { + set_bit(EDF_GUEST_STTS, &ed->ed_flags); + stts(); + } + else + { + clear_bit(EDF_GUEST_STTS, &ed->ed_flags); + if ( test_bit(EDF_USEDFPU, &ed->ed_flags) ) + clts(); + } + + return 0; +} + +/* Has the guest requested sufficient permission for this I/O access? */ +static inline int guest_io_okay( + unsigned int port, unsigned int bytes, + struct exec_domain *ed, struct xen_regs *regs) +{ + u16 x; +#if defined(__x86_64__) + /* If in user mode, switch to kernel mode just to read I/O bitmap. */ + extern void toggle_guest_mode(struct exec_domain *); + int user_mode = !(ed->arch.flags & TF_kernel_mode); +#define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(ed) +#elif defined(__i386__) +#define TOGGLE_MODE() ((void)0) +#endif + + if ( ed->arch.iopl >= (KERNEL_MODE(ed, regs) ? 1 : 3) ) + return 1; + + if ( ed->arch.iobmp_limit > (port + bytes) ) + { + TOGGLE_MODE(); + __get_user(x, (u16 *)(ed->arch.iobmp+(port>>3))); + TOGGLE_MODE(); + if ( (x & (((1<domain; + u16 x; + + if ( d->arch.iobmp_mask != NULL ) + { + x = *(u16 *)(d->arch.iobmp_mask + (port >> 3)); + if ( (x & (((1<eip; + u8 opcode, modrm_reg = 0, rep_prefix = 0; + unsigned int port, i, op_bytes = 4, data; + + /* Legacy prefixes. 
*/ + for ( i = 0; i < 8; i++ ) + { + switch ( opcode = insn_fetch(u8, 1, eip) ) + { + case 0x66: /* operand-size override */ + op_bytes ^= 6; /* switch between 2/4 bytes */ + break; + case 0x67: /* address-size override */ + case 0x2e: /* CS override */ + case 0x3e: /* DS override */ + case 0x26: /* ES override */ + case 0x64: /* FS override */ + case 0x65: /* GS override */ + case 0x36: /* SS override */ + case 0xf0: /* LOCK */ + case 0xf2: /* REPNE/REPNZ */ + break; + case 0xf3: /* REP/REPE/REPZ */ + rep_prefix = 1; + break; + default: + goto done_prefixes; + } + } + done_prefixes: + +#ifdef __x86_64__ + /* REX prefix. */ + if ( (opcode & 0xf0) == 0x40 ) + { + modrm_reg = (opcode & 4) << 1; /* REX.R */ + /* REX.W, REX.B and REX.X do not need to be decoded. */ + opcode = insn_fetch(u8, 1, eip); + } +#endif + + /* Input/Output String instructions. */ + if ( (opcode >= 0x6c) && (opcode <= 0x6f) ) + { + if ( rep_prefix && (regs->ecx == 0) ) + goto done; + + continue_io_string: + switch ( opcode ) + { + case 0x6c: /* INSB */ + op_bytes = 1; + case 0x6d: /* INSW/INSL */ + if ( !guest_io_okay((u16)regs->edx, op_bytes, ed, regs) ) + goto fail; + switch ( op_bytes ) + { + case 1: + data = (u8)inb_user((u16)regs->edx, ed, regs); + if ( put_user((u8)data, (u8 *)regs->edi) ) + goto write_fault; + break; + case 2: + data = (u16)inw_user((u16)regs->edx, ed, regs); + if ( put_user((u16)data, (u16 *)regs->edi) ) + goto write_fault; + break; + case 4: + data = (u32)inl_user((u16)regs->edx, ed, regs); + if ( put_user((u32)data, (u32 *)regs->edi) ) + goto write_fault; + break; + } + regs->edi += (regs->eflags & EF_DF) ? -op_bytes : op_bytes; + break; + + case 0x6e: /* OUTSB */ + op_bytes = 1; + case 0x6f: /* OUTSW/OUTSL */ + if ( !guest_io_okay((u16)regs->edx, op_bytes, ed, regs) ) + goto fail; + switch ( op_bytes ) + { + case 1: + if ( get_user(data, (u8 *)regs->esi) ) + goto read_fault; + outb_user((u8)data, (u16)regs->edx, ed, regs); + break; + case 2: + if ( get_user(data, (u16 *)regs->esi) ) + goto read_fault; + outw_user((u16)data, (u16)regs->edx, ed, regs); + break; + case 4: + if ( get_user(data, (u32 *)regs->esi) ) + goto read_fault; + outl_user((u32)data, (u16)regs->edx, ed, regs); + break; + } + regs->esi += (regs->eflags & EF_DF) ? -op_bytes : op_bytes; + break; + } + + if ( rep_prefix && (--regs->ecx != 0) ) + { + if ( !hypercall_preempt_check() ) + goto continue_io_string; + eip = regs->eip; + } + + goto done; + } + + /* I/O Port and Interrupt Flag instructions. 
*/ + switch ( opcode ) + { + case 0xe4: /* IN imm8,%al */ + op_bytes = 1; + case 0xe5: /* IN imm8,%eax */ + port = insn_fetch(u8, 1, eip); + exec_in: + if ( !guest_io_okay(port, op_bytes, ed, regs) ) + goto fail; + switch ( op_bytes ) + { + case 1: + regs->eax &= ~0xffUL; + regs->eax |= (u8)inb_user(port, ed, regs); + break; + case 2: + regs->eax &= ~0xffffUL; + regs->eax |= (u16)inw_user(port, ed, regs); + break; + case 4: + regs->eax = (u32)inl_user(port, ed, regs); + break; + } + goto done; + + case 0xec: /* IN %dx,%al */ + op_bytes = 1; + case 0xed: /* IN %dx,%eax */ + port = (u16)regs->edx; + goto exec_in; + + case 0xe6: /* OUT %al,imm8 */ + op_bytes = 1; + case 0xe7: /* OUT %eax,imm8 */ + port = insn_fetch(u8, 1, eip); + exec_out: + if ( !guest_io_okay(port, op_bytes, ed, regs) ) + goto fail; + switch ( op_bytes ) + { + case 1: + outb_user((u8)regs->eax, port, ed, regs); + break; + case 2: + outw_user((u16)regs->eax, port, ed, regs); + break; + case 4: + outl_user((u32)regs->eax, port, ed, regs); + break; + } + goto done; + + case 0xee: /* OUT %al,%dx */ + op_bytes = 1; + case 0xef: /* OUT %eax,%dx */ + port = (u16)regs->edx; + goto exec_out; + + case 0xfa: /* CLI */ + case 0xfb: /* STI */ + if ( ed->arch.iopl < (KERNEL_MODE(ed, regs) ? 1 : 3) ) + goto fail; + /* + * This is just too dangerous to allow, in my opinion. Consider if the + * caller then tries to reenable interrupts using POPF: we can't trap + * that and we'll end up with hard-to-debug lockups. Fast & loose will + * do for us. :-) + */ + /*ed->vcpu_info->evtchn_upcall_mask = (opcode == 0xfa);*/ + goto done; + + case 0x0f: /* Two-byte opcode */ + break; + + default: + goto fail; + } + + /* Remaining instructions only emulated from guest kernel. */ + if ( !KERNEL_MODE(ed, regs) ) + goto fail; + + /* Privileged (ring 0) instructions. */ + opcode = insn_fetch(u8, 1, eip); + switch ( opcode ) + { + case 0x06: /* CLTS */ + (void)do_fpu_taskswitch(0); + break; + + case 0x09: /* WBINVD */ + /* Ignore the instruction if unprivileged. */ + if ( !IS_CAPABLE_PHYSDEV(ed->domain) ) + DPRINTK("Non-physdev domain attempted WBINVD.\n"); + else + wbinvd(); + break; + + case 0x20: /* MOV CR?, */ + opcode = insn_fetch(u8, 1, eip); + if ( (opcode & 0xc0) != 0xc0 ) + goto fail; + modrm_reg |= opcode & 7; + reg = decode_register(modrm_reg, regs, 0); + switch ( (opcode >> 3) & 7 ) + { + case 0: /* Read CR0 */ + *reg = + (read_cr0() & ~X86_CR0_TS) | + (test_bit(EDF_GUEST_STTS, &ed->ed_flags) ? X86_CR0_TS : 0); + break; + + case 2: /* Read CR2 */ + *reg = ed->arch.guest_cr2; + break; + + case 3: /* Read CR3 */ + *reg = pagetable_val(ed->arch.guest_table); + break; + + default: + goto fail; + } + break; + + case 0x22: /* MOV ,CR? */ + opcode = insn_fetch(u8, 1, eip); + if ( (opcode & 0xc0) != 0xc0 ) + goto fail; + modrm_reg |= opcode & 7; + reg = decode_register(modrm_reg, regs, 0); + switch ( (opcode >> 3) & 7 ) + { + case 0: /* Write CR0 */ + (void)do_fpu_taskswitch(!!(*reg & X86_CR0_TS)); + break; + + case 2: /* Write CR2 */ + ed->arch.guest_cr2 = *reg; + break; + + case 3: /* Write CR3 */ + LOCK_BIGLOCK(ed->domain); + (void)new_guest_cr3(*reg); + UNLOCK_BIGLOCK(ed->domain); + break; + + default: + goto fail; + } + break; + + case 0x30: /* WRMSR */ + /* Ignore the instruction if unprivileged. 
*/ + if ( !IS_PRIV(ed->domain) ) + DPRINTK("Non-priv domain attempted WRMSR(%p,%08lx,%08lx).\n", + regs->ecx, (long)regs->eax, (long)regs->edx); + else if ( wrmsr_user(regs->ecx, regs->eax, regs->edx) ) + goto fail; + break; + + case 0x32: /* RDMSR */ + if ( !IS_PRIV(ed->domain) ) + DPRINTK("Non-priv domain attempted RDMSR(%p,%08lx,%08lx).\n", + regs->ecx, (long)regs->eax, (long)regs->edx); + /* Everyone can read the MSR space. */ + if ( rdmsr_user(regs->ecx, regs->eax, regs->edx) ) + goto fail; + break; + + default: + goto fail; + } + + done: + regs->eip = eip; + return EXCRET_fault_fixed; + + fail: + return 0; + + read_fault: + propagate_page_fault(eip, 4); /* user mode, read fault */ + return EXCRET_fault_fixed; + + write_fault: + propagate_page_fault(eip, 6); /* user mode, write fault */ + return EXCRET_fault_fixed; +} + +asmlinkage int do_general_protection(struct xen_regs *regs) +{ + struct exec_domain *ed = current; + struct trap_bounce *tb = &ed->arch.trap_bounce; + trap_info_t *ti; + unsigned long fixup; + + DEBUGGER_trap_entry(TRAP_gp_fault, regs); + + if ( regs->error_code & 1 ) + goto hardware_gp; + + if ( !GUEST_MODE(regs) ) + goto gp_in_kernel; + + /* + * Cunning trick to allow arbitrary "INT n" handling. + * + * We set DPL == 0 on all vectors in the IDT. This prevents any INT + * instruction from trapping to the appropriate vector, when that might not + * be expected by Xen or the guest OS. For example, that entry might be for + * a fault handler (unlike traps, faults don't increment EIP), or might + * expect an error code on the stack (which a software trap never + * provides), or might be a hardware interrupt handler that doesn't like + * being called spuriously. + * + * Instead, a GPF occurs with the faulting IDT vector in the error code. + * Bit 1 is set to indicate that an IDT entry caused the fault. Bit 0 is + * clear to indicate that it's a software fault, not hardware. + * + * NOTE: Vectors 3 and 4 are dealt with from their own handler. This is + * okay because they can only be triggered by an explicit DPL-checked + * instruction. The DPL specified by the guest OS for these vectors is NOT + * CHECKED!! + */ + if ( (regs->error_code & 3) == 2 ) + { + /* This fault must be due to instruction. */ + ti = current->arch.traps + (regs->error_code>>3); + if ( PERMIT_SOFTINT(TI_GET_DPL(ti), ed, regs) ) + { + tb->flags = TBF_EXCEPTION; + regs->eip += 2; + goto finish_propagation; + } + } + + /* Emulate some simple privileged and I/O instructions. */ + if ( (regs->error_code == 0) && + emulate_privileged_op(regs) ) + return 0; + +#if defined(__i386__) + if ( VM_ASSIST(ed->domain, VMASST_TYPE_4gb_segments) && + (regs->error_code == 0) && + gpf_emulate_4gb(regs) ) + return 0; +#endif + +#ifndef NDEBUG + if ( (ed->arch.traps[TRAP_gp_fault].address == 0) && + (ed->domain->id == 0) ) + goto gp_in_kernel; +#endif + + /* Pass on GPF as is. 
*/ + ti = current->arch.traps + 13; + tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE; + tb->error_code = regs->error_code; + finish_propagation: + tb->cs = ti->cs; + tb->eip = ti->address; + if ( TI_GET_IF(ti) ) + ed->vcpu_info->evtchn_upcall_mask = 1; + return 0; + + gp_in_kernel: + + if ( likely((fixup = search_exception_table(regs->eip)) != 0) ) + { + DPRINTK("GPF (%04x): %p -> %p\n", + regs->error_code, regs->eip, fixup); + regs->eip = fixup; + return 0; + } + + DEBUGGER_trap_fatal(TRAP_gp_fault, regs); + + hardware_gp: + show_registers(regs); + panic("CPU%d GENERAL PROTECTION FAULT\n[error_code=%04x]\n", + smp_processor_id(), regs->error_code); + return 0; +} + +unsigned long nmi_softirq_reason; +static void nmi_softirq(void) +{ + if ( dom0 == NULL ) + return; + + if ( test_and_clear_bit(0, &nmi_softirq_reason) ) + send_guest_virq(dom0->exec_domain[0], VIRQ_PARITY_ERR); + + if ( test_and_clear_bit(1, &nmi_softirq_reason) ) + send_guest_virq(dom0->exec_domain[0], VIRQ_IO_ERR); +} + +asmlinkage void mem_parity_error(struct xen_regs *regs) +{ + /* Clear and disable the parity-error line. */ + outb((inb(0x61)&15)|4,0x61); + + switch ( opt_nmi[0] ) + { + case 'd': /* 'dom0' */ + set_bit(0, &nmi_softirq_reason); + raise_softirq(NMI_SOFTIRQ); + case 'i': /* 'ignore' */ + break; + default: /* 'fatal' */ + console_force_unlock(); + printk("\n\nNMI - MEMORY ERROR\n"); + fatal_trap(TRAP_nmi, regs); + } +} + +asmlinkage void io_check_error(struct xen_regs *regs) +{ + /* Clear and disable the I/O-error line. */ + outb((inb(0x61)&15)|8,0x61); + + switch ( opt_nmi[0] ) + { + case 'd': /* 'dom0' */ + set_bit(0, &nmi_softirq_reason); + raise_softirq(NMI_SOFTIRQ); + case 'i': /* 'ignore' */ + break; + default: /* 'fatal' */ + console_force_unlock(); + printk("\n\nNMI - I/O ERROR\n"); + fatal_trap(TRAP_nmi, regs); + } +} + +static void unknown_nmi_error(unsigned char reason) +{ + printk("Uhhuh. NMI received for unknown reason %02x.\n", reason); + printk("Dazed and confused, but trying to continue\n"); + printk("Do you have a strange power saving mode enabled?\n"); +} + +asmlinkage void do_nmi(struct xen_regs *regs, unsigned long reason) +{ + ++nmi_count(smp_processor_id()); + + if ( nmi_watchdog ) + nmi_watchdog_tick(regs); + + if ( reason & 0x80 ) + mem_parity_error(regs); + else if ( reason & 0x40 ) + io_check_error(regs); + else if ( !nmi_watchdog ) + unknown_nmi_error((unsigned char)(reason&0xff)); +} + +asmlinkage int math_state_restore(struct xen_regs *regs) +{ + /* Prevent recursion. 
*/
+    clts();
+
+    if ( !test_bit(EDF_USEDFPU, &current->ed_flags) )
+    {
+        if ( test_bit(EDF_DONEFPUINIT, &current->ed_flags) )
+            restore_fpu(current);
+        else
+            init_fpu();
+        set_bit(EDF_USEDFPU, &current->ed_flags); /* so we fnsave on switch_to() */
+    }
+
+    if ( test_and_clear_bit(EDF_GUEST_STTS, &current->ed_flags) )
+    {
+        struct trap_bounce *tb = &current->arch.trap_bounce;
+        tb->flags = TBF_EXCEPTION;
+        tb->cs = current->arch.traps[7].cs;
+        tb->eip = current->arch.traps[7].address;
+    }
+
+    return EXCRET_fault_fixed;
+}
+
+asmlinkage int do_debug(struct xen_regs *regs)
+{
+    unsigned long condition;
+    struct exec_domain *ed = current;
+    struct trap_bounce *tb = &ed->arch.trap_bounce;
+
+    __asm__ __volatile__("mov %%db6,%0" : "=r" (condition));
+
+    /* Mask out spurious debug traps due to lazy DR7 setting */
+    if ( (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) &&
+         (ed->arch.debugreg[7] == 0) )
+    {
+        __asm__("mov %0,%%db7" : : "r" (0UL));
+        goto out;
+    }
+
+    DEBUGGER_trap_entry(TRAP_debug, regs);
+
+    if ( !GUEST_MODE(regs) )
+    {
+        /* Clear TF just for absolute sanity. */
+        regs->eflags &= ~EF_TF;
+        /*
+         * We ignore watchpoints when they trigger within Xen. This may happen
+         * when a buffer is passed to us which previously had a watchpoint set
+         * on it. No need to bump EIP; the only faulting trap is an instruction
+         * breakpoint, which can't happen to us.
+         */
+        goto out;
+    }
+
+    /* Save debug status register where guest OS can peek at it */
+    ed->arch.debugreg[6] = condition;
+
+    tb->flags = TBF_EXCEPTION;
+    tb->cs = ed->arch.traps[1].cs;
+    tb->eip = ed->arch.traps[1].address;
+
+ out:
+    return EXCRET_not_a_fault;
+}
+
+asmlinkage int do_spurious_interrupt_bug(struct xen_regs *regs)
+{
+    return EXCRET_not_a_fault;
+}
+
+void set_intr_gate(unsigned int n, void *addr)
+{
+#ifdef __i386__
+    int i;
+    /* Keep secondary tables in sync with IRQ updates. */
+    for ( i = 1; i < NR_CPUS; i++ )
+        if ( idt_tables[i] != NULL )
+            _set_gate(&idt_tables[i][n], 14, 0, addr);
+#endif
+    _set_gate(&idt_table[n], 14, 0, addr);
+}
+
+void set_system_gate(unsigned int n, void *addr)
+{
+    _set_gate(idt_table+n,14,3,addr);
+}
+
+void set_task_gate(unsigned int n, unsigned int sel)
+{
+    idt_table[n].a = sel << 16;
+    idt_table[n].b = 0x8500;
+}
+
+void set_tss_desc(unsigned int n, void *addr)
+{
+    _set_tssldt_desc(
+        gdt_table + __TSS(n),
+        (unsigned long)addr,
+        offsetof(struct tss_struct, __cacheline_filler) - 1,
+        9);
+}
+
+void __init trap_init(void)
+{
+    extern void percpu_traps_init(void);
+    extern void cpu_init(void);
+
+    /*
+     * Note that interrupt gates are always used, rather than trap gates. We
+     * must have interrupts disabled until DS/ES/FS/GS are saved because the
+     * first activation must have the "bad" value(s) for these registers and
+     * we may lose them if another activation is installed before they are
+     * saved. The page-fault handler also needs interrupts disabled until %cr2
+     * has been read and saved on the stack.
+     */
+    set_intr_gate(TRAP_divide_error,&divide_error);
+    set_intr_gate(TRAP_debug,&debug);
+    set_intr_gate(TRAP_nmi,&nmi);
+    set_system_gate(TRAP_int3,&int3);         /* usable from all privileges */
+    set_system_gate(TRAP_overflow,&overflow); /* usable from all privileges */
+    set_intr_gate(TRAP_bounds,&bounds);
+    set_intr_gate(TRAP_invalid_op,&invalid_op);
+    set_intr_gate(TRAP_no_device,&device_not_available);
+    set_intr_gate(TRAP_copro_seg,&coprocessor_segment_overrun);
+    set_intr_gate(TRAP_invalid_tss,&invalid_TSS);
+    set_intr_gate(TRAP_no_segment,&segment_not_present);
+    set_intr_gate(TRAP_stack_error,&stack_segment);
+    set_intr_gate(TRAP_gp_fault,&general_protection);
+    set_intr_gate(TRAP_page_fault,&page_fault);
+    set_intr_gate(TRAP_spurious_int,&spurious_interrupt_bug);
+    set_intr_gate(TRAP_copro_error,&coprocessor_error);
+    set_intr_gate(TRAP_alignment_check,&alignment_check);
+    set_intr_gate(TRAP_machine_check,&machine_check);
+    set_intr_gate(TRAP_simd_error,&simd_coprocessor_error);
+
+    percpu_traps_init();
+
+    cpu_init();
+
+    open_softirq(NMI_SOFTIRQ, nmi_softirq);
+}
+
+
+long do_set_trap_table(trap_info_t *traps)
+{
+    trap_info_t cur;
+    trap_info_t *dst = current->arch.traps;
+    long rc = 0;
+
+    LOCK_BIGLOCK(current->domain);
+
+    for ( ; ; )
+    {
+        if ( hypercall_preempt_check() )
+        {
+            rc = hypercall1_create_continuation(
+                __HYPERVISOR_set_trap_table, traps);
+            break;
+        }
+
+        if ( copy_from_user(&cur, traps, sizeof(cur)) )
+        {
+            rc = -EFAULT;
+            break;
+        }
+
+        if ( cur.address == 0 )
+            break;
+
+        if ( !VALID_CODESEL(cur.cs) )
+        {
+            rc = -EPERM;
+            break;
+        }
+
+        memcpy(dst+cur.vector, &cur, sizeof(cur));
+        traps++;
+    }
+
+    UNLOCK_BIGLOCK(current->domain);
+
+    return rc;
+}
+
+
+#if defined(__i386__)
+#define DB_VALID_ADDR(_a) \
+    ((_a) <= (PAGE_OFFSET - 4))
+#elif defined(__x86_64__)
+#define DB_VALID_ADDR(_a) \
+    ((_a) >= HYPERVISOR_VIRT_END) || ((_a) <= (HYPERVISOR_VIRT_START-8))
+#endif
+long set_debugreg(struct exec_domain *p, int reg, unsigned long value)
+{
+    int i;
+
+    switch ( reg )
+    {
+    case 0:
+        if ( !DB_VALID_ADDR(value) ) return -EPERM;
+        if ( p == current )
+            __asm__ ( "mov %0, %%db0" : : "r" (value) );
+        break;
+    case 1:
+        if ( !DB_VALID_ADDR(value) ) return -EPERM;
+        if ( p == current )
+            __asm__ ( "mov %0, %%db1" : : "r" (value) );
+        break;
+    case 2:
+        if ( !DB_VALID_ADDR(value) ) return -EPERM;
+        if ( p == current )
+            __asm__ ( "mov %0, %%db2" : : "r" (value) );
+        break;
+    case 3:
+        if ( !DB_VALID_ADDR(value) ) return -EPERM;
+        if ( p == current )
+            __asm__ ( "mov %0, %%db3" : : "r" (value) );
+        break;
+    case 6:
+        /*
+         * DR6: Bits 4-11,16-31 reserved (set to 1).
+         *      Bit 12 reserved (set to 0).
+         */
+        value &= 0xffffefff; /* reserved bits => 0 */
+        value |= 0xffff0ff0; /* reserved bits => 1 */
+        if ( p == current )
+            __asm__ ( "mov %0, %%db6" : : "r" (value) );
+        break;
+    case 7:
+        /*
+         * DR7: Bit 10 reserved (set to 1).
+         *      Bits 11-12,14-15 reserved (set to 0).
+         * Privileged bits:
+         *      GD (bit 13): must be 0.
+         *      R/Wn (bits 16-17,20-21,24-25,28-29): mustn't be 10.
+         *      LENn (bits 18-19,22-23,26-27,30-31): mustn't be 10.
+         */
+        /* DR7 == 0 => debugging disabled for this domain.
*/ + if ( value != 0 ) + { + value &= 0xffff27ff; /* reserved bits => 0 */ + value |= 0x00000400; /* reserved bits => 1 */ + if ( (value & (1<<13)) != 0 ) return -EPERM; + for ( i = 0; i < 16; i += 2 ) + if ( ((value >> (i+16)) & 3) == 2 ) return -EPERM; + } + if ( p == current ) + __asm__ ( "mov %0, %%db7" : : "r" (value) ); + break; + default: + return -EINVAL; + } + + p->arch.debugreg[reg] = value; + return 0; +} + +long do_set_debugreg(int reg, unsigned long value) +{ + return set_debugreg(current, reg, value); +} + +unsigned long do_get_debugreg(int reg) +{ + if ( (reg < 0) || (reg > 7) ) return -EINVAL; + return current->arch.debugreg[reg]; +} + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -Naurp ../xeno-unstable.bk/xen/arch/x86/x86_32/entry.S xen/arch/x86/x86_32/entry.S --- ../xeno-unstable.bk/xen/arch/x86/x86_32/entry.S 2005-04-15 08:27:45.000000000 -0500 +++ xen/arch/x86/x86_32/entry.S 2005-04-15 08:40:37.000000000 -0500 @@ -3,6 +3,10 @@ * * Copyright (c) 2002-2004, K A Fraser * Copyright (c) 1991, 1992 Linus Torvalds + * + * Modified by Aravind Menon for supporting oprofile + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. * * Calling back to a guest OS: * =========================== @@ -563,10 +567,10 @@ ENTRY(nmi) jnz do_watchdog_tick movl %ds,%eax cmpw $(__HYPERVISOR_DS),%ax - jne defer_nmi + jne force_nmi movl %es,%eax cmpw $(__HYPERVISOR_DS),%ax - jne defer_nmi + jne force_nmi do_watchdog_tick: movl $(__HYPERVISOR_DS),%edx @@ -579,6 +583,32 @@ do_watchdog_tick: addl $8,%esp jmp ret_from_intr +force_nmi: + movl %ds,-4(%esp) + movl %es,-8(%esp) + movl %fs,-12(%esp) + movl %gs,-16(%esp) + subl $16,%esp + + movl $(__HYPERVISOR_DS),%edx + movl %edx,%ds + movl %edx,%es + + movl %esp,%edx + addl $16,%edx + pushl %ebx + pushl %edx + call SYMBOL_NAME(do_nmi) + addl $8,%esp + + addl $16,%esp + movl -4(%esp),%ds + movl -8(%esp),%es + movl -12(%esp),%fs + movl -16(%esp),%gs + + jmp restore_all_xen + defer_nmi: movl $FIXMAP_apic_base,%eax # apic_wait_icr_idle() @@ -739,6 +769,7 @@ ENTRY(hypercall_table) .long SYMBOL_NAME(do_boot_vcpu) .long SYMBOL_NAME(do_ni_hypercall) /* 25 */ .long SYMBOL_NAME(do_mmuext_op) + .long SYMBOL_NAME(do_pmc_op) .rept NR_hypercalls-((.-hypercall_table)/4) .long SYMBOL_NAME(do_ni_hypercall) .endr diff -Naurp ../xeno-unstable.bk/xen/arch/x86/x86_32/entry.S.orig xen/arch/x86/x86_32/entry.S.orig --- ../xeno-unstable.bk/xen/arch/x86/x86_32/entry.S.orig 1969-12-31 18:00:00.000000000 -0600 +++ xen/arch/x86/x86_32/entry.S.orig 2005-04-15 08:33:16.000000000 -0500 @@ -0,0 +1,744 @@ +/* + * Hypercall and fault low-level handling routines. + * + * Copyright (c) 2002-2004, K A Fraser + * Copyright (c) 1991, 1992 Linus Torvalds + * + * Calling back to a guest OS: + * =========================== + * + * First, we require that all callbacks (either via a supplied + * interrupt-descriptor-table, or via the special event or failsafe callbacks + * in the shared-info-structure) are to ring 1. This just makes life easier, + * in that it means we don't have to do messy GDT/LDT lookups to find + * out which the privilege-level of the return code-selector. That code + * would just be a hassle to write, and would need to account for running + * off the end of the GDT/LDT, for example. For all callbacks we check + * that the provided return CS is not == __HYPERVISOR_{CS,DS}. 
Apart from that + * we're safe as don't allow a guest OS to install ring-0 privileges into the + * GDT/LDT. It's up to the guest OS to ensure all returns via the IDT are to + * ring 1. If not, we load incorrect SS/ESP values from the TSS (for ring 1 + * rather than the correct ring) and bad things are bound to ensue -- IRET is + * likely to fault, and we may end up killing the domain (no harm can + * come to Xen, though). + * + * When doing a callback, we check if the return CS is in ring 0. If so, + * callback is delayed until next return to ring != 0. + * If return CS is in ring 1, then we create a callback frame + * starting at return SS/ESP. The base of the frame does an intra-privilege + * interrupt-return. + * If return CS is in ring > 1, we create a callback frame starting + * at SS/ESP taken from appropriate section of the current TSS. The base + * of the frame does an inter-privilege interrupt-return. + * + * Note that the "failsafe callback" uses a special stackframe: + * { return_DS, return_ES, return_FS, return_GS, return_EIP, + * return_CS, return_EFLAGS[, return_ESP, return_SS] } + * That is, original values for DS/ES/FS/GS are placed on stack rather than + * in DS/ES/FS/GS themselves. Why? It saves us loading them, only to have them + * saved/restored in guest OS. Furthermore, if we load them we may cause + * a fault if they are invalid, which is a hassle to deal with. We avoid + * that problem if we don't load them :-) This property allows us to use + * the failsafe callback as a fallback: if we ever fault on loading DS/ES/FS/GS + * on return to ring != 0, we can simply package it up as a return via + * the failsafe callback, and let the guest OS sort it out (perhaps by + * killing an application process). Note that we also do this for any + * faulting IRET -- just let the guest OS handle it via the event + * callback. + * + * We terminate a domain in the following cases: + * - creating a callback stack frame (due to bad ring-1 stack). + * - faulting IRET on entry to failsafe callback handler. + * So, each domain must keep its ring-1 %ss/%esp and failsafe callback + * handler in good order (absolutely no faults allowed!). + */ + +#include +#include +#include +#include +#include +#include +#include + +#define GET_CURRENT(reg) \ + movl $STACK_SIZE-4, reg; \ + orl %esp, reg; \ + andl $~3,reg; \ + movl (reg),reg; + +#ifdef CONFIG_VMX +/* + * At VMExit time the processor saves the guest selectors, esp, eip, + * and eflags. Therefore we don't save them, but simply decrement + * the kernel stack pointer to make it consistent with the stack frame + * at usual interruption time. The eflags of the host is not saved by VMX, + * and we set it to the fixed value. + * + * We also need the room, especially because orig_eax field is used + * by do_IRQ(). Compared the xen_regs, we skip pushing for the following: + * (10) u32 gs; + * (9) u32 fs; + * (8) u32 ds; + * (7) u32 es; + * <- get_stack_bottom() (= HOST_ESP) + * (6) u32 ss; + * (5) u32 esp; + * (4) u32 eflags; + * (3) u32 cs; + * (2) u32 eip; + * (2/1) u16 entry_vector; + * (1/1) u16 error_code; + * However, get_stack_bottom() actually returns 20 bytes before the real + * bottom of the stack to allow space for: + * domain pointer, DS, ES, FS, GS. Therefore, we effectively skip 6 registers. 
+ */ +#define VMX_MONITOR_EFLAGS 0x202 /* IF on */ +#define NR_SKIPPED_REGS 6 /* See the above explanation */ +#define VMX_SAVE_ALL_NOSEGREGS \ + pushl $VMX_MONITOR_EFLAGS; \ + popf; \ + subl $(NR_SKIPPED_REGS*4), %esp; \ + movl $0, 0xc(%esp); /* eflags==0 identifies xen_regs as VMX guest */ \ + pushl %eax; \ + pushl %ebp; \ + pushl %edi; \ + pushl %esi; \ + pushl %edx; \ + pushl %ecx; \ + pushl %ebx; + +ENTRY(vmx_asm_vmexit_handler) + /* selectors are restored/saved by VMX */ + VMX_SAVE_ALL_NOSEGREGS + call SYMBOL_NAME(vmx_vmexit_handler) + jmp vmx_asm_do_resume + +ENTRY(vmx_asm_do_launch) + popl %ebx + popl %ecx + popl %edx + popl %esi + popl %edi + popl %ebp + popl %eax + addl $(NR_SKIPPED_REGS*4), %esp + /* VMLUANCH */ + .byte 0x0f,0x01,0xc2 + pushf + call SYMBOL_NAME(vm_launch_fail) + hlt + + ALIGN + +ENTRY(vmx_asm_do_resume) +vmx_test_all_events: + GET_CURRENT(%ebx) +/*test_all_events:*/ + xorl %ecx,%ecx + notl %ecx + cli # tests must not race interrupts +/*test_softirqs:*/ + movl EDOMAIN_processor(%ebx),%eax + shl $6,%eax # sizeof(irq_cpustat) == 64 + test %ecx,SYMBOL_NAME(irq_stat)(%eax,1) + jnz vmx_process_softirqs + +vmx_restore_all_guest: + call SYMBOL_NAME(load_cr2) + /* + * Check if we are going back to VMX-based VM + * By this time, all the setups in the VMCS must be complete. + */ + popl %ebx + popl %ecx + popl %edx + popl %esi + popl %edi + popl %ebp + popl %eax + addl $(NR_SKIPPED_REGS*4), %esp + /* VMRESUME */ + .byte 0x0f,0x01,0xc3 + pushf + call SYMBOL_NAME(vm_resume_fail) + /* Should never reach here */ + hlt + + ALIGN +vmx_process_softirqs: + sti + call SYMBOL_NAME(do_softirq) + jmp vmx_test_all_events +#endif + + ALIGN +restore_all_guest: + testl $X86_EFLAGS_VM,XREGS_eflags(%esp) + jnz restore_all_vm86 +FLT1: movl XREGS_ds(%esp),%ds +FLT2: movl XREGS_es(%esp),%es +FLT3: movl XREGS_fs(%esp),%fs +FLT4: movl XREGS_gs(%esp),%gs +restore_all_vm86: + popl %ebx + popl %ecx + popl %edx + popl %esi + popl %edi + popl %ebp + popl %eax + addl $4,%esp +FLT5: iret +.section .fixup,"ax" +FIX5: subl $28,%esp + pushl 28(%esp) # error_code/entry_vector + movl %eax,XREGS_eax+4(%esp) + movl %ebp,XREGS_ebp+4(%esp) + movl %edi,XREGS_edi+4(%esp) + movl %esi,XREGS_esi+4(%esp) + movl %edx,XREGS_edx+4(%esp) + movl %ecx,XREGS_ecx+4(%esp) + movl %ebx,XREGS_ebx+4(%esp) +FIX1: SET_XEN_SEGMENTS(a) + movl %eax,%fs + movl %eax,%gs + sti + popl %esi + pushfl # EFLAGS + movl $__HYPERVISOR_CS,%eax + pushl %eax # CS + movl $DBLFLT1,%eax + pushl %eax # EIP + pushl %esi # error_code/entry_vector + jmp error_code +DBLFLT1:GET_CURRENT(%ebx) + jmp test_all_events +failsafe_callback: + GET_CURRENT(%ebx) + leal EDOMAIN_trap_bounce(%ebx),%edx + movl EDOMAIN_failsafe_addr(%ebx),%eax + movl %eax,TRAPBOUNCE_eip(%edx) + movl EDOMAIN_failsafe_sel(%ebx),%eax + movw %ax,TRAPBOUNCE_cs(%edx) + movw $TBF_FAILSAFE,TRAPBOUNCE_flags(%edx) + call create_bounce_frame + xorl %eax,%eax + movl %eax,XREGS_ds(%esp) + movl %eax,XREGS_es(%esp) + movl %eax,XREGS_fs(%esp) + movl %eax,XREGS_gs(%esp) + jmp test_all_events +.previous +.section __pre_ex_table,"a" + .long FLT1,FIX1 + .long FLT2,FIX1 + .long FLT3,FIX1 + .long FLT4,FIX1 + .long FLT5,FIX5 +.previous +.section __ex_table,"a" + .long DBLFLT1,failsafe_callback +.previous + + ALIGN +restore_all_xen: + popl %ebx + popl %ecx + popl %edx + popl %esi + popl %edi + popl %ebp + popl %eax + addl $4,%esp + iret + + ALIGN +ENTRY(hypercall) + subl $4,%esp + SAVE_ALL(b) + sti + GET_CURRENT(%ebx) + andl $(NR_hypercalls-1),%eax + PERFC_INCR(PERFC_hypercalls, %eax) + call 
*SYMBOL_NAME(hypercall_table)(,%eax,4) + movl %eax,XREGS_eax(%esp) # save the return value + +test_all_events: + xorl %ecx,%ecx + notl %ecx + cli # tests must not race interrupts +/*test_softirqs:*/ + movl EDOMAIN_processor(%ebx),%eax + shl $6,%eax # sizeof(irq_cpustat) == 64 + test %ecx,SYMBOL_NAME(irq_stat)(%eax,1) + jnz process_softirqs +/*test_guest_events:*/ + movl EDOMAIN_vcpu_info(%ebx),%eax + testb $0xFF,VCPUINFO_upcall_mask(%eax) + jnz restore_all_guest + testb $0xFF,VCPUINFO_upcall_pending(%eax) + jz restore_all_guest +/*process_guest_events:*/ + sti + leal EDOMAIN_trap_bounce(%ebx),%edx + movl EDOMAIN_event_addr(%ebx),%eax + movl %eax,TRAPBOUNCE_eip(%edx) + movl EDOMAIN_event_sel(%ebx),%eax + movw %ax,TRAPBOUNCE_cs(%edx) + movw $TBF_INTERRUPT,TRAPBOUNCE_flags(%edx) + call create_bounce_frame + movl EDOMAIN_vcpu_info(%ebx),%eax + movb $1,VCPUINFO_upcall_mask(%eax) # Upcalls are masked during delivery + jmp test_all_events + + ALIGN +process_softirqs: + sti + call SYMBOL_NAME(do_softirq) + jmp test_all_events + +/* CREATE A BASIC EXCEPTION FRAME ON GUEST OS (RING-1) STACK: */ +/* {EIP, CS, EFLAGS, [ESP, SS]} */ +/* %edx == trap_bounce, %ebx == struct exec_domain */ +/* %eax,%ecx are clobbered. %gs:%esi contain new XREGS_ss/XREGS_esp. */ +create_bounce_frame: + movl XREGS_eflags+4(%esp),%ecx + movb XREGS_cs+4(%esp),%cl + testl $(2|X86_EFLAGS_VM),%ecx + jz ring1 /* jump if returning to an existing ring-1 activation */ + movl EDOMAIN_kernel_sp(%ebx),%esi +FLT6: movl EDOMAIN_kernel_ss(%ebx),%gs + testl $X86_EFLAGS_VM,XREGS_eflags+4(%esp) + jz nvm86_1 + subl $16,%esi /* push ES/DS/FS/GS (VM86 stack frame) */ + movl XREGS_es+4(%esp),%eax +FLT7: movl %eax,%gs:(%esi) + movl XREGS_ds+4(%esp),%eax +FLT8: movl %eax,%gs:4(%esi) + movl XREGS_fs+4(%esp),%eax +FLT9: movl %eax,%gs:8(%esi) + movl XREGS_gs+4(%esp),%eax +FLT10: movl %eax,%gs:12(%esi) +nvm86_1:subl $8,%esi /* push SS/ESP (inter-priv iret) */ + movl XREGS_esp+4(%esp),%eax +FLT11: movl %eax,%gs:(%esi) + movl XREGS_ss+4(%esp),%eax +FLT12: movl %eax,%gs:4(%esi) + jmp 1f +ring1: /* obtain ss/esp from oldss/oldesp -- a ring-1 activation exists */ + movl XREGS_esp+4(%esp),%esi +FLT13: movl XREGS_ss+4(%esp),%gs +1: /* Construct a stack frame: EFLAGS, CS/EIP */ + subl $12,%esi + movl XREGS_eip+4(%esp),%eax +FLT14: movl %eax,%gs:(%esi) + movl XREGS_cs+4(%esp),%eax +FLT15: movl %eax,%gs:4(%esi) + movl XREGS_eflags+4(%esp),%eax +FLT16: movl %eax,%gs:8(%esi) + movb TRAPBOUNCE_flags(%edx),%cl + test $TBF_EXCEPTION_ERRCODE,%cl + jz 1f + subl $4,%esi # push error_code onto guest frame + movl TRAPBOUNCE_error_code(%edx),%eax +FLT17: movl %eax,%gs:(%esi) + testb $TBF_EXCEPTION_CR2,%cl + jz 2f + subl $4,%esi # push %cr2 onto guest frame + movl TRAPBOUNCE_cr2(%edx),%eax +FLT18: movl %eax,%gs:(%esi) +1: testb $TBF_FAILSAFE,%cl + jz 2f + subl $16,%esi # add DS/ES/FS/GS to failsafe stack frame + testl $X86_EFLAGS_VM,XREGS_eflags+4(%esp) + jz nvm86_2 + xorl %eax,%eax # VM86: we write zero selector values +FLT19: movl %eax,%gs:(%esi) +FLT20: movl %eax,%gs:4(%esi) +FLT21: movl %eax,%gs:8(%esi) +FLT22: movl %eax,%gs:12(%esi) + jmp 2f +nvm86_2:movl XREGS_ds+4(%esp),%eax # non-VM86: write real selector values +FLT23: movl %eax,%gs:(%esi) + movl XREGS_es+4(%esp),%eax +FLT24: movl %eax,%gs:4(%esi) + movl XREGS_fs+4(%esp),%eax +FLT25: movl %eax,%gs:8(%esi) + movl XREGS_gs+4(%esp),%eax +FLT26: movl %eax,%gs:12(%esi) +2: testl $X86_EFLAGS_VM,XREGS_eflags+4(%esp) + jz nvm86_3 + xorl %eax,%eax /* zero DS-GS, just as a real CPU would */ + movl %eax,XREGS_ds+4(%esp) + 
movl %eax,XREGS_es+4(%esp) + movl %eax,XREGS_fs+4(%esp) + movl %eax,XREGS_gs+4(%esp) +nvm86_3:/* Rewrite our stack frame and return to ring 1. */ + /* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. */ + andl $0xfffcbeff,XREGS_eflags+4(%esp) + movl %gs,XREGS_ss+4(%esp) + movl %esi,XREGS_esp+4(%esp) + movzwl TRAPBOUNCE_cs(%edx),%eax + movl %eax,XREGS_cs+4(%esp) + movl TRAPBOUNCE_eip(%edx),%eax + movl %eax,XREGS_eip+4(%esp) + movb $0,TRAPBOUNCE_flags(%edx) + ret +.section __ex_table,"a" + .long FLT6,domain_crash_synchronous , FLT7,domain_crash_synchronous + .long FLT8,domain_crash_synchronous , FLT9,domain_crash_synchronous + .long FLT10,domain_crash_synchronous , FLT11,domain_crash_synchronous + .long FLT12,domain_crash_synchronous , FLT13,domain_crash_synchronous + .long FLT14,domain_crash_synchronous , FLT15,domain_crash_synchronous + .long FLT16,domain_crash_synchronous , FLT17,domain_crash_synchronous + .long FLT18,domain_crash_synchronous , FLT19,domain_crash_synchronous + .long FLT20,domain_crash_synchronous , FLT21,domain_crash_synchronous + .long FLT22,domain_crash_synchronous , FLT23,domain_crash_synchronous + .long FLT24,domain_crash_synchronous , FLT25,domain_crash_synchronous + .long FLT26,domain_crash_synchronous +.previous + + ALIGN +process_guest_exception_and_events: + leal EDOMAIN_trap_bounce(%ebx),%edx + testb $TBF_EXCEPTION,TRAPBOUNCE_flags(%edx) + jz test_all_events + call create_bounce_frame + jmp test_all_events + + ALIGN +ENTRY(ret_from_intr) + GET_CURRENT(%ebx) + movl XREGS_eflags(%esp),%eax + movb XREGS_cs(%esp),%al + testl $(3|X86_EFLAGS_VM),%eax + jnz test_all_events + jmp restore_all_xen + +ENTRY(divide_error) + pushl $TRAP_divide_error<<16 + ALIGN +error_code: + SAVE_ALL_NOSEGREGS(a) + SET_XEN_SEGMENTS(a) + testb $X86_EFLAGS_IF>>8,XREGS_eflags+1(%esp) + jz exception_with_ints_disabled + sti # re-enable interrupts + xorl %eax,%eax + movw XREGS_entry_vector(%esp),%ax + movl %esp,%edx + pushl %edx # push the xen_regs pointer + GET_CURRENT(%ebx) + PERFC_INCR(PERFC_exceptions, %eax) + call *SYMBOL_NAME(exception_table)(,%eax,4) + addl $4,%esp + movl XREGS_eflags(%esp),%eax + movb XREGS_cs(%esp),%al + testl $(3|X86_EFLAGS_VM),%eax + jz restore_all_xen + jmp process_guest_exception_and_events + +exception_with_ints_disabled: + movl XREGS_eflags(%esp),%eax + movb XREGS_cs(%esp),%al + testl $(3|X86_EFLAGS_VM),%eax # interrupts disabled outside Xen? + jnz FATAL_exception_with_ints_disabled + pushl %esp + call search_pre_exception_table + addl $4,%esp + testl %eax,%eax # no fixup code for faulting EIP? 
+ jz FATAL_exception_with_ints_disabled + movl %eax,XREGS_eip(%esp) + movl %esp,%esi + subl $4,%esp + movl %esp,%edi + movl $XREGS_kernel_sizeof/4,%ecx + rep; movsl # make room for error_code/entry_vector + movl XREGS_error_code(%esp),%eax # error_code/entry_vector + movl %eax,XREGS_kernel_sizeof(%esp) + jmp restore_all_xen # return to fixup code + +FATAL_exception_with_ints_disabled: + xorl %esi,%esi + movw XREGS_entry_vector(%esp),%si + movl %esp,%edx + pushl %edx # push the xen_regs pointer + pushl %esi # push the trapnr (entry vector) + call SYMBOL_NAME(fatal_trap) + ud2 + +ENTRY(coprocessor_error) + pushl $TRAP_copro_error<<16 + jmp error_code + +ENTRY(simd_coprocessor_error) + pushl $TRAP_simd_error<<16 + jmp error_code + +ENTRY(device_not_available) + pushl $TRAP_no_device<<16 + jmp error_code + +ENTRY(debug) + pushl $TRAP_debug<<16 + jmp error_code + +ENTRY(int3) + pushl $TRAP_int3<<16 + jmp error_code + +ENTRY(overflow) + pushl $TRAP_overflow<<16 + jmp error_code + +ENTRY(bounds) + pushl $TRAP_bounds<<16 + jmp error_code + +ENTRY(invalid_op) + pushl $TRAP_invalid_op<<16 + jmp error_code + +ENTRY(coprocessor_segment_overrun) + pushl $TRAP_copro_seg<<16 + jmp error_code + +ENTRY(invalid_TSS) + movw $TRAP_invalid_tss,2(%esp) + jmp error_code + +ENTRY(segment_not_present) + movw $TRAP_no_segment,2(%esp) + jmp error_code + +ENTRY(stack_segment) + movw $TRAP_stack_error,2(%esp) + jmp error_code + +ENTRY(general_protection) + movw $TRAP_gp_fault,2(%esp) + jmp error_code + +ENTRY(alignment_check) + movw $TRAP_alignment_check,2(%esp) + jmp error_code + +ENTRY(page_fault) + movw $TRAP_page_fault,2(%esp) + jmp error_code + +ENTRY(machine_check) + pushl $TRAP_machine_check<<16 + jmp error_code + +ENTRY(spurious_interrupt_bug) + pushl $TRAP_spurious_int<<16 + jmp error_code + +ENTRY(nmi) + # Save state but do not trash the segment registers! + # We may otherwise be unable to reload them or copy them to ring 1. + pushl %eax + SAVE_ALL_NOSEGREGS(a) + + # Check for hardware problems. + inb $0x61,%al + testb $0x80,%al + jne nmi_parity_err + testb $0x40,%al + jne nmi_io_err + movl %eax,%ebx + + # Okay, its almost a normal NMI tick. We can only process it if: + # A. We are the outermost Xen activation (in which case we have + # the selectors safely saved on our stack) + # B. DS and ES contain sane Xen values. + # In all other cases we bail without touching DS-GS, as we have + # interrupted an enclosing Xen activation in tricky prologue or + # epilogue code. 
+ movl XREGS_eflags(%esp),%eax + movb XREGS_cs(%esp),%al + testl $(3|X86_EFLAGS_VM),%eax + jnz do_watchdog_tick + movl %ds,%eax + cmpw $(__HYPERVISOR_DS),%ax + jne defer_nmi + movl %es,%eax + cmpw $(__HYPERVISOR_DS),%ax + jne defer_nmi + +do_watchdog_tick: + movl $(__HYPERVISOR_DS),%edx + movl %edx,%ds + movl %edx,%es + movl %esp,%edx + pushl %ebx # reason + pushl %edx # regs + call SYMBOL_NAME(do_nmi) + addl $8,%esp + jmp ret_from_intr + +defer_nmi: + movl $FIXMAP_apic_base,%eax + # apic_wait_icr_idle() +1: movl %ss:APIC_ICR(%eax),%ebx + testl $APIC_ICR_BUSY,%ebx + jnz 1b + # __send_IPI_shortcut(APIC_DEST_SELF, TRAP_deferred_nmi) + movl $(APIC_DM_FIXED | APIC_DEST_SELF | APIC_DEST_LOGICAL | \ + TRAP_deferred_nmi),%ss:APIC_ICR(%eax) + jmp restore_all_xen + +nmi_parity_err: + # Clear and disable the parity-error line + andb $0xf,%al + orb $0x4,%al + outb %al,$0x61 + cmpb $'i',%ss:SYMBOL_NAME(opt_nmi) # nmi=ignore + je nmi_out + bts $0,%ss:SYMBOL_NAME(nmi_softirq_reason) + bts $NMI_SOFTIRQ,%ss:SYMBOL_NAME(irq_stat) + cmpb $'d',%ss:SYMBOL_NAME(opt_nmi) # nmi=dom0 + je nmi_out + movl $(__HYPERVISOR_DS),%edx # nmi=fatal + movl %edx,%ds + movl %edx,%es + movl %esp,%edx + push %edx + call SYMBOL_NAME(mem_parity_error) + addl $4,%esp +nmi_out:movl %ss:XREGS_eflags(%esp),%eax + movb %ss:XREGS_cs(%esp),%al + testl $(3|X86_EFLAGS_VM),%eax + jz restore_all_xen + movl $(__HYPERVISOR_DS),%edx + movl %edx,%ds + movl %edx,%es + GET_CURRENT(%ebx) + jmp test_all_events + +nmi_io_err: + # Clear and disable the I/O-error line + andb $0xf,%al + orb $0x8,%al + outb %al,$0x61 + cmpb $'i',%ss:SYMBOL_NAME(opt_nmi) # nmi=ignore + je nmi_out + bts $1,%ss:SYMBOL_NAME(nmi_softirq_reason) + bts $NMI_SOFTIRQ,%ss:SYMBOL_NAME(irq_stat) + cmpb $'d',%ss:SYMBOL_NAME(opt_nmi) # nmi=dom0 + je nmi_out + movl $(__HYPERVISOR_DS),%edx # nmi=fatal + movl %edx,%ds + movl %edx,%es + movl %esp,%edx + push %edx + call SYMBOL_NAME(io_check_error) + addl $4,%esp + jmp nmi_out + + +ENTRY(setup_vm86_frame) + # Copies the entire stack frame forwards by 16 bytes. 
+ .macro copy_vm86_words count=18 + .if \count + pushl ((\count-1)*4)(%esp) + popl ((\count-1)*4)+16(%esp) + copy_vm86_words "(\count-1)" + .endif + .endm + copy_vm86_words + addl $16,%esp + ret + +do_switch_vm86: + # Discard the return address + addl $4,%esp + + # GS:ESI == Ring-1 stack activation + movl XREGS_esp(%esp),%esi +VFLT1: movl XREGS_ss(%esp),%gs + + # ES:EDI == Ring-0 stack activation + leal XREGS_eip(%esp),%edi + + # Restore the hypercall-number-clobbered EAX on our stack frame +VFLT2: movl %gs:(%esi),%eax + movl %eax,XREGS_eax(%esp) + addl $4,%esi + + # Copy the VM86 activation from the ring-1 stack to the ring-0 stack + movl $(XREGS_user_sizeof-XREGS_eip)/4,%ecx +VFLT3: movl %gs:(%esi),%eax + stosl + addl $4,%esi + loop VFLT3 + + # Fix up EFLAGS: IOPL=0, IF=1, VM=1 + andl $~X86_EFLAGS_IOPL,XREGS_eflags(%esp) + orl $X86_EFLAGS_IF|X86_EFLAGS_VM,XREGS_eflags(%esp) + + jmp test_all_events + +.section __ex_table,"a" + .long VFLT1,domain_crash_synchronous + .long VFLT2,domain_crash_synchronous + .long VFLT3,domain_crash_synchronous +.previous + +.data + +ENTRY(exception_table) + .long SYMBOL_NAME(do_divide_error) + .long SYMBOL_NAME(do_debug) + .long 0 # nmi + .long SYMBOL_NAME(do_int3) + .long SYMBOL_NAME(do_overflow) + .long SYMBOL_NAME(do_bounds) + .long SYMBOL_NAME(do_invalid_op) + .long SYMBOL_NAME(math_state_restore) + .long 0 # double fault + .long SYMBOL_NAME(do_coprocessor_segment_overrun) + .long SYMBOL_NAME(do_invalid_TSS) + .long SYMBOL_NAME(do_segment_not_present) + .long SYMBOL_NAME(do_stack_segment) + .long SYMBOL_NAME(do_general_protection) + .long SYMBOL_NAME(do_page_fault) + .long SYMBOL_NAME(do_spurious_interrupt_bug) + .long SYMBOL_NAME(do_coprocessor_error) + .long SYMBOL_NAME(do_alignment_check) + .long SYMBOL_NAME(do_machine_check) + .long SYMBOL_NAME(do_simd_coprocessor_error) + +ENTRY(hypercall_table) + .long SYMBOL_NAME(do_set_trap_table) /* 0 */ + .long SYMBOL_NAME(do_mmu_update) + .long SYMBOL_NAME(do_set_gdt) + .long SYMBOL_NAME(do_stack_switch) + .long SYMBOL_NAME(do_set_callbacks) + .long SYMBOL_NAME(do_fpu_taskswitch) /* 5 */ + .long SYMBOL_NAME(do_sched_op) + .long SYMBOL_NAME(do_dom0_op) + .long SYMBOL_NAME(do_set_debugreg) + .long SYMBOL_NAME(do_get_debugreg) + .long SYMBOL_NAME(do_update_descriptor) /* 10 */ + .long SYMBOL_NAME(do_set_fast_trap) + .long SYMBOL_NAME(do_dom_mem_op) + .long SYMBOL_NAME(do_multicall) + .long SYMBOL_NAME(do_update_va_mapping) + .long SYMBOL_NAME(do_set_timer_op) /* 15 */ + .long SYMBOL_NAME(do_event_channel_op) + .long SYMBOL_NAME(do_xen_version) + .long SYMBOL_NAME(do_console_io) + .long SYMBOL_NAME(do_physdev_op) + .long SYMBOL_NAME(do_grant_table_op) /* 20 */ + .long SYMBOL_NAME(do_vm_assist) + .long SYMBOL_NAME(do_update_va_mapping_otherdomain) + .long SYMBOL_NAME(do_switch_vm86) + .long SYMBOL_NAME(do_boot_vcpu) + .long SYMBOL_NAME(do_ni_hypercall) /* 25 */ + .long SYMBOL_NAME(do_mmuext_op) + .rept NR_hypercalls-((.-hypercall_table)/4) + .long SYMBOL_NAME(do_ni_hypercall) + .endr diff -Naurp ../xeno-unstable.bk/xen/include/asm-x86/apic.h xen/include/asm-x86/apic.h --- ../xeno-unstable.bk/xen/include/asm-x86/apic.h 2005-04-14 14:56:32.000000000 -0500 +++ xen/include/asm-x86/apic.h 2005-04-15 08:34:22.000000000 -0500 @@ -77,6 +77,8 @@ extern void init_apic_mappings (void); extern void smp_local_timer_interrupt (struct xen_regs * regs); extern void setup_APIC_clocks (void); extern void setup_apic_nmi_watchdog (void); +extern int reserve_lapic_nmi(void); +extern void release_lapic_nmi(void); extern void 
nmi_watchdog_tick (struct xen_regs * regs); extern void touch_nmi_watchdog(void); extern int APIC_init_uniprocessor (void); diff -Naurp ../xeno-unstable.bk/xen/include/asm-x86/msr.h xen/include/asm-x86/msr.h --- ../xeno-unstable.bk/xen/include/asm-x86/msr.h 2005-04-14 14:56:32.000000000 -0500 +++ xen/include/asm-x86/msr.h 2005-04-15 08:34:24.000000000 -0500 @@ -11,6 +11,21 @@ : /* no outputs */ \ : "c" (msr), "a" (val1), "d" (val2)) +#define rdmsrl(msr,val) do { \ + unsigned long l__,h__; \ + rdmsr (msr, l__, h__); \ + val = l__; \ + val |= ((u64)h__<<32); \ +} while(0) + +static inline void wrmsrl (unsigned long msr, unsigned long long val) +{ + unsigned long lo, hi; + lo = (unsigned long) val; + hi = val >> 32; + wrmsr (msr, lo, hi); +} + #define rdmsr_user(msr,val1,val2) ({\ int _rc; \ __asm__ __volatile__( \ @@ -47,16 +62,8 @@ #define rdtscl(low) \ __asm__ __volatile__("rdtsc" : "=a" (low) : : "edx") -#if defined(__i386__) #define rdtscll(val) \ __asm__ __volatile__("rdtsc" : "=A" (val)) -#elif defined(__x86_64__) -#define rdtscll(val) do { \ - unsigned int a,d; \ - asm volatile("rdtsc" : "=a" (a), "=d" (d)); \ - (val) = ((unsigned long)a) | (((unsigned long)d)<<32); \ -} while(0) -#endif #define write_tsc(val1,val2) wrmsr(0x10, val1, val2) @@ -140,12 +147,38 @@ #define MSR_IA32_UCODE_WRITE 0x79 #define MSR_IA32_UCODE_REV 0x8b +#define MSR_P6_PERFCTR0 0xc1 +#define MSR_P6_PERFCTR1 0xc2 + #define MSR_IA32_BBL_CR_CTL 0x119 +#define MSR_IA32_SYSENTER_CS 0x174 +#define MSR_IA32_SYSENTER_ESP 0x175 +#define MSR_IA32_SYSENTER_EIP 0x176 + #define MSR_IA32_MCG_CAP 0x179 #define MSR_IA32_MCG_STATUS 0x17a #define MSR_IA32_MCG_CTL 0x17b +/* P4/Xeon+ specific */ +#define MSR_IA32_MCG_EAX 0x180 +#define MSR_IA32_MCG_EBX 0x181 +#define MSR_IA32_MCG_ECX 0x182 +#define MSR_IA32_MCG_EDX 0x183 +#define MSR_IA32_MCG_ESI 0x184 +#define MSR_IA32_MCG_EDI 0x185 +#define MSR_IA32_MCG_EBP 0x186 +#define MSR_IA32_MCG_ESP 0x187 +#define MSR_IA32_MCG_EFLAGS 0x188 +#define MSR_IA32_MCG_EIP 0x189 +#define MSR_IA32_MCG_RESERVED 0x18A + +#define MSR_P6_EVNTSEL0 0x186 +#define MSR_P6_EVNTSEL1 0x187 + +#define MSR_IA32_PERF_STATUS 0x198 +#define MSR_IA32_PERF_CTL 0x199 + #define MSR_IA32_THERM_CONTROL 0x19a #define MSR_IA32_THERM_INTERRUPT 0x19b #define MSR_IA32_THERM_STATUS 0x19c @@ -178,6 +211,92 @@ #define MSR_P6_EVNTSEL0 0x186 #define MSR_P6_EVNTSEL1 0x187 +/* Pentium IV performance counter MSRs */ +#define MSR_P4_BPU_PERFCTR0 0x300 +#define MSR_P4_BPU_PERFCTR1 0x301 +#define MSR_P4_BPU_PERFCTR2 0x302 +#define MSR_P4_BPU_PERFCTR3 0x303 +#define MSR_P4_MS_PERFCTR0 0x304 +#define MSR_P4_MS_PERFCTR1 0x305 +#define MSR_P4_MS_PERFCTR2 0x306 +#define MSR_P4_MS_PERFCTR3 0x307 +#define MSR_P4_FLAME_PERFCTR0 0x308 +#define MSR_P4_FLAME_PERFCTR1 0x309 +#define MSR_P4_FLAME_PERFCTR2 0x30a +#define MSR_P4_FLAME_PERFCTR3 0x30b +#define MSR_P4_IQ_PERFCTR0 0x30c +#define MSR_P4_IQ_PERFCTR1 0x30d +#define MSR_P4_IQ_PERFCTR2 0x30e +#define MSR_P4_IQ_PERFCTR3 0x30f +#define MSR_P4_IQ_PERFCTR4 0x310 +#define MSR_P4_IQ_PERFCTR5 0x311 +#define MSR_P4_BPU_CCCR0 0x360 +#define MSR_P4_BPU_CCCR1 0x361 +#define MSR_P4_BPU_CCCR2 0x362 +#define MSR_P4_BPU_CCCR3 0x363 +#define MSR_P4_MS_CCCR0 0x364 +#define MSR_P4_MS_CCCR1 0x365 +#define MSR_P4_MS_CCCR2 0x366 +#define MSR_P4_MS_CCCR3 0x367 +#define MSR_P4_FLAME_CCCR0 0x368 +#define MSR_P4_FLAME_CCCR1 0x369 +#define MSR_P4_FLAME_CCCR2 0x36a +#define MSR_P4_FLAME_CCCR3 0x36b +#define MSR_P4_IQ_CCCR0 0x36c +#define MSR_P4_IQ_CCCR1 0x36d +#define MSR_P4_IQ_CCCR2 0x36e +#define MSR_P4_IQ_CCCR3 0x36f 
+#define MSR_P4_IQ_CCCR4 0x370 +#define MSR_P4_IQ_CCCR5 0x371 +#define MSR_P4_ALF_ESCR0 0x3ca +#define MSR_P4_ALF_ESCR1 0x3cb +#define MSR_P4_BPU_ESCR0 0x3b2 +#define MSR_P4_BPU_ESCR1 0x3b3 +#define MSR_P4_BSU_ESCR0 0x3a0 +#define MSR_P4_BSU_ESCR1 0x3a1 +#define MSR_P4_CRU_ESCR0 0x3b8 +#define MSR_P4_CRU_ESCR1 0x3b9 +#define MSR_P4_CRU_ESCR2 0x3cc +#define MSR_P4_CRU_ESCR3 0x3cd +#define MSR_P4_CRU_ESCR4 0x3e0 +#define MSR_P4_CRU_ESCR5 0x3e1 +#define MSR_P4_DAC_ESCR0 0x3a8 +#define MSR_P4_DAC_ESCR1 0x3a9 +#define MSR_P4_FIRM_ESCR0 0x3a4 +#define MSR_P4_FIRM_ESCR1 0x3a5 +#define MSR_P4_FLAME_ESCR0 0x3a6 +#define MSR_P4_FLAME_ESCR1 0x3a7 +#define MSR_P4_FSB_ESCR0 0x3a2 +#define MSR_P4_FSB_ESCR1 0x3a3 +#define MSR_P4_IQ_ESCR0 0x3ba +#define MSR_P4_IQ_ESCR1 0x3bb +#define MSR_P4_IS_ESCR0 0x3b4 +#define MSR_P4_IS_ESCR1 0x3b5 +#define MSR_P4_ITLB_ESCR0 0x3b6 +#define MSR_P4_ITLB_ESCR1 0x3b7 +#define MSR_P4_IX_ESCR0 0x3c8 +#define MSR_P4_IX_ESCR1 0x3c9 +#define MSR_P4_MOB_ESCR0 0x3aa +#define MSR_P4_MOB_ESCR1 0x3ab +#define MSR_P4_MS_ESCR0 0x3c0 +#define MSR_P4_MS_ESCR1 0x3c1 +#define MSR_P4_PMH_ESCR0 0x3ac +#define MSR_P4_PMH_ESCR1 0x3ad +#define MSR_P4_RAT_ESCR0 0x3bc +#define MSR_P4_RAT_ESCR1 0x3bd +#define MSR_P4_SAAT_ESCR0 0x3ae +#define MSR_P4_SAAT_ESCR1 0x3af +#define MSR_P4_SSU_ESCR0 0x3be +#define MSR_P4_SSU_ESCR1 0x3bf /* guess: not defined in manual */ +#define MSR_P4_TBPU_ESCR0 0x3c2 +#define MSR_P4_TBPU_ESCR1 0x3c3 +#define MSR_P4_TC_ESCR0 0x3c4 +#define MSR_P4_TC_ESCR1 0x3c5 +#define MSR_P4_U2L_ESCR0 0x3b0 +#define MSR_P4_U2L_ESCR1 0x3b1 + + + /* K7/K8 MSRs. Not complete. See the architecture manual for a more complete list. */ #define MSR_K7_EVNTSEL0 0xC0010000 @@ -196,7 +315,7 @@ #define MSR_K7_FID_VID_CTL 0xC0010041 #define MSR_K7_VID_STATUS 0xC0010042 -/* K6 MSRs */ +/* AMD Defined MSRs */ #define MSR_K6_EFER 0xC0000080 #define MSR_K6_STAR 0xC0000081 #define MSR_K6_WHCR 0xC0000082 @@ -205,6 +324,28 @@ #define MSR_K6_PSOR 0xC0000087 #define MSR_K6_PFIR 0xC0000088 +#define MSR_K7_EVNTSEL0 0xC0010000 +#define MSR_K7_EVNTSEL1 0xC0010001 +#define MSR_K7_EVNTSEL2 0xC0010002 +#define MSR_K7_EVNTSEL3 0xC0010003 +#define MSR_K7_PERFCTR0 0xC0010004 +#define MSR_K7_PERFCTR1 0xC0010005 +#define MSR_K7_PERFCTR2 0xC0010006 +#define MSR_K7_PERFCTR3 0xC0010007 +#define MSR_K7_HWCR 0xC0010015 +#define MSR_K7_CLK_CTL 0xC001001b +#define MSR_K7_FID_VID_CTL 0xC0010041 +#define MSR_K7_FID_VID_STATUS 0xC0010042 + +/* extended feature register */ +#define MSR_EFER 0xc0000080 + +/* EFER bits: */ + +/* Execute Disable enable */ +#define _EFER_NX 11 +#define EFER_NX (1<<_EFER_NX) + /* Centaur-Hauls/IDT defined MSRs. */ #define MSR_IDT_FCR1 0x107 #define MSR_IDT_FCR2 0x108 @@ -224,6 +365,7 @@ /* VIA Cyrix defined MSRs*/ #define MSR_VIA_FCR 0x1107 #define MSR_VIA_LONGHAUL 0x110a +#define MSR_VIA_RNG 0x110b #define MSR_VIA_BCR2 0x1147 /* Transmeta defined MSRs */ @@ -232,4 +374,6 @@ #define MSR_TMTA_LRTI_READOUT 0x80868018 #define MSR_TMTA_LRTI_VOLT_MHZ 0x8086801a + + #endif /* __ASM_MSR_H */ diff -Naurp ../xeno-unstable.bk/xen/include/asm-x86/nmi.h xen/include/asm-x86/nmi.h --- ../xeno-unstable.bk/xen/include/asm-x86/nmi.h 1969-12-31 18:00:00.000000000 -0600 +++ xen/include/asm-x86/nmi.h 2005-04-15 08:34:24.000000000 -0500 @@ -0,0 +1,26 @@ +/* + * linux/include/asm-i386/nmi.h + */ +#ifndef ASM_NMI_H +#define ASM_NMI_H + +struct xen_regs; + +typedef int (*nmi_callback_t)(struct xen_regs * regs, int cpu); + +/** + * set_nmi_callback + * + * Set a handler for an NMI. Only one handler may be + * set. 
Return 1 if the NMI was handled. + */ +void set_nmi_callback(nmi_callback_t callback); + +/** + * unset_nmi_callback + * + * Remove the handler previously set. + */ +void unset_nmi_callback(void); + +#endif /* ASM_NMI_H */ diff -Naurp ../xeno-unstable.bk/xen/include/public/xen.h xen/include/public/xen.h --- ../xeno-unstable.bk/xen/include/public/xen.h 2005-04-14 14:56:32.000000000 -0500 +++ xen/include/public/xen.h 2005-04-15 08:34:26.000000000 -0500 @@ -4,6 +4,10 @@ * Guest OS interface to Xen. * * Copyright (c) 2004, K A Fraser + * + * Modified by Aravind Menon for supporting oprofile + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. */ #ifndef __XEN_PUBLIC_XEN_H__ @@ -59,6 +63,7 @@ #define __HYPERVISOR_boot_vcpu 24 #define __HYPERVISOR_set_segment_base 25 /* x86/64 only */ #define __HYPERVISOR_mmuext_op 26 +#define __HYPERVISOR_pmc_op 27 /* * MULTICALLS @@ -81,6 +86,7 @@ #define VIRQ_DOM_EXC 3 /* (DOM0) Exceptional event for some domain. */ #define VIRQ_PARITY_ERR 4 /* (DOM0) NMI parity error. */ #define VIRQ_IO_ERR 5 /* (DOM0) NMI I/O error. */ +#define VIRQ_PMC_OVF 6 /* PMC Overflow */ #define NR_VIRQS 7 /* @@ -245,6 +251,21 @@ struct mmuext_op { #define VMASST_TYPE_writable_pagetables 2 #define MAX_VMASST_TYPE 2 +/* + * Commands to HYPERVISOR_pmc_op(). + */ +#define PMC_INIT 0 +#define PMC_SET_ACTIVE 1 +#define PMC_SET_PASSIVE 2 +#define PMC_RESERVE_COUNTERS 3 +#define PMC_SETUP_EVENTS 4 +#define PMC_ENABLE_VIRQ 5 +#define PMC_START 6 +#define PMC_STOP 7 +#define PMC_DISABLE_VIRQ 8 +#define PMC_RELEASE_COUNTERS 9 +#define PMC_SHUTDOWN 10 + #ifndef __ASSEMBLY__ typedef u16 domid_t; @@ -300,6 +321,8 @@ typedef struct /* Support for multi-processor guests. */ #define MAX_VIRT_CPUS 32 +#define MAX_OPROF_EVENTS 32 +#define MAX_OPROF_DOMAINS 25 /* * Per-VCPU information goes here. This will be cleaned up more when Xen * actually supports multi-VCPU guests. @@ -413,6 +436,20 @@ typedef struct shared_info_st arch_shared_info_t arch; + /* Oprofile structures */ + u8 event_head; + u8 event_tail; + struct { + u32 eip; + u8 mode; + u8 event; + } PACKED event_log[MAX_OPROF_EVENTS]; + u8 losing_samples; + u64 samples_lost; + u32 nmi_restarts; + u64 active_samples; + u64 passive_samples; + u64 other_samples; } PACKED shared_info_t; /* diff -Naurp ../xeno-unstable.bk/xen/include/xen/smp.h xen/include/xen/smp.h --- ../xeno-unstable.bk/xen/include/xen/smp.h 2005-04-14 14:56:32.000000000 -0500 +++ xen/include/xen/smp.h 2005-04-15 08:34:28.000000000 -0500 @@ -47,6 +47,18 @@ extern int smp_call_function( void (*func) (void *info), void *info, int retry, int wait); /* + * Call a function on all processors + */ +static inline int on_each_cpu(void (*func) (void *info), void *info, + int retry, int wait) +{ + int ret = 0; + ret = smp_call_function(func, info, retry, wait); + func(info); + return ret; +} + +/* * True once the per process idle is forked */ extern int smp_threads_ready;
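
A note on the final hunk: on_each_cpu() simply wraps smp_call_function() and then runs the function locally, so the caller ends up executing func on every online CPU. Below is a minimal usage sketch; reset_perfctr0() and reset_all_counters() are illustrative names, not part of this patch, and MSR_P6_PERFCTR0 (added to asm-x86/msr.h above) is only meaningful on P6-family processors.

/* Sketch only: clear performance counter 0 on every CPU before sampling
 * starts.  The helper names are hypothetical. */
#include <xen/smp.h>
#include <asm/msr.h>

static void reset_perfctr0(void *unused)
{
    /* Executes on whichever CPU runs it; clears that CPU's counter 0. */
    wrmsr(MSR_P6_PERFCTR0, 0, 0);
}

static void reset_all_counters(void)
{
    /* retry=1, wait=1: call the other CPUs via IPI, then run locally too. */
    on_each_cpu(reset_perfctr0, NULL, 1, 1);
}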
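
The new reserve_lapic_nmi()/release_lapic_nmi() pair (asm-x86/apic.h) and set_nmi_callback()/unset_nmi_callback() (asm-x86/nmi.h) are meant to be used together: take the local APIC NMI away from the watchdog, then install a profiling handler. The sketch below shows that pairing; profile_nmi() is only a stand-in for the real handler, which lives in the new oprofile/ files not included in this excerpt.

#include <asm/apic.h>
#include <asm/nmi.h>

/* Stand-in for the real OProfile NMI handler (not shown in this patch). */
static int profile_nmi(struct xen_regs *regs, int cpu)
{
    /* ...record regs->eip for this cpu, re-arm the overflowed counter... */
    return 1;   /* handled */
}

static int claim_nmi_for_profiling(void)
{
    int rc = reserve_lapic_nmi();   /* detaches the NMI watchdog if active */
    if ( rc != 0 )
        return rc;                  /* -EBUSY: another owner holds the LAPIC NMI */
    set_nmi_callback(profile_nmi);
    return 0;
}

static void release_nmi_from_profiling(void)
{
    unset_nmi_callback();
    release_lapic_nmi();            /* re-enables the watchdog if it was running */
}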
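
The public side of the new hypercall is only partially visible here: public/xen.h defines __HYPERVISOR_pmc_op (27) and the PMC_* command numbers, but the argument layout and the ordering enforced by do_pmc_op() live in the new oprofile/ files, which this excerpt does not show. The following is therefore only a guess at a plausible dom0-side call sequence: HYPERVISOR_pmc_op(), struct pmc_setup, the include path, and the start/stop ordering are all assumptions, not the interface the patch defines.

#include <public/xen.h>   /* PMC_* command numbers; include path illustrative */

/* Hypothetical guest-side wrapper around hypercall 27 (__HYPERVISOR_pmc_op). */
extern long HYPERVISOR_pmc_op(unsigned int op, void *arg);

/* Hypothetical per-counter description handed to PMC_SETUP_EVENTS. */
struct pmc_setup {
    unsigned int  counter;    /* which hardware counter             */
    unsigned int  event;      /* vendor-specific event selector     */
    unsigned int  unit_mask;  /* vendor-specific unit mask          */
    unsigned long count;      /* take a sample every 'count' events */
};

static long start_profiling(struct pmc_setup *ev, int n)
{
    long rc;
    int  i;

    if ( (rc = HYPERVISOR_pmc_op(PMC_INIT, NULL)) != 0 )
        return rc;
    if ( (rc = HYPERVISOR_pmc_op(PMC_RESERVE_COUNTERS, NULL)) != 0 )
        return rc;
    for ( i = 0; i < n; i++ )
        if ( (rc = HYPERVISOR_pmc_op(PMC_SETUP_EVENTS, &ev[i])) != 0 )
            goto release;
    if ( (rc = HYPERVISOR_pmc_op(PMC_ENABLE_VIRQ, NULL)) != 0 )
        goto release;
    if ( (rc = HYPERVISOR_pmc_op(PMC_START, NULL)) == 0 )
        return 0;
    HYPERVISOR_pmc_op(PMC_DISABLE_VIRQ, NULL);
 release:
    HYPERVISOR_pmc_op(PMC_RELEASE_COUNTERS, NULL);
    return rc;
}

static void stop_profiling(void)
{
    HYPERVISOR_pmc_op(PMC_STOP, NULL);
    HYPERVISOR_pmc_op(PMC_DISABLE_VIRQ, NULL);
    HYPERVISOR_pmc_op(PMC_RELEASE_COUNTERS, NULL);
    HYPERVISOR_pmc_op(PMC_SHUTDOWN, NULL);
}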
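
The shared_info additions form a small sample ring: MAX_OPROF_EVENTS entries of {eip, mode, event} indexed by event_head/event_tail, plus loss counters. Which index Xen advances is not visible in this excerpt, so the sketch below assumes Xen produces at event_head and the guest's VIRQ_PMC_OVF handler consumes at event_tail; oprofile_add_sample() and the losing_samples reset are likewise assumptions.

#include <public/xen.h>

extern shared_info_t *HYPERVISOR_shared_info;  /* mapped by the guest at start of day */

/* Hypothetical sink feeding one sample into the in-guest OProfile buffer. */
extern void oprofile_add_sample(unsigned long eip, unsigned int mode,
                                unsigned int event);

/* Bound to VIRQ_PMC_OVF by the dom0 driver (binding code not shown here). */
static void pmc_ovf_virq_handler(void)
{
    shared_info_t *s = HYPERVISOR_shared_info;

    while ( s->event_tail != s->event_head )
    {
        unsigned int i = s->event_tail;

        oprofile_add_sample(s->event_log[i].eip,
                            s->event_log[i].mode,
                            s->event_log[i].event);

        /* Advance the consumer index around the MAX_OPROF_EVENTS ring. */
        s->event_tail = (i + 1) % MAX_OPROF_EVENTS;
    }

    /* Assumption: clearing this tells Xen it may queue samples again. */
    if ( s->losing_samples )
        s->losing_samples = 0;
}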