[Xen-changelog] Add xenoprof support

# HG changeset patch
# User ack@xxxxxxxxxxxxxxxxxxxxxxx
# Node ID e049baa9055dfa15bbf5ed0b3c3e56fabedbc386
# Parent  388c59fefaa6add89ca38622f2170cb7c98429ba
Add xenoprof support
Signed-off-by: Jose Renato Santos <jsantos@xxxxxxxxxx>
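
In outline: Xen gains a new hypercall, __HYPERVISOR_xenoprof_op (number 31), a
new virtual IRQ, VIRQ_XENOPROF, and an x86 oprofile backend
(xen/arch/x86/oprofile/) that programs the hardware counters from NMI context
and logs samples into per-VCPU buffers shared with the guest. On the Linux
side, arch/i386/oprofile/xenoprof.c replaces the native NMI driver when
CONFIG_X86_XEN is set, and a patch to the generic oprofile code adds Xen-mode
samples and an active_domains control file. Only the primary profiler (for
now, dom0) may program the counters. A session, as implemented by the driver
below, runs roughly as follows (editorial sketch for orientation, not part of
the patch itself):

    ret = HYPERVISOR_xenoprof_op(XENOPROF_init, max_samples,
                                 (unsigned long)&result);
    /* ... map result.buf_maddr, bind VIRQ_XENOPROF on every CPU ... */
    HYPERVISOR_xenoprof_op(XENOPROF_reserve_counters, 0, 0);  /* primary only */
    HYPERVISOR_xenoprof_op(XENOPROF_setup_events,
                           (unsigned long)&counter_config,
                           num_events);                       /* primary only */
    HYPERVISOR_xenoprof_op(XENOPROF_enable_virq, 0, 0);
    HYPERVISOR_xenoprof_op(XENOPROF_start, 0, 0);             /* primary only */
    /* ... consume samples on each VIRQ_XENOPROF ... */
    HYPERVISOR_xenoprof_op(XENOPROF_stop, 0, 0);              /* primary only */
    HYPERVISOR_xenoprof_op(XENOPROF_shutdown, 0, 0);          /* primary only */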

diff -r 388c59fefaa6 -r e049baa9055d buildconfigs/linux-defconfig_xen0_x86_32
--- a/buildconfigs/linux-defconfig_xen0_x86_32  Thu Apr  6 16:49:21 2006
+++ b/buildconfigs/linux-defconfig_xen0_x86_32  Thu Apr  6 17:58:01 2006
@@ -1231,6 +1231,7 @@
 #
 # Instrumentation Support
 #
+# CONFIG_PROFILING is not set
 # CONFIG_KPROBES is not set
 
 #
diff -r 388c59fefaa6 -r e049baa9055d buildconfigs/linux-defconfig_xenU_x86_32
--- a/buildconfigs/linux-defconfig_xenU_x86_32  Thu Apr  6 16:49:21 2006
+++ b/buildconfigs/linux-defconfig_xenU_x86_32  Thu Apr  6 17:58:01 2006
@@ -779,6 +779,7 @@
 #
 # Instrumentation Support
 #
+# CONFIG_PROFILING is not set
 # CONFIG_KPROBES is not set
 
 #
diff -r 388c59fefaa6 -r e049baa9055d buildconfigs/linux-defconfig_xen_x86_32
--- a/buildconfigs/linux-defconfig_xen_x86_32   Thu Apr  6 16:49:21 2006
+++ b/buildconfigs/linux-defconfig_xen_x86_32   Thu Apr  6 17:58:01 2006
@@ -2892,6 +2892,7 @@
 #
 # Instrumentation Support
 #
+# CONFIG_PROFILING is not set
 # CONFIG_KPROBES is not set
 
 #
diff -r 388c59fefaa6 -r e049baa9055d linux-2.6-xen-sparse/arch/i386/Kconfig
--- a/linux-2.6-xen-sparse/arch/i386/Kconfig    Thu Apr  6 16:49:21 2006
+++ b/linux-2.6-xen-sparse/arch/i386/Kconfig    Thu Apr  6 17:58:01 2006
@@ -1116,9 +1116,7 @@
 menu "Instrumentation Support"
        depends on EXPERIMENTAL
 
-if !X86_XEN
 source "arch/i386/oprofile/Kconfig"
-endif
 
 config KPROBES
        bool "Kprobes (EXPERIMENTAL)"
diff -r 388c59fefaa6 -r e049baa9055d linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c   Thu Apr  6 16:49:21 2006
+++ b/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c   Thu Apr  6 17:58:01 2006
@@ -177,6 +177,32 @@
 
 EXPORT_SYMBOL(touch_pte_range);
 
+void *vm_map_xen_pages (unsigned long maddr, int vm_size, pgprot_t prot)
+{
+       int error;
+       
+       struct vm_struct *vma;
+       vma = get_vm_area (vm_size, VM_IOREMAP);
+      
+       if (vma == NULL) {
+               printk ("ioremap.c,vm_map_xen_pages(): "
+                       "Failed to get VMA area\n");
+               return NULL;
+       }
+
+       error = direct_kernel_remap_pfn_range((unsigned long) vma->addr,
+                                             maddr >> PAGE_SHIFT, vm_size,
+                                             prot, DOMID_SELF );
+       if (error == 0) {
+               return vma->addr;
+       } else {
+               printk ("ioremap.c,vm_map_xen_pages(): "
+                       "Failed to map xen shared pages into kernel space\n");
+               return NULL;
+       }
+}
+EXPORT_SYMBOL(vm_map_xen_pages);
+
 /*
  * Does @address reside within a non-highmem page that is local to this virtual
  * machine (i.e., not an I/O page, nor a memory page belonging to another VM).
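
vm_map_xen_pages() above is the helper the Linux driver uses to map the
Xen-allocated sample buffers into its own address space: it reserves a
VM_IOREMAP area and remaps the given machine frames into it. A minimal caller
sketch, assuming a buffer of nbuf * bufsize bytes at machine address
buf_maddr (the real call site is in xenoprof.c further down):

    pgprot_t prot = __pgprot(_KERNPG_TABLE);
    int npages = (bufsize * nbuf - 1) / PAGE_SIZE + 1;
    char *shared = (char *)vm_map_xen_pages(buf_maddr, npages * PAGE_SIZE, prot);
    if (shared == NULL)
            return -ENOMEM;         /* get_vm_area() or the remap failed */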
diff -r 388c59fefaa6 -r e049baa9055d linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h    Thu Apr  6 16:49:21 2006
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h    Thu Apr  6 17:58:01 2006
@@ -335,6 +335,14 @@
 {
        return _hypercall2(int, callback_op, cmd, arg);
 }
+
+static inline int
+HYPERVISOR_xenoprof_op(
+       int op, unsigned long arg1, unsigned long arg2)
+{
+       return _hypercall3(int, xenoprof_op, op, arg1, arg2);
+}
+
 
 #endif /* __HYPERCALL_H__ */
 
diff -r 388c59fefaa6 -r e049baa9055d linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h  Thu Apr  6 16:49:21 2006
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h  Thu Apr  6 17:58:01 2006
@@ -335,6 +335,13 @@
        int cmd, void *arg)
 {
        return _hypercall2(int, callback_op, cmd, arg);
+}
+
+static inline int
+HYPERVISOR_xenoprof_op(
+       int op, unsigned long arg1, unsigned long arg2)
+{
+       return _hypercall3(int, xenoprof_op, op, arg1, arg2);
 }
 
 #endif /* __HYPERCALL_H__ */
diff -r 388c59fefaa6 -r e049baa9055d xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile     Thu Apr  6 16:49:21 2006
+++ b/xen/arch/x86/Makefile     Thu Apr  6 17:58:01 2006
@@ -2,6 +2,7 @@
 subdir-y += cpu
 subdir-y += genapic
 subdir-y += hvm
+subdir-y += oprofile
 
 subdir-$(x86_32) += x86_32
 subdir-$(x86_64) += x86_64
diff -r 388c59fefaa6 -r e049baa9055d xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Thu Apr  6 16:49:21 2006
+++ b/xen/arch/x86/domain.c     Thu Apr  6 17:58:01 2006
@@ -915,6 +915,8 @@
     spin_unlock_recursive(&d->page_alloc_lock);
 }
 
+extern void free_xenoprof_pages(struct domain *d);
+
 void domain_relinquish_resources(struct domain *d)
 {
     struct vcpu *v;
@@ -961,6 +963,10 @@
     /* Relinquish every page of memory. */
     relinquish_memory(d, &d->xenpage_list);
     relinquish_memory(d, &d->page_list);
+
+    /* Free page used by xen oprofile buffer */
+    free_xenoprof_pages(d);
+
 }
 
 void arch_dump_domain_info(struct domain *d)
diff -r 388c59fefaa6 -r e049baa9055d xen/arch/x86/x86_32/entry.S
--- a/xen/arch/x86/x86_32/entry.S       Thu Apr  6 16:49:21 2006
+++ b/xen/arch/x86/x86_32/entry.S       Thu Apr  6 17:58:01 2006
@@ -645,6 +645,7 @@
         .long do_nmi_op
         .long do_arch_sched_op
         .long do_callback_op        /* 30 */
+        .long do_xenoprof_op
         .rept NR_hypercalls-((.-hypercall_table)/4)
         .long do_ni_hypercall
         .endr
@@ -681,6 +682,7 @@
         .byte 2 /* do_nmi_op            */
         .byte 2 /* do_arch_sched_op     */
         .byte 2 /* do_callback_op       */  /* 30 */
+        .byte 3 /* do_xenoprof_op       */
         .rept NR_hypercalls-(.-hypercall_args_table)
         .byte 0 /* do_ni_hypercall      */
         .endr
diff -r 388c59fefaa6 -r e049baa9055d xen/arch/x86/x86_64/entry.S
--- a/xen/arch/x86/x86_64/entry.S       Thu Apr  6 16:49:21 2006
+++ b/xen/arch/x86/x86_64/entry.S       Thu Apr  6 17:58:01 2006
@@ -553,6 +553,7 @@
         .quad do_nmi_op
         .quad do_arch_sched_op
         .quad do_callback_op        /* 30 */
+        .quad do_xenoprof_op
         .rept NR_hypercalls-((.-hypercall_table)/8)
         .quad do_ni_hypercall
         .endr
@@ -589,6 +590,7 @@
         .byte 2 /* do_nmi_op            */
         .byte 2 /* do_arch_sched_op     */
         .byte 2 /* do_callback_op       */  /* 30 */
+        .byte 3 /* do_xenoprof_op       */
         .rept NR_hypercalls-(.-hypercall_args_table)
         .byte 0 /* do_ni_hypercall      */
         .endr
diff -r 388c59fefaa6 -r e049baa9055d xen/include/public/xen.h
--- a/xen/include/public/xen.h  Thu Apr  6 16:49:21 2006
+++ b/xen/include/public/xen.h  Thu Apr  6 17:58:01 2006
@@ -61,6 +61,7 @@
 #define __HYPERVISOR_nmi_op               28
 #define __HYPERVISOR_sched_op             29
 #define __HYPERVISOR_callback_op          30
+#define __HYPERVISOR_xenoprof_op          31
 
 /* 
  * VIRTUAL INTERRUPTS
@@ -77,7 +78,8 @@
 #define VIRQ_CONSOLE    2  /* G. (DOM0) Bytes received on emergency console. */
 #define VIRQ_DOM_EXC    3  /* G. (DOM0) Exceptional event for some domain.   */
 #define VIRQ_DEBUGGER   6  /* G. (DOM0) A domain has paused for debugging.   */
-#define NR_VIRQS        8
+#define VIRQ_XENOPROF   7  /* XenOprofile interrupt: new sample available */
+#define NR_VIRQS        9
 
 /*
  * MMU-UPDATE REQUESTS
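
VIRQ_XENOPROF is raised per VCPU whenever Xen has queued new samples for it.
Each guest CPU binds the VIRQ through the usual event-channel machinery; a
sketch mirroring bind_virq_cpu() in xenoprof.c below (2.6.16-era API):

    int irq = bind_virq_to_irqhandler(VIRQ_XENOPROF, cpu,
                                      xenoprof_ovf_interrupt,
                                      SA_INTERRUPT, "xenoprof", NULL);
    if (irq < 0)
            return irq;             /* this CPU gets no sample interrupts */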
diff -r 388c59fefaa6 -r e049baa9055d xen/include/xen/sched.h
--- a/xen/include/xen/sched.h   Thu Apr  6 16:49:21 2006
+++ b/xen/include/xen/sched.h   Thu Apr  6 17:58:01 2006
@@ -14,6 +14,7 @@
 #include <xen/grant_table.h>
 #include <xen/rangeset.h>
 #include <asm/domain.h>
+#include <xen/xenoprof.h>
 
 extern unsigned long volatile jiffies;
 extern rwlock_t domlist_lock;
@@ -155,6 +156,9 @@
 
     /* Control-plane tools handle for this domain. */
     xen_domain_handle_t handle;
+
+    /* pointer to xenoprof data (oprofile support) */
+    xenoprof_t *xenoprof;
 };
 
 struct domain_setup_info
diff -r 388c59fefaa6 -r e049baa9055d linux-2.6-xen-sparse/arch/i386/oprofile/Makefile
--- /dev/null   Thu Apr  6 16:49:21 2006
+++ b/linux-2.6-xen-sparse/arch/i386/oprofile/Makefile  Thu Apr  6 17:58:01 2006
@@ -0,0 +1,16 @@
+obj-$(CONFIG_OPROFILE) += oprofile.o
+
+DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
+               oprof.o cpu_buffer.o buffer_sync.o \
+               event_buffer.o oprofile_files.o \
+               oprofilefs.o oprofile_stats.o  \
+               timer_int.o )
+
+ifdef CONFIG_X86_XEN
+oprofile-y                             := $(DRIVER_OBJS) xenoprof.o
+else 
+oprofile-y                             := $(DRIVER_OBJS) init.o backtrace.o
+oprofile-$(CONFIG_X86_LOCAL_APIC)      += nmi_int.o op_model_athlon.o \
+                                          op_model_ppro.o op_model_p4.o
+oprofile-$(CONFIG_X86_IO_APIC)         += nmi_timer_int.o
+endif
diff -r 388c59fefaa6 -r e049baa9055d linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c
--- /dev/null   Thu Apr  6 16:49:21 2006
+++ b/linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c        Thu Apr  6 17:58:01 2006
@@ -0,0 +1,395 @@
+/**
+ * @file xenoprof.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@xxxxxxxxxxxxxxxxx>
+ *
+ * Modified by Aravind Menon and Jose Renato Santos for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ */
+
+#include <linux/init.h>
+#include <linux/notifier.h>
+#include <linux/smp.h>
+#include <linux/oprofile.h>
+#include <linux/sysdev.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/vmalloc.h>
+#include <asm/nmi.h>
+#include <asm/msr.h>
+#include <asm/apic.h>
+#include <asm/pgtable.h>
+#include <xen/evtchn.h>
+#include "op_counter.h"
+
+#include <xen/interface/xen.h>
+#include <xen/interface/xenoprof.h>
+
+static int xenoprof_start(void);
+static void xenoprof_stop(void);
+
+void * vm_map_xen_pages(unsigned long maddr, int vm_size, pgprot_t prot);
+
+static int xenoprof_enabled = 0;
+static int num_events = 0;
+static int is_primary = 0;
+
+/* sample buffers shared with Xen */
+xenoprof_buf_t * xenoprof_buf[MAX_VIRT_CPUS];
+/* Shared buffer area */
+char * shared_buffer;
+/* Number of buffers in shared area (one per VCPU) */
+int nbuf;
+/* Mappings of VIRQ_XENOPROF to irq number (per cpu) */
+int ovf_irq[NR_CPUS];
+/* cpu model type string - copied from Xen memory space on XENOPROF_init command */
+char cpu_type[XENOPROF_CPU_TYPE_SIZE];
+
+#ifdef CONFIG_PM
+
+static int xenoprof_suspend(struct sys_device * dev, pm_message_t state)
+{
+       if (xenoprof_enabled == 1)
+               xenoprof_stop();
+       return 0;
+}
+
+
+static int xenoprof_resume(struct sys_device * dev)
+{
+       if (xenoprof_enabled == 1)
+               xenoprof_start();
+       return 0;
+}
+
+
+static struct sysdev_class oprofile_sysclass = {
+       set_kset_name("oprofile"),
+       .resume         = xenoprof_resume,
+       .suspend        = xenoprof_suspend
+};
+
+
+static struct sys_device device_oprofile = {
+       .id     = 0,
+       .cls    = &oprofile_sysclass,
+};
+
+
+static int __init init_driverfs(void)
+{
+       int error;
+       if (!(error = sysdev_class_register(&oprofile_sysclass)))
+               error = sysdev_register(&device_oprofile);
+       return error;
+}
+
+
+static void __exit exit_driverfs(void)
+{
+       sysdev_unregister(&device_oprofile);
+       sysdev_class_unregister(&oprofile_sysclass);
+}
+
+#else
+#define init_driverfs() do { } while (0)
+#define exit_driverfs() do { } while (0)
+#endif /* CONFIG_PM */
+
+unsigned long long oprofile_samples = 0;
+
+static irqreturn_t 
+xenoprof_ovf_interrupt(int irq, void * dev_id, struct pt_regs * regs)
+{
+       int head, tail, size;
+       xenoprof_buf_t * buf;
+       int cpu;
+
+       cpu = smp_processor_id();
+       buf = xenoprof_buf[cpu];
+
+       head = buf->event_head;
+       tail = buf->event_tail;
+       size = buf->event_size;
+
+       if (tail > head) {
+               while (tail < size) {
+                       oprofile_add_pc(buf->event_log[tail].eip,
+                                       buf->event_log[tail].mode,
+                                       buf->event_log[tail].event);
+                       oprofile_samples++;
+                       tail++;
+               }
+               tail = 0;
+       }
+       while (tail < head) {
+               oprofile_add_pc(buf->event_log[tail].eip,
+                               buf->event_log[tail].mode,
+                               buf->event_log[tail].event);
+               oprofile_samples++;
+               tail++;
+       }
+
+       buf->event_tail = tail;
+
+       return IRQ_HANDLED;
+}
+
+
+static void unbind_virq_cpu(void * info)
+{
+       int cpu = smp_processor_id();
+       if (ovf_irq[cpu] >= 0) {
+               unbind_from_irqhandler(ovf_irq[cpu], NULL);
+               ovf_irq[cpu] = -1;
+       }
+}
+
+
+static void unbind_virq(void)
+{
+       on_each_cpu(unbind_virq_cpu, NULL, 0, 1);
+}
+
+
+int bind_virq_error;
+
+static void bind_virq_cpu(void * info)
+{
+       int result;
+       int cpu = smp_processor_id();
+
+       result = bind_virq_to_irqhandler(VIRQ_XENOPROF,
+                                        cpu,
+                                        xenoprof_ovf_interrupt,
+                                        SA_INTERRUPT,
+                                        "xenoprof",
+                                        NULL);
+
+       if (result<0) {
+               bind_virq_error = result;
+               printk("xenoprof.c: binding VIRQ_XENOPROF to IRQ failed on CPU "
+                      "%d\n", cpu);
+       } else {
+               ovf_irq[cpu] = result;
+       }
+}
+
+
+static int bind_virq(void)
+{
+       bind_virq_error = 0;
+       on_each_cpu(bind_virq_cpu, NULL, 0, 1);
+       if (bind_virq_error) {
+               unbind_virq();
+               return bind_virq_error;
+       } else {
+               return 0;
+       }
+}
+
+
+static int xenoprof_setup(void)
+{
+       int ret;
+
+       ret = bind_virq();
+       if (ret)
+               return ret;
+
+       if (is_primary) {
+               ret = HYPERVISOR_xenoprof_op(XENOPROF_reserve_counters,
+                                            (unsigned long)NULL,
+                                            (unsigned long)NULL);
+               if (ret)
+                       goto err;
+
+               ret = HYPERVISOR_xenoprof_op(XENOPROF_setup_events,
+                                            (unsigned long)&counter_config,
+                                            (unsigned long)num_events);
+               if (ret)
+                       goto err;
+       }
+
+       ret = HYPERVISOR_xenoprof_op(XENOPROF_enable_virq,
+                                    (unsigned long)NULL,
+                                    (unsigned long)NULL);
+       if (ret)
+               goto err;
+
+       xenoprof_enabled = 1;
+       return 0;
+ err:
+       unbind_virq();
+       return ret;
+}
+
+
+static void xenoprof_shutdown(void)
+{
+       xenoprof_enabled = 0;
+
+       HYPERVISOR_xenoprof_op(XENOPROF_disable_virq,
+                              (unsigned long)NULL,
+                              (unsigned long)NULL);
+
+       if (is_primary) {
+               HYPERVISOR_xenoprof_op(XENOPROF_release_counters,
+                                      (unsigned long)NULL,
+                                      (unsigned long)NULL);
+       }
+
+       unbind_virq();
+}
+
+
+static int xenoprof_start(void)
+{
+       int ret = 0;
+
+       if (is_primary)
+               ret = HYPERVISOR_xenoprof_op(XENOPROF_start,
+                                            (unsigned long)NULL,
+                                            (unsigned long)NULL);
+       return ret;
+}
+
+
+static void xenoprof_stop(void)
+{
+       if (is_primary)
+               HYPERVISOR_xenoprof_op(XENOPROF_stop,
+                                      (unsigned long)NULL,
+                                      (unsigned long)NULL);
+}
+
+
+static int xenoprof_set_active(int * active_domains,
+                         unsigned int adomains)
+{
+       int ret = 0;
+       if (is_primary)
+               ret = HYPERVISOR_xenoprof_op(XENOPROF_set_active,
+                                            (unsigned long)active_domains,
+                                            (unsigned long)adomains);
+       return ret;
+}
+
+
+struct op_counter_config counter_config[OP_MAX_COUNTER];
+
+static int xenoprof_create_files(struct super_block * sb, struct dentry * root)
+{
+       unsigned int i;
+
+       for (i = 0; i < num_events; ++i) {
+               struct dentry * dir;
+               char buf[2];
+ 
+               snprintf(buf, 2, "%d", i);
+               dir = oprofilefs_mkdir(sb, root, buf);
+               oprofilefs_create_ulong(sb, dir, "enabled",
+                                       &counter_config[i].enabled);
+               oprofilefs_create_ulong(sb, dir, "event",
+                                       &counter_config[i].event);
+               oprofilefs_create_ulong(sb, dir, "count",
+                                       &counter_config[i].count);
+               oprofilefs_create_ulong(sb, dir, "unit_mask",
+                                       &counter_config[i].unit_mask);
+               oprofilefs_create_ulong(sb, dir, "kernel",
+                                       &counter_config[i].kernel);
+               oprofilefs_create_ulong(sb, dir, "user",
+                                       &counter_config[i].user);
+       }
+
+       return 0;
+}
+
+
+struct oprofile_operations xenoprof_ops = {
+       .create_files   = xenoprof_create_files,
+       .set_active     = xenoprof_set_active,
+       .setup          = xenoprof_setup,
+       .shutdown       = xenoprof_shutdown,
+       .start          = xenoprof_start,
+       .stop           = xenoprof_stop
+};
+
+
+/* in order to get driverfs right */
+static int using_xenoprof;
+
+int __init oprofile_arch_init(struct oprofile_operations * ops)
+{
+       xenoprof_init_result_t result;
+       xenoprof_buf_t * buf;
+       int max_samples = 16;
+       int vm_size;
+       int npages;
+       int i;
+
+       int ret = HYPERVISOR_xenoprof_op(XENOPROF_init,
+                                        (unsigned long)max_samples,
+                                        (unsigned long)&result);
+
+       if (!ret) {
+               pgprot_t prot = __pgprot(_KERNPG_TABLE);
+
+               num_events = result.num_events;
+               is_primary = result.is_primary;
+               nbuf = result.nbuf;
+
+               npages = (result.bufsize * nbuf - 1) / PAGE_SIZE + 1;
+               vm_size = npages * PAGE_SIZE;
+
+               shared_buffer = (char *) vm_map_xen_pages(result.buf_maddr,
+                                                         vm_size, prot);
+               if (!shared_buffer) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
+
+               for (i=0; i< nbuf; i++) {
+                       buf = (xenoprof_buf_t*) 
+                               &shared_buffer[i * result.bufsize];
+                       BUG_ON(buf->vcpu_id >= MAX_VIRT_CPUS);
+                       xenoprof_buf[buf->vcpu_id] = buf;
+               }
+
+               /*  cpu_type is detected by Xen */
+               cpu_type[XENOPROF_CPU_TYPE_SIZE-1] = 0;
+               strncpy(cpu_type, result.cpu_type, XENOPROF_CPU_TYPE_SIZE - 1);
+               xenoprof_ops.cpu_type = cpu_type;
+
+               init_driverfs();
+               using_xenoprof = 1;
+               *ops = xenoprof_ops;
+
+               for (i=0; i<NR_CPUS; i++)
+                       ovf_irq[i] = -1;
+       }
+ out:
+       printk(KERN_INFO "oprofile_arch_init: ret %d, events %d, "
+              "is_primary %d\n", ret, num_events, is_primary);
+       return ret;
+}
+
+
+void __exit oprofile_arch_exit(void)
+{
+       if (using_xenoprof)
+               exit_driverfs();
+
+       if (shared_buffer) {
+               vunmap(shared_buffer);
+               shared_buffer = NULL;
+       }
+       if (is_primary)
+               HYPERVISOR_xenoprof_op(XENOPROF_shutdown,
+                                      (unsigned long)NULL,
+                                      (unsigned long)NULL);
+}
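
A note on the buffer protocol in xenoprof_ovf_interrupt() above: each shared
buffer is a single-producer/single-consumer ring. Xen advances event_head as
it logs samples; the guest drains [event_tail, event_head) with wraparound at
event_size and then publishes the new event_tail, so no lock is needed. The
xenoprof_buf_t layout itself comes from xen/interface/xenoprof.h, which is
not part of this excerpt; judging only from the accesses above it must look
roughly like this (inferred, field order not authoritative):

    typedef struct {
            int vcpu_id;
            int event_size;                 /* number of event_log[] slots      */
            int event_head;                 /* advanced by Xen (producer)       */
            int event_tail;                 /* advanced by the guest (consumer) */
            struct {
                    unsigned long eip;      /* sample program counter           */
                    int mode;               /* user / kernel / xen              */
                    int event;              /* counter that overflowed          */
            } event_log[1];                 /* actually sized by bufsize        */
    } xenoprof_buf_t;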
diff -r 388c59fefaa6 -r e049baa9055d linux-2.6-xen-sparse/arch/x86_64/oprofile/Makefile
--- /dev/null   Thu Apr  6 16:49:21 2006
+++ b/linux-2.6-xen-sparse/arch/x86_64/oprofile/Makefile        Thu Apr  6 17:58:01 2006
@@ -0,0 +1,22 @@
+#
+# oprofile for x86-64.
+# Just reuse the one from i386. 
+#
+
+obj-$(CONFIG_OPROFILE) += oprofile.o
+ 
+DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
+       oprof.o cpu_buffer.o buffer_sync.o \
+       event_buffer.o oprofile_files.o \
+       oprofilefs.o oprofile_stats.o \
+       timer_int.o )
+
+ifdef CONFIG_X86_64_XEN # assumed symbol; original read bare "ifdef" (cf. CONFIG_X86_XEN in the i386 Makefile)
+OPROFILE-y := xenoprof.o
+else
+OPROFILE-y := init.o backtrace.o
+OPROFILE-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_athlon.o op_model_p4.o \
+                                    op_model_ppro.o
+OPROFILE-$(CONFIG_X86_IO_APIC)    += nmi_timer_int.o 
+endif
+oprofile-y = $(DRIVER_OBJS) $(addprefix ../../i386/oprofile/, $(OPROFILE-y))
diff -r 388c59fefaa6 -r e049baa9055d patches/linux-2.6.16/xenoprof-generic.patch
--- /dev/null   Thu Apr  6 16:49:21 2006
+++ b/patches/linux-2.6.16/xenoprof-generic.patch       Thu Apr  6 17:58:01 2006
@@ -0,0 +1,384 @@
+diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/buffer_sync.c ./drivers/oprofile/buffer_sync.c
+--- ../pristine-linux-2.6.16/drivers/oprofile/buffer_sync.c    2006-03-20 05:53:29.000000000 +0000
++++ ./drivers/oprofile/buffer_sync.c   2006-04-03 15:53:05.000000000 +0100
+@@ -6,6 +6,10 @@
+  *
+  * @author John Levon <levon@xxxxxxxxxxxxxxxxx>
+  *
++ * Modified by Aravind Menon for Xen
++ * These modifications are:
++ * Copyright (C) 2005 Hewlett-Packard Co.
++ *
+  * This is the core of the buffer management. Each
+  * CPU buffer is processed and entered into the
+  * global event buffer. Such processing is necessary
+@@ -275,15 +279,24 @@ static void add_cpu_switch(int i)
+       last_cookie = INVALID_COOKIE;
+ }
+ 
+-static void add_kernel_ctx_switch(unsigned int in_kernel)
++static void add_cpu_mode_switch(unsigned int cpu_mode)
+ {
+       add_event_entry(ESCAPE_CODE);
+-      if (in_kernel)
+-              add_event_entry(KERNEL_ENTER_SWITCH_CODE); 
+-      else
+-              add_event_entry(KERNEL_EXIT_SWITCH_CODE); 
++      switch (cpu_mode) {
++      case CPU_MODE_USER:
++              add_event_entry(USER_ENTER_SWITCH_CODE);
++              break;
++      case CPU_MODE_KERNEL:
++              add_event_entry(KERNEL_ENTER_SWITCH_CODE);
++              break;
++      case CPU_MODE_XEN:
++              add_event_entry(XEN_ENTER_SWITCH_CODE);
++              break;
++      default:
++              break;
++      }
+ }
+- 
++
+ static void
+ add_user_ctx_switch(struct task_struct const * task, unsigned long cookie)
+ {
+@@ -348,9 +361,9 @@ static int add_us_sample(struct mm_struc
+  * for later lookup from userspace.
+  */
+ static int
+-add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel)
++add_sample(struct mm_struct * mm, struct op_sample * s, int cpu_mode)
+ {
+-      if (in_kernel) {
++      if (cpu_mode >= CPU_MODE_KERNEL) {
+               add_sample_entry(s->eip, s->event);
+               return 1;
+       } else if (mm) {
+@@ -496,7 +509,7 @@ void sync_buffer(int cpu)
+       struct mm_struct *mm = NULL;
+       struct task_struct * new;
+       unsigned long cookie = 0;
+-      int in_kernel = 1;
++      int cpu_mode = 1;
+       unsigned int i;
+       sync_buffer_state state = sb_buffer_start;
+       unsigned long available;
+@@ -513,12 +526,12 @@ void sync_buffer(int cpu)
+               struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos];
+  
+               if (is_code(s->eip)) {
+-                      if (s->event <= CPU_IS_KERNEL) {
++                      if (s->event <= CPU_MODE_XEN) {
+                               /* kernel/userspace switch */
+-                              in_kernel = s->event;
++                              cpu_mode = s->event;
+                               if (state == sb_buffer_start)
+                                       state = sb_sample_start;
+-                              add_kernel_ctx_switch(s->event);
++                              add_cpu_mode_switch(s->event);
+                       } else if (s->event == CPU_TRACE_BEGIN) {
+                               state = sb_bt_start;
+                               add_trace_begin();
+@@ -536,7 +549,7 @@ void sync_buffer(int cpu)
+                       }
+               } else {
+                       if (state >= sb_bt_start &&
+-                          !add_sample(mm, s, in_kernel)) {
++                          !add_sample(mm, s, cpu_mode)) {
+                               if (state == sb_bt_start) {
+                                       state = sb_bt_ignore;
+                                       atomic_inc(&oprofile_stats.bt_lost_no_mapping);
+diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/cpu_buffer.c ./drivers/oprofile/cpu_buffer.c
+--- ../pristine-linux-2.6.16/drivers/oprofile/cpu_buffer.c     2006-03-20 05:53:29.000000000 +0000
++++ ./drivers/oprofile/cpu_buffer.c    2006-04-03 15:53:05.000000000 +0100
+@@ -6,6 +6,10 @@
+  *
+  * @author John Levon <levon@xxxxxxxxxxxxxxxxx>
+  *
++ * Modified by Aravind Menon for Xen
++ * These modifications are:
++ * Copyright (C) 2005 Hewlett-Packard Co.
++ *
+  * Each CPU has a local buffer that stores PC value/event
+  * pairs. We also log context switches when we notice them.
+  * Eventually each CPU's buffer is processed into the global
+@@ -58,7 +62,7 @@ int alloc_cpu_buffers(void)
+                       goto fail;
+  
+               b->last_task = NULL;
+-              b->last_is_kernel = -1;
++              b->last_cpu_mode = -1;
+               b->tracing = 0;
+               b->buffer_size = buffer_size;
+               b->tail_pos = 0;
+@@ -114,7 +118,7 @@ void cpu_buffer_reset(struct oprofile_cp
+        * collected will populate the buffer with proper
+        * values to initialize the buffer
+        */
+-      cpu_buf->last_is_kernel = -1;
++      cpu_buf->last_cpu_mode = -1;
+       cpu_buf->last_task = NULL;
+ }
+ 
+@@ -164,13 +168,13 @@ add_code(struct oprofile_cpu_buffer * bu
+  * because of the head/tail separation of the writer and reader
+  * of the CPU buffer.
+  *
+- * is_kernel is needed because on some architectures you cannot
++ * cpu_mode is needed because on some architectures you cannot
+  * tell if you are in kernel or user space simply by looking at
+- * pc. We tag this in the buffer by generating kernel enter/exit
+- * events whenever is_kernel changes
++ * pc. We tag this in the buffer by generating kernel/user (and xen)
++ *  enter events whenever cpu_mode changes
+  */
+ static int log_sample(struct oprofile_cpu_buffer * cpu_buf, unsigned long pc,
+-                    int is_kernel, unsigned long event)
++                    int cpu_mode, unsigned long event)
+ {
+       struct task_struct * task;
+ 
+@@ -181,16 +185,16 @@ static int log_sample(struct oprofile_cp
+               return 0;
+       }
+ 
+-      is_kernel = !!is_kernel;
++      WARN_ON(cpu_mode > CPU_MODE_XEN);
+ 
+       task = current;
+ 
+       /* notice a switch from user->kernel or vice versa */
+-      if (cpu_buf->last_is_kernel != is_kernel) {
+-              cpu_buf->last_is_kernel = is_kernel;
+-              add_code(cpu_buf, is_kernel);
++      if (cpu_buf->last_cpu_mode != cpu_mode) {
++              cpu_buf->last_cpu_mode = cpu_mode;
++              add_code(cpu_buf, cpu_mode);
+       }
+-
++      
+       /* notice a task switch */
+       if (cpu_buf->last_task != task) {
+               cpu_buf->last_task = task;
+diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/cpu_buffer.h ./drivers/oprofile/cpu_buffer.h
+--- ../pristine-linux-2.6.16/drivers/oprofile/cpu_buffer.h     2006-03-20 05:53:29.000000000 +0000
++++ ./drivers/oprofile/cpu_buffer.h    2006-04-03 15:53:05.000000000 +0100
+@@ -36,7 +36,7 @@ struct oprofile_cpu_buffer {
+       volatile unsigned long tail_pos;
+       unsigned long buffer_size;
+       struct task_struct * last_task;
+-      int last_is_kernel;
++      int last_cpu_mode;
+       int tracing;
+       struct op_sample * buffer;
+       unsigned long sample_received;
+@@ -51,7 +51,9 @@ extern struct oprofile_cpu_buffer cpu_bu
+ void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf);
+ 
+ /* transient events for the CPU buffer -> event buffer */
+-#define CPU_IS_KERNEL 1
+-#define CPU_TRACE_BEGIN 2
++#define CPU_MODE_USER    0
++#define CPU_MODE_KERNEL  1
++#define CPU_MODE_XEN     2
++#define CPU_TRACE_BEGIN  3
+ 
+ #endif /* OPROFILE_CPU_BUFFER_H */
+diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/event_buffer.h ./drivers/oprofile/event_buffer.h
+--- ../pristine-linux-2.6.16/drivers/oprofile/event_buffer.h   2006-03-20 05:53:29.000000000 +0000
++++ ./drivers/oprofile/event_buffer.h  2006-04-03 15:53:05.000000000 +0100
+@@ -29,11 +29,12 @@ void wake_up_buffer_waiter(void);
+ #define CPU_SWITCH_CODE               2
+ #define COOKIE_SWITCH_CODE            3
+ #define KERNEL_ENTER_SWITCH_CODE      4
+-#define KERNEL_EXIT_SWITCH_CODE               5
++#define USER_ENTER_SWITCH_CODE                5
+ #define MODULE_LOADED_CODE            6
+ #define CTX_TGID_CODE                 7
+ #define TRACE_BEGIN_CODE              8
+ #define TRACE_END_CODE                        9
++#define XEN_ENTER_SWITCH_CODE         10
+  
+ #define INVALID_COOKIE ~0UL
+ #define NO_COOKIE 0UL
+diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/oprof.c ./drivers/oprofile/oprof.c
+--- ../pristine-linux-2.6.16/drivers/oprofile/oprof.c  2006-03-20 05:53:29.000000000 +0000
++++ ./drivers/oprofile/oprof.c 2006-04-03 15:53:05.000000000 +0100
+@@ -5,6 +5,10 @@
+  * @remark Read the file COPYING
+  *
+  * @author John Levon <levon@xxxxxxxxxxxxxxxxx>
++ *
++ * Modified by Aravind Menon for Xen
++ * These modifications are:
++ * Copyright (C) 2005 Hewlett-Packard Co.
+  */
+ 
+ #include <linux/kernel.h>
+@@ -19,7 +23,7 @@
+ #include "cpu_buffer.h"
+ #include "buffer_sync.h"
+ #include "oprofile_stats.h"
+- 
++
+ struct oprofile_operations oprofile_ops;
+ 
+ unsigned long oprofile_started;
+@@ -33,6 +37,17 @@ static DECLARE_MUTEX(start_sem);
+  */
+ static int timer = 0;
+ 
++extern unsigned int adomains;
++extern int active_domains[MAX_OPROF_DOMAINS];
++
++int oprofile_set_active(void)
++{
++      if (oprofile_ops.set_active)
++              return oprofile_ops.set_active(active_domains, adomains);
++
++      return -EINVAL;
++}
++
+ int oprofile_setup(void)
+ {
+       int err;
+diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/oprof.h ./drivers/oprofile/oprof.h
+--- ../pristine-linux-2.6.16/drivers/oprofile/oprof.h  2006-03-20 05:53:29.000000000 +0000
++++ ./drivers/oprofile/oprof.h 2006-04-03 15:53:05.000000000 +0100
+@@ -35,5 +35,7 @@ void oprofile_create_files(struct super_
+ void oprofile_timer_init(struct oprofile_operations * ops);
+ 
+ int oprofile_set_backtrace(unsigned long depth);
++
++int oprofile_set_active(void);
+  
+ #endif /* OPROF_H */
+diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/oprofile_files.c ./drivers/oprofile/oprofile_files.c
+--- ../pristine-linux-2.6.16/drivers/oprofile/oprofile_files.c 2006-03-20 05:53:29.000000000 +0000
++++ ./drivers/oprofile/oprofile_files.c        2006-04-03 15:53:05.000000000 +0100
+@@ -5,15 +5,21 @@
+  * @remark Read the file COPYING
+  *
+  * @author John Levon <levon@xxxxxxxxxxxxxxxxx>
++ *
++ * Modified by Aravind Menon for Xen
++ * These modifications are:
++ * Copyright (C) 2005 Hewlett-Packard Co.     
+  */
+ 
+ #include <linux/fs.h>
+ #include <linux/oprofile.h>
++#include <asm/uaccess.h>
++#include <linux/ctype.h>
+ 
+ #include "event_buffer.h"
+ #include "oprofile_stats.h"
+ #include "oprof.h"
+- 
++
+ unsigned long fs_buffer_size = 131072;
+ unsigned long fs_cpu_buffer_size = 8192;
+ unsigned long fs_buffer_watershed = 32768; /* FIXME: tune */
+@@ -117,11 +123,79 @@ static ssize_t dump_write(struct file * 
+ static struct file_operations dump_fops = {
+       .write          = dump_write,
+ };
+- 
++
++#define TMPBUFSIZE 512
++
++unsigned int adomains = 0;
++long active_domains[MAX_OPROF_DOMAINS];
++
++static ssize_t adomain_write(struct file * file, char const __user * buf, 
++                           size_t count, loff_t * offset)
++{
++      char tmpbuf[TMPBUFSIZE];
++      char * startp = tmpbuf;
++      char * endp = tmpbuf;
++      int i;
++      unsigned long val;
++      
++      if (*offset)
++              return -EINVAL; 
++      if (!count)
++              return 0;
++      if (count > TMPBUFSIZE - 1)
++              return -EINVAL;
++
++      memset(tmpbuf, 0x0, TMPBUFSIZE);
++
++      if (copy_from_user(tmpbuf, buf, count))
++              return -EFAULT;
++      
++      for (i = 0; i < MAX_OPROF_DOMAINS; i++)
++              active_domains[i] = -1;
++      adomains = 0;
++
++      while (1) {
++              val = simple_strtol(startp, &endp, 0);
++              if (endp == startp)
++                      break;
++              while (ispunct(*endp))
++                      endp++;
++              active_domains[adomains++] = val;
++              if (adomains >= MAX_OPROF_DOMAINS)
++                      break;
++              startp = endp;
++      }
++      if (oprofile_set_active())
++              return -EINVAL; 
++      return count;
++}
++
++static ssize_t adomain_read(struct file * file, char __user * buf, 
++                          size_t count, loff_t * offset)
++{
++      char tmpbuf[TMPBUFSIZE];
++      size_t len = 0;
++      int i;
++      /* This is all screwed up if we run out of space */
++      for (i = 0; i < adomains; i++) 
++              len += snprintf(tmpbuf + len, TMPBUFSIZE - len, 
++                              "%u ", (unsigned int)active_domains[i]);
++      len += snprintf(tmpbuf + len, TMPBUFSIZE - len, "\n");
++      return simple_read_from_buffer((void __user *)buf, count, 
++                                     offset, tmpbuf, len);
++}
++
++
++static struct file_operations active_domain_ops = {
++      .read           = adomain_read,
++      .write          = adomain_write,
++};
++
+ void oprofile_create_files(struct super_block * sb, struct dentry * root)
+ {
+       oprofilefs_create_file(sb, root, "enable", &enable_fops);
+       oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666);
++      oprofilefs_create_file(sb, root, "active_domains", &active_domain_ops);
+       oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops);
+       oprofilefs_create_ulong(sb, root, "buffer_size", &fs_buffer_size);
+       oprofilefs_create_ulong(sb, root, "buffer_watershed", &fs_buffer_watershed);
+diff -pruN ../pristine-linux-2.6.16/include/linux/oprofile.h ./include/linux/oprofile.h
+--- ../pristine-linux-2.6.16/include/linux/oprofile.h  2006-03-20 05:53:29.000000000 +0000
++++ ./include/linux/oprofile.h 2006-04-03 15:53:05.000000000 +0100
+@@ -16,6 +16,8 @@
+ #include <linux/types.h>
+ #include <linux/spinlock.h>
+ #include <asm/atomic.h>
++
++#include <xen/interface/xenoprof.h>
+  
+ struct super_block;
+ struct dentry;
+@@ -27,6 +29,8 @@ struct oprofile_operations {
+       /* create any necessary configuration files in the oprofile fs.
+        * Optional. */
+       int (*create_files)(struct super_block * sb, struct dentry * root);
++      /* setup active domains with Xen */
++      int (*set_active)(int *active_domains, unsigned int adomains);
+       /* Do any necessary interrupt setup. Optional. */
+       int (*setup)(void);
+       /* Do any necessary interrupt shutdown. Optional. */
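
The active_domains file added above is the control knob for system-wide
profiling: the primary profiler writes a list of domain IDs (any punctuation
works as a separator, per adomain_write()), and oprofile_set_active()
forwards the list to Xen through the new set_active operation. A user-space
sketch, assuming oprofilefs is mounted at /dev/oprofile:

    /* select domains 0 and 3 for profiling */
    int fd = open("/dev/oprofile/active_domains", O_WRONLY);
    if (fd >= 0) {
            write(fd, "0,3", 3);    /* parsed by adomain_write() above */
            close(fd);
    }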
diff -r 388c59fefaa6 -r e049baa9055d xen/arch/x86/oprofile/Makefile
--- /dev/null   Thu Apr  6 16:49:21 2006
+++ b/xen/arch/x86/oprofile/Makefile    Thu Apr  6 17:58:01 2006
@@ -0,0 +1,5 @@
+obj-y += xenoprof.o
+obj-y += nmi_int.o
+obj-y += op_model_p4.o
+obj-y += op_model_ppro.o
+obj-y += op_model_athlon.o
diff -r 388c59fefaa6 -r e049baa9055d xen/arch/x86/oprofile/nmi_int.c
--- /dev/null   Thu Apr  6 16:49:21 2006
+++ b/xen/arch/x86/oprofile/nmi_int.c   Thu Apr  6 17:58:01 2006
@@ -0,0 +1,399 @@
+/**
+ * @file nmi_int.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@xxxxxxxxxxxxxxxxx>
+ *
+ * Modified for Xen: by Aravind Menon & Jose Renato Santos
+ *   These modifications are:
+ *   Copyright (C) 2005 Hewlett-Packard Co.
+ */
+
+#include <xen/event.h>
+#include <xen/types.h>
+#include <xen/errno.h>
+#include <xen/init.h>
+#include <public/xen.h>
+#include <asm/nmi.h>
+#include <asm/msr.h>
+#include <asm/apic.h>
+#include <asm/regs.h>
+#include <asm/current.h>
+#include <xen/delay.h>
+ 
+#include "op_counter.h"
+#include "op_x86_model.h"
+ 
+static struct op_x86_model_spec const * model;
+static struct op_msrs cpu_msrs[NR_CPUS];
+static unsigned long saved_lvtpc[NR_CPUS];
+
+#define VIRQ_BITMASK_SIZE (MAX_OPROF_DOMAINS/32 + 1)
+extern int active_domains[MAX_OPROF_DOMAINS];
+extern unsigned int adomains;
+extern struct domain *primary_profiler;
+extern struct domain *adomain_ptrs[MAX_OPROF_DOMAINS];
+extern unsigned long virq_ovf_pending[VIRQ_BITMASK_SIZE];
+extern int is_active(struct domain *d);
+extern int active_id(struct domain *d);
+extern int is_profiled(struct domain *d);
+
+extern size_t strlcpy(char *dest, const char *src, size_t size);
+
+
+int nmi_callback(struct cpu_user_regs *regs, int cpu)
+{
+       int xen_mode = 0;
+       int ovf;
+
+       ovf = model->check_ctrs(cpu, &cpu_msrs[cpu], regs);
+       xen_mode = ring_0(regs);
+       if ( ovf )
+       {
+               if ( is_active(current->domain) )
+               {
+                       if ( !xen_mode )
+                       {
+                               send_guest_vcpu_virq(current, VIRQ_XENOPROF);
+                       } 
+               }
+       }
+       return 1;
+}
+ 
+ 
+static void nmi_cpu_save_registers(struct op_msrs * msrs)
+{
+       unsigned int const nr_ctrs = model->num_counters;
+       unsigned int const nr_ctrls = model->num_controls; 
+       struct op_msr * counters = msrs->counters;
+       struct op_msr * controls = msrs->controls;
+       unsigned int i;
+
+       for (i = 0; i < nr_ctrs; ++i) {
+               rdmsr(counters[i].addr,
+                       counters[i].saved.low,
+                       counters[i].saved.high);
+       }
+ 
+       for (i = 0; i < nr_ctrls; ++i) {
+               rdmsr(controls[i].addr,
+                       controls[i].saved.low,
+                       controls[i].saved.high);
+       }
+}
+
+
+static void nmi_save_registers(void * dummy)
+{
+       int cpu = smp_processor_id();
+       struct op_msrs * msrs = &cpu_msrs[cpu];
+       model->fill_in_addresses(msrs);
+       nmi_cpu_save_registers(msrs);
+}
+
+
+static void free_msrs(void)
+{
+       int i;
+       for (i = 0; i < NR_CPUS; ++i) {
+               xfree(cpu_msrs[i].counters);
+               cpu_msrs[i].counters = NULL;
+               xfree(cpu_msrs[i].controls);
+               cpu_msrs[i].controls = NULL;
+       }
+}
+
+
+static int allocate_msrs(void)
+{
+       int success = 1;
+       size_t controls_size = sizeof(struct op_msr) * model->num_controls;
+       size_t counters_size = sizeof(struct op_msr) * model->num_counters;
+
+       int i;
+       for (i = 0; i < NR_CPUS; ++i) {
+               if (!test_bit(i, &cpu_online_map))
+                       continue;
+
+               cpu_msrs[i].counters = xmalloc_bytes(counters_size);
+               if (!cpu_msrs[i].counters) {
+                       success = 0;
+                       break;
+               }
+               cpu_msrs[i].controls = xmalloc_bytes(controls_size);
+               if (!cpu_msrs[i].controls) {
+                       success = 0;
+                       break;
+               }
+       }
+
+       if (!success)
+               free_msrs();
+
+       return success;
+}
+
+
+static void nmi_cpu_setup(void * dummy)
+{
+       int cpu = smp_processor_id();
+       struct op_msrs * msrs = &cpu_msrs[cpu];
+       model->setup_ctrs(msrs);
+}
+
+
+int nmi_setup_events(void)
+{
+       on_each_cpu(nmi_cpu_setup, NULL, 0, 1);
+       return 0;
+}
+
+int nmi_reserve_counters(void)
+{
+       if (!allocate_msrs())
+               return -ENOMEM;
+
+       /* We walk a thin line between law and rape here.
+        * We need to be careful to install our NMI handler
+        * without actually triggering any NMIs as this will
+        * break the core code horrifically.
+        */
+       if (reserve_lapic_nmi() < 0) {
+               free_msrs();
+               return -EBUSY;
+       }
+       /* We need to serialize save and setup for HT because the subset
+        * of msrs are distinct for save and setup operations
+        */
+       on_each_cpu(nmi_save_registers, NULL, 0, 1);
+       return 0;
+}
+
+int nmi_enable_virq(void)
+{
+       set_nmi_callback(nmi_callback);
+       return 0;
+}
+
+
+void nmi_disable_virq(void)
+{
+       unset_nmi_callback();
+} 
+
+
+static void nmi_restore_registers(struct op_msrs * msrs)
+{
+       unsigned int const nr_ctrs = model->num_counters;
+       unsigned int const nr_ctrls = model->num_controls; 
+       struct op_msr * counters = msrs->counters;
+       struct op_msr * controls = msrs->controls;
+       unsigned int i;
+
+       for (i = 0; i < nr_ctrls; ++i) {
+               wrmsr(controls[i].addr,
+                       controls[i].saved.low,
+                       controls[i].saved.high);
+       }
+ 
+       for (i = 0; i < nr_ctrs; ++i) {
+               wrmsr(counters[i].addr,
+                       counters[i].saved.low,
+                       counters[i].saved.high);
+       }
+}
+ 
+
+static void nmi_cpu_shutdown(void * dummy)
+{
+       int cpu = smp_processor_id();
+       struct op_msrs * msrs = &cpu_msrs[cpu];
+       nmi_restore_registers(msrs);
+}
+
+ 
+void nmi_release_counters(void)
+{
+       on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
+       release_lapic_nmi();
+       free_msrs();
+}
+
+ 
+static void nmi_cpu_start(void * dummy)
+{
+       int cpu = smp_processor_id();
+       struct op_msrs const * msrs = &cpu_msrs[cpu];
+       saved_lvtpc[cpu] = apic_read(APIC_LVTPC);
+       apic_write(APIC_LVTPC, APIC_DM_NMI);
+       model->start(msrs);
+}
+ 
+
+int nmi_start(void)
+{
+       on_each_cpu(nmi_cpu_start, NULL, 0, 1);
+       return 0;
+}
+ 
+ 
+static void nmi_cpu_stop(void * dummy)
+{
+       unsigned int v;
+       int cpu = smp_processor_id();
+       struct op_msrs const * msrs = &cpu_msrs[cpu];
+       model->stop(msrs);
+
+       /* restoring APIC_LVTPC can trigger an apic error because the delivery
+        * mode and vector nr combination can be illegal. That's by design: on
+        * power on apic lvt contain a zero vector nr which are legal only for
+        * NMI delivery mode. So inhibit apic err before restoring lvtpc
+        */
+       if ( !(apic_read(APIC_LVTPC) & APIC_DM_NMI)
+            || (apic_read(APIC_LVTPC) & APIC_LVT_MASKED) )
+       {
+               printk("nmi_stop: APIC not good %ul\n", apic_read(APIC_LVTPC));
+               mdelay(5000);
+       }
+       v = apic_read(APIC_LVTERR);
+       apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
+       apic_write(APIC_LVTPC, saved_lvtpc[cpu]);
+       apic_write(APIC_LVTERR, v);
+}
+ 
+ 
+void nmi_stop(void)
+{
+       on_each_cpu(nmi_cpu_stop, NULL, 0, 1);
+}
+
+
+struct op_counter_config counter_config[OP_MAX_COUNTER];
+
+static int __init p4_init(char * cpu_type)
+{ 
+       __u8 cpu_model = current_cpu_data.x86_model;
+
+       if (cpu_model > 4)
+               return 0;
+
+#ifndef CONFIG_SMP
+       strncpy (cpu_type, "i386/p4", XENOPROF_CPU_TYPE_SIZE - 1);
+       model = &op_p4_spec;
+       return 1;
+#else
+       switch (smp_num_siblings) {
+               case 1:
+                       strncpy (cpu_type, "i386/p4", 
+                                XENOPROF_CPU_TYPE_SIZE - 1);
+                       model = &op_p4_spec;
+                       return 1;
+
+               case 2:
+                       strncpy (cpu_type, "i386/p4-ht", 
+                                XENOPROF_CPU_TYPE_SIZE - 1);
+                       model = &op_p4_ht2_spec;
+                       return 1;
+       }
+#endif
+       printk("Xenoprof ERROR: P4 HyperThreading detected with > 2 threads\n");
+
+       return 0;
+}
+
+
+static int __init ppro_init(char *cpu_type)
+{
+       __u8 cpu_model = current_cpu_data.x86_model;
+
+       if (cpu_model > 0xd)
+               return 0;
+
+       if (cpu_model == 9) {
+               strncpy (cpu_type, "i386/p6_mobile", XENOPROF_CPU_TYPE_SIZE - 1);
+       } else if (cpu_model > 5) {
+               strncpy (cpu_type, "i386/piii", XENOPROF_CPU_TYPE_SIZE - 1);
+       } else if (cpu_model > 2) {
+               strncpy (cpu_type, "i386/pii", XENOPROF_CPU_TYPE_SIZE - 1);
+       } else {
+               strncpy (cpu_type, "i386/ppro", XENOPROF_CPU_TYPE_SIZE - 1);
+       }
+
+       model = &op_ppro_spec;
+       return 1;
+}
+
+int nmi_init(int *num_events, int *is_primary, char *cpu_type)
+{
+       __u8 vendor = current_cpu_data.x86_vendor;
+       __u8 family = current_cpu_data.x86;
+       int prim = 0;
+ 
+       if (!cpu_has_apic)
+               return -ENODEV;
+
+       if (primary_profiler == NULL) {
+               /* For now, only dom0 can be the primary profiler */
+               if (current->domain->domain_id == 0) {
+                       primary_profiler = current->domain;
+                       prim = 1;
+               }
+       }
+ 
+       /* Make sure string is NULL terminated */
+       cpu_type[XENOPROF_CPU_TYPE_SIZE - 1] = 0;
+
+       switch (vendor) {
+               case X86_VENDOR_AMD:
+                       /* Needs to be at least an Athlon (or hammer in 32bit mode) */
+
+                       switch (family) {
+                       default:
+                               return -ENODEV;
+                       case 6:
+                               model = &op_athlon_spec;
+                               strncpy (cpu_type, "i386/athlon", 
+                                        XENOPROF_CPU_TYPE_SIZE - 1);
+                               break;
+                       case 0xf:
+                               model = &op_athlon_spec;
+                               /* Actually it could be i386/hammer too, but give
+                                  user space a consistent name. */
+                               strncpy (cpu_type, "x86-64/hammer", 
+                                        XENOPROF_CPU_TYPE_SIZE - 1);
+                               break;
+                       }
+                       break;
+ 
+               case X86_VENDOR_INTEL:
+                       switch (family) {
+                               /* Pentium IV */
+                               case 0xf:
+                                       if (!p4_init(cpu_type))
+                                               return -ENODEV;
+                                       break;
+
+                               /* A P6-class processor */
+                               case 6:
+                                       if (!ppro_init(cpu_type))
+                                               return -ENODEV;
+                                       break;
+
+                               default:
+                                       return -ENODEV;
+                       }
+                       break;
+
+               default:
+                       return -ENODEV;
+       }
+
+       *num_events = model->num_counters;
+       *is_primary = prim;
+
+       return 0;
+}
+
diff -r 388c59fefaa6 -r e049baa9055d xen/arch/x86/oprofile/op_counter.h
--- /dev/null   Thu Apr  6 16:49:21 2006
+++ b/xen/arch/x86/oprofile/op_counter.h        Thu Apr  6 17:58:01 2006
@@ -0,0 +1,29 @@
+/**
+ * @file op_counter.h
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ */
+ 
+#ifndef OP_COUNTER_H
+#define OP_COUNTER_H
+ 
+#define OP_MAX_COUNTER 8
+ 
+/* Per-perfctr configuration as set via
+ * oprofilefs.
+ */
+struct op_counter_config {
+        unsigned long count;
+        unsigned long enabled;
+        unsigned long event;
+        unsigned long kernel;
+        unsigned long user;
+        unsigned long unit_mask;
+};
+
+extern struct op_counter_config counter_config[];
+
+#endif /* OP_COUNTER_H */
diff -r 388c59fefaa6 -r e049baa9055d xen/arch/x86/oprofile/op_model_athlon.c
--- /dev/null   Thu Apr  6 16:49:21 2006
+++ b/xen/arch/x86/oprofile/op_model_athlon.c   Thu Apr  6 17:58:01 2006
@@ -0,0 +1,168 @@
+/**
+ * @file op_model_athlon.h
+ * athlon / K7 model-specific MSR operations
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ * @author Philippe Elie
+ * @author Graydon Hoare
+ */
+
+#include <xen/types.h>
+#include <asm/msr.h>
+#include <asm/io.h>
+#include <asm/apic.h>
+#include <asm/processor.h>
+#include <xen/sched.h>
+#include <asm/regs.h>
+#include <asm/current.h>
+ 
+#include "op_x86_model.h"
+#include "op_counter.h"
+
+#define NUM_COUNTERS 4
+#define NUM_CONTROLS 4
+
+#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
+#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1);} while (0)
+#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
+
+#define CTRL_READ(l,h,msrs,c) do {rdmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
+#define CTRL_WRITE(l,h,msrs,c) do {wrmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
+#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
+#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
+#define CTRL_CLEAR(x) (x &= (1<<21))
+#define CTRL_SET_ENABLE(val) (val |= 1<<20)
+#define CTRL_SET_USR(val,u) (val |= ((u & 1) << 16))
+#define CTRL_SET_KERN(val,k) (val |= ((k & 1) << 17))
+#define CTRL_SET_UM(val, m) (val |= (m << 8))
+#define CTRL_SET_EVENT(val, e) (val |= e)
+
+static unsigned long reset_value[NUM_COUNTERS];
+
+extern void xenoprof_log_event(struct vcpu *v, unsigned long eip,
+                              int mode, int event);
+ 
+static void athlon_fill_in_addresses(struct op_msrs * const msrs)
+{
+       msrs->counters[0].addr = MSR_K7_PERFCTR0;
+       msrs->counters[1].addr = MSR_K7_PERFCTR1;
+       msrs->counters[2].addr = MSR_K7_PERFCTR2;
+       msrs->counters[3].addr = MSR_K7_PERFCTR3;
+
+       msrs->controls[0].addr = MSR_K7_EVNTSEL0;
+       msrs->controls[1].addr = MSR_K7_EVNTSEL1;
+       msrs->controls[2].addr = MSR_K7_EVNTSEL2;
+       msrs->controls[3].addr = MSR_K7_EVNTSEL3;
+}
+
+ 
+static void athlon_setup_ctrs(struct op_msrs const * const msrs)
+{
+       unsigned int low, high;
+       int i;
+ 
+       /* clear all counters */
+       for (i = 0 ; i < NUM_CONTROLS; ++i) {
+               CTRL_READ(low, high, msrs, i);
+               CTRL_CLEAR(low);
+               CTRL_WRITE(low, high, msrs, i);
+       }
+       
+       /* avoid a false detection of ctr overflows in NMI handler */
+       for (i = 0; i < NUM_COUNTERS; ++i) {
+               CTR_WRITE(1, msrs, i);
+       }
+
+       /* enable active counters */
+       for (i = 0; i < NUM_COUNTERS; ++i) {
+               if (counter_config[i].enabled) {
+                       reset_value[i] = counter_config[i].count;
+
+                       CTR_WRITE(counter_config[i].count, msrs, i);
+
+                       CTRL_READ(low, high, msrs, i);
+                       CTRL_CLEAR(low);
+                       CTRL_SET_ENABLE(low);
+                       CTRL_SET_USR(low, counter_config[i].user);
+                       CTRL_SET_KERN(low, counter_config[i].kernel);
+                       CTRL_SET_UM(low, counter_config[i].unit_mask);
+                       CTRL_SET_EVENT(low, counter_config[i].event);
+                       CTRL_WRITE(low, high, msrs, i);
+               } else {
+                       reset_value[i] = 0;
+               }
+       }
+}
+
+ 
+static int athlon_check_ctrs(unsigned int const cpu,
+                             struct op_msrs const * const msrs,
+                             struct cpu_user_regs * const regs)
+
+{
+       unsigned int low, high;
+       int i;
+       int ovf = 0;
+       unsigned long eip = regs->eip;
+       int mode = 0;
+
+       if (guest_kernel_mode(current, regs))
+               mode = 1;
+       else if (ring_0(regs))
+               mode = 2;
+
+       for (i = 0 ; i < NUM_COUNTERS; ++i) {
+               CTR_READ(low, high, msrs, i);
+               if (CTR_OVERFLOWED(low)) {
+                       xenoprof_log_event(current, eip, mode, i);
+                       CTR_WRITE(reset_value[i], msrs, i);
+                       ovf = 1;
+               }
+       }
+
+       /* See op_model_ppro.c */
+       return ovf;
+}
+
+ 
+static void athlon_start(struct op_msrs const * const msrs)
+{
+       unsigned int low, high;
+       int i;
+       for (i = 0 ; i < NUM_COUNTERS ; ++i) {
+               if (reset_value[i]) {
+                       CTRL_READ(low, high, msrs, i);
+                       CTRL_SET_ACTIVE(low);
+                       CTRL_WRITE(low, high, msrs, i);
+               }
+       }
+}
+
+
+static void athlon_stop(struct op_msrs const * const msrs)
+{
+       unsigned int low,high;
+       int i;
+
+       /* Subtle: stop on all counters to avoid race with
+        * setting our pm callback */
+       for (i = 0 ; i < NUM_COUNTERS ; ++i) {
+               CTRL_READ(low, high, msrs, i);
+               CTRL_SET_INACTIVE(low);
+               CTRL_WRITE(low, high, msrs, i);
+       }
+}
+
+
+struct op_x86_model_spec const op_athlon_spec = {
+       .num_counters = NUM_COUNTERS,
+       .num_controls = NUM_CONTROLS,
+       .fill_in_addresses = &athlon_fill_in_addresses,
+       .setup_ctrs = &athlon_setup_ctrs,
+       .check_ctrs = &athlon_check_ctrs,
+       .start = &athlon_start,
+       .stop = &athlon_stop
+};
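
The mode value that athlon_check_ctrs() passes to xenoprof_log_event() lines
up with the CPU_MODE_* codes introduced by the generic-driver patch earlier
in this changeset:

    /* mode 0: user          (CPU_MODE_USER)
     * mode 1: guest kernel  (CPU_MODE_KERNEL) -- guest_kernel_mode(current, regs)
     * mode 2: Xen           (CPU_MODE_XEN)    -- ring_0(regs)
     */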
diff -r 388c59fefaa6 -r e049baa9055d xen/arch/x86/oprofile/op_model_p4.c
--- /dev/null   Thu Apr  6 16:49:21 2006
+++ b/xen/arch/x86/oprofile/op_model_p4.c       Thu Apr  6 17:58:01 2006
@@ -0,0 +1,739 @@
+/**
+ * @file op_model_p4.c
+ * P4 model-specific MSR operations
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author Graydon Hoare
+ */
+
+#include <xen/types.h>
+#include <asm/msr.h>
+#include <asm/io.h>
+#include <asm/apic.h>
+#include <asm/processor.h>
+#include <xen/sched.h>
+#include <asm/regs.h>
+#include <asm/current.h>
+
+#include "op_x86_model.h"
+#include "op_counter.h"
+
+#define NUM_EVENTS 39
+
+#define NUM_COUNTERS_NON_HT 8
+#define NUM_ESCRS_NON_HT 45
+#define NUM_CCCRS_NON_HT 18
+#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT)
+
+#define NUM_COUNTERS_HT2 4
+#define NUM_ESCRS_HT2 23
+#define NUM_CCCRS_HT2 9
+#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)
+
+static unsigned int num_counters = NUM_COUNTERS_NON_HT;
+
+
+/* this has to be checked dynamically since the
+   hyper-threadedness of a chip is discovered at
+   kernel boot-time. */
+static inline void setup_num_counters(void)
+{
+#ifdef CONFIG_SMP
+       if (smp_num_siblings == 2)
+               num_counters = NUM_COUNTERS_HT2;
+#endif
+}
+
+static int inline addr_increment(void)
+{
+#ifdef CONFIG_SMP
+       return smp_num_siblings == 2 ? 2 : 1;
+#else
+       return 1;
+#endif
+}
+
+
+/* tables to simulate simplified hardware view of p4 registers */
+struct p4_counter_binding {
+       int virt_counter;
+       int counter_address;
+       int cccr_address;
+};
+
+struct p4_event_binding {
+       int escr_select;  /* value to put in CCCR */
+       int event_select; /* value to put in ESCR */
+       struct {
+               int virt_counter; /* for this counter... */
+               int escr_address; /* use this ESCR       */
+       } bindings[2];
+};
+
+/* nb: these CTR_* defines are a duplicate of defines in
+   event/i386.p4*events. */
+
+
+#define CTR_BPU_0      (1 << 0)
+#define CTR_MS_0       (1 << 1)
+#define CTR_FLAME_0    (1 << 2)
+#define CTR_IQ_4       (1 << 3)
+#define CTR_BPU_2      (1 << 4)
+#define CTR_MS_2       (1 << 5)
+#define CTR_FLAME_2    (1 << 6)
+#define CTR_IQ_5       (1 << 7)
+
+static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = {
+       { CTR_BPU_0,   MSR_P4_BPU_PERFCTR0,   MSR_P4_BPU_CCCR0 },
+       { CTR_MS_0,    MSR_P4_MS_PERFCTR0,    MSR_P4_MS_CCCR0 },
+       { CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
+       { CTR_IQ_4,    MSR_P4_IQ_PERFCTR4,    MSR_P4_IQ_CCCR4 },
+       { CTR_BPU_2,   MSR_P4_BPU_PERFCTR2,   MSR_P4_BPU_CCCR2 },
+       { CTR_MS_2,    MSR_P4_MS_PERFCTR2,    MSR_P4_MS_CCCR2 },
+       { CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 },
+       { CTR_IQ_5,    MSR_P4_IQ_PERFCTR5,    MSR_P4_IQ_CCCR5 }
+};
+
+#define NUM_UNUSED_CCCRS       (NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT)
+
+/* All cccr we don't use. */
+static int p4_unused_cccr[NUM_UNUSED_CCCRS] = {
+       MSR_P4_BPU_CCCR1,       MSR_P4_BPU_CCCR3,
+       MSR_P4_MS_CCCR1,        MSR_P4_MS_CCCR3,
+       MSR_P4_FLAME_CCCR1,     MSR_P4_FLAME_CCCR3,
+       MSR_P4_IQ_CCCR0,        MSR_P4_IQ_CCCR1,
+       MSR_P4_IQ_CCCR2,        MSR_P4_IQ_CCCR3
+};
+
+/* p4 event codes in libop/op_event.h are indices into this table. */
+
+static struct p4_event_binding p4_events[NUM_EVENTS] = {
+       
+       { /* BRANCH_RETIRED */
+               0x05, 0x06, 
+               { {CTR_IQ_4, MSR_P4_CRU_ESCR2},
+                 {CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+       },
+       
+       { /* MISPRED_BRANCH_RETIRED */
+               0x04, 0x03, 
+               { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
+                 { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
+       },
+       
+       { /* TC_DELIVER_MODE */
+               0x01, 0x01,
+               { { CTR_MS_0, MSR_P4_TC_ESCR0},  
+                 { CTR_MS_2, MSR_P4_TC_ESCR1} }
+       },
+       
+       { /* BPU_FETCH_REQUEST */
+               0x00, 0x03, 
+               { { CTR_BPU_0, MSR_P4_BPU_ESCR0},
+                 { CTR_BPU_2, MSR_P4_BPU_ESCR1} }
+       },
+
+       { /* ITLB_REFERENCE */
+               0x03, 0x18,
+               { { CTR_BPU_0, MSR_P4_ITLB_ESCR0},
+                 { CTR_BPU_2, MSR_P4_ITLB_ESCR1} }
+       },
+
+       { /* MEMORY_CANCEL */
+               0x05, 0x02,
+               { { CTR_FLAME_0, MSR_P4_DAC_ESCR0},
+                 { CTR_FLAME_2, MSR_P4_DAC_ESCR1} }
+       },
+
+       { /* MEMORY_COMPLETE */
+               0x02, 0x08,
+               { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
+                 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
+       },
+
+       { /* LOAD_PORT_REPLAY */
+               0x02, 0x04, 
+               { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
+                 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
+       },
+
+       { /* STORE_PORT_REPLAY */
+               0x02, 0x05,
+               { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
+                 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
+       },
+
+       { /* MOB_LOAD_REPLAY */
+               0x02, 0x03,
+               { { CTR_BPU_0, MSR_P4_MOB_ESCR0},
+                 { CTR_BPU_2, MSR_P4_MOB_ESCR1} }
+       },
+
+       { /* PAGE_WALK_TYPE */
+               0x04, 0x01,
+               { { CTR_BPU_0, MSR_P4_PMH_ESCR0},
+                 { CTR_BPU_2, MSR_P4_PMH_ESCR1} }
+       },
+
+       { /* BSQ_CACHE_REFERENCE */
+               0x07, 0x0c, 
+               { { CTR_BPU_0, MSR_P4_BSU_ESCR0},
+                 { CTR_BPU_2, MSR_P4_BSU_ESCR1} }
+       },
+
+       { /* IOQ_ALLOCATION */
+               0x06, 0x03, 
+               { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
+                 { 0, 0 } }
+       },
+
+       { /* IOQ_ACTIVE_ENTRIES */
+               0x06, 0x1a, 
+               { { CTR_BPU_2, MSR_P4_FSB_ESCR1},
+                 { 0, 0 } }
+       },
+
+       { /* FSB_DATA_ACTIVITY */
+               0x06, 0x17, 
+               { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
+                 { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
+       },
+
+       { /* BSQ_ALLOCATION */
+               0x07, 0x05, 
+               { { CTR_BPU_0, MSR_P4_BSU_ESCR0},
+                 { 0, 0 } }
+       },
+
+       { /* BSQ_ACTIVE_ENTRIES */
+               0x07, 0x06,
+               { { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},  
+                 { 0, 0 } }
+       },
+
+       { /* X87_ASSIST */
+               0x05, 0x03, 
+               { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+                 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+       },
+
+       { /* SSE_INPUT_ASSIST */
+               0x01, 0x34,
+               { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+                 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+       },
+  
+       { /* PACKED_SP_UOP */
+               0x01, 0x08, 
+               { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+                 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+       },
+  
+       { /* PACKED_DP_UOP */
+               0x01, 0x0c, 
+               { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+                 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+       },
+
+       { /* SCALAR_SP_UOP */
+               0x01, 0x0a, 
+               { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+                 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+       },
+
+       { /* SCALAR_DP_UOP */
+               0x01, 0x0e,
+               { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+                 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+       },
+
+       { /* 64BIT_MMX_UOP */
+               0x01, 0x02, 
+               { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+                 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+       },
+  
+       { /* 128BIT_MMX_UOP */
+               0x01, 0x1a, 
+               { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+                 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+       },
+
+       { /* X87_FP_UOP */
+               0x01, 0x04, 
+               { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+                 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+       },
+  
+       { /* X87_SIMD_MOVES_UOP */
+               0x01, 0x2e, 
+               { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+                 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+       },
+  
+       { /* MACHINE_CLEAR */
+               0x05, 0x02, 
+               { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+                 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+       },
+
+       { /* GLOBAL_POWER_EVENTS */
+               0x06, 0x13 /* older manual says 0x05, newer 0x13 */,
+               { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
+                 { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
+       },
+  
+       { /* TC_MS_XFER */
+               0x00, 0x05, 
+               { { CTR_MS_0, MSR_P4_MS_ESCR0},
+                 { CTR_MS_2, MSR_P4_MS_ESCR1} }
+       },
+
+       { /* UOP_QUEUE_WRITES */
+               0x00, 0x09,
+               { { CTR_MS_0, MSR_P4_MS_ESCR0},
+                 { CTR_MS_2, MSR_P4_MS_ESCR1} }
+       },
+
+       { /* FRONT_END_EVENT */
+               0x05, 0x08,
+               { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+                 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+       },
+
+       { /* EXECUTION_EVENT */
+               0x05, 0x0c,
+               { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+                 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+       },
+
+       { /* REPLAY_EVENT */
+               0x05, 0x09,
+               { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+                 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+       },
+
+       { /* INSTR_RETIRED */
+               0x04, 0x02, 
+               { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
+                 { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
+       },
+
+       { /* UOPS_RETIRED */
+               0x04, 0x01,
+               { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
+                 { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
+       },
+
+       { /* UOP_TYPE */    
+               0x02, 0x02, 
+               { { CTR_IQ_4, MSR_P4_RAT_ESCR0},
+                 { CTR_IQ_5, MSR_P4_RAT_ESCR1} }
+       },
+
+       { /* RETIRED_MISPRED_BRANCH_TYPE */
+               0x02, 0x05, 
+               { { CTR_MS_0, MSR_P4_TBPU_ESCR0},
+                 { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
+       },
+
+       { /* RETIRED_BRANCH_TYPE */
+               0x02, 0x04,
+               { { CTR_MS_0, MSR_P4_TBPU_ESCR0},
+                 { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
+       }
+};
+
+
+#define MISC_PMC_ENABLED_P(x) ((x) & (1 << 7))
+
+#define ESCR_RESERVED_BITS 0x80000003
+#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS)
+#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2))
+#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3))
+#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1)))
+#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
+#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
+#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
+#define ESCR_READ(escr,high,ev,i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
+#define ESCR_WRITE(escr,high,ev,i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
+
+#define CCCR_RESERVED_BITS 0x38030FFF
+#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
+#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000)
+#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13))
+#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1<<26))
+#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
+#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
+#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
+#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
+#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
+#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
+#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
+
+#define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0)
+#define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0)
+#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
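
Taken together these macros build up a CCCR word field by field. A
hypothetical standalone rendering (the macros are copied from above so the
snippet compiles on its own; escr_select 0x05 matches the BRANCH_RETIRED
entry in p4_events):

    #include <stdio.h>

    /* Sketch of the CCCR composition performed by
     * pmc_setup_one_p4_counter() below; the enable bit itself is only
     * set later, by p4_start(). */
    #define CCCR_RESERVED_BITS 0x38030FFF
    #define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
    #define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000)
    #define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13))
    #define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1 << 26))
    #define CCCR_SET_ENABLE(cccr) ((cccr) |= (1 << 12))

    int main(void)
    {
        unsigned int cccr = 0;

        CCCR_CLEAR(cccr);
        CCCR_SET_REQUIRED_BITS(cccr);
        CCCR_SET_ESCR_SELECT(cccr, 0x05); /* BRANCH_RETIRED escr_select */
        CCCR_SET_PMI_OVF_0(cccr);         /* interrupt thread 0 on overflow */
        CCCR_SET_ENABLE(cccr);
        printf("CCCR = %#010x\n", cccr);  /* prints 0x0403b000 */
        return 0;
    }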
+
+
+/* this assigns a "stagger" to the current CPU, which is used throughout
+   the code in this module as an extra array offset, to select the "even"
+   or "odd" part of all the divided resources. */
+static unsigned int get_stagger(void)
+{
+#ifdef CONFIG_SMP
+       int cpu = smp_processor_id();
+       return (cpu != first_cpu(cpu_sibling_map[cpu]));
+#endif 
+       return 0;
+}
+
+
+/* finally, mediate access to a real hardware counter
+   by passing a "virtual" counter numer to this macro,
+   along with your stagger setting. */
+#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))
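
For example, on a hyper-threaded P4 (num_counters == NUM_COUNTERS_HT2 == 4)
the even sibling owns the first half of p4_counters[] and the odd sibling
the second half. A small hypothetical illustration:

    #include <stdio.h>

    /* VIRT_CTR() on a 2-sibling (HT) P4: each sibling sees 4 "virtual"
     * counters, mapped onto opposite halves of the 8-entry table. */
    #define NUM_COUNTERS 4
    #define VIRT_CTR(stagger, i) ((i) + (NUM_COUNTERS * (stagger)))

    int main(void)
    {
        static const char *slot[8] = {
            "BPU_0", "MS_0", "FLAME_0", "IQ_4",  /* even sibling */
            "BPU_2", "MS_2", "FLAME_2", "IQ_5"   /* odd sibling  */
        };
        int stag, i;

        for (stag = 0; stag < 2; stag++)
            for (i = 0; i < NUM_COUNTERS; i++)
                printf("stagger %d, ctr %d -> %s\n",
                       stag, i, slot[VIRT_CTR(stag, i)]);
        return 0;
    }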
+
+static unsigned long reset_value[NUM_COUNTERS_NON_HT];
+
+
+static void p4_fill_in_addresses(struct op_msrs * const msrs)
+{
+       unsigned int i; 
+       unsigned int addr, stag;
+
+       setup_num_counters();
+       stag = get_stagger();
+
+       /* the counter registers we pay attention to */
+       for (i = 0; i < num_counters; ++i) {
+               msrs->counters[i].addr = 
+                       p4_counters[VIRT_CTR(stag, i)].counter_address;
+       }
+
+       /* FIXME: bad feeling, we don't save the 10 counters we don't use. */
+
+       /* 18 CCCR registers */
+       for (i = 0, addr = MSR_P4_BPU_CCCR0 + stag;
+            addr <= MSR_P4_IQ_CCCR5; ++i, addr += addr_increment()) {
+               msrs->controls[i].addr = addr;
+       }
+       
+       /* 43 ESCR registers in three or four discontiguous groups */
+       for (addr = MSR_P4_BSU_ESCR0 + stag;
+            addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
+               msrs->controls[i].addr = addr;
+       }
+
+       /* no IQ_ESCR0/1 on some models, so we save BSU_ESCR0/1 a second
+        * time to avoid a special case in nmi_{save|restore}_registers() */
+       if (boot_cpu_data.x86_model >= 0x3) {
+               for (addr = MSR_P4_BSU_ESCR0 + stag;
+                    addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
+                       msrs->controls[i].addr = addr;
+               }
+       } else {
+               for (addr = MSR_P4_IQ_ESCR0 + stag;
+                    addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
+                       msrs->controls[i].addr = addr;
+               }
+       }
+
+       for (addr = MSR_P4_RAT_ESCR0 + stag;
+            addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
+               msrs->controls[i].addr = addr;
+       }
+       
+       for (addr = MSR_P4_MS_ESCR0 + stag;
+            addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) { 
+               msrs->controls[i].addr = addr;
+       }
+       
+       for (addr = MSR_P4_IX_ESCR0 + stag;
+            addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) { 
+               msrs->controls[i].addr = addr;
+       }
+
+       /* there are 2 remaining non-contiguously located ESCRs */
+
+       if (num_counters == NUM_COUNTERS_NON_HT) {              
+               /* standard non-HT CPUs handle both remaining ESCRs */
+               msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+               msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
+
+       } else if (stag == 0) {
+               /* HT CPUs give the first remainder to the even thread, as
+                  the 32nd control register */
+               msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
+
+       } else {
+               /* and two copies of the second to the odd thread,
+                  for the 22nd and 23rd control registers */
+               msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+               msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+       }
+}
+
+
+static void pmc_setup_one_p4_counter(unsigned int ctr)
+{
+       int i;
+       int const maxbind = 2;
+       unsigned int cccr = 0;
+       unsigned int escr = 0;
+       unsigned int high = 0;
+       unsigned int counter_bit;
+       struct p4_event_binding *ev = NULL;
+       unsigned int stag;
+
+       stag = get_stagger();
+       
+       /* convert from counter *number* to counter *bit* */
+       counter_bit = 1 << VIRT_CTR(stag, ctr);
+       
+       /* find our event binding structure. */
+       if (counter_config[ctr].event <= 0 || counter_config[ctr].event > 
NUM_EVENTS) {
+               printk(KERN_ERR 
+                      "oprofile: P4 event code 0x%lx out of range\n", 
+                      counter_config[ctr].event);
+               return;
+       }
+       
+       ev = &(p4_events[counter_config[ctr].event - 1]);
+       
+       for (i = 0; i < maxbind; i++) {
+               if (ev->bindings[i].virt_counter & counter_bit) {
+
+                       /* modify ESCR */
+                       ESCR_READ(escr, high, ev, i);
+                       ESCR_CLEAR(escr);
+                       if (stag == 0) {
+                               ESCR_SET_USR_0(escr, counter_config[ctr].user);
+                               ESCR_SET_OS_0(escr, counter_config[ctr].kernel);
+                       } else {
+                               ESCR_SET_USR_1(escr, counter_config[ctr].user);
+                               ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
+                       }
+                       ESCR_SET_EVENT_SELECT(escr, ev->event_select);
+                       ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
+                       ESCR_WRITE(escr, high, ev, i);
+                      
+                       /* modify CCCR */
+                       CCCR_READ(cccr, high, VIRT_CTR(stag, ctr));
+                       CCCR_CLEAR(cccr);
+                       CCCR_SET_REQUIRED_BITS(cccr);
+                       CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
+                       if (stag == 0) {
+                               CCCR_SET_PMI_OVF_0(cccr);
+                       } else {
+                               CCCR_SET_PMI_OVF_1(cccr);
+                       }
+                       CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr));
+                       return;
+               }
+       }
+
+       printk(KERN_ERR 
+              "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n",
+              counter_config[ctr].event, stag, ctr);
+}
+
+
+static void p4_setup_ctrs(struct op_msrs const * const msrs)
+{
+       unsigned int i;
+       unsigned int low, high;
+       unsigned int addr;
+       unsigned int stag;
+
+       stag = get_stagger();
+
+       rdmsr(MSR_IA32_MISC_ENABLE, low, high);
+       if (! MISC_PMC_ENABLED_P(low)) {
+               printk(KERN_ERR "oprofile: P4 PMC not available\n");
+               return;
+       }
+
+       /* clear the cccrs we will use */
+       for (i = 0 ; i < num_counters ; i++) {
+               rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
+               CCCR_CLEAR(low);
+               CCCR_SET_REQUIRED_BITS(low);
+               wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
+       }
+
+       /* clear cccrs outside our concern */
+       for (i = stag ; i < NUM_UNUSED_CCCRS ; i += addr_increment()) {
+               rdmsr(p4_unused_cccr[i], low, high);
+               CCCR_CLEAR(low);
+               CCCR_SET_REQUIRED_BITS(low);
+               wrmsr(p4_unused_cccr[i], low, high);
+       }
+
+       /* clear all escrs (including those outside our concern) */
+       for (addr = MSR_P4_BSU_ESCR0 + stag;
+            addr <  MSR_P4_IQ_ESCR0; addr += addr_increment()) {
+               wrmsr(addr, 0, 0);
+       }
+
+       /* On older models, also clear MSR_P4_IQ_ESCR0/1 */
+       if (boot_cpu_data.x86_model < 0x3) {
+               wrmsr(MSR_P4_IQ_ESCR0, 0, 0);
+               wrmsr(MSR_P4_IQ_ESCR1, 0, 0);
+       }
+
+       for (addr = MSR_P4_RAT_ESCR0 + stag;
+            addr <= MSR_P4_SSU_ESCR0; addr += addr_increment()) {
+               wrmsr(addr, 0, 0);
+       }
+       
+       for (addr = MSR_P4_MS_ESCR0 + stag;
+            addr <= MSR_P4_TC_ESCR1; addr += addr_increment()){ 
+               wrmsr(addr, 0, 0);
+       }
+       
+       for (addr = MSR_P4_IX_ESCR0 + stag;
+            addr <= MSR_P4_CRU_ESCR3; addr += addr_increment()){ 
+               wrmsr(addr, 0, 0);
+       }
+
+       if (num_counters == NUM_COUNTERS_NON_HT) {              
+               wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
+               wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
+       } else if (stag == 0) {
+               wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
+       } else {
+               wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
+       }               
+       
+       /* setup all counters */
+       for (i = 0 ; i < num_counters ; ++i) {
+               if (counter_config[i].enabled) {
+                       reset_value[i] = counter_config[i].count;
+                       pmc_setup_one_p4_counter(i);
+                       CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
+               } else {
+                       reset_value[i] = 0;
+               }
+       }
+}
+
+
+extern void xenoprof_log_event(struct vcpu *v, unsigned long eip,
+                              int mode, int event);
+
+static int p4_check_ctrs(unsigned int const cpu,
+                         struct op_msrs const * const msrs,
+                         struct cpu_user_regs * const regs)
+{
+       unsigned long ctr, low, high, stag, real;
+       int i;
+       int ovf = 0;
+       unsigned long eip = regs->eip;
+       int mode = 0;
+
+       if (guest_kernel_mode(current, regs))
+               mode = 1;
+       else if (ring_0(regs))
+               mode = 2;
+
+       stag = get_stagger();
+
+       for (i = 0; i < num_counters; ++i) {
+               
+               if (!reset_value[i]) 
+                       continue;
+
+               /* 
+                * there is some eccentricity in the hardware which
+                * requires that we perform 2 extra corrections:
+                *
+                * - check both the CCCR:OVF flag for overflow and the
+                *   counter high bit for un-flagged overflows.
+                *
+                * - write the counter back twice to ensure it gets
+                *   updated properly.
+                * 
+                * the former seems to be related to extra NMIs happening
+                * during the current NMI; the latter is reported as errata
+                * N15 in intel doc 249199-029, pentium 4 specification
+                * update, though their suggested work-around does not
+                * appear to solve the problem.
+                */
+               
+               real = VIRT_CTR(stag, i);
+
+               CCCR_READ(low, high, real);
+               CTR_READ(ctr, high, real);
+               if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) {
+                       xenoprof_log_event(current, eip, mode, i);
+                       CTR_WRITE(reset_value[i], real);
+                       CCCR_CLEAR_OVF(low);
+                       CCCR_WRITE(low, high, real);
+                       CTR_WRITE(reset_value[i], real);
+                       ovf = 1;
+               }
+       }
+
+       /* P4 quirk: you have to re-unmask the apic vector */
+       apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
+
+       return ovf;
+}
+
+
+static void p4_start(struct op_msrs const * const msrs)
+{
+       unsigned int low, high, stag;
+       int i;
+
+       stag = get_stagger();
+
+       for (i = 0; i < num_counters; ++i) {
+               if (!reset_value[i])
+                       continue;
+               CCCR_READ(low, high, VIRT_CTR(stag, i));
+               CCCR_SET_ENABLE(low);
+               CCCR_WRITE(low, high, VIRT_CTR(stag, i));
+       }
+}
+
+
+static void p4_stop(struct op_msrs const * const msrs)
+{
+       unsigned int low, high, stag;
+       int i;
+
+       stag = get_stagger();
+
+       for (i = 0; i < num_counters; ++i) {
+               CCCR_READ(low, high, VIRT_CTR(stag, i));
+               CCCR_SET_DISABLE(low);
+               CCCR_WRITE(low, high, VIRT_CTR(stag, i));
+       }
+}
+
+
+#ifdef CONFIG_SMP
+struct op_x86_model_spec const op_p4_ht2_spec = {
+       .num_counters = NUM_COUNTERS_HT2,
+       .num_controls = NUM_CONTROLS_HT2,
+       .fill_in_addresses = &p4_fill_in_addresses,
+       .setup_ctrs = &p4_setup_ctrs,
+       .check_ctrs = &p4_check_ctrs,
+       .start = &p4_start,
+       .stop = &p4_stop
+};
+#endif
+
+struct op_x86_model_spec const op_p4_spec = {
+       .num_counters = NUM_COUNTERS_NON_HT,
+       .num_controls = NUM_CONTROLS_NON_HT,
+       .fill_in_addresses = &p4_fill_in_addresses,
+       .setup_ctrs = &p4_setup_ctrs,
+       .check_ctrs = &p4_check_ctrs,
+       .start = &p4_start,
+       .stop = &p4_stop
+};
diff -r 388c59fefaa6 -r e049baa9055d xen/arch/x86/oprofile/op_model_ppro.c
--- /dev/null   Thu Apr  6 16:49:21 2006
+++ b/xen/arch/x86/oprofile/op_model_ppro.c     Thu Apr  6 17:58:01 2006
@@ -0,0 +1,153 @@
+/**
+ * @file op_model_ppro.c
+ * pentium pro / P6 model-specific MSR operations
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ * @author Philippe Elie
+ * @author Graydon Hoare
+ */
+
+#include <xen/types.h>
+#include <asm/msr.h>
+#include <asm/io.h>
+#include <asm/apic.h>
+#include <asm/processor.h>
+#include <xen/sched.h>
+#include <asm/regs.h>
+#include <asm/current.h>
+ 
+#include "op_x86_model.h"
+#include "op_counter.h"
+
+#define NUM_COUNTERS 2
+#define NUM_CONTROLS 2
+
+#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
+#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), -1);} while (0)
+#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
+
+#define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
+#define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
+#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
+#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
+#define CTRL_CLEAR(x) (x &= (1<<21))
+#define CTRL_SET_ENABLE(val) (val |= 1<<20)
+#define CTRL_SET_USR(val,u) (val |= ((u & 1) << 16))
+#define CTRL_SET_KERN(val,k) (val |= ((k & 1) << 17))
+#define CTRL_SET_UM(val, m) (val |= (m << 8))
+#define CTRL_SET_EVENT(val, e) (val |= e)
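
Unlike the P4's split ESCR/CCCR scheme, a P6 event is configured in a single
EVNTSEL word. A hypothetical standalone composition using the macros above
(event 0x79, CPU_CLK_UNHALTED, is just an assumed example):

    #include <stdio.h>

    /* Sketch of the EVNTSEL composition done by ppro_setup_ctrs();
     * macros copied from the patch so this compiles on its own. */
    #define CTRL_CLEAR(x) ((x) &= (1 << 21))
    #define CTRL_SET_ENABLE(val) ((val) |= 1 << 20)
    #define CTRL_SET_USR(val, u) ((val) |= (((u) & 1) << 16))
    #define CTRL_SET_KERN(val, k) ((val) |= (((k) & 1) << 17))
    #define CTRL_SET_UM(val, m) ((val) |= ((m) << 8))
    #define CTRL_SET_EVENT(val, e) ((val) |= (e))

    int main(void)
    {
        unsigned int low = 0;

        CTRL_CLEAR(low);
        CTRL_SET_ENABLE(low);       /* interrupt on overflow */
        CTRL_SET_USR(low, 1);       /* count in user mode    */
        CTRL_SET_KERN(low, 1);      /* count in kernel mode  */
        CTRL_SET_UM(low, 0);
        CTRL_SET_EVENT(low, 0x79);
        printf("EVNTSEL = %#010x\n", low);  /* prints 0x00130079 */
        return 0;
    }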
+
+static unsigned long reset_value[NUM_COUNTERS];
+ 
+static void ppro_fill_in_addresses(struct op_msrs * const msrs)
+{
+       msrs->counters[0].addr = MSR_P6_PERFCTR0;
+       msrs->counters[1].addr = MSR_P6_PERFCTR1;
+       
+       msrs->controls[0].addr = MSR_P6_EVNTSEL0;
+       msrs->controls[1].addr = MSR_P6_EVNTSEL1;
+}
+
+
+static void ppro_setup_ctrs(struct op_msrs const * const msrs)
+{
+       unsigned int low, high;
+       int i;
+
+       /* clear all counters */
+       for (i = 0 ; i < NUM_CONTROLS; ++i) {
+               CTRL_READ(low, high, msrs, i);
+               CTRL_CLEAR(low);
+               CTRL_WRITE(low, high, msrs, i);
+       }
+       
+       /* avoid a false detection of ctr overflows in NMI handler */
+       for (i = 0; i < NUM_COUNTERS; ++i) {
+               CTR_WRITE(1, msrs, i);
+       }
+
+       /* enable active counters */
+       for (i = 0; i < NUM_COUNTERS; ++i) {
+               if (counter_config[i].enabled) {
+                       reset_value[i] = counter_config[i].count;
+
+                       CTR_WRITE(counter_config[i].count, msrs, i);
+
+                       CTRL_READ(low, high, msrs, i);
+                       CTRL_CLEAR(low);
+                       CTRL_SET_ENABLE(low);
+                       CTRL_SET_USR(low, counter_config[i].user);
+                       CTRL_SET_KERN(low, counter_config[i].kernel);
+                       CTRL_SET_UM(low, counter_config[i].unit_mask);
+                       CTRL_SET_EVENT(low, counter_config[i].event);
+                       CTRL_WRITE(low, high, msrs, i);
+               }
+       }
+}
+
+
+extern void xenoprof_log_event(struct vcpu *v, unsigned long eip,
+                              int mode, int event);
+ 
+static int ppro_check_ctrs(unsigned int const cpu,
+                           struct op_msrs const * const msrs,
+                           struct cpu_user_regs * const regs)
+{
+       unsigned int low, high;
+       int i;
+       int ovf = 0;
+       unsigned long eip = regs->eip;
+       int mode = 0;
+
+       if ( guest_kernel_mode(current, regs) ) 
+               mode = 1;
+       else if ( ring_0(regs) )
+               mode = 2;
+ 
+       for (i = 0 ; i < NUM_COUNTERS; ++i) {
+               CTR_READ(low, high, msrs, i);
+               if (CTR_OVERFLOWED(low)) {
+                       xenoprof_log_event(current, eip, mode, i);
+                       CTR_WRITE(reset_value[i], msrs, i);
+                       ovf = 1;
+               }
+       }
+
+       /* Only the P6-based Pentium M needs to re-unmask the apic vector, but
+        * it doesn't hurt other P6 variants */
+       apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
+
+       return ovf;
+}
+
+ 
+static void ppro_start(struct op_msrs const * const msrs)
+{
+       unsigned int low,high;
+       CTRL_READ(low, high, msrs, 0);
+       CTRL_SET_ACTIVE(low);
+       CTRL_WRITE(low, high, msrs, 0);
+}
+
+
+static void ppro_stop(struct op_msrs const * const msrs)
+{
+       unsigned int low,high;
+       CTRL_READ(low, high, msrs, 0);
+       CTRL_SET_INACTIVE(low);
+       CTRL_WRITE(low, high, msrs, 0);
+}
+
+
+struct op_x86_model_spec const op_ppro_spec = {
+       .num_counters = NUM_COUNTERS,
+       .num_controls = NUM_CONTROLS,
+       .fill_in_addresses = &ppro_fill_in_addresses,
+       .setup_ctrs = &ppro_setup_ctrs,
+       .check_ctrs = &ppro_check_ctrs,
+       .start = &ppro_start,
+       .stop = &ppro_stop
+};
diff -r 388c59fefaa6 -r e049baa9055d xen/arch/x86/oprofile/op_x86_model.h
--- /dev/null   Thu Apr  6 16:49:21 2006
+++ b/xen/arch/x86/oprofile/op_x86_model.h      Thu Apr  6 17:58:01 2006
@@ -0,0 +1,51 @@
+/**
+ * @file op_x86_model.h
+ * interface to x86 model-specific MSR operations
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author Graydon Hoare
+ */
+
+#ifndef OP_X86_MODEL_H
+#define OP_X86_MODEL_H
+
+struct op_saved_msr {
+       unsigned int high;
+       unsigned int low;
+};
+
+struct op_msr {
+       unsigned long addr;
+       struct op_saved_msr saved;
+};
+
+struct op_msrs {
+       struct op_msr * counters;
+       struct op_msr * controls;
+};
+
+struct pt_regs;
+
+/* The model vtable abstracts the differences between
+ * various x86 CPU models' perfctr support.
+ */
+struct op_x86_model_spec {
+       unsigned int const num_counters;
+       unsigned int const num_controls;
+       void (*fill_in_addresses)(struct op_msrs * const msrs);
+       void (*setup_ctrs)(struct op_msrs const * const msrs);
+       int (*check_ctrs)(unsigned int const cpu, 
+                         struct op_msrs const * const msrs,
+                         struct cpu_user_regs * const regs);
+       void (*start)(struct op_msrs const * const msrs);
+       void (*stop)(struct op_msrs const * const msrs);
+};
+
+extern struct op_x86_model_spec const op_ppro_spec;
+extern struct op_x86_model_spec const op_p4_spec;
+extern struct op_x86_model_spec const op_p4_ht2_spec;
+extern struct op_x86_model_spec const op_athlon_spec;
+
+#endif /* OP_X86_MODEL_H */
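
The vtable keeps the NMI driver model-agnostic: it probes the CPU once,
picks a spec, and only ever calls through these pointers. A hypothetical
userspace mock of that dispatch (all MSR work stubbed out; the real callers
are the nmi_* routines used by xenoprof.c below):

    #include <stdio.h>

    struct op_msrs { int dummy; };

    struct op_x86_model_spec {
        unsigned int num_counters;
        void (*setup_ctrs)(struct op_msrs const *msrs);
        void (*start)(struct op_msrs const *msrs);
        void (*stop)(struct op_msrs const *msrs);
    };

    static void fake_setup(struct op_msrs const *m) { (void)m; printf("setup\n"); }
    static void fake_start(struct op_msrs const *m) { (void)m; printf("start\n"); }
    static void fake_stop(struct op_msrs const *m)  { (void)m; printf("stop\n");  }

    static const struct op_x86_model_spec fake_spec = {
        .num_counters = 2,
        .setup_ctrs   = fake_setup,
        .start        = fake_start,
        .stop         = fake_stop,
    };

    int main(void)
    {
        struct op_msrs msrs = { 0 };
        const struct op_x86_model_spec *model = &fake_spec; /* CPU probe result */

        model->setup_ctrs(&msrs);
        model->start(&msrs);
        /* ... per-CPU NMIs would call model->check_ctrs() here ... */
        model->stop(&msrs);
        return 0;
    }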
diff -r 388c59fefaa6 -r e049baa9055d xen/arch/x86/oprofile/xenoprof.c
--- /dev/null   Thu Apr  6 16:49:21 2006
+++ b/xen/arch/x86/oprofile/xenoprof.c  Thu Apr  6 17:58:01 2006
@@ -0,0 +1,553 @@
+/*
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ * written by Aravind Menon & Jose Renato Santos
+ *            (email: xenoprof@xxxxxxxxxxxxx)
+ */
+
+#include <xen/sched.h>
+#include <public/xenoprof.h>
+
+#include "op_counter.h"
+
+/* Limit amount of pages used for shared buffer (per domain) */
+#define MAX_OPROF_SHARED_PAGES 32
+
+int active_domains[MAX_OPROF_DOMAINS];
+int active_ready[MAX_OPROF_DOMAINS];
+unsigned int adomains = 0;
+unsigned int activated = 0;
+struct domain *primary_profiler = NULL;
+int xenoprof_state = XENOPROF_IDLE;
+
+u64 total_samples = 0;
+u64 invalid_buffer_samples = 0;
+u64 corrupted_buffer_samples = 0;
+u64 lost_samples = 0;
+u64 active_samples = 0;
+u64 idle_samples = 0;
+u64 others_samples = 0;
+
+
+extern int nmi_init(int *num_events, int *is_primary, char *cpu_type);
+extern int nmi_reserve_counters(void);
+extern int nmi_setup_events(void);
+extern int nmi_enable_virq(void);
+extern int nmi_start(void);
+extern void nmi_stop(void);
+extern void nmi_disable_virq(void);
+extern void nmi_release_counters(void);
+
+int is_active(struct domain *d)
+{
+    xenoprof_t *x = d->xenoprof;
+    return ( x && (x->domain_type == XENOPROF_DOMAIN_ACTIVE) );
+}
+
+int is_profiled(struct domain *d)
+{
+    return is_active(d);
+}
+
+static void xenoprof_reset_stat(void)
+{
+    total_samples = 0;
+    invalid_buffer_samples = 0;
+    corrupted_buffer_samples = 0;
+    lost_samples = 0;
+    active_samples = 0;
+    idle_samples = 0;
+    others_samples = 0;
+
+    return;
+}
+
+static void xenoprof_reset_buf(struct domain *d)
+{
+    int j;
+    xenoprof_buf_t *buf;
+
+    if ( !d->xenoprof )
+    {
+        printk("xenoprof_reset_buf: ERROR - Unexpected Xenoprof NULL pointer 
\n");
+        return;
+    }
+
+    for ( j=0; j<MAX_VIRT_CPUS; j++ )
+    {
+        buf = d->xenoprof->vcpu[j].buffer;
+        if ( buf )
+        {
+            buf->event_head = 0;
+            buf->event_tail = 0;
+        }
+    }
+}
+
+int active_index(struct domain *d)
+{
+    int i;
+    int id;
+
+    id = d->domain_id;
+    for ( i=0; i<adomains; i++ )
+        if ( active_domains[i] == id )
+        {
+            return i;
+        }
+    return -1;
+}
+
+int set_active(struct domain *d)
+{
+    int ind;
+    xenoprof_t *x;
+
+    ind = active_index(d);
+    if ( ind <0 )
+        return -EPERM;
+
+    x = d->xenoprof;
+    if ( x )
+    {
+        x->domain_ready = 1;
+        x->domain_type = XENOPROF_DOMAIN_ACTIVE;
+        active_ready[ind] = 1;
+        activated++;
+        return 0;
+    }
+    else
+        return -EPERM;
+}
+
+int reset_active(struct domain *d)
+{
+    int ind;
+    xenoprof_t *x;
+
+    ind = active_index(d);
+    if ( ind <0 )
+        return -EPERM;
+
+    x = d->xenoprof;
+    if ( x )
+    {
+        x->domain_ready = 0;
+        x->domain_type = XENOPROF_DOMAIN_IGNORED;
+        active_ready[ind] = 0;
+        activated--;
+        if ( activated <= 0 )
+            adomains = 0;
+        return 0;
+    }
+    else
+        return -EPERM;
+}
+
+int set_active_domains(int num)
+{
+    int primary;
+    int i;
+    struct domain *d;
+
+    /* reset any existing active domains from previous runs */
+    for ( i=0; i<adomains; i++ )
+    {
+        if ( active_ready[i] )
+        {
+            d = find_domain_by_id(active_domains[i]);
+            if ( d )
+            {
+                reset_active(d);
+                put_domain(d);
+            }
+        }
+    }
+
+    adomains = num;
+
+    /* Add primary profiler to list of active domains if not there yet */
+    primary = active_index(primary_profiler);
+    if ( primary == -1 )
+    {
+        /* return if there is no space left on list */
+        if ( num >= MAX_OPROF_DOMAINS )
+            return -E2BIG;
+        else
+        {
+            active_domains[num] = primary_profiler->domain_id;
+            num++;
+        }
+    }
+
+    adomains = num;
+    activated = 0;
+
+    for ( i=0; i<adomains; i++ )
+    {
+        active_ready[i] = 0;
+    }
+
+    return 0;
+}
+
+void xenoprof_log_event(struct vcpu *vcpu, unsigned long eip, int mode, int event)
+{
+    xenoprof_vcpu_t *v;
+    xenoprof_buf_t *buf;
+    int head;
+    int tail;
+    int size;
+
+
+    total_samples++;
+
+    /* Ignore samples from unmonitored domains; for now they are all
+       counted as others_samples (idle is not yet separated out). */
+    if ( !is_profiled(vcpu->domain) )
+    {
+      others_samples++;
+      return;
+    }
+
+    v = &vcpu->domain->xenoprof->vcpu[vcpu->vcpu_id];
+
+    /* Sanity check. Should never happen */ 
+    if ( !v->buffer )
+    {
+        invalid_buffer_samples++;
+        return;
+    }
+
+    buf = vcpu->domain->xenoprof->vcpu[vcpu->vcpu_id].buffer;
+
+    head = buf->event_head;
+    tail = buf->event_tail;
+    size = v->event_size;
+
+    /* make sure indexes in shared buffer are sane */
+    if ( (head < 0) || (head >= size) || (tail < 0) || (tail >= size) )
+    {
+        corrupted_buffer_samples++;
+        return;
+    }
+
+    if ( (head == tail - 1) || (head == size - 1 && tail == 0) )
+    {
+        buf->lost_samples++;
+        lost_samples++;
+    }
+    else
+    {
+        buf->event_log[head].eip = eip;
+        buf->event_log[head].mode = mode;
+        buf->event_log[head].event = event;
+        head++;
+        if ( head >= size )
+            head = 0;
+        buf->event_head = head;
+        active_samples++;
+        if ( mode == 0 )
+            buf->user_samples++;
+        else if ( mode == 1 )
+            buf->kernel_samples++;
+        else
+            buf->xen_samples++;
+    }
+}
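
The head/tail discipline makes each shared buffer a lock-free
single-producer/single-consumer ring: Xen only advances event_head, the
guest only advances event_tail, and one slot is sacrificed so that a full
buffer is distinguishable from an empty one. A standalone model of the
producer-side check:

    #include <stdio.h>

    /* Ring capacity is SIZE - 1: the full test below is head + 1 == tail
     * modulo SIZE, written out the same way as in xenoprof_log_event(). */
    #define SIZE 4

    int main(void)
    {
        int head = 0, tail = 0, queued = 0, lost = 0, i;

        for (i = 0; i < 6; i++) {
            if ((head == tail - 1) || (head == SIZE - 1 && tail == 0)) {
                lost++;               /* buf->lost_samples++ in the patch */
            } else {
                /* event_log[head] = sample; */
                head = (head + 1) % SIZE;
                queued++;
            }
        }
        printf("queued=%d lost=%d\n", queued, lost); /* queued=3 lost=3 */
        return 0;
    }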
+
+char *alloc_xenoprof_buf(struct domain *d, int npages)
+{
+    char *rawbuf;
+    int i, order;
+
+    /* allocate pages to store sample buffer shared with domain */
+    order = get_order_from_pages(npages);
+    rawbuf = alloc_xenheap_pages(order);
+    if ( rawbuf == NULL )
+    {
+        printk("alloc_xenoprof_buf(): memory allocation failed\n");
+        return NULL;
+    }
+
+    /* Share pages so that kernel can map it */
+    for ( i=0; i<npages; i++ )
+    {
+        share_xen_page_with_guest(virt_to_page(rawbuf + i * PAGE_SIZE), 
+                                 d, XENSHARE_writable);
+    }
+
+    return rawbuf;
+}
+
+int alloc_xenoprof_struct(struct domain *d, int max_samples)
+{
+    struct vcpu *v;
+    int nvcpu, npages, bufsize, max_bufsize;
+    int i;
+
+    d->xenoprof = xmalloc(xenoprof_t);
+
+    if ( !d->xenoprof )
+    {
+        printk ("alloc_xenoprof_struct(): memory "
+                "allocation (xmalloc) failed\n");
+        return -ENOMEM;
+    }
+
+    memset(d->xenoprof, 0, sizeof(*d->xenoprof));
+
+    nvcpu = 0;
+    for_each_vcpu(d, v)
+        nvcpu++;
+
+    /* reduce buffer size if necessary to limit pages allocated */
+    bufsize = sizeof(xenoprof_buf_t) +
+        (max_samples - 1) * sizeof(struct event_log);
+    max_bufsize = (MAX_OPROF_SHARED_PAGES * PAGE_SIZE) / nvcpu;
+    if ( bufsize > max_bufsize )
+    {
+        bufsize = max_bufsize;
+        max_samples = ( (max_bufsize - sizeof(xenoprof_buf_t)) /
+                        sizeof(struct event_log) ) + 1;
+    }
+
+    npages = (nvcpu * bufsize - 1) / PAGE_SIZE + 1;
+    d->xenoprof->rawbuf = alloc_xenoprof_buf(d, npages);
+    if ( !d->xenoprof->rawbuf )
+    {
+        xfree(d->xenoprof);
+        d->xenoprof = NULL;
+        return -ENOMEM;
+    }
+
+    d->xenoprof->npages = npages;
+    d->xenoprof->nbuf = nvcpu;
+    d->xenoprof->bufsize = bufsize;
+    d->xenoprof->domain_ready = 0;
+    d->xenoprof->domain_type = XENOPROF_DOMAIN_IGNORED;
+
+    /* Update buffer pointers for active vcpus */
+    i=0;
+    for_each_vcpu(d, v)
+    {
+        d->xenoprof->vcpu[v->vcpu_id].event_size = max_samples;
+        d->xenoprof->vcpu[v->vcpu_id].buffer =
+            (xenoprof_buf_t *)&d->xenoprof->rawbuf[i * bufsize];
+        d->xenoprof->vcpu[v->vcpu_id].buffer->event_size = max_samples;
+        d->xenoprof->vcpu[v->vcpu_id].buffer->vcpu_id = v->vcpu_id;
+
+        i++;
+        /* in the unlikely case that the number of active vcpus changes */
+        if ( i >= nvcpu )
+            break;
+    }
+
+    return 0;
+}
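
The clamp above keeps any one domain's shared buffers within
MAX_OPROF_SHARED_PAGES. A worked example of the arithmetic under assumed
sizes (the header and per-event sizes below are illustrative, not taken
from the patch):

    #include <stdio.h>

    int main(void)
    {
        const int PAGE_SIZE = 4096, MAX_OPROF_SHARED_PAGES = 32;
        const int hdr = 32, entry = 16;   /* assumed sizeof()s */
        int nvcpu = 2, max_samples = 10000;
        int bufsize, max_bufsize, npages;

        bufsize = hdr + (max_samples - 1) * entry;                  /* 160016 */
        max_bufsize = (MAX_OPROF_SHARED_PAGES * PAGE_SIZE) / nvcpu; /* 65536 */
        if (bufsize > max_bufsize) {
            bufsize = max_bufsize;
            max_samples = (max_bufsize - hdr) / entry + 1;          /* 4095 */
        }
        npages = (nvcpu * bufsize - 1) / PAGE_SIZE + 1;             /* 32 */
        printf("bufsize=%d max_samples=%d npages=%d\n",
               bufsize, max_samples, npages);
        return 0;
    }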
+
+void free_xenoprof_pages(struct domain *d)
+{
+    xenoprof_t *x;
+    int order;
+
+    x = d->xenoprof;
+
+    if ( x )
+    {
+        if ( x->rawbuf )
+        {
+            order = get_order_from_pages(x->npages);
+            free_xenheap_pages(x->rawbuf, order);
+        }
+        xfree(x);
+        d->xenoprof = NULL;
+    }
+}
+
+int xenoprof_init(int max_samples, xenoprof_init_result_t *init_result)
+{
+    xenoprof_init_result_t result;
+    int is_primary, num_events;
+    struct domain *d = current->domain;
+    int ret;
+
+    ret = nmi_init(&num_events, &is_primary, result.cpu_type);
+    if ( is_primary )
+        primary_profiler = current->domain;
+
+    if ( ret < 0 )
+        goto err;
+
+    /* We allocate the xenoprof struct and buffers only the first time
+       xenoprof_init is called; memory is then kept until the domain is
+       destroyed. */
+    if ( !d->xenoprof )
+    {
+        if ( (ret = alloc_xenoprof_struct(d, max_samples)) < 0 )
+            goto err;
+    }
+
+    xenoprof_reset_buf(d);
+
+    d->xenoprof->domain_type  = XENOPROF_DOMAIN_IGNORED;
+    d->xenoprof->domain_ready = 0;
+    d->xenoprof->is_primary = is_primary;
+
+    result.is_primary = is_primary;
+    result.num_events = num_events;
+    result.nbuf = d->xenoprof->nbuf;
+    result.bufsize = d->xenoprof->bufsize;
+    result.buf_maddr = __pa(d->xenoprof->rawbuf);
+
+    if ( copy_to_user((void *)init_result, (void *)&result, sizeof(result)) )
+    {
+        ret = -EFAULT;
+        goto err;
+    }
+
+    return ret;
+
+ err:
+    if ( primary_profiler == current->domain )
+        primary_profiler = NULL;
+    return ret;
+}
+
+#define PRIV_OP(op) ( (op == XENOPROF_set_active) \
+                   || (op == XENOPROF_reserve_counters) \
+                   || (op == XENOPROF_setup_events) \
+                   || (op == XENOPROF_start) \
+                   || (op == XENOPROF_stop) \
+                   || (op == XENOPROF_release_counters) \
+                   || (op == XENOPROF_shutdown))
+
+int do_xenoprof_op(int op, unsigned long arg1, unsigned long arg2)
+{
+    int ret = 0;
+
+    if ( PRIV_OP(op) && current->domain != primary_profiler )
+    {
+        printk("xenoprof: dom %d denied privileged operation %d\n",
+               current->domain->domain_id, op);
+        return -EPERM;
+    }
+
+    switch ( op )
+    {
+    case XENOPROF_init:
+        ret = xenoprof_init((int)arg1, (xenoprof_init_result_t *)arg2);
+        break;
+
+    case XENOPROF_set_active:
+        if ( xenoprof_state != XENOPROF_IDLE )
+            return -EPERM;
+        if ( arg2 > MAX_OPROF_DOMAINS )
+            return -E2BIG;
+        if ( copy_from_user((void *)&active_domains, 
+                            (void *)arg1, arg2*sizeof(int)) )
+            return -EFAULT;
+        ret = set_active_domains(arg2);
+        break;
+
+    case XENOPROF_reserve_counters:
+        if ( xenoprof_state != XENOPROF_IDLE )
+            return -EPERM;
+        ret = nmi_reserve_counters();
+        if ( !ret )
+            xenoprof_state = XENOPROF_COUNTERS_RESERVED;
+        break;
+
+    case XENOPROF_setup_events:
+        if ( xenoprof_state != XENOPROF_COUNTERS_RESERVED )
+            return -EPERM;
+        if ( adomains == 0 )
+        {
+            set_active_domains(0);
+        }
+
+        if ( copy_from_user((void *)&counter_config, (void *)arg1, 
+                            arg2 * sizeof(struct op_counter_config)) )
+            return -EFAULT;
+        ret = nmi_setup_events();
+        if ( !ret )
+            xenoprof_state = XENOPROF_READY;
+        break;
+
+    case XENOPROF_enable_virq:
+        if ( current->domain == primary_profiler )
+        {
+            nmi_enable_virq();
+            xenoprof_reset_stat();
+        }
+        xenoprof_reset_buf(current->domain);
+        ret = set_active(current->domain);
+        break;
+
+    case XENOPROF_start:
+        if ( (xenoprof_state == XENOPROF_READY) &&
+             (activated == adomains) )
+        {
+            ret = nmi_start();
+        }
+        else 
+            ret = -EPERM;
+
+        if ( !ret )
+            xenoprof_state = XENOPROF_PROFILING;
+        break;
+
+    case XENOPROF_stop:
+        if ( xenoprof_state != XENOPROF_PROFILING )
+            return -EPERM;
+        nmi_stop();
+        xenoprof_state = XENOPROF_READY;
+        break;
+
+    case XENOPROF_disable_virq:
+        if ( (xenoprof_state == XENOPROF_PROFILING) && 
+             (is_active(current->domain)) )
+            return -EPERM;
+        ret = reset_active(current->domain);
+        break;
+
+    case XENOPROF_release_counters:
+        if ( (xenoprof_state == XENOPROF_COUNTERS_RESERVED) ||
+             (xenoprof_state == XENOPROF_READY) )
+        {
+            xenoprof_state = XENOPROF_IDLE;
+            nmi_release_counters();
+            nmi_disable_virq();
+        }
+        else
+            ret = -EPERM;
+        break;
+
+    case XENOPROF_shutdown:
+        if ( xenoprof_state == XENOPROF_IDLE )
+        {
+            activated = 0;
+            adomains = 0;
+            primary_profiler = NULL;
+            ret = 0;
+        }
+        else 
+            ret = -EPERM;
+        break;
+
+    default:
+        ret = -EINVAL;
+    }
+
+    if ( ret < 0 )
+        printk("xenoprof: operation %d failed for dom %d (status : %d)\n",
+               op, current->domain->domain_id, ret);
+
+    return ret;
+}
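
The switch above enforces a simple lifecycle: XENOPROF_IDLE to
COUNTERS_RESERVED to READY to PROFILING, with stop and release walking back
down, and only the primary profiler may drive the privileged transitions.
A toy userspace model of just those transitions (all counter and VIRQ work
stubbed out):

    #include <stdio.h>

    enum xs { IDLE, RESERVED, READY, PROFILING };
    static enum xs state = IDLE;

    static int op_reserve(void) { if (state != IDLE) return -1;
                                  state = RESERVED; return 0; }
    static int op_setup(void)   { if (state != RESERVED) return -1;
                                  state = READY; return 0; }
    static int op_start(void)   { if (state != READY) return -1;
                                  state = PROFILING; return 0; }
    static int op_stop(void)    { if (state != PROFILING) return -1;
                                  state = READY; return 0; }
    static int op_release(void) { if (state != RESERVED && state != READY)
                                      return -1;
                                  state = IDLE; return 0; }

    int main(void)
    {
        int r;

        r = op_start();   printf("start from idle  -> %d (refused)\n", r);
        r = op_reserve(); printf("reserve_counters -> %d\n", r);
        r = op_setup();   printf("setup_events     -> %d\n", r);
        r = op_start();   printf("start            -> %d\n", r);
        r = op_stop();    printf("stop             -> %d\n", r);
        r = op_release(); printf("release_counters -> %d\n", r);
        return 0;
    }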
diff -r 388c59fefaa6 -r e049baa9055d xen/include/public/xenoprof.h
--- /dev/null   Thu Apr  6 16:49:21 2006
+++ b/xen/include/public/xenoprof.h     Thu Apr  6 17:58:01 2006
@@ -0,0 +1,72 @@
+/******************************************************************************
+ * xenoprof.h
+ * 
+ * Interface for enabling system wide profiling based on hardware performance
+ * counters
+ * 
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ * Written by Aravind Menon & Jose Renato Santos
+ */
+
+#ifndef __XEN_PUBLIC_XENOPROF_H__
+#define __XEN_PUBLIC_XENOPROF_H__
+
+/*
+ * Commands to HYPERVISOR_pmc_op().
+ */
+#define XENOPROF_init               0
+#define XENOPROF_set_active         1
+#define XENOPROF_reserve_counters   3
+#define XENOPROF_setup_events       4
+#define XENOPROF_enable_virq        5
+#define XENOPROF_start              6
+#define XENOPROF_stop               7
+#define XENOPROF_disable_virq       8
+#define XENOPROF_release_counters   9
+#define XENOPROF_shutdown          10
+
+#define MAX_OPROF_EVENTS    32
+#define MAX_OPROF_DOMAINS   25 
+#define XENOPROF_CPU_TYPE_SIZE 64
+
+/* Xenoprof performance events (not Xen events) */
+struct event_log {
+    uint64_t eip;
+    uint8_t mode;
+    uint8_t event;
+};
+
+/* Xenoprof buffer shared between Xen and domain - 1 per VCPU */
+typedef struct xenoprof_buf {
+    uint32_t event_head;
+    uint32_t event_tail;
+    uint32_t event_size;
+    uint32_t vcpu_id;
+    uint64_t xen_samples;
+    uint64_t kernel_samples;
+    uint64_t user_samples;
+    uint64_t lost_samples;
+    struct event_log event_log[1];
+} xenoprof_buf_t;
+
+typedef struct xenoprof_init_result {
+    int32_t  num_events;
+    int32_t  is_primary;
+    int32_t  nbuf;
+    int32_t  bufsize;
+    uint64_t buf_maddr;
+    char cpu_type[XENOPROF_CPU_TYPE_SIZE];
+} xenoprof_init_result_t;
+
+
+#endif /* __XEN_PUBLIC_XENOPROF_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
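
The trailing event_log[1] member is the old-style flexible-array idiom: the
array is really event_size entries long, sized at allocation time in
alloc_xenoprof_struct(). A guest that mapped buf_maddr would drain each
per-VCPU buffer from the tail side when the profiling VIRQ arrives; the
consumer loop below is a hypothetical sketch (the real handler belongs to
the guest's oprofile driver, not to this header), assuming this header is
on the include path:

    #include <stdint.h>
    #include <public/xenoprof.h>

    /* Hypothetical guest-side consumer for one xenoprof_buf_t. */
    static void drain_buf(xenoprof_buf_t *buf,
                          void (*log_sample)(uint64_t eip, uint8_t mode,
                                             uint8_t event))
    {
        uint32_t tail = buf->event_tail;

        while (tail != buf->event_head) {   /* Xen only advances head */
            struct event_log *e = &buf->event_log[tail];

            log_sample(e->eip, e->mode, e->event);
            if (++tail >= buf->event_size)
                tail = 0;
            buf->event_tail = tail;         /* frees the slot for Xen */
        }
    }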
diff -r 388c59fefaa6 -r e049baa9055d xen/include/xen/xenoprof.h
--- /dev/null   Thu Apr  6 16:49:21 2006
+++ b/xen/include/xen/xenoprof.h        Thu Apr  6 17:58:01 2006
@@ -0,0 +1,40 @@
+/******************************************************************************
+ * xenoprof.h
+ * 
+ * Xenoprof: Xenoprof enables performance profiling in Xen
+ * 
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ * written by Aravind Menon & Jose Renato Santos
+ */
+
+#ifndef __XEN_XENOPROF_H__
+#define __XEN_XENOPROF_H__
+
+#include <public/xenoprof.h>
+
+#define XENOPROF_DOMAIN_IGNORED    0
+#define XENOPROF_DOMAIN_ACTIVE     1
+
+#define XENOPROF_IDLE              0
+#define XENOPROF_COUNTERS_RESERVED 1
+#define XENOPROF_READY             2
+#define XENOPROF_PROFILING         3
+
+
+typedef struct xenoprof_vcpu {
+    int event_size;
+    xenoprof_buf_t *buffer;
+} xenoprof_vcpu_t;
+
+typedef struct xenoprof {
+    char* rawbuf;
+    int npages;
+    int nbuf;
+    int bufsize;
+    int domain_type;
+    int domain_ready;
+    int is_primary;
+    xenoprof_vcpu_t vcpu [MAX_VIRT_CPUS];
+} xenoprof_t;
+
+#endif  /* __XEN_XENOPROF_H__ */

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-changelog] Add xenoprof support, Xen patchbot -unstable <=