diff -Naurp ../xeno-unstable.bk/linux-2.6.11-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32 linux-2.6.11-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32 --- ../xeno-unstable.bk/linux-2.6.11-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32 2005-04-15 08:27:45.000000000 -0500 +++ linux-2.6.11-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32 2005-04-15 09:01:45.000000000 -0500 @@ -75,6 +75,12 @@ CONFIG_OBSOLETE_MODPARM=y CONFIG_KMOD=y # +# OProfile options +# +CONFIG_PROFILING=y +CONFIG_OPROFILE=m + +# # X86 Processor Configuration # CONFIG_XENARCH="i386" diff -Naurp ../xeno-unstable.bk/linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32 linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32 --- ../xeno-unstable.bk/linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32 2005-04-15 08:27:45.000000000 -0500 +++ linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32 2005-04-15 09:01:45.000000000 -0500 @@ -71,6 +71,12 @@ CONFIG_OBSOLETE_MODPARM=y CONFIG_KMOD=y # +# OProfile options +# +CONFIG_PROFILING=y +CONFIG_OPROFILE=m + +# # X86 Processor Configuration # CONFIG_XENARCH="i386" diff -Naurp ../xeno-unstable.bk/linux-2.6.11-xen-sparse/arch/xen/i386/Makefile linux-2.6.11-xen-sparse/arch/xen/i386/Makefile --- ../xeno-unstable.bk/linux-2.6.11-xen-sparse/arch/xen/i386/Makefile 2005-04-14 14:56:31.000000000 -0500 +++ linux-2.6.11-xen-sparse/arch/xen/i386/Makefile 2005-04-15 09:01:46.000000000 -0500 @@ -78,7 +78,6 @@ core-y += arch/xen/i386/kernel/ \ drivers-$(CONFIG_MATH_EMULATION) += arch/i386/math-emu/ drivers-$(CONFIG_PCI) += arch/xen/i386/pci/ # must be linked after kernel/ -drivers-$(CONFIG_OPROFILE) += arch/i386/oprofile/ drivers-$(CONFIG_PM) += arch/i386/power/ # for clean diff -Naurp ../xeno-unstable.bk/linux-2.6.11-xen-sparse/arch/xen/Kconfig linux-2.6.11-xen-sparse/arch/xen/Kconfig --- ../xeno-unstable.bk/linux-2.6.11-xen-sparse/arch/xen/Kconfig 2005-04-15 08:27:45.000000000 -0500 +++ linux-2.6.11-xen-sparse/arch/xen/Kconfig 2005-04-15 09:01:47.000000000 -0500 @@ -188,3 +188,6 @@ source "security/Kconfig" source "crypto/Kconfig" source "lib/Kconfig" + +source "arch/xen/oprofile/Kconfig" + diff -Naurp ../xeno-unstable.bk/linux-2.6.11-xen-sparse/arch/xen/kernel/evtchn.c linux-2.6.11-xen-sparse/arch/xen/kernel/evtchn.c --- ../xeno-unstable.bk/linux-2.6.11-xen-sparse/arch/xen/kernel/evtchn.c 2005-04-15 08:27:45.000000000 -0500 +++ linux-2.6.11-xen-sparse/arch/xen/kernel/evtchn.c 2005-04-15 09:01:47.000000000 -0500 @@ -44,9 +44,14 @@ #include #include +int virq_to_phys(int virq); + #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) EXPORT_SYMBOL(force_evtchn_callback); EXPORT_SYMBOL(evtchn_do_upcall); +EXPORT_SYMBOL(virq_to_phys); +EXPORT_SYMBOL(bind_virq_to_irq); +EXPORT_SYMBOL(unbind_virq_from_irq); #endif /* @@ -58,7 +63,6 @@ static spinlock_t irq_mapping_update_loc /* IRQ <-> event-channel mappings. */ static int evtchn_to_irq[NR_EVENT_CHANNELS]; static int irq_to_evtchn[NR_IRQS]; - /* IRQ <-> VIRQ mapping. 
 */
 DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]);
@@ -150,6 +154,15 @@ static int find_unbound_irq(void)
     return irq;
 }
 
+int virq_to_phys(int virq)
+{
+	int cpu = smp_processor_id();
+
+	if (virq >= NR_VIRQS)
+		return -1;
+	return per_cpu(virq_to_irq, cpu)[virq];
+}
+
 int bind_virq_to_irq(int virq)
 {
     evtchn_op_t op;
diff -Naurp ../xeno-unstable.bk/linux-2.6.11-xen-sparse/arch/xen/Makefile linux-2.6.11-xen-sparse/arch/xen/Makefile
--- ../xeno-unstable.bk/linux-2.6.11-xen-sparse/arch/xen/Makefile	2005-04-14 14:56:31.000000000 -0500
+++ linux-2.6.11-xen-sparse/arch/xen/Makefile	2005-04-15 09:01:48.000000000 -0500
@@ -22,6 +22,8 @@ UTS_MACHINE := $(XENARCH)
 
 core-y	+= arch/xen/kernel/
 
+drivers-$(CONFIG_OPROFILE)	+= arch/xen/oprofile/
+
 include/.asm-ignore: include/asm
 	@rm -f include/.asm-ignore
 	@mv include/asm include/.asm-ignore
diff -Naurp ../xeno-unstable.bk/linux-2.6.11-xen-sparse/arch/xen/oprofile/Kconfig linux-2.6.11-xen-sparse/arch/xen/oprofile/Kconfig
--- ../xeno-unstable.bk/linux-2.6.11-xen-sparse/arch/xen/oprofile/Kconfig	1969-12-31 18:00:00.000000000 -0600
+++ linux-2.6.11-xen-sparse/arch/xen/oprofile/Kconfig	2005-04-15 09:01:48.000000000 -0500
@@ -0,0 +1,23 @@
+
+menu "Profiling support"
+	depends on EXPERIMENTAL
+
+config PROFILING
+	bool "Profiling support (EXPERIMENTAL)"
+	help
+	  Say Y here to enable the extended profiling support mechanisms used
+	  by profilers such as OProfile.
+
+
+config OPROFILE
+	tristate "OProfile system profiling (EXPERIMENTAL)"
+	depends on PROFILING
+	help
+	  OProfile is a profiling system capable of profiling the
+	  whole system, including the kernel, kernel modules, libraries,
+	  and applications.
+
+	  If unsure, say N.
+
+endmenu
+
diff -Naurp ../xeno-unstable.bk/linux-2.6.11-xen-sparse/arch/xen/oprofile/Makefile linux-2.6.11-xen-sparse/arch/xen/oprofile/Makefile
--- ../xeno-unstable.bk/linux-2.6.11-xen-sparse/arch/xen/oprofile/Makefile	1969-12-31 18:00:00.000000000 -0600
+++ linux-2.6.11-xen-sparse/arch/xen/oprofile/Makefile	2005-04-15 09:01:48.000000000 -0500
@@ -0,0 +1,9 @@
+obj-$(CONFIG_OPROFILE) += oprofile.o
+
+DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
+		oprof.o cpu_buffer.o buffer_sync.o \
+		event_buffer.o oprofile_files.o \
+		oprofilefs.o oprofile_stats.o \
+		timer_int.o )
+
+oprofile-y := $(DRIVER_OBJS) pmc.o
diff -Naurp ../xeno-unstable.bk/linux-2.6.11-xen-sparse/arch/xen/oprofile/op_counter.h linux-2.6.11-xen-sparse/arch/xen/oprofile/op_counter.h
--- ../xeno-unstable.bk/linux-2.6.11-xen-sparse/arch/xen/oprofile/op_counter.h	1969-12-31 18:00:00.000000000 -0600
+++ linux-2.6.11-xen-sparse/arch/xen/oprofile/op_counter.h	2005-04-15 09:01:48.000000000 -0500
@@ -0,0 +1,29 @@
+/**
+ * @file op_counter.h
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ */
+
+#ifndef OP_COUNTER_H
+#define OP_COUNTER_H
+
+#define OP_MAX_COUNTER 8
+
+/* Per-perfctr configuration as set via
+ * oprofilefs.
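+ * Each of the OP_MAX_COUNTER slots describes one hardware
+ * counter: 'event' selects the event to count, 'count' sets
+ * how many events elapse between samples, and 'kernel'/'user'
+ * filter by privilege level. In this Xen port the primary
+ * domain hands the whole array down via PMC_SETUP_EVENTS.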
+ */ +struct op_counter_config { + unsigned long count; + unsigned long enabled; + unsigned long event; + unsigned long kernel; + unsigned long user; + unsigned long unit_mask; +}; + +extern struct op_counter_config counter_config[]; + +#endif /* OP_COUNTER_H */ diff -Naurp ../xeno-unstable.bk/linux-2.6.11-xen-sparse/arch/xen/oprofile/pmc.c linux-2.6.11-xen-sparse/arch/xen/oprofile/pmc.c --- ../xeno-unstable.bk/linux-2.6.11-xen-sparse/arch/xen/oprofile/pmc.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-2.6.11-xen-sparse/arch/xen/oprofile/pmc.c 2005-04-15 09:01:48.000000000 -0500 @@ -0,0 +1,323 @@ +/** + * @file nmi_int.c + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon + * + * Modified by Aravind Menon for Xen + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "op_counter.h" + +static int pmc_start(void); +static void pmc_stop(void); + +/* 0 == registered but off, 1 == registered and on */ +static int pmc_enabled = 0; +static int num_events = 0; +static int is_primary = 0; + +#ifdef CONFIG_PM + +static int pmc_suspend(struct sys_device *dev, u32 state) +{ + if (pmc_enabled == 1) + pmc_stop(); + return 0; +} + + +static int pmc_resume(struct sys_device *dev) +{ + if (pmc_enabled == 1) + pmc_start(); + return 0; +} + + +static struct sysdev_class oprofile_sysclass = { + set_kset_name("oprofile"), + .resume = pmc_resume, + .suspend = pmc_suspend, +}; + + +static struct sys_device device_oprofile = { + .id = 0, + .cls = &oprofile_sysclass, +}; + + +static int __init init_driverfs(void) +{ + int error; + if (!(error = sysdev_class_register(&oprofile_sysclass))) + error = sysdev_register(&device_oprofile); + return error; +} + + +static void __exit exit_driverfs(void) +{ + sysdev_unregister(&device_oprofile); + sysdev_class_unregister(&oprofile_sysclass); +} + +#else +#define init_driverfs() do { } while (0) +#define exit_driverfs() do { } while (0) +#endif /* CONFIG_PM */ + +unsigned long long oprofile_samples = 0; + +static irqreturn_t pmc_ovf_interrupt (int irq, void *dev_id, struct pt_regs *regs) +{ + int head, tail; + shared_info_t *s = HYPERVISOR_shared_info; + int cpu = smp_processor_id(); + + head = s->event_head; + tail = s->event_tail; + + /* oprofile_add_sample will also handle samples from other domains */ + + //printk(KERN_INFO "pmc_ovf_interrupt: head %d, tail %d\n", head, tail); + + if (tail > head) { + while (tail < MAX_OPROF_EVENTS) { + oprofile_add_sample(s->event_log[tail].eip, + s->event_log[tail].mode, + s->event_log[tail].event, cpu); + /*printk(KERN_INFO "pmc_sample: %p, %d, %d\n", + s->event_log[tail].eip, s->event_log[tail].mode, + s->event_log[tail].event);*/ + oprofile_samples++; + tail++; + } + tail = 0; + } + while (tail < head) { + oprofile_add_sample(s->event_log[tail].eip, + s->event_log[tail].mode, s->event_log[tail].event, cpu); + /*printk(KERN_INFO "pmc_sample: %p, %d, %d\n", + s->event_log[tail].eip, s->event_log[tail].mode, + s->event_log[tail].event);*/ + oprofile_samples++; + tail++; + } + + s->event_tail = tail; + s->losing_samples = 0; + + return IRQ_HANDLED; +} + +extern int virq_to_phys(int virq); + +static int pmc_setup(void) +{ + int ret; + + if ((ret = request_irq(bind_virq_to_irq(VIRQ_PMC_OVF), + pmc_ovf_interrupt, SA_INTERRUPT, "pmc_ovf", NULL))) + goto release_irq; + + if (is_primary) { + ret = HYPERVISOR_pmc_op(PMC_RESERVE_COUNTERS, (unsigned 
int)NULL, (unsigned int)NULL);
+		//printk(KERN_INFO "pmc_setup: reserve_counters: ret %d\n", ret);
+
+		ret = HYPERVISOR_pmc_op(PMC_SETUP_EVENTS, (unsigned int)&counter_config, (unsigned int)num_events);
+		//printk(KERN_INFO "pmc_setup: setup_events: ret %d\n", ret);
+	}
+
+	ret = HYPERVISOR_pmc_op(PMC_ENABLE_VIRQ, (unsigned int)NULL, (unsigned int)NULL);
+	//printk(KERN_INFO "pmc_setup: enable_virq: ret %d\n", ret);
+
+	pmc_enabled = 1;
+	return 0;
+
+release_irq:
+	free_irq(virq_to_phys(VIRQ_PMC_OVF), NULL);
+	unbind_virq_from_irq(VIRQ_PMC_OVF);
+
+	return ret;
+}
+
+static void pmc_shutdown(void)
+{
+	int ret;
+	pmc_enabled = 0;
+
+	ret = HYPERVISOR_pmc_op(PMC_DISABLE_VIRQ, (unsigned int)NULL, (unsigned int)NULL);
+	//printk(KERN_INFO "pmc_shutdown: disable_virq: ret %d\n", ret);
+
+	if (is_primary) {
+		ret = HYPERVISOR_pmc_op(PMC_RELEASE_COUNTERS, (unsigned int)NULL, (unsigned int)NULL);
+		//printk(KERN_INFO "pmc_shutdown: release_counters: ret %d\n", ret);
+	}
+
+	free_irq(virq_to_phys(VIRQ_PMC_OVF), NULL);
+	unbind_virq_from_irq(VIRQ_PMC_OVF);
+}
+
+static int pmc_start(void)
+{
+	int ret = 0;
+	if (is_primary)
+		ret = HYPERVISOR_pmc_op(PMC_START, (unsigned int)NULL, (unsigned int)NULL);
+	//printk(KERN_INFO "pmc_start: ret %d\n", ret);
+	return ret;
+}
+
+static void pmc_stop(void)
+{
+	int ret = 0;
+	if (is_primary)
+		ret = HYPERVISOR_pmc_op(PMC_STOP, (unsigned int)NULL, (unsigned int)NULL);
+	//printk(KERN_INFO "pmc_stop: ret %d\n", ret);
+	printk(KERN_INFO "pmc: oprofile samples %llu, active %llu, passive %llu, other %llu, buffering losses %llu, NMI restarted %d\n",
+		oprofile_samples, HYPERVISOR_shared_info->active_samples, HYPERVISOR_shared_info->passive_samples,
+		HYPERVISOR_shared_info->other_samples, HYPERVISOR_shared_info->samples_lost, HYPERVISOR_shared_info->nmi_restarts);
+}
+
+static int pmc_set_active(int *active_domains, unsigned int adomains)
+{
+	int ret = 0;
+	if (is_primary)
+		ret = HYPERVISOR_pmc_op(PMC_SET_ACTIVE,
+			(unsigned int)active_domains, (unsigned int)adomains);
+	return ret;
+}
+
+static int pmc_set_passive(int *passive_domains, unsigned int pdomains)
+{
+	int ret = 0;
+	if (is_primary)
+		ret = HYPERVISOR_pmc_op(PMC_SET_PASSIVE,
+			(unsigned int)passive_domains, (unsigned int)pdomains);
+	return ret;
+}
+
+struct op_counter_config counter_config[OP_MAX_COUNTER];
+
+static int pmc_create_files(struct super_block * sb, struct dentry * root)
+{
+	unsigned int i;
+
+	for (i = 0; i < num_events; ++i) {
+		struct dentry * dir;
+		char buf[2];
+
+		snprintf(buf, 2, "%d", i);
+		dir = oprofilefs_mkdir(sb, root, buf);
+		oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
+		oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
+		oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);
+		oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
+		oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
+		oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
+	}
+
+	//printk(KERN_INFO "pmc_create_files\n");
+	return 0;
+}
+
+
+struct oprofile_operations pmc_ops = {
+	.create_files	= pmc_create_files,
+	.set_active	= pmc_set_active,
+	.set_passive	= pmc_set_passive,
+	.setup		= pmc_setup,
+	.shutdown	= pmc_shutdown,
+	.start		= pmc_start,
+	.stop		= pmc_stop
+};
+
+
+static void __init p4_init(void)
+{
+	__u8 cpu_model = current_cpu_data.x86_model;
+
+	if (cpu_model > 3)
+		pmc_ops.cpu_type = "type_unknown";
+
+	/* We always use a non-HT system because that gives us more events */
+
pmc_ops.cpu_type = "i386/p4"; +} + + +static void __init ppro_init(void) +{ + __u8 cpu_model = current_cpu_data.x86_model; + + if (cpu_model > 0xd) + pmc_ops.cpu_type = "type_unknown"; + + if (cpu_model == 9) { + pmc_ops.cpu_type = "i386/p6_mobile"; + } else if (cpu_model > 5) { + pmc_ops.cpu_type = "i386/piii"; + } else if (cpu_model > 2) { + pmc_ops.cpu_type = "i386/pii"; + } else { + pmc_ops.cpu_type = "i386/ppro"; + } +} + +/* in order to get driverfs right */ +static int using_pmc; + +int __init oprofile_arch_init(struct oprofile_operations ** ops) +{ + int ret = HYPERVISOR_pmc_op(PMC_INIT, (unsigned int)&num_events, (unsigned int)&is_primary); + + if (!ret) { + __u8 vendor = current_cpu_data.x86_vendor; + __u8 family = current_cpu_data.x86; + + if (vendor == X86_VENDOR_INTEL) { + switch (family) { + /* Pentium IV */ + case 0xf: + p4_init(); + break; + /* A P6-class processor */ + case 6: + ppro_init(); + break; + default: + pmc_ops.cpu_type = "type_unknown"; + } + } else pmc_ops.cpu_type = "type_unknown"; + + init_driverfs(); + using_pmc = 1; + *ops = &pmc_ops; + } + printk (KERN_INFO "oprofile_arch_init: ret %d, events %d, is_primary %d\n", ret, num_events, is_primary); + return ret; +} + + +void __exit oprofile_arch_exit(void) +{ + if (using_pmc) + exit_driverfs(); + if (is_primary) + HYPERVISOR_pmc_op(PMC_SHUTDOWN, (unsigned int )NULL, (unsigned int)NULL); +} diff -Naurp ../xeno-unstable.bk/linux-2.6.11-xen-sparse/drivers/oprofile/buffer_sync.c linux-2.6.11-xen-sparse/drivers/oprofile/buffer_sync.c --- ../xeno-unstable.bk/linux-2.6.11-xen-sparse/drivers/oprofile/buffer_sync.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-2.6.11-xen-sparse/drivers/oprofile/buffer_sync.c 2005-04-15 09:01:48.000000000 -0500 @@ -0,0 +1,540 @@ +/** + * @file buffer_sync.c + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon + * + * Modified by Aravind Menon for Xen + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. + * + * This is the core of the buffer management. Each + * CPU buffer is processed and entered into the + * global event buffer. Such processing is necessary + * in several circumstances, mentioned below. + * + * The processing does the job of converting the + * transitory EIP value into a persistent dentry/offset + * value that the profiler can record at its leisure. + * + * See fs/dcookies.c for a description of the dentry/offset + * objects. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "oprofile_stats.h" +#include "event_buffer.h" +#include "cpu_buffer.h" +#include "buffer_sync.h" + +static LIST_HEAD(dying_tasks); +static LIST_HEAD(dead_tasks); +cpumask_t marked_cpus = CPU_MASK_NONE; +static spinlock_t task_mortuary = SPIN_LOCK_UNLOCKED; +void process_task_mortuary(void); + + +/* Take ownership of the task struct and place it on the + * list for processing. Only after two full buffer syncs + * does the task eventually get freed, because by then + * we are sure we will not reference it again. + */ +static int task_free_notify(struct notifier_block * self, unsigned long val, void * data) +{ + struct task_struct * task = (struct task_struct *)data; + spin_lock(&task_mortuary); + list_add(&task->tasks, &dying_tasks); + spin_unlock(&task_mortuary); + return NOTIFY_OK; +} + + +/* The task is on its way out. A sync of the buffer means we can catch + * any remaining samples for this task. 
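+ *
+ * Note the interplay with task_free_notify() above: the task
+ * struct itself is parked on dying_tasks and is only freed by
+ * process_task_mortuary() after two full sync passes.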
+ */ +static int task_exit_notify(struct notifier_block * self, unsigned long val, void * data) +{ + /* To avoid latency problems, we only process the current CPU, + * hoping that most samples for the task are on this CPU + */ + sync_buffer(smp_processor_id()); + return 0; +} + + +/* The task is about to try a do_munmap(). We peek at what it's going to + * do, and if it's an executable region, process the samples first, so + * we don't lose any. This does not have to be exact, it's a QoI issue + * only. + */ +static int munmap_notify(struct notifier_block * self, unsigned long val, void * data) +{ + unsigned long addr = (unsigned long)data; + struct mm_struct * mm = current->mm; + struct vm_area_struct * mpnt; + + down_read(&mm->mmap_sem); + + mpnt = find_vma(mm, addr); + if (mpnt && mpnt->vm_file && (mpnt->vm_flags & VM_EXEC)) { + up_read(&mm->mmap_sem); + /* To avoid latency problems, we only process the current CPU, + * hoping that most samples for the task are on this CPU + */ + sync_buffer(smp_processor_id()); + return 0; + } + + up_read(&mm->mmap_sem); + return 0; +} + + +/* We need to be told about new modules so we don't attribute to a previously + * loaded module, or drop the samples on the floor. + */ +static int module_load_notify(struct notifier_block * self, unsigned long val, void * data) +{ +#ifdef CONFIG_MODULES + if (val != MODULE_STATE_COMING) + return 0; + + /* FIXME: should we process all CPU buffers ? */ + down(&buffer_sem); + add_event_entry(ESCAPE_CODE); + add_event_entry(MODULE_LOADED_CODE); + up(&buffer_sem); +#endif + return 0; +} + + +static struct notifier_block task_free_nb = { + .notifier_call = task_free_notify, +}; + +static struct notifier_block task_exit_nb = { + .notifier_call = task_exit_notify, +}; + +static struct notifier_block munmap_nb = { + .notifier_call = munmap_notify, +}; + +static struct notifier_block module_load_nb = { + .notifier_call = module_load_notify, +}; + + +static void end_sync(void) +{ + end_cpu_work(); + /* make sure we don't leak task structs */ + process_task_mortuary(); + process_task_mortuary(); +} + + +int sync_start(void) +{ + int err; + + start_cpu_work(); + + err = task_handoff_register(&task_free_nb); + if (err) + goto out1; + err = profile_event_register(PROFILE_TASK_EXIT, &task_exit_nb); + if (err) + goto out2; + err = profile_event_register(PROFILE_MUNMAP, &munmap_nb); + if (err) + goto out3; + err = register_module_notifier(&module_load_nb); + if (err) + goto out4; + +out: + return err; +out4: + profile_event_unregister(PROFILE_MUNMAP, &munmap_nb); +out3: + profile_event_unregister(PROFILE_TASK_EXIT, &task_exit_nb); +out2: + task_handoff_unregister(&task_free_nb); +out1: + end_sync(); + goto out; +} + + +void sync_stop(void) +{ + unregister_module_notifier(&module_load_nb); + profile_event_unregister(PROFILE_MUNMAP, &munmap_nb); + profile_event_unregister(PROFILE_TASK_EXIT, &task_exit_nb); + task_handoff_unregister(&task_free_nb); + end_sync(); +} + + +/* Optimisation. We can manage without taking the dcookie sem + * because we cannot reach this code without at least one + * dcookie user still being registered (namely, the reader + * of the event buffer). 
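+ *
+ * When dentry->d_cookie is already set, the dentry address
+ * itself serves as the cookie value, so the get_dcookie()
+ * call (and its locking) can be skipped entirely.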
*/ +static inline unsigned long fast_get_dcookie(struct dentry * dentry, + struct vfsmount * vfsmnt) +{ + unsigned long cookie; + + if (dentry->d_cookie) + return (unsigned long)dentry; + get_dcookie(dentry, vfsmnt, &cookie); + return cookie; +} + + +/* Look up the dcookie for the task's first VM_EXECUTABLE mapping, + * which corresponds loosely to "application name". This is + * not strictly necessary but allows oprofile to associate + * shared-library samples with particular applications + */ +static unsigned long get_exec_dcookie(struct mm_struct * mm) +{ + unsigned long cookie = 0; + struct vm_area_struct * vma; + + if (!mm) + goto out; + + for (vma = mm->mmap; vma; vma = vma->vm_next) { + if (!vma->vm_file) + continue; + if (!(vma->vm_flags & VM_EXECUTABLE)) + continue; + cookie = fast_get_dcookie(vma->vm_file->f_dentry, + vma->vm_file->f_vfsmnt); + break; + } + +out: + return cookie; +} + + +/* Convert the EIP value of a sample into a persistent dentry/offset + * pair that can then be added to the global event buffer. We make + * sure to do this lookup before a mm->mmap modification happens so + * we don't lose track. + */ +static unsigned long lookup_dcookie(struct mm_struct * mm, unsigned long addr, off_t * offset) +{ + unsigned long cookie = 0; + struct vm_area_struct * vma; + + for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) { + + if (!vma->vm_file) + continue; + + if (addr < vma->vm_start || addr >= vma->vm_end) + continue; + + cookie = fast_get_dcookie(vma->vm_file->f_dentry, + vma->vm_file->f_vfsmnt); + *offset = (vma->vm_pgoff << PAGE_SHIFT) + addr - vma->vm_start; + break; + } + + return cookie; +} + + +static unsigned long last_cookie = ~0UL; + +static void add_cpu_switch(int i) +{ + add_event_entry(ESCAPE_CODE); + add_event_entry(CPU_SWITCH_CODE); + add_event_entry(i); + last_cookie = ~0UL; +} + +static void add_mode_switch(unsigned int cpu_mode) +{ + add_event_entry(ESCAPE_CODE); + if (cpu_mode == 0) + add_event_entry(USER_ENTER_SWITCH_CODE); + else if (cpu_mode == 1) + add_event_entry(KERNEL_ENTER_SWITCH_CODE); + else + add_event_entry(XEN_ENTER_SWITCH_CODE); +} + +static void add_dom_switch(int domain_id) +{ + add_event_entry(ESCAPE_CODE); + add_event_entry(DOMAIN_SWITCH_CODE); + add_event_entry(domain_id); +} + +static void +add_user_ctx_switch(struct task_struct const * task, unsigned long cookie) +{ + add_event_entry(ESCAPE_CODE); + add_event_entry(CTX_SWITCH_CODE); + add_event_entry(task->pid); + add_event_entry(cookie); + /* Another code for daemon back-compat */ + add_event_entry(ESCAPE_CODE); + add_event_entry(CTX_TGID_CODE); + add_event_entry(task->tgid); +} + + +static void add_cookie_switch(unsigned long cookie) +{ + add_event_entry(ESCAPE_CODE); + add_event_entry(COOKIE_SWITCH_CODE); + add_event_entry(cookie); +} + + +static void add_sample_entry(unsigned long offset, unsigned long event) +{ + add_event_entry(offset); + add_event_entry(event); +} + + +static void add_us_sample(struct mm_struct * mm, struct op_sample * s) +{ + unsigned long cookie; + off_t offset; + + cookie = lookup_dcookie(mm, s->eip, &offset); + + if (!cookie) { + atomic_inc(&oprofile_stats.sample_lost_no_mapping); + return; + } + + if (cookie != last_cookie) { + add_cookie_switch(cookie); + last_cookie = cookie; + } + + add_sample_entry(offset, s->event); +} + + +/* Add a sample to the global event buffer. If possible the + * sample is converted into a persistent dentry/offset pair + * for later lookup from userspace. 
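+ *
+ * A nonzero cpu_mode (kernel or Xen) logs the raw EIP; user
+ * samples need a live mm to resolve a dcookie, otherwise the
+ * sample is dropped and sample_lost_no_mm is bumped.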
+ */ +static void add_sample(struct mm_struct * mm, struct op_sample * s, int cpu_mode) +{ + if (cpu_mode) { + add_sample_entry(s->eip, s->event); + } else if (mm) { + add_us_sample(mm, s); + } else { + atomic_inc(&oprofile_stats.sample_lost_no_mm); + } +} + + +static void release_mm(struct mm_struct * mm) +{ + if (!mm) + return; + up_read(&mm->mmap_sem); + mmput(mm); +} + + +static struct mm_struct * take_tasks_mm(struct task_struct * task) +{ + struct mm_struct * mm = get_task_mm(task); + if (mm) + down_read(&mm->mmap_sem); + return mm; +} + + +static inline int is_ctx_switch(unsigned long val) +{ + return val == ~0UL; +} + +static inline int is_dom_switch(unsigned long val) +{ + return val == ~1UL; +} + + +/* "acquire" as many cpu buffer slots as we can */ +static unsigned long get_slots(struct oprofile_cpu_buffer * b) +{ + unsigned long head = b->head_pos; + unsigned long tail = b->tail_pos; + + /* + * Subtle. This resets the persistent last_task + * and in_kernel values used for switching notes. + * BUT, there is a small window between reading + * head_pos, and this call, that means samples + * can appear at the new head position, but not + * be prefixed with the notes for switching + * kernel mode or a task switch. This small hole + * can lead to mis-attribution or samples where + * we don't know if it's in the kernel or not, + * at the start of an event buffer. + */ + cpu_buffer_reset(b); + + if (head >= tail) + return head - tail; + + return head + (b->buffer_size - tail); +} + + +static void increment_tail(struct oprofile_cpu_buffer * b) +{ + unsigned long new_tail = b->tail_pos + 1; + + rmb(); + + if (new_tail < (b->buffer_size)) + b->tail_pos = new_tail; + else + b->tail_pos = 0; +} + + +/* Move tasks along towards death. Any tasks on dead_tasks + * will definitely have no remaining references in any + * CPU buffers at this point, because we use two lists, + * and to have reached the list, it must have gone through + * one full sync already. + */ +void process_task_mortuary(void) +{ + struct list_head * pos; + struct list_head * pos2; + struct task_struct * task; + + spin_lock(&task_mortuary); + + list_for_each_safe(pos, pos2, &dead_tasks) { + task = list_entry(pos, struct task_struct, tasks); + list_del(&task->tasks); + free_task(task); + } + + list_for_each_safe(pos, pos2, &dying_tasks) { + task = list_entry(pos, struct task_struct, tasks); + list_del(&task->tasks); + list_add_tail(&task->tasks, &dead_tasks); + } + + spin_unlock(&task_mortuary); +} + + +static void mark_done(int cpu) +{ + int i; + + cpu_set(cpu, marked_cpus); + + for_each_online_cpu(i) { + if (!cpu_isset(i, marked_cpus)) + return; + } + + /* All CPUs have been processed at least once, + * we can process the mortuary once + */ + process_task_mortuary(); + + cpus_clear(marked_cpus); +} + + +/* Sync one of the CPU's buffers into the global event buffer. + * Here we need to go through each batch of samples punctuated + * by context switch notes, taking the task's mmap_sem and doing + * lookup in task->mm->mmap to convert EIP into dcookie/offset + * value. 
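+ *
+ * Escape values in the eip field mark special records: ~0UL
+ * is a context/mode switch and ~1UL announces a switch to
+ * another domain, whose samples are logged with a zeroed PC.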
+ */ +void sync_buffer(int cpu) +{ + struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[cpu]; + struct mm_struct *mm = NULL; + struct task_struct * new; + unsigned long cookie = 0; + int cpu_mode = 1; + unsigned int i; + unsigned long available; + int domain_switch = 0; + + down(&buffer_sem); + + add_cpu_switch(cpu); + + /* Remember, only we can modify tail_pos */ + + available = get_slots(cpu_buf); + + for (i=0; i < available; ++i) { + struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos]; + + if (is_dom_switch(s->eip)) { + add_dom_switch((int)s->event); + domain_switch = 1; + } else if (is_ctx_switch(s->eip)) { + if (s->event <= 2) { + /* xen/kernel/userspace switch */ + cpu_mode = s->event; + add_mode_switch(s->event); + } else { + struct mm_struct * oldmm = mm; + + /* userspace context switch */ + new = (struct task_struct *)s->event; + + release_mm(oldmm); + mm = take_tasks_mm(new); + if (mm != oldmm) + cookie = get_exec_dcookie(mm); + add_user_ctx_switch(new, cookie); + } + } else { + if (domain_switch) { + // PC is irrelevant for other domains + s->eip = 0; + add_sample(NULL, s, 2); + domain_switch = 0; + } else + add_sample(mm, s, cpu_mode); + } + + increment_tail(cpu_buf); + } + release_mm(mm); + + mark_done(cpu); + + up(&buffer_sem); +} diff -Naurp ../xeno-unstable.bk/linux-2.6.11-xen-sparse/drivers/oprofile/buffer_sync.h linux-2.6.11-xen-sparse/drivers/oprofile/buffer_sync.h --- ../xeno-unstable.bk/linux-2.6.11-xen-sparse/drivers/oprofile/buffer_sync.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-2.6.11-xen-sparse/drivers/oprofile/buffer_sync.h 2005-04-15 09:01:48.000000000 -0500 @@ -0,0 +1,22 @@ +/** + * @file buffer_sync.h + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon + */ + +#ifndef OPROFILE_BUFFER_SYNC_H +#define OPROFILE_BUFFER_SYNC_H + +/* add the necessary profiling hooks */ +int sync_start(void); + +/* remove the hooks */ +void sync_stop(void); + +/* sync the given CPU's buffer */ +void sync_buffer(int cpu); + +#endif /* OPROFILE_BUFFER_SYNC_H */ diff -Naurp ../xeno-unstable.bk/linux-2.6.11-xen-sparse/drivers/oprofile/cpu_buffer.c linux-2.6.11-xen-sparse/drivers/oprofile/cpu_buffer.c --- ../xeno-unstable.bk/linux-2.6.11-xen-sparse/drivers/oprofile/cpu_buffer.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-2.6.11-xen-sparse/drivers/oprofile/cpu_buffer.c 2005-04-15 09:01:48.000000000 -0500 @@ -0,0 +1,242 @@ +/** + * @file cpu_buffer.c + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon + * + * Modified by Aravind Menon for Xen + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. + * + * Each CPU has a local buffer that stores PC value/event + * pairs. We also log context switches when we notice them. + * Eventually each CPU's buffer is processed into the global + * event buffer by sync_buffer(). + * + * We use a local buffer for two reasons: an NMI or similar + * interrupt cannot synchronise, and high sampling rates + * would lead to catastrophic global synchronisation if + * a global buffer was used. 
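+ *
+ * The ring is single-writer/single-reader: the interrupt path
+ * only moves head_pos and the sync_buffer() worker only moves
+ * tail_pos, so the two sides need no shared lock.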
+ */ + +#include +#include +#include + +#include "cpu_buffer.h" +#include "buffer_sync.h" +#include "oprof.h" + +struct oprofile_cpu_buffer cpu_buffer[NR_CPUS] __cacheline_aligned; + +static void wq_sync_buffer(void *); + +#define DEFAULT_TIMER_EXPIRE (HZ / 10) +int work_enabled; + +static void __free_cpu_buffers(int num) +{ + int i; + + for_each_online_cpu(i) { + if (cpu_buffer[i].buffer) + vfree(cpu_buffer[i].buffer); + } +} + + +int alloc_cpu_buffers(void) +{ + int i; + + unsigned long buffer_size = fs_cpu_buffer_size; + + for_each_online_cpu(i) { + struct oprofile_cpu_buffer * b = &cpu_buffer[i]; + + b->buffer = vmalloc(sizeof(struct op_sample) * buffer_size); + if (!b->buffer) + goto fail; + + b->last_task = NULL; + b->last_cpu_mode = -1; + b->buffer_size = buffer_size; + b->tail_pos = 0; + b->head_pos = 0; + b->sample_received = 0; + b->sample_lost_overflow = 0; + b->cpu = i; + INIT_WORK(&b->work, wq_sync_buffer, b); + } + return 0; + +fail: + __free_cpu_buffers(i); + return -ENOMEM; +} + + +void free_cpu_buffers(void) +{ + __free_cpu_buffers(NR_CPUS); +} + + +void start_cpu_work(void) +{ + int i; + + work_enabled = 1; + + for_each_online_cpu(i) { + struct oprofile_cpu_buffer * b = &cpu_buffer[i]; + + /* + * Spread the work by 1 jiffy per cpu so they dont all + * fire at once. + */ + schedule_delayed_work_on(i, &b->work, DEFAULT_TIMER_EXPIRE + i); + } +} + + +void end_cpu_work(void) +{ + int i; + + work_enabled = 0; + + for_each_online_cpu(i) { + struct oprofile_cpu_buffer * b = &cpu_buffer[i]; + + cancel_delayed_work(&b->work); + } + + flush_scheduled_work(); +} + + +/* compute number of available slots in cpu_buffer queue */ +static unsigned long nr_available_slots(struct oprofile_cpu_buffer const * b) +{ + unsigned long head = b->head_pos; + unsigned long tail = b->tail_pos; + + if (tail > head) + return (tail - head) - 1; + + return tail + (b->buffer_size - head) - 1; +} + + +static void increment_head(struct oprofile_cpu_buffer * b) +{ + unsigned long new_head = b->head_pos + 1; + + /* Ensure anything written to the slot before we + * increment is visible */ + wmb(); + + if (new_head < (b->buffer_size)) + b->head_pos = new_head; + else + b->head_pos = 0; +} + + +/* This must be safe from any context. It's safe writing here + * because of the head/tail separation of the writer and reader + * of the CPU buffer. + * + * is_kernel is needed because on some architectures you cannot + * tell if you are in kernel or user space simply by looking at + * eip. We tag this in the buffer by generating kernel enter/exit + * events whenever is_kernel changes + */ +void oprofile_add_sample(unsigned long eip, unsigned int cpu_mode, + unsigned long event, int cpu) +{ + struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[cpu]; + struct task_struct * task; + + // aravind: In old code, why do we do the following? + // is_kernel = !!is_kernel; + + cpu_buf->sample_received++; + + + if (nr_available_slots(cpu_buf) < 3) { + cpu_buf->sample_lost_overflow++; + return; + } + + task = current; + + /* We treat samples from other domains in a special manner: each sample + is preceded by a record with eip equal to ~1UL. This record is non-sticky + i.e. it holds only for the following sample. 
The event field of this + record stores the domain id.*/ + if (eip == ~1UL) { + cpu_buf->buffer[cpu_buf->head_pos].eip = ~1UL; + cpu_buf->buffer[cpu_buf->head_pos].event = event; + increment_head(cpu_buf); + } else { + /* notice a switch from user->kernel or vice versa */ + if (cpu_buf->last_cpu_mode != cpu_mode) { + cpu_buf->last_cpu_mode = cpu_mode; + cpu_buf->buffer[cpu_buf->head_pos].eip = ~0UL; + cpu_buf->buffer[cpu_buf->head_pos].event = cpu_mode; + increment_head(cpu_buf); + } + + /* notice a task switch */ + if (cpu_buf->last_task != task) { + cpu_buf->last_task = task; + cpu_buf->buffer[cpu_buf->head_pos].eip = ~0UL; + cpu_buf->buffer[cpu_buf->head_pos].event = (unsigned long)task; + increment_head(cpu_buf); + } + /* Note: at this point, we lose the cpu_mode of a sample if it is from + another domain */ + + cpu_buf->buffer[cpu_buf->head_pos].eip = eip; + cpu_buf->buffer[cpu_buf->head_pos].event = event; + increment_head(cpu_buf); + } +} + + +/* Resets the cpu buffer to a sane state. */ +void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf) +{ + /* reset these to invalid values; the next sample + * collected will populate the buffer with proper + * values to initialize the buffer + */ + cpu_buf->last_cpu_mode = -1; + cpu_buf->last_task = NULL; +} + + +/* + * This serves to avoid cpu buffer overflow, and makes sure + * the task mortuary progresses + * + * By using schedule_delayed_work_on and then schedule_delayed_work + * we guarantee this will stay on the correct cpu + */ +static void wq_sync_buffer(void * data) +{ + struct oprofile_cpu_buffer * b = (struct oprofile_cpu_buffer *)data; + if (b->cpu != smp_processor_id()) { + printk("WQ on CPU%d, prefer CPU%d\n", + smp_processor_id(), b->cpu); + } + sync_buffer(b->cpu); + + /* don't re-add the work if we're shutting down */ + if (work_enabled) + schedule_delayed_work(&b->work, DEFAULT_TIMER_EXPIRE); +} diff -Naurp ../xeno-unstable.bk/linux-2.6.11-xen-sparse/drivers/oprofile/cpu_buffer.h linux-2.6.11-xen-sparse/drivers/oprofile/cpu_buffer.h --- ../xeno-unstable.bk/linux-2.6.11-xen-sparse/drivers/oprofile/cpu_buffer.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-2.6.11-xen-sparse/drivers/oprofile/cpu_buffer.h 2005-04-15 09:01:48.000000000 -0500 @@ -0,0 +1,51 @@ +/** + * @file cpu_buffer.h + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon + */ + +#ifndef OPROFILE_CPU_BUFFER_H +#define OPROFILE_CPU_BUFFER_H + +#include +#include +#include +#include + +struct task_struct; + +int alloc_cpu_buffers(void); +void free_cpu_buffers(void); + +void start_cpu_work(void); +void end_cpu_work(void); + +/* CPU buffer is composed of such entries (which are + * also used for context switch notes) + */ +struct op_sample { + unsigned long eip; + unsigned long event; +}; + +struct oprofile_cpu_buffer { + volatile unsigned long head_pos; + volatile unsigned long tail_pos; + unsigned long buffer_size; + struct task_struct * last_task; + int last_cpu_mode; + struct op_sample * buffer; + unsigned long sample_received; + unsigned long sample_lost_overflow; + int cpu; + struct work_struct work; +} ____cacheline_aligned; + +extern struct oprofile_cpu_buffer cpu_buffer[]; + +void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf); + +#endif /* OPROFILE_CPU_BUFFER_H */ diff -Naurp ../xeno-unstable.bk/linux-2.6.11-xen-sparse/drivers/oprofile/event_buffer.c linux-2.6.11-xen-sparse/drivers/oprofile/event_buffer.c --- ../xeno-unstable.bk/linux-2.6.11-xen-sparse/drivers/oprofile/event_buffer.c 
1969-12-31 18:00:00.000000000 -0600 +++ linux-2.6.11-xen-sparse/drivers/oprofile/event_buffer.c 2005-04-15 09:01:48.000000000 -0500 @@ -0,0 +1,187 @@ +/** + * @file event_buffer.c + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon + * + * This is the global event buffer that the user-space + * daemon reads from. The event buffer is an untyped array + * of unsigned longs. Entries are prefixed by the + * escape value ESCAPE_CODE followed by an identifying code. + */ + +#include +#include +#include +#include +#include +#include + +#include "oprof.h" +#include "event_buffer.h" +#include "oprofile_stats.h" + +DECLARE_MUTEX(buffer_sem); + +static unsigned long buffer_opened; +static DECLARE_WAIT_QUEUE_HEAD(buffer_wait); +static unsigned long * event_buffer; +static unsigned long buffer_size; +static unsigned long buffer_watershed; +static size_t buffer_pos; +/* atomic_t because wait_event checks it outside of buffer_sem */ +static atomic_t buffer_ready = ATOMIC_INIT(0); + +/* Add an entry to the event buffer. When we + * get near to the end we wake up the process + * sleeping on the read() of the file. + */ +void add_event_entry(unsigned long value) +{ + if (buffer_pos == buffer_size) { + atomic_inc(&oprofile_stats.event_lost_overflow); + return; + } + + event_buffer[buffer_pos] = value; + if (++buffer_pos == buffer_size - buffer_watershed) { + atomic_set(&buffer_ready, 1); + wake_up(&buffer_wait); + } +} + + +/* Wake up the waiting process if any. This happens + * on "echo 0 >/dev/oprofile/enable" so the daemon + * processes the data remaining in the event buffer. + * aravind: also called on echo 1 > /dev/oprofile/dump + */ +void wake_up_buffer_waiter(void) +{ + down(&buffer_sem); + atomic_set(&buffer_ready, 1); + wake_up(&buffer_wait); + up(&buffer_sem); +} + + +int alloc_event_buffer(void) +{ + int err = -ENOMEM; + + spin_lock(&oprofilefs_lock); + buffer_size = fs_buffer_size; + buffer_watershed = fs_buffer_watershed; + spin_unlock(&oprofilefs_lock); + + if (buffer_watershed >= buffer_size) + return -EINVAL; + + event_buffer = vmalloc(sizeof(unsigned long) * buffer_size); + if (!event_buffer) + goto out; + + err = 0; +out: + return err; +} + + +void free_event_buffer(void) +{ + vfree(event_buffer); +} + + +int event_buffer_open(struct inode * inode, struct file * file) +{ + int err = -EPERM; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (test_and_set_bit(0, &buffer_opened)) + return -EBUSY; + + /* Register as a user of dcookies + * to ensure they persist for the lifetime of + * the open event file + */ + err = -EINVAL; + file->private_data = dcookie_register(); + if (!file->private_data) + goto out; + + if ((err = oprofile_setup())) + goto fail; + + /* NB: the actual start happens from userspace + * echo 1 >/dev/oprofile/enable + */ + + return 0; + +fail: + dcookie_unregister(file->private_data); +out: + clear_bit(0, &buffer_opened); + return err; +} + + +int event_buffer_release(struct inode * inode, struct file * file) +{ + oprofile_stop(); + oprofile_shutdown(); + dcookie_unregister(file->private_data); + buffer_pos = 0; + atomic_set(&buffer_ready, 0); + clear_bit(0, &buffer_opened); + return 0; +} + + +ssize_t event_buffer_read(struct file * file, char __user * buf, size_t count, loff_t * offset) +{ + int retval = -EINVAL; + size_t const max = buffer_size * sizeof(unsigned long); + + /* handling partial reads is more trouble than it's worth */ + if (count != max || *offset) + return -EINVAL; + + 
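+	/* Sleep until add_event_entry() fills the buffer up to the
+	 * watershed, or wake_up_buffer_waiter() forces a wakeup on
+	 * disable/dump. */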
wait_event_interruptible(buffer_wait, atomic_read(&buffer_ready)); + + if (signal_pending(current)) + return -EINTR; + + /* can't currently happen */ + if (!atomic_read(&buffer_ready)) + return -EAGAIN; + + down(&buffer_sem); + + atomic_set(&buffer_ready, 0); + + retval = -EFAULT; + + count = buffer_pos * sizeof(unsigned long); + + if (copy_to_user(buf, event_buffer, count)) + goto out; + + retval = count; + buffer_pos = 0; + +out: + up(&buffer_sem); + return retval; +} + +struct file_operations event_buffer_fops = { + .open = event_buffer_open, + .release = event_buffer_release, + .read = event_buffer_read, +}; diff -Naurp ../xeno-unstable.bk/linux-2.6.11-xen-sparse/drivers/oprofile/event_buffer.h linux-2.6.11-xen-sparse/drivers/oprofile/event_buffer.h --- ../xeno-unstable.bk/linux-2.6.11-xen-sparse/drivers/oprofile/event_buffer.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-2.6.11-xen-sparse/drivers/oprofile/event_buffer.h 2005-04-15 09:01:48.000000000 -0500 @@ -0,0 +1,53 @@ +/** + * @file event_buffer.h + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon + * + * Modified by Aravind Menon for Xen + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. + */ + +#ifndef EVENT_BUFFER_H +#define EVENT_BUFFER_H + +#include +#include + +int alloc_event_buffer(void); + +void free_event_buffer(void); + +/* wake up the process sleeping on the event file */ +void wake_up_buffer_waiter(void); + +/* Each escaped entry is prefixed by ESCAPE_CODE + * then one of the following codes, then the + * relevant data. + */ +#define ESCAPE_CODE ~0UL +#define CTX_SWITCH_CODE 1 +#define CPU_SWITCH_CODE 2 +#define COOKIE_SWITCH_CODE 3 +#define KERNEL_ENTER_SWITCH_CODE 4 +#define USER_ENTER_SWITCH_CODE 5 +#define MODULE_LOADED_CODE 6 +#define CTX_TGID_CODE 7 + +#define XEN_ENTER_SWITCH_CODE 10 +#define DOMAIN_SWITCH_CODE 11 + +/* add data to the event buffer */ +void add_event_entry(unsigned long data); + +extern struct file_operations event_buffer_fops; + +/* mutex between sync_cpu_buffers() and the + * file reading code. + */ +extern struct semaphore buffer_sem; + +#endif /* EVENT_BUFFER_H */ diff -Naurp ../xeno-unstable.bk/linux-2.6.11-xen-sparse/drivers/oprofile/oprof.c linux-2.6.11-xen-sparse/drivers/oprofile/oprof.c --- ../xeno-unstable.bk/linux-2.6.11-xen-sparse/drivers/oprofile/oprof.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-2.6.11-xen-sparse/drivers/oprofile/oprof.c 2005-04-15 09:01:48.000000000 -0500 @@ -0,0 +1,202 @@ +/** + * @file oprof.c + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon + * + * Modified by Aravind Menon for Xen + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include "oprof.h" +#include "event_buffer.h" +#include "cpu_buffer.h" +#include "buffer_sync.h" +#include "oprofile_stats.h" + +struct oprofile_operations * oprofile_ops; +unsigned long oprofile_started; +static unsigned long is_setup; +static DECLARE_MUTEX(start_sem); + +/* timer + 0 - use performance monitoring hardware if available + 1 - use the timer int mechanism regardless + */ +static int timer = 0; + +extern unsigned int adomains, pdomains; +extern int active_domains[MAX_OPROF_DOMAINS], passive_domains[MAX_OPROF_DOMAINS]; + +int oprofile_set_active(void) +{ + if (oprofile_ops->set_active) + return oprofile_ops->set_active(active_domains, adomains); + + return -EINVAL; +} + +int oprofile_set_passive(void) +{ + if (oprofile_ops->set_passive) + return oprofile_ops->set_passive(passive_domains, pdomains); + + return -EINVAL; +} + +int oprofile_setup(void) +{ + int err; + + down(&start_sem); + + if ((err = alloc_cpu_buffers())) + goto out; + + if ((err = alloc_event_buffer())) + goto out1; + + if (oprofile_ops->setup && (err = oprofile_ops->setup())) + goto out2; + + /* Note even though this starts part of the + * profiling overhead, it's necessary to prevent + * us missing task deaths and eventually oopsing + * when trying to process the event buffer. + */ + if ((err = sync_start())) + goto out3; + + is_setup = 1; + up(&start_sem); + return 0; + +out3: + if (oprofile_ops->shutdown) + oprofile_ops->shutdown(); +out2: + free_event_buffer(); +out1: + free_cpu_buffers(); +out: + up(&start_sem); + return err; +} + + +/* Actually start profiling (echo 1>/dev/oprofile/enable) */ +int oprofile_start(void) +{ + int err = -EINVAL; + + down(&start_sem); + + if (!is_setup) + goto out; + + err = 0; + + if (oprofile_started) + goto out; + + oprofile_reset_stats(); + + if ((err = oprofile_ops->start())) + goto out; + + oprofile_started = 1; +out: + up(&start_sem); + return err; +} + + +/* echo 0>/dev/oprofile/enable */ +void oprofile_stop(void) +{ + down(&start_sem); + if (!oprofile_started) + goto out; + oprofile_ops->stop(); + oprofile_started = 0; + /* wake up the daemon to read what remains */ + wake_up_buffer_waiter(); +out: + up(&start_sem); +} + + +void oprofile_shutdown(void) +{ + down(&start_sem); + sync_stop(); + if (oprofile_ops->shutdown) + oprofile_ops->shutdown(); + is_setup = 0; + free_event_buffer(); + free_cpu_buffers(); + up(&start_sem); +} + + +extern void timer_init(struct oprofile_operations ** ops); + + +static int __init oprofile_init(void) +{ + /* Architecture must fill in the interrupt ops and the + * logical CPU type, or we can fall back to the timer + * interrupt profiler. 
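+	 * In the Xen case oprofile_arch_init() issues PMC_INIT to ask
+	 * the hypervisor for counter support; -ENODEV (or the 'timer'
+	 * module parameter) selects the timer fallback instead.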
+ */ + int err = oprofile_arch_init(&oprofile_ops); + + if (err == -ENODEV || timer) { + timer_init(&oprofile_ops); + err = 0; + } else if (err) { + goto out; + } + + if (!oprofile_ops->cpu_type) { + printk(KERN_ERR "oprofile: cpu_type not set !\n"); + err = -EFAULT; + } else { + err = oprofilefs_register(); + } + + if (err) + goto out_exit; +out: + return err; +out_exit: + oprofile_arch_exit(); + goto out; +} + + +static void __exit oprofile_exit(void) +{ + oprofilefs_unregister(); + oprofile_arch_exit(); +} + + +module_init(oprofile_init); +module_exit(oprofile_exit); + +module_param_named(timer, timer, int, 0644); +MODULE_PARM_DESC(timer, "force use of timer interrupt"); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("John Levon "); +MODULE_DESCRIPTION("OProfile system profiler"); diff -Naurp ../xeno-unstable.bk/linux-2.6.11-xen-sparse/drivers/oprofile/oprof.h linux-2.6.11-xen-sparse/drivers/oprofile/oprof.h --- ../xeno-unstable.bk/linux-2.6.11-xen-sparse/drivers/oprofile/oprof.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-2.6.11-xen-sparse/drivers/oprofile/oprof.h 2005-04-15 09:01:48.000000000 -0500 @@ -0,0 +1,35 @@ +/** + * @file oprof.h + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon + */ + +#ifndef OPROF_H +#define OPROF_H + +int oprofile_setup(void); +void oprofile_shutdown(void); + +int oprofilefs_register(void); +void oprofilefs_unregister(void); + +int oprofile_start(void); +void oprofile_stop(void); + +struct oprofile_operations; + +extern unsigned long fs_buffer_size; +extern unsigned long fs_cpu_buffer_size; +extern unsigned long fs_buffer_watershed; +extern struct oprofile_operations * oprofile_ops; +extern unsigned long oprofile_started; + +struct super_block; +struct dentry; + +void oprofile_create_files(struct super_block * sb, struct dentry * root); + +#endif /* OPROF_H */ diff -Naurp ../xeno-unstable.bk/linux-2.6.11-xen-sparse/drivers/oprofile/oprofile_files.c linux-2.6.11-xen-sparse/drivers/oprofile/oprofile_files.c --- ../xeno-unstable.bk/linux-2.6.11-xen-sparse/drivers/oprofile/oprofile_files.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-2.6.11-xen-sparse/drivers/oprofile/oprofile_files.c 2005-04-15 09:01:48.000000000 -0500 @@ -0,0 +1,238 @@ +/** + * @file oprofile_files.c + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon + * + * Modified by Aravind Menon for Xen + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. 
+ */ + +#include +#include +#include +#include + +#include "event_buffer.h" +#include "oprofile_stats.h" +#include "oprof.h" + +unsigned long fs_buffer_size = 131072; +unsigned long fs_cpu_buffer_size = 8192; +unsigned long fs_buffer_watershed = 32768; /* FIXME: tune */ + + +static ssize_t pointer_size_read(struct file * file, char __user * buf, size_t count, loff_t * offset) +{ + return oprofilefs_ulong_to_user(sizeof(void *), buf, count, offset); +} + + +static struct file_operations pointer_size_fops = { + .read = pointer_size_read, +}; + + +static ssize_t cpu_type_read(struct file * file, char __user * buf, size_t count, loff_t * offset) +{ + return oprofilefs_str_to_user(oprofile_ops->cpu_type, buf, count, offset); +} + + +static struct file_operations cpu_type_fops = { + .read = cpu_type_read, +}; + + +static ssize_t enable_read(struct file * file, char __user * buf, size_t count, loff_t * offset) +{ + return oprofilefs_ulong_to_user(oprofile_started, buf, count, offset); +} + + +static ssize_t enable_write(struct file *file, char const __user * buf, size_t count, loff_t * offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + + if (val) + retval = oprofile_start(); + else + oprofile_stop(); + + if (retval) + return retval; + return count; +} + + +static struct file_operations enable_fops = { + .read = enable_read, + .write = enable_write, +}; + + +static ssize_t dump_write(struct file *file, char const __user * buf, size_t count, loff_t * offset) +{ + wake_up_buffer_waiter(); + return count; +} + + +static struct file_operations dump_fops = { + .write = dump_write, +}; + +#define TMPBUFSIZE 50 + +unsigned int adomains = 0; +long active_domains[MAX_OPROF_DOMAINS]; + +extern int oprofile_set_active(void); + +static ssize_t adomain_write(struct file *file, char const __user *buf, size_t count, loff_t * offset) +{ + char tmpbuf[TMPBUFSIZE]; + char *startp = tmpbuf; + char *endp = tmpbuf; + int i; + unsigned long val; + + if (*offset) + return -EINVAL; + if (!count) + return 0; + if (count > TMPBUFSIZE - 1) + return -EINVAL; + + memset(tmpbuf, 0x0, TMPBUFSIZE); + + if (copy_from_user(tmpbuf, buf, count)) + return -EFAULT; + + for (i = 0; i < MAX_OPROF_DOMAINS; i++) + active_domains[i] = -1; + adomains = 0; + + while (1) { + val = simple_strtol(startp, &endp, 0); + if (endp == startp) + break; + while (ispunct(*endp)) + endp++; + active_domains[adomains++] = val; + if (adomains >= MAX_OPROF_DOMAINS) + break; + startp = endp; + } + if (oprofile_set_active()) + return -EINVAL; + return count; +} + +static ssize_t adomain_read(struct file *file, char __user * buf, size_t count, loff_t * offset) +{ + char tmpbuf[TMPBUFSIZE]; + size_t len = 0; + int i; + /* This is all screwed up if we run out of space */ + for (i = 0; i < adomains; i++) + len += snprintf(tmpbuf + len, TMPBUFSIZE - len, "%u ", (unsigned int)active_domains[i]); + len += snprintf(tmpbuf + len, TMPBUFSIZE - len, "\n"); + return simple_read_from_buffer((void __user *)buf, count, offset, tmpbuf, len); +} + + +static struct file_operations active_domain_ops = { + .read = adomain_read, + .write = adomain_write, +}; + +unsigned int pdomains = 0; +long passive_domains[MAX_OPROF_DOMAINS]; + +extern int oprofile_set_passive(void); + +static ssize_t pdomain_write(struct file *file, char const __user *buf, size_t count, loff_t * offset) +{ + char tmpbuf[TMPBUFSIZE]; + char *startp = tmpbuf; + char *endp = tmpbuf; + int i; + 
unsigned long val; + + if (*offset) + return -EINVAL; + if (!count) + return 0; + if (count > TMPBUFSIZE - 1) + return -EINVAL; + + memset(tmpbuf, 0x0, TMPBUFSIZE); + + if (copy_from_user(tmpbuf, buf, count)) + return -EFAULT; + + for (i = 0; i < MAX_OPROF_DOMAINS; i++) + passive_domains[i] = -1; + pdomains = 0; + + while (1) { + val = simple_strtol(startp, &endp, 0); + if (endp == startp) + break; + while (ispunct(*endp)) + endp++; + passive_domains[pdomains++] = val; + if (pdomains >= MAX_OPROF_DOMAINS) + break; + startp = endp; + } + if (oprofile_set_passive()) + return -EINVAL; + return count; +} + +static ssize_t pdomain_read(struct file *file, char __user * buf, size_t count, loff_t * offset) +{ + char tmpbuf[TMPBUFSIZE]; + size_t len = 0; + int i; + /* This is all screwed up if we run out of space */ + for (i = 0; i < pdomains; i++) + len += snprintf(tmpbuf + len, TMPBUFSIZE - len, "%u ", (unsigned int)passive_domains[i]); + len += snprintf (tmpbuf + len, TMPBUFSIZE - len, "\n"); + return simple_read_from_buffer((void __user *)buf, count, offset, tmpbuf, len); +} + +static struct file_operations passive_domain_ops = { + .read = pdomain_read, + .write = pdomain_write, +}; + +void oprofile_create_files(struct super_block * sb, struct dentry * root) +{ + oprofilefs_create_file(sb, root, "enable", &enable_fops); + oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666); + oprofilefs_create_file(sb, root, "active_domains", &active_domain_ops); + oprofilefs_create_file(sb, root, "passive_domains", &passive_domain_ops); + oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops); + oprofilefs_create_ulong(sb, root, "buffer_size", &fs_buffer_size); + oprofilefs_create_ulong(sb, root, "buffer_watershed", &fs_buffer_watershed); + oprofilefs_create_ulong(sb, root, "cpu_buffer_size", &fs_cpu_buffer_size); + oprofilefs_create_file(sb, root, "cpu_type", &cpu_type_fops); + oprofilefs_create_file(sb, root, "pointer_size", &pointer_size_fops); + oprofile_create_stats_files(sb, root); + if (oprofile_ops->create_files) + oprofile_ops->create_files(sb, root); +} diff -Naurp ../xeno-unstable.bk/linux-2.6.11-xen-sparse/drivers/oprofile/oprofilefs.c linux-2.6.11-xen-sparse/drivers/oprofile/oprofilefs.c --- ../xeno-unstable.bk/linux-2.6.11-xen-sparse/drivers/oprofile/oprofilefs.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-2.6.11-xen-sparse/drivers/oprofile/oprofilefs.c 2005-04-15 09:01:48.000000000 -0500 @@ -0,0 +1,304 @@ +/** + * @file oprofilefs.c + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon + * + * A simple filesystem for configuration and + * access of oprofile. 
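+ *
+ * Each tunable is a regular file backed by an unsigned long:
+ * the value's address is stored in the inode's u.generic_ip
+ * and recovered into file->private_data by default_open().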
+ */ + +#include +#include +#include +#include +#include +#include + +#include "oprof.h" + +#define OPROFILEFS_MAGIC 0x6f70726f + +spinlock_t oprofilefs_lock = SPIN_LOCK_UNLOCKED; + +static struct inode * oprofilefs_get_inode(struct super_block * sb, int mode) +{ + struct inode * inode = new_inode(sb); + + if (inode) { + inode->i_mode = mode; + inode->i_uid = 0; + inode->i_gid = 0; + inode->i_blksize = PAGE_CACHE_SIZE; + inode->i_blocks = 0; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + } + return inode; +} + + +static struct super_operations s_ops = { + .statfs = simple_statfs, + .drop_inode = generic_delete_inode, +}; + + +ssize_t oprofilefs_str_to_user(char const * str, char __user * buf, size_t count, loff_t * offset) +{ + return simple_read_from_buffer(buf, count, offset, str, strlen(str)); +} + + +#define TMPBUFSIZE 50 + +ssize_t oprofilefs_ulong_to_user(unsigned long val, char __user * buf, size_t count, loff_t * offset) +{ + char tmpbuf[TMPBUFSIZE]; + size_t maxlen = snprintf(tmpbuf, TMPBUFSIZE, "%lu\n", val); + if (maxlen > TMPBUFSIZE) + maxlen = TMPBUFSIZE; + return simple_read_from_buffer(buf, count, offset, tmpbuf, maxlen); +} + + +int oprofilefs_ulong_from_user(unsigned long * val, char const __user * buf, size_t count) +{ + char tmpbuf[TMPBUFSIZE]; + + if (!count) + return 0; + + if (count > TMPBUFSIZE - 1) + return -EINVAL; + + memset(tmpbuf, 0x0, TMPBUFSIZE); + + if (copy_from_user(tmpbuf, buf, count)) + return -EFAULT; + + spin_lock(&oprofilefs_lock); + *val = simple_strtoul(tmpbuf, NULL, 0); + spin_unlock(&oprofilefs_lock); + return 0; +} + + +static ssize_t ulong_read_file(struct file * file, char __user * buf, size_t count, loff_t * offset) +{ + unsigned long * val = file->private_data; + return oprofilefs_ulong_to_user(*val, buf, count, offset); +} + + +static ssize_t ulong_write_file(struct file * file, char const __user * buf, size_t count, loff_t * offset) +{ + unsigned long * value = file->private_data; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(value, buf, count); + + if (retval) + return retval; + return count; +} + + +static int default_open(struct inode * inode, struct file * filp) +{ + if (inode->u.generic_ip) + filp->private_data = inode->u.generic_ip; + return 0; +} + + +static struct file_operations ulong_fops = { + .read = ulong_read_file, + .write = ulong_write_file, + .open = default_open, +}; + + +static struct file_operations ulong_ro_fops = { + .read = ulong_read_file, + .open = default_open, +}; + + +static struct dentry * __oprofilefs_create_file(struct super_block * sb, + struct dentry * root, char const * name, struct file_operations * fops, + int perm) +{ + struct dentry * dentry; + struct inode * inode; + struct qstr qname; + qname.name = name; + qname.len = strlen(name); + qname.hash = full_name_hash(qname.name, qname.len); + dentry = d_alloc(root, &qname); + if (!dentry) + return NULL; + inode = oprofilefs_get_inode(sb, S_IFREG | perm); + if (!inode) { + dput(dentry); + return NULL; + } + inode->i_fop = fops; + d_add(dentry, inode); + return dentry; +} + + +int oprofilefs_create_ulong(struct super_block * sb, struct dentry * root, + char const * name, unsigned long * val) +{ + struct dentry * d = __oprofilefs_create_file(sb, root, name, + &ulong_fops, 0644); + if (!d) + return -EFAULT; + + d->d_inode->u.generic_ip = val; + return 0; +} + + +int oprofilefs_create_ro_ulong(struct super_block * sb, struct dentry * root, + char const * name, unsigned long * val) +{ + struct 
+        struct dentry * d = __oprofilefs_create_file(sb, root, name,
+                &ulong_ro_fops, 0444);
+        if (!d)
+                return -EFAULT;
+
+        d->d_inode->u.generic_ip = val;
+        return 0;
+}
+
+
+static ssize_t atomic_read_file(struct file * file, char __user * buf, size_t count, loff_t * offset)
+{
+        atomic_t * val = file->private_data;
+        return oprofilefs_ulong_to_user(atomic_read(val), buf, count, offset);
+}
+
+
+static struct file_operations atomic_ro_fops = {
+        .read           = atomic_read_file,
+        .open           = default_open,
+};
+
+
+int oprofilefs_create_ro_atomic(struct super_block * sb, struct dentry * root,
+        char const * name, atomic_t * val)
+{
+        struct dentry * d = __oprofilefs_create_file(sb, root, name,
+                &atomic_ro_fops, 0444);
+        if (!d)
+                return -EFAULT;
+
+        d->d_inode->u.generic_ip = val;
+        return 0;
+}
+
+
+int oprofilefs_create_file(struct super_block * sb, struct dentry * root,
+        char const * name, struct file_operations * fops)
+{
+        if (!__oprofilefs_create_file(sb, root, name, fops, 0644))
+                return -EFAULT;
+        return 0;
+}
+
+
+int oprofilefs_create_file_perm(struct super_block * sb, struct dentry * root,
+        char const * name, struct file_operations * fops, int perm)
+{
+        if (!__oprofilefs_create_file(sb, root, name, fops, perm))
+                return -EFAULT;
+        return 0;
+}
+
+
+struct dentry * oprofilefs_mkdir(struct super_block * sb,
+        struct dentry * root, char const * name)
+{
+        struct dentry * dentry;
+        struct inode * inode;
+        struct qstr qname;
+        qname.name = name;
+        qname.len = strlen(name);
+        qname.hash = full_name_hash(qname.name, qname.len);
+        dentry = d_alloc(root, &qname);
+        if (!dentry)
+                return NULL;
+        inode = oprofilefs_get_inode(sb, S_IFDIR | 0755);
+        if (!inode) {
+                dput(dentry);
+                return NULL;
+        }
+        inode->i_op = &simple_dir_inode_operations;
+        inode->i_fop = &simple_dir_operations;
+        d_add(dentry, inode);
+        return dentry;
+}
+
+
+static int oprofilefs_fill_super(struct super_block * sb, void * data, int silent)
+{
+        struct inode * root_inode;
+        struct dentry * root_dentry;
+
+        sb->s_blocksize = PAGE_CACHE_SIZE;
+        sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+        sb->s_magic = OPROFILEFS_MAGIC;
+        sb->s_op = &s_ops;
+
+        root_inode = oprofilefs_get_inode(sb, S_IFDIR | 0755);
+        if (!root_inode)
+                return -ENOMEM;
+        root_inode->i_op = &simple_dir_inode_operations;
+        root_inode->i_fop = &simple_dir_operations;
+        root_dentry = d_alloc_root(root_inode);
+        if (!root_dentry) {
+                iput(root_inode);
+                return -ENOMEM;
+        }
+
+        sb->s_root = root_dentry;
+
+        oprofile_create_files(sb, root_dentry);
+
+        // FIXME: verify kill_litter_super removes our dentries
+        return 0;
+}
+
+
+static struct super_block *oprofilefs_get_sb(struct file_system_type *fs_type,
+        int flags, const char *dev_name, void *data)
+{
+        return get_sb_single(fs_type, flags, data, oprofilefs_fill_super);
+}
+
+
+static struct file_system_type oprofilefs_type = {
+        .owner          = THIS_MODULE,
+        .name           = "oprofilefs",
+        .get_sb         = oprofilefs_get_sb,
+        .kill_sb        = kill_litter_super,
+};
+
+
+int __init oprofilefs_register(void)
+{
+        return register_filesystem(&oprofilefs_type);
+}
+
+
+void __exit oprofilefs_unregister(void)
+{
+        unregister_filesystem(&oprofilefs_type);
+}
diff -Naurp ../xeno-unstable.bk/linux-2.6.11-xen-sparse/drivers/oprofile/oprofile_stats.c linux-2.6.11-xen-sparse/drivers/oprofile/oprofile_stats.c
--- ../xeno-unstable.bk/linux-2.6.11-xen-sparse/drivers/oprofile/oprofile_stats.c	1969-12-31 18:00:00.000000000 -0600
+++ linux-2.6.11-xen-sparse/drivers/oprofile/oprofile_stats.c	2005-04-15 09:01:48.000000000 -0500
@@ -0,0 +1,70 @@
+/**
+ * @file oprofile_stats.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ */
+
+#include <linux/oprofile.h>
+#include <linux/smp.h>
+#include <linux/cpumask.h>
+#include <linux/threads.h>
+
+#include "oprofile_stats.h"
+#include "cpu_buffer.h"
+
+struct oprofile_stat_struct oprofile_stats;
+
+void oprofile_reset_stats(void)
+{
+        struct oprofile_cpu_buffer * cpu_buf;
+        int i;
+
+        for_each_cpu(i) {
+                cpu_buf = &cpu_buffer[i];
+                cpu_buf->sample_received = 0;
+                cpu_buf->sample_lost_overflow = 0;
+        }
+
+        atomic_set(&oprofile_stats.sample_lost_no_mm, 0);
+        atomic_set(&oprofile_stats.sample_lost_no_mapping, 0);
+        atomic_set(&oprofile_stats.event_lost_overflow, 0);
+}
+
+
+void oprofile_create_stats_files(struct super_block * sb, struct dentry * root)
+{
+        struct oprofile_cpu_buffer * cpu_buf;
+        struct dentry * cpudir;
+        struct dentry * dir;
+        char buf[10];
+        int i;
+
+        dir = oprofilefs_mkdir(sb, root, "stats");
+        if (!dir)
+                return;
+
+        for_each_cpu(i) {
+                cpu_buf = &cpu_buffer[i];
+                snprintf(buf, 10, "cpu%d", i);
+                cpudir = oprofilefs_mkdir(sb, dir, buf);
+
+                /* Strictly speaking access to these ulongs is racy,
+                 * but we can't simply lock them, and they are
+                 * informational only.
+                 */
+                oprofilefs_create_ro_ulong(sb, cpudir, "sample_received",
+                        &cpu_buf->sample_received);
+                oprofilefs_create_ro_ulong(sb, cpudir, "sample_lost_overflow",
+                        &cpu_buf->sample_lost_overflow);
+        }
+
+        oprofilefs_create_ro_atomic(sb, dir, "sample_lost_no_mm",
+                &oprofile_stats.sample_lost_no_mm);
+        oprofilefs_create_ro_atomic(sb, dir, "sample_lost_no_mapping",
+                &oprofile_stats.sample_lost_no_mapping);
+        oprofilefs_create_ro_atomic(sb, dir, "event_lost_overflow",
+                &oprofile_stats.event_lost_overflow);
+}
diff -Naurp ../xeno-unstable.bk/linux-2.6.11-xen-sparse/drivers/oprofile/oprofile_stats.h linux-2.6.11-xen-sparse/drivers/oprofile/oprofile_stats.h
--- ../xeno-unstable.bk/linux-2.6.11-xen-sparse/drivers/oprofile/oprofile_stats.h	1969-12-31 18:00:00.000000000 -0600
+++ linux-2.6.11-xen-sparse/drivers/oprofile/oprofile_stats.h	2005-04-15 09:01:48.000000000 -0500
@@ -0,0 +1,32 @@
+/**
+ * @file oprofile_stats.h
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ */
+
+#ifndef OPROFILE_STATS_H
+#define OPROFILE_STATS_H
+
+#include <asm/atomic.h>
+
+struct oprofile_stat_struct {
+        atomic_t sample_lost_no_mm;
+        atomic_t sample_lost_no_mapping;
+        atomic_t event_lost_overflow;
+};
+
+extern struct oprofile_stat_struct oprofile_stats;
+
+/* reset all stats to zero */
+void oprofile_reset_stats(void);
+
+struct super_block;
+struct dentry;
+
+/* create the stats/ dir */
+void oprofile_create_stats_files(struct super_block * sb, struct dentry * root);
+
+#endif /* OPROFILE_STATS_H */
diff -Naurp ../xeno-unstable.bk/linux-2.6.11-xen-sparse/drivers/oprofile/timer_int.c linux-2.6.11-xen-sparse/drivers/oprofile/timer_int.c
--- ../xeno-unstable.bk/linux-2.6.11-xen-sparse/drivers/oprofile/timer_int.c	1969-12-31 18:00:00.000000000 -0600
+++ linux-2.6.11-xen-sparse/drivers/oprofile/timer_int.c	2005-04-15 09:01:48.000000000 -0500
@@ -0,0 +1,50 @@
+/**
+ * @file timer_int.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ */
+
+#include <linux/kernel.h>
+#include <linux/notifier.h>
+#include <linux/smp.h>
+#include <linux/oprofile.h>
+#include <linux/profile.h>
+#include <linux/init.h>
+#include <asm/ptrace.h>
+
+static int timer_notify(struct pt_regs *regs)
+{
+        int cpu = smp_processor_id();
+        unsigned long eip = profile_pc(regs);
+
+        oprofile_add_sample(eip, !user_mode(regs), 0, cpu);
+        return 0;
+}
+
+static int timer_start(void)
+{
+        return register_timer_hook(timer_notify);
+}
+
+
+static void timer_stop(void)
+{
+        unregister_timer_hook(timer_notify);
+}
+
+
+static struct oprofile_operations timer_ops = {
+        .start          = timer_start,
+        .stop           = timer_stop,
+        .cpu_type       = "timer"
+};
+
+
+void __init timer_init(struct oprofile_operations ** ops)
+{
+        *ops = &timer_ops;
+        printk(KERN_INFO "oprofile: using timer interrupt.\n");
+}
diff -Naurp ../xeno-unstable.bk/linux-2.6.11-xen-sparse/include/asm-xen/hypervisor.h linux-2.6.11-xen-sparse/include/asm-xen/hypervisor.h
--- ../xeno-unstable.bk/linux-2.6.11-xen-sparse/include/asm-xen/hypervisor.h	2005-04-15 08:27:45.000000000 -0500
+++ linux-2.6.11-xen-sparse/include/asm-xen/hypervisor.h	2005-04-15 09:01:49.000000000 -0500
@@ -4,6 +4,10 @@
  * Linux-specific hypervisor handling.
  *
  * Copyright (c) 2002-2004, K A Fraser
+ *
+ * Modified by Aravind Menon for supporting oprofile
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
  *
  * This file may be distributed separately from the Linux kernel, or
  * incorporated into other software packages, subject to the following license:
@@ -135,4 +139,20 @@ unsigned long allocate_empty_lowmem_regi
 
 #include
 
+static inline int
+HYPERVISOR_pmc_op(
+        int op, unsigned int arg1, unsigned int arg2)
+{
+        int ret;
+        unsigned long ign1, ign2, ign3;
+
+        __asm__ __volatile__ (
+                TRAP_INSTR
+                : "=a"(ret), "=b"(ign1), "=c"(ign2), "=d"(ign3)
+                : "0"(__HYPERVISOR_pmc_op), "1"(op), "2"(arg1), "3"(arg2)
+                : "memory" );
+
+        return ret;
+}
+
 #endif /* __HYPERVISOR_H__ */
diff -Naurp ../xeno-unstable.bk/linux-2.6.11-xen-sparse/include/linux/oprofile.h linux-2.6.11-xen-sparse/include/linux/oprofile.h
--- ../xeno-unstable.bk/linux-2.6.11-xen-sparse/include/linux/oprofile.h	1969-12-31 18:00:00.000000000 -0600
+++ linux-2.6.11-xen-sparse/include/linux/oprofile.h	2005-04-15 09:01:49.000000000 -0500
@@ -0,0 +1,117 @@
+/**
+ * @file oprofile.h
+ *
+ * API for machine-specific interrupts to interface
+ * to oprofile.
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ */
+
+#ifndef OPROFILE_H
+#define OPROFILE_H
+
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <asm/atomic.h>
+
+struct super_block;
+struct dentry;
+struct file_operations;
+
+/* Operations structure to be filled in */
+struct oprofile_operations {
+        /* create any necessary configuration files in the oprofile fs.
+         * Optional. */
+        int (*create_files)(struct super_block * sb, struct dentry * root);
+        /* setup active domains with Xen */
+        int (*set_active)(int *active_domains, unsigned int adomains);
+        /* setup passive domains with Xen */
+        int (*set_passive)(int *passive_domains, unsigned int pdomains);
+        /* Do any necessary interrupt setup. Optional. */
+        int (*setup)(void);
+        /* Do any necessary interrupt shutdown. Optional. */
+        void (*shutdown)(void);
+        /* Start delivering interrupts. */
+        int (*start)(void);
+        /* Stop delivering interrupts. */
+        void (*stop)(void);
+        /* CPU identification string. */
+        char * cpu_type;
+};
+
+/**
+ * One-time initialisation. *ops must be set to a filled-in
+ * operations structure. This is called even in timer interrupt
+ * mode.
+ *
+ * Return 0 on success.
+ */
+int oprofile_arch_init(struct oprofile_operations ** ops);
+
+/**
+ * One-time exit/cleanup for the arch.
+ */
+void oprofile_arch_exit(void);
+
+/**
+ * Add a sample. This may be called from any context. Pass
+ * smp_processor_id() as cpu.
+ */
+extern void oprofile_add_sample(unsigned long eip, unsigned int is_kernel,
+        unsigned long event, int cpu);
+
+/**
+ * Create a file of the given name as a child of the given root, with
+ * the specified file operations.
+ */
+int oprofilefs_create_file(struct super_block * sb, struct dentry * root,
+        char const * name, struct file_operations * fops);
+
+int oprofilefs_create_file_perm(struct super_block * sb, struct dentry * root,
+        char const * name, struct file_operations * fops, int perm);
+
+/** Create a file for read/write access to an unsigned long. */
+int oprofilefs_create_ulong(struct super_block * sb, struct dentry * root,
+        char const * name, ulong * val);
+
+/** Create a file for read-only access to an unsigned long. */
+int oprofilefs_create_ro_ulong(struct super_block * sb, struct dentry * root,
+        char const * name, ulong * val);
+
+/** Create a file for read-only access to an atomic_t. */
+int oprofilefs_create_ro_atomic(struct super_block * sb, struct dentry * root,
+        char const * name, atomic_t * val);
+
+/** create a directory */
+struct dentry * oprofilefs_mkdir(struct super_block * sb, struct dentry * root,
+        char const * name);
+
+/**
+ * Write the given asciz string to the given user buffer @buf, updating *offset
+ * appropriately. Returns bytes written or -EFAULT.
+ */
+ssize_t oprofilefs_str_to_user(char const * str, char __user * buf, size_t count, loff_t * offset);
+
+/**
+ * Convert an unsigned long value into ASCII and copy it to the user buffer @buf,
+ * updating *offset appropriately. Returns bytes written or -EFAULT.
+ */
+ssize_t oprofilefs_ulong_to_user(unsigned long val, char __user * buf, size_t count, loff_t * offset);
+
+/**
+ * Read an ASCII string for a number from a userspace buffer and fill *val on success.
+ * Returns 0 on success, < 0 on error.
+ */
+int oprofilefs_ulong_from_user(unsigned long * val, char const __user * buf, size_t count);
+
+/** lock for read/write safety */
+extern spinlock_t oprofilefs_lock;
+
+#endif /* OPROFILE_H */
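
For reference, the active_domains/passive_domains entries created above are
ordinary oprofilefs text files, so a user-space tool can program them before
starting a profiling run. The sketch below is illustrative only and not part
of the patch: it assumes oprofilefs is mounted at /dev/oprofile, and the
domain IDs are made-up examples. Note that pdomain_write() parses numbers
separated by any punctuation (simple_strtol()/ispunct()) and rejects writes
at a non-zero file offset, so the whole list should go in one write().

/* Hypothetical user-space sketch (assumed mount point, example domain IDs). */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        const char *path = "/dev/oprofile/passive_domains"; /* assumed mount */
        const char *doms = "1,2";   /* example IDs; any punctuation separates */
        int fd = open(path, O_WRONLY);

        if (fd < 0) {
                perror(path);
                return 1;
        }
        /* Single write() at offset 0: pdomain_write() returns -EINVAL for
         * writes at a non-zero *offset. */
        if (write(fd, doms, strlen(doms)) < 0)
                perror("write");
        close(fd);
        return 0;
}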