xen-ia64-devel

[Xen-ia64-devel] [PATCH 1/3] Sample implementation of Xenoprof for ia64

To: xen-ia64-devel@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-ia64-devel] [PATCH 1/3] Sample implementation of Xenoprof for ia64
From: SUZUKI Kazuhiro <kaz@xxxxxxxxxxxxxx>
Date: Mon, 03 Jul 2006 18:34:25 +0900 (JST)
Delivery-date: Mon, 03 Jul 2006 08:14:50 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-ia64-devel-request@lists.xensource.com?subject=help>
List-id: Discussion of the ia64 port of Xen <xen-ia64-devel.lists.xensource.com>
List-post: <mailto:xen-ia64-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-ia64-devel>, <mailto:xen-ia64-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-ia64-devel>, <mailto:xen-ia64-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-ia64-devel-bounces@xxxxxxxxxxxxxxxxxxx
Patch for the xen tree. It adds xen/arch/ia64/oprofile to the build and ports the Linux perfmon-2 subsystem (perfmon.c) into xen/arch/ia64/linux-xen.

Signed-off-by: SUZUKI Kazuhiro <kaz@xxxxxxxxxxxxxx>

diff -Nur xen-ia64-unstable.hg/xen/arch/ia64/Makefile xenoprof-ia64-unstable/xen/arch/ia64/Makefile
--- xen-ia64-unstable.hg/xen/arch/ia64/Makefile 2006-06-30 11:23:29.000000000 +0900
+++ xenoprof-ia64-unstable/xen/arch/ia64/Makefile       2006-06-30 15:25:01.000000000 +0900
@@ -2,6 +2,7 @@
 subdir-y += vmx
 subdir-y += linux
 subdir-y += linux-xen
+subdir-y += oprofile
 
 $(TARGET)-syms: linux-xen/head.o $(ALL_OBJS) xen.lds.s
        $(LD) $(LDFLAGS) -T xen.lds.s -N \
diff -Nur xen-ia64-unstable.hg/xen/arch/ia64/linux-xen/Makefile xenoprof-ia64-unstable/xen/arch/ia64/linux-xen/Makefile
--- xen-ia64-unstable.hg/xen/arch/ia64/linux-xen/Makefile       2006-06-22 13:37:13.000000000 +0900
+++ xenoprof-ia64-unstable/xen/arch/ia64/linux-xen/Makefile     2006-06-30 15:25:01.000000000 +0900
@@ -14,3 +14,4 @@
 obj-y += unaligned.o
 obj-y += unwind.o
 obj-y += iosapic.o
+obj-y += perfmon.o
diff -Nur xen-ia64-unstable.hg/xen/arch/ia64/linux-xen/perfmon.c xenoprof-ia64-unstable/xen/arch/ia64/linux-xen/perfmon.c
--- xen-ia64-unstable.hg/xen/arch/ia64/linux-xen/perfmon.c      1970-01-01 09:00:00.000000000 +0900
+++ xenoprof-ia64-unstable/xen/arch/ia64/linux-xen/perfmon.c    2006-07-03 15:06:40.000000000 +0900
@@ -0,0 +1,3023 @@
+/*
+ * This file implements the perfmon-2 subsystem which is used
+ * to program the IA-64 Performance Monitoring Unit (PMU).
+ *
+ * The initial version of perfmon.c was written by
+ * Ganesh Venkitachalam, IBM Corp.
+ *
+ * Then it was modified for perfmon-1.x by Stephane Eranian and
+ * David Mosberger, Hewlett Packard Co.
+ *
+ * Version Perfmon-2.x is a rewrite of perfmon-1.x
+ * by Stephane Eranian, Hewlett Packard Co.
+ *
+ * Copyright (C) 1999-2005  Hewlett Packard Co
+ *               Stephane Eranian <eranian@xxxxxxxxxx>
+ *               David Mosberger-Tang <davidm@xxxxxxxxxx>
+ *
+ * More information about perfmon available at:
+ *     http://www.hpl.hp.com/research/linux/perfmon
+ */
+
+#include <xen/config.h>
+#include <xen/kernel.h>
+#include <xen/sched.h>
+/* #include <linux/interrupt.h> */
+/* #include <linux/seq_file.h> */
+#include <xen/init.h>
+#include <asm/hw_irq.h>
+#include <xen/irq.h>
+#include <xen/mm.h>
+#include <linux/sysctl.h>
+#include <xen/guest_access.h>
+#include <xen/list.h>
+/* #include <linux/file.h> */
+#include <xen/bitops.h>
+#include <linux/completion.h>
+
+#include <asm/intrinsics.h>
+#include <asm/vcpu.h>
+#include <asm/page.h>
+#include <asm/perfmon.h>
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/delay.h>
+
+#ifdef CONFIG_PERFMON
+extern rwlock_t tasklist_lock;
+/*
+ * perfmon context state
+ */
+#define PFM_CTX_UNLOADED       1       /* context is not loaded onto any task */
+#define PFM_CTX_LOADED         2       /* context is loaded onto a task */
+#define PFM_CTX_MASKED         3       /* context is loaded but monitoring is masked due to overflow */
+#define PFM_CTX_ZOMBIE         4       /* owner of the context is closing it */
+
+#define PFM_INVALID_ACTIVATION (~0UL)
+
+/*
+ * depth of message queue
+ */
+#define PFM_MAX_MSGS           32
+#define PFM_CTXQ_EMPTY(g)      ((g)->ctx_msgq_head == (g)->ctx_msgq_tail)
+
+/*
+ * type of a PMU register (bitmask).
+ * bitmask structure:
+ *     bit0   : register implemented
+ *     bit1   : end marker
+ *     bit2-3 : reserved
+ *     bit4   : pmc has pmc.pm
+ *     bit5   : pmc controls a counter (has pmc.oi), pmd is used as counter
+ *     bit6-7 : register type
+ *     bit8-31: reserved
+ */
+#define PFM_REG_NOTIMPL                0x0 /* not implemented at all */
+#define PFM_REG_IMPL           0x1 /* register implemented */
+#define PFM_REG_END            0x2 /* end marker */
+#define PFM_REG_MONITOR                (0x1<<4|PFM_REG_IMPL) /* a PMC with a pmc.pm field only */
+#define PFM_REG_COUNTING       (0x2<<4|PFM_REG_MONITOR) /* a monitor + pmc.oi+ PMD used as a counter */
+#define PFM_REG_CONTROL                (0x4<<4|PFM_REG_IMPL) /* PMU control register */
+#define        PFM_REG_CONFIG          (0x8<<4|PFM_REG_IMPL) /* configuration register */
+#define PFM_REG_BUFFER         (0xc<<4|PFM_REG_IMPL) /* PMD used as buffer */
+
+#define PMC_IS_LAST(i) (pmu_conf->pmc_desc[i].type & PFM_REG_END)
+#define PMD_IS_LAST(i) (pmu_conf->pmd_desc[i].type & PFM_REG_END)
+
+#define PMC_OVFL_NOTIFY(i)     (ctx_pmds[i].flags &  PFM_REGFL_OVFL_NOTIFY)
+
+/* i assumed unsigned */
+#define PMC_IS_IMPL(i)   (i< PMU_MAX_PMCS && (pmu_conf->pmc_desc[i].type & PFM_REG_IMPL))
+#define PMD_IS_IMPL(i)   (i< PMU_MAX_PMDS && (pmu_conf->pmd_desc[i].type & PFM_REG_IMPL))
+
+/* XXX: these assume that register i is implemented */
+#define PMD_IS_COUNTING(i) ((pmu_conf->pmd_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING)
+#define PMC_IS_COUNTING(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING)
+#define PMC_IS_MONITOR(i)  ((pmu_conf->pmc_desc[i].type & PFM_REG_MONITOR)  == PFM_REG_MONITOR)
+#define PMC_IS_CONTROL(i)  ((pmu_conf->pmc_desc[i].type & PFM_REG_CONTROL)  == PFM_REG_CONTROL)
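+/*
+ * Worked example (informational): PFM_REG_COUNTING = (0x2<<4)|PFM_REG_MONITOR
+ * = 0x20|0x11 = 0x31, i.e. bits 0 (implemented), 4 (has pmc.pm) and
+ * 5 (controls a counter) must all be set for PMC_IS_COUNTING() to be true.
+ */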
+
+#define PMC_DFL_VAL(i)     pmu_conf->pmc_desc[i].default_value
+#define PMC_RSVD_MASK(i)   pmu_conf->pmc_desc[i].reserved_mask
+#define PMD_PMD_DEP(i)    pmu_conf->pmd_desc[i].dep_pmd[0]
+#define PMC_PMD_DEP(i)    pmu_conf->pmc_desc[i].dep_pmd[0]
+
+#define PFM_NUM_IBRS     IA64_NUM_DBG_REGS
+#define PFM_NUM_DBRS     IA64_NUM_DBG_REGS
+
+#define CTX_OVFL_NOBLOCK(c)    ((c)->ctx_fl_block == 0)
+#define CTX_HAS_SMPL(c)                ((c)->ctx_fl_is_sampling)
+#define PFM_CTX_TASK(h)                (h)->ctx_task
+
+#define PMU_PMC_OI             5 /* position of pmc.oi bit */
+
+/* XXX: does not support more than 64 PMDs */
+#define CTX_USED_PMD(mask) ctx_used_pmds[0] |= (mask)
+#define CTX_IS_USED_PMD(c) ((ctx_used_pmds[0] & (1UL << (c))) != 0UL)
+
+#define CTX_USED_MONITOR(ctx, mask) (ctx)->ctx_used_monitors[0] |= (mask)
+
+#define CTX_USED_IBR(ctx,n)    (ctx)->ctx_used_ibrs[(n)>>6] |= 1UL<< ((n) % 64)
+#define CTX_USED_DBR(ctx,n)    (ctx)->ctx_used_dbrs[(n)>>6] |= 1UL<< ((n) % 64)
+#define CTX_USES_DBREGS(ctx)   (((pfm_context_t *)(ctx))->ctx_fl_using_dbreg==1)
+#define PFM_CODE_RR    0       /* requesting code range restriction */
+#define PFM_DATA_RR    1       /* requesting data range restriction */
+
+#define PFM_CPUINFO_CLEAR(v)   pfm_get_cpu_var(pfm_syst_info) &= ~(v)
+#define PFM_CPUINFO_SET(v)     pfm_get_cpu_var(pfm_syst_info) |= (v)
+#define PFM_CPUINFO_GET()      pfm_get_cpu_var(pfm_syst_info)
+
+#define RDEP(x)        (1UL<<(x))
+
+/*
+ * context protection macros
+ * in SMP:
+ *     - we need to protect against CPU concurrency (spin_lock)
+ *     - we need to protect against PMU overflow interrupts (local_irq_disable)
+ * in UP:
+ *     - we need to protect against PMU overflow interrupts (local_irq_disable)
+ *
+ * spin_lock_irqsave()/spin_unlock_irqrestore():
+ *     in SMP: local_irq_disable + spin_lock
+ *     in UP : local_irq_disable
+ *
+ * spin_lock()/spin_unlock():
+ *     in UP : removed automatically
+ *     in SMP: protect against context accesses from other CPU. interrupts
+ *             are not masked. This is useful for the PMU interrupt handler
+ *             because we know we will not get PMU concurrency in that code.
+ */
+#define PROTECT_CTX(c, f) \
+       do {  \
+               DPRINT(("spinlock_irq_save ctx %p by [%d]\n", c, current->vcpu_id)); \
+               spin_lock_irqsave(&(c)->ctx_lock, f); \
+               DPRINT(("spinlocked ctx %p  by [%d]\n", c, current->vcpu_id)); \
+       } while(0)
+
+#define UNPROTECT_CTX(c, f) \
+       do { \
+               DPRINT(("spinlock_irq_restore ctx %p by [%d]\n", c, current->vcpu_id)); \
+               spin_unlock_irqrestore(&(c)->ctx_lock, f); \
+       } while(0)
+
+#define PROTECT_CTX_NOPRINT(f) \
+       do {  \
+               spin_lock_irqsave(&ctx_lock, f); \
+       } while(0)
+
+
+#define UNPROTECT_CTX_NOPRINT(f) \
+       do { \
+               spin_unlock_irqrestore(&ctx_lock, f); \
+       } while(0)
+
+
+#define PROTECT_CTX_NOIRQ(c) \
+       do {  \
+               spin_lock(&(c)->ctx_lock); \
+       } while(0)
+
+#define UNPROTECT_CTX_NOIRQ(c) \
+       do { \
+               spin_unlock(&(c)->ctx_lock); \
+       } while(0)
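+
+/*
+ * Illustrative usage sketch (not part of the original code):
+ *
+ *     unsigned long flags;
+ *     PROTECT_CTX(ctx, flags);
+ *     ... manipulate context state, safe from other CPUs and from the
+ *     PMU overflow interrupt ...
+ *     UNPROTECT_CTX(ctx, flags);
+ *
+ * The _NOIRQ variants are for paths such as the PMU interrupt handler,
+ * where interrupts are already masked.
+ */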
+
+
+#ifdef CONFIG_SMP
+
+#define GET_ACTIVATION()       pfm_get_cpu_var(pmu_activation_number)
+#define INC_ACTIVATION()       pfm_get_cpu_var(pmu_activation_number)++
+#define SET_ACTIVATION(c)      (c)->ctx_last_activation = GET_ACTIVATION()
+
+#else /* !CONFIG_SMP */
+#define SET_ACTIVATION(t)      do {} while(0)
+#define GET_ACTIVATION(t)      do {} while(0)
+#define INC_ACTIVATION(t)      do {} while(0)
+#endif /* CONFIG_SMP */
+
+#define SET_PMU_OWNER(t, c)    do { pfm_get_cpu_var(pmu_owner) = (t); pfm_get_cpu_var(pmu_ctx) = (c); } while(0)
+#define GET_PMU_OWNER()                pfm_get_cpu_var(pmu_owner)
+#define GET_PMU_CTX()          pfm_get_cpu_var(pmu_ctx)
+
+#define LOCK_PFS(g)            spin_lock_irqsave(&pfm_sessions.pfs_lock, g)
+#define UNLOCK_PFS(g)          spin_unlock_irqrestore(&pfm_sessions.pfs_lock, g)
+
+#define PFM_REG_RETFLAG_SET(flags, val)        do { flags &= ~PFM_REG_RETFL_MASK; flags |= (val); } while(0)
+
+/*
+ * cmp0 must be the value of pmc0
+ */
+#define PMC0_HAS_OVFL(cmp0)  (cmp0 & ~0x1UL)
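+/* informational note: bit 0 of pmc0 is the PMU freeze bit, so masking it off
+ * leaves only the per-counter overflow bits */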
+
+#define PFMFS_MAGIC 0xa0b4d889
+
+static int pfm_sysctl_debug = 0;
+static int pfm_sysctl_debug_ovfl = 0;
+
+/*
+ * debugging
+ */
+#define PFM_DEBUGGING 1
+#ifdef PFM_DEBUGGING
+#if 1
+#define DPRINT(a) \
+       do { \
+               if (unlikely(pfm_sysctl_debug >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->vcpu_id); printk a; } \
+       } while (0)
+
+#define DPRINT_ovfl(a) \
+       do { \
+               if (unlikely(pfm_sysctl_debug > 0 && pfm_sysctl_debug_ovfl >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->vcpu_id); printk a; } \
+       } while (0)
+#else
+#define DPRINT(a)      do {} while(0)
+#define DPRINT_ovfl(a) do {} while(0)
+#endif
+#endif
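+/*
+ * note on usage: DPRINT takes a fully parenthesized printk argument list,
+ * hence the double parentheses at call sites, e.g.
+ *
+ *     DPRINT(("ctx=%p state=%d\n", ctx, state));
+ */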
+
+/*
+ * 64-bit software counter structure
+ *
+ * the next_reset_type is applied to the next call to pfm_reset_regs()
+ */
+typedef struct {
+       unsigned long   val;            /* virtual 64bit counter value */
+       unsigned long   lval;           /* last reset value */
+       unsigned long   long_reset;     /* reset value on sampling overflow */
+       unsigned long   short_reset;    /* reset value on overflow */
+       unsigned long   reset_pmds[4];  /* which other pmds to reset when this counter overflows */
+       unsigned long   smpl_pmds[4];   /* which pmds are accessed when the counter overflows */
+       unsigned long   seed;           /* seed for random-number generator */
+       unsigned long   mask;           /* mask for random-number generator */
+       unsigned int    flags;          /* notify/do not notify */
+       unsigned long   eventid;        /* overflow event identifier */
+} pfm_counter_t;
+
+/*
+ * context flags
+ */
+typedef struct {
+       unsigned int block:1;           /* when 1, task will block on user notifications */
+       unsigned int system:1;          /* do system wide monitoring */
+       unsigned int using_dbreg:1;     /* using range restrictions (debug registers) */
+       unsigned int is_sampling:1;     /* true if using a custom format */
+       unsigned int excl_idle:1;       /* exclude idle task in system wide session */
+       unsigned int going_zombie:1;    /* context is zombie (MASKED+blocking) */
+       unsigned int trap_reason:2;     /* reason for going into pfm_handle_work() */
+       unsigned int no_msg:1;          /* no message sent on overflow */
+       unsigned int can_restart:1;     /* allowed to issue a PFM_RESTART */
+       unsigned int reserved:22;
+} pfm_context_flags_t;
+
+#define PFM_TRAP_REASON_NONE           0x0     /* default value */
+#define PFM_TRAP_REASON_BLOCK          0x1     /* we need to block on overflow */
+#define PFM_TRAP_REASON_RESET          0x2     /* we need to reset PMDs */
+
+
+/*
+ * perfmon context: encapsulates all the state of a monitoring session
+ */
+
+typedef struct pfm_context {
+       spinlock_t              ctx_lock;               /* context protection */
+
+       pfm_context_flags_t     ctx_flags;              /* bitmask of flags (block reason incl.) */
+       unsigned int            ctx_state;              /* state: active/inactive (no bitfield) */
+
+       struct task_struct      *ctx_task;              /* task to which context is attached */
+
+       unsigned long           ctx_ovfl_regs[4];       /* which registers overflowed (notification) */
+
+       struct completion       ctx_restart_done;       /* use for blocking notification mode */
+
+       unsigned long           ctx_used_pmds[4];       /* bitmask of PMD used */
+       unsigned long           ctx_all_pmds[4];        /* bitmask of all accessible PMDs */
+       unsigned long           ctx_reload_pmds[4];     /* bitmask of force reload PMD on ctxsw in */
+
+       unsigned long           ctx_all_pmcs[4];        /* bitmask of all accessible PMCs */
+       unsigned long           ctx_reload_pmcs[4];     /* bitmask of force reload PMC on ctxsw in */
+       unsigned long           ctx_used_monitors[4];   /* bitmask of monitor PMC being used */
+
+       unsigned long           ctx_pmcs[IA64_NUM_PMC_REGS];    /* saved copies of PMC values */
+
+       unsigned int            ctx_used_ibrs[1];               /* bitmask of used IBR (speedup ctxsw in) */
+       unsigned int            ctx_used_dbrs[1];               /* bitmask of used DBR (speedup ctxsw in) */
+       unsigned long           ctx_dbrs[IA64_NUM_DBG_REGS];    /* DBR values (cache) when not loaded */
+       unsigned long           ctx_ibrs[IA64_NUM_DBG_REGS];    /* IBR values (cache) when not loaded */
+
+       pfm_counter_t           ctx_pmds[IA64_NUM_PMD_REGS]; /* software state for PMDS */
+
+       u64                     ctx_saved_psr_up;       /* only contains psr.up value */
+
+       unsigned long           ctx_last_activation;    /* context last activation number for last_cpu */
+       unsigned int            ctx_last_cpu;           /* CPU id of current or last CPU used (SMP only) */
+       unsigned int            ctx_cpu;                /* cpu to which perfmon is applied (system wide) */
+
+       int                     ctx_fd;                 /* file descriptor used by this context */
+       pfm_ovfl_arg_t          ctx_ovfl_arg;           /* argument to custom buffer format handler */
+
+       pfm_buffer_fmt_t        *ctx_buf_fmt;           /* buffer format callbacks */
+       void                    *ctx_smpl_hdr;          /* points to sampling buffer header kernel vaddr */
+       unsigned long           ctx_smpl_size;          /* size of sampling buffer */
+       void                    *ctx_smpl_vaddr;        /* user level virtual address of smpl buffer */
+
+       wait_queue_head_t       ctx_msgq_wait;
+       pfm_msg_t               ctx_msgq[PFM_MAX_MSGS];
+       int                     ctx_msgq_head;
+       int                     ctx_msgq_tail;
+       struct fasync_struct    *ctx_async_queue;
+
+       wait_queue_head_t       ctx_zombieq;            /* termination cleanup wait queue */
+} pfm_context_t;
+
+spinlock_t     ctx_lock;               /* context protection */
+unsigned long  ctx_pmcs[IA64_NUM_PMC_REGS];    /* saved copies of PMC values */
+static pfm_counter_t   ctx_pmds[IA64_NUM_PMD_REGS];    /* software state for PMDS */
+static pfm_buffer_fmt_t *ctx_buf_fmt;  /* buffer format callbacks */
+unsigned int   ctx_state;              /* state: active/inactive (no bitfield) */
+unsigned long  ctx_used_pmds[4];       /* bitmask of PMD used */
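+/*
+ * note: unlike the per-task contexts of the Linux original, this sample port
+ * appears to flatten part of the context into the single set of file-scope
+ * variables above (one implicit context); macros such as PMC_OVFL_NOTIFY()
+ * and PROTECT_CTX_NOPRINT() operate on these globals rather than on a
+ * pfm_context_t instance.
+ */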
+
+/*
+ * magic number used to verify that structure is really
+ * a perfmon context
+ */
+#define PFM_IS_FILE(f)         ((f)->f_op == &pfm_file_ops)
+
+#define PFM_GET_CTX(t)         ((pfm_context_t *)(t)->arch._thread.pfm_context)
+
+#ifdef CONFIG_SMP
+#define SET_LAST_CPU(ctx, v)   (ctx)->ctx_last_cpu = (v)
+#define GET_LAST_CPU(ctx)      (ctx)->ctx_last_cpu
+#else
+#define SET_LAST_CPU(ctx, v)   do {} while(0)
+#define GET_LAST_CPU(ctx)      do {} while(0)
+#endif
+
+
+#define ctx_fl_block           ctx_flags.block
+#define ctx_fl_system          ctx_flags.system
+#define ctx_fl_using_dbreg     ctx_flags.using_dbreg
+#define ctx_fl_is_sampling     ctx_flags.is_sampling
+#define ctx_fl_excl_idle       ctx_flags.excl_idle
+#define ctx_fl_going_zombie    ctx_flags.going_zombie
+#define ctx_fl_trap_reason     ctx_flags.trap_reason
+#define ctx_fl_no_msg          ctx_flags.no_msg
+#define ctx_fl_can_restart     ctx_flags.can_restart
+
+#define PFM_SET_WORK_PENDING(t, v)     do { (t)->arch._thread.pfm_needs_checking = v; } while(0);
+#define PFM_GET_WORK_PENDING(t)                (t)->arch._thread.pfm_needs_checking
+
+/*
+ * global information about all sessions
+ * mostly used to synchronize between system wide and per-process
+ */
+typedef struct {
+       spinlock_t              pfs_lock;                  /* lock the structure */
+
+       unsigned int            pfs_task_sessions;         /* number of per task sessions */
+       unsigned int            pfs_sys_sessions;          /* number of per system wide sessions */
+       unsigned int            pfs_sys_use_dbregs;        /* incremented when a system wide session uses debug regs */
+       unsigned int            pfs_ptrace_use_dbregs;     /* incremented when a process uses debug regs */
+       struct task_struct      *pfs_sys_session[NR_CPUS]; /* point to task owning a system-wide session */
+} pfm_session_t;
+
+/* assume cnum is a valid monitor */
+#define PMC_PM(cnum, val)      (((val) >> (pmu_conf->pmc_desc[cnum].pm_pos)) & 0x1)
+
+/*
+ * PMU specific flags
+ */
+#define PFM_PMU_IRQ_RESEND     1       /* PMU needs explicit IRQ resend */
+
+/*
+ * debug register related type definitions
+ */
+typedef struct {
+       unsigned long ibr_mask:56;
+       unsigned long ibr_plm:4;
+       unsigned long ibr_ig:3;
+       unsigned long ibr_x:1;
+} ibr_mask_reg_t;
+
+typedef struct {
+       unsigned long dbr_mask:56;
+       unsigned long dbr_plm:4;
+       unsigned long dbr_ig:2;
+       unsigned long dbr_w:1;
+       unsigned long dbr_r:1;
+} dbr_mask_reg_t;
+
+typedef union {
+       unsigned long  val;
+       ibr_mask_reg_t ibr;
+       dbr_mask_reg_t dbr;
+} dbreg_t;
+
+
+/*
+ * perfmon command descriptions
+ */
+typedef struct {
+       int             (*cmd_func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
+       char            *cmd_name;
+       int             cmd_flags;
+       unsigned int    cmd_narg;
+       size_t          cmd_argsize;
+       int             (*cmd_getsize)(void *arg, size_t *sz);
+} pfm_cmd_desc_t;
+
+#define PFM_CMD_FD             0x01    /* command requires a file descriptor */
+#define PFM_CMD_ARG_READ       0x02    /* command must read argument(s) */
+#define PFM_CMD_ARG_RW         0x04    /* command must read/write argument(s) */
+#define PFM_CMD_STOP           0x08    /* command does not work on zombie context */
+
+
+#define PFM_CMD_NAME(cmd)      pfm_cmd_tab[(cmd)].cmd_name
+#define PFM_CMD_READ_ARG(cmd)  (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_READ)
+#define PFM_CMD_RW_ARG(cmd)    (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_RW)
+#define PFM_CMD_USE_FD(cmd)    (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_FD)
+#define PFM_CMD_STOPPED(cmd)   (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_STOP)
+
+#define PFM_CMD_ARG_MANY       -1 /* cannot be zero */
+
+/*
+ * perfmon internal variables
+ */
+static pfm_session_t           pfm_sessions;   /* global sessions information */
+
+/* static DEFINE_SPINLOCK(pfm_alt_install_check); */
+
+/* static struct proc_dir_entry        *perfmon_dir; */
+/* static pfm_uuid_t           pfm_null_uuid = {0,}; */
+
+static spinlock_t              pfm_buffer_fmt_lock;
+static LIST_HEAD(pfm_buffer_fmt_list);
+
+pmu_config_t           *pmu_conf;
+
+#define pfm_get_cpu_var(v)             __ia64_per_cpu_var(v)
+#define pfm_get_cpu_data(a,b)          per_cpu(a, b)
+
+static int interrupt_count = 0;
+static int ring_count[4] = {0, 0, 0, 0};
+
+static inline void
+pfm_set_task_notify(struct task_struct *task)
+{
+       struct thread_info *info;
+
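+       /* informational note: on ia64 the thread_info sits immediately after
+        * the task_struct, hence the fixed IA64_TASK_SIZE offset below */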
+       info = (struct thread_info *) ((char *) task + IA64_TASK_SIZE);
+       set_bit(TIF_NOTIFY_RESUME, &info->flags);
+}
+
+static inline void
+pfm_clear_task_notify(void)
+{
+       clear_thread_flag(TIF_NOTIFY_RESUME);
+}
+
+static inline void
+pfm_reserve_page(unsigned long a)
+{
+/*     SetPageReserved(vmalloc_to_page((void *)a)); */
+}
+static inline void
+pfm_unreserve_page(unsigned long a)
+{
+/*     ClearPageReserved(vmalloc_to_page((void*)a)); */
+}
+
+static inline unsigned long
+pfm_protect_ctx_ctxsw(void)
+{
+       spin_lock(&ctx_lock);
+       return 0UL;
+}
+
+static inline void
+pfm_unprotect_ctx_ctxsw(unsigned long f)
+{
+       spin_unlock(&ctx_lock);
+}
+
+DEFINE_PER_CPU(unsigned long, pfm_syst_info);
+DEFINE_PER_CPU(struct task_struct *, pmu_owner);
+DEFINE_PER_CPU(pfm_context_t  *, pmu_ctx);
+DEFINE_PER_CPU(unsigned long, pmu_activation_number);
+DEFINE_PER_CPU(pfm_ovfl_arg_t, ovfl_arg);
+
+/*
+ * forward declarations
+ */
+#ifndef CONFIG_SMP
+static void pfm_lazy_save_regs (struct task_struct *ta);
+#endif
+
+void dump_pmu_state(const char *);
+static int pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
+
+#include "perfmon_itanium.h"
+#include "perfmon_mckinley.h"
+#include "perfmon_montecito.h"
+#include "perfmon_generic.h"
+
+static pmu_config_t *pmu_confs[]={
+       &pmu_conf_mont,
+       &pmu_conf_mck,
+       &pmu_conf_ita,
+       &pmu_conf_gen, /* must be last */
+       NULL
+};
+
+
+static inline void
+pfm_clear_psr_pp(void)
+{
+       ia64_rsm(IA64_PSR_PP);
+       ia64_srlz_i();
+}
+
+static inline void
+pfm_set_psr_pp(void)
+{
+       ia64_ssm(IA64_PSR_PP);
+       ia64_srlz_i();
+}
+
+static inline void
+pfm_clear_psr_up(void)
+{
+       ia64_rsm(IA64_PSR_UP);
+       ia64_srlz_i();
+}
+
+static inline void
+pfm_set_psr_up(void)
+{
+       ia64_ssm(IA64_PSR_UP);
+       ia64_srlz_i();
+}
+
+static inline unsigned long
+pfm_get_psr(void)
+{
+       unsigned long tmp;
+       tmp = ia64_getreg(_IA64_REG_PSR);
+       ia64_srlz_i();
+       return tmp;
+}
+
+static inline void
+pfm_set_psr_l(unsigned long val)
+{
+       ia64_setreg(_IA64_REG_PSR_L, val);
+       ia64_srlz_i();
+}
+
+static inline void
+pfm_freeze_pmu(void)
+{
+       ia64_set_pmc(0,1UL);
+       ia64_srlz_d();
+}
+
+static inline void
+pfm_unfreeze_pmu(void)
+{
+       ia64_set_pmc(0,0UL);
+       ia64_srlz_d();
+}
+
+static inline void
+pfm_restore_ibrs(unsigned long *ibrs, unsigned int nibrs)
+{
+       int i;
+
+       for (i=0; i < nibrs; i++) {
+               ia64_set_ibr(i, ibrs[i]);
+               ia64_dv_serialize_instruction();
+       }
+       ia64_srlz_i();
+}
+
+static inline void
+pfm_restore_dbrs(unsigned long *dbrs, unsigned int ndbrs)
+{
+       int i;
+
+       for (i=0; i < ndbrs; i++) {
+               ia64_set_dbr(i, dbrs[i]);
+               ia64_dv_serialize_data();
+       }
+       ia64_srlz_d();
+}
+
+/*
+ * PMD[i] must be a counter. no check is made
+ */
+static inline void
+pfm_write_soft_counter(int i, unsigned long val)
+{
+       unsigned long ovfl_val = pmu_conf->ovfl_val;
+
+       ctx_pmds[i].val = val  & ~ovfl_val;
+       /*
+        * writing to the unimplemented part is ignored, so we do not need to
+        * mask off the top part
+        */
+       ia64_set_pmd(i, val & ovfl_val);
+
+/*     DPRINT_ovfl(("<< ctx_pmd[%d] pmd=0x%lx ovfl_val=0x%lx\n", */
+/*                  i, */
+/*                  ia64_get_pmd(i) & ovfl_val, */
+/*                  ovfl_val)); */
+}
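+
+/*
+ * Worked example (informational, assuming a 47-bit hardware counter so that
+ * ovfl_val == (1UL<<47)-1): for val = 0x1800000000000000 the high bits are
+ * kept in ctx_pmds[i].val and only the low 47 bits (here 0) are written to
+ * the hardware PMD, which is how the 64-bit virtual counter is maintained.
+ */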
+
+static void
+pfm_context_free(pfm_context_t *ctx)
+{
+       if (ctx) {
+               DPRINT(("free ctx @%p\n", ctx));
+               kfree(ctx);
+       }
+}
+
+static inline void
+pfm_save_pmds(unsigned long *pmds, unsigned long mask)
+{
+       int i;
+
+       ia64_srlz_d();
+
+       for (i=0; mask; i++, mask>>=1) {
+               if (mask & 0x1) pmds[i] = ia64_get_pmd(i);
+       }
+}
+
+/*
+ * reload from thread state (used for ctxsw only)
+ */
+static inline void
+pfm_restore_pmds(unsigned long *pmds, unsigned long mask)
+{
+       int i;
+       unsigned long val, ovfl_val = pmu_conf->ovfl_val;
+
+       for (i=0; mask; i++, mask>>=1) {
+               if ((mask & 0x1) == 0) continue;
+               val = PMD_IS_COUNTING(i) ? pmds[i] & ovfl_val : pmds[i];
+               ia64_set_pmd(i, val);
+       }
+       ia64_srlz_d();
+}
+
+/*
+ * propagate PMD from context to thread-state
+ */
+static inline void
+pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx)
+{
+       struct thread_struct *thread = &task->arch._thread;
+       unsigned long ovfl_val = pmu_conf->ovfl_val;
+       unsigned long mask = ctx->ctx_all_pmds[0];
+       unsigned long val;
+       int i;
+
+       DPRINT(("mask=0x%lx\n", mask));
+
+       for (i=0; mask; i++, mask>>=1) {
+
+               val = ctx->ctx_pmds[i].val;
+
+               /*
+                * We break up the 64 bit value into 2 pieces
+                * the lower bits go to the machine state in the
+                * thread (will be reloaded on ctxsw in).
+                * The upper part stays in the soft-counter.
+                */
+               if (PMD_IS_COUNTING(i)) {
+                       ctx->ctx_pmds[i].val = val & ~ovfl_val;
+                        val &= ovfl_val;
+               }
+               thread->pmds[i] = val;
+
+               DPRINT(("pmd[%d]=0x%lx soft_val=0x%lx\n",
+                       i,
+                       thread->pmds[i],
+                       ctx->ctx_pmds[i].val));
+       }
+}
+
+/*
+ * propagate PMC from context to thread-state
+ */
+static inline void
+pfm_copy_pmcs(struct task_struct *task, pfm_context_t *ctx)
+{
+       struct thread_struct *thread = &task->arch._thread;
+       unsigned long mask = ctx->ctx_all_pmcs[0];
+       int i;
+
+       DPRINT(("mask=0x%lx\n", mask));
+
+       for (i=0; mask; i++, mask>>=1) {
+               /* masking 0 with ovfl_val yields 0 */
+               thread->pmcs[i] = ctx->ctx_pmcs[i];
+               DPRINT(("pmc[%d]=0x%lx\n", i, thread->pmcs[i]));
+       }
+}
+
+
+
+static inline void
+pfm_restore_pmcs(unsigned long *pmcs, unsigned long mask)
+{
+       int i;
+
+       for (i=0; mask; i++, mask>>=1) {
+               if ((mask & 0x1) == 0) continue;
+               ia64_set_pmc(i, pmcs[i]);
+               printk("pfm_restore_pmcs: pmc[%d]: %lx\n", i, pmcs[i]);
+       }
+       ia64_srlz_d();
+}
+
+static inline int
+pfm_uuid_cmp(pfm_uuid_t a, pfm_uuid_t b)
+{
+       return memcmp(a, b, sizeof(pfm_uuid_t));
+}
+
+static inline int
+pfm_buf_fmt_exit(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, struct pt_regs *regs)
+{
+       int ret = 0;
+       if (fmt->fmt_exit) ret = (*fmt->fmt_exit)(task, buf, regs);
+       return ret;
+}
+
+static inline int
+pfm_buf_fmt_getsize(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags, int cpu, void *arg, unsigned long *size)
+{
+       int ret = 0;
+       if (fmt->fmt_getsize) ret = (*fmt->fmt_getsize)(task, flags, cpu, arg, size);
+       return ret;
+}
+
+
+static inline int
+pfm_buf_fmt_validate(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags,
+                    int cpu, void *arg)
+{
+       int ret = 0;
+       if (fmt->fmt_validate) ret = (*fmt->fmt_validate)(task, flags, cpu, arg);
+       return ret;
+}
+
+static inline int
+pfm_buf_fmt_init(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, unsigned int flags,
+                    int cpu, void *arg)
+{
+       int ret = 0;
+       if (fmt->fmt_init) ret = (*fmt->fmt_init)(task, buf, flags, cpu, arg);
+       return ret;
+}
+
+static inline int
+pfm_buf_fmt_restart(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs)
+{
+       int ret = 0;
+       if (fmt->fmt_restart) ret = (*fmt->fmt_restart)(task, ctrl, buf, regs);
+       return ret;
+}
+
+static inline int
+pfm_buf_fmt_restart_active(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs)
+{
+       int ret = 0;
+       if (fmt->fmt_restart_active) ret = (*fmt->fmt_restart_active)(task, ctrl, buf, regs);
+       return ret;
+}
+
+static pfm_buffer_fmt_t *
+__pfm_find_buffer_fmt(pfm_uuid_t uuid)
+{
+       struct list_head * pos;
+       pfm_buffer_fmt_t * entry;
+
+       list_for_each(pos, &pfm_buffer_fmt_list) {
+               entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list);
+               if (pfm_uuid_cmp(uuid, entry->fmt_uuid) == 0)
+                       return entry;
+       }
+       return NULL;
+}
+ 
+int
+pfm_register_buffer_fmt(pfm_buffer_fmt_t *fmt)
+{
+       int ret = 0;
+
+       /* some sanity checks */
+       if (fmt == NULL || fmt->fmt_name == NULL) return -EINVAL;
+
+       /* we need at least a handler */
+       if (fmt->fmt_handler == NULL) return -EINVAL;
+
+       /*
+        * XXX: need check validity of fmt_arg_size
+        */
+
+       spin_lock(&pfm_buffer_fmt_lock);
+
+       ctx_buf_fmt = fmt;
+
+       if (__pfm_find_buffer_fmt(fmt->fmt_uuid)) {
+               printk(KERN_ERR "perfmon: duplicate sampling format: %s\n", fmt->fmt_name);
+               ret = -EBUSY;
+               goto out;
+       } 
+       list_add(&fmt->fmt_list, &pfm_buffer_fmt_list);
+       printk(KERN_INFO "perfmon: added sampling format %s\n", fmt->fmt_name);
+
+out:
+       spin_unlock(&pfm_buffer_fmt_lock);
+       return ret;
+}
+EXPORT_SYMBOL(pfm_register_buffer_fmt);
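+
+/*
+ * Hypothetical registration sketch (illustrative names only, not part of
+ * the patch):
+ *
+ *     static pfm_buffer_fmt_t my_fmt = {
+ *             .fmt_name    = "my-sampling-fmt",
+ *             .fmt_uuid    = MY_FMT_UUID,      -- must be unique
+ *             .fmt_handler = my_ovfl_handler,  -- required, see checks above
+ *     };
+ *     ...
+ *     ret = pfm_register_buffer_fmt(&my_fmt);
+ *
+ * ret is 0 on success, -EINVAL on a bad argument, -EBUSY on a duplicate uuid.
+ */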
+
+int
+pfm_unregister_buffer_fmt(pfm_uuid_t uuid)
+{
+       pfm_buffer_fmt_t *fmt;
+       int ret = 0;
+
+       spin_lock(&pfm_buffer_fmt_lock);
+
+       fmt = __pfm_find_buffer_fmt(uuid);
+       if (!fmt) {
+               printk(KERN_ERR "perfmon: cannot unregister format, not found\n");
+               ret = -EINVAL;
+               goto out;
+       }
+       list_del_init(&fmt->fmt_list);
+       printk(KERN_INFO "perfmon: removed sampling format: %s\n", fmt->fmt_name);
+
+out:
+       spin_unlock(&pfm_buffer_fmt_lock);
+       return ret;
+
+}
+EXPORT_SYMBOL(pfm_unregister_buffer_fmt);
+
+extern void update_pal_halt_status(int);
+
+static int
+pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu)
+{
+       unsigned long flags;
+       /*
+        * validity checks on cpu_mask have been done upstream
+        */
+       LOCK_PFS(flags);
+
+       DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
+               pfm_sessions.pfs_sys_sessions,
+               pfm_sessions.pfs_task_sessions,
+               pfm_sessions.pfs_sys_use_dbregs,
+               is_syswide,
+               cpu));
+
+       if (is_syswide) {
+               /*
+                * cannot mix system wide and per-task sessions
+                */
+               if (pfm_sessions.pfs_task_sessions > 0UL) {
+                       DPRINT(("system wide not possible, %u conflicting task_sessions\n",
+                               pfm_sessions.pfs_task_sessions));
+                       goto abort;
+               }
+
+               if (pfm_sessions.pfs_sys_session[cpu]) goto error_conflict;
+
+               DPRINT(("reserving system wide session on CPU%u currently on CPU%u\n", cpu, smp_processor_id()));
+
+               pfm_sessions.pfs_sys_session[cpu] = task;
+
+               pfm_sessions.pfs_sys_sessions++ ;
+
+       } else {
+               if (pfm_sessions.pfs_sys_sessions) goto abort;
+               pfm_sessions.pfs_task_sessions++;
+       }
+
+       DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
+               pfm_sessions.pfs_sys_sessions,
+               pfm_sessions.pfs_task_sessions,
+               pfm_sessions.pfs_sys_use_dbregs,
+               is_syswide,
+               cpu));
+
+       /*
+        * disable default_idle() to go to PAL_HALT
+        */
+       update_pal_halt_status(0);
+
+       UNLOCK_PFS(flags);
+
+       return 0;
+
+error_conflict:
+       DPRINT(("system wide not possible, conflicting session on CPU%d\n",
+               cpu));
+abort:
+       UNLOCK_PFS(flags);
+
+       return -EBUSY;
+
+}
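+
+/*
+ * usage note (informational): a successful pfm_reserve_session() is expected
+ * to be balanced by pfm_unreserve_session() with the same (is_syswide, cpu)
+ * pair; the latter re-enables PAL_HALT once the last session goes away.
+ */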
+
+static int
+pfm_unreserve_session(int is_syswide, unsigned int cpu)
+{
+       unsigned long flags;
+       /*
+        * validity checks on cpu_mask have been done upstream
+        */
+       LOCK_PFS(flags);
+
+       DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
+               pfm_sessions.pfs_sys_sessions,
+               pfm_sessions.pfs_task_sessions,
+               pfm_sessions.pfs_sys_use_dbregs,
+               is_syswide,
+               cpu));
+
+
+       if (is_syswide) {
+               pfm_sessions.pfs_sys_session[cpu] = NULL;
+               /*
+                * would not work with perfmon+more than one bit in cpu_mask
+                */
+/*             if (ctx && ctx->ctx_fl_using_dbreg) { */
+/*                     if (pfm_sessions.pfs_sys_use_dbregs == 0) { */
+/*                             printk(KERN_ERR "perfmon: invalid release for ctx %p sys_use_dbregs=0\n", ctx); */
+/*                     } else { */
+/*                             pfm_sessions.pfs_sys_use_dbregs--; */
+/*                     } */
+/*             } */
+               pfm_sessions.pfs_sys_sessions--;
+       } else {
+               pfm_sessions.pfs_task_sessions--;
+       }
+       DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
+               pfm_sessions.pfs_sys_sessions,
+               pfm_sessions.pfs_task_sessions,
+               pfm_sessions.pfs_sys_use_dbregs,
+               is_syswide,
+               cpu));
+
+       /*
+        * if possible, enable default_idle() to go into PAL_HALT
+        */
+       if (pfm_sessions.pfs_task_sessions == 0 && pfm_sessions.pfs_sys_sessions == 0)
+               update_pal_halt_status(1);
+
+       UNLOCK_PFS(flags);
+
+       return 0;
+}
+
+#if 0  /* FIX ME */
+/*
+ * XXX: do something better here
+ */
+static int
+pfm_bad_permissions(struct task_struct *task)
+{
+       /* inspired by ptrace_attach() */
+       DPRINT(("cur: id=%d euid=%d suid=%d uid=%d egid=%d sgid=%d\n",
+               current->vcpu_id,
+               task->euid,
+               task->suid,
+               task->uid,
+               task->egid,
+               task->sgid));
+
+       return ((current->uid != task->euid)
+           || (current->uid != task->suid)
+           || (current->uid != task->uid)
+           || (current->gid != task->egid)
+           || (current->gid != task->sgid)
+           || (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE);
+}
+#endif /* FIX ME */
+
+static inline unsigned long
+pfm_new_counter_value (pfm_counter_t *reg, int is_long_reset)
+{
+       unsigned long val = is_long_reset ? reg->long_reset : reg->short_reset;
+
+       reg->lval = val;
+       return val;
+}
+
+static void
+pfm_reset_regs(unsigned long *ovfl_regs, int is_long_reset)
+{
+       unsigned long mask = ovfl_regs[0];
+       unsigned long reset_others = 0UL;
+       unsigned long val;
+       int i;
+
+/*     DPRINT_ovfl(("ovfl_regs=0x%lx is_long_reset=%d\n", ovfl_regs[0], is_long_reset)); */
+
+       /*
+        * now restore reset value on sampling overflowed counters
+        */
+       mask >>= PMU_FIRST_COUNTER;
+       for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) {
+
+               if ((mask & 0x1UL) == 0UL) continue;
+
+               val           = pfm_new_counter_value(ctx_pmds+ i, is_long_reset);
+               reset_others |= ctx_pmds[i].reset_pmds[0];
+
+/*             DPRINT_ovfl((" %s reset ctx_pmds[%d]=%lx\n", is_long_reset ? "long" : "short", i, val)); */
+
+               pfm_write_soft_counter(i, val);
+       }
+
+       /*
+        * Now take care of resetting the other registers
+        */
+       for(i = 0; reset_others; i++, reset_others >>= 1) {
+
+               if ((reset_others & 0x1) == 0) continue;
+
+               val = pfm_new_counter_value(ctx_pmds + i, is_long_reset);
+
+               if (PMD_IS_COUNTING(i)) {
+                       pfm_write_soft_counter(i, val);
+               } else {
+                       ia64_set_pmd(i, val);
+               }
+               DPRINT_ovfl(("%s reset_others pmd[%d]=%lx\n",
+                         is_long_reset ? "long" : "short", i, val));
+       }
+       ia64_srlz_d();
+}
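+
+/*
+ * informational note: reset_pmds[] lets one overflowing counter trigger the
+ * reset of others. The first loop above resets the overflowed counters
+ * themselves while accumulating reset_others; the second loop then resets
+ * those dependent registers.
+ */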
+
+static int
+pfm_write_pmcs(pfarg_reg_t *req, int count, struct pt_regs *regs)
+{
+       unsigned long value, pmc_pm;
+       unsigned long smpl_pmds, reset_pmds, impl_pmds;
+       unsigned int cnum, reg_flags, flags, pmc_type;
+       int i, can_access_pmu = 0, is_loaded, is_system;
+       int is_monitor, is_counting;
+       int ret = -EINVAL;
+       pfm_reg_check_t wr_func;
+
+       impl_pmds = pmu_conf->impl_pmds[0];
+/*     is_loaded = ctx_state == PFM_CTX_LOADED ? 1 : 0; */
+       is_loaded = 1;
+       is_system = 1;
+
+       /*
+        * In system wide and when the context is loaded, access can only happen
+        * when the caller is running on the CPU being monitored by the session.
+        * It does not have to be the owner (ctx_task) of the context per se.
+        */
+       can_access_pmu = 1;
+       
+
+       for (i = 0; i < count; i++, req++) {
+
+               cnum       = req->reg_num;
+               reg_flags  = req->reg_flags;
+               value      = req->reg_value;
+               smpl_pmds  = req->reg_smpl_pmds[0];
+               reset_pmds = req->reg_reset_pmds[0];
+               flags      = 0;
+
+
+               if (cnum >= PMU_MAX_PMCS) {
+                       DPRINT(("pmc%u is invalid\n", cnum));
+                       goto error;
+               }
+
+               pmc_type   = pmu_conf->pmc_desc[cnum].type;
+               pmc_pm     = (value >> pmu_conf->pmc_desc[cnum].pm_pos) & 0x1;
+               is_counting = (pmc_type & PFM_REG_COUNTING) == PFM_REG_COUNTING ? 1 : 0;
+               is_monitor  = (pmc_type & PFM_REG_MONITOR) == PFM_REG_MONITOR ? 1 : 0;
+
+               /*
+                * we reject all non implemented PMC as well
+                * as attempts to modify PMC[0-3] which are used
+                * as status registers by the PMU
+                */
+               if ((pmc_type & PFM_REG_IMPL) == 0 || (pmc_type & PFM_REG_CONTROL) == PFM_REG_CONTROL) {
+                       DPRINT(("pmc%u is unimplemented or no-access pmc_type=%x\n", cnum, pmc_type));
+                       goto error;
+               }
+               wr_func = pmu_conf->pmc_desc[cnum].write_check;
+               /*
+                * If the PMC is a monitor, then if the value is not the default:
+                *      - system-wide session: PMCx.pm=1 (privileged monitor)
+                *      - per-task           : PMCx.pm=0 (user monitor)
+                */
+               if (is_monitor && value != PMC_DFL_VAL(cnum) && is_system ^ pmc_pm) {
+                       DPRINT(("pmc%u pmc_pm=%lu is_system=%d\n",
+                               cnum,
+                               pmc_pm,
+                               is_system));
+/*                     goto error; */
+               }
+
+               if (is_counting) {
+                       /*
+                        * enforce generation of overflow interrupt. Necessary on all
+                        * CPUs.
+                        */
+                       value |= 1 << PMU_PMC_OI;
+
+                       if (reg_flags & PFM_REGFL_OVFL_NOTIFY) {
+                               flags |= PFM_REGFL_OVFL_NOTIFY;
+                       }
+
+                       if (reg_flags & PFM_REGFL_RANDOM) flags |= PFM_REGFL_RANDOM;
+
+                       /* verify validity of smpl_pmds */
+                       if ((smpl_pmds & impl_pmds) != smpl_pmds) {
+                               DPRINT(("invalid smpl_pmds 0x%lx for pmc%u\n", smpl_pmds, cnum));
+                               goto error;
+                       }
+
+                       /* verify validity of reset_pmds */
+                       if ((reset_pmds & impl_pmds) != reset_pmds) {
+                               DPRINT(("invalid reset_pmds 0x%lx for pmc%u\n", reset_pmds, cnum));
+                               goto error;
+                       }
+               } else {
+                       if (reg_flags & (PFM_REGFL_OVFL_NOTIFY|PFM_REGFL_RANDOM)) {
+                               DPRINT(("cannot set ovfl_notify or random on pmc%u\n", cnum));
+                               goto error;
+                       }
+                       /* eventid on non-counting monitors are ignored */
+               }
+
+               /*
+                * execute write checker, if any
+                */
+               if (likely(wr_func != NULL)) {
+                       ret = (*wr_func)(NULL, NULL, cnum, &value, regs);
+                       printk("write_check: %d\n", ret);
+                       if (ret) goto error;
+                       ret = -EINVAL;
+               }
+
+               /*
+                * no error on this register
+                */
+               PFM_REG_RETFLAG_SET(req->reg_flags, 0);
+
+               /*
+                * Now we commit the changes to the software state
+                */
+
+               /*
+                * update overflow information
+                */
+               if (is_counting) {
+                       /*
+                        * full flag update each time a register is programmed
+                        */
+                       ctx_pmds[cnum].flags = flags;
+
+                       ctx_pmds[cnum].reset_pmds[0] = reset_pmds;
+                       ctx_pmds[cnum].smpl_pmds[0]  = smpl_pmds;
+                       ctx_pmds[cnum].eventid       = req->reg_smpl_eventid;
+
+                       /*
+                        * Mark all PMDS to be accessed as used.
+                        *
+                        * We do not keep track of PMC because we have to
+                        * systematically restore ALL of them.
+                        *
+                        * We do not update the used_monitors mask, because
+                        * if we have not programmed them, then they will be in
+                        * a quiescent state, therefore we will not need to
+                        * mask/restore them when the context is MASKED.
+                        */
+                       CTX_USED_PMD(reset_pmds);
+                       CTX_USED_PMD(smpl_pmds);
+                       /*
+                        * make sure we do not try to reset on
+                        * restart because we have established new values
+                        */
+               }
+               /*
+                * Needed in case the user does not initialize the equivalent
+                * PMD. Clearing is done indirectly via pfm_reset_pmu_state() so there is no
+                * possible leak here.
+                */
+               CTX_USED_PMD(pmu_conf->pmc_desc[cnum].dep_pmd[0]);
+
+               /*
+                * keep track of the monitor PMC that we are using.
+                * we save the value of the pmc in ctx_pmcs[] and if
+                * the monitoring is not stopped for the context we also
+                * place it in the saved state area so that it will be
+                * picked up later by the context switch code.
+                *
+                * The value in ctx_pmcs[] can only be changed in pfm_write_pmcs().
+                *
+                * The value in thread->pmcs[] may be modified on overflow, i.e., when
+                * monitoring needs to be stopped.
+                */
+/*             if (is_monitor) CTX_USED_MONITOR(ctx, 1UL << cnum); */
+
+               /*
+                * update context state
+                */
+               ctx_pmcs[cnum] = value;
+
+               if (is_loaded) {
+                       /*
+                        * write hardware register if we can
+                        */
+                       if (can_access_pmu) {
+                               u64 pmc;
+                               printk("ia64_set_pmc: pmc[%d]: %lx\n", cnum, value);
+                               ia64_set_pmc(cnum, value);
+
+                               pmc = ia64_get_pmc(cnum);
+                               printk("ia64_get_pmc: pmc[%d]: %lx\n", cnum, pmc);
+                       }
+#ifdef CONFIG_SMP
+                       else {
+                               /*
+                                * per-task SMP only here
+                                *
+                                * we are guaranteed that the task is not running on the other CPU,
+                                * we indicate that this PMD will need to be reloaded if the task
+                                * is rescheduled on the CPU it ran last on.
+                                */
+/*                             ctx->ctx_reload_pmcs[0] |= 1UL << cnum; */
+                       }
+#endif
+               }
+
+               DPRINT(("pmc[%u]=0x%lx ld=%d apmu=%d flags=0x%x  eventid=%ld\n smpl_pmds=0x%lx reset_pmds=0x%lx\n",
+                         cnum,
+                         value,
+                         is_loaded,
+                         can_access_pmu,
+                         flags,
+                         ctx_pmds[cnum].eventid,
+                         smpl_pmds,
+                         reset_pmds));
+       }
+
+       /*
+        * make sure the changes are visible
+        */
+       if (can_access_pmu) ia64_srlz_d();
+
+       return 0;
+error:
+       PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL);
+       return ret;
+}
+
+static int
+pfm_write_pmds(pfarg_reg_t *req, int count, struct pt_regs *regs)
+{
+       unsigned long value, hw_value, ovfl_mask;
+       unsigned int cnum;
+       int i, can_access_pmu = 0;
+       int is_counting, is_loaded;
+       int ret = -EINVAL;
+       pfm_reg_check_t wr_func;
+
+       is_loaded = 1;
+       ovfl_mask = pmu_conf->ovfl_val;
+
+       /*
+        * on both UP and SMP, we can only write to the PMC when the task is
+        * the owner of the local PMU.
+        */
+       if (likely(is_loaded)) {
+               /*
+                * In system wide and when the context is loaded, access can only happen
+                * when the caller is running on the CPU being monitored by the session.
+                * It does not have to be the owner (ctx_task) of the context per se.
+                */
+/*             if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) { */
+/*                     DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); */
+/*                     return -EBUSY; */
+/*             } */
+               can_access_pmu = 1;
+       }
+
+       for (i = 0; i < count; i++, req++) {
+
+               cnum  = req->reg_num;
+               value = req->reg_value;
+               DPRINT(("pmd[%u]: %lx\n", cnum, value));
+
+               if (!PMD_IS_IMPL(cnum)) {
+                       DPRINT(("pmd[%u] is unimplemented or invalid\n", cnum));
+                       goto abort_mission;
+               }
+               is_counting = PMD_IS_COUNTING(cnum);
+               wr_func     = pmu_conf->pmd_desc[cnum].write_check;
+
+               /*
+                * execute write checker, if any
+                */
+/*             if (unlikely(expert_mode == 0 && wr_func)) { */
+/*                     unsigned long v = value; */
+
+/*                     ret = (*wr_func)(task, ctx, cnum, &v, regs); */
+/*                     if (ret) goto abort_mission; */
+
+/*                     value = v; */
+/*                     ret   = -EINVAL; */
+/*             } */
+
+
+               /*
+                * no error on this register
+                */
+               PFM_REG_RETFLAG_SET(req->reg_flags, 0);
+
+               /*
+                * now commit changes to software state
+                */
+               hw_value = value;
+
+               /*
+                * update virtualized (64bits) counter
+                */
+               if (is_counting) {
+                       /*
+                        * write context state
+                        */
+                       ctx_pmds[cnum].lval = value;
+
+                       /*
+                        * when the context is loaded we use the split value
+                        */
+                       if (is_loaded) {
+                               hw_value = value &  ovfl_mask;
+                               value    = value & ~ovfl_mask;
+                       }
+               }
+
+               /*
+                * update reset values (not just for counters)
+                */
+               ctx_pmds[cnum].long_reset  = req->reg_long_reset;
+               ctx_pmds[cnum].short_reset = req->reg_short_reset;
+
+               /*
+                * update randomization parameters (not just for counters)
+                */
+               ctx_pmds[cnum].seed = req->reg_random_seed;
+               ctx_pmds[cnum].mask = req->reg_random_mask;
+
+               /*
+                * update context value
+                */
+               ctx_pmds[cnum].val  = value;
+
+               /*
+                * Keep track of what we use
+                *
+                * We do not keep track of PMC because we have to
+                * systematically restore ALL of them.
+                */
+               CTX_USED_PMD(PMD_PMD_DEP(cnum));
+
+               /*
+                * mark this PMD register used as well
+                */
+               CTX_USED_PMD(RDEP(cnum));
+
+               /*
+                * make sure we do not try to reset on
+                * restart because we have established new values
+                */
+               if (is_loaded) {
+                       /*
+                        * write hardware register if we can
+                        */
+                       if (can_access_pmu) {
+                               ia64_set_pmd(cnum, hw_value);
+                       } else {
+#ifdef CONFIG_SMP
+                               /*
+                                * we are guaranteed that the task is not running on the other CPU,
+                                * we indicate that this PMD will need to be reloaded if the task
+                                * is rescheduled on the CPU it ran last on.
+                                */
+/*                             ctx->ctx_reload_pmds[0] |= 1UL << cnum; */
+#endif
+                       }
+               }
+
+/*             DPRINT(("pmd[%u]=0x%lx ld=%d apmu=%d, hw_value=0x%lx ctx_pmd=0x%lx  short_reset=0x%lx " */
+/*                       "long_reset=0x%lx notify=%c seed=0x%lx mask=0x%lx used_pmds=0x%lx reset_pmds=0x%lx reload_pmds=0x%lx all_pmds=0x%lx ovfl_regs=0x%lx\n", */
+/*                     cnum, */
+/*                     value, */
+/*                     is_loaded, */
+/*                     can_access_pmu, */
+/*                     hw_value, */
+/*                     ctx->ctx_pmds[cnum].val, */
+/*                     ctx->ctx_pmds[cnum].short_reset, */
+/*                     ctx->ctx_pmds[cnum].long_reset, */
+/*                     PMC_OVFL_NOTIFY(ctx, cnum) ? 'Y':'N', */
+/*                     ctx->ctx_pmds[cnum].seed, */
+/*                     ctx->ctx_pmds[cnum].mask, */
+/*                     ctx->ctx_used_pmds[0], */
+/*                     ctx->ctx_pmds[cnum].reset_pmds[0], */
+/*                     ctx->ctx_reload_pmds[0], */
+/*                     ctx->ctx_all_pmds[0], */
+/*                     ctx->ctx_ovfl_regs[0])); */
+       }
+
+       /*
+        * make changes visible
+        */
+       if (can_access_pmu) ia64_srlz_d();
+
+       return 0;
+
+abort_mission:
+       /*
+        * for now, we have only one possibility for error
+        */
+       PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL);
+       return ret;
+}
+
+/*
+ * By the way of PROTECT_CONTEXT(), interrupts are masked while we are in this function.
+ * Therefore we know we do not have to worry about the PMU overflow interrupt. If an
+ * interrupt is delivered during the call, it will be kept pending until we leave, making
+ * it appear as if it had been generated at the UNPROTECT_CONTEXT(). At least we are
+ * guaranteed to return consistent data to the user, it may simply be old. It is not
+ * trivial to treat the overflow while inside the call because you may end up in
+ * some module sampling buffer code causing deadlocks.
+ */
+static int
+pfm_read_pmds(pfm_context_t *ctx, pfarg_reg_t *req, int count, struct pt_regs *regs)
+{
+       struct thread_struct *thread = NULL;
+       struct task_struct *task;
+       unsigned long val = 0UL, lval, ovfl_mask, sval;
+       unsigned int cnum, reg_flags = 0;
+       int i, can_access_pmu = 0, state;
+       int is_loaded, is_system, is_counting, expert_mode = 0;
+       int ret = -EINVAL;
+       pfm_reg_check_t rd_func;
+
+       /*
+        * access is possible when loaded only for
+        * self-monitoring tasks or in UP mode
+        */
+
+       state     = ctx->ctx_state;
+       is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
+       is_system = ctx->ctx_fl_system;
+       ovfl_mask = pmu_conf->ovfl_val;
+       task      = ctx->ctx_task;
+
+       if (state == PFM_CTX_ZOMBIE) return -EINVAL;
+
+       if (likely(is_loaded)) {
+               thread = &task->arch._thread;
+               /*
+                * In system wide and when the context is loaded, access can only happen
+                * when the caller is running on the CPU being monitored by the session.
+                * It does not have to be the owner (ctx_task) of the context per se.
+                */
+               if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) {
+                       DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
+                       return -EBUSY;
+               }
+               /*
+                * this can be true when not self-monitoring only in UP
+                */
+               can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0;
+
+               if (can_access_pmu) ia64_srlz_d();
+       }
+
+       DPRINT(("ld=%d apmu=%d ctx_state=%d\n",
+               is_loaded,
+               can_access_pmu,
+               state));
+
+       /*
+        * on both UP and SMP, we can only read the PMD from the hardware register when
+        * the task is the owner of the local PMU.
+        */
+
+       for (i = 0; i < count; i++, req++) {
+
+               cnum        = req->reg_num;
+               reg_flags   = req->reg_flags;
+
+               if (unlikely(!PMD_IS_IMPL(cnum))) goto error;
+               /*
+                * we can only read the register that we use. That includes
+                * the one we explicitly initialize AND the one we want included
+                * in the sampling buffer (smpl_regs).
+                *
+                * Having this restriction allows optimization in the ctxsw routine
+                * without compromising security (leaks)
+                */
+               if (unlikely(!CTX_IS_USED_PMD(cnum))) goto error;
+
+               sval        = ctx->ctx_pmds[cnum].val;
+               lval        = ctx->ctx_pmds[cnum].lval;
+               is_counting = PMD_IS_COUNTING(cnum);
+
+               /*
+                * If the task is not the current one, then we check if the
+                * PMU state is still in the local live register due to lazy ctxsw.
+                * If true, then we read directly from the registers.
+                */
+               if (can_access_pmu){
+                       val = ia64_get_pmd(cnum);
+               } else {
+                       /*
+                        * context has been saved
+                        * if context is zombie, then task does not exist anymore.
+                        * In this case, we use the full value saved in the context (pfm_flush_regs()).
+                        */
+                       val = is_loaded ? thread->pmds[cnum] : 0UL;
+               }
+               rd_func = pmu_conf->pmd_desc[cnum].read_check;
+
+               if (is_counting) {
+                       /*
+                        * XXX: need to check for overflow when loaded
+                        */
+                       val &= ovfl_mask;
+                       val += sval;
+               }
+
+               /*
+                * execute read checker, if any
+                */
+               if (unlikely(expert_mode == 0 && rd_func)) {
+                       unsigned long v = val;
+                       ret = (*rd_func)(ctx->ctx_task, ctx, cnum, &v, regs);
+                       if (ret) goto error;
+                       val = v;
+                       ret = -EINVAL;
+               }
+
+               PFM_REG_RETFLAG_SET(reg_flags, 0);
+
+               DPRINT(("pmd[%u]=0x%lx\n", cnum, val));
+
+               /*
+                * update register return value, abort all if problem during copy.
+                * we only modify the reg_flags field. no check mode is fine because
+                * access has been verified upfront in sys_perfmonctl().
+                */
+               req->reg_value            = val;
+               req->reg_flags            = reg_flags;
+               req->reg_last_reset_val   = lval;
+       }
+
+       return 0;
+
+error:
+       PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL);
+       return ret;
+}
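+
+/*
+ * The arithmetic above virtualizes each counting PMD to 64 bits: the
+ * hardware register only implements the low bits (ovfl_val = 2^k - 1 for
+ * a k-bit counter) and ctx_pmds[].val carries the software-maintained
+ * upper bits. A minimal sketch of the reconstruction, assuming a
+ * hypothetical 47-bit counter:
+ *
+ *     unsigned long ovfl_mask = (1UL << 47) - 1;         // pmu_conf->ovfl_val
+ *     unsigned long sval      = ctx->ctx_pmds[cnum].val; // soft upper bits
+ *     unsigned long hw        = ia64_get_pmd(cnum);      // live hw value
+ *     unsigned long full      = (hw & ovfl_mask) + sval; // 64-bit virtual count
+ */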
+
+/*
+ * Only call this function when a process is trying to
+ * write the debug registers (reading is always allowed)
+ */
+int
+pfm_use_debug_registers(struct task_struct *task)
+{
+       pfm_context_t *ctx = task->arch._thread.pfm_context;
+       unsigned long flags;
+       int ret = 0;
+
+       if (pmu_conf->use_rr_dbregs == 0) return 0;
+
+       DPRINT(("called for [%d]\n", task->vcpu_id));
+
+       /*
+        * do it only once
+        */
+       if (task->arch._thread.flags & IA64_THREAD_DBG_VALID) return 0;
+
+       /*
+        * Even on SMP, we do not need to use an atomic here because
+        * the only way in is via ptrace() and this is possible only when the
+        * process is stopped. Even in the case where the ctxsw out is not totally
+        * completed by the time we come here, there is no way the 'stopped' process
+        * could be in the middle of fiddling with the pfm_write_ibr_dbr() routine.
+        * So this is always safe.
+        */
+       if (ctx && ctx->ctx_fl_using_dbreg == 1) return -1;
+
+       LOCK_PFS(flags);
+
+       /*
+        * We cannot allow setting breakpoints when system wide monitoring
+        * sessions are using the debug registers.
+        */
+       if (pfm_sessions.pfs_sys_use_dbregs > 0)
+               ret = -1;
+       else
+               pfm_sessions.pfs_ptrace_use_dbregs++;
+
+       DPRINT(("ptrace_use_dbregs=%u  sys_use_dbregs=%u by [%d] ret = %d\n",
+                 pfm_sessions.pfs_ptrace_use_dbregs,
+                 pfm_sessions.pfs_sys_use_dbregs,
+                 task->vcpu_id, ret));
+
+       UNLOCK_PFS(flags);
+
+       return ret;
+}
+
+/*
+ * This function is called for every task that exits with the
+ * IA64_THREAD_DBG_VALID set. This indicates a task which was
+ * able to use the debug registers for debugging purposes via
+ * ptrace(). Therefore we know it was not using them for
+ * performance monitoring, so we only decrement the number
+ * of "ptraced" debug register users to keep the count up to date
+ */
+int
+pfm_release_debug_registers(struct task_struct *task)
+{
+       unsigned long flags;
+       int ret;
+
+       if (pmu_conf->use_rr_dbregs == 0) return 0;
+
+       LOCK_PFS(flags);
+       if (pfm_sessions.pfs_ptrace_use_dbregs == 0) {
+               printk(KERN_ERR "perfmon: invalid release for [%d] 
ptrace_use_dbregs=0\n", task->vcpu_id);
+               ret = -1;
+       }  else {
+               pfm_sessions.pfs_ptrace_use_dbregs--;
+               ret = 0;
+       }
+       UNLOCK_PFS(flags);
+
+       return ret;
+}
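+
+/*
+ * The two session counters above implement mutual exclusion between the
+ * debug-register users: pfs_ptrace_use_dbregs counts tasks using ibr/dbr
+ * through ptrace(), pfs_sys_use_dbregs counts monitoring sessions using
+ * them for range restrictions. Each side refuses to start while the other
+ * holds the registers; a sketch of the acquire pattern used above:
+ *
+ *     LOCK_PFS(flags);
+ *     if (pfm_sessions.pfs_sys_use_dbregs > 0)
+ *             ret = -1;                               // monitoring owns them
+ *     else
+ *             pfm_sessions.pfs_ptrace_use_dbregs++;   // claim for ptrace
+ *     UNLOCK_PFS(flags);
+ */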
+
+static int
+pfm_restart(pfm_context_t *ctx, struct pt_regs *regs)
+{
+       struct task_struct *task;
+       pfm_buffer_fmt_t *fmt;
+       pfm_ovfl_ctrl_t rst_ctrl;
+       int state, is_system;
+       int ret = 0;
+
+       state     = ctx->ctx_state;
+       fmt       = ctx_buf_fmt;
+       is_system = ctx->ctx_fl_system;
+       task      = PFM_CTX_TASK(ctx);
+
+       switch(state) {
+               case PFM_CTX_MASKED:
+                       break;
+               case PFM_CTX_LOADED: 
+                       if (CTX_HAS_SMPL(ctx) && fmt->fmt_restart_active) break;
+                       /* fall through */
+               case PFM_CTX_UNLOADED:
+               case PFM_CTX_ZOMBIE:
+                       DPRINT(("invalid state=%d\n", state));
+                       return -EBUSY;
+               default:
+                       DPRINT(("state=%d, cannot operate (no active_restart 
handler)\n", state));
+                       return -EINVAL;
+       }
+
+       /*
+        * In system wide and when the context is loaded, access can only happen
+        * when the caller is running on the CPU being monitored by the session.
+        * It does not have to be the owner (ctx_task) of the context per se.
+        */
+       if (is_system && ctx->ctx_cpu != smp_processor_id()) {
+               DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
+               return -EBUSY;
+       }
+
+       /* sanity check */
+       if (unlikely(task == NULL)) {
+               printk(KERN_ERR "perfmon: [%d] pfm_restart no task\n", 
current->vcpu_id);
+               return -EINVAL;
+       }
+
+       if (task == current || is_system) {
+
+               fmt = ctx->ctx_buf_fmt;
+
+               DPRINT(("restarting self %d ovfl=0x%lx\n",
+                       task->vcpu_id,
+                       ctx->ctx_ovfl_regs[0]));
+
+               if (CTX_HAS_SMPL(ctx)) {
+
+                       prefetch(ctx->ctx_smpl_hdr);
+
+                       rst_ctrl.bits.mask_monitoring = 0;
+                       rst_ctrl.bits.reset_ovfl_pmds = 0;
+
+                       if (state == PFM_CTX_LOADED)
+                               ret = pfm_buf_fmt_restart_active(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs);
+                       else
+                               ret = pfm_buf_fmt_restart(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs);
+               } else {
+                       rst_ctrl.bits.mask_monitoring = 0;
+                       rst_ctrl.bits.reset_ovfl_pmds = 1;
+               }
+
+               if (ret == 0) {
+                       if (rst_ctrl.bits.reset_ovfl_pmds)
+                               pfm_reset_regs(ctx->ctx_ovfl_regs, PFM_PMD_LONG_RESET);
+
+                       if (rst_ctrl.bits.mask_monitoring == 0) {
+                               DPRINT(("resuming monitoring for [%d]\n", 
task->vcpu_id));
+
+                       } else {
+                               DPRINT(("keeping monitoring stopped for 
[%d]\n", task->vcpu_id));
+
+                               // cannot use pfm_stop_monitoring(task, regs);
+                       }
+               }
+               /*
+                * clear overflowed PMD mask to remove any stale information
+                */
+               ctx->ctx_ovfl_regs[0] = 0UL;
+
+               /*
+                * back to LOADED state
+                */
+               ctx->ctx_state = PFM_CTX_LOADED;
+
+               /*
+                * XXX: not really useful for self monitoring
+                */
+               ctx->ctx_fl_can_restart = 0;
+
+               return 0;
+       }
+
+       return 0;
+}
+
+/*
+ * arg can be NULL and count can be zero for this function
+ */
+static int
+pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
+{
+       pfarg_dbreg_t *req = (pfarg_dbreg_t *)arg;
+       unsigned long flags;
+       dbreg_t dbreg;
+       unsigned int rnum;
+       int first_time;
+       int ret = 0, state;
+       int i, can_access_pmu = 0;
+       int is_system, is_loaded;
+
+       if (pmu_conf->use_rr_dbregs == 0) return -EINVAL;
+
+       state     = ctx->ctx_state;
+       is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
+       is_system = 1;
+
+       if (state == PFM_CTX_ZOMBIE) return -EINVAL;
+
+       /*
+        * on both UP and SMP, we can only write to the PMC when the task is
+        * the owner of the local PMU.
+        */
+       if (is_loaded) {
+               /*
+                * In system wide and when the context is loaded, access can only happen
+                * when the caller is running on the CPU being monitored by the session.
+                * It does not have to be the owner (ctx_task) of the context per se.
+                */
+               if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) {
+                       DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
+                       return -EBUSY;
+               }
+               can_access_pmu = 1;
+       }
+
+       /*
+        * we do not need to check for ipsr.db because we do clear ibr.x, dbr.r, and dbr.w
+        * ensuring that no real breakpoint can be installed via this call.
+        *
+        * IMPORTANT: regs can be NULL in this function
+        */
+
+       first_time = ctx->ctx_fl_using_dbreg == 0;
+
+       /*
+        * don't bother if we are loaded and task is being debugged
+        */
+/*     if (is_loaded && (thread->flags & IA64_THREAD_DBG_VALID) != 0) { */
+/*             DPRINT(("debug registers already in use for [%d]\n", task->vcpu_id)); */
+/*             return -EBUSY; */
+/*     } */
+
+       /*
+        * check for debug registers in system wide mode
+        *
+        * Even though a check is done in pfm_context_load(),
+        * we must repeat it here, in case the registers are
+        * written after the context is loaded
+        */
+       if (is_loaded) {
+               LOCK_PFS(flags);
+
+               if (first_time && is_system) {
+                       if (pfm_sessions.pfs_ptrace_use_dbregs)
+                               ret = -EBUSY;
+                       else
+                               pfm_sessions.pfs_sys_use_dbregs++;
+               }
+               UNLOCK_PFS(flags);
+       }
+
+       if (ret != 0) return ret;
+
+       /*
+        * mark ourself as user of the debug registers for
+        * perfmon purposes.
+        */
+/*     ctx->ctx_fl_using_dbreg = 1; */
+
+       /*
+        * clear hardware registers to make sure we don't
+        * pick up stale state.
+        *
+        * for a system wide session, we do not use
+        * thread.dbr, thread.ibr because this process
+        * never leaves the current CPU and the state
+        * is shared by all processes running on it
+        */
+       if (first_time && can_access_pmu) {
+               DPRINT(("clearing ibrs, dbrs\n"));
+               for (i=0; i < pmu_conf->num_ibrs; i++) {
+                       ia64_set_ibr(i, 0UL);
+                       ia64_dv_serialize_instruction();
+               }
+               ia64_srlz_i();
+               for (i=0; i < pmu_conf->num_dbrs; i++) {
+                       ia64_set_dbr(i, 0UL);
+                       ia64_dv_serialize_data();
+               }
+               ia64_srlz_d();
+       }
+
+       /*
+        * Now install the values into the registers
+        */
+       for (i = 0; i < count; i++, req++) {
+
+               rnum      = req->dbreg_num;
+               dbreg.val = req->dbreg_value;
+
+               ret = -EINVAL;
+
+               if ((mode == PFM_CODE_RR && rnum >= PFM_NUM_IBRS) || ((mode == PFM_DATA_RR) && rnum >= PFM_NUM_DBRS)) {
+                       DPRINT(("invalid register %u val=0x%lx mode=%d i=%d count=%d\n",
+                                 rnum, dbreg.val, mode, i, count));
+
+                       goto abort_mission;
+               }
+
+               /*
+                * make sure we do not install enabled breakpoint
+                */
+               if (rnum & 0x1) {
+                       if (mode == PFM_CODE_RR)
+                               dbreg.ibr.ibr_x = 0;
+                       else
+                               dbreg.dbr.dbr_r = dbreg.dbr.dbr_w = 0;
+               }
+
+               PFM_REG_RETFLAG_SET(req->dbreg_flags, 0);
+
+               /*
+                * Debug registers, just like PMC, can only be modified
+                * by a kernel call. Moreover, perfmon() access to those
+                * registers are centralized in this routine. The hardware
+                * does not modify the value of these registers, therefore,
+                * if we save them as they are written, we can avoid having
+                * to save them on context switch out. This is made possible
+                * by the fact that when perfmon uses debug registers, ptrace()
+                * won't be able to modify them concurrently.
+                */
+               if (mode == PFM_CODE_RR) {
+                       CTX_USED_IBR(ctx, rnum);
+
+                       if (can_access_pmu) {
+                               ia64_set_ibr(rnum, dbreg.val);
+                               ia64_dv_serialize_instruction();
+                       }
+
+                       ctx->ctx_ibrs[rnum] = dbreg.val;
+
+                       DPRINT(("write ibr%u=0x%lx used_ibrs=0x%x ld=%d 
apmu=%d\n",
+                               rnum, dbreg.val, ctx->ctx_used_ibrs[0], 
is_loaded, can_access_pmu));
+               } else {
+                       CTX_USED_DBR(ctx, rnum);
+
+                       if (can_access_pmu) {
+                               ia64_set_dbr(rnum, dbreg.val);
+                               ia64_dv_serialize_data();
+                       }
+                       ctx->ctx_dbrs[rnum] = dbreg.val;
+
+                       DPRINT(("write dbr%u=0x%lx used_dbrs=0x%x ld=%d 
apmu=%d\n",
+                               rnum, dbreg.val, ctx->ctx_used_dbrs[0], 
is_loaded, can_access_pmu));
+               }
+       }
+
+       return 0;
+
+abort_mission:
+       /*
+        * in case it was our first attempt, we undo the global modifications
+        */
+       if (first_time) {
+               LOCK_PFS(flags);
+               if (ctx->ctx_fl_system) {
+                       pfm_sessions.pfs_sys_use_dbregs--;
+               }
+               UNLOCK_PFS(flags);
+               ctx->ctx_fl_using_dbreg = 0;
+       }
+       /*
+        * install error return flag
+        */
+       PFM_REG_RETFLAG_SET(req->dbreg_flags, PFM_REG_RETFL_EINVAL);
+
+       return ret;
+}
+
+static int
+pfm_write_ibrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
+{
+       return pfm_write_ibr_dbr(PFM_CODE_RR, ctx, arg, count, regs);
+}
+
+static int
+pfm_write_dbrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
+{
+       return pfm_write_ibr_dbr(PFM_DATA_RR, ctx, arg, count, regs);
+}
+
+int
+pfm_mod_write_ibrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs)
+{
+       pfm_context_t *ctx;
+
+       if (req == NULL) return -EINVAL;
+
+       ctx = GET_PMU_CTX();
+
+       if (ctx == NULL) return -EINVAL;
+
+       /*
+        * for now limit to current task, which is enough when calling
+        * from overflow handler
+        */
+       if (task != current && ctx->ctx_fl_system == 0) return -EBUSY;
+
+       return pfm_write_ibrs(ctx, req, nreq, regs);
+}
+/* EXPORT_SYMBOL(pfm_mod_write_ibrs); */
+
+int
+pfm_mod_write_dbrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs)
+{
+       pfm_context_t *ctx;
+
+       if (req == NULL) return -EINVAL;
+
+       ctx = GET_PMU_CTX();
+
+       if (ctx == NULL) return -EINVAL;
+
+       /*
+        * for now limit to current task, which is enough when calling
+        * from overflow handler
+        */
+       if (task != current && ctx->ctx_fl_system == 0) return -EBUSY;
+
+       return pfm_write_dbrs(ctx, req, nreq, regs);
+}
+/* EXPORT_SYMBOL(pfm_mod_write_dbrs); */
+
+
+static int
+pfm_get_features(pfarg_features_t *req)
+{
+       req->ft_version = PFM_VERSION;
+       return 0;
+}
+
+static int
+pfm_stop(struct pt_regs *regs)
+{
+       int is_system = 1;
+
+       int i;
+
+       for (i = 0; i < 4; i++) {
+         DPRINT(("RING%d=%d\n", i, ring_count[i]));
+         ring_count[i] = 0;
+       }
+
+       DPRINT(("interrupt_count: %d\n", interrupt_count));
+
+       /*
+        * in system mode, we need to update the PMU directly
+        * and the user level state of the caller, which may not
+        * necessarily be the creator of the context.
+        */
+       if (is_system) {
+               /*
+                * Update local PMU first
+                *
+                * disable dcr pp
+                */
+               ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP);
+               ia64_srlz_i();
+
+               current->arch.dcr &= ~(0x1UL << 0);
+
+               /*
+                * update local cpuinfo
+                */
+               PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP);
+
+               /*
+                * stop monitoring, does srlz.i
+                */
+               pfm_clear_psr_pp();
+
+               /*
+                * stop monitoring in the caller
+                */
+               ia64_psr(regs)->pp = 0;
+
+               return 0;
+       }
+
+
+       /*
+        * per-task mode
+        */
+
+       /* stop monitoring  at kernel level */
+       pfm_clear_psr_up();
+
+       /*
+        * stop monitoring at the user level
+        */
+       ia64_psr(regs)->up = 0;
+
+       return 0;
+}
+
+
+static int
+pfm_start(struct pt_regs *regs)
+{
+       int is_system = 1;
+
+       interrupt_count = 0;
+
+       /*
+        * in system mode, we need to update the PMU directly
+        * and the user level state of the caller, which may not
+        * necessarily be the creator of the context.
+        */
+       if (is_system) {
+               struct domain *d;
+               struct vcpu *v;
+               struct pt_regs *r;
+
+               /*
+                * set user level psr.pp for the caller
+                */
+               ia64_psr(regs)->pp = 1;
+
+               /*
+                * now update the local PMU and cpuinfo
+                */
+               PFM_CPUINFO_SET(PFM_CPUINFO_DCR_PP);
+
+               /*
+                * start monitoring at kernel level
+                */
+               pfm_set_psr_pp();
+
+               /* enable dcr pp */
+               current->arch.dcr |= 0x1UL << 0;
+
+               ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP);
+               ia64_srlz_i();
+
+               for_each_domain(d) {
+                 for_each_vcpu (d, v) {
+                   r = vcpu_regs(v);
+                   ia64_psr(r)->pp = 1;
+                 }
+               }
+
+               return 0;
+       }
+
+       /*
+        * per-process mode
+        */
+
+       /* start monitoring at kernel level */
+       pfm_set_psr_up();
+
+       /*
+        * activate monitoring at user level
+        */
+       ia64_psr(regs)->up = 1;
+
+       return 0;
+}
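+
+/*
+ * Three bits gate system-wide counting: cr.dcr.pp (re-enables counting on
+ * privilege transitions), psr.pp of each executing context, and the
+ * per-counter pmc privilege mask. pfm_start()/pfm_stop() above toggle the
+ * first two. A hedged sketch of the intended pairing in this sample
+ * implementation:
+ *
+ *     pfm_start(vcpu_regs(current));  // dcr.pp=1, psr.pp=1 on every vcpu
+ *     ... PMU interrupts feed pfm_overflow_handler() ...
+ *     pfm_stop(vcpu_regs(current));   // dcr.pp=0, psr.pp=0 for the caller
+ */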
+
+static int
+pfm_get_pmc_reset(pfarg_reg_t *req, unsigned int count)
+{
+       unsigned int cnum;
+       int i;
+       int ret = -EINVAL;
+
+       for (i = 0; i < count; i++, req++) {
+
+               cnum = req->reg_num;
+
+               if (!PMC_IS_IMPL(cnum)) goto abort_mission;
+
+               req->reg_value = PMC_DFL_VAL(cnum);
+
+               PFM_REG_RETFLAG_SET(req->reg_flags, 0);
+
+               DPRINT(("pmc_reset_val pmc[%u]=0x%lx\n", cnum, req->reg_value));
+       }
+       return 0;
+
+abort_mission:
+       PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL);
+       return ret;
+}
+
+static int
+pfm_context_load(void *arg, int count, struct pt_regs *regs)
+{
+       unsigned long flags;
+       int the_cpu;
+       int ret = 0;
+       int is_system = 1;
+
+       /*
+        * can only load from unloaded or terminated state
+        */
+       if (ctx_state != PFM_CTX_UNLOADED) {
+               DPRINT(("cannot load context, invalid ctx_state=%d\n",
+                       ctx_state));
+               return -EBUSY;
+       }
+
+       the_cpu = smp_processor_id();
+
+       ret = pfm_reserve_session(current, is_system, the_cpu);
+       if (ret) goto error;
+
+       ctx_state = PFM_CTX_LOADED;
+
+       return 0;
+
+error:
+       /*
+        * we must undo the dbregs setting (for system-wide)
+        */
+       if (ret && !pfm_sessions.pfs_ptrace_use_dbregs) {
+               LOCK_PFS(flags);
+               pfm_sessions.pfs_sys_use_dbregs--;
+               UNLOCK_PFS(flags);
+       }
+       return ret;
+}
+
+/*
+ * in this function, we do not need to increase the use count
+ * for the task via get_task_struct(), because we hold the
+ * context lock. If the task were to disappear while having
+ * a context attached, it would go through pfm_exit_thread()
+ * which also grabs the context lock  and would therefore be blocked
+ * until we are here.
+ */
+
+static int
+pfm_context_unload(struct pt_regs *regs)
+{
+       int prev_state;
+       int ret;
+
+       DPRINT(("ctx_state=%d\n", ctx_state));
+
+       prev_state = ctx_state;
+
+       /*
+        * unload only when necessary
+        */
+       if (prev_state == PFM_CTX_UNLOADED) {
+               DPRINT(("ctx_state=%d, nothing to do\n", prev_state));
+               return 0;
+       }
+
+       /*
+        * clear psr and dcr bits
+        */
+       ret = pfm_stop(regs);
+       if (ret) return ret;
+
+       ctx_state = PFM_CTX_UNLOADED;
+
+       if (prev_state != PFM_CTX_ZOMBIE) 
+               pfm_unreserve_session(1, smp_processor_id());
+       /*
+        * save PMDs in context
+        * release ownership
+        */
+
+       return 0;
+}
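+
+/*
+ * Load/unload bracket a system-wide session on one CPU: pfm_context_load()
+ * reserves the session via pfm_reserve_session() and moves the context to
+ * PFM_CTX_LOADED, pfm_context_unload() stops monitoring and releases the
+ * reservation. A sketch of the expected lifecycle, with load_req standing
+ * in for a caller-supplied pfarg_load_t:
+ *
+ *     pfm_context_load(&load_req, 1, regs);   // UNLOADED -> LOADED
+ *     pfm_start(regs);
+ *     ...
+ *     pfm_stop(regs);
+ *     pfm_context_unload(regs);               // LOADED -> UNLOADED
+ */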
+
+
+long do_perfmon_op(unsigned int cmd, XEN_GUEST_HANDLE(void) arg1, XEN_GUEST_HANDLE(void) arg2, unsigned int arg3)
+{
+    long rc = 0;
+    pfm_context_t *ctx;
+    struct pt_regs *regs = vcpu_regs(current);
+
+    if ( copy_from_guest(&ctx, arg1, 1) )
+      return -EFAULT;
+
+    switch ( cmd ) {
+    case PFM_WRITE_PMCS:
+      {
+       pfarg_reg_t req;
+
+       if ( copy_from_guest(&req, arg2, 1) )
+         return -EFAULT;
+       rc = pfm_write_pmcs(&req, arg3, regs);
+       break;
+      }
+    case PFM_WRITE_PMDS:
+      {
+       pfarg_reg_t req;
+
+       if ( copy_from_guest(&req, arg2, 1) )
+         return -EFAULT;
+        rc = pfm_write_pmds(&req, arg3, regs);
+       break;
+      }
+    case PFM_READ_PMDS:
+      {
+       pfarg_reg_t req;
+
+       if ( copy_from_guest(&req, arg2, 1) )
+         return -EFAULT;
+       rc = pfm_read_pmds(ctx, &req, arg3, regs);
+       if ( copy_to_guest(arg2, &req, 1) )
+         return -EFAULT;
+       break;
+      }
+    case PFM_STOP:
+       rc = pfm_stop(regs);
+       break;
+    case PFM_START:
+       rc = pfm_start(regs);
+       break;
+    case PFM_ENABLE:           /* 0x06 obsolete */
+    case PFM_DISABLE:          /* 0x07 obsolete */
+       DPRINT(("invalid cmd=%d\n", cmd));
+       rc = -EINVAL;
+       break;
+    case PFM_DESTROY_CONTEXT:  /* 0x09 obsolete use close() */
+       DPRINT(("invalid cmd=%d\n", cmd));
+       rc = -EINVAL;
+       break;
+    case PFM_RESTART:          /* 0x0a */
+       rc = pfm_restart(ctx, regs);
+       break;
+    case PFM_PROTECT_CONTEXT:  /* 0x0b obsolete */
+       DPRINT(("invalid cmd=%d\n", cmd));
+       rc = -EINVAL;
+       break;
+    case PFM_GET_FEATURES:     /* 0x0c */
+      {
+       pfarg_features_t req;
+
+       if ( copy_from_guest(&req, arg2, 1) )
+         return -EFAULT;
+       rc = pfm_get_features(&req);
+       if ( copy_to_guest(arg2, &req, 1) )
+         return -EFAULT;
+       break;
+      }
+    case PFM_DEBUG:            /* 0x0d */
+      {
+       unsigned int m;
+       if ( copy_from_guest(&m, arg2, 1) )
+         return -EFAULT;
+/*     rc = pfm_debug(m); */
+       break;
+      }
+    case PFM_UNPROTECT_CONTEXT:        /* 0x0e obsolete */
+       DPRINT(("invalid cmd=%d\n", cmd));
+       rc = -EINVAL;
+       break;
+    case PFM_GET_PMC_RESET_VAL:        /* 0x0f */
+      {
+       pfarg_reg_t req;
+
+       if ( copy_from_guest(&req, arg2, 1) )
+         return -EFAULT;
+       rc = pfm_get_pmc_reset(&req, arg3);
+       if ( copy_to_guest(arg2, &req, 1) )
+         return -EFAULT;
+       break;
+      }
+    case PFM_LOAD_CONTEXT:     /* 0x10 */
+      {
+       pfarg_load_t req;
+       if ( copy_from_guest(&req, arg2, 1) )
+         return -EFAULT;
+       rc = pfm_context_load(&req, arg3, regs);
+       break;
+      }
+    case PFM_UNLOAD_CONTEXT:   /* 0x11 */
+       rc = pfm_context_unload(regs);
+       break;
+    case PFM_FREE_CONTEXT:     /* 0x12 */
+       pfm_context_free(ctx);
+       rc = 0;
+       break;
+    default:
+        rc = -ENOSYS;
+        break;
+    }
+
+    return rc;
+}
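+
+/*
+ * do_perfmon_op() multiplexes the perfmon commands behind a single entry
+ * point. A minimal sketch of a dom0-side sequence, assuming a hypothetical
+ * wrapper xen_perfmon_op(cmd, ctx, arg, count) that marshals the guest
+ * handles (the hypercall plumbing itself lives outside this file):
+ *
+ *     pfarg_reg_t pc = { .reg_num = 4, .reg_value = event_select };
+ *     pfarg_reg_t pd = { .reg_num = 4 };
+ *     xen_perfmon_op(PFM_WRITE_PMCS, ctx, &pc, 1);    // program the event
+ *     xen_perfmon_op(PFM_START, ctx, NULL, 0);
+ *     ...
+ *     xen_perfmon_op(PFM_STOP, ctx, NULL, 0);
+ *     xen_perfmon_op(PFM_READ_PMDS, ctx, &pd, 1);     // fetch the count
+ */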
+
+/*
+ * main overflow processing routine.
+ * it can be called from the interrupt path or explicitly during the context switch code
+ */
+static void
+pfm_overflow_handler(struct task_struct *task, u64 pmc0, struct pt_regs *regs)
+{
+       pfm_ovfl_arg_t *ovfl_arg;
+       unsigned long mask;
+       unsigned long old_val, ovfl_val, new_val;
+       unsigned long ovfl_notify = 0UL, ovfl_pmds = 0UL, reset_pmds;
+       unsigned long tstamp;
+       unsigned int i, has_smpl;
+
+       /*
+        * sanity test. Should never happen
+        */
+       if (unlikely((pmc0 & 0x1) == 0)) goto sanity_check;
+
+       tstamp   = ia64_get_itc();
+       mask     = pmc0 >> PMU_FIRST_COUNTER;
+       ovfl_val = pmu_conf->ovfl_val;
+       has_smpl = 1;
+
+       DPRINT_ovfl(("pmc0=0x%lx iip=0x%lx ri=0x%lx\n",
+                    pmc0,
+                    (regs ? regs->cr_iip : 0),
+                    (regs ? ia64_psr(regs)->ri: 0L)));
+
+       /*
+        * first we update the virtual counters
+        * assume there was a prior ia64_srlz_d() issued
+        */
+       for (i = PMU_FIRST_COUNTER; mask ; i++, mask >>= 1) {
+
+               /* skip pmd which did not overflow */
+               if ((mask & 0x1) == 0) continue;
+
+               /*
+                * Note that the pmd is not necessarily 0 at this point as qualified events
+                * may have happened before the PMU was frozen. The residual count is not
+                * taken into consideration here but will be with any read of the pmd via
+                * pfm_read_pmds().
+                */
+               old_val              = new_val = ctx_pmds[i].val;
+               new_val             += 1 + ovfl_val;
+               ctx_pmds[i].val      = new_val;
+
+               /*
+                * check for overflow condition
+                */
+               if (likely(old_val > new_val)) {
+                       ovfl_pmds |= 1UL << i;
+                       if (PMC_OVFL_NOTIFY(i)) ovfl_notify |= 1UL << i;
+               }
+       }
+
+       /*
+        * there was no 64-bit overflow, nothing else to do
+        */
+       if (ovfl_pmds == 0UL) return;
+
+       /* 
+        * reset all control bits
+        */
+       reset_pmds    = 0UL;
+
+       /*
+        * if a sampling format module exists, then we "cache" the overflow by 
+        * calling the module's handler() routine.
+        */
+       if (has_smpl) {
+               unsigned long pmd_mask;
+               int ret = 0;
+
+               pmd_mask = ovfl_pmds >> PMU_FIRST_COUNTER;
+               ovfl_arg = &pfm_get_cpu_var(ovfl_arg);
+
+               for(i=PMU_FIRST_COUNTER; pmd_mask && ret == 0; i++, pmd_mask >>= 1) {
+                       u64 psr;
+
+                       mask = 1UL << i;
+
+                       if ((pmd_mask & 0x1) == 0) continue;
+
+                       ovfl_arg->ovfl_pmd      = (unsigned char)i;
+                       ovfl_arg->active_set    = 0;
+                       ovfl_arg->ovfl_ctrl.val = 0; /* module must fill in all fields */
+                       ovfl_arg->pmd_eventid    = ctx_pmds[i].eventid;
+
+                       psr = pfm_get_psr();
+
+                       DPRINT_ovfl((">> ctx_pmd[%d] pmd=0x%lx ovfl_val=0x%lx 
ovfl_pmds=0x%lx regs=0x%p cregs=0x%p psr_pp=%d psr.pp=%d\n",
+                                    i,
+                                    ia64_get_pmd(i) & ovfl_val,
+                                    ovfl_val,
+                                    ovfl_pmds,
+                                    regs,
+                                    vcpu_regs(current),
+                                    ia64_psr(regs)->pp,
+                                    psr & IA64_PSR_PP ? 1 : 0));
+
+                       ring_count[(regs->cr_ipsr & 0x300000000) >> 32]++;
+
+                       /*
+                        * call custom buffer format record (handler) routine
+                        */
+                       ret = (*ctx_buf_fmt->fmt_handler)(task, NULL, ovfl_arg, 
regs, tstamp);
+
+                       /*
+                        * build the bitmask of pmds to reset now
+                        */
+                       if (ovfl_arg->ovfl_ctrl.bits.reset_ovfl_pmds) reset_pmds |= mask;
+               }
+               /*
+                * when the module cannot handle the rest of the overflows, we abort right here
+                */
+               if (ret && pmd_mask) {
+                       DPRINT(("handler aborts leftover ovfl_pmds=0x%lx\n",
+                               pmd_mask<<PMU_FIRST_COUNTER));
+               }
+               /*
+                * remove the pmds we reset now from the set of pmds to reset in pfm_restart()
+                */
+               ovfl_pmds &= ~reset_pmds;
+       }
+
+/*     DPRINT_ovfl(("ovfl_pmds=0x%lx reset_pmds=0x%lx\n", ovfl_pmds, 
reset_pmds)); */
+
+       /*
+        * reset the requested PMD registers using the short reset values
+        */
+       if (reset_pmds) {
+               unsigned long bm = reset_pmds;
+               pfm_reset_regs(&bm, PFM_PMD_SHORT_RESET);
+       }
+
+/*     DPRINT_ovfl(("owner [%d] ovfl_pmds=0x%lx ovfl_notify=0x%lx\n", */
+/*                  GET_PMU_OWNER() ? GET_PMU_OWNER()->vcpu_id : -1, */
+/*                  ovfl_pmds, */
+/*                  ovfl_notify)); */
+       return;
+
+sanity_check:
+       printk(KERN_ERR "perfmon: CPU%d overflow handler [%d] pmc0=0x%lx\n",
+                       smp_processor_id(),
+                       task ? task->vcpu_id : -1,
+                       pmc0);
+       return;
+
+/* stop_monitoring: */
+       /*
+        * in SMP, zombie context is never restored but reclaimed in pfm_load_regs().
+        * Moreover, zombies are also reclaimed in pfm_save_regs(). Therefore we can
+        * come here as zombie only if the task is the current task. In which case, we
+        * can access the PMU hardware directly.
+        *
+        * Note that zombies do have PM_VALID set. So here we do the minimal.
+        *
+        * In case the context was zombified it could not be reclaimed at the time
+        * the monitoring program exited. At this point, the PMU reservation has been
+        * returned, the sampling buffer has been freed. We must convert this call
+        * into a spurious interrupt. However, we must also avoid infinite overflows
+        * by stopping monitoring for this task. We can only come here for a per-task
+        * context. All we need to do is to stop monitoring using the psr bits which
+        * are always task private. By re-enabling secure monitoring, we ensure that
+        * the monitored task will not be able to re-activate monitoring.
+        * The task will eventually be context switched out, at which point the context
+        * will be reclaimed (that includes releasing ownership of the PMU).
+        *
+        * So there might be a window of time where the number of per-task sessions is zero
+        * yet one PMU might have an owner and get at most one overflow interrupt for a zombie
+        * context. This is safe because if a per-task session comes in, it will push this one
+        * out and by virtue of pfm_save_regs(), this one will disappear. If a system wide
+        * session is forced on that CPU, given that we use task pinning, pfm_save_regs() will
+        * also push our zombie context out.
+        *
+        * Overall pretty hairy stuff....
+        */
+       DPRINT(("ctx is zombie for [%d], converted to spurious\n", task ? 
task->vcpu_id: -1));
+       pfm_clear_psr_up();
+       ia64_psr(regs)->up = 0;
+       ia64_psr(regs)->sp = 1;
+       return;
+}
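+
+/*
+ * Worked example of the overflow bookkeeping above, assuming a 47-bit
+ * hardware counter (ovfl_val = 2^47 - 1): one hardware overflow means the
+ * counter wrapped once, so the virtual count advances by 1 + ovfl_val:
+ *
+ *     old_val = ctx_pmds[i].val;         // e.g. 0xffffffffffff0000
+ *     new_val = old_val + 1 + ovfl_val;  // adds 2^47, may wrap past 2^64
+ *     // a 64-bit overflow occurred iff the sum wrapped around:
+ *     if (old_val > new_val)
+ *             ovfl_pmds |= 1UL << i;
+ */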
+
+static int
+pfm_do_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
+{
+       struct task_struct *task;
+       unsigned long flags;
+       u64 pmc0;
+       int retval = 0;
+
+       interrupt_count++;
+
+       /*
+        * srlz.d done before arriving here
+        */
+       pmc0 = ia64_get_pmc(0);
+
+       task = GET_PMU_OWNER();
+
+       /*
+        * if we have some pending bits set
+        * assumes : if any PMC0.bit[63-1] is set, then PMC0.fr = 1
+        */
+/*     if (PMC0_HAS_OVFL(pmc0)  && task) { */
+       if (PMC0_HAS_OVFL(pmc0)) {
+               /*
+                * we assume that pmc0.fr is always set here
+                */
+
+               PROTECT_CTX_NOPRINT(flags);
+
+               pfm_overflow_handler(task, pmc0, regs);
+
+               UNPROTECT_CTX_NOPRINT(flags);
+
+       } else {
+               retval = -1;
+       }
+       /*
+        * keep it unfrozen at all times
+        */
+       pfm_unfreeze_pmu();
+
+       return retval;
+
+}
+
+static irqreturn_t
+pfm_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
+{
+       int this_cpu;
+       int ret;
+
+       this_cpu = get_cpu();
+
+       ret = pfm_do_interrupt_handler(irq, arg, regs);
+
+       put_cpu_no_resched();
+
+       return IRQ_HANDLED;
+}
+
+/*
+ * we come here as soon as local_cpu_data->pfm_syst_wide is set. this happens
+ * during pfm_enable() hence before pfm_start(). We cannot assume monitoring
+ * is active or inactive based on mode. We must rely on the value in
+ * local_cpu_data->pfm_syst_info
+ */
+void
+pfm_syst_wide_update_task(struct task_struct *task, unsigned long info, int is_ctxswin)
+{
+       struct pt_regs *regs;
+       unsigned long dcr;
+       unsigned long dcr_pp;
+
+       dcr_pp = info & PFM_CPUINFO_DCR_PP ? 1 : 0;
+
+       /*
+        * pid 0 is guaranteed to be the idle task. There is one such task with pid 0
+        * on every CPU, so we can rely on the pid to identify the idle task.
+        */
+       if ((info & PFM_CPUINFO_EXCL_IDLE) == 0 || task->vcpu_id) {
+               regs = vcpu_regs(task);
+               ia64_psr(regs)->pp = is_ctxswin ? dcr_pp : 0;
+               return;
+       }
+       /*
+        * if monitoring has started
+        */
+       if (dcr_pp) {
+               dcr = ia64_getreg(_IA64_REG_CR_DCR);
+               /*
+                * context switching in?
+                */
+               if (is_ctxswin) {
+                       /* mask monitoring for the idle task */
+                       ia64_setreg(_IA64_REG_CR_DCR, dcr & ~IA64_DCR_PP);
+                       pfm_clear_psr_pp();
+                       ia64_srlz_i();
+                       return;
+               }
+               /*
+                * context switching out
+                * restore monitoring for next task
+                *
+                * Due to inlining this odd if-then-else construction generates
+                * better code.
+                */
+               ia64_setreg(_IA64_REG_CR_DCR, dcr | IA64_DCR_PP);
+               pfm_set_psr_pp();
+               ia64_srlz_i();
+       }
+}
+
+void
+pfm_save_regs(struct task_struct *task)
+{
+       struct thread_struct *t;
+       unsigned long flags;
+       u64 psr;
+
+       t = &task->arch._thread;
+
+       /*
+        * we always come here with interrupts ALREADY disabled by
+        * the scheduler. So we simply need to protect against concurrent
+        * access, not CPU concurrency.
+        */
+       flags = pfm_protect_ctx_ctxsw();
+
+       /*
+        * save current PSR: needed because we modify it
+        */
+       ia64_srlz_d();
+       psr = pfm_get_psr();
+
+/*     BUG_ON(psr & (IA64_PSR_I)); */
+
+       /*
+        * save pmc0 ia64_srlz_d() done in pfm_save_pmds()
+        * we will need it on the restore path to check
+        * for pending overflow.
+        */
+       t->pmcs[0] = ia64_get_pmc(0);
+
+       /*
+        * unfreeze PMU if had pending overflows
+        */
+       if (t->pmcs[0] & ~0x1UL) pfm_unfreeze_pmu();
+
+       /*
+        * finally, allow context access.
+        * interrupts will still be masked after this call.
+        */
+       pfm_unprotect_ctx_ctxsw(flags);
+}
+
+
+void
+pfm_load_regs (struct task_struct *task)
+{
+       struct thread_struct *t;
+       unsigned long flags;
+       u64 psr;
+       int need_irq_resend;
+
+       t = &task->arch._thread;
+       flags = pfm_protect_ctx_ctxsw();
+       psr   = pfm_get_psr();
+
+       need_irq_resend = pmu_conf->flags & PFM_PMU_IRQ_RESEND;
+
+/*     BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP)); */
+/*     BUG_ON(psr & IA64_PSR_I); */
+
+       /*
+        * check for pending overflow at the time the state
+        * was saved.
+        */
+       if (unlikely(PMC0_HAS_OVFL(t->pmcs[0])) || 1) {
+               /*
+                * reload pmc0 with the overflow information
+                * On McKinley PMU, this will trigger a PMU interrupt
+                */
+               ia64_set_pmc(0, t->pmcs[0]);
+               ia64_srlz_d();
+               t->pmcs[0] = 0UL;
+
+               /*
+                * will replay the PMU interrupt
+                */
+               if (need_irq_resend) hw_resend_irq(NULL, IA64_PERFMON_VECTOR);
+       }
+
+       /*
+        * allow concurrent access to context
+        */
+       pfm_unprotect_ctx_ctxsw(flags);
+}
+
+
+static struct irqaction perfmon_irqaction = {
+       .handler = (void*)pfm_interrupt_handler,
+       .name    = "perfmon"
+};
+
+/*
+ * perfmon initialization routine, called from the initcall() table
+ */
+static int __init
+pfm_probe_pmu(void)
+{
+       pmu_config_t **p;
+       int family;
+
+       family = local_cpu_data->family;
+       p      = pmu_confs;
+
+       while(*p) {
+               if ((*p)->probe) {
+                       if ((*p)->probe() == 0) goto found;
+               } else if ((*p)->pmu_family == family || (*p)->pmu_family == 0xff) {
+                       goto found;
+               }
+               p++;
+       }
+       return -1;
+found:
+       pmu_conf = *p;
+       return 0;
+}
+
+
+int __init
+pfm_init(void)
+{
+       unsigned int n, n_counters, i;
+
+       printk("perfmon: version %u.%u IRQ 0x%x\n",
+               PFM_VERSION_MAJ,
+               PFM_VERSION_MIN,
+               IA64_PERFMON_VECTOR);
+
+       if (pfm_probe_pmu()) {
+               printk(KERN_INFO "perfmon: disabled, there is no support for 
processor family %d\n", 
+                               local_cpu_data->family);
+               return -ENODEV;
+       }
+
+       /*
+        * compute the number of implemented PMD/PMC from the
+        * description tables
+        */
+       n = 0;
+       for (i=0; PMC_IS_LAST(i) == 0;  i++) {
+               if (PMC_IS_IMPL(i) == 0) continue;
+               pmu_conf->impl_pmcs[i>>6] |= 1UL << (i&63);
+               n++;
+       }
+       pmu_conf->num_pmcs = n;
+
+       n = 0; n_counters = 0;
+       for (i=0; PMD_IS_LAST(i) == 0;  i++) {
+               if (PMD_IS_IMPL(i) == 0) continue;
+               pmu_conf->impl_pmds[i>>6] |= 1UL << (i&63);
+               n++;
+               if (PMD_IS_COUNTING(i)) n_counters++;
+       }
+       pmu_conf->num_pmds      = n;
+       pmu_conf->num_counters  = n_counters;
+
+       /*
+        * sanity checks on the number of debug registers
+        */
+       if (pmu_conf->use_rr_dbregs) {
+               if (pmu_conf->num_ibrs > IA64_NUM_DBG_REGS) {
+                       printk(KERN_INFO "perfmon: unsupported number of code 
debug registers (%u)\n", pmu_conf->num_ibrs);
+                       pmu_conf = NULL;
+                       return -1;
+               }
+               if (pmu_conf->num_dbrs > IA64_NUM_DBG_REGS) {
+                       printk(KERN_INFO "perfmon: unsupported number of data 
debug registers (%u)\n", pmu_conf->num_ibrs);
+                       pmu_conf = NULL;
+                       return -1;
+               }
+       }
+
+       printk("perfmon: %s PMU detected, %u PMCs, %u PMDs, %u counters (%lu 
bits)\n",
+              pmu_conf->pmu_name,
+              pmu_conf->num_pmcs,
+              pmu_conf->num_pmds,
+              pmu_conf->num_counters,
+              ffz(pmu_conf->ovfl_val));
+
+       /* sanity check */
+       if (pmu_conf->num_pmds >= IA64_NUM_PMD_REGS || pmu_conf->num_pmcs >= IA64_NUM_PMC_REGS) {
+               printk(KERN_ERR "perfmon: not enough pmc/pmd, perfmon 
disabled\n");
+               pmu_conf = NULL;
+               return -1;
+       }
+
+       /*
+        * initialize all our spinlocks
+        */
+       spin_lock_init(&pfm_sessions.pfs_lock);
+       spin_lock_init(&pfm_buffer_fmt_lock);
+
+       spin_lock_init(&ctx_lock);
+
+       ctx_state = PFM_CTX_UNLOADED;
+
+       return 0;
+}
+__initcall(pfm_init);
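+
+/*
+ * pfm_init() collapses the register description tables into per-word
+ * implementation bitmasks, one bit per register, 64 registers per unsigned
+ * long:
+ *
+ *     impl_pmcs[i >> 6] |= 1UL << (i & 63);   // set bit i of the bitmap
+ *
+ * e.g. pmc4 sets bit 4 of impl_pmcs[0], while a (hypothetical) register
+ * index of 70 would set bit 6 of impl_pmcs[1].
+ */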
+
+/*
+ * this function is called before pfm_init()
+ */
+void
+pfm_init_percpu (void)
+{
+       /*
+        * make sure no measurement is active
+        * (may inherit programmed PMCs from EFI).
+        */
+       pfm_clear_psr_pp();
+       pfm_clear_psr_up();
+
+       /*
+        * we run with the PMU not frozen at all times
+        */
+       pfm_unfreeze_pmu();
+
+       if (smp_processor_id() == 0)
+               register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction);
+
+       ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR);
+       ia64_srlz_d();
+}
+
+/*
+ * used for debug purposes only
+ */
+void
+dump_pmu_state(const char *from)
+{
+       struct task_struct *task;
+       struct thread_struct *t;
+       struct pt_regs *regs;
+       pfm_context_t *ctx;
+       unsigned long psr, dcr, info, flags;
+       int i, this_cpu;
+
+       local_irq_save(flags);
+
+       this_cpu = smp_processor_id();
+       regs     = vcpu_regs(current);
+       info     = PFM_CPUINFO_GET();
+       dcr      = ia64_getreg(_IA64_REG_CR_DCR);
+
+       if (info == 0 && ia64_psr(regs)->pp == 0 && (dcr & IA64_DCR_PP) == 0) {
+               local_irq_restore(flags);
+               return;
+       }
+
+       printk("CPU%d from %s() current [%d] iip=0x%lx\n", 
+               this_cpu, 
+               from, 
+               current->vcpu_id, 
+               regs->cr_iip);
+
+       task = GET_PMU_OWNER();
+       ctx  = GET_PMU_CTX();
+
+       printk("->CPU%d owner [%d] ctx=%p\n", this_cpu, task ? task->vcpu_id : 
-1, ctx);
+
+       psr = pfm_get_psr();
+
+       printk("->CPU%d pmc0=0x%lx psr.pp=%d psr.up=%d dcr.pp=%d 
syst_info=0x%lx user_psr.up=%d user_psr.pp=%d\n", 
+               this_cpu,
+               ia64_get_pmc(0),
+               psr & IA64_PSR_PP ? 1 : 0,
+               psr & IA64_PSR_UP ? 1 : 0,
+               dcr & IA64_DCR_PP ? 1 : 0,
+               info,
+               ia64_psr(regs)->up,
+               ia64_psr(regs)->pp);
+
+       ia64_psr(regs)->up = 0;
+       ia64_psr(regs)->pp = 0;
+
+       t = &current->arch._thread;
+
+       for (i=1; PMC_IS_LAST(i) == 0; i++) {
+               if (PMC_IS_IMPL(i) == 0) continue;
+               printk("->CPU%d pmc[%d]=0x%lx thread_pmc[%d]=0x%lx\n", 
this_cpu, i, ia64_get_pmc(i), i, t->pmcs[i]);
+       }
+
+       for (i=1; PMD_IS_LAST(i) == 0; i++) {
+               if (PMD_IS_IMPL(i) == 0) continue;
+               printk("->CPU%d pmd[%d]=0x%lx thread_pmd[%d]=0x%lx\n", 
this_cpu, i, ia64_get_pmd(i), i, t->pmds[i]);
+       }
+
+       if (ctx) {
+               printk("->CPU%d ctx_state=%d vaddr=%p addr=%p fd=%d 
ctx_task=[%d] saved_psr_up=0x%lx\n",
+                               this_cpu,
+                               ctx->ctx_state,
+                               ctx->ctx_smpl_vaddr,
+                               ctx->ctx_smpl_hdr,
+                               ctx->ctx_msgq_head,
+                               ctx->ctx_msgq_tail,
+                               ctx->ctx_saved_psr_up);
+       }
+       local_irq_restore(flags);
+}
+
+/*
+ * called from process.c:copy_thread(). task is new child.
+ */
+void
+pfm_inherit(struct task_struct *task, struct pt_regs *regs)
+{
+       struct thread_struct *thread;
+
+       DPRINT(("perfmon: pfm_inherit clearing state for [%d]\n", 
task->vcpu_id));
+
+       thread = &task->arch._thread;
+
+       /*
+        * cut links inherited from parent (current)
+        */
+       thread->pfm_context = NULL;
+
+       PFM_SET_WORK_PENDING(task, 0);
+
+       /*
+        * the psr bits are already set properly in copy_thread()
+        */
+}
+#endif /* CONFIG_PERFMON */
diff -Nur xen-ia64-unstable.hg/xen/arch/ia64/linux-xen/perfmon_default_smpl.c xenoprof-ia64-unstable/xen/arch/ia64/linux-xen/perfmon_default_smpl.c
--- xen-ia64-unstable.hg/xen/arch/ia64/linux-xen/perfmon_default_smpl.c 1970-01-01 09:00:00.000000000 +0900
+++ xenoprof-ia64-unstable/xen/arch/ia64/linux-xen/perfmon_default_smpl.c       2006-06-30 15:25:01.000000000 +0900
@@ -0,0 +1,297 @@
+/*
+ * Copyright (C) 2002-2003 Hewlett-Packard Co
+ *               Stephane Eranian <eranian@xxxxxxxxxx>
+ *
+ * This file implements the default sampling buffer format
+ * for the Linux/ia64 perfmon-2 subsystem.
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/config.h>
+#include <linux/init.h>
+#include <asm/delay.h>
+#include <linux/smp.h>
+
+#include <asm/perfmon.h>
+#include <asm/perfmon_default_smpl.h>
+
+MODULE_AUTHOR("Stephane Eranian <eranian@xxxxxxxxxx>");
+MODULE_DESCRIPTION("perfmon default sampling format");
+MODULE_LICENSE("GPL");
+
+#define DEFAULT_DEBUG 1
+
+#ifdef DEFAULT_DEBUG
+#define DPRINT(a) \
+       do { \
+               if (unlikely(pfm_sysctl.debug > 0)) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \
+       } while (0)
+
+#define DPRINT_ovfl(a) \
+       do { \
+               if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl > 0)) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \
+       } while (0)
+
+#else
+#define DPRINT(a)
+#define DPRINT_ovfl(a)
+#endif
+
+static int
+default_validate(struct task_struct *task, unsigned int flags, int cpu, void *data)
+{
+       pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t*)data;
+       int ret = 0;
+
+       if (data == NULL) {
+               DPRINT(("[%d] no argument passed\n", task->pid));
+               return -EINVAL;
+       }
+
+       DPRINT(("[%d] validate flags=0x%x CPU%d\n", task->pid, flags, cpu));
+
+       /*
+        * must hold at least the buffer header + one minimally sized entry
+        */
+       if (arg->buf_size < PFM_DEFAULT_SMPL_MIN_BUF_SIZE) return -EINVAL;
+
+       DPRINT(("buf_size=%lu\n", arg->buf_size));
+
+       return ret;
+}
+
+static int
+default_get_size(struct task_struct *task, unsigned int flags, int cpu, void *data, unsigned long *size)
+{
+       pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t *)data;
+
+       /*
+        * size has been validated in default_validate
+        */
+       *size = arg->buf_size;
+
+       return 0;
+}
+
+static int
+default_init(struct task_struct *task, void *buf, unsigned int flags, int cpu, void *data)
+{
+       pfm_default_smpl_hdr_t *hdr;
+       pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t *)data;
+
+       hdr = (pfm_default_smpl_hdr_t *)buf;
+
+       hdr->hdr_version      = PFM_DEFAULT_SMPL_VERSION;
+       hdr->hdr_buf_size     = arg->buf_size;
+       hdr->hdr_cur_offs     = sizeof(*hdr);
+       hdr->hdr_overflows    = 0UL;
+       hdr->hdr_count        = 0UL;
+
+       DPRINT(("[%d] buffer=%p buf_size=%lu hdr_size=%lu hdr_version=%u 
cur_offs=%lu\n",
+               task->pid,
+               buf,
+               hdr->hdr_buf_size,
+               sizeof(*hdr),
+               hdr->hdr_version,
+               hdr->hdr_cur_offs));
+
+       return 0;
+}
+
+static int
+default_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, struct pt_regs *regs, unsigned long stamp)
+{
+       pfm_default_smpl_hdr_t *hdr;
+       pfm_default_smpl_entry_t *ent;
+       void *cur, *last;
+       unsigned long *e, entry_size;
+       unsigned int npmds, i;
+       unsigned char ovfl_pmd;
+       unsigned char ovfl_notify;
+
+       if (unlikely(buf == NULL || arg == NULL || regs == NULL || task == NULL)) {
+               DPRINT(("[%d] invalid arguments buf=%p arg=%p\n", task->pid, buf, arg));
+               return -EINVAL;
+       }
+
+       hdr         = (pfm_default_smpl_hdr_t *)buf;
+       cur         = buf+hdr->hdr_cur_offs;
+       last        = buf+hdr->hdr_buf_size;
+       ovfl_pmd    = arg->ovfl_pmd;
+       ovfl_notify = arg->ovfl_notify;
+
+       /*
+        * precheck for sanity
+        */
+       if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) goto full;
+
+       npmds = hweight64(arg->smpl_pmds[0]);
+
+       ent = (pfm_default_smpl_entry_t *)cur;
+
+       prefetch(arg->smpl_pmds_values);
+
+       entry_size = sizeof(*ent) + (npmds << 3);
+
+       /* position for first pmd */
+       e = (unsigned long *)(ent+1);
+
+       hdr->hdr_count++;
+
+       DPRINT_ovfl(("[%d] count=%lu cur=%p last=%p free_bytes=%lu ovfl_pmd=%d 
ovfl_notify=%d npmds=%u\n",
+                       task->pid,
+                       hdr->hdr_count,
+                       cur, last,
+                       last-cur,
+                       ovfl_pmd,
+                       ovfl_notify, npmds));
+
+       /*
+        * current = task running at the time of the overflow.
+        *
+        * per-task mode:
+        *      - this is usually the task being monitored.
+        *        Under certain conditions, it might be a different task
+        *
+        * system-wide:
+        *      - this is not necessarily the task controlling the session
+        */
+       ent->pid            = current->pid;
+       ent->ovfl_pmd       = ovfl_pmd;
+       ent->last_reset_val = arg->pmd_last_reset; //pmd[0].reg_last_reset_val;
+
+       /*
+        * where did the fault happen (includes slot number)
+        */
+       ent->ip = regs->cr_iip | ((regs->cr_ipsr >> 41) & 0x3);
+
+       ent->tstamp    = stamp;
+       ent->cpu       = smp_processor_id();
+       ent->set       = arg->active_set;
+       ent->tgid      = current->tgid;
+
+       /*
+        * selectively store PMDs in increasing index number
+        */
+       if (npmds) {
+               unsigned long *val = arg->smpl_pmds_values;
+               for(i=0; i < npmds; i++) {
+                       *e++ = *val++;
+               }
+       }
+
+       /*
+        * update position for next entry
+        */
+       hdr->hdr_cur_offs += entry_size;
+       cur               += entry_size;
+
+       /*
+        * post check to avoid losing the last sample
+        */
+       if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) goto full;
+
+       /*
+        * keep same ovfl_pmds, ovfl_notify
+        */
+       arg->ovfl_ctrl.bits.notify_user     = 0;
+       arg->ovfl_ctrl.bits.block_task      = 0;
+       arg->ovfl_ctrl.bits.mask_monitoring = 0;
+       arg->ovfl_ctrl.bits.reset_ovfl_pmds = 1; /* reset before returning from interrupt handler */
+
+       return 0;
+full:
+       DPRINT_ovfl(("sampling buffer full free=%lu, count=%lu, 
ovfl_notify=%d\n", last-cur, hdr->hdr_count, ovfl_notify));
+
+       /*
+        * increment number of buffer overflow.
+        * important to detect duplicate set of samples.
+        */
+       hdr->hdr_overflows++;
+
+       /*
+        * if no notification requested, then we saturate the buffer
+        */
+       if (ovfl_notify == 0) {
+               arg->ovfl_ctrl.bits.notify_user     = 0;
+               arg->ovfl_ctrl.bits.block_task      = 0;
+               arg->ovfl_ctrl.bits.mask_monitoring = 1;
+               arg->ovfl_ctrl.bits.reset_ovfl_pmds = 0;
+       } else {
+               arg->ovfl_ctrl.bits.notify_user     = 1;
+               arg->ovfl_ctrl.bits.block_task      = 1; /* ignored for non-blocking context */
+               arg->ovfl_ctrl.bits.mask_monitoring = 1;
+               arg->ovfl_ctrl.bits.reset_ovfl_pmds = 0; /* no reset now */
+       }
+       return -1; /* we are full, sorry */
+}
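+
+/*
+ * Record layout used by default_handler() above: a fixed header followed
+ * by the sampled PMD values in increasing index order, so
+ *
+ *     entry_size = sizeof(pfm_default_smpl_entry_t) + (npmds << 3);
+ *
+ * e.g. assuming smpl_pmds[0] selects pmd4 and pmd5 (npmds = 2), each
+ * record adds sizeof(*ent) + 16 bytes to hdr_cur_offs.
+ */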
+
+static int
+default_restart(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs)
+{
+       pfm_default_smpl_hdr_t *hdr;
+
+       hdr = (pfm_default_smpl_hdr_t *)buf;
+
+       hdr->hdr_count    = 0UL;
+       hdr->hdr_cur_offs = sizeof(*hdr);
+
+       ctrl->bits.mask_monitoring = 0;
+       ctrl->bits.reset_ovfl_pmds = 1; /* uses long-reset values */
+
+       return 0;
+}
+
+static int
+default_exit(struct task_struct *task, void *buf, struct pt_regs *regs)
+{
+       DPRINT(("[%d] exit(%p)\n", task->pid, buf));
+       return 0;
+}
+
+static pfm_buffer_fmt_t default_fmt={
+       .fmt_name           = "default_format",
+       .fmt_uuid           = PFM_DEFAULT_SMPL_UUID,
+       .fmt_arg_size       = sizeof(pfm_default_smpl_arg_t),
+       .fmt_validate       = default_validate,
+       .fmt_getsize        = default_get_size,
+       .fmt_init           = default_init,
+       .fmt_handler        = default_handler,
+       .fmt_restart        = default_restart,
+       .fmt_restart_active = default_restart,
+       .fmt_exit           = default_exit,
+};
+
+static int __init
+pfm_default_smpl_init_module(void)
+{
+       int ret;
+
+       ret = pfm_register_buffer_fmt(&default_fmt);
+       if (ret == 0) {
+               printk("perfmon_default_smpl: %s v%u.%u registered\n",
+                       default_fmt.fmt_name,
+                       PFM_DEFAULT_SMPL_VERSION_MAJ,
+                       PFM_DEFAULT_SMPL_VERSION_MIN);
+       } else {
+               printk("perfmon_default_smpl: %s cannot register ret=%d\n",
+                       default_fmt.fmt_name,
+                       ret);
+       }
+
+       return ret;
+}
+
+static void __exit
+pfm_default_smpl_cleanup_module(void)
+{
+       int ret;
+       ret = pfm_unregister_buffer_fmt(default_fmt.fmt_uuid);
+
+       printk("perfmon_default_smpl: unregister %s=%d\n", 
default_fmt.fmt_name, ret);
+}
+
+module_init(pfm_default_smpl_init_module);
+module_exit(pfm_default_smpl_cleanup_module);
+
diff -Nur xen-ia64-unstable.hg/xen/arch/ia64/linux-xen/perfmon_generic.h xenoprof-ia64-unstable/xen/arch/ia64/linux-xen/perfmon_generic.h
--- xen-ia64-unstable.hg/xen/arch/ia64/linux-xen/perfmon_generic.h      1970-01-01 09:00:00.000000000 +0900
+++ xenoprof-ia64-unstable/xen/arch/ia64/linux-xen/perfmon_generic.h    2006-06-30 15:25:01.000000000 +0900
@@ -0,0 +1,45 @@
+/*
+ * This file contains the generic PMU register description tables
+ * and pmc checker used by perfmon.c.
+ *
+ * Copyright (C) 2002-2003  Hewlett Packard Co
+ *               Stephane Eranian <eranian@xxxxxxxxxx>
+ */
+
+static pfm_reg_desc_t pfm_gen_pmc_desc[PMU_MAX_PMCS]={
+/* pmc0  */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc1  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc2  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc3  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc4  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc5  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc6  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc7  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+           { PFM_REG_END     , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
+};
+
+static pfm_reg_desc_t pfm_gen_pmd_desc[PMU_MAX_PMDS]={
+/* pmd0  */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
+/* pmd1  */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
+/* pmd2  */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
+/* pmd3  */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
+/* pmd4  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 
0UL}, {RDEP(4),0UL, 0UL, 0UL}},
+/* pmd5  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 
0UL}, {RDEP(5),0UL, 0UL, 0UL}},
+/* pmd6  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 
0UL}, {RDEP(6),0UL, 0UL, 0UL}},
+/* pmd7  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 
0UL}, {RDEP(7),0UL, 0UL, 0UL}},
+           { PFM_REG_END     , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end 
marker */
+};
+
+/*
+ * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
+ */
+static pmu_config_t pmu_conf_gen={
+       .pmu_name   = "Generic",
+       .pmu_family = 0xff, /* any */
+       .ovfl_val   = (1UL << 32) - 1,
+       .num_ibrs   = 0, /* not used */
+       .num_dbrs   = 0, /* not used */
+       .pmd_desc   = pfm_gen_pmd_desc,
+       .pmc_desc   = pfm_gen_pmc_desc
+};
+
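The dep_pmd/dep_pmc words in these descriptor tables are plain bitmasks over
register numbers. A small sketch, assuming RDEP(n) is (1UL << (n)) as in the
Linux perfmon headers this file derives from:

    /* Sketch of the register-dependency bitmasks used in the descriptor
     * tables. Assumes RDEP(n) == (1UL << (n)), as in Linux perfmon.
     */
    #include <stdio.h>

    #define RDEP(n) (1UL << (n))

    int main(void)
    {
        /* e.g. a counter whose value depends on pmd4 and pmd5 */
        unsigned long dep_pmd = RDEP(4) | RDEP(5);

        printf("mask=0x%lx, pmd5 in set: %s\n",
               dep_pmd, (dep_pmd & RDEP(5)) ? "yes" : "no");
        return 0;
    }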
diff -Nur xen-ia64-unstable.hg/xen/arch/ia64/linux-xen/perfmon_itanium.h 
xenoprof-ia64-unstable/xen/arch/ia64/linux-xen/perfmon_itanium.h
--- xen-ia64-unstable.hg/xen/arch/ia64/linux-xen/perfmon_itanium.h      
1970-01-01 09:00:00.000000000 +0900
+++ xenoprof-ia64-unstable/xen/arch/ia64/linux-xen/perfmon_itanium.h    
2006-06-30 15:25:01.000000000 +0900
@@ -0,0 +1,115 @@
+/*
+ * This file contains the Itanium PMU register description tables
+ * and pmc checker used by perfmon.c.
+ *
+ * Copyright (C) 2002-2003  Hewlett Packard Co
+ *               Stephane Eranian <eranian@xxxxxxxxxx>
+ */
+static int pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, 
unsigned int cnum, unsigned long *val, struct pt_regs *regs);
+
+static pfm_reg_desc_t pfm_ita_pmc_desc[]={
+/* pmc0  */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 
0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc1  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 
0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc2  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 
0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc3  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 
0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc4  */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(4),0UL, 0UL, 
0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc5  */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 
0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc6  */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 
0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc7  */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 
0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc8  */ { PFM_REG_CONFIG  , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, 
{0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc9  */ { PFM_REG_CONFIG  , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, 
{0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc10 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, 
{RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc11 */ { PFM_REG_MONITOR , 6, 0x0000000010000000UL, -1UL, NULL, 
pfm_ita_pmc_check, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 
0UL}},
+/* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, 
{RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL,
 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc13 */ { PFM_REG_CONFIG  , 0, 0x0003ffff00000001UL, -1UL, NULL, 
pfm_ita_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+           { PFM_REG_END     , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end 
marker */
+};
+
+static pfm_reg_desc_t pfm_ita_pmd_desc[PMU_MAX_PMDS]={
+/* pmd0  */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 
0UL}, {RDEP(10),0UL, 0UL, 0UL}},
+/* pmd1  */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 
0UL}, {RDEP(10),0UL, 0UL, 0UL}},
+/* pmd2  */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, 
{RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
+/* pmd3  */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, 
{RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
+/* pmd4  */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, 
{RDEP(4),0UL, 0UL, 0UL}},
+/* pmd5  */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, 
{RDEP(5),0UL, 0UL, 0UL}},
+/* pmd6  */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, 
{RDEP(6),0UL, 0UL, 0UL}},
+/* pmd7  */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, 
{RDEP(7),0UL, 0UL, 0UL}},
+/* pmd8  */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, 
{RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 
0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd9  */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, 
{RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 
0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd10 */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, 
{RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 
0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd11 */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, 
{RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 
0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd12 */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, 
{RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 
0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd13 */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, 
{RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 
0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd14 */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, 
{RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 
0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd15 */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, 
{RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 
0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd16 */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, 
{RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 
0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd17 */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, 
{RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
+           { PFM_REG_END     , 0, 0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end 
marker */
+};
+
+static int
+pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int 
cnum, unsigned long *val, struct pt_regs *regs)
+{
+       int ret;
+       int is_loaded;
+
+       /* sanity check */
+       if (ctx == NULL) return -EINVAL;
+
+       is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED;
+
+       /*
+        * we must clear the (instruction) debug registers if the pmc13.ta bit is
+        * cleared before they are written (fl_using_dbreg==0), to avoid picking
+        * up stale information.
+        */
+       if (cnum == 13 && is_loaded && ((*val & 0x1) == 0UL) && ctx->ctx_fl_using_dbreg == 0) {
+
+               DPRINT(("pmc[%d]=0x%lx has active pmc13.ta cleared, clearing ibr\n", cnum, *val));
+
+               /* don't mix debug with perfmon */
+/*             if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) 
return -EINVAL; */
+
+               /*
+                * a count of 0 will mark the debug registers as in use and also
+                * ensure that they are properly cleared.
+                */
+               ret = pfm_write_ibr_dbr(1, ctx, NULL, 0, regs);
+               if (ret) return ret;
+       }
+
+       /*
+        * we must clear the (data) debug registers if the pmc11.pt bit is
+        * cleared before they are written (fl_using_dbreg==0), to avoid picking
+        * up stale information.
+        */
+       if (cnum == 11 && is_loaded && ((*val >> 28) & 0x1) == 0 && ctx->ctx_fl_using_dbreg == 0) {
+
+               DPRINT(("pmc[%d]=0x%lx has active pmc11.pt cleared, clearing dbr\n", cnum, *val));
+
+               /* don't mix debug with perfmon */
+/*             if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) 
return -EINVAL; */
+
+               /*
+                * a count of 0 will mark the debug registers as in use and also
+                * ensure that they are properly cleared.
+                */
+               ret = pfm_write_ibr_dbr(0, ctx, NULL, 0, regs);
+               if (ret) return ret;
+       }
+       return 0;
+}
+
+/*
+ * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
+ */
+static pmu_config_t pmu_conf_ita={
+       .pmu_name      = "Itanium",
+       .pmu_family    = 0x7,
+       .ovfl_val      = (1UL << 32) - 1,
+       .pmd_desc      = pfm_ita_pmd_desc,
+       .pmc_desc      = pfm_ita_pmc_desc,
+       .num_ibrs      = 8,
+       .num_dbrs      = 8,
+       .use_rr_dbregs = 1, /* debug registers are used for range restrictions */
+};
+
+
diff -Nur xen-ia64-unstable.hg/xen/arch/ia64/linux-xen/perfmon_mckinley.h 
xenoprof-ia64-unstable/xen/arch/ia64/linux-xen/perfmon_mckinley.h
--- xen-ia64-unstable.hg/xen/arch/ia64/linux-xen/perfmon_mckinley.h     
1970-01-01 09:00:00.000000000 +0900
+++ xenoprof-ia64-unstable/xen/arch/ia64/linux-xen/perfmon_mckinley.h   
2006-06-30 15:25:01.000000000 +0900
@@ -0,0 +1,180 @@
+/*
+ * This file contains the McKinley PMU register description tables
+ * and pmc checker used by perfmon.c.
+ *
+ * Copyright (C) 2002-2003  Hewlett Packard Co
+ *               Stephane Eranian <eranian@xxxxxxxxxx>
+ */
+static int pfm_mck_pmc_check(struct task_struct *task, pfm_context_t *ctx, 
unsigned int cnum, unsigned long *val, struct pt_regs *regs);
+
+static pfm_reg_desc_t pfm_mck_pmc_desc[PMU_MAX_PMCS]={
+/* pmc0  */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 
0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc1  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 
0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc2  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 
0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc3  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 
0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc4  */ { PFM_REG_COUNTING, 6, 0x0000000000800000UL, 0xfffff7fUL, NULL, 
pfm_mck_pmc_check, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc5  */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL,  
pfm_mck_pmc_check, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc6  */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL,  
pfm_mck_pmc_check, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc7  */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL,  
pfm_mck_pmc_check, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc8  */ { PFM_REG_CONFIG  , 0, 0xffffffff3fffffffUL, 0xffffffff3ffffffbUL, 
NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc9  */ { PFM_REG_CONFIG  , 0, 0xffffffff3ffffffcUL, 0xffffffff3ffffffbUL, 
NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc10 */ { PFM_REG_MONITOR , 4, 0x0UL, 0xffffUL, NULL, pfm_mck_pmc_check, 
{RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc11 */ { PFM_REG_MONITOR , 6, 0x0UL, 0x30f01cf, NULL,  pfm_mck_pmc_check, 
{RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, 0xffffUL, NULL,  pfm_mck_pmc_check, 
{RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL,
 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc13 */ { PFM_REG_CONFIG  , 0, 0x00002078fefefefeUL, 0x1e00018181818UL, 
NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc14 */ { PFM_REG_CONFIG  , 0, 0x0db60db60db60db6UL, 0x2492UL, NULL, 
pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc15 */ { PFM_REG_CONFIG  , 0, 0x00000000fffffff0UL, 0xfUL, NULL, 
pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+           { PFM_REG_END     , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end 
marker */
+};
+
+static pfm_reg_desc_t pfm_mck_pmd_desc[PMU_MAX_PMDS]={
+/* pmd0  */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 
0UL}, {RDEP(10),0UL, 0UL, 0UL}},
+/* pmd1  */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 
0UL}, {RDEP(10),0UL, 0UL, 0UL}},
+/* pmd2  */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, 
{RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
+/* pmd3  */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, 
{RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
+/* pmd4  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 
0UL}, {RDEP(4),0UL, 0UL, 0UL}},
+/* pmd5  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 
0UL}, {RDEP(5),0UL, 0UL, 0UL}},
+/* pmd6  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 
0UL}, {RDEP(6),0UL, 0UL, 0UL}},
+/* pmd7  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 
0UL}, {RDEP(7),0UL, 0UL, 0UL}},
+/* pmd8  */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, 
{RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 
0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd9  */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, 
{RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 
0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd10 */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, 
{RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 
0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd11 */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, 
{RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 
0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd12 */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, 
{RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 
0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd13 */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, 
{RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 
0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd14 */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, 
{RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 
0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd15 */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, 
{RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 
0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd16 */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, 
{RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 
0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd17 */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, 
{RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
+           { PFM_REG_END     , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end 
marker */
+};
+
+/*
+ * PMC reserved fields must have their power-up values preserved
+ */
+static int
+pfm_mck_reserved(unsigned int cnum, unsigned long *val, struct pt_regs *regs)
+{
+       unsigned long tmp1, tmp2, ival = *val;
+
+       /* remove reserved areas from user value */
+       tmp1 = ival & PMC_RSVD_MASK(cnum);
+
+       /* get reserved fields values */
+       tmp2 = PMC_DFL_VAL(cnum) & ~PMC_RSVD_MASK(cnum);
+
+       *val = tmp1 | tmp2;
+
+       DPRINT(("pmc[%d]=0x%lx, mask=0x%lx, reset=0x%lx, val=0x%lx\n",
+                 cnum, ival, PMC_RSVD_MASK(cnum), PMC_DFL_VAL(cnum), *val));
+       return 0;
+}
+
+/*
+ * task can be NULL if the context is unloaded
+ */
+static int
+pfm_mck_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int 
cnum, unsigned long *val, struct pt_regs *regs)
+{
+       int ret = 0, check_case1 = 0;
+       unsigned long val8 = 0, val14 = 0, val13 = 0;
+       int is_loaded;
+       
+       printk("pfm_mck_pmc_check\n");
+
+       /* first preserve the reserved fields */
+       pfm_mck_reserved(cnum, val, regs);
+
+       /* sanity check */
+       if (ctx == NULL) return -EINVAL;
+
+       is_loaded = 1;
+
+       /*
+        * we must clear the debug registers if pmc13 has a value which enables
+        * memory pipeline event constraints. In this case we need to clear the
+        * debug registers if they have not yet been accessed. This is required
+        * to avoid picking up stale state.
+        * PMC13 is "active" if:
+        *      one of the pmc13.cfg_dbrpXX fields is different from 0x3
+        * AND
+        *      the corresponding pmc13.ena_dbrpXX bit is set.
+        */
+       DPRINT(("cnum=%u val=0x%lx, loaded=%d\n", cnum, *val, is_loaded));
+
+       if (cnum == 13 && is_loaded
+           && (*val & 0x1e00000000000UL) && (*val & 0x18181818UL) != 0x18181818UL) {
+
+               DPRINT(("pmc[%d]=0x%lx has active pmc13 settings, clearing dbr\n", cnum, *val));
+
+               /*
+                * a count of 0 will mark the debug registers as in use and also
+                * ensure that they are properly cleared.
+                */
+               ret = pfm_write_ibr_dbr(PFM_DATA_RR, ctx, NULL, 0, regs);
+               if (ret) return ret;
+       }
+       /*
+        * we must clear the (instruction) debug registers if any pmc14.ibrpX bit
+        * is enabled before they are written (fl_using_dbreg==0), to avoid
+        * picking up stale information.
+        */
+       if (cnum == 14 && is_loaded && ((*val & 0x2222UL) != 0x2222UL)) {
+
+               DPRINT(("pmc[%d]=0x%lx has active pmc14 settings, clearing ibr\n", cnum, *val));
+
+               /*
+                * a count of 0 will mark the debug registers as in use and also
+                * ensure that they are properly cleared.
+                */
+               ret = pfm_write_ibr_dbr(PFM_CODE_RR, ctx, NULL, 0, regs);
+               if (ret) return ret;
+
+       }
+
+       switch(cnum) {
+               case  4: *val |= 1UL << 23; /* force power enable bit */
+                        break;
+               case  8: val8 = *val;
+                        val13 = ctx->ctx_pmcs[13];
+                        val14 = ctx->ctx_pmcs[14];
+                        check_case1 = 1;
+                        break;
+               case 13: val8  = ctx->ctx_pmcs[8];
+                        val13 = *val;
+                        val14 = ctx->ctx_pmcs[14];
+                        check_case1 = 1;
+                        break;
+               case 14: val8  = ctx->ctx_pmcs[8];
+                        val13 = ctx->ctx_pmcs[13];
+                        val14 = *val;
+                        check_case1 = 1;
+                        break;
+       }
+       /* check illegal configuration which can produce inconsistencies in
+        * tagging i-side events in L1D and L2 caches
+        */
+       if (check_case1) {
+               ret =   ((val13 >> 45) & 0xf) == 0
+                  && ((val8 & 0x1) == 0)
+                  && ((((val14>>1) & 0x3) == 0x2 || ((val14>>1) & 0x3) == 0x0)
+                      || (((val14>>4) & 0x3) == 0x2 || ((val14>>4) & 0x3) == 0x0));
+
+               if (ret) DPRINT((KERN_DEBUG "perfmon: failure check_case1\n"));
+       }
+
+       return ret ? -EINVAL : 0;
+}
+
+/*
+ * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
+ */
+static pmu_config_t pmu_conf_mck={
+       .pmu_name      = "Itanium 2",
+       .pmu_family    = 0x1f,
+       .flags         = PFM_PMU_IRQ_RESEND,
+       .ovfl_val      = (1UL << 47) - 1,
+       .pmd_desc      = pfm_mck_pmd_desc,
+       .pmc_desc      = pfm_mck_pmc_desc,
+       .num_ibrs      = 8,
+       .num_dbrs      = 8,
+       .use_rr_dbregs = 1 /* debug registers are used for range restrictions */
+};
+
+
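pfm_mck_reserved() above (and pfm_mont_reserved() below) preserve the power-up
values of reserved PMC bits with a two-mask merge. A worked sketch of the same
arithmetic, with made-up mask and default values:

    /* Worked sketch of the reserved-field preservation done by
     * pfm_mck_reserved()/pfm_mont_reserved(). RSVD_MASK and DFL_VAL
     * here are hypothetical values for illustration only.
     */
    #include <stdio.h>

    #define RSVD_MASK 0x00000000fffffff0UL /* writable bits (hypothetical)  */
    #define DFL_VAL   0x0000000000000006UL /* power-up value (hypothetical) */

    int main(void)
    {
        unsigned long user = 0xdeadbeefUL;

        /* keep user bits only where writable, default bits elsewhere */
        unsigned long merged = (user & RSVD_MASK) | (DFL_VAL & ~RSVD_MASK);

        printf("user=0x%lx merged=0x%lx\n", user, merged); /* -> 0xdeadbee6 */
        return 0;
    }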
diff -Nur xen-ia64-unstable.hg/xen/arch/ia64/linux-xen/perfmon_montecito.h 
xenoprof-ia64-unstable/xen/arch/ia64/linux-xen/perfmon_montecito.h
--- xen-ia64-unstable.hg/xen/arch/ia64/linux-xen/perfmon_montecito.h    
1970-01-01 09:00:00.000000000 +0900
+++ xenoprof-ia64-unstable/xen/arch/ia64/linux-xen/perfmon_montecito.h  
2006-06-30 15:25:01.000000000 +0900
@@ -0,0 +1,269 @@
+/*
+ * This file contains the Montecito PMU register description tables
+ * and pmc checker used by perfmon.c.
+ *
+ * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
+ *               Contributed by Stephane Eranian <eranian@xxxxxxxxxx>
+ */
+static int pfm_mont_pmc_check(struct task_struct *task, pfm_context_t *ctx, 
unsigned int cnum, unsigned long *val, struct pt_regs *regs);
+
+#define RDEP_MONT_ETB  
(RDEP(38)|RDEP(39)|RDEP(48)|RDEP(49)|RDEP(50)|RDEP(51)|RDEP(52)|RDEP(53)|RDEP(54)|\
+                        
RDEP(55)|RDEP(56)|RDEP(57)|RDEP(58)|RDEP(59)|RDEP(60)|RDEP(61)|RDEP(62)|RDEP(63))
+#define RDEP_MONT_DEAR  (RDEP(32)|RDEP(33)|RDEP(36))
+#define RDEP_MONT_IEAR  (RDEP(34)|RDEP(35))
+
+static pfm_reg_desc_t pfm_mont_pmc_desc[PMU_MAX_PMCS]={
+/* pmc0  */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 
0}},
+/* pmc1  */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 
0}},
+/* pmc2  */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 
0}},
+/* pmc3  */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 
0}},
+/* pmc4  */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, 
pfm_mont_pmc_check, {RDEP(4),0, 0, 0}, {0,0, 0, 0}},
+/* pmc5  */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, 
pfm_mont_pmc_check, {RDEP(5),0, 0, 0}, {0,0, 0, 0}},
+/* pmc6  */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, 
pfm_mont_pmc_check, {RDEP(6),0, 0, 0}, {0,0, 0, 0}},
+/* pmc7  */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, 
pfm_mont_pmc_check, {RDEP(7),0, 0, 0}, {0,0, 0, 0}},
+/* pmc8  */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, 
pfm_mont_pmc_check, {RDEP(8),0, 0, 0}, {0,0, 0, 0}},
+/* pmc9  */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, 
pfm_mont_pmc_check, {RDEP(9),0, 0, 0}, {0,0, 0, 0}},
+/* pmc10 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, 
pfm_mont_pmc_check, {RDEP(10),0, 0, 0}, {0,0, 0, 0}},
+/* pmc11 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, 
pfm_mont_pmc_check, {RDEP(11),0, 0, 0}, {0,0, 0, 0}},
+/* pmc12 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, 
pfm_mont_pmc_check, {RDEP(12),0, 0, 0}, {0,0, 0, 0}},
+/* pmc13 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, 
pfm_mont_pmc_check, {RDEP(13),0, 0, 0}, {0,0, 0, 0}},
+/* pmc14 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, 
pfm_mont_pmc_check, {RDEP(14),0, 0, 0}, {0,0, 0, 0}},
+/* pmc15 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, 
pfm_mont_pmc_check, {RDEP(15),0, 0, 0}, {0,0, 0, 0}},
+/* pmc16 */ { PFM_REG_NOTIMPL, },
+/* pmc17 */ { PFM_REG_NOTIMPL, },
+/* pmc18 */ { PFM_REG_NOTIMPL, },
+/* pmc19 */ { PFM_REG_NOTIMPL, },
+/* pmc20 */ { PFM_REG_NOTIMPL, },
+/* pmc21 */ { PFM_REG_NOTIMPL, },
+/* pmc22 */ { PFM_REG_NOTIMPL, },
+/* pmc23 */ { PFM_REG_NOTIMPL, },
+/* pmc24 */ { PFM_REG_NOTIMPL, },
+/* pmc25 */ { PFM_REG_NOTIMPL, },
+/* pmc26 */ { PFM_REG_NOTIMPL, },
+/* pmc27 */ { PFM_REG_NOTIMPL, },
+/* pmc28 */ { PFM_REG_NOTIMPL, },
+/* pmc29 */ { PFM_REG_NOTIMPL, },
+/* pmc30 */ { PFM_REG_NOTIMPL, },
+/* pmc31 */ { PFM_REG_NOTIMPL, },
+/* pmc32 */ { PFM_REG_CONFIG,  0, 0x30f01ffffffffff, 0x30f01ffffffffff, NULL, 
pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
+/* pmc33 */ { PFM_REG_CONFIG,  0, 0x0,  0x1ffffffffff, NULL, 
pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
+/* pmc34 */ { PFM_REG_CONFIG,  0, 0xf01ffffffffff, 0xf01ffffffffff, NULL, 
pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
+/* pmc35 */ { PFM_REG_CONFIG,  0, 0x0,  0x1ffffffffff, NULL, 
pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
+/* pmc36 */ { PFM_REG_CONFIG,  0, 0xfffffff0, 0xf, NULL, pfm_mont_pmc_check, 
{0,0, 0, 0}, {0,0, 0, 0}},
+/* pmc37 */ { PFM_REG_MONITOR, 4, 0x0, 0x3fff, NULL, pfm_mont_pmc_check, 
{RDEP_MONT_IEAR, 0, 0, 0}, {0, 0, 0, 0}},
+/* pmc38 */ { PFM_REG_CONFIG,  0, 0xdb6, 0x2492, NULL, pfm_mont_pmc_check, 
{0,0, 0, 0}, {0,0, 0, 0}},
+/* pmc39 */ { PFM_REG_MONITOR, 6, 0x0, 0xffcf, NULL, pfm_mont_pmc_check, 
{RDEP_MONT_ETB,0, 0, 0}, {0,0, 0, 0}},
+/* pmc40 */ { PFM_REG_MONITOR, 6, 0x2000000, 0xf01cf, NULL, 
pfm_mont_pmc_check, {RDEP_MONT_DEAR,0, 0, 0}, {0,0, 0, 0}},
+/* pmc41 */ { PFM_REG_CONFIG,  0, 0x00002078fefefefe, 0x1e00018181818, NULL, 
pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
+/* pmc42 */ { PFM_REG_MONITOR, 6, 0x0, 0x7ff4f, NULL, pfm_mont_pmc_check, 
{RDEP_MONT_ETB,0, 0, 0}, {0,0, 0, 0}},
+           { PFM_REG_END    , 0, 0x0, -1, NULL, NULL, {0,}, {0,}}, /* end 
marker */
+};
+
+static pfm_reg_desc_t pfm_mont_pmd_desc[PMU_MAX_PMDS]={
+/* pmd0  */ { PFM_REG_NOTIMPL, }, 
+/* pmd1  */ { PFM_REG_NOTIMPL, },
+/* pmd2  */ { PFM_REG_NOTIMPL, },
+/* pmd3  */ { PFM_REG_NOTIMPL, },
+/* pmd4  */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, 
{RDEP(4),0, 0, 0}},
+/* pmd5  */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, 
{RDEP(5),0, 0, 0}},
+/* pmd6  */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, 
{RDEP(6),0, 0, 0}},
+/* pmd7  */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, 
{RDEP(7),0, 0, 0}},
+/* pmd8  */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, 
{RDEP(8),0, 0, 0}}, 
+/* pmd9  */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, 
{RDEP(9),0, 0, 0}},
+/* pmd10 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, 
{RDEP(10),0, 0, 0}},
+/* pmd11 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, 
{RDEP(11),0, 0, 0}},
+/* pmd12 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, 
{RDEP(12),0, 0, 0}},
+/* pmd13 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, 
{RDEP(13),0, 0, 0}},
+/* pmd14 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, 
{RDEP(14),0, 0, 0}},
+/* pmd15 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, 
{RDEP(15),0, 0, 0}},
+/* pmd16 */ { PFM_REG_NOTIMPL, },
+/* pmd17 */ { PFM_REG_NOTIMPL, },
+/* pmd18 */ { PFM_REG_NOTIMPL, },
+/* pmd19 */ { PFM_REG_NOTIMPL, },
+/* pmd20 */ { PFM_REG_NOTIMPL, },
+/* pmd21 */ { PFM_REG_NOTIMPL, },
+/* pmd22 */ { PFM_REG_NOTIMPL, },
+/* pmd23 */ { PFM_REG_NOTIMPL, },
+/* pmd24 */ { PFM_REG_NOTIMPL, },
+/* pmd25 */ { PFM_REG_NOTIMPL, },
+/* pmd26 */ { PFM_REG_NOTIMPL, },
+/* pmd27 */ { PFM_REG_NOTIMPL, },
+/* pmd28 */ { PFM_REG_NOTIMPL, },
+/* pmd29 */ { PFM_REG_NOTIMPL, },
+/* pmd30 */ { PFM_REG_NOTIMPL, },
+/* pmd31 */ { PFM_REG_NOTIMPL, },
+/* pmd32 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(33)|RDEP(36),0, 0, 
0}, {RDEP(40),0, 0, 0}},
+/* pmd33 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(32)|RDEP(36),0, 0, 
0}, {RDEP(40),0, 0, 0}},
+/* pmd34 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(35),0, 0, 0}, 
{RDEP(37),0, 0, 0}},
+/* pmd35 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(34),0, 0, 0}, 
{RDEP(37),0, 0, 0}},
+/* pmd36 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(32)|RDEP(33),0, 0, 
0}, {RDEP(40),0, 0, 0}},
+/* pmd37 */ { PFM_REG_NOTIMPL, },
+/* pmd38 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, 
{RDEP(39),0, 0, 0}},
+/* pmd39 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, 
{RDEP(39),0, 0, 0}},
+/* pmd40 */ { PFM_REG_NOTIMPL, },
+/* pmd41 */ { PFM_REG_NOTIMPL, },
+/* pmd42 */ { PFM_REG_NOTIMPL, },
+/* pmd43 */ { PFM_REG_NOTIMPL, },
+/* pmd44 */ { PFM_REG_NOTIMPL, },
+/* pmd45 */ { PFM_REG_NOTIMPL, },
+/* pmd46 */ { PFM_REG_NOTIMPL, },
+/* pmd47 */ { PFM_REG_NOTIMPL, },
+/* pmd48 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, 
{RDEP(39),0, 0, 0}},
+/* pmd49 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, 
{RDEP(39),0, 0, 0}},
+/* pmd50 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, 
{RDEP(39),0, 0, 0}},
+/* pmd51 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, 
{RDEP(39),0, 0, 0}},
+/* pmd52 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, 
{RDEP(39),0, 0, 0}},
+/* pmd53 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, 
{RDEP(39),0, 0, 0}},
+/* pmd54 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, 
{RDEP(39),0, 0, 0}},
+/* pmd55 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, 
{RDEP(39),0, 0, 0}},
+/* pmd56 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, 
{RDEP(39),0, 0, 0}},
+/* pmd57 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, 
{RDEP(39),0, 0, 0}},
+/* pmd58 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, 
{RDEP(39),0, 0, 0}},
+/* pmd59 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, 
{RDEP(39),0, 0, 0}},
+/* pmd60 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, 
{RDEP(39),0, 0, 0}},
+/* pmd61 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, 
{RDEP(39),0, 0, 0}},
+/* pmd62 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, 
{RDEP(39),0, 0, 0}},
+/* pmd63 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, 
{RDEP(39),0, 0, 0}},
+           { PFM_REG_END   , 0, 0x0, -1, NULL, NULL, {0,}, {0,}}, /* end 
marker */
+};
+
+/*
+ * PMC reserved fields must have their power-up values preserved
+ */
+static int
+pfm_mont_reserved(unsigned int cnum, unsigned long *val, struct pt_regs *regs)
+{
+       unsigned long tmp1, tmp2, ival = *val;
+
+       /* remove reserved areas from user value */
+       tmp1 = ival & PMC_RSVD_MASK(cnum);
+
+       /* get reserved fields values */
+       tmp2 = PMC_DFL_VAL(cnum) & ~PMC_RSVD_MASK(cnum);
+
+       *val = tmp1 | tmp2;
+
+       DPRINT(("pmc[%d]=0x%lx, mask=0x%lx, reset=0x%lx, val=0x%lx\n",
+                 cnum, ival, PMC_RSVD_MASK(cnum), PMC_DFL_VAL(cnum), *val));
+       return 0;
+}
+
+/*
+ * task can be NULL if the context is unloaded
+ */
+static int
+pfm_mont_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int 
cnum, unsigned long *val, struct pt_regs *regs)
+{
+       int ret = 0;
+       unsigned long val32 = 0, val38 = 0, val41 = 0;
+       unsigned long tmpval;
+       int check_case1 = 0;
+       int is_loaded;
+
+       /* first preserve the reserved fields */
+       pfm_mont_reserved(cnum, val, regs);
+
+       tmpval = *val;
+
+       /* sanity check */
+       if (ctx == NULL) return -EINVAL;
+
+       is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED;
+
+       /*
+        * we must clear the debug registers if pmc41 has a value which enables
+        * memory pipeline event constraints. In this case we need to clear the
+        * debug registers if they have not yet been accessed. This is required
+        * to avoid picking up stale state.
+        * PMC41 is "active" if:
+        *      one of the pmc41.cfg_dtagXX fields is different from 0x3
+        * AND
+        *      the corresponding pmc41.en_dbrpXX bit is set
+        * AND
+        *      ctx_fl_using_dbreg == 0  (i.e., dbr not yet used)
+        */
+       DPRINT(("cnum=%u val=0x%lx, using_dbreg=%d loaded=%d\n", cnum, tmpval, ctx->ctx_fl_using_dbreg, is_loaded));
+
+       if (cnum == 41 && is_loaded
+           && (tmpval & 0x1e00000000000UL) && (tmpval & 0x18181818UL) != 0x18181818UL
+           && ctx->ctx_fl_using_dbreg == 0) {
+
+               DPRINT(("pmc[%d]=0x%lx has active pmc41 settings, clearing dbr\n", cnum, tmpval));
+
+               /* don't mix debug with perfmon */
+/*             if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) 
return -EINVAL; */
+
+               /*
+                * a count of 0 will mark the debug registers as in use and also
+                * ensure that they are properly cleared.
+                */
+               ret = pfm_write_ibr_dbr(PFM_DATA_RR, ctx, NULL, 0, regs);
+               if (ret) return ret;
+       }
+       /*
+        * we must clear the (instruction) debug registers if:
+        *      pmc38.ig_ibrpX is 0 (enabled)
+        * AND
+        *      ctx_fl_using_dbreg == 0  (i.e., dbr not yet used)
+        */
+       if (cnum == 38 && is_loaded && ((tmpval & 0x492UL) != 0x492UL) && ctx->ctx_fl_using_dbreg == 0) {
+
+               DPRINT(("pmc38=0x%lx has active pmc38 settings, clearing ibr\n", tmpval));
+
+               /* don't mix debug with perfmon */
+/*             if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) 
return -EINVAL; */
+
+               /*
+                * a count of 0 will mark the debug registers as in use and also
+                * ensure that they are properly cleared.
+                */
+               ret = pfm_write_ibr_dbr(PFM_CODE_RR, ctx, NULL, 0, regs);
+               if (ret) return ret;
+
+       }
+       switch(cnum) {
+               case  32: val32 = *val;
+                         val38 = ctx->ctx_pmcs[38];
+                         val41 = ctx->ctx_pmcs[41];
+                         check_case1 = 1;
+                         break;
+               case  38: val38 = *val;
+                         val32 = ctx->ctx_pmcs[32];
+                         val41 = ctx->ctx_pmcs[41];
+                         check_case1 = 1;
+                         break;
+               case  41: val41 = *val;
+                         val32 = ctx->ctx_pmcs[32];
+                         val38 = ctx->ctx_pmcs[38];
+                         check_case1 = 1;
+                         break;
+       }
+       /* check illegal configuration which can produce inconsistencies in
+        * tagging i-side events in L1D and L2 caches
+        */
+       if (check_case1) {
+               ret =   (((val41 >> 45) & 0xf) == 0 && ((val32>>57) & 0x1) == 0)
+                    && ((((val38>>1) & 0x3) == 0x2 || ((val38>>1) & 0x3) == 0)
+                    ||  (((val38>>4) & 0x3) == 0x2 || ((val38>>4) & 0x3) == 0));
+               if (ret) {
+                       DPRINT(("invalid config pmc38=0x%lx pmc41=0x%lx pmc32=0x%lx\n", val38, val41, val32));
+                       return -EINVAL;
+               }
+       }
+       *val = tmpval;
+       return 0;
+}
+
+/*
+ * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
+ */
+static pmu_config_t pmu_conf_mont={
+       .pmu_name        = "Montecito",
+       .pmu_family      = 0x20,
+       .flags           = PFM_PMU_IRQ_RESEND,
+       .ovfl_val        = (1UL << 47) - 1,
+       .pmd_desc        = pfm_mont_pmd_desc,
+       .pmc_desc        = pfm_mont_pmc_desc,
+       .num_ibrs        = 8,
+       .num_dbrs        = 8,
+       .use_rr_dbregs   = 1 /* debug registers are used for range restrictions */
+};
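The check_case1 logic in pfm_mont_pmc_check() rejects an i-side tagging setup
split across pmc32/pmc38/pmc41. The same predicate as a standalone sketch,
with the field positions copied from the code and made-up register values:

    /* Standalone sketch of the Montecito check_case1 predicate above.
     * Field positions match the code; the sample values are made up.
     */
    #include <stdio.h>

    static int illegal_iside_tagging(unsigned long val32, unsigned long val38,
                                     unsigned long val41)
    {
        return (((val41 >> 45) & 0xf) == 0 && ((val32 >> 57) & 0x1) == 0)
            && ((((val38 >> 1) & 0x3) == 0x2 || ((val38 >> 1) & 0x3) == 0)
             || (((val38 >> 4) & 0x3) == 0x2 || ((val38 >> 4) & 0x3) == 0));
    }

    int main(void)
    {
        /* all-zero registers trip the check: dtag off, ibrp0 "enabled" */
        unsigned long val32 = 0x0, val38 = 0x0, val41 = 0x0; /* hypothetical */

        printf("illegal: %d\n", illegal_iside_tagging(val32, val38, val41));
        return 0;
    }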
diff -Nur xen-ia64-unstable.hg/xen/arch/ia64/oprofile/Makefile 
xenoprof-ia64-unstable/xen/arch/ia64/oprofile/Makefile
--- xen-ia64-unstable.hg/xen/arch/ia64/oprofile/Makefile        1970-01-01 
09:00:00.000000000 +0900
+++ xenoprof-ia64-unstable/xen/arch/ia64/oprofile/Makefile      2006-06-30 
15:25:01.000000000 +0900
@@ -0,0 +1,2 @@
+obj-y += xenoprof.o
+obj-y += perfmon.o
diff -Nur xen-ia64-unstable.hg/xen/arch/ia64/oprofile/op_counter.h 
xenoprof-ia64-unstable/xen/arch/ia64/oprofile/op_counter.h
--- xen-ia64-unstable.hg/xen/arch/ia64/oprofile/op_counter.h    1970-01-01 
09:00:00.000000000 +0900
+++ xenoprof-ia64-unstable/xen/arch/ia64/oprofile/op_counter.h  2006-06-30 
15:25:01.000000000 +0900
@@ -0,0 +1,33 @@
+/**
+ * @file op_counter.h
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ */
+ 
+#ifndef OP_COUNTER_H
+#define OP_COUNTER_H
+ 
+#define OP_MAX_COUNTER 8
+ 
+/* Per-perfctr configuration as set via
+ * oprofilefs.
+ */
+struct op_counter_config {
+    unsigned long count;
+    unsigned long enabled;
+    unsigned long event;
+    unsigned long kernel;
+    unsigned long user;
+    unsigned long unit_mask;
+};
+
+extern struct op_counter_config counter_config[];
+
+#endif /* OP_COUNTER_H */
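Each slot of counter_config[] is filled from a guest's XENOPROF_counter
request (see the handler in xenoprof.c below). A minimal sketch of that flow,
mirroring the struct above, with an illustrative request:

    /* Minimal sketch of filling one op_counter_config slot, mirroring
     * the XENOPROF_counter case in xenoprof.c below. The index, count
     * and event code here are illustrative.
     */
    #include <stdio.h>

    #define OP_MAX_COUNTER 8

    struct op_counter_config {
        unsigned long count, enabled, event, kernel, user, unit_mask;
    };

    static struct op_counter_config counter_config[OP_MAX_COUNTER];

    int main(void)
    {
        unsigned int ind = 0;         /* counter index from the guest    */
        unsigned long count = 100000; /* sample every N events           */

        if (ind >= OP_MAX_COUNTER)    /* bounds check before indexing    */
            return 1;

        counter_config[ind].count   = count;
        counter_config[ind].enabled = 1;
        counter_config[ind].event   = 0x12; /* hypothetical event code   */

        printf("counter %u: event=0x%lx count=%lu\n",
               ind, counter_config[ind].event, counter_config[ind].count);
        return 0;
    }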
diff -Nur xen-ia64-unstable.hg/xen/arch/ia64/oprofile/perfmon.c 
xenoprof-ia64-unstable/xen/arch/ia64/oprofile/perfmon.c
--- xen-ia64-unstable.hg/xen/arch/ia64/oprofile/perfmon.c       1970-01-01 
09:00:00.000000000 +0900
+++ xenoprof-ia64-unstable/xen/arch/ia64/oprofile/perfmon.c     2006-07-03 
10:45:09.000000000 +0900
@@ -0,0 +1,143 @@
+/**
+ * @file perfmon.c
+ *
+ * @remark Copyright 2003 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@xxxxxxxxxxxxxxxxx>
+ */
+
+#include <linux/kernel.h>
+#include <linux/config.h>
+/* #include <linux/oprofile.h> */
+#include <linux/sched.h>
+#include <asm/perfmon.h>
+#include <asm/regs.h>
+#include <asm/ptrace.h>
+#include <xen/event.h>
+/* #include <asm/errno.h> */
+
+static int allow_ints;
+
+extern struct domain *primary_profiler;
+
+extern void xenoprof_log_event(struct vcpu *v, unsigned long eip,
+                          int mode, int event);
+extern int is_active(struct domain *d);
+
+static int xenoprof_samples = 0;
+
+static int
+perfmon_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg,
+                struct pt_regs *regs, unsigned long stamp)
+{
+       int event = arg->pmd_eventid;
+       int mode = 0;
+
+       if ( ring_2(regs) )     // DomU kernel
+               mode = 1;
+       else if ( ring_0(regs) ) // Hypervisor
+               mode = 2;
+
+       arg->ovfl_ctrl.bits.reset_ovfl_pmds = 1;
+
+       /* the owner of the oprofile event buffer may have exited
+        * without perfmon being shut down (e.g. SIGSEGV)
+        */
+       if (allow_ints) {
+               xenoprof_log_event(current, profile_pc(regs), mode, event);
+               xenoprof_samples++;
+/*             printk("perfmon_handler: 0x%lx\n", profile_pc(regs)); */
+               if ( is_active(current->domain) )
+               {
+                       if ( mode != 2)
+                       {
+                               send_guest_vcpu_virq(current, VIRQ_XENOPROF);
+                       }
+               }
+       }
+       return 0;
+}
+
+
+int perfmon_start(void)
+{
+       allow_ints = 1;
+       return 0;
+}
+
+
+void perfmon_stop(void)
+{
+       allow_ints = 0;
+       printk("xenoprof_samples: %d\n", xenoprof_samples);
+       xenoprof_samples = 0;
+}
+
+
+#define OPROFILE_FMT_UUID { \
+       0x77, 0x7a, 0x6e, 0x61, 0x20, 0x65, 0x73, 0x69, 0x74, 0x6e, 0x72, 0x20, 0x61, 0x65, 0x0a, 0x6c }
+
+static pfm_buffer_fmt_t oprofile_fmt = {
+       .fmt_name           = "oprofile_format",
+       .fmt_uuid           = OPROFILE_FMT_UUID,
+       .fmt_handler        = perfmon_handler,
+};
+
+
+static char * get_cpu_type(void)
+{
+       __u8 family = local_cpu_data->family;
+
+       switch (family) {
+               case 0x07:
+                       return "ia64/itanium";
+               case 0x1f:
+                       return "ia64/itanium2";
+               default:
+                       return "ia64/ia64";
+       }
+}
+
+
+/* all the ops are handled via userspace for IA64 perfmon */
+
+static int using_perfmon;
+
+int perfmon_init(int *num_events, int *is_primary, char *cpu_type)
+{
+       int ret = 0;
+       int prim = 0;
+
+       if ( primary_profiler == NULL ) {
+               /* For now, only dom0 can be the primary profiler */
+               if ( current->domain->domain_id == 0 ) {
+                       ret = pfm_register_buffer_fmt(&oprofile_fmt);
+
+                       if (ret) {
+                               printk("pfm_register_buffer_fmt: %d\n", ret);
+                               return -ENODEV;
+                       }
+                       primary_profiler = current->domain;
+                       prim = 1;
+               }
+       }
+
+       strncpy (cpu_type, get_cpu_type(), XENOPROF_CPU_TYPE_SIZE - 1);
+       using_perfmon = 1;
+       *num_events = pmu_conf->num_counters;
+       *is_primary = prim;
+/*     printk("perfmon_init: ret=%d num_events=%d is_primary=%d\n", ret, 
*num_events, *is_primary); */
+
+       return 0;
+}
+
+
+void perfmon_exit(void)
+{
+       if (!using_perfmon)
+               return;
+       using_perfmon = 0;
+
+       pfm_unregister_buffer_fmt(oprofile_fmt.fmt_uuid);
+}
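perfmon_handler() classifies each sample by the privilege level of the
interrupted context: user is mode 0, guest kernel mode 1, hypervisor mode 2
(on Xen/ia64 the paravirtualized guest kernel runs at privilege level 2).
A sketch of that mapping, with plain integers standing in for the
ring_0()/ring_2() predicates on pt_regs:

    /* Sketch of the sample-mode classification in perfmon_handler().
     * The integer ring argument stands in for ring_0(regs)/ring_2(regs).
     */
    #include <stdio.h>

    static int sample_mode(int ring)
    {
        if (ring == 2)  /* ring_2(regs): DomU/Dom0 kernel */
            return 1;
        if (ring == 0)  /* ring_0(regs): hypervisor */
            return 2;
        return 0;       /* everything else: user */
    }

    int main(void)
    {
        printf("ring3->mode %d, ring2->mode %d, ring0->mode %d\n",
               sample_mode(3), sample_mode(2), sample_mode(0));
        return 0;
    }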
diff -Nur xen-ia64-unstable.hg/xen/arch/ia64/oprofile/xenoprof.c 
xenoprof-ia64-unstable/xen/arch/ia64/oprofile/xenoprof.c
--- xen-ia64-unstable.hg/xen/arch/ia64/oprofile/xenoprof.c      1970-01-01 
09:00:00.000000000 +0900
+++ xenoprof-ia64-unstable/xen/arch/ia64/oprofile/xenoprof.c    2006-07-03 
10:43:21.000000000 +0900
@@ -0,0 +1,701 @@
+/*
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ * written by Aravind Menon & Jose Renato Santos
+ *            (email: xenoprof@xxxxxxxxxxxxx)
+ */
+
+#include <xen/guest_access.h>
+#include <xen/sched.h>
+#include <public/xenoprof.h>
+
+#include "op_counter.h"
+
+/* Limit amount of pages used for shared buffer (per domain) */
+#define MAX_OPROF_SHARED_PAGES 32
+
+struct domain *active_domains[MAX_OPROF_DOMAINS];
+int active_ready[MAX_OPROF_DOMAINS];
+unsigned int adomains;
+
+struct domain *passive_domains[MAX_OPROF_DOMAINS];
+unsigned int pdomains;
+
+unsigned int activated;
+struct domain *primary_profiler;
+int xenoprof_state = XENOPROF_IDLE;
+
+u64 total_samples;
+u64 invalid_buffer_samples;
+u64 corrupted_buffer_samples;
+u64 lost_samples;
+u64 active_samples;
+u64 passive_samples;
+u64 idle_samples;
+u64 others_samples;
+
+#define IA64_XENOPROF_PADDR (1UL << 30)
+
+#define xenoprof_shared_maddr(d, t, i)                                       \
+    virt_to_maddr((char*)(t)->rawbuf + ((i) << PAGE_SHIFT))
+
+#ifndef        CONFIG_XEN_IA64_DOM0_VP
+# define xenoprof_shared_gmfn(d, t, i)                                       \
+    ({ ((d) == dom0) ?                                                       \
+            (virt_to_maddr((t)->rawbuf) >> PAGE_SHIFT) + (i):                \
+            assign_domain_page((d),                                          \
+                               IA64_XENOPROF_PADDR + ((i) << PAGE_SHIFT),    \
+                               xenoprof_shared_maddr((d), (t), (i))),        \
+            (IA64_XENOPROF_PADDR >> PAGE_SHIFT) + (i);})
+#else  /* CONFIG_XEN_IA64_DOM0_VP */
+#define xenoprof_shared_gmfn(d, t, i)                                        \
+    ({ assign_domain_page((d),                                               \
+                          IA64_XENOPROF_PADDR + ((i) << PAGE_SHIFT),         \
+                          xenoprof_shared_maddr((d), (t), (i)));             \
+        (IA64_XENOPROF_PADDR >> PAGE_SHIFT) + (i);})
+#endif /* CONFIG_XEN_IA64_DOM0_VP */
+
+
+struct op_counter_config counter_config[OP_MAX_COUNTER];
+
+extern int perfmon_init(int *num_events, int *is_primary, char *cpu_type);
+extern int perfmon_start(void);
+extern void perfmon_stop(void);
+extern void perfmon_exit(void);
+static void xenoprof_reset_stat(void);
+
+int is_active(struct domain *d)
+{
+    struct xenoprof *x = d->xenoprof;
+    return ((x != NULL) && (x->domain_type == XENOPROF_DOMAIN_ACTIVE));
+}
+
+int is_passive(struct domain *d)
+{
+    struct xenoprof *x = d->xenoprof;
+    return ((x != NULL) && (x->domain_type == XENOPROF_DOMAIN_PASSIVE));
+}
+
+int is_profiled(struct domain *d)
+{
+    return (is_active(d) || is_passive(d));
+}
+
+static void xenoprof_reset_stat(void)
+{
+    total_samples = 0;
+    invalid_buffer_samples = 0;
+    corrupted_buffer_samples = 0;
+    lost_samples = 0;
+    active_samples = 0;
+    passive_samples = 0;
+    idle_samples = 0;
+    others_samples = 0;
+}
+
+static void xenoprof_reset_buf(struct domain *d)
+{
+    int j;
+    struct xenoprof_buf *buf;
+
+    if ( d->xenoprof == NULL )
+    {
+        printk("xenoprof_reset_buf: ERROR - Unexpected "
+               "Xenoprof NULL pointer \n");
+        return;
+    }
+
+    for ( j = 0; j < MAX_VIRT_CPUS; j++ )
+    {
+        buf = d->xenoprof->vcpu[j].buffer;
+        if ( buf != NULL )
+        {
+            buf->event_head = 0;
+            buf->event_tail = 0;
+        }
+    }
+}
+
+char *alloc_xenoprof_buf(struct domain *d, int npages)
+{
+    char *rawbuf;
+    int i, order;
+
+    /* allocate pages to store sample buffer shared with domain */
+    order  = get_order_from_pages(npages);
+    rawbuf = alloc_xenheap_pages(order);
+    if ( rawbuf == NULL )
+    {
+        printk("alloc_xenoprof_buf(): memory allocation failed\n");
+        return NULL;
+    }
+
+    /* Share pages so that kernel can map it */
+    for ( i = 0; i < npages; i++ )
+        share_xen_page_with_guest(
+            virt_to_page(rawbuf + i * PAGE_SIZE), 
+            d, XENSHARE_writable);
+
+    return rawbuf;
+}
+
+int alloc_xenoprof_struct(struct domain *d, int max_samples, int is_passive)
+{
+    struct vcpu *v;
+    int nvcpu, npages, bufsize, max_bufsize;
+    int i;
+
+    d->xenoprof = xmalloc(struct xenoprof);
+
+    if ( d->xenoprof == NULL )
+    {
+        printk ("alloc_xenoprof_struct(): memory "
+                "allocation (xmalloc) failed\n");
+        return -ENOMEM;
+    }
+
+    memset(d->xenoprof, 0, sizeof(*d->xenoprof));
+
+    nvcpu = 0;
+    for_each_vcpu ( d, v )
+        nvcpu++;
+
+    /* reduce buffer size if necessary to limit pages allocated */
+    bufsize = sizeof(struct xenoprof_buf) +
+        (max_samples - 1) * sizeof(struct event_log);
+    max_bufsize = (MAX_OPROF_SHARED_PAGES * PAGE_SIZE) / nvcpu;
+    if ( bufsize > max_bufsize )
+    {
+        bufsize = max_bufsize;
+        max_samples = ( (max_bufsize - sizeof(struct xenoprof_buf)) /
+                        sizeof(struct event_log) ) + 1;
+    }
+
+    npages = (nvcpu * bufsize - 1) / PAGE_SIZE + 1;
+    
+    d->xenoprof->rawbuf = alloc_xenoprof_buf(is_passive ? dom0 : d, npages);
+
+    if ( d->xenoprof->rawbuf == NULL )
+    {
+        xfree(d->xenoprof);
+        d->xenoprof = NULL;
+        return -ENOMEM;
+    }
+
+    d->xenoprof->npages = npages;
+    d->xenoprof->nbuf = nvcpu;
+    d->xenoprof->bufsize = bufsize;
+    d->xenoprof->domain_ready = 0;
+    d->xenoprof->domain_type = XENOPROF_DOMAIN_IGNORED;
+
+    /* Update buffer pointers for active vcpus */
+    i = 0;
+    for_each_vcpu ( d, v )
+    {
+        d->xenoprof->vcpu[v->vcpu_id].event_size = max_samples;
+        d->xenoprof->vcpu[v->vcpu_id].buffer =
+            (struct xenoprof_buf *)&d->xenoprof->rawbuf[i * bufsize];
+        d->xenoprof->vcpu[v->vcpu_id].buffer->event_size = max_samples;
+        d->xenoprof->vcpu[v->vcpu_id].buffer->vcpu_id = v->vcpu_id;
+
+/*         printk("vcpu_id=%d event_size=%d npages=%d\n", 
d->xenoprof->vcpu[v->vcpu_id].buffer->vcpu_id, max_samples, npages); */
+
+        i++;
+        /* in the unlikely case that the number of active vcpus changes */
+        if ( i >= nvcpu )
+            break;
+    }
+    
+    return 0;
+}
+
+void free_xenoprof_pages(struct domain *d)
+{
+    struct xenoprof *x;
+    int order;
+
+    x = d->xenoprof;
+    if ( x == NULL )
+        return;
+
+    if ( x->rawbuf != NULL )
+    {
+        order = get_order_from_pages(x->npages);
+        free_xenheap_pages(x->rawbuf, order);
+    }
+
+    xfree(x);
+    d->xenoprof = NULL;
+}
+
+int active_index(struct domain *d)
+{
+    int i;
+
+    for ( i = 0; i < adomains; i++ )
+        if ( active_domains[i] == d )
+            return i;
+
+    return -1;
+}
+
+int set_active(struct domain *d)
+{
+    int ind;
+    struct xenoprof *x;
+
+    ind = active_index(d);
+    if ( ind < 0 )
+        return -EPERM;
+
+    x = d->xenoprof;
+    if ( x == NULL )
+        return -EPERM;
+
+    x->domain_ready = 1;
+    x->domain_type = XENOPROF_DOMAIN_ACTIVE;
+    active_ready[ind] = 1;
+    activated++;
+
+    return 0;
+}
+
+int reset_active(struct domain *d)
+{
+    int ind;
+    struct xenoprof *x;
+
+    ind = active_index(d);
+    if ( ind < 0 )
+        return -EPERM;
+
+    x = d->xenoprof;
+    if ( x == NULL )
+        return -EPERM;
+
+    x->domain_ready = 0;
+    x->domain_type = XENOPROF_DOMAIN_IGNORED;
+    active_ready[ind] = 0;
+    active_domains[ind] = NULL;
+    activated--;
+    put_domain(d); 
+
+    if ( activated <= 0 )
+        adomains = 0;
+
+    return 0;
+}
+
+void reset_passive(struct domain *d)
+{
+    struct xenoprof *x;
+
+    if ( d == NULL )
+        return;
+
+    x = d->xenoprof;
+    if ( x == NULL )
+        return;
+
+    x->domain_type = XENOPROF_DOMAIN_IGNORED;
+
+    return;
+}
+
+void reset_active_list(void)
+{
+    int i;
+
+    for ( i = 0; i < adomains; i++ )
+    {
+        if ( active_ready[i] )
+        {
+            reset_active(active_domains[i]);
+        }
+    }
+
+    adomains = 0;
+    activated = 0;
+}
+
+void reset_passive_list(void)
+{
+    int i;
+
+    for ( i = 0; i < pdomains; i++ )
+    {
+        reset_passive(passive_domains[i]);
+        put_domain(passive_domains[i]);
+        passive_domains[i] = NULL;
+    }
+
+    pdomains = 0;
+}
+
+int add_active_list (domid_t domid)
+{
+    struct domain *d;
+
+    if ( adomains >= MAX_OPROF_DOMAINS )
+        return -E2BIG;
+
+    d = find_domain_by_id(domid); 
+    if ( d == NULL )
+        return -EINVAL;
+
+    active_domains[adomains] = d;
+    active_ready[adomains] = 0;
+    adomains++;
+
+    return 0;
+}
+
+int add_passive_list(XEN_GUEST_HANDLE(void) arg)
+{
+    struct xenoprof_passive passive;
+    struct domain *d;
+    int ret = 0;
+
+    if ( pdomains >= MAX_OPROF_DOMAINS )
+        return -E2BIG;
+
+    if ( copy_from_guest(&passive, arg, 1) )
+        return -EFAULT;
+
+    d = find_domain_by_id(passive.domain_id); 
+    if ( d == NULL )
+        return -EINVAL;
+
+    if ( (d->xenoprof == NULL) && 
+         ((ret = alloc_xenoprof_struct(d, passive.max_samples, 1)) < 0) ) {
+        put_domain(d);
+        return -ENOMEM;
+    }
+
+    d->xenoprof->domain_type = XENOPROF_DOMAIN_PASSIVE;
+    passive.nbuf = d->xenoprof->nbuf;
+    passive.bufsize = d->xenoprof->bufsize;
+
+    passive.buf_maddr = IA64_XENOPROF_PADDR;
+
+/*     for (i = 0; i < d->xenoprof->npages; i++) { */
+/*         unsigned long gmfn = xenoprof_shared_gmfn(d, d->xenoprof, i); */
+/*         if (i == 0) */
+/*             passive.buf_maddr = gmfn << PAGE_SHIFT; */
+/*     } */
+
+/*     passive.buf_maddr = __pa(d->xenoprof->rawbuf); */
+
+    if ( copy_to_guest(arg, &passive, 1) ) {
+        put_domain(d);
+        return -EFAULT;
+    }
+    
+    passive_domains[pdomains] = d;
+    pdomains++;
+
+    return ret;
+}
+
+void xenoprof_log_event(
+    struct vcpu *vcpu, unsigned long eip, int mode, int event)
+{
+    struct xenoprof_vcpu *v;
+    struct xenoprof_buf *buf;
+    int head;
+    int tail;
+    int size;
+
+
+    total_samples++;
+
+    /* ignore samples of un-monitored domains */
+    /* count idle samples separately from those of other unmonitored domains */
+    if ( !is_profiled(vcpu->domain) )
+    {
+        others_samples++;
+        return;
+    }
+
+    v = &vcpu->domain->xenoprof->vcpu[vcpu->vcpu_id];
+
+    /* Sanity check. Should never happen */ 
+    if ( v->buffer == NULL )
+    {
+        invalid_buffer_samples++;
+        return;
+    }
+
+    buf = vcpu->domain->xenoprof->vcpu[vcpu->vcpu_id].buffer;
+
+    head = buf->event_head;
+    tail = buf->event_tail;
+    size = v->event_size;
+
+    /* make sure indexes in shared buffer are sane */
+    if ( (head < 0) || (head >= size) || (tail < 0) || (tail >= size) )
+    {
+        corrupted_buffer_samples++;
+        return;
+    }
+
+    if ( (head == tail - 1) || (head == size - 1 && tail == 0) )
+    {
+        buf->lost_samples++;
+        lost_samples++;
+    }
+    else
+    {
+        buf->event_log[head].eip = eip;
+        buf->event_log[head].mode = mode;
+        buf->event_log[head].event = event;
+        head++;
+        if ( head >= size )
+            head = 0;
+        buf->event_head = head;
+        if ( is_active(vcpu->domain) )
+            active_samples++;
+        else
+            passive_samples++;
+        if ( mode == 0 )
+            buf->user_samples++;
+        else if ( mode == 1 )
+            buf->kernel_samples++;
+        else
+            buf->xen_samples++;
+    }
+}
+
+int xenoprof_op_init(XEN_GUEST_HANDLE(void) arg)
+{
+    struct xenoprof_init xenoprof_init;
+    int is_primary, num_events;
+    struct domain *d = current->domain;
+    int ret;
+    int i;
+
+    if ( copy_from_guest(&xenoprof_init, arg, 1) )
+        return -EFAULT;
+
+    ret = perfmon_init(&num_events,
+                      &is_primary,
+                      xenoprof_init.cpu_type);
+    if ( ret < 0 )
+        goto err;
+
+    if ( is_primary )
+        primary_profiler = current->domain;
+
+    printk("domain=%d xenoprof=%p\n", d->domain_id, d->xenoprof);
+
+    /*
+     * We allocate xenoprof struct and buffers only at first time xenoprof_init
+     * is called. Memory is then kept until domain is destroyed.
+     */
+    if ( (d->xenoprof == NULL) &&
+         ((ret = alloc_xenoprof_struct(d, xenoprof_init.max_samples, 0)) < 0) )
+        goto err;
+
+    xenoprof_reset_buf(d);
+
+    d->xenoprof->domain_type  = XENOPROF_DOMAIN_IGNORED;
+    d->xenoprof->domain_ready = 0;
+    d->xenoprof->is_primary = is_primary;
+
+    xenoprof_init.is_primary = is_primary;
+    xenoprof_init.num_events = num_events;
+    xenoprof_init.nbuf = d->xenoprof->nbuf;
+    xenoprof_init.bufsize = d->xenoprof->bufsize;
+
+    for (i = 0; i < d->xenoprof->npages; i++) {
+        unsigned long gmfn = xenoprof_shared_gmfn(d, d->xenoprof, i);
+        if (i == 0)
+            xenoprof_init.buf_maddr = gmfn << PAGE_SHIFT;
+    }
+
+    if ( copy_to_guest(arg, &xenoprof_init, 1) )
+    {
+        ret = -EFAULT;
+        goto err;
+    }
+
+    return ret;
+
+ err:
+    if ( primary_profiler == current->domain )
+        primary_profiler = NULL;
+    return ret;
+}
+
+#define PRIV_OP(op) ( (op == XENOPROF_set_active)       \
+                   || (op == XENOPROF_reserve_counters) \
+                   || (op == XENOPROF_setup_events)     \
+                   || (op == XENOPROF_start)            \
+                   || (op == XENOPROF_stop)             \
+                   || (op == XENOPROF_release_counters) \
+                   || (op == XENOPROF_shutdown))
+
+int do_xenoprof_op(int op, XEN_GUEST_HANDLE(void) arg)
+{
+    int ret = 0;
+
+    if ( PRIV_OP(op) && (current->domain != primary_profiler) )
+    {
+        printk("xenoprof: dom %d denied privileged operation %d\n",
+               current->domain->domain_id, op);
+        return -EPERM;
+    }
+
+    switch ( op )
+    {
+    case XENOPROF_init:
+        ret = xenoprof_op_init(arg);
+        break;
+
+    case XENOPROF_reset_active_list:
+    {
+        reset_active_list();
+        ret = 0;
+        break;
+    }
+    case XENOPROF_reset_passive_list:
+    {
+        reset_passive_list();
+        ret = 0;
+        break;
+    }
+    case XENOPROF_set_active:
+    {
+        domid_t domid;
+        if ( xenoprof_state != XENOPROF_IDLE )
+            return -EPERM;
+        if ( copy_from_guest(&domid, arg, 1) )
+            return -EFAULT;
+        ret = add_active_list(domid);
+        break;
+    }
+    case XENOPROF_set_passive:
+    {
+        if ( xenoprof_state != XENOPROF_IDLE )
+            return -EPERM;
+        ret = add_passive_list(arg);
+        break;
+    }
+    case XENOPROF_reserve_counters:
+        if ( xenoprof_state != XENOPROF_IDLE )
+            return -EPERM;
+        xenoprof_state = XENOPROF_COUNTERS_RESERVED;
+        break;
+
+    case XENOPROF_counter:
+    {
+        struct xenoprof_counter counter;
+        if ( xenoprof_state != XENOPROF_COUNTERS_RESERVED )
+            return -EPERM;
+        if ( adomains == 0 )
+            return -EPERM;
+
+        if ( copy_from_guest(&counter, arg, 1) )
+            return -EFAULT;
+
+        if ( counter.ind >= OP_MAX_COUNTER )
+            return -E2BIG;
+
+        counter_config[counter.ind].count     = (unsigned long) counter.count;
+        counter_config[counter.ind].enabled   = (unsigned long) counter.enabled;
+        counter_config[counter.ind].event     = (unsigned long) counter.event;
+        counter_config[counter.ind].kernel    = (unsigned long) counter.kernel;
+        counter_config[counter.ind].user      = (unsigned long) counter.user;
+        counter_config[counter.ind].unit_mask = (unsigned long) counter.unit_mask;
+
+        ret = 0;
+        break;
+    }
+
+    case XENOPROF_setup_events:
+        if ( xenoprof_state != XENOPROF_COUNTERS_RESERVED )
+            return -EPERM;
+        xenoprof_state = XENOPROF_READY;
+        break;
+
+    case XENOPROF_enable_virq:
+    {
+        int i;
+        if ( current->domain == primary_profiler )
+        {
+            xenoprof_reset_stat();
+            for ( i = 0; i < pdomains; i++ ) {
+                xenoprof_reset_buf(passive_domains[i]);
+            }
+        }
+        xenoprof_reset_buf(current->domain);
+        ret = set_active(current->domain);
+        break;
+    }
+
+    case XENOPROF_start:
+        ret = -EPERM;
+        if ( (xenoprof_state == XENOPROF_READY) &&
+             (activated == adomains) )
+            ret = perfmon_start();
+
+        if ( ret == 0 )
+            xenoprof_state = XENOPROF_PROFILING;
+        break;
+
+    case XENOPROF_stop:
+        if ( xenoprof_state != XENOPROF_PROFILING )
+            return -EPERM;
+        perfmon_stop();
+        xenoprof_state = XENOPROF_READY;
+        break;
+
+    case XENOPROF_disable_virq:
+        if ( (xenoprof_state == XENOPROF_PROFILING) && 
+             (is_active(current->domain)) )
+            return -EPERM;
+        ret = reset_active(current->domain);
+        break;
+
+    case XENOPROF_release_counters:
+        ret = -EPERM;
+        if ( (xenoprof_state == XENOPROF_COUNTERS_RESERVED) ||
+             (xenoprof_state == XENOPROF_READY) )
+        {
+            xenoprof_state = XENOPROF_IDLE;
+            reset_passive_list();
+            ret = 0;
+        }
+        break;
+
+    case XENOPROF_shutdown:
+        ret = -EPERM;
+        if ( xenoprof_state == XENOPROF_IDLE )
+        {
+            activated = 0;
+            adomains = 0;
+            primary_profiler = NULL;
+            ret = 0;
+            perfmon_exit();
+        }
+        break;
+
+    default:
+        ret = -EINVAL;
+    }
+
+    if ( ret < 0 )
+        printk("xenoprof: operation %d failed for dom %d (status: %d)\n",
+               op, current->domain->domain_id, ret);
+
+    return ret;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
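
For reference, the expected call sequence through do_xenoprof_op() above is
init -> reserve_counters -> counter -> setup_events -> enable_virq -> start ->
stop -> release_counters -> shutdown.  Roughly, a primary profiler domain
would drive it as sketched below; HYPERVISOR_xenoprof_op() stands in for a
guest-side wrapper around hypercall 31 (see the hypercall.c hunk later) and
the event/count values are purely illustrative, not part of this patch:

    /* Illustrative sequence only; HYPERVISOR_xenoprof_op() is an assumed
     * guest-side hypercall wrapper. */
    struct xenoprof_init init;
    struct xenoprof_counter c = { .ind = 0, .event = 0x12, .count = 100000,
                                  .kernel = 1, .user = 1, .enabled = 1 };

    HYPERVISOR_xenoprof_op(XENOPROF_init, &init);            /* become primary profiler   */
    HYPERVISOR_xenoprof_op(XENOPROF_reserve_counters, NULL); /* IDLE -> COUNTERS_RESERVED */
    HYPERVISOR_xenoprof_op(XENOPROF_counter, &c);            /* program one counter       */
    HYPERVISOR_xenoprof_op(XENOPROF_setup_events, NULL);     /* -> READY                  */
    HYPERVISOR_xenoprof_op(XENOPROF_enable_virq, NULL);      /* reset buffers, go active  */
    HYPERVISOR_xenoprof_op(XENOPROF_start, NULL);            /* -> PROFILING              */
    /* ... collect samples ... */
    HYPERVISOR_xenoprof_op(XENOPROF_stop, NULL);             /* -> READY                  */
    HYPERVISOR_xenoprof_op(XENOPROF_release_counters, NULL); /* -> IDLE                   */
    HYPERVISOR_xenoprof_op(XENOPROF_shutdown, NULL);         /* drop primary_profiler     */
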
diff -Nur xen-ia64-unstable.hg/xen/arch/ia64/xen/domain.c xenoprof-ia64-unstable/xen/arch/ia64/xen/domain.c
--- xen-ia64-unstable.hg/xen/arch/ia64/xen/domain.c     2006-06-30 11:23:29.000000000 +0900
+++ xenoprof-ia64-unstable/xen/arch/ia64/xen/domain.c   2006-06-30 15:26:16.000000000 +0900
@@ -128,6 +128,10 @@
     if (VMX_DOMAIN(next))
            vmx_load_state(next);
     /*ia64_psr(ia64_task_regs(next))->dfh = !ia64_is_local_fpu_owner(next);*/
+#ifdef CONFIG_PERFMON
+    pfm_save_regs(prev);
+    pfm_load_regs(next);
+#endif
     prev = ia64_switch_to(next);
 
     /* Note: ia64_switch_to does not return here at vcpu initialization.  */
@@ -185,6 +189,39 @@
        /* nothing to do */
 }
 
+#if 1  /* By KAZ */
+static int pal_halt        = 1;
+static int can_do_pal_halt = 1;
+
+static int __init nohalt_setup(char * str)
+{
+       pal_halt = can_do_pal_halt = 0;
+       return 1;
+}
+__setup("nohalt", nohalt_setup);
+
+void
+update_pal_halt_status(int status)
+{
+       can_do_pal_halt = pal_halt && status;
+}
+
+static void default_idle(void)
+{
+       int cpu = smp_processor_id();
+       local_irq_disable();
+       printk("default_idle\n");
+       if ( !softirq_pending(cpu)) {
+/*             if (can_do_pal_halt) */
+/*                     safe_halt(); */
+/*             else */
+                       cpu_relax();
+       }
+       local_irq_enable();
+}
+
+#else  /* By KAZ */
+
 static void default_idle(void)
 {
        int cpu = smp_processor_id();
@@ -193,6 +230,7 @@
                safe_halt();
        local_irq_enable();
 }
+#endif /* By KAZ */
 
 static void continue_cpu_idle_loop(void)
 {
@@ -245,6 +283,15 @@
            init_switch_stack(v);
        }
 
+       ia64_psr(vcpu_regs(v))->pp = 
+         ia64_psr(vcpu_regs(v))->up = 
+         ia64_psr(vcpu_regs(v))->i = 1;
+
+       printk("alloc_vcpu_struct: pp=%d up=%d i=%d\n",
+              ia64_psr(vcpu_regs(v))->pp,
+              ia64_psr(vcpu_regs(v))->up,
+              ia64_psr(vcpu_regs(v))->i);
+
        if (!is_idle_domain(d)) {
            v->arch.privregs = 
                alloc_xenheap_pages(get_order(sizeof(mapped_regs_t)));
@@ -493,6 +540,8 @@
     spin_unlock_recursive(&d->page_alloc_lock);
 }
 
+extern void free_xenoprof_pages(struct domain *d);
+
 void domain_relinquish_resources(struct domain *d)
 {
     /* Relinquish every page of memory. */
@@ -502,6 +551,9 @@
 
     relinquish_memory(d, &d->xenpage_list);
     relinquish_memory(d, &d->page_list);
+
+    /* Free page used by xen oprofile buffer */
+    free_xenoprof_pages(d);
 }
 
 void build_physmap_table(struct domain *d)
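
The CONFIG_PERFMON hunk above hands the PMU over at every context switch, and
the psr.pp/psr.up/psr.i initialization ensures privileged and user monitoring
(and interrupts) are enabled in fresh vcpu contexts.  The save/load pair is
implemented in the linux-xen/perfmon.c port earlier in this patch; in spirit
it does something like the following sketch (semantics only, not the real
perfmon.c internals):

    /* Sketch of the PMU hand-over semantics assumed by the hunk above. */
    void pfm_save_regs_sketch(struct task_struct *prev)
    {
        ia64_set_pmc(0, 1);     /* pmc0.fr = 1: freeze counting */
        ia64_srlz_d();
        /* ... snapshot the implemented PMDs into prev's save area ... */
    }

    void pfm_load_regs_sketch(struct task_struct *next)
    {
        /* ... restore next's PMC/PMD images ... */
        ia64_set_pmc(0, 0);     /* unfreeze: resume counting */
        ia64_srlz_d();
    }
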
diff -Nur xen-ia64-unstable.hg/xen/arch/ia64/xen/hypercall.c xenoprof-ia64-unstable/xen/arch/ia64/xen/hypercall.c
--- xen-ia64-unstable.hg/xen/arch/ia64/xen/hypercall.c  2006-06-30 11:23:29.000000000 +0900
+++ xenoprof-ia64-unstable/xen/arch/ia64/xen/hypercall.c        2006-06-30 15:25:01.000000000 +0900
@@ -72,10 +72,10 @@
        (hypercall_t)do_ni_hypercall,           /* do_nmi_op */
        (hypercall_t)do_sched_op,
        (hypercall_t)do_callback_op,            /*  */                  /* 30 */
-       (hypercall_t)do_ni_hypercall,           /*  */
+       (hypercall_t)do_xenoprof_op,            /*  */
        (hypercall_t)do_event_channel_op,
        (hypercall_t)do_physdev_op,
-       (hypercall_t)do_ni_hypercall,           /*  */
+       (hypercall_t)do_perfmon_op,             /*  */
        (hypercall_t)do_ni_hypercall,           /*  */                  /* 35 */
        (hypercall_t)do_ni_hypercall,           /*  */
        (hypercall_t)do_ni_hypercall,           /*  */
@@ -111,7 +111,8 @@
 {
        uint32_t cmd = (uint32_t)regs->r2;
 
-       if (cmd < nr_hypercalls)
+       if (cmd < nr_hypercalls) {
+               perfc_incra(hypercalls, cmd);
                regs->r8 = (*ia64_hypercall_table[cmd])(
                        regs->r14,
                        regs->r15,
@@ -119,7 +120,7 @@
                        regs->r17,
                        regs->r18,
                        regs->r19);
-       else
+       } else
                regs->r8 = -ENOSYS;
 
        return IA64_NO_FAULT;
@@ -225,6 +226,7 @@
                        }
                        else {
                                pal_halt_light_count++;
+                               perfc_incra(hypercalls, __HYPERVISOR_sched_op_compat);
                                do_sched_op_compat(SCHEDOP_yield, 0);
                        }
                        regs->r8 = 0;
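
perfc_incra(hypercalls, cmd) above assumes an array perf counter indexed by
hypercall number; the matching declaration belongs in perfc_defn.h,
presumably along these lines (the exact name and size here are this patch's
assumption):

    /* Assumed perfc_defn.h entry backing perfc_incra(hypercalls, cmd). */
    PERFCOUNTER_ARRAY(hypercalls, "hypercalls", NR_hypercalls)
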
diff -Nur xen-ia64-unstable.hg/xen/arch/ia64/xen/xenmisc.c xenoprof-ia64-unstable/xen/arch/ia64/xen/xenmisc.c
--- xen-ia64-unstable.hg/xen/arch/ia64/xen/xenmisc.c    2006-06-22 13:37:13.000000000 +0900
+++ xenoprof-ia64-unstable/xen/arch/ia64/xen/xenmisc.c  2006-06-30 15:25:01.000000000 +0900
@@ -22,6 +22,7 @@
 #include <asm/vmx.h>
 #include <asm/vmx_vcpu.h>
 #include <asm/vcpu.h>
+#include <asm/perfmon.h>
 
 unsigned long loops_per_jiffy = (1<<12);       // from linux/init/main.c
 
diff -Nur xen-ia64-unstable.hg/xen/common/schedule.c xenoprof-ia64-unstable/xen/common/schedule.c
--- xen-ia64-unstable.hg/xen/common/schedule.c  2006-06-30 11:23:29.000000000 +0900
+++ xenoprof-ia64-unstable/xen/common/schedule.c        2006-06-30 15:25:01.000000000 +0900
@@ -210,6 +210,7 @@
     else
     {
         TRACE_2D(TRC_SCHED_BLOCK, v->domain->domain_id, v->vcpu_id);
+        perfc_incrc(sched_block);
         __enter_scheduler();
     }
 
@@ -255,6 +256,7 @@
         set_timer(&v->poll_timer, sched_poll->timeout);
 
     TRACE_2D(TRC_SCHED_BLOCK, v->domain->domain_id, v->vcpu_id);
+    perfc_incrc(sched_poll);
     __enter_scheduler();
 
     stop_timer(&v->poll_timer);
@@ -269,6 +271,7 @@
 static long do_yield(void)
 {
     TRACE_2D(TRC_SCHED_YIELD, current->domain->domain_id, current->vcpu_id);
+    perfc_incrc(sched_yield);
     __enter_scheduler();
     return 0;
 }
@@ -496,6 +499,9 @@
     r_time = next_slice.time;
     next = next_slice.task;
 
+    if (prev == next)
+        perfc_incrc(sched_xen);
+
     schedule_data[cpu].curr = next;
     
     set_timer(&schedule_data[cpu].s_timer, now + r_time);
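
Likewise, the sched_block/sched_poll/sched_yield/sched_xen counters used
above need single-counter declarations in perfc_defn.h, presumably:

    /* Assumed perfc_defn.h additions for the perfc_incrc() calls above. */
    PERFCOUNTER_CPU(sched_block, "sched: blocked")
    PERFCOUNTER_CPU(sched_poll,  "sched: polled")
    PERFCOUNTER_CPU(sched_yield, "sched: yielded")
    PERFCOUNTER_CPU(sched_xen,   "sched: reselected same task")
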
diff -Nur xen-ia64-unstable.hg/xen/common/softirq.c xenoprof-ia64-unstable/xen/common/softirq.c
--- xen-ia64-unstable.hg/xen/common/softirq.c   2006-06-22 13:37:13.000000000 +0900
+++ xenoprof-ia64-unstable/xen/common/softirq.c 2006-06-30 15:25:01.000000000 +0900
@@ -14,6 +14,7 @@
 #include <xen/mm.h>
 #include <xen/sched.h>
 #include <xen/softirq.h>
+#include <xen/perfc.h>
 
 #ifndef __ARCH_IRQ_STAT
 irq_cpustat_t irq_stat[NR_CPUS];
diff -Nur xen-ia64-unstable.hg/xen/include/asm-ia64/config.h xenoprof-ia64-unstable/xen/include/asm-ia64/config.h
--- xen-ia64-unstable.hg/xen/include/asm-ia64/config.h  2006-06-22 13:37:13.000000000 +0900
+++ xenoprof-ia64-unstable/xen/include/asm-ia64/config.h        2006-06-30 15:25:01.000000000 +0900
@@ -254,6 +254,8 @@
 #define CONFIG_ACPI_BOOT 1
 //#endif
 
+#define        CONFIG_PERFMON  1
+
 #define CONFIG_XEN_ATTENTION_KEY 1
 #endif /* __ASSEMBLY__ */
 #endif /* __XEN_IA64_CONFIG_H__ */
diff -Nur xen-ia64-unstable.hg/xen/include/asm-ia64/linux/asm/perfmon.h xenoprof-ia64-unstable/xen/include/asm-ia64/linux/asm/perfmon.h
--- xen-ia64-unstable.hg/xen/include/asm-ia64/linux/asm/perfmon.h       1970-01-01 09:00:00.000000000 +0900
+++ xenoprof-ia64-unstable/xen/include/asm-ia64/linux/asm/perfmon.h     2006-06-30 15:25:01.000000000 +0900
@@ -0,0 +1,334 @@
+/*
+ * Copyright (C) 2001-2003 Hewlett-Packard Co
+ *               Stephane Eranian <eranian@xxxxxxxxxx>
+ */
+
+#ifndef _ASM_IA64_PERFMON_H
+#define _ASM_IA64_PERFMON_H
+
+/*
+ * perfmon commands supported on all CPU models
+ */
+#define PFM_WRITE_PMCS         0x01
+#define PFM_WRITE_PMDS         0x02
+#define PFM_READ_PMDS          0x03
+#define PFM_STOP               0x04
+#define PFM_START              0x05
+#define PFM_ENABLE             0x06 /* obsolete */
+#define PFM_DISABLE            0x07 /* obsolete */
+#define PFM_CREATE_CONTEXT     0x08
+#define PFM_DESTROY_CONTEXT    0x09 /* obsolete use close() */
+#define PFM_RESTART            0x0a
+#define PFM_PROTECT_CONTEXT    0x0b /* obsolete */
+#define PFM_GET_FEATURES       0x0c
+#define PFM_DEBUG              0x0d
+#define PFM_UNPROTECT_CONTEXT  0x0e /* obsolete */
+#define PFM_GET_PMC_RESET_VAL  0x0f
+#define PFM_LOAD_CONTEXT       0x10
+#define PFM_UNLOAD_CONTEXT     0x11
+#define PFM_FREE_CONTEXT       0x12
+
+/*
+ * PMU model specific commands (may not be supported on all PMU models)
+ */
+#define PFM_WRITE_IBRS         0x20
+#define PFM_WRITE_DBRS         0x21
+
+/*
+ * context flags
+ */
+#define PFM_FL_NOTIFY_BLOCK     0x01   /* block task on user level notifications */
+#define PFM_FL_SYSTEM_WIDE      0x02   /* create a system wide context */
+#define PFM_FL_OVFL_NO_MSG      0x80   /* do not post overflow/end messages for notification */
+
+/*
+ * event set flags
+ */
+#define PFM_SETFL_EXCL_IDLE      0x01   /* exclude idle task (syswide only) XXX: DO NOT USE YET */
+
+/*
+ * PMC flags
+ */
+#define PFM_REGFL_OVFL_NOTIFY  0x1     /* send notification on overflow */
+#define PFM_REGFL_RANDOM       0x2     /* randomize sampling interval   */
+
+/*
+ * PMD/PMC/IBR/DBR return flags (ignored on input)
+ *
+ * Those flags are used on output and must be checked in case EAGAIN is returned
+ * by any of the calls using a pfarg_reg_t or pfarg_dbreg_t structure.
+ */
+#define PFM_REG_RETFL_NOTAVAIL (1UL<<31) /* set if register is implemented but not available */
+#define PFM_REG_RETFL_EINVAL   (1UL<<30) /* set if register entry is invalid */
+#define PFM_REG_RETFL_MASK     (PFM_REG_RETFL_NOTAVAIL|PFM_REG_RETFL_EINVAL)
+
+#define PFM_REG_HAS_ERROR(flag)        (((flag) & PFM_REG_RETFL_MASK) != 0)
+
+typedef unsigned char pfm_uuid_t[16];  /* custom sampling buffer identifier type */
+
+/*
+ * Request structure used to define a context
+ */
+typedef struct {
+       pfm_uuid_t     ctx_smpl_buf_id;  /* which buffer format to use (if needed) */
+       unsigned long  ctx_flags;        /* noblock/block */
+       unsigned short ctx_nextra_sets;  /* number of extra event sets (you always get 1) */
+       unsigned short ctx_reserved1;    /* for future use */
+       int            ctx_fd;           /* return arg: unique identification for context */
+       void           *ctx_smpl_vaddr;  /* return arg: virtual address of sampling buffer, is used */
+       unsigned long  ctx_reserved2[11];/* for future use */
+} pfarg_context_t;
+
+/*
+ * Request structure used to write/read a PMC or PMD
+ */
+typedef struct {
+       unsigned int    reg_num;           /* which register */
+       unsigned short  reg_set;           /* event set for this register */
+       unsigned short  reg_reserved1;     /* for future use */
+
+       unsigned long   reg_value;         /* initial pmc/pmd value */
+       unsigned long   reg_flags;         /* input: pmc/pmd flags, return: reg error */
+
+       unsigned long   reg_long_reset;    /* reset after buffer overflow notification */
+       unsigned long   reg_short_reset;   /* reset after counter overflow */
+
+       unsigned long   reg_reset_pmds[4]; /* which other counters to reset on overflow */
+       unsigned long   reg_random_seed;   /* seed value when randomization is used */
+       unsigned long   reg_random_mask;   /* bitmask used to limit random value */
+       unsigned long   reg_last_reset_val;/* return: PMD last reset value */
+
+       unsigned long   reg_smpl_pmds[4];  /* which pmds are accessed when PMC overflows */
+       unsigned long   reg_smpl_eventid;  /* opaque sampling event identifier */
+
+       unsigned long   reg_reserved2[3];   /* for future use */
+} pfarg_reg_t;
+
+typedef struct {
+       unsigned int    dbreg_num;              /* which debug register */
+       unsigned short  dbreg_set;              /* event set for this register */
+       unsigned short  dbreg_reserved1;        /* for future use */
+       unsigned long   dbreg_value;            /* value for debug register */
+       unsigned long   dbreg_flags;            /* return: dbreg error */
+       unsigned long   dbreg_reserved2[1];     /* for future use */
+} pfarg_dbreg_t;
+
+typedef struct {
+       unsigned int    ft_version;     /* perfmon: major [16-31], minor [0-15] */
+       unsigned int    ft_reserved;    /* reserved for future use */
+       unsigned long   reserved[4];    /* for future use */
+} pfarg_features_t;
+
+typedef struct {
+       pid_t           load_pid;          /* process to load the context into */
+       unsigned short  load_set;          /* first event set to load */
+       unsigned short  load_reserved1;    /* for future use */
+       unsigned long   load_reserved2[3]; /* for future use */
+} pfarg_load_t;
+
+typedef struct {
+       int             msg_type;               /* generic message header */
+       int             msg_ctx_fd;             /* generic message header */
+       unsigned long   msg_ovfl_pmds[4];       /* which PMDs overflowed */
+       unsigned short  msg_active_set;         /* active set at the time of overflow */
+       unsigned short  msg_reserved1;          /* for future use */
+       unsigned int    msg_reserved2;          /* for future use */
+       unsigned long   msg_tstamp;             /* for perf tuning/debug */
+} pfm_ovfl_msg_t;
+
+typedef struct {
+       int             msg_type;               /* generic message header */
+       int             msg_ctx_fd;             /* generic message header */
+       unsigned long   msg_tstamp;             /* for perf tuning */
+} pfm_end_msg_t;
+
+typedef struct {
+       int             msg_type;               /* type of the message */
+       int             msg_ctx_fd;             /* unique identifier for the context */
+       unsigned long   msg_tstamp;             /* for perf tuning */
+} pfm_gen_msg_t;
+
+#define PFM_MSG_OVFL   1       /* an overflow happened */
+#define PFM_MSG_END    2       /* task to which context was attached ended */
+
+typedef union {
+       pfm_ovfl_msg_t  pfm_ovfl_msg;
+       pfm_end_msg_t   pfm_end_msg;
+       pfm_gen_msg_t   pfm_gen_msg;
+} pfm_msg_t;
+
+/*
+ * Define the version numbers for both perfmon as a whole and the sampling buffer format.
+ */
+#define PFM_VERSION_MAJ                 2U
+#define PFM_VERSION_MIN                 0U
+#define PFM_VERSION             (((PFM_VERSION_MAJ&0xffff)<<16)|(PFM_VERSION_MIN & 0xffff))
+#define PFM_VERSION_MAJOR(x)    (((x)>>16) & 0xffff)
+#define PFM_VERSION_MINOR(x)    ((x) & 0xffff)
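
As a worked example of the packing above: with major 2 and minor 0,
PFM_VERSION evaluates to (2<<16)|0 == 0x20000, and the accessors recover each
half:

    /* Worked example of the version encoding (illustrative). */
    unsigned int v = PFM_VERSION;   /* 0x20000                           */
    /* PFM_VERSION_MAJOR(v) == 2U      i.e. ((v >> 16) & 0xffff)         */
    /* PFM_VERSION_MINOR(v) == 0U      i.e. (v & 0xffff)                 */
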
+
+
+/*
+ * miscellaneous architected definitions
+ */
+#define PMU_FIRST_COUNTER      4       /* first counting monitor (PMC/PMD) */
+#define PMU_MAX_PMCS           256     /* maximum architected number of PMC registers */
+#define PMU_MAX_PMDS           256     /* maximum architected number of PMD registers */
+
+#ifdef __KERNEL__
+
+extern long perfmonctl(int fd, int cmd, void *arg, int narg);
+
+typedef struct {
+       void (*handler)(int irq, void *arg, struct pt_regs *regs);
+} pfm_intr_handler_desc_t;
+
+extern void pfm_save_regs (struct task_struct *);
+extern void pfm_load_regs (struct task_struct *);
+
+extern void pfm_exit_thread(struct task_struct *);
+extern int  pfm_use_debug_registers(struct task_struct *);
+extern int  pfm_release_debug_registers(struct task_struct *);
+extern void pfm_syst_wide_update_task(struct task_struct *, unsigned long info, int is_ctxswin);
+extern void pfm_inherit(struct task_struct *task, struct pt_regs *regs);
+extern void pfm_init_percpu(void);
+extern void pfm_handle_work(void);
+extern int  pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *h);
+extern int  pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *h);
+
+
+
+/*
+ * Reset PMD register flags
+ */
+#define PFM_PMD_SHORT_RESET    0
+#define PFM_PMD_LONG_RESET     1
+
+typedef union {
+       unsigned int val;
+       struct {
+               unsigned int notify_user:1;     /* notify user program of overflow */
+               unsigned int reset_ovfl_pmds:1; /* reset overflowed PMDs */
+               unsigned int block_task:1;      /* block monitored task on kernel exit */
+               unsigned int mask_monitoring:1; /* mask monitors via PMCx.plm */
+               unsigned int reserved:28;       /* for future use */
+       } bits;
+} pfm_ovfl_ctrl_t;
+
+typedef struct {
+       unsigned char   ovfl_pmd;                       /* index of overflowed PMD */
+       unsigned char   ovfl_notify;                    /* =1 if monitor requested overflow notification */
+       unsigned short  active_set;                     /* event set active at the time of the overflow */
+       pfm_ovfl_ctrl_t ovfl_ctrl;                      /* return: perfmon controls to set by handler */
+
+       unsigned long   pmd_last_reset;                 /* last reset value of the PMD */
+       unsigned long   smpl_pmds[4];                   /* bitmask of other PMDs of interest on overflow */
+       unsigned long   smpl_pmds_values[PMU_MAX_PMDS]; /* values for the other PMDs of interest */
+       unsigned long   pmd_value;                      /* current 64-bit value of the PMD */
+       unsigned long   pmd_eventid;                    /* eventid associated with PMD */
+} pfm_ovfl_arg_t;
+
+
+typedef struct {
+       char            *fmt_name;
+       pfm_uuid_t      fmt_uuid;
+       size_t          fmt_arg_size;
+       unsigned long   fmt_flags;
+
+       int             (*fmt_validate)(struct task_struct *task, unsigned int flags, int cpu, void *arg);
+       int             (*fmt_getsize)(struct task_struct *task, unsigned int flags, int cpu, void *arg, unsigned long *size);
+       int             (*fmt_init)(struct task_struct *task, void *buf, unsigned int flags, int cpu, void *arg);
+       int             (*fmt_handler)(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, struct pt_regs *regs, unsigned long stamp);
+       int             (*fmt_restart)(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs);
+       int             (*fmt_restart_active)(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs);
+       int             (*fmt_exit)(struct task_struct *task, void *buf, struct pt_regs *regs);
+
+       struct list_head fmt_list;
+} pfm_buffer_fmt_t;
+
+extern int pfm_register_buffer_fmt(pfm_buffer_fmt_t *fmt);
+extern int pfm_unregister_buffer_fmt(pfm_uuid_t uuid);
+
+/*
+ * perfmon interface exported to modules
+ */
+extern int pfm_mod_read_pmds(struct task_struct *, void *req, unsigned int nreq, struct pt_regs *regs);
+extern int pfm_mod_write_pmcs(struct task_struct *, void *req, unsigned int nreq, struct pt_regs *regs);
+extern int pfm_mod_write_ibrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs);
+extern int pfm_mod_write_dbrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs);
+
+/*
+ * describe the content of the local_cpu_data->pfm_syst_info field
+ */
+#define PFM_CPUINFO_SYST_WIDE  0x1     /* if set a system wide session exists */
+#define PFM_CPUINFO_DCR_PP     0x2     /* if set the system wide session has started */
+#define PFM_CPUINFO_EXCL_IDLE  0x4     /* the system wide session excludes the idle task */
+
+/*
+ * sysctl control structure. visible to sampling formats
+ */
+typedef struct {
+       int     debug;          /* turn on/off debugging via syslog */
+       int     debug_ovfl;     /* turn on/off debug printk in overflow handler */
+       int     fastctxsw;      /* turn on/off fast (insecure) ctxsw */
+       int     expert_mode;    /* turn on/off value checking */
+} pfm_sysctl_t;
+extern pfm_sysctl_t pfm_sysctl;
+
+
+/*
+ * information about a PMC or PMD.
+ * dep_pmd[]: a bitmask of dependent PMD registers
+ * dep_pmc[]: a bitmask of dependent PMC registers
+ */
+struct pfm_context;
+typedef int (*pfm_reg_check_t)(struct task_struct *task, struct pfm_context *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
+typedef struct {
+       unsigned int            type;
+       int                     pm_pos;
+       unsigned long           default_value;  /* power-on default value */
+       unsigned long           reserved_mask;  /* bitmask of reserved bits */
+       pfm_reg_check_t         read_check;
+       pfm_reg_check_t         write_check;
+       unsigned long           dep_pmd[4];
+       unsigned long           dep_pmc[4];
+} pfm_reg_desc_t;
+
+
+/*
+ * This structure is initialized at boot time and contains
+ * a description of the PMU main characteristics.
+ *
+ * If the probe function is defined, detection is based
+ * on its return value: 
+ *     - 0 means recognized PMU
+ *     - anything else means not supported
+ * When the probe function is not defined, then the pmu_family field
+ * is used and it must match the host CPU family such that:
+ *     - cpu->family & config->pmu_family != 0
+ */
+typedef struct {
+       unsigned long  ovfl_val;        /* overflow value for counters */
+
+       pfm_reg_desc_t *pmc_desc;       /* detailed PMC register dependencies descriptions */
+       pfm_reg_desc_t *pmd_desc;       /* detailed PMD register dependencies descriptions */
+
+       unsigned int   num_pmcs;        /* number of PMCS: computed at init time */
+       unsigned int   num_pmds;        /* number of PMDS: computed at init time */
+       unsigned long  impl_pmcs[4];    /* bitmask of implemented PMCS */
+       unsigned long  impl_pmds[4];    /* bitmask of implemented PMDS */
+
+       char          *pmu_name;        /* PMU family name */
+       unsigned int  pmu_family;       /* cpuid family pattern used to identify pmu */
+       unsigned int  flags;            /* pmu specific flags */
+       unsigned int  num_ibrs;         /* number of IBRS: computed at init time */
+       unsigned int  num_dbrs;         /* number of DBRS: computed at init time */
+       unsigned int  num_counters;     /* PMC/PMD counting pairs: computed at init time */
+       int           (*probe)(void);   /* customized probe routine */
+       unsigned int  use_rr_dbregs:1;  /* set if debug registers used for range restriction */
+} pmu_config_t;
+
+extern pmu_config_t    *pmu_conf;
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_IA64_PERFMON_H */
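
To make the probe()/pmu_family detection rule described above concrete, a PMU
description might be filled in as follows (all names and values here are
illustrative, not taken from this patch):

    /* Hypothetical pmu_config_t instance: probe() wins when defined,
     * otherwise cpu->family is matched against pmu_family. */
    static int pfm_example_probe(void)
    {
            return 0;                        /* 0 == PMU recognized */
    }

    static pmu_config_t pmu_conf_example = {
            .pmu_name     = "Itanium 2",
            .pmu_family   = 0x1f,            /* cpuid family pattern (illustrative) */
            .ovfl_val     = (1UL << 47) - 1, /* counter width (illustrative)        */
            .num_counters = 4,
            .probe        = pfm_example_probe,
    };
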
diff -Nur xen-ia64-unstable.hg/xen/include/asm-ia64/linux/completion.h xenoprof-ia64-unstable/xen/include/asm-ia64/linux/completion.h
--- xen-ia64-unstable.hg/xen/include/asm-ia64/linux/completion.h        1970-01-01 09:00:00.000000000 +0900
+++ xenoprof-ia64-unstable/xen/include/asm-ia64/linux/completion.h      2006-06-30 15:25:01.000000000 +0900
@@ -0,0 +1,42 @@
+#ifndef __LINUX_COMPLETION_H
+#define __LINUX_COMPLETION_H
+
+/*
+ * (C) Copyright 2001 Linus Torvalds
+ *
+ * Atomic wait-for-completion handler data structures.
+ * See kernel/sched.c for details.
+ */
+
+#include <linux/wait.h>
+
+struct completion {
+       unsigned int done;
+       wait_queue_head_t wait;
+};
+
+#define COMPLETION_INITIALIZER(work) \
+       { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
+
+#define DECLARE_COMPLETION(work) \
+       struct completion work = COMPLETION_INITIALIZER(work)
+
+static inline void init_completion(struct completion *x)
+{
+       x->done = 0;
+       init_waitqueue_head(&x->wait);
+}
+
+extern void FASTCALL(wait_for_completion(struct completion *));
+extern int FASTCALL(wait_for_completion_interruptible(struct completion *x));
+extern unsigned long FASTCALL(wait_for_completion_timeout(struct completion *x,
+                                                  unsigned long timeout));
+extern unsigned long FASTCALL(wait_for_completion_interruptible_timeout(
+                       struct completion *x, unsigned long timeout));
+
+extern void FASTCALL(complete(struct completion *));
+extern void FASTCALL(complete_all(struct completion *));
+
+#define INIT_COMPLETION(x)     ((x).done = 0)
+
+#endif
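
For reference, the canonical use of this primitive is one side sleeping in
wait_for_completion() until another side calls complete(); a minimal sketch:

    /* Minimal usage sketch of the API declared above. */
    static DECLARE_COMPLETION(work_done);

    static void producer(void)
    {
            /* ... finish the work ... */
            complete(&work_done);            /* wake one waiter   */
    }

    static void consumer(void)
    {
            wait_for_completion(&work_done); /* sleep until then  */
    }
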
diff -Nur xen-ia64-unstable.hg/xen/include/asm-ia64/linux/posix_types.h xenoprof-ia64-unstable/xen/include/asm-ia64/linux/posix_types.h
--- xen-ia64-unstable.hg/xen/include/asm-ia64/linux/posix_types.h       1970-01-01 09:00:00.000000000 +0900
+++ xenoprof-ia64-unstable/xen/include/asm-ia64/linux/posix_types.h     2006-06-30 15:25:01.000000000 +0900
@@ -0,0 +1,126 @@
+#ifndef _ASM_IA64_POSIX_TYPES_H
+#define _ASM_IA64_POSIX_TYPES_H
+
+/*
+ * This file is generally used by user-level software, so you need to
+ * be a little careful about namespace pollution etc.  Also, we cannot
+ * assume GCC is being used.
+ *
+ * Based on <asm-alpha/posix_types.h>.
+ *
+ * Modified 1998-2000, 2003
+ *     David Mosberger-Tang <davidm@xxxxxxxxxx>, Hewlett-Packard Co
+ */
+
+typedef unsigned long  __kernel_ino_t;
+typedef unsigned int   __kernel_mode_t;
+typedef unsigned int   __kernel_nlink_t;
+typedef long           __kernel_off_t;
+typedef long long      __kernel_loff_t;
+typedef int            __kernel_pid_t;
+typedef int            __kernel_ipc_pid_t;
+typedef unsigned int   __kernel_uid_t;
+typedef unsigned int   __kernel_gid_t;
+typedef unsigned long  __kernel_size_t;
+typedef long           __kernel_ssize_t;
+typedef long           __kernel_ptrdiff_t;
+typedef long           __kernel_time_t;
+typedef long           __kernel_suseconds_t;
+typedef long           __kernel_clock_t;
+typedef int            __kernel_timer_t;
+typedef int            __kernel_clockid_t;
+typedef int            __kernel_daddr_t;
+typedef char *         __kernel_caddr_t;
+typedef unsigned long  __kernel_sigset_t;      /* at least 32 bits */
+typedef unsigned short __kernel_uid16_t;
+typedef unsigned short __kernel_gid16_t;
+
+typedef struct {
+       int     val[2];
+} __kernel_fsid_t;
+
+typedef __kernel_uid_t __kernel_old_uid_t;
+typedef __kernel_gid_t __kernel_old_gid_t;
+typedef __kernel_uid_t __kernel_uid32_t;
+typedef __kernel_gid_t __kernel_gid32_t;
+
+typedef unsigned int   __kernel_old_dev_t;
+
+# ifdef __KERNEL__
+
+#  ifndef __GNUC__
+
+#define        __FD_SET(d, set)        ((set)->fds_bits[__FDELT(d)] |= __FDMASK(d))
+#define        __FD_CLR(d, set)        ((set)->fds_bits[__FDELT(d)] &= ~__FDMASK(d))
+#define        __FD_ISSET(d, set)      (((set)->fds_bits[__FDELT(d)] & __FDMASK(d)) != 0)
+#define        __FD_ZERO(set)  \
+  ((void) memset ((__ptr_t) (set), 0, sizeof (__kernel_fd_set)))
+
+#  else /* !__GNUC__ */
+
+/* With GNU C, use inline functions instead so args are evaluated only once: */
+
+#undef __FD_SET
+static __inline__ void __FD_SET(unsigned long fd, __kernel_fd_set *fdsetp)
+{
+       unsigned long _tmp = fd / __NFDBITS;
+       unsigned long _rem = fd % __NFDBITS;
+       fdsetp->fds_bits[_tmp] |= (1UL<<_rem);
+}
+
+#undef __FD_CLR
+static __inline__ void __FD_CLR(unsigned long fd, __kernel_fd_set *fdsetp)
+{
+       unsigned long _tmp = fd / __NFDBITS;
+       unsigned long _rem = fd % __NFDBITS;
+       fdsetp->fds_bits[_tmp] &= ~(1UL<<_rem);
+}
+
+#undef __FD_ISSET
+static __inline__ int __FD_ISSET(unsigned long fd, const __kernel_fd_set *p)
+{ 
+       unsigned long _tmp = fd / __NFDBITS;
+       unsigned long _rem = fd % __NFDBITS;
+       return (p->fds_bits[_tmp] & (1UL<<_rem)) != 0;
+}
+
+/*
+ * This will unroll the loop for the normal constant case (8 ints,
+ * for a 256-bit fd_set)
+ */
+#undef __FD_ZERO
+static __inline__ void __FD_ZERO(__kernel_fd_set *p)
+{
+       unsigned long *tmp = p->fds_bits;
+       int i;
+
+       if (__builtin_constant_p(__FDSET_LONGS)) {
+               switch (__FDSET_LONGS) {
+                     case 16:
+                       tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
+                       tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0;
+                       tmp[ 8] = 0; tmp[ 9] = 0; tmp[10] = 0; tmp[11] = 0;
+                       tmp[12] = 0; tmp[13] = 0; tmp[14] = 0; tmp[15] = 0;
+                       return;
+
+                     case 8:
+                       tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
+                       tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0;
+                       return;
+
+                     case 4:
+                       tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
+                       return;
+               }
+       }
+       i = __FDSET_LONGS;
+       while (i) {
+               i--;
+               *tmp = 0;
+               tmp++;
+       }
+}
+
+#  endif /* !__GNUC__ */
+# endif /* __KERNEL__ */
+#endif /* _ASM_IA64_POSIX_TYPES_H */
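
A small usage sketch of the __FD_* helpers above (this is the kernel-internal
spelling; userland uses the FD_SET family):

    __kernel_fd_set set;

    __FD_ZERO(&set);                /* clear all bits       */
    __FD_SET(5, &set);              /* mark fd 5            */
    if (__FD_ISSET(5, &set))        /* test fd 5            */
            __FD_CLR(5, &set);      /* and clear it again   */
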
diff -Nur xen-ia64-unstable.hg/xen/include/asm-ia64/linux/sysctl.h xenoprof-ia64-unstable/xen/include/asm-ia64/linux/sysctl.h
--- xen-ia64-unstable.hg/xen/include/asm-ia64/linux/sysctl.h    1970-01-01 09:00:00.000000000 +0900
+++ xenoprof-ia64-unstable/xen/include/asm-ia64/linux/sysctl.h  2006-06-30 15:25:01.000000000 +0900
@@ -0,0 +1,990 @@
+/*
+ * sysctl.h: General linux system control interface
+ *
+ * Begun 24 March 1995, Stephen Tweedie
+ *
+ ****************************************************************
+ ****************************************************************
+ **
+ **  The values in this file are exported to user space via 
+ **  the sysctl() binary interface.  However this interface
+ **  is unstable and deprecated and will be removed in the future. 
+ **  For a stable interface use /proc/sys.
+ **
+ ****************************************************************
+ ****************************************************************
+ */
+
+#ifndef _LINUX_SYSCTL_H
+#define _LINUX_SYSCTL_H
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/compiler.h>
+
+struct file;
+struct completion;
+
+#define CTL_MAXNAME 10         /* how many path components do we allow in a
+                                  call to sysctl?   In other words, what is
+                                  the largest acceptable value for the nlen
+                                  member of a struct __sysctl_args to have? */
+
+struct __sysctl_args {
+       int __user *name;
+       int nlen;
+       void __user *oldval;
+       size_t __user *oldlenp;
+       void __user *newval;
+       size_t newlen;
+       unsigned long __unused[4];
+};
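
The struct above is the argument block of the (deprecated) binary sysctl(2)
interface; reading a string value such as KERN_OSTYPE would go roughly like
this from userland (sketch only, not part of the patch):

    /* Userland sketch of the binary sysctl interface described above. */
    #include <linux/sysctl.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    int read_ostype(char *buf, size_t *len)
    {
            int name[] = { CTL_KERN, KERN_OSTYPE };
            struct __sysctl_args args = {
                    .name    = name,
                    .nlen    = 2,
                    .oldval  = buf,
                    .oldlenp = len,
            };
            return syscall(SYS__sysctl, &args);  /* buf gets e.g. "Linux" */
    }
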
+
+/* Define sysctl names first */
+
+/* Top-level names: */
+
+/* For internal pattern-matching use only: */
+#ifdef __KERNEL__
+#define CTL_ANY                -1      /* Matches any name */
+#define CTL_NONE       0
+#endif
+
+enum
+{
+       CTL_KERN=1,             /* General kernel info and control */
+       CTL_VM=2,               /* VM management */
+       CTL_NET=3,              /* Networking */
+       CTL_PROC=4,             /* Process info */
+       CTL_FS=5,               /* Filesystems */
+       CTL_DEBUG=6,            /* Debugging */
+       CTL_DEV=7,              /* Devices */
+       CTL_BUS=8,              /* Busses */
+       CTL_ABI=9,              /* Binary emulation */
+       CTL_CPU=10              /* CPU stuff (speed scaling, etc) */
+};
+
+/* CTL_BUS names: */
+enum
+{
+       CTL_BUS_ISA=1           /* ISA */
+};
+
+/* /proc/sys/fs/inotify/ */
+enum
+{
+       INOTIFY_MAX_USER_INSTANCES=1,   /* max instances per user */
+       INOTIFY_MAX_USER_WATCHES=2,     /* max watches per user */
+       INOTIFY_MAX_QUEUED_EVENTS=3     /* max queued events per instance */
+};
+
+/* CTL_KERN names: */
+enum
+{
+       KERN_OSTYPE=1,          /* string: system version */
+       KERN_OSRELEASE=2,       /* string: system release */
+       KERN_OSREV=3,           /* int: system revision */
+       KERN_VERSION=4,         /* string: compile time info */
+       KERN_SECUREMASK=5,      /* struct: maximum rights mask */
+       KERN_PROF=6,            /* table: profiling information */
+       KERN_NODENAME=7,
+       KERN_DOMAINNAME=8,
+
+       KERN_CAP_BSET=14,       /* int: capability bounding set */
+       KERN_PANIC=15,          /* int: panic timeout */
+       KERN_REALROOTDEV=16,    /* real root device to mount after initrd */
+
+       KERN_SPARC_REBOOT=21,   /* reboot command on Sparc */
+       KERN_CTLALTDEL=22,      /* int: allow ctl-alt-del to reboot */
+       KERN_PRINTK=23,         /* struct: control printk logging parameters */
+       KERN_NAMETRANS=24,      /* Name translation */
+       KERN_PPC_HTABRECLAIM=25, /* turn htab reclamation on/off on PPC */
+       KERN_PPC_ZEROPAGED=26,  /* turn idle page zeroing on/off on PPC */
+       KERN_PPC_POWERSAVE_NAP=27, /* use nap mode for power saving */
+       KERN_MODPROBE=28,
+       KERN_SG_BIG_BUFF=29,
+       KERN_ACCT=30,           /* BSD process accounting parameters */
+       KERN_PPC_L2CR=31,       /* l2cr register on PPC */
+
+       KERN_RTSIGNR=32,        /* Number of rt sigs queued */
+       KERN_RTSIGMAX=33,       /* Max queueable */
+       
+       KERN_SHMMAX=34,         /* long: Maximum shared memory segment */
+       KERN_MSGMAX=35,         /* int: Maximum size of a message */
+       KERN_MSGMNB=36,         /* int: Maximum message queue size */
+       KERN_MSGPOOL=37,        /* int: Maximum system message pool size */
+       KERN_SYSRQ=38,          /* int: Sysreq enable */
+       KERN_MAX_THREADS=39,    /* int: Maximum nr of threads in the system */
+       KERN_RANDOM=40,         /* Random driver */
+       KERN_SHMALL=41,         /* int: Maximum size of shared memory */
+       KERN_MSGMNI=42,         /* int: msg queue identifiers */
+       KERN_SEM=43,            /* struct: sysv semaphore limits */
+       KERN_SPARC_STOP_A=44,   /* int: Sparc Stop-A enable */
+       KERN_SHMMNI=45,         /* int: shm array identifiers */
+       KERN_OVERFLOWUID=46,    /* int: overflow UID */
+       KERN_OVERFLOWGID=47,    /* int: overflow GID */
+       KERN_SHMPATH=48,        /* string: path to shm fs */
+       KERN_HOTPLUG=49,        /* string: path to uevent helper (deprecated) */
+       KERN_IEEE_EMULATION_WARNINGS=50, /* int: unimplemented ieee instructions */
+       KERN_S390_USER_DEBUG_LOGGING=51,  /* int: dumps of user faults */
+       KERN_CORE_USES_PID=52,          /* int: use core or core.%pid */
+       KERN_TAINTED=53,        /* int: various kernel tainted flags */
+       KERN_CADPID=54,         /* int: PID of the process to notify on CAD */
+       KERN_PIDMAX=55,         /* int: PID # limit */
+       KERN_CORE_PATTERN=56,   /* string: pattern for core-file names */
+       KERN_PANIC_ON_OOPS=57,  /* int: whether we will panic on an oops */
+       KERN_HPPA_PWRSW=58,     /* int: hppa soft-power enable */
+       KERN_HPPA_UNALIGNED=59, /* int: hppa unaligned-trap enable */
+       KERN_PRINTK_RATELIMIT=60, /* int: tune printk ratelimiting */
+       KERN_PRINTK_RATELIMIT_BURST=61, /* int: tune printk ratelimiting */
+       KERN_PTY=62,            /* dir: pty driver */
+       KERN_NGROUPS_MAX=63,    /* int: NGROUPS_MAX */
+       KERN_SPARC_SCONS_PWROFF=64, /* int: serial console power-off halt */
+       KERN_HZ_TIMER=65,       /* int: hz timer on or off */
+       KERN_UNKNOWN_NMI_PANIC=66, /* int: unknown nmi panic flag */
+       KERN_BOOTLOADER_TYPE=67, /* int: boot loader type */
+       KERN_RANDOMIZE=68, /* int: randomize virtual address space */
+       KERN_SETUID_DUMPABLE=69, /* int: behaviour of dumps for setuid core */
+       KERN_SPIN_RETRY=70,     /* int: number of spinlock retries */
+       KERN_ACPI_VIDEO_FLAGS=71, /* int: flags for setting up video after ACPI sleep */
+       KERN_IA64_UNALIGNED=72, /* int: ia64 unaligned userland trap enable */
+};
+
+
+/* CTL_VM names: */
+enum
+{
+       VM_UNUSED1=1,           /* was: struct: Set vm swapping control */
+       VM_UNUSED2=2,           /* was: int: Linear or sqrt() swapout for hogs */
+       VM_UNUSED3=3,           /* was: struct: Set free page thresholds */
+       VM_UNUSED4=4,           /* Spare */
+       VM_OVERCOMMIT_MEMORY=5, /* Turn off the virtual memory safety limit */
+       VM_UNUSED5=6,           /* was: struct: Set buffer memory thresholds */
+       VM_UNUSED7=7,           /* was: struct: Set cache memory thresholds */
+       VM_UNUSED8=8,           /* was: struct: Control kswapd behaviour */
+       VM_UNUSED9=9,           /* was: struct: Set page table cache parameters */
+       VM_PAGE_CLUSTER=10,     /* int: set number of pages to swap together */
+       VM_DIRTY_BACKGROUND=11, /* dirty_background_ratio */
+       VM_DIRTY_RATIO=12,      /* dirty_ratio */
+       VM_DIRTY_WB_CS=13,      /* dirty_writeback_centisecs */
+       VM_DIRTY_EXPIRE_CS=14,  /* dirty_expire_centisecs */
+       VM_NR_PDFLUSH_THREADS=15, /* nr_pdflush_threads */
+       VM_OVERCOMMIT_RATIO=16, /* percent of RAM to allow overcommit in */
+       VM_PAGEBUF=17,          /* struct: Control pagebuf parameters */
+       VM_HUGETLB_PAGES=18,    /* int: Number of available Huge Pages */
+       VM_SWAPPINESS=19,       /* Tendency to steal mapped memory */
+       VM_LOWMEM_RESERVE_RATIO=20,/* reservation ratio for lower memory zones */
+       VM_MIN_FREE_KBYTES=21,  /* Minimum free kilobytes to maintain */
+       VM_MAX_MAP_COUNT=22,    /* int: Maximum number of mmaps/address-space */
+       VM_LAPTOP_MODE=23,      /* vm laptop mode */
+       VM_BLOCK_DUMP=24,       /* block dump mode */
+       VM_HUGETLB_GROUP=25,    /* permitted hugetlb group */
+       VM_VFS_CACHE_PRESSURE=26, /* dcache/icache reclaim pressure */
+       VM_LEGACY_VA_LAYOUT=27, /* legacy/compatibility virtual address space layout */
+       VM_SWAP_TOKEN_TIMEOUT=28, /* default time for token time out */
+       VM_DROP_PAGECACHE=29,   /* int: nuke lots of pagecache */
+       VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */
+       VM_ZONE_RECLAIM_MODE=31, /* reclaim local zone memory before going off node */
+       VM_ZONE_RECLAIM_INTERVAL=32, /* time period to wait after reclaim failure */
+};
+
+
+/* CTL_NET names: */
+enum
+{
+       NET_CORE=1,
+       NET_ETHER=2,
+       NET_802=3,
+       NET_UNIX=4,
+       NET_IPV4=5,
+       NET_IPX=6,
+       NET_ATALK=7,
+       NET_NETROM=8,
+       NET_AX25=9,
+       NET_BRIDGE=10,
+       NET_ROSE=11,
+       NET_IPV6=12,
+       NET_X25=13,
+       NET_TR=14,
+       NET_DECNET=15,
+       NET_ECONET=16,
+       NET_SCTP=17,
+       NET_LLC=18,
+       NET_NETFILTER=19,
+};
+
+/* /proc/sys/kernel/random */
+enum
+{
+       RANDOM_POOLSIZE=1,
+       RANDOM_ENTROPY_COUNT=2,
+       RANDOM_READ_THRESH=3,
+       RANDOM_WRITE_THRESH=4,
+       RANDOM_BOOT_ID=5,
+       RANDOM_UUID=6
+};
+
+/* /proc/sys/kernel/pty */
+enum
+{
+       PTY_MAX=1,
+       PTY_NR=2
+};
+
+/* /proc/sys/bus/isa */
+enum
+{
+       BUS_ISA_MEM_BASE=1,
+       BUS_ISA_PORT_BASE=2,
+       BUS_ISA_PORT_SHIFT=3
+};
+
+/* /proc/sys/net/core */
+enum
+{
+       NET_CORE_WMEM_MAX=1,
+       NET_CORE_RMEM_MAX=2,
+       NET_CORE_WMEM_DEFAULT=3,
+       NET_CORE_RMEM_DEFAULT=4,
+/* was NET_CORE_DESTROY_DELAY */
+       NET_CORE_MAX_BACKLOG=6,
+       NET_CORE_FASTROUTE=7,
+       NET_CORE_MSG_COST=8,
+       NET_CORE_MSG_BURST=9,
+       NET_CORE_OPTMEM_MAX=10,
+       NET_CORE_HOT_LIST_LENGTH=11,
+       NET_CORE_DIVERT_VERSION=12,
+       NET_CORE_NO_CONG_THRESH=13,
+       NET_CORE_NO_CONG=14,
+       NET_CORE_LO_CONG=15,
+       NET_CORE_MOD_CONG=16,
+       NET_CORE_DEV_WEIGHT=17,
+       NET_CORE_SOMAXCONN=18,
+       NET_CORE_BUDGET=19,
+};
+
+/* /proc/sys/net/ethernet */
+
+/* /proc/sys/net/802 */
+
+/* /proc/sys/net/unix */
+
+enum
+{
+       NET_UNIX_DESTROY_DELAY=1,
+       NET_UNIX_DELETE_DELAY=2,
+       NET_UNIX_MAX_DGRAM_QLEN=3,
+};
+
+/* /proc/sys/net/netfilter */
+enum
+{
+       NET_NF_CONNTRACK_MAX=1,
+       NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT=2,
+       NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV=3,
+       NET_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED=4,
+       NET_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT=5,
+       NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT=6,
+       NET_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK=7,
+       NET_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT=8,
+       NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE=9,
+       NET_NF_CONNTRACK_UDP_TIMEOUT=10,
+       NET_NF_CONNTRACK_UDP_TIMEOUT_STREAM=11,
+       NET_NF_CONNTRACK_ICMP_TIMEOUT=12,
+       NET_NF_CONNTRACK_GENERIC_TIMEOUT=13,
+       NET_NF_CONNTRACK_BUCKETS=14,
+       NET_NF_CONNTRACK_LOG_INVALID=15,
+       NET_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS=16,
+       NET_NF_CONNTRACK_TCP_LOOSE=17,
+       NET_NF_CONNTRACK_TCP_BE_LIBERAL=18,
+       NET_NF_CONNTRACK_TCP_MAX_RETRANS=19,
+       NET_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED=20,
+       NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT=21,
+       NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED=22,
+       NET_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED=23,
+       NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT=24,
+       NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD=25,
+       NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT=26,
+       NET_NF_CONNTRACK_COUNT=27,
+       NET_NF_CONNTRACK_ICMPV6_TIMEOUT=28,
+       NET_NF_CONNTRACK_FRAG6_TIMEOUT=29,
+       NET_NF_CONNTRACK_FRAG6_LOW_THRESH=30,
+       NET_NF_CONNTRACK_FRAG6_HIGH_THRESH=31,
+};
+
+/* /proc/sys/net/ipv4 */
+enum
+{
+       /* v2.0 compatible variables */
+       NET_IPV4_FORWARD=8,
+       NET_IPV4_DYNADDR=9,
+
+       NET_IPV4_CONF=16,
+       NET_IPV4_NEIGH=17,
+       NET_IPV4_ROUTE=18,
+       NET_IPV4_FIB_HASH=19,
+       NET_IPV4_NETFILTER=20,
+
+       NET_IPV4_TCP_TIMESTAMPS=33,
+       NET_IPV4_TCP_WINDOW_SCALING=34,
+       NET_IPV4_TCP_SACK=35,
+       NET_IPV4_TCP_RETRANS_COLLAPSE=36,
+       NET_IPV4_DEFAULT_TTL=37,
+       NET_IPV4_AUTOCONFIG=38,
+       NET_IPV4_NO_PMTU_DISC=39,
+       NET_IPV4_TCP_SYN_RETRIES=40,
+       NET_IPV4_IPFRAG_HIGH_THRESH=41,
+       NET_IPV4_IPFRAG_LOW_THRESH=42,
+       NET_IPV4_IPFRAG_TIME=43,
+       NET_IPV4_TCP_MAX_KA_PROBES=44,
+       NET_IPV4_TCP_KEEPALIVE_TIME=45,
+       NET_IPV4_TCP_KEEPALIVE_PROBES=46,
+       NET_IPV4_TCP_RETRIES1=47,
+       NET_IPV4_TCP_RETRIES2=48,
+       NET_IPV4_TCP_FIN_TIMEOUT=49,
+       NET_IPV4_IP_MASQ_DEBUG=50,
+       NET_TCP_SYNCOOKIES=51,
+       NET_TCP_STDURG=52,
+       NET_TCP_RFC1337=53,
+       NET_TCP_SYN_TAILDROP=54,
+       NET_TCP_MAX_SYN_BACKLOG=55,
+       NET_IPV4_LOCAL_PORT_RANGE=56,
+       NET_IPV4_ICMP_ECHO_IGNORE_ALL=57,
+       NET_IPV4_ICMP_ECHO_IGNORE_BROADCASTS=58,
+       NET_IPV4_ICMP_SOURCEQUENCH_RATE=59,
+       NET_IPV4_ICMP_DESTUNREACH_RATE=60,
+       NET_IPV4_ICMP_TIMEEXCEED_RATE=61,
+       NET_IPV4_ICMP_PARAMPROB_RATE=62,
+       NET_IPV4_ICMP_ECHOREPLY_RATE=63,
+       NET_IPV4_ICMP_IGNORE_BOGUS_ERROR_RESPONSES=64,
+       NET_IPV4_IGMP_MAX_MEMBERSHIPS=65,
+       NET_TCP_TW_RECYCLE=66,
+       NET_IPV4_ALWAYS_DEFRAG=67,
+       NET_IPV4_TCP_KEEPALIVE_INTVL=68,
+       NET_IPV4_INET_PEER_THRESHOLD=69,
+       NET_IPV4_INET_PEER_MINTTL=70,
+       NET_IPV4_INET_PEER_MAXTTL=71,
+       NET_IPV4_INET_PEER_GC_MINTIME=72,
+       NET_IPV4_INET_PEER_GC_MAXTIME=73,
+       NET_TCP_ORPHAN_RETRIES=74,
+       NET_TCP_ABORT_ON_OVERFLOW=75,
+       NET_TCP_SYNACK_RETRIES=76,
+       NET_TCP_MAX_ORPHANS=77,
+       NET_TCP_MAX_TW_BUCKETS=78,
+       NET_TCP_FACK=79,
+       NET_TCP_REORDERING=80,
+       NET_TCP_ECN=81,
+       NET_TCP_DSACK=82,
+       NET_TCP_MEM=83,
+       NET_TCP_WMEM=84,
+       NET_TCP_RMEM=85,
+       NET_TCP_APP_WIN=86,
+       NET_TCP_ADV_WIN_SCALE=87,
+       NET_IPV4_NONLOCAL_BIND=88,
+       NET_IPV4_ICMP_RATELIMIT=89,
+       NET_IPV4_ICMP_RATEMASK=90,
+       NET_TCP_TW_REUSE=91,
+       NET_TCP_FRTO=92,
+       NET_TCP_LOW_LATENCY=93,
+       NET_IPV4_IPFRAG_SECRET_INTERVAL=94,
+       NET_IPV4_IGMP_MAX_MSF=96,
+       NET_TCP_NO_METRICS_SAVE=97,
+       NET_TCP_DEFAULT_WIN_SCALE=105,
+       NET_TCP_MODERATE_RCVBUF=106,
+       NET_TCP_TSO_WIN_DIVISOR=107,
+       NET_TCP_BIC_BETA=108,
+       NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109,
+       NET_TCP_CONG_CONTROL=110,
+       NET_TCP_ABC=111,
+       NET_IPV4_IPFRAG_MAX_DIST=112,
+};
+
+enum {
+       NET_IPV4_ROUTE_FLUSH=1,
+       NET_IPV4_ROUTE_MIN_DELAY=2,
+       NET_IPV4_ROUTE_MAX_DELAY=3,
+       NET_IPV4_ROUTE_GC_THRESH=4,
+       NET_IPV4_ROUTE_MAX_SIZE=5,
+       NET_IPV4_ROUTE_GC_MIN_INTERVAL=6,
+       NET_IPV4_ROUTE_GC_TIMEOUT=7,
+       NET_IPV4_ROUTE_GC_INTERVAL=8,
+       NET_IPV4_ROUTE_REDIRECT_LOAD=9,
+       NET_IPV4_ROUTE_REDIRECT_NUMBER=10,
+       NET_IPV4_ROUTE_REDIRECT_SILENCE=11,
+       NET_IPV4_ROUTE_ERROR_COST=12,
+       NET_IPV4_ROUTE_ERROR_BURST=13,
+       NET_IPV4_ROUTE_GC_ELASTICITY=14,
+       NET_IPV4_ROUTE_MTU_EXPIRES=15,
+       NET_IPV4_ROUTE_MIN_PMTU=16,
+       NET_IPV4_ROUTE_MIN_ADVMSS=17,
+       NET_IPV4_ROUTE_SECRET_INTERVAL=18,
+       NET_IPV4_ROUTE_GC_MIN_INTERVAL_MS=19,
+};
+
+enum
+{
+       NET_PROTO_CONF_ALL=-2,
+       NET_PROTO_CONF_DEFAULT=-3
+
+       /* And device ifindices ... */
+};
+
+enum
+{
+       NET_IPV4_CONF_FORWARDING=1,
+       NET_IPV4_CONF_MC_FORWARDING=2,
+       NET_IPV4_CONF_PROXY_ARP=3,
+       NET_IPV4_CONF_ACCEPT_REDIRECTS=4,
+       NET_IPV4_CONF_SECURE_REDIRECTS=5,
+       NET_IPV4_CONF_SEND_REDIRECTS=6,
+       NET_IPV4_CONF_SHARED_MEDIA=7,
+       NET_IPV4_CONF_RP_FILTER=8,
+       NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE=9,
+       NET_IPV4_CONF_BOOTP_RELAY=10,
+       NET_IPV4_CONF_LOG_MARTIANS=11,
+       NET_IPV4_CONF_TAG=12,
+       NET_IPV4_CONF_ARPFILTER=13,
+       NET_IPV4_CONF_MEDIUM_ID=14,
+       NET_IPV4_CONF_NOXFRM=15,
+       NET_IPV4_CONF_NOPOLICY=16,
+       NET_IPV4_CONF_FORCE_IGMP_VERSION=17,
+       NET_IPV4_CONF_ARP_ANNOUNCE=18,
+       NET_IPV4_CONF_ARP_IGNORE=19,
+       NET_IPV4_CONF_PROMOTE_SECONDARIES=20,
+       __NET_IPV4_CONF_MAX
+};
+
+/* /proc/sys/net/ipv4/netfilter */
+enum
+{
+       NET_IPV4_NF_CONNTRACK_MAX=1,
+       NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT=2,
+       NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV=3,
+       NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED=4,
+       NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT=5,
+       NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT=6,
+       NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK=7,
+       NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT=8,
+       NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE=9,
+       NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT=10,
+       NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT_STREAM=11,
+       NET_IPV4_NF_CONNTRACK_ICMP_TIMEOUT=12,
+       NET_IPV4_NF_CONNTRACK_GENERIC_TIMEOUT=13,
+       NET_IPV4_NF_CONNTRACK_BUCKETS=14,
+       NET_IPV4_NF_CONNTRACK_LOG_INVALID=15,
+       NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS=16,
+       NET_IPV4_NF_CONNTRACK_TCP_LOOSE=17,
+       NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL=18,
+       NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS=19,
+       NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED=20,
+       NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT=21,
+       NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED=22,
+       NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED=23,
+       NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT=24,
+       NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD=25,
+       NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT=26,
+       NET_IPV4_NF_CONNTRACK_COUNT=27,
+};
+ 
+/* /proc/sys/net/ipv6 */
+enum {
+       NET_IPV6_CONF=16,
+       NET_IPV6_NEIGH=17,
+       NET_IPV6_ROUTE=18,
+       NET_IPV6_ICMP=19,
+       NET_IPV6_BINDV6ONLY=20,
+       NET_IPV6_IP6FRAG_HIGH_THRESH=21,
+       NET_IPV6_IP6FRAG_LOW_THRESH=22,
+       NET_IPV6_IP6FRAG_TIME=23,
+       NET_IPV6_IP6FRAG_SECRET_INTERVAL=24,
+       NET_IPV6_MLD_MAX_MSF=25,
+};
+
+enum {
+       NET_IPV6_ROUTE_FLUSH=1,
+       NET_IPV6_ROUTE_GC_THRESH=2,
+       NET_IPV6_ROUTE_MAX_SIZE=3,
+       NET_IPV6_ROUTE_GC_MIN_INTERVAL=4,
+       NET_IPV6_ROUTE_GC_TIMEOUT=5,
+       NET_IPV6_ROUTE_GC_INTERVAL=6,
+       NET_IPV6_ROUTE_GC_ELASTICITY=7,
+       NET_IPV6_ROUTE_MTU_EXPIRES=8,
+       NET_IPV6_ROUTE_MIN_ADVMSS=9,
+       NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS=10
+};
+
+enum {
+       NET_IPV6_FORWARDING=1,
+       NET_IPV6_HOP_LIMIT=2,
+       NET_IPV6_MTU=3,
+       NET_IPV6_ACCEPT_RA=4,
+       NET_IPV6_ACCEPT_REDIRECTS=5,
+       NET_IPV6_AUTOCONF=6,
+       NET_IPV6_DAD_TRANSMITS=7,
+       NET_IPV6_RTR_SOLICITS=8,
+       NET_IPV6_RTR_SOLICIT_INTERVAL=9,
+       NET_IPV6_RTR_SOLICIT_DELAY=10,
+       NET_IPV6_USE_TEMPADDR=11,
+       NET_IPV6_TEMP_VALID_LFT=12,
+       NET_IPV6_TEMP_PREFERED_LFT=13,
+       NET_IPV6_REGEN_MAX_RETRY=14,
+       NET_IPV6_MAX_DESYNC_FACTOR=15,
+       NET_IPV6_MAX_ADDRESSES=16,
+       NET_IPV6_FORCE_MLD_VERSION=17,
+       __NET_IPV6_MAX
+};
+
+/* /proc/sys/net/ipv6/icmp */
+enum {
+       NET_IPV6_ICMP_RATELIMIT=1
+};
+
+/* /proc/sys/net/<protocol>/neigh/<dev> */
+enum {
+       NET_NEIGH_MCAST_SOLICIT=1,
+       NET_NEIGH_UCAST_SOLICIT=2,
+       NET_NEIGH_APP_SOLICIT=3,
+       NET_NEIGH_RETRANS_TIME=4,
+       NET_NEIGH_REACHABLE_TIME=5,
+       NET_NEIGH_DELAY_PROBE_TIME=6,
+       NET_NEIGH_GC_STALE_TIME=7,
+       NET_NEIGH_UNRES_QLEN=8,
+       NET_NEIGH_PROXY_QLEN=9,
+       NET_NEIGH_ANYCAST_DELAY=10,
+       NET_NEIGH_PROXY_DELAY=11,
+       NET_NEIGH_LOCKTIME=12,
+       NET_NEIGH_GC_INTERVAL=13,
+       NET_NEIGH_GC_THRESH1=14,
+       NET_NEIGH_GC_THRESH2=15,
+       NET_NEIGH_GC_THRESH3=16,
+       NET_NEIGH_RETRANS_TIME_MS=17,
+       NET_NEIGH_REACHABLE_TIME_MS=18,
+       __NET_NEIGH_MAX
+};
+
+/* /proc/sys/net/ipx */
+enum {
+       NET_IPX_PPROP_BROADCASTING=1,
+       NET_IPX_FORWARDING=2
+};
+
+/* /proc/sys/net/llc */
+enum {
+       NET_LLC2=1,
+       NET_LLC_STATION=2,
+};
+
+/* /proc/sys/net/llc/llc2 */
+enum {
+       NET_LLC2_TIMEOUT=1,
+};
+
+/* /proc/sys/net/llc/station */
+enum {
+       NET_LLC_STATION_ACK_TIMEOUT=1,
+};
+
+/* /proc/sys/net/llc/llc2/timeout */
+enum {
+       NET_LLC2_ACK_TIMEOUT=1,
+       NET_LLC2_P_TIMEOUT=2,
+       NET_LLC2_REJ_TIMEOUT=3,
+       NET_LLC2_BUSY_TIMEOUT=4,
+};
+
+/* /proc/sys/net/appletalk */
+enum {
+       NET_ATALK_AARP_EXPIRY_TIME=1,
+       NET_ATALK_AARP_TICK_TIME=2,
+       NET_ATALK_AARP_RETRANSMIT_LIMIT=3,
+       NET_ATALK_AARP_RESOLVE_TIME=4
+};
+
+
+/* /proc/sys/net/netrom */
+enum {
+       NET_NETROM_DEFAULT_PATH_QUALITY=1,
+       NET_NETROM_OBSOLESCENCE_COUNT_INITIALISER=2,
+       NET_NETROM_NETWORK_TTL_INITIALISER=3,
+       NET_NETROM_TRANSPORT_TIMEOUT=4,
+       NET_NETROM_TRANSPORT_MAXIMUM_TRIES=5,
+       NET_NETROM_TRANSPORT_ACKNOWLEDGE_DELAY=6,
+       NET_NETROM_TRANSPORT_BUSY_DELAY=7,
+       NET_NETROM_TRANSPORT_REQUESTED_WINDOW_SIZE=8,
+       NET_NETROM_TRANSPORT_NO_ACTIVITY_TIMEOUT=9,
+       NET_NETROM_ROUTING_CONTROL=10,
+       NET_NETROM_LINK_FAILS_COUNT=11,
+       NET_NETROM_RESET=12
+};
+
+/* /proc/sys/net/ax25 */
+enum {
+       NET_AX25_IP_DEFAULT_MODE=1,
+       NET_AX25_DEFAULT_MODE=2,
+       NET_AX25_BACKOFF_TYPE=3,
+       NET_AX25_CONNECT_MODE=4,
+       NET_AX25_STANDARD_WINDOW=5,
+       NET_AX25_EXTENDED_WINDOW=6,
+       NET_AX25_T1_TIMEOUT=7,
+       NET_AX25_T2_TIMEOUT=8,
+       NET_AX25_T3_TIMEOUT=9,
+       NET_AX25_IDLE_TIMEOUT=10,
+       NET_AX25_N2=11,
+       NET_AX25_PACLEN=12,
+       NET_AX25_PROTOCOL=13,
+       NET_AX25_DAMA_SLAVE_TIMEOUT=14
+};
+
+/* /proc/sys/net/rose */
+enum {
+       NET_ROSE_RESTART_REQUEST_TIMEOUT=1,
+       NET_ROSE_CALL_REQUEST_TIMEOUT=2,
+       NET_ROSE_RESET_REQUEST_TIMEOUT=3,
+       NET_ROSE_CLEAR_REQUEST_TIMEOUT=4,
+       NET_ROSE_ACK_HOLD_BACK_TIMEOUT=5,
+       NET_ROSE_ROUTING_CONTROL=6,
+       NET_ROSE_LINK_FAIL_TIMEOUT=7,
+       NET_ROSE_MAX_VCS=8,
+       NET_ROSE_WINDOW_SIZE=9,
+       NET_ROSE_NO_ACTIVITY_TIMEOUT=10
+};
+
+/* /proc/sys/net/x25 */
+enum {
+       NET_X25_RESTART_REQUEST_TIMEOUT=1,
+       NET_X25_CALL_REQUEST_TIMEOUT=2,
+       NET_X25_RESET_REQUEST_TIMEOUT=3,
+       NET_X25_CLEAR_REQUEST_TIMEOUT=4,
+       NET_X25_ACK_HOLD_BACK_TIMEOUT=5
+};
+
+/* /proc/sys/net/token-ring */
+enum
+{
+       NET_TR_RIF_TIMEOUT=1
+};
+
+/* /proc/sys/net/decnet/ */
+enum {
+       NET_DECNET_NODE_TYPE = 1,
+       NET_DECNET_NODE_ADDRESS = 2,
+       NET_DECNET_NODE_NAME = 3,
+       NET_DECNET_DEFAULT_DEVICE = 4,
+       NET_DECNET_TIME_WAIT = 5,
+       NET_DECNET_DN_COUNT = 6,
+       NET_DECNET_DI_COUNT = 7,
+       NET_DECNET_DR_COUNT = 8,
+       NET_DECNET_DST_GC_INTERVAL = 9,
+       NET_DECNET_CONF = 10,
+       NET_DECNET_NO_FC_MAX_CWND = 11,
+       NET_DECNET_MEM = 12,
+       NET_DECNET_RMEM = 13,
+       NET_DECNET_WMEM = 14,
+       NET_DECNET_DEBUG_LEVEL = 255
+};
+
+/* /proc/sys/net/decnet/conf/<dev> */
+enum {
+       NET_DECNET_CONF_LOOPBACK = -2,
+       NET_DECNET_CONF_DDCMP = -3,
+       NET_DECNET_CONF_PPP = -4,
+       NET_DECNET_CONF_X25 = -5,
+       NET_DECNET_CONF_GRE = -6,
+       NET_DECNET_CONF_ETHER = -7
+
+       /* ... and ifindex of devices */
+};
+
+/* /proc/sys/net/decnet/conf/<dev>/ */
+enum {
+       NET_DECNET_CONF_DEV_PRIORITY = 1,
+       NET_DECNET_CONF_DEV_T1 = 2,
+       NET_DECNET_CONF_DEV_T2 = 3,
+       NET_DECNET_CONF_DEV_T3 = 4,
+       NET_DECNET_CONF_DEV_FORWARDING = 5,
+       NET_DECNET_CONF_DEV_BLKSIZE = 6,
+       NET_DECNET_CONF_DEV_STATE = 7
+};
+
+/* /proc/sys/net/sctp */
+enum {
+       NET_SCTP_RTO_INITIAL = 1,
+       NET_SCTP_RTO_MIN     = 2,
+       NET_SCTP_RTO_MAX     = 3,
+       NET_SCTP_RTO_ALPHA   = 4,
+       NET_SCTP_RTO_BETA    = 5,
+       NET_SCTP_VALID_COOKIE_LIFE       =  6,
+       NET_SCTP_ASSOCIATION_MAX_RETRANS =  7,
+       NET_SCTP_PATH_MAX_RETRANS        =  8,
+       NET_SCTP_MAX_INIT_RETRANSMITS    =  9,
+       NET_SCTP_HB_INTERVAL             = 10,
+       NET_SCTP_PRESERVE_ENABLE         = 11,
+       NET_SCTP_MAX_BURST               = 12,
+       NET_SCTP_ADDIP_ENABLE            = 13,
+       NET_SCTP_PRSCTP_ENABLE           = 14,
+       NET_SCTP_SNDBUF_POLICY           = 15,
+       NET_SCTP_SACK_TIMEOUT            = 16,
+       NET_SCTP_RCVBUF_POLICY           = 17,
+};
+
+/* /proc/sys/net/bridge */
+enum {
+       NET_BRIDGE_NF_CALL_ARPTABLES = 1,
+       NET_BRIDGE_NF_CALL_IPTABLES = 2,
+       NET_BRIDGE_NF_CALL_IP6TABLES = 3,
+       NET_BRIDGE_NF_FILTER_VLAN_TAGGED = 4,
+};
+
+/* CTL_PROC names: */
+
+/* CTL_FS names: */
+enum
+{
+       FS_NRINODE=1,   /* int:current number of allocated inodes */
+       FS_STATINODE=2,
+       FS_MAXINODE=3,  /* int:maximum number of inodes that can be allocated */
+       FS_NRDQUOT=4,   /* int:current number of allocated dquots */
+       FS_MAXDQUOT=5,  /* int:maximum number of dquots that can be allocated */
+       FS_NRFILE=6,    /* int:current number of allocated filedescriptors */
+       FS_MAXFILE=7,   /* int:maximum number of filedescriptors that can be 
allocated */
+       FS_DENTRY=8,
+       FS_NRSUPER=9,   /* int:current number of allocated super_blocks */
+       FS_MAXSUPER=10, /* int:maximum number of super_blocks that can be 
allocated */
+       FS_OVERFLOWUID=11,      /* int: overflow UID */
+       FS_OVERFLOWGID=12,      /* int: overflow GID */
+       FS_LEASES=13,   /* int: leases enabled */
+       FS_DIR_NOTIFY=14,       /* int: directory notification enabled */
+       FS_LEASE_TIME=15,       /* int: maximum time to wait for a lease break */
+       FS_DQSTATS=16,  /* disc quota usage statistics and control */
+       FS_XFS=17,      /* struct: control xfs parameters */
+       FS_AIO_NR=18,   /* current system-wide number of aio requests */
+       FS_AIO_MAX_NR=19,       /* system-wide maximum number of aio requests */
+       FS_INOTIFY=20,  /* inotify submenu */
+};
+
+/* /proc/sys/fs/quota/ */
+enum {
+       FS_DQ_LOOKUPS = 1,
+       FS_DQ_DROPS = 2,
+       FS_DQ_READS = 3,
+       FS_DQ_WRITES = 4,
+       FS_DQ_CACHE_HITS = 5,
+       FS_DQ_ALLOCATED = 6,
+       FS_DQ_FREE = 7,
+       FS_DQ_SYNCS = 8,
+       FS_DQ_WARNINGS = 9,
+};
+
+/* CTL_DEBUG names: */
+
+/* CTL_DEV names: */
+enum {
+       DEV_CDROM=1,
+       DEV_HWMON=2,
+       DEV_PARPORT=3,
+       DEV_RAID=4,
+       DEV_MAC_HID=5,
+       DEV_SCSI=6,
+       DEV_IPMI=7,
+};
+
+/* /proc/sys/dev/cdrom */
+enum {
+       DEV_CDROM_INFO=1,
+       DEV_CDROM_AUTOCLOSE=2,
+       DEV_CDROM_AUTOEJECT=3,
+       DEV_CDROM_DEBUG=4,
+       DEV_CDROM_LOCK=5,
+       DEV_CDROM_CHECK_MEDIA=6
+};
+
+/* /proc/sys/dev/parport */
+enum {
+       DEV_PARPORT_DEFAULT=-3
+};
+
+/* /proc/sys/dev/raid */
+enum {
+       DEV_RAID_SPEED_LIMIT_MIN=1,
+       DEV_RAID_SPEED_LIMIT_MAX=2
+};
+
+/* /proc/sys/dev/parport/default */
+enum {
+       DEV_PARPORT_DEFAULT_TIMESLICE=1,
+       DEV_PARPORT_DEFAULT_SPINTIME=2
+};
+
+/* /proc/sys/dev/parport/parport n */
+enum {
+       DEV_PARPORT_SPINTIME=1,
+       DEV_PARPORT_BASE_ADDR=2,
+       DEV_PARPORT_IRQ=3,
+       DEV_PARPORT_DMA=4,
+       DEV_PARPORT_MODES=5,
+       DEV_PARPORT_DEVICES=6,
+       DEV_PARPORT_AUTOPROBE=16
+};
+
+/* /proc/sys/dev/parport/parport n/devices/ */
+enum {
+       DEV_PARPORT_DEVICES_ACTIVE=-3,
+};
+
+/* /proc/sys/dev/parport/parport n/devices/device n */
+enum {
+       DEV_PARPORT_DEVICE_TIMESLICE=1,
+};
+
+/* /proc/sys/dev/mac_hid */
+enum {
+       DEV_MAC_HID_KEYBOARD_SENDS_LINUX_KEYCODES=1,
+       DEV_MAC_HID_KEYBOARD_LOCK_KEYCODES=2,
+       DEV_MAC_HID_MOUSE_BUTTON_EMULATION=3,
+       DEV_MAC_HID_MOUSE_BUTTON2_KEYCODE=4,
+       DEV_MAC_HID_MOUSE_BUTTON3_KEYCODE=5,
+       DEV_MAC_HID_ADB_MOUSE_SENDS_KEYCODES=6
+};
+
+/* /proc/sys/dev/scsi */
+enum {
+       DEV_SCSI_LOGGING_LEVEL=1,
+};
+
+/* /proc/sys/dev/ipmi */
+enum {
+       DEV_IPMI_POWEROFF_POWERCYCLE=1,
+};
+
+/* /proc/sys/abi */
+enum
+{
+       ABI_DEFHANDLER_COFF=1,  /* default handler for coff binaries */
+       ABI_DEFHANDLER_ELF=2,   /* default handler for ELF binaries */
+       ABI_DEFHANDLER_LCALL7=3,/* default handler for procs using lcall7 */
+       ABI_DEFHANDLER_LIBCSO=4,/* default handler for an libc.so ELF interp */
+       ABI_TRACE=5,            /* tracing flags */
+       ABI_FAKE_UTSNAME=6,     /* fake target utsname information */
+};
+
+#ifdef __KERNEL__
+#include <linux/list.h>
+
+extern void sysctl_init(void);
+
+typedef struct ctl_table ctl_table;
+
+typedef int ctl_handler (ctl_table *table, int __user *name, int nlen,
+                        void __user *oldval, size_t __user *oldlenp,
+                        void __user *newval, size_t newlen, 
+                        void **context);
+
+typedef int proc_handler (ctl_table *ctl, int write, struct file * filp,
+                         void __user *buffer, size_t *lenp, loff_t *ppos);
+
+extern int proc_dostring(ctl_table *, int, struct file *,
+                        void __user *, size_t *, loff_t *);
+extern int proc_dointvec(ctl_table *, int, struct file *,
+                        void __user *, size_t *, loff_t *);
+extern int proc_dointvec_bset(ctl_table *, int, struct file *,
+                             void __user *, size_t *, loff_t *);
+extern int proc_dointvec_minmax(ctl_table *, int, struct file *,
+                               void __user *, size_t *, loff_t *);
+extern int proc_dointvec_jiffies(ctl_table *, int, struct file *,
+                                void __user *, size_t *, loff_t *);
+extern int proc_dointvec_userhz_jiffies(ctl_table *, int, struct file *,
+                                       void __user *, size_t *, loff_t *);
+extern int proc_dointvec_ms_jiffies(ctl_table *, int, struct file *,
+                                   void __user *, size_t *, loff_t *);
+extern int proc_doulongvec_minmax(ctl_table *, int, struct file *,
+                                 void __user *, size_t *, loff_t *);
+extern int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int,
+                                     struct file *, void __user *, size_t *, loff_t *);
+
+extern int do_sysctl (int __user *name, int nlen,
+                     void __user *oldval, size_t __user *oldlenp,
+                     void __user *newval, size_t newlen);
+
+extern int do_sysctl_strategy (ctl_table *table, 
+                              int __user *name, int nlen,
+                              void __user *oldval, size_t __user *oldlenp,
+                              void __user *newval, size_t newlen, void ** context);
+
+extern ctl_handler sysctl_string;
+extern ctl_handler sysctl_intvec;
+extern ctl_handler sysctl_jiffies;
+extern ctl_handler sysctl_ms_jiffies;
+
+
+/*
+ * Register a set of sysctl names by calling register_sysctl_table
+ * with an initialised array of ctl_table's.  An entry with zero
+ * ctl_name terminates the table.  table->de will be set up by the
+ * registration and need not be initialised in advance.
+ *
+ * sysctl names can be mirrored automatically under /proc/sys.  The
+ * procname supplied controls /proc naming.
+ *
+ * The table's mode will be honoured both for sys_sysctl(2) and
+ * proc-fs access.
+ *
+ * Leaf nodes in the sysctl tree will be represented by a single file
+ * under /proc; non-leaf nodes will be represented by directories.  A
+ * null procname disables /proc mirroring at this node.
+ * 
+ * sysctl(2) can automatically manage read and write requests through
+ * the sysctl table.  The data and maxlen fields of the ctl_table
+ * struct enable minimal validation of the values being written to be
+ * performed, and the mode field allows minimal authentication.
+ * 
+ * More sophisticated management can be enabled by the provision of a
+ * strategy routine with the table entry.  This will be called before
+ * any automatic read or write of the data is performed.
+ * 
+ * The strategy routine may return:
+ * <0: Error occurred (error is passed to user process)
+ * 0:  OK - proceed with automatic read or write.
+ * >0: OK - read or write has been done by the strategy routine, so 
+ *     return immediately.
+ * 
+ * There must be a proc_handler routine for any terminal nodes
+ * mirrored under /proc/sys (non-terminals are handled by a built-in
+ * directory handler).  Several default handlers are available to
+ * cover common cases.
+ */
+
+/* A sysctl table is an array of struct ctl_table: */
+struct ctl_table 
+{
+       int ctl_name;                   /* Binary ID */
+       const char *procname;           /* Text ID for /proc/sys, or zero */
+       void *data;
+       int maxlen;
+       mode_t mode;
+       ctl_table *child;
+       proc_handler *proc_handler;     /* Callback for text formatting */
+       ctl_handler *strategy;          /* Callback function for all r/w */
+       struct proc_dir_entry *de;      /* /proc control block */
+       void *extra1;
+       void *extra2;
+};
+
+/* struct ctl_table_header is used to maintain dynamic lists of
+   ctl_table trees. */
+struct ctl_table_header
+{
+       ctl_table *ctl_table;
+       struct list_head ctl_entry;
+       int used;
+       struct completion *unregistering;
+};
+
+struct ctl_table_header * register_sysctl_table(ctl_table * table, 
+                                               int insert_at_head);
+void unregister_sysctl_table(struct ctl_table_header * table);
+
+#else /* __KERNEL__ */
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_SYSCTL_H */
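
The registration contract described in the comment block above is easiest to see in code. The following is a minimal sketch built against these declarations, not part of the patch; the sample_* identifiers and the ctl_name value are invented for illustration.

/* Sketch only, not part of the patch: sample_* names and the
 * ctl_name value 1 are invented for illustration. */
static int sample_value;
static int sample_min = 0;
static int sample_max = 100;

static ctl_table sample_table[] = {
        {
                .ctl_name     = 1,               /* arbitrary binary ID */
                .procname     = "sample_value",  /* shows up under /proc/sys */
                .data         = &sample_value,
                .maxlen       = sizeof(int),
                .mode         = 0644,
                .proc_handler = &proc_dointvec_minmax,
                .extra1       = &sample_min,     /* lower bound */
                .extra2       = &sample_max,     /* upper bound */
        },
        { .ctl_name = 0 }       /* a zero ctl_name terminates the table */
};

static struct ctl_table_header *sample_header;

static void sample_sysctl_setup(void)
{
        /* insert_at_head == 0: append to the existing list of tables */
        sample_header = register_sysctl_table(sample_table, 0);
}

static void sample_sysctl_teardown(void)
{
        unregister_sysctl_table(sample_header);
}

Here proc_dointvec_minmax clamps writes to the [extra1, extra2] range, which is the "minimal validation" the comment block refers to.
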
diff -Nur xen-ia64-unstable.hg/xen/include/asm-ia64/regs.h xenoprof-ia64-unstable/xen/include/asm-ia64/regs.h
--- xen-ia64-unstable.hg/xen/include/asm-ia64/regs.h    2006-06-22 13:37:14.000000000 +0900
+++ xenoprof-ia64-unstable/xen/include/asm-ia64/regs.h  2006-06-30 15:25:01.000000000 +0900
@@ -1,2 +1,7 @@
 #include <asm/ptrace.h>
 #define xen_regs pt_regs
+
+#define ring_0(r)    (ia64_psr(r)->cpl == 0)
+#define ring_1(r)    (ia64_psr(r)->cpl == 1)
+#define ring_2(r)    (ia64_psr(r)->cpl == 2)
+#define ring_3(r)    (ia64_psr(r)->cpl == 3)
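
These helpers test the privilege level (psr.cpl) saved in the interrupted context. A hypothetical consumer, say a profiling interrupt deciding whether a sample hit the hypervisor, might look like this sketch (the function name is invented):

/* Sketch only: classify the interrupted context by privilege level.
 * Xen itself runs at cpl 0, so a non-zero cpl means the sample
 * landed in guest code. */
static inline int sample_hit_hypervisor(struct pt_regs *regs)
{
        return ring_0(regs);
}
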
diff -Nur xen-ia64-unstable.hg/xen/include/asm-ia64/xen/asm/perfmon.h xenoprof-ia64-unstable/xen/include/asm-ia64/xen/asm/perfmon.h
--- xen-ia64-unstable.hg/xen/include/asm-ia64/xen/asm/perfmon.h 1970-01-01 09:00:00.000000000 +0900
+++ xenoprof-ia64-unstable/xen/include/asm-ia64/xen/asm/perfmon.h       2006-06-30 15:25:01.000000000 +0900
@@ -0,0 +1,334 @@
+/*
+ * Copyright (C) 2001-2003 Hewlett-Packard Co
+ *               Stephane Eranian <eranian@xxxxxxxxxx>
+ */
+
+#ifndef _ASM_IA64_PERFMON_H
+#define _ASM_IA64_PERFMON_H
+
+/*
+ * perfmon commands supported on all CPU models
+ */
+#define PFM_WRITE_PMCS         0x01
+#define PFM_WRITE_PMDS         0x02
+#define PFM_READ_PMDS          0x03
+#define PFM_STOP               0x04
+#define PFM_START              0x05
+#define PFM_ENABLE             0x06 /* obsolete */
+#define PFM_DISABLE            0x07 /* obsolete */
+#define PFM_CREATE_CONTEXT     0x08
+#define PFM_DESTROY_CONTEXT    0x09 /* obsolete use close() */
+#define PFM_RESTART            0x0a
+#define PFM_PROTECT_CONTEXT    0x0b /* obsolete */
+#define PFM_GET_FEATURES       0x0c
+#define PFM_DEBUG              0x0d
+#define PFM_UNPROTECT_CONTEXT  0x0e /* obsolete */
+#define PFM_GET_PMC_RESET_VAL  0x0f
+#define PFM_LOAD_CONTEXT       0x10
+#define PFM_UNLOAD_CONTEXT     0x11
+#define PFM_FREE_CONTEXT       0x12
+
+/*
+ * PMU model specific commands (may not be supported on all PMU models)
+ */
+#define PFM_WRITE_IBRS         0x20
+#define PFM_WRITE_DBRS         0x21
+
+/*
+ * context flags
+ */
+#define PFM_FL_NOTIFY_BLOCK             0x01   /* block task on user level notifications */
+#define PFM_FL_SYSTEM_WIDE      0x02   /* create a system wide context */
+#define PFM_FL_OVFL_NO_MSG      0x80   /* do not post overflow/end messages for notification */
+
+/*
+ * event set flags
+ */
+#define PFM_SETFL_EXCL_IDLE      0x01   /* exclude idle task (syswide only) XXX: DO NOT USE YET */
+
+/*
+ * PMC flags
+ */
+#define PFM_REGFL_OVFL_NOTIFY  0x1     /* send notification on overflow */
+#define PFM_REGFL_RANDOM       0x2     /* randomize sampling interval   */
+
+/*
+ * PMD/PMC/IBR/DBR return flags (ignored on input)
+ *
+ * Those flags are used on output and must be checked in case EAGAIN is returned
+ * by any of the calls using a pfarg_reg_t or pfarg_dbreg_t structure.
+ */
+#define PFM_REG_RETFL_NOTAVAIL (1UL<<31) /* set if register is implemented but not available */
+#define PFM_REG_RETFL_EINVAL   (1UL<<30) /* set if register entry is invalid */
+#define PFM_REG_RETFL_MASK     (PFM_REG_RETFL_NOTAVAIL|PFM_REG_RETFL_EINVAL)
+
+#define PFM_REG_HAS_ERROR(flag)        (((flag) & PFM_REG_RETFL_MASK) != 0)
+
+typedef unsigned char pfm_uuid_t[16];  /* custom sampling buffer identifier type */
+
+/*
+ * Request structure used to define a context
+ */
+typedef struct {
+       pfm_uuid_t     ctx_smpl_buf_id;  /* which buffer format to use (if needed) */
+       unsigned long  ctx_flags;        /* noblock/block */
+       unsigned short ctx_nextra_sets;  /* number of extra event sets (you always get 1) */
+       unsigned short ctx_reserved1;    /* for future use */
+       int            ctx_fd;           /* return arg: unique identification for context */
+       void           *ctx_smpl_vaddr;  /* return arg: virtual address of sampling buffer, is used */
+       unsigned long  ctx_reserved2[11];/* for future use */
+} pfarg_context_t;
+
+/*
+ * Request structure used to write/read a PMC or PMD
+ */
+typedef struct {
+       unsigned int    reg_num;           /* which register */
+       unsigned short  reg_set;           /* event set for this register */
+       unsigned short  reg_reserved1;     /* for future use */
+
+       unsigned long   reg_value;         /* initial pmc/pmd value */
+       unsigned long   reg_flags;         /* input: pmc/pmd flags, return: reg error */
+
+       unsigned long   reg_long_reset;    /* reset after buffer overflow notification */
+       unsigned long   reg_short_reset;   /* reset after counter overflow */
+
+       unsigned long   reg_reset_pmds[4]; /* which other counters to reset on overflow */
+       unsigned long   reg_random_seed;   /* seed value when randomization is used */
+       unsigned long   reg_random_mask;   /* bitmask used to limit random value */
+       unsigned long   reg_last_reset_val;/* return: PMD last reset value */
+
+       unsigned long   reg_smpl_pmds[4];  /* which pmds are accessed when PMC overflows */
+       unsigned long   reg_smpl_eventid;  /* opaque sampling event identifier */
+
+       unsigned long   reg_reserved2[3];   /* for future use */
+} pfarg_reg_t;
+
+typedef struct {
+       unsigned int    dbreg_num;              /* which debug register */
+       unsigned short  dbreg_set;              /* event set for this register */
+       unsigned short  dbreg_reserved1;        /* for future use */
+       unsigned long   dbreg_value;            /* value for debug register */
+       unsigned long   dbreg_flags;            /* return: dbreg error */
+       unsigned long   dbreg_reserved2[1];     /* for future use */
+} pfarg_dbreg_t;
+
+typedef struct {
+       unsigned int    ft_version;     /* perfmon: major [16-31], minor [0-15] */
+       unsigned int    ft_reserved;    /* reserved for future use */
+       unsigned long   reserved[4];    /* for future use */
+} pfarg_features_t;
+
+typedef struct {
+       pid_t           load_pid;          /* process to load the context into */
+       unsigned short  load_set;          /* first event set to load */
+       unsigned short  load_reserved1;    /* for future use */
+       unsigned long   load_reserved2[3]; /* for future use */
+} pfarg_load_t;
+
+typedef struct {
+       int             msg_type;               /* generic message header */
+       int             msg_ctx_fd;             /* generic message header */
+       unsigned long   msg_ovfl_pmds[4];       /* which PMDs overflowed */
+       unsigned short  msg_active_set;         /* active set at the time of overflow */
+       unsigned short  msg_reserved1;          /* for future use */
+       unsigned int    msg_reserved2;          /* for future use */
+       unsigned long   msg_tstamp;             /* for perf tuning/debug */
+} pfm_ovfl_msg_t;
+
+typedef struct {
+       int             msg_type;               /* generic message header */
+       int             msg_ctx_fd;             /* generic message header */
+       unsigned long   msg_tstamp;             /* for perf tuning */
+} pfm_end_msg_t;
+
+typedef struct {
+       int             msg_type;               /* type of the message */
+       int             msg_ctx_fd;             /* unique identifier for the context */
+       unsigned long   msg_tstamp;             /* for perf tuning */
+} pfm_gen_msg_t;
+
+#define PFM_MSG_OVFL   1       /* an overflow happened */
+#define PFM_MSG_END    2       /* task to which context was attached ended */
+
+typedef union {
+       pfm_ovfl_msg_t  pfm_ovfl_msg;
+       pfm_end_msg_t   pfm_end_msg;
+       pfm_gen_msg_t   pfm_gen_msg;
+} pfm_msg_t;
+
+/*
+ * Define the version numbers for both perfmon as a whole and the sampling buffer format.
+ */
+#define PFM_VERSION_MAJ                 2U
+#define PFM_VERSION_MIN                 0U
+#define PFM_VERSION             (((PFM_VERSION_MAJ&0xffff)<<16)|(PFM_VERSION_MIN & 0xffff))
+#define PFM_VERSION_MAJOR(x)    (((x)>>16) & 0xffff)
+#define PFM_VERSION_MINOR(x)    ((x) & 0xffff)
+
+
+/*
+ * miscellaneous architected definitions
+ */
+#define PMU_FIRST_COUNTER      4       /* first counting monitor (PMC/PMD) */
+#define PMU_MAX_PMCS           256     /* maximum architected number of PMC registers */
+#define PMU_MAX_PMDS           256     /* maximum architected number of PMD registers */
+
+#ifdef __KERNEL__
+
+extern long perfmonctl(int fd, int cmd, void *arg, int narg);
+
+typedef struct {
+       void (*handler)(int irq, void *arg, struct pt_regs *regs);
+} pfm_intr_handler_desc_t;
+
+extern void pfm_save_regs (struct task_struct *);
+extern void pfm_load_regs (struct task_struct *);
+
+extern void pfm_exit_thread(struct task_struct *);
+extern int  pfm_use_debug_registers(struct task_struct *);
+extern int  pfm_release_debug_registers(struct task_struct *);
+extern void pfm_syst_wide_update_task(struct task_struct *, unsigned long info, int is_ctxswin);
+extern void pfm_inherit(struct task_struct *task, struct pt_regs *regs);
+extern void pfm_init_percpu(void);
+extern void pfm_handle_work(void);
+extern int  pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *h);
+extern int  pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *h);
+
+
+
+/*
+ * Reset PMD register flags
+ */
+#define PFM_PMD_SHORT_RESET    0
+#define PFM_PMD_LONG_RESET     1
+
+typedef union {
+       unsigned int val;
+       struct {
+               unsigned int notify_user:1;     /* notify user program of overflow */
+               unsigned int reset_ovfl_pmds:1; /* reset overflowed PMDs */
+               unsigned int block_task:1;      /* block monitored task on kernel exit */
+               unsigned int mask_monitoring:1; /* mask monitors via PMCx.plm */
+               unsigned int reserved:28;       /* for future use */
+       } bits;
+} pfm_ovfl_ctrl_t;
+
+typedef struct {
+       unsigned char   ovfl_pmd;                       /* index of overflowed PMD  */
+       unsigned char   ovfl_notify;                    /* =1 if monitor requested overflow notification */
+       unsigned short  active_set;                     /* event set active at the time of the overflow */
+       pfm_ovfl_ctrl_t ovfl_ctrl;                      /* return: perfmon controls to set by handler */
+
+       unsigned long   pmd_last_reset;                 /* last reset value of the PMD */
+       unsigned long   smpl_pmds[4];                   /* bitmask of other PMD of interest on overflow */
+       unsigned long   smpl_pmds_values[PMU_MAX_PMDS]; /* values for the other PMDs of interest */
+       unsigned long   pmd_value;                      /* current 64-bit value of the PMD */
+       unsigned long   pmd_eventid;                    /* eventid associated with PMD */
+} pfm_ovfl_arg_t;
+
+
+typedef struct {
+       char            *fmt_name;
+       pfm_uuid_t      fmt_uuid;
+       size_t          fmt_arg_size;
+       unsigned long   fmt_flags;
+
+       int             (*fmt_validate)(struct task_struct *task, unsigned int flags, int cpu, void *arg);
+       int             (*fmt_getsize)(struct task_struct *task, unsigned int flags, int cpu, void *arg, unsigned long *size);
+       int             (*fmt_init)(struct task_struct *task, void *buf, unsigned int flags, int cpu, void *arg);
+       int             (*fmt_handler)(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, struct pt_regs *regs, unsigned long stamp);
+       int             (*fmt_restart)(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs);
+       int             (*fmt_restart_active)(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs);
+       int             (*fmt_exit)(struct task_struct *task, void *buf, struct pt_regs *regs);
+
+       struct list_head fmt_list;
+} pfm_buffer_fmt_t;
+
+extern int pfm_register_buffer_fmt(pfm_buffer_fmt_t *fmt);
+extern int pfm_unregister_buffer_fmt(pfm_uuid_t uuid);
+
+/*
+ * perfmon interface exported to modules
+ */
+extern int pfm_mod_read_pmds(struct task_struct *, void *req, unsigned int nreq, struct pt_regs *regs);
+extern int pfm_mod_write_pmcs(struct task_struct *, void *req, unsigned int nreq, struct pt_regs *regs);
+extern int pfm_mod_write_ibrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs);
+extern int pfm_mod_write_dbrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs);
+
+/*
+ * describe the content of the local_cpu_data->pfm_syst_info field
+ */
+#define PFM_CPUINFO_SYST_WIDE  0x1     /* if set a system wide session exists */
+#define PFM_CPUINFO_DCR_PP     0x2     /* if set the system wide session has started */
+#define PFM_CPUINFO_EXCL_IDLE  0x4     /* the system wide session excludes the idle task */
+
+/*
+ * sysctl control structure. visible to sampling formats
+ */
+typedef struct {
+       int     debug;          /* turn on/off debugging via syslog */
+       int     debug_ovfl;     /* turn on/off debug printk in overflow handler */
+       int     fastctxsw;      /* turn on/off fast (unsecure) ctxsw */
+       int     expert_mode;    /* turn on/off value checking */
+} pfm_sysctl_t;
+extern pfm_sysctl_t pfm_sysctl;
+
+
+/*
+ * information about a PMC or PMD.
+ * dep_pmd[]: a bitmask of dependent PMD registers
+ * dep_pmc[]: a bitmask of dependent PMC registers
+ */
+struct pfm_context;
+typedef int (*pfm_reg_check_t)(struct task_struct *task, struct pfm_context *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
+typedef struct {
+       unsigned int            type;
+       int                     pm_pos;
+       unsigned long           default_value;  /* power-on default value */
+       unsigned long           reserved_mask;  /* bitmask of reserved bits */
+       pfm_reg_check_t         read_check;
+       pfm_reg_check_t         write_check;
+       unsigned long           dep_pmd[4];
+       unsigned long           dep_pmc[4];
+} pfm_reg_desc_t;
+
+
+/*
+ * This structure is initialized at boot time and contains
+ * a description of the PMU main characteristics.
+ *
+ * If the probe function is defined, detection is based
+ * on its return value: 
+ *     - 0 means recognized PMU
+ *     - anything else means not supported
+ * When the probe function is not defined, then the pmu_family field
+ * is used and it must match the host CPU family such that:
+ *     - cpu->family & config->pmu_family != 0
+ */
+typedef struct {
+       unsigned long  ovfl_val;        /* overflow value for counters */
+
+       pfm_reg_desc_t *pmc_desc;       /* detailed PMC register dependencies descriptions */
+       pfm_reg_desc_t *pmd_desc;       /* detailed PMD register dependencies descriptions */
+
+       unsigned int   num_pmcs;        /* number of PMCS: computed at init time */
+       unsigned int   num_pmds;        /* number of PMDS: computed at init time */
+       unsigned long  impl_pmcs[4];    /* bitmask of implemented PMCS */
+       unsigned long  impl_pmds[4];    /* bitmask of implemented PMDS */
+
+       char          *pmu_name;        /* PMU family name */
+       unsigned int  pmu_family;       /* cpuid family pattern used to identify pmu */
+       unsigned int  flags;            /* pmu specific flags */
+       unsigned int  num_ibrs;         /* number of IBRS: computed at init time */
+       unsigned int  num_dbrs;         /* number of DBRS: computed at init time */
+       unsigned int  num_counters;     /* PMC/PMD counting pairs : computed at init time */
+       int           (*probe)(void);   /* customized probe routine */
+       unsigned int  use_rr_dbregs:1;  /* set if debug registers used for range restriction */
+} pmu_config_t;
+
+extern pmu_config_t    *pmu_conf;
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_IA64_PERFMON_H */
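
Among the declarations above, pfm_install_alt_pmu_interrupt() and pfm_remove_alt_pmu_interrupt() are the hooks a profiler builds on: they swap in a replacement PMU overflow handler. A rough sketch of such a client, with invented names (the actual wiring for Xenoprof lives in the new oprofile subdirectory):

/* Sketch only; names are invented.  The handler runs in PMU overflow
 * interrupt context with the register frame of the interrupted code. */
static void sample_pmu_handler(int irq, void *arg, struct pt_regs *regs)
{
        /* read the overflowed PMDs, log a sample, re-arm the counters */
}

static pfm_intr_handler_desc_t sample_desc = {
        .handler = sample_pmu_handler,
};

static int sample_attach(void)
{
        /* expected to fail if perfmon sessions already own the PMU */
        return pfm_install_alt_pmu_interrupt(&sample_desc);
}

static void sample_detach(void)
{
        pfm_remove_alt_pmu_interrupt(&sample_desc);
}
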
diff -Nur xen-ia64-unstable.hg/xen/include/asm-ia64/xen/completion.h xenoprof-ia64-unstable/xen/include/asm-ia64/xen/completion.h
--- xen-ia64-unstable.hg/xen/include/asm-ia64/xen/completion.h  1970-01-01 09:00:00.000000000 +0900
+++ xenoprof-ia64-unstable/xen/include/asm-ia64/xen/completion.h        2006-06-30 15:25:01.000000000 +0900
@@ -0,0 +1,42 @@
+#ifndef __LINUX_COMPLETION_H
+#define __LINUX_COMPLETION_H
+
+/*
+ * (C) Copyright 2001 Linus Torvalds
+ *
+ * Atomic wait-for-completion handler data structures.
+ * See kernel/sched.c for details.
+ */
+
+#include <linux/wait.h>
+
+struct completion {
+       unsigned int done;
+       wait_queue_head_t wait;
+};
+
+#define COMPLETION_INITIALIZER(work) \
+       { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
+
+#define DECLARE_COMPLETION(work) \
+       struct completion work = COMPLETION_INITIALIZER(work)
+
+static inline void init_completion(struct completion *x)
+{
+       x->done = 0;
+       init_waitqueue_head(&x->wait);
+}
+
+extern void FASTCALL(wait_for_completion(struct completion *));
+extern int FASTCALL(wait_for_completion_interruptible(struct completion *x));
+extern unsigned long FASTCALL(wait_for_completion_timeout(struct completion *x,
+                                                  unsigned long timeout));
+extern unsigned long FASTCALL(wait_for_completion_interruptible_timeout(
+                       struct completion *x, unsigned long timeout));
+
+extern void FASTCALL(complete(struct completion *));
+extern void FASTCALL(complete_all(struct completion *));
+
+#define INIT_COMPLETION(x)     ((x).done = 0)
+
+#endif
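
This header is pulled in for users such as the unregistering field of struct ctl_table_header in the sysctl header later in this patch; the wait/complete pairing it provides works as in this sketch (names invented):

/* Sketch only; names are invented.  One side blocks in
 * wait_for_completion() until the other side calls complete(). */
static DECLARE_COMPLETION(sample_done);

static void sample_worker(void)
{
        /* ... finish the work ... */
        complete(&sample_done);                 /* wake one waiter */
}

static void sample_waiter(void)
{
        wait_for_completion(&sample_done);      /* sleeps until complete() */
}
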
diff -Nur xen-ia64-unstable.hg/xen/include/asm-ia64/xen/posix_types.h xenoprof-ia64-unstable/xen/include/asm-ia64/xen/posix_types.h
--- xen-ia64-unstable.hg/xen/include/asm-ia64/xen/posix_types.h 1970-01-01 09:00:00.000000000 +0900
+++ xenoprof-ia64-unstable/xen/include/asm-ia64/xen/posix_types.h       2006-06-30 15:25:01.000000000 +0900
@@ -0,0 +1,126 @@
+#ifndef _ASM_IA64_POSIX_TYPES_H
+#define _ASM_IA64_POSIX_TYPES_H
+
+/*
+ * This file is generally used by user-level software, so you need to
+ * be a little careful about namespace pollution etc.  Also, we cannot
+ * assume GCC is being used.
+ *
+ * Based on <asm-alpha/posix_types.h>.
+ *
+ * Modified 1998-2000, 2003
+ *     David Mosberger-Tang <davidm@xxxxxxxxxx>, Hewlett-Packard Co
+ */
+
+typedef unsigned long  __kernel_ino_t;
+typedef unsigned int   __kernel_mode_t;
+typedef unsigned int   __kernel_nlink_t;
+typedef long           __kernel_off_t;
+typedef long long      __kernel_loff_t;
+typedef int            __kernel_pid_t;
+typedef int            __kernel_ipc_pid_t;
+typedef unsigned int   __kernel_uid_t;
+typedef unsigned int   __kernel_gid_t;
+typedef unsigned long  __kernel_size_t;
+typedef long           __kernel_ssize_t;
+typedef long           __kernel_ptrdiff_t;
+typedef long           __kernel_time_t;
+typedef long           __kernel_suseconds_t;
+typedef long           __kernel_clock_t;
+typedef int            __kernel_timer_t;
+typedef int            __kernel_clockid_t;
+typedef int            __kernel_daddr_t;
+typedef char *         __kernel_caddr_t;
+typedef unsigned long  __kernel_sigset_t;      /* at least 32 bits */
+typedef unsigned short __kernel_uid16_t;
+typedef unsigned short __kernel_gid16_t;
+
+typedef struct {
+       int     val[2];
+} __kernel_fsid_t;
+
+typedef __kernel_uid_t __kernel_old_uid_t;
+typedef __kernel_gid_t __kernel_old_gid_t;
+typedef __kernel_uid_t __kernel_uid32_t;
+typedef __kernel_gid_t __kernel_gid32_t;
+
+typedef unsigned int   __kernel_old_dev_t;
+
+# ifdef __KERNEL__
+
+#  ifndef __GNUC__
+
+#define        __FD_SET(d, set)        ((set)->fds_bits[__FDELT(d)] |= __FDMASK(d))
+#define        __FD_CLR(d, set)        ((set)->fds_bits[__FDELT(d)] &= ~__FDMASK(d))
+#define        __FD_ISSET(d, set)      (((set)->fds_bits[__FDELT(d)] & __FDMASK(d)) != 0)
+#define        __FD_ZERO(set)  \
+  ((void) memset ((__ptr_t) (set), 0, sizeof (__kernel_fd_set)))
+
+#  else /* !__GNUC__ */
+
+/* With GNU C, use inline functions instead so args are evaluated only once: */
+
+#undef __FD_SET
+static __inline__ void __FD_SET(unsigned long fd, __kernel_fd_set *fdsetp)
+{
+       unsigned long _tmp = fd / __NFDBITS;
+       unsigned long _rem = fd % __NFDBITS;
+       fdsetp->fds_bits[_tmp] |= (1UL<<_rem);
+}
+
+#undef __FD_CLR
+static __inline__ void __FD_CLR(unsigned long fd, __kernel_fd_set *fdsetp)
+{
+       unsigned long _tmp = fd / __NFDBITS;
+       unsigned long _rem = fd % __NFDBITS;
+       fdsetp->fds_bits[_tmp] &= ~(1UL<<_rem);
+}
+
+#undef __FD_ISSET
+static __inline__ int __FD_ISSET(unsigned long fd, const __kernel_fd_set *p)
+{ 
+       unsigned long _tmp = fd / __NFDBITS;
+       unsigned long _rem = fd % __NFDBITS;
+       return (p->fds_bits[_tmp] & (1UL<<_rem)) != 0;
+}
+
+/*
+ * This will unroll the loop for the normal constant case (8 ints,
+ * for a 256-bit fd_set)
+ */
+#undef __FD_ZERO
+static __inline__ void __FD_ZERO(__kernel_fd_set *p)
+{
+       unsigned long *tmp = p->fds_bits;
+       int i;
+
+       if (__builtin_constant_p(__FDSET_LONGS)) {
+               switch (__FDSET_LONGS) {
+                     case 16:
+                       tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
+                       tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0;
+                       tmp[ 8] = 0; tmp[ 9] = 0; tmp[10] = 0; tmp[11] = 0;
+                       tmp[12] = 0; tmp[13] = 0; tmp[14] = 0; tmp[15] = 0;
+                       return;
+
+                     case 8:
+                       tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
+                       tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0;
+                       return;
+
+                     case 4:
+                       tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
+                       return;
+               }
+       }
+       i = __FDSET_LONGS;
+       while (i) {
+               i--;
+               *tmp = 0;
+               tmp++;
+       }
+}
+
+#  endif /* !__GNUC__ */
+# endif /* __KERNEL__ */
+#endif /* _ASM_IA64_POSIX_TYPES_H */
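
For reference, the __FD_* primitives defined above behave like the libc FD_* macros. A sketch, assuming the __kernel_fd_set and __FDSET_LONGS definitions from the generic linux/posix_types.h that this header backs:

/* Sketch only: exercise the fd_set primitives defined above. */
static int sample_fdset_use(void)
{
        __kernel_fd_set set;

        __FD_ZERO(&set);                /* clear every bit (unrolled path) */
        __FD_SET(42, &set);             /* mark descriptor 42 */
        __FD_CLR(7, &set);              /* clearing an unset bit is harmless */
        return __FD_ISSET(42, &set);    /* evaluates to 1 */
}
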
diff -Nur xen-ia64-unstable.hg/xen/include/asm-ia64/xen/sysctl.h xenoprof-ia64-unstable/xen/include/asm-ia64/xen/sysctl.h
--- xen-ia64-unstable.hg/xen/include/asm-ia64/xen/sysctl.h      1970-01-01 09:00:00.000000000 +0900
+++ xenoprof-ia64-unstable/xen/include/asm-ia64/xen/sysctl.h    2006-06-30 15:25:01.000000000 +0900
@@ -0,0 +1,990 @@
+/*
+ * sysctl.h: General linux system control interface
+ *
+ * Begun 24 March 1995, Stephen Tweedie
+ *
+ ****************************************************************
+ ****************************************************************
+ **
+ **  The values in this file are exported to user space via 
+ **  the sysctl() binary interface.  However this interface
+ **  is unstable and deprecated and will be removed in the future. 
+ **  For a stable interface use /proc/sys.
+ **
+ ****************************************************************
+ ****************************************************************
+ */
+
+#ifndef _LINUX_SYSCTL_H
+#define _LINUX_SYSCTL_H
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/compiler.h>
+
+struct file;
+struct completion;
+
+#define CTL_MAXNAME 10         /* how many path components do we allow in a
+                                  call to sysctl?   In other words, what is
+                                  the largest acceptable value for the nlen
+                                  member of a struct __sysctl_args to have? */
+
+struct __sysctl_args {
+       int __user *name;
+       int nlen;
+       void __user *oldval;
+       size_t __user *oldlenp;
+       void __user *newval;
+       size_t newlen;
+       unsigned long __unused[4];
+};
+
+/* Define sysctl names first */
+
+/* Top-level names: */
+
+/* For internal pattern-matching use only: */
+#ifdef __KERNEL__
+#define CTL_ANY                -1      /* Matches any name */
+#define CTL_NONE       0
+#endif
+
+enum
+{
+       CTL_KERN=1,             /* General kernel info and control */
+       CTL_VM=2,               /* VM management */
+       CTL_NET=3,              /* Networking */
+       CTL_PROC=4,             /* Process info */
+       CTL_FS=5,               /* Filesystems */
+       CTL_DEBUG=6,            /* Debugging */
+       CTL_DEV=7,              /* Devices */
+       CTL_BUS=8,              /* Busses */
+       CTL_ABI=9,              /* Binary emulation */
+       CTL_CPU=10              /* CPU stuff (speed scaling, etc) */
+};
+
+/* CTL_BUS names: */
+enum
+{
+       CTL_BUS_ISA=1           /* ISA */
+};
+
+/* /proc/sys/fs/inotify/ */
+enum
+{
+       INOTIFY_MAX_USER_INSTANCES=1,   /* max instances per user */
+       INOTIFY_MAX_USER_WATCHES=2,     /* max watches per user */
+       INOTIFY_MAX_QUEUED_EVENTS=3     /* max queued events per instance */
+};
+
+/* CTL_KERN names: */
+enum
+{
+       KERN_OSTYPE=1,          /* string: system version */
+       KERN_OSRELEASE=2,       /* string: system release */
+       KERN_OSREV=3,           /* int: system revision */
+       KERN_VERSION=4,         /* string: compile time info */
+       KERN_SECUREMASK=5,      /* struct: maximum rights mask */
+       KERN_PROF=6,            /* table: profiling information */
+       KERN_NODENAME=7,
+       KERN_DOMAINNAME=8,
+
+       KERN_CAP_BSET=14,       /* int: capability bounding set */
+       KERN_PANIC=15,          /* int: panic timeout */
+       KERN_REALROOTDEV=16,    /* real root device to mount after initrd */
+
+       KERN_SPARC_REBOOT=21,   /* reboot command on Sparc */
+       KERN_CTLALTDEL=22,      /* int: allow ctl-alt-del to reboot */
+       KERN_PRINTK=23,         /* struct: control printk logging parameters */
+       KERN_NAMETRANS=24,      /* Name translation */
+       KERN_PPC_HTABRECLAIM=25, /* turn htab reclamation on/off on PPC */
+       KERN_PPC_ZEROPAGED=26,  /* turn idle page zeroing on/off on PPC */
+       KERN_PPC_POWERSAVE_NAP=27, /* use nap mode for power saving */
+       KERN_MODPROBE=28,
+       KERN_SG_BIG_BUFF=29,
+       KERN_ACCT=30,           /* BSD process accounting parameters */
+       KERN_PPC_L2CR=31,       /* l2cr register on PPC */
+
+       KERN_RTSIGNR=32,        /* Number of rt sigs queued */
+       KERN_RTSIGMAX=33,       /* Max queuable */
+       
+       KERN_SHMMAX=34,         /* long: Maximum shared memory segment */
+       KERN_MSGMAX=35,         /* int: Maximum size of a message */
+       KERN_MSGMNB=36,         /* int: Maximum message queue size */
+       KERN_MSGPOOL=37,        /* int: Maximum system message pool size */
+       KERN_SYSRQ=38,          /* int: Sysreq enable */
+       KERN_MAX_THREADS=39,    /* int: Maximum nr of threads in the system */
+       KERN_RANDOM=40,         /* Random driver */
+       KERN_SHMALL=41,         /* int: Maximum size of shared memory */
+       KERN_MSGMNI=42,         /* int: msg queue identifiers */
+       KERN_SEM=43,            /* struct: sysv semaphore limits */
+       KERN_SPARC_STOP_A=44,   /* int: Sparc Stop-A enable */
+       KERN_SHMMNI=45,         /* int: shm array identifiers */
+       KERN_OVERFLOWUID=46,    /* int: overflow UID */
+       KERN_OVERFLOWGID=47,    /* int: overflow GID */
+       KERN_SHMPATH=48,        /* string: path to shm fs */
+       KERN_HOTPLUG=49,        /* string: path to uevent helper (deprecated) */
+       KERN_IEEE_EMULATION_WARNINGS=50, /* int: unimplemented ieee instructions */
+       KERN_S390_USER_DEBUG_LOGGING=51,  /* int: dumps of user faults */
+       KERN_CORE_USES_PID=52,          /* int: use core or core.%pid */
+       KERN_TAINTED=53,        /* int: various kernel tainted flags */
+       KERN_CADPID=54,         /* int: PID of the process to notify on CAD */
+       KERN_PIDMAX=55,         /* int: PID # limit */
+       KERN_CORE_PATTERN=56,   /* string: pattern for core-file names */
+       KERN_PANIC_ON_OOPS=57,  /* int: whether we will panic on an oops */
+       KERN_HPPA_PWRSW=58,     /* int: hppa soft-power enable */
+       KERN_HPPA_UNALIGNED=59, /* int: hppa unaligned-trap enable */
+       KERN_PRINTK_RATELIMIT=60, /* int: tune printk ratelimiting */
+       KERN_PRINTK_RATELIMIT_BURST=61, /* int: tune printk ratelimiting */
+       KERN_PTY=62,            /* dir: pty driver */
+       KERN_NGROUPS_MAX=63,    /* int: NGROUPS_MAX */
+       KERN_SPARC_SCONS_PWROFF=64, /* int: serial console power-off halt */
+       KERN_HZ_TIMER=65,       /* int: hz timer on or off */
+       KERN_UNKNOWN_NMI_PANIC=66, /* int: unknown nmi panic flag */
+       KERN_BOOTLOADER_TYPE=67, /* int: boot loader type */
+       KERN_RANDOMIZE=68, /* int: randomize virtual address space */
+       KERN_SETUID_DUMPABLE=69, /* int: behaviour of dumps for setuid core */
+       KERN_SPIN_RETRY=70,     /* int: number of spinlock retries */
+       KERN_ACPI_VIDEO_FLAGS=71, /* int: flags for setting up video after ACPI sleep */
+       KERN_IA64_UNALIGNED=72, /* int: ia64 unaligned userland trap enable */
+};
+
+
+/* CTL_VM names: */
+enum
+{
+       VM_UNUSED1=1,           /* was: struct: Set vm swapping control */
+       VM_UNUSED2=2,           /* was: int: Linear or sqrt() swapout for hogs */
+       VM_UNUSED3=3,           /* was: struct: Set free page thresholds */
+       VM_UNUSED4=4,           /* Spare */
+       VM_OVERCOMMIT_MEMORY=5, /* Turn off the virtual memory safety limit */
+       VM_UNUSED5=6,           /* was: struct: Set buffer memory thresholds */
+       VM_UNUSED7=7,           /* was: struct: Set cache memory thresholds */
+       VM_UNUSED8=8,           /* was: struct: Control kswapd behaviour */
+       VM_UNUSED9=9,           /* was: struct: Set page table cache parameters */
+       VM_PAGE_CLUSTER=10,     /* int: set number of pages to swap together */
+       VM_DIRTY_BACKGROUND=11, /* dirty_background_ratio */
+       VM_DIRTY_RATIO=12,      /* dirty_ratio */
+       VM_DIRTY_WB_CS=13,      /* dirty_writeback_centisecs */
+       VM_DIRTY_EXPIRE_CS=14,  /* dirty_expire_centisecs */
+       VM_NR_PDFLUSH_THREADS=15, /* nr_pdflush_threads */
+       VM_OVERCOMMIT_RATIO=16, /* percent of RAM to allow overcommit in */
+       VM_PAGEBUF=17,          /* struct: Control pagebuf parameters */
+       VM_HUGETLB_PAGES=18,    /* int: Number of available Huge Pages */
+       VM_SWAPPINESS=19,       /* Tendency to steal mapped memory */
+       VM_LOWMEM_RESERVE_RATIO=20,/* reservation ratio for lower memory zones */
+       VM_MIN_FREE_KBYTES=21,  /* Minimum free kilobytes to maintain */
+       VM_MAX_MAP_COUNT=22,    /* int: Maximum number of mmaps/address-space */
+       VM_LAPTOP_MODE=23,      /* vm laptop mode */
+       VM_BLOCK_DUMP=24,       /* block dump mode */
+       VM_HUGETLB_GROUP=25,    /* permitted hugetlb group */
+       VM_VFS_CACHE_PRESSURE=26, /* dcache/icache reclaim pressure */
+       VM_LEGACY_VA_LAYOUT=27, /* legacy/compatibility virtual address space layout */
+       VM_SWAP_TOKEN_TIMEOUT=28, /* default time for token time out */
+       VM_DROP_PAGECACHE=29,   /* int: nuke lots of pagecache */
+       VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */
+       VM_ZONE_RECLAIM_MODE=31, /* reclaim local zone memory before going off node */
+       VM_ZONE_RECLAIM_INTERVAL=32, /* time period to wait after reclaim failure */
+};
+
+
+/* CTL_NET names: */
+enum
+{
+       NET_CORE=1,
+       NET_ETHER=2,
+       NET_802=3,
+       NET_UNIX=4,
+       NET_IPV4=5,
+       NET_IPX=6,
+       NET_ATALK=7,
+       NET_NETROM=8,
+       NET_AX25=9,
+       NET_BRIDGE=10,
+       NET_ROSE=11,
+       NET_IPV6=12,
+       NET_X25=13,
+       NET_TR=14,
+       NET_DECNET=15,
+       NET_ECONET=16,
+       NET_SCTP=17,
+       NET_LLC=18,
+       NET_NETFILTER=19,
+};
+
+/* /proc/sys/kernel/random */
+enum
+{
+       RANDOM_POOLSIZE=1,
+       RANDOM_ENTROPY_COUNT=2,
+       RANDOM_READ_THRESH=3,
+       RANDOM_WRITE_THRESH=4,
+       RANDOM_BOOT_ID=5,
+       RANDOM_UUID=6
+};
+
+/* /proc/sys/kernel/pty */
+enum
+{
+       PTY_MAX=1,
+       PTY_NR=2
+};
+
+/* /proc/sys/bus/isa */
+enum
+{
+       BUS_ISA_MEM_BASE=1,
+       BUS_ISA_PORT_BASE=2,
+       BUS_ISA_PORT_SHIFT=3
+};
+
+/* /proc/sys/net/core */
+enum
+{
+       NET_CORE_WMEM_MAX=1,
+       NET_CORE_RMEM_MAX=2,
+       NET_CORE_WMEM_DEFAULT=3,
+       NET_CORE_RMEM_DEFAULT=4,
+/* was NET_CORE_DESTROY_DELAY */
+       NET_CORE_MAX_BACKLOG=6,
+       NET_CORE_FASTROUTE=7,
+       NET_CORE_MSG_COST=8,
+       NET_CORE_MSG_BURST=9,
+       NET_CORE_OPTMEM_MAX=10,
+       NET_CORE_HOT_LIST_LENGTH=11,
+       NET_CORE_DIVERT_VERSION=12,
+       NET_CORE_NO_CONG_THRESH=13,
+       NET_CORE_NO_CONG=14,
+       NET_CORE_LO_CONG=15,
+       NET_CORE_MOD_CONG=16,
+       NET_CORE_DEV_WEIGHT=17,
+       NET_CORE_SOMAXCONN=18,
+       NET_CORE_BUDGET=19,
+};
+
+/* /proc/sys/net/ethernet */
+
+/* /proc/sys/net/802 */
+
+/* /proc/sys/net/unix */
+
+enum
+{
+       NET_UNIX_DESTROY_DELAY=1,
+       NET_UNIX_DELETE_DELAY=2,
+       NET_UNIX_MAX_DGRAM_QLEN=3,
+};
+
+/* /proc/sys/net/netfilter */
+enum
+{
+       NET_NF_CONNTRACK_MAX=1,
+       NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT=2,
+       NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV=3,
+       NET_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED=4,
+       NET_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT=5,
+       NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT=6,
+       NET_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK=7,
+       NET_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT=8,
+       NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE=9,
+       NET_NF_CONNTRACK_UDP_TIMEOUT=10,
+       NET_NF_CONNTRACK_UDP_TIMEOUT_STREAM=11,
+       NET_NF_CONNTRACK_ICMP_TIMEOUT=12,
+       NET_NF_CONNTRACK_GENERIC_TIMEOUT=13,
+       NET_NF_CONNTRACK_BUCKETS=14,
+       NET_NF_CONNTRACK_LOG_INVALID=15,
+       NET_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS=16,
+       NET_NF_CONNTRACK_TCP_LOOSE=17,
+       NET_NF_CONNTRACK_TCP_BE_LIBERAL=18,
+       NET_NF_CONNTRACK_TCP_MAX_RETRANS=19,
+       NET_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED=20,
+       NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT=21,
+       NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED=22,
+       NET_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED=23,
+       NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT=24,
+       NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD=25,
+       NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT=26,
+       NET_NF_CONNTRACK_COUNT=27,
+       NET_NF_CONNTRACK_ICMPV6_TIMEOUT=28,
+       NET_NF_CONNTRACK_FRAG6_TIMEOUT=29,
+       NET_NF_CONNTRACK_FRAG6_LOW_THRESH=30,
+       NET_NF_CONNTRACK_FRAG6_HIGH_THRESH=31,
+};
+
+/* /proc/sys/net/ipv4 */
+enum
+{
+       /* v2.0 compatible variables */
+       NET_IPV4_FORWARD=8,
+       NET_IPV4_DYNADDR=9,
+
+       NET_IPV4_CONF=16,
+       NET_IPV4_NEIGH=17,
+       NET_IPV4_ROUTE=18,
+       NET_IPV4_FIB_HASH=19,
+       NET_IPV4_NETFILTER=20,
+
+       NET_IPV4_TCP_TIMESTAMPS=33,
+       NET_IPV4_TCP_WINDOW_SCALING=34,
+       NET_IPV4_TCP_SACK=35,
+       NET_IPV4_TCP_RETRANS_COLLAPSE=36,
+       NET_IPV4_DEFAULT_TTL=37,
+       NET_IPV4_AUTOCONFIG=38,
+       NET_IPV4_NO_PMTU_DISC=39,
+       NET_IPV4_TCP_SYN_RETRIES=40,
+       NET_IPV4_IPFRAG_HIGH_THRESH=41,
+       NET_IPV4_IPFRAG_LOW_THRESH=42,
+       NET_IPV4_IPFRAG_TIME=43,
+       NET_IPV4_TCP_MAX_KA_PROBES=44,
+       NET_IPV4_TCP_KEEPALIVE_TIME=45,
+       NET_IPV4_TCP_KEEPALIVE_PROBES=46,
+       NET_IPV4_TCP_RETRIES1=47,
+       NET_IPV4_TCP_RETRIES2=48,
+       NET_IPV4_TCP_FIN_TIMEOUT=49,
+       NET_IPV4_IP_MASQ_DEBUG=50,
+       NET_TCP_SYNCOOKIES=51,
+       NET_TCP_STDURG=52,
+       NET_TCP_RFC1337=53,
+       NET_TCP_SYN_TAILDROP=54,
+       NET_TCP_MAX_SYN_BACKLOG=55,
+       NET_IPV4_LOCAL_PORT_RANGE=56,
+       NET_IPV4_ICMP_ECHO_IGNORE_ALL=57,
+       NET_IPV4_ICMP_ECHO_IGNORE_BROADCASTS=58,
+       NET_IPV4_ICMP_SOURCEQUENCH_RATE=59,
+       NET_IPV4_ICMP_DESTUNREACH_RATE=60,
+       NET_IPV4_ICMP_TIMEEXCEED_RATE=61,
+       NET_IPV4_ICMP_PARAMPROB_RATE=62,
+       NET_IPV4_ICMP_ECHOREPLY_RATE=63,
+       NET_IPV4_ICMP_IGNORE_BOGUS_ERROR_RESPONSES=64,
+       NET_IPV4_IGMP_MAX_MEMBERSHIPS=65,
+       NET_TCP_TW_RECYCLE=66,
+       NET_IPV4_ALWAYS_DEFRAG=67,
+       NET_IPV4_TCP_KEEPALIVE_INTVL=68,
+       NET_IPV4_INET_PEER_THRESHOLD=69,
+       NET_IPV4_INET_PEER_MINTTL=70,
+       NET_IPV4_INET_PEER_MAXTTL=71,
+       NET_IPV4_INET_PEER_GC_MINTIME=72,
+       NET_IPV4_INET_PEER_GC_MAXTIME=73,
+       NET_TCP_ORPHAN_RETRIES=74,
+       NET_TCP_ABORT_ON_OVERFLOW=75,
+       NET_TCP_SYNACK_RETRIES=76,
+       NET_TCP_MAX_ORPHANS=77,
+       NET_TCP_MAX_TW_BUCKETS=78,
+       NET_TCP_FACK=79,
+       NET_TCP_REORDERING=80,
+       NET_TCP_ECN=81,
+       NET_TCP_DSACK=82,
+       NET_TCP_MEM=83,
+       NET_TCP_WMEM=84,
+       NET_TCP_RMEM=85,
+       NET_TCP_APP_WIN=86,
+       NET_TCP_ADV_WIN_SCALE=87,
+       NET_IPV4_NONLOCAL_BIND=88,
+       NET_IPV4_ICMP_RATELIMIT=89,
+       NET_IPV4_ICMP_RATEMASK=90,
+       NET_TCP_TW_REUSE=91,
+       NET_TCP_FRTO=92,
+       NET_TCP_LOW_LATENCY=93,
+       NET_IPV4_IPFRAG_SECRET_INTERVAL=94,
+       NET_IPV4_IGMP_MAX_MSF=96,
+       NET_TCP_NO_METRICS_SAVE=97,
+       NET_TCP_DEFAULT_WIN_SCALE=105,
+       NET_TCP_MODERATE_RCVBUF=106,
+       NET_TCP_TSO_WIN_DIVISOR=107,
+       NET_TCP_BIC_BETA=108,
+       NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109,
+       NET_TCP_CONG_CONTROL=110,
+       NET_TCP_ABC=111,
+       NET_IPV4_IPFRAG_MAX_DIST=112,
+};
+
+enum {
+       NET_IPV4_ROUTE_FLUSH=1,
+       NET_IPV4_ROUTE_MIN_DELAY=2,
+       NET_IPV4_ROUTE_MAX_DELAY=3,
+       NET_IPV4_ROUTE_GC_THRESH=4,
+       NET_IPV4_ROUTE_MAX_SIZE=5,
+       NET_IPV4_ROUTE_GC_MIN_INTERVAL=6,
+       NET_IPV4_ROUTE_GC_TIMEOUT=7,
+       NET_IPV4_ROUTE_GC_INTERVAL=8,
+       NET_IPV4_ROUTE_REDIRECT_LOAD=9,
+       NET_IPV4_ROUTE_REDIRECT_NUMBER=10,
+       NET_IPV4_ROUTE_REDIRECT_SILENCE=11,
+       NET_IPV4_ROUTE_ERROR_COST=12,
+       NET_IPV4_ROUTE_ERROR_BURST=13,
+       NET_IPV4_ROUTE_GC_ELASTICITY=14,
+       NET_IPV4_ROUTE_MTU_EXPIRES=15,
+       NET_IPV4_ROUTE_MIN_PMTU=16,
+       NET_IPV4_ROUTE_MIN_ADVMSS=17,
+       NET_IPV4_ROUTE_SECRET_INTERVAL=18,
+       NET_IPV4_ROUTE_GC_MIN_INTERVAL_MS=19,
+};
+
+enum
+{
+       NET_PROTO_CONF_ALL=-2,
+       NET_PROTO_CONF_DEFAULT=-3
+
+       /* And device ifindices ... */
+};
+
+enum
+{
+       NET_IPV4_CONF_FORWARDING=1,
+       NET_IPV4_CONF_MC_FORWARDING=2,
+       NET_IPV4_CONF_PROXY_ARP=3,
+       NET_IPV4_CONF_ACCEPT_REDIRECTS=4,
+       NET_IPV4_CONF_SECURE_REDIRECTS=5,
+       NET_IPV4_CONF_SEND_REDIRECTS=6,
+       NET_IPV4_CONF_SHARED_MEDIA=7,
+       NET_IPV4_CONF_RP_FILTER=8,
+       NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE=9,
+       NET_IPV4_CONF_BOOTP_RELAY=10,
+       NET_IPV4_CONF_LOG_MARTIANS=11,
+       NET_IPV4_CONF_TAG=12,
+       NET_IPV4_CONF_ARPFILTER=13,
+       NET_IPV4_CONF_MEDIUM_ID=14,
+       NET_IPV4_CONF_NOXFRM=15,
+       NET_IPV4_CONF_NOPOLICY=16,
+       NET_IPV4_CONF_FORCE_IGMP_VERSION=17,
+       NET_IPV4_CONF_ARP_ANNOUNCE=18,
+       NET_IPV4_CONF_ARP_IGNORE=19,
+       NET_IPV4_CONF_PROMOTE_SECONDARIES=20,
+       __NET_IPV4_CONF_MAX
+};
+
+/* /proc/sys/net/ipv4/netfilter */
+enum
+{
+       NET_IPV4_NF_CONNTRACK_MAX=1,
+       NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT=2,
+       NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV=3,
+       NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED=4,
+       NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT=5,
+       NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT=6,
+       NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK=7,
+       NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT=8,
+       NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE=9,
+       NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT=10,
+       NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT_STREAM=11,
+       NET_IPV4_NF_CONNTRACK_ICMP_TIMEOUT=12,
+       NET_IPV4_NF_CONNTRACK_GENERIC_TIMEOUT=13,
+       NET_IPV4_NF_CONNTRACK_BUCKETS=14,
+       NET_IPV4_NF_CONNTRACK_LOG_INVALID=15,
+       NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS=16,
+       NET_IPV4_NF_CONNTRACK_TCP_LOOSE=17,
+       NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL=18,
+       NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS=19,
+       NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED=20,
+       NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT=21,
+       NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED=22,
+       NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED=23,
+       NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT=24,
+       NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD=25,
+       NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT=26,
+       NET_IPV4_NF_CONNTRACK_COUNT=27,
+};
+ 
+/* /proc/sys/net/ipv6 */
+enum {
+       NET_IPV6_CONF=16,
+       NET_IPV6_NEIGH=17,
+       NET_IPV6_ROUTE=18,
+       NET_IPV6_ICMP=19,
+       NET_IPV6_BINDV6ONLY=20,
+       NET_IPV6_IP6FRAG_HIGH_THRESH=21,
+       NET_IPV6_IP6FRAG_LOW_THRESH=22,
+       NET_IPV6_IP6FRAG_TIME=23,
+       NET_IPV6_IP6FRAG_SECRET_INTERVAL=24,
+       NET_IPV6_MLD_MAX_MSF=25,
+};
+
+enum {
+       NET_IPV6_ROUTE_FLUSH=1,
+       NET_IPV6_ROUTE_GC_THRESH=2,
+       NET_IPV6_ROUTE_MAX_SIZE=3,
+       NET_IPV6_ROUTE_GC_MIN_INTERVAL=4,
+       NET_IPV6_ROUTE_GC_TIMEOUT=5,
+       NET_IPV6_ROUTE_GC_INTERVAL=6,
+       NET_IPV6_ROUTE_GC_ELASTICITY=7,
+       NET_IPV6_ROUTE_MTU_EXPIRES=8,
+       NET_IPV6_ROUTE_MIN_ADVMSS=9,
+       NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS=10
+};
+
+enum {
+       NET_IPV6_FORWARDING=1,
+       NET_IPV6_HOP_LIMIT=2,
+       NET_IPV6_MTU=3,
+       NET_IPV6_ACCEPT_RA=4,
+       NET_IPV6_ACCEPT_REDIRECTS=5,
+       NET_IPV6_AUTOCONF=6,
+       NET_IPV6_DAD_TRANSMITS=7,
+       NET_IPV6_RTR_SOLICITS=8,
+       NET_IPV6_RTR_SOLICIT_INTERVAL=9,
+       NET_IPV6_RTR_SOLICIT_DELAY=10,
+       NET_IPV6_USE_TEMPADDR=11,
+       NET_IPV6_TEMP_VALID_LFT=12,
+       NET_IPV6_TEMP_PREFERED_LFT=13,
+       NET_IPV6_REGEN_MAX_RETRY=14,
+       NET_IPV6_MAX_DESYNC_FACTOR=15,
+       NET_IPV6_MAX_ADDRESSES=16,
+       NET_IPV6_FORCE_MLD_VERSION=17,
+       __NET_IPV6_MAX
+};
+
+/* /proc/sys/net/ipv6/icmp */
+enum {
+       NET_IPV6_ICMP_RATELIMIT=1
+};
+
+/* /proc/sys/net/<protocol>/neigh/<dev> */
+enum {
+       NET_NEIGH_MCAST_SOLICIT=1,
+       NET_NEIGH_UCAST_SOLICIT=2,
+       NET_NEIGH_APP_SOLICIT=3,
+       NET_NEIGH_RETRANS_TIME=4,
+       NET_NEIGH_REACHABLE_TIME=5,
+       NET_NEIGH_DELAY_PROBE_TIME=6,
+       NET_NEIGH_GC_STALE_TIME=7,
+       NET_NEIGH_UNRES_QLEN=8,
+       NET_NEIGH_PROXY_QLEN=9,
+       NET_NEIGH_ANYCAST_DELAY=10,
+       NET_NEIGH_PROXY_DELAY=11,
+       NET_NEIGH_LOCKTIME=12,
+       NET_NEIGH_GC_INTERVAL=13,
+       NET_NEIGH_GC_THRESH1=14,
+       NET_NEIGH_GC_THRESH2=15,
+       NET_NEIGH_GC_THRESH3=16,
+       NET_NEIGH_RETRANS_TIME_MS=17,
+       NET_NEIGH_REACHABLE_TIME_MS=18,
+       __NET_NEIGH_MAX
+};
+
+/* /proc/sys/net/ipx */
+enum {
+       NET_IPX_PPROP_BROADCASTING=1,
+       NET_IPX_FORWARDING=2
+};
+
+/* /proc/sys/net/llc */
+enum {
+       NET_LLC2=1,
+       NET_LLC_STATION=2,
+};
+
+/* /proc/sys/net/llc/llc2 */
+enum {
+       NET_LLC2_TIMEOUT=1,
+};
+
+/* /proc/sys/net/llc/station */
+enum {
+       NET_LLC_STATION_ACK_TIMEOUT=1,
+};
+
+/* /proc/sys/net/llc/llc2/timeout */
+enum {
+       NET_LLC2_ACK_TIMEOUT=1,
+       NET_LLC2_P_TIMEOUT=2,
+       NET_LLC2_REJ_TIMEOUT=3,
+       NET_LLC2_BUSY_TIMEOUT=4,
+};
+
+/* /proc/sys/net/appletalk */
+enum {
+       NET_ATALK_AARP_EXPIRY_TIME=1,
+       NET_ATALK_AARP_TICK_TIME=2,
+       NET_ATALK_AARP_RETRANSMIT_LIMIT=3,
+       NET_ATALK_AARP_RESOLVE_TIME=4
+};
+
+
+/* /proc/sys/net/netrom */
+enum {
+       NET_NETROM_DEFAULT_PATH_QUALITY=1,
+       NET_NETROM_OBSOLESCENCE_COUNT_INITIALISER=2,
+       NET_NETROM_NETWORK_TTL_INITIALISER=3,
+       NET_NETROM_TRANSPORT_TIMEOUT=4,
+       NET_NETROM_TRANSPORT_MAXIMUM_TRIES=5,
+       NET_NETROM_TRANSPORT_ACKNOWLEDGE_DELAY=6,
+       NET_NETROM_TRANSPORT_BUSY_DELAY=7,
+       NET_NETROM_TRANSPORT_REQUESTED_WINDOW_SIZE=8,
+       NET_NETROM_TRANSPORT_NO_ACTIVITY_TIMEOUT=9,
+       NET_NETROM_ROUTING_CONTROL=10,
+       NET_NETROM_LINK_FAILS_COUNT=11,
+       NET_NETROM_RESET=12
+};
+
+/* /proc/sys/net/ax25 */
+enum {
+       NET_AX25_IP_DEFAULT_MODE=1,
+       NET_AX25_DEFAULT_MODE=2,
+       NET_AX25_BACKOFF_TYPE=3,
+       NET_AX25_CONNECT_MODE=4,
+       NET_AX25_STANDARD_WINDOW=5,
+       NET_AX25_EXTENDED_WINDOW=6,
+       NET_AX25_T1_TIMEOUT=7,
+       NET_AX25_T2_TIMEOUT=8,
+       NET_AX25_T3_TIMEOUT=9,
+       NET_AX25_IDLE_TIMEOUT=10,
+       NET_AX25_N2=11,
+       NET_AX25_PACLEN=12,
+       NET_AX25_PROTOCOL=13,
+       NET_AX25_DAMA_SLAVE_TIMEOUT=14
+};
+
+/* /proc/sys/net/rose */
+enum {
+       NET_ROSE_RESTART_REQUEST_TIMEOUT=1,
+       NET_ROSE_CALL_REQUEST_TIMEOUT=2,
+       NET_ROSE_RESET_REQUEST_TIMEOUT=3,
+       NET_ROSE_CLEAR_REQUEST_TIMEOUT=4,
+       NET_ROSE_ACK_HOLD_BACK_TIMEOUT=5,
+       NET_ROSE_ROUTING_CONTROL=6,
+       NET_ROSE_LINK_FAIL_TIMEOUT=7,
+       NET_ROSE_MAX_VCS=8,
+       NET_ROSE_WINDOW_SIZE=9,
+       NET_ROSE_NO_ACTIVITY_TIMEOUT=10
+};
+
+/* /proc/sys/net/x25 */
+enum {
+       NET_X25_RESTART_REQUEST_TIMEOUT=1,
+       NET_X25_CALL_REQUEST_TIMEOUT=2,
+       NET_X25_RESET_REQUEST_TIMEOUT=3,
+       NET_X25_CLEAR_REQUEST_TIMEOUT=4,
+       NET_X25_ACK_HOLD_BACK_TIMEOUT=5
+};
+
+/* /proc/sys/net/token-ring */
+enum
+{
+       NET_TR_RIF_TIMEOUT=1
+};
+
+/* /proc/sys/net/decnet/ */
+enum {
+       NET_DECNET_NODE_TYPE = 1,
+       NET_DECNET_NODE_ADDRESS = 2,
+       NET_DECNET_NODE_NAME = 3,
+       NET_DECNET_DEFAULT_DEVICE = 4,
+       NET_DECNET_TIME_WAIT = 5,
+       NET_DECNET_DN_COUNT = 6,
+       NET_DECNET_DI_COUNT = 7,
+       NET_DECNET_DR_COUNT = 8,
+       NET_DECNET_DST_GC_INTERVAL = 9,
+       NET_DECNET_CONF = 10,
+       NET_DECNET_NO_FC_MAX_CWND = 11,
+       NET_DECNET_MEM = 12,
+       NET_DECNET_RMEM = 13,
+       NET_DECNET_WMEM = 14,
+       NET_DECNET_DEBUG_LEVEL = 255
+};
+
+/* /proc/sys/net/decnet/conf/<dev> */
+enum {
+       NET_DECNET_CONF_LOOPBACK = -2,
+       NET_DECNET_CONF_DDCMP = -3,
+       NET_DECNET_CONF_PPP = -4,
+       NET_DECNET_CONF_X25 = -5,
+       NET_DECNET_CONF_GRE = -6,
+       NET_DECNET_CONF_ETHER = -7
+
+       /* ... and ifindex of devices */
+};
+
+/* /proc/sys/net/decnet/conf/<dev>/ */
+enum {
+       NET_DECNET_CONF_DEV_PRIORITY = 1,
+       NET_DECNET_CONF_DEV_T1 = 2,
+       NET_DECNET_CONF_DEV_T2 = 3,
+       NET_DECNET_CONF_DEV_T3 = 4,
+       NET_DECNET_CONF_DEV_FORWARDING = 5,
+       NET_DECNET_CONF_DEV_BLKSIZE = 6,
+       NET_DECNET_CONF_DEV_STATE = 7
+};
+
+/* /proc/sys/net/sctp */
+enum {
+       NET_SCTP_RTO_INITIAL = 1,
+       NET_SCTP_RTO_MIN     = 2,
+       NET_SCTP_RTO_MAX     = 3,
+       NET_SCTP_RTO_ALPHA   = 4,
+       NET_SCTP_RTO_BETA    = 5,
+       NET_SCTP_VALID_COOKIE_LIFE       =  6,
+       NET_SCTP_ASSOCIATION_MAX_RETRANS =  7,
+       NET_SCTP_PATH_MAX_RETRANS        =  8,
+       NET_SCTP_MAX_INIT_RETRANSMITS    =  9,
+       NET_SCTP_HB_INTERVAL             = 10,
+       NET_SCTP_PRESERVE_ENABLE         = 11,
+       NET_SCTP_MAX_BURST               = 12,
+       NET_SCTP_ADDIP_ENABLE            = 13,
+       NET_SCTP_PRSCTP_ENABLE           = 14,
+       NET_SCTP_SNDBUF_POLICY           = 15,
+       NET_SCTP_SACK_TIMEOUT            = 16,
+       NET_SCTP_RCVBUF_POLICY           = 17,
+};
+
+/* /proc/sys/net/bridge */
+enum {
+       NET_BRIDGE_NF_CALL_ARPTABLES = 1,
+       NET_BRIDGE_NF_CALL_IPTABLES = 2,
+       NET_BRIDGE_NF_CALL_IP6TABLES = 3,
+       NET_BRIDGE_NF_FILTER_VLAN_TAGGED = 4,
+};
+
+/* CTL_PROC names: */
+
+/* CTL_FS names: */
+enum
+{
+       FS_NRINODE=1,   /* int:current number of allocated inodes */
+       FS_STATINODE=2,
+       FS_MAXINODE=3,  /* int:maximum number of inodes that can be allocated */
+       FS_NRDQUOT=4,   /* int:current number of allocated dquots */
+       FS_MAXDQUOT=5,  /* int:maximum number of dquots that can be allocated */
+       FS_NRFILE=6,    /* int:current number of allocated filedescriptors */
+       FS_MAXFILE=7,   /* int:maximum number of filedescriptors that can be allocated */
+       FS_DENTRY=8,
+       FS_NRSUPER=9,   /* int:current number of allocated super_blocks */
+       FS_MAXSUPER=10, /* int:maximum number of super_blocks that can be allocated */
+       FS_OVERFLOWUID=11,      /* int: overflow UID */
+       FS_OVERFLOWGID=12,      /* int: overflow GID */
+       FS_LEASES=13,   /* int: leases enabled */
+       FS_DIR_NOTIFY=14,       /* int: directory notification enabled */
+       FS_LEASE_TIME=15,       /* int: maximum time to wait for a lease break */
+       FS_DQSTATS=16,  /* disc quota usage statistics and control */
+       FS_XFS=17,      /* struct: control xfs parameters */
+       FS_AIO_NR=18,   /* current system-wide number of aio requests */
+       FS_AIO_MAX_NR=19,       /* system-wide maximum number of aio requests */
+       FS_INOTIFY=20,  /* inotify submenu */
+};
+
+/* /proc/sys/fs/quota/ */
+enum {
+       FS_DQ_LOOKUPS = 1,
+       FS_DQ_DROPS = 2,
+       FS_DQ_READS = 3,
+       FS_DQ_WRITES = 4,
+       FS_DQ_CACHE_HITS = 5,
+       FS_DQ_ALLOCATED = 6,
+       FS_DQ_FREE = 7,
+       FS_DQ_SYNCS = 8,
+       FS_DQ_WARNINGS = 9,
+};
+
+/* CTL_DEBUG names: */
+
+/* CTL_DEV names: */
+enum {
+       DEV_CDROM=1,
+       DEV_HWMON=2,
+       DEV_PARPORT=3,
+       DEV_RAID=4,
+       DEV_MAC_HID=5,
+       DEV_SCSI=6,
+       DEV_IPMI=7,
+};
+
+/* /proc/sys/dev/cdrom */
+enum {
+       DEV_CDROM_INFO=1,
+       DEV_CDROM_AUTOCLOSE=2,
+       DEV_CDROM_AUTOEJECT=3,
+       DEV_CDROM_DEBUG=4,
+       DEV_CDROM_LOCK=5,
+       DEV_CDROM_CHECK_MEDIA=6
+};
+
+/* /proc/sys/dev/parport */
+enum {
+       DEV_PARPORT_DEFAULT=-3
+};
+
+/* /proc/sys/dev/raid */
+enum {
+       DEV_RAID_SPEED_LIMIT_MIN=1,
+       DEV_RAID_SPEED_LIMIT_MAX=2
+};
+
+/* /proc/sys/dev/parport/default */
+enum {
+       DEV_PARPORT_DEFAULT_TIMESLICE=1,
+       DEV_PARPORT_DEFAULT_SPINTIME=2
+};
+
+/* /proc/sys/dev/parport/parport n */
+enum {
+       DEV_PARPORT_SPINTIME=1,
+       DEV_PARPORT_BASE_ADDR=2,
+       DEV_PARPORT_IRQ=3,
+       DEV_PARPORT_DMA=4,
+       DEV_PARPORT_MODES=5,
+       DEV_PARPORT_DEVICES=6,
+       DEV_PARPORT_AUTOPROBE=16
+};
+
+/* /proc/sys/dev/parport/parport n/devices/ */
+enum {
+       DEV_PARPORT_DEVICES_ACTIVE=-3,
+};
+
+/* /proc/sys/dev/parport/parport n/devices/device n */
+enum {
+       DEV_PARPORT_DEVICE_TIMESLICE=1,
+};
+
+/* /proc/sys/dev/mac_hid */
+enum {
+       DEV_MAC_HID_KEYBOARD_SENDS_LINUX_KEYCODES=1,
+       DEV_MAC_HID_KEYBOARD_LOCK_KEYCODES=2,
+       DEV_MAC_HID_MOUSE_BUTTON_EMULATION=3,
+       DEV_MAC_HID_MOUSE_BUTTON2_KEYCODE=4,
+       DEV_MAC_HID_MOUSE_BUTTON3_KEYCODE=5,
+       DEV_MAC_HID_ADB_MOUSE_SENDS_KEYCODES=6
+};
+
+/* /proc/sys/dev/scsi */
+enum {
+       DEV_SCSI_LOGGING_LEVEL=1,
+};
+
+/* /proc/sys/dev/ipmi */
+enum {
+       DEV_IPMI_POWEROFF_POWERCYCLE=1,
+};
+
+/* /proc/sys/abi */
+enum
+{
+       ABI_DEFHANDLER_COFF=1,  /* default handler for coff binaries */
+       ABI_DEFHANDLER_ELF=2,   /* default handler for ELF binaries */
+       ABI_DEFHANDLER_LCALL7=3,/* default handler for procs using lcall7 */
+       ABI_DEFHANDLER_LIBCSO=4,/* default handler for an libc.so ELF interp */
+       ABI_TRACE=5,            /* tracing flags */
+       ABI_FAKE_UTSNAME=6,     /* fake target utsname information */
+};
+
+#ifdef __KERNEL__
+#include <linux/list.h>
+
+extern void sysctl_init(void);
+
+typedef struct ctl_table ctl_table;
+
+typedef int ctl_handler (ctl_table *table, int __user *name, int nlen,
+                        void __user *oldval, size_t __user *oldlenp,
+                        void __user *newval, size_t newlen, 
+                        void **context);
+
+typedef int proc_handler (ctl_table *ctl, int write, struct file * filp,
+                         void __user *buffer, size_t *lenp, loff_t *ppos);
+
+extern int proc_dostring(ctl_table *, int, struct file *,
+                        void __user *, size_t *, loff_t *);
+extern int proc_dointvec(ctl_table *, int, struct file *,
+                        void __user *, size_t *, loff_t *);
+extern int proc_dointvec_bset(ctl_table *, int, struct file *,
+                             void __user *, size_t *, loff_t *);
+extern int proc_dointvec_minmax(ctl_table *, int, struct file *,
+                               void __user *, size_t *, loff_t *);
+extern int proc_dointvec_jiffies(ctl_table *, int, struct file *,
+                                void __user *, size_t *, loff_t *);
+extern int proc_dointvec_userhz_jiffies(ctl_table *, int, struct file *,
+                                       void __user *, size_t *, loff_t *);
+extern int proc_dointvec_ms_jiffies(ctl_table *, int, struct file *,
+                                   void __user *, size_t *, loff_t *);
+extern int proc_doulongvec_minmax(ctl_table *, int, struct file *,
+                                 void __user *, size_t *, loff_t *);
+extern int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int,
+                                     struct file *, void __user *, size_t *, loff_t *);
+
+extern int do_sysctl (int __user *name, int nlen,
+                     void __user *oldval, size_t __user *oldlenp,
+                     void __user *newval, size_t newlen);
+
+extern int do_sysctl_strategy (ctl_table *table, 
+                              int __user *name, int nlen,
+                              void __user *oldval, size_t __user *oldlenp,
+                              void __user *newval, size_t newlen, void ** context);
+
+extern ctl_handler sysctl_string;
+extern ctl_handler sysctl_intvec;
+extern ctl_handler sysctl_jiffies;
+extern ctl_handler sysctl_ms_jiffies;
+
+
+/*
+ * Register a set of sysctl names by calling register_sysctl_table
+ * with an initialised array of ctl_table's.  An entry with zero
+ * ctl_name terminates the table.  table->de will be set up by the
+ * registration and need not be initialised in advance.
+ *
+ * sysctl names can be mirrored automatically under /proc/sys.  The
+ * procname supplied controls /proc naming.
+ *
+ * The table's mode will be honoured both for sys_sysctl(2) and
+ * proc-fs access.
+ *
+ * Leaf nodes in the sysctl tree will be represented by a single file
+ * under /proc; non-leaf nodes will be represented by directories.  A
+ * null procname disables /proc mirroring at this node.
+ * 
+ * sysctl(2) can automatically manage read and write requests through
+ * the sysctl table.  The data and maxlen fields of the ctl_table
+ * struct enable minimal validation of the values being written to be
+ * performed, and the mode field allows minimal authentication.
+ * 
+ * More sophisticated management can be enabled by the provision of a
+ * strategy routine with the table entry.  This will be called before
+ * any automatic read or write of the data is performed.
+ * 
+ * The strategy routine may return:
+ * <0: Error occurred (error is passed to user process)
+ * 0:  OK - proceed with automatic read or write.
+ * >0: OK - read or write has been done by the strategy routine, so 
+ *     return immediately.
+ * 
+ * There must be a proc_handler routine for any terminal nodes
+ * mirrored under /proc/sys (non-terminals are handled by a built-in
+ * directory handler).  Several default handlers are available to
+ * cover common cases.
+ */
+
+/* A sysctl table is an array of struct ctl_table: */
+struct ctl_table 
+{
+       int ctl_name;                   /* Binary ID */
+       const char *procname;           /* Text ID for /proc/sys, or zero */
+       void *data;
+       int maxlen;
+       mode_t mode;
+       ctl_table *child;
+       proc_handler *proc_handler;     /* Callback for text formatting */
+       ctl_handler *strategy;          /* Callback function for all r/w */
+       struct proc_dir_entry *de;      /* /proc control block */
+       void *extra1;
+       void *extra2;
+};
+
+/* struct ctl_table_header is used to maintain dynamic lists of
+   ctl_table trees. */
+struct ctl_table_header
+{
+       ctl_table *ctl_table;
+       struct list_head ctl_entry;
+       int used;
+       struct completion *unregistering;
+};
+
+struct ctl_table_header * register_sysctl_table(ctl_table * table, 
+                                               int insert_at_head);
+void unregister_sysctl_table(struct ctl_table_header * table);
+
+#else /* __KERNEL__ */
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_SYSCTL_H */
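
The comment block above documents the 2.6-era Linux sysctl registration interface that this header carries over. As a reference for how those declarations fit together, here is a minimal, hypothetical example of building and registering a one-entry table; the names my_value, my_table, my_header, and the setup/teardown functions are illustrative only and not part of this patch:

/* Hypothetical example: expose an int as /proc/sys/my_value using the
 * declarations above.  proc_dointvec is one of the stock handlers
 * declared in this header; an entry with zero ctl_name terminates the
 * table.  Assumes the usual errno definitions are in scope. */
static int my_value;

static ctl_table my_table[] = {
    {
        .ctl_name     = 1,              /* binary ID for sys_sysctl(2) */
        .procname     = "my_value",     /* name mirrored under /proc/sys */
        .data         = &my_value,
        .maxlen       = sizeof(int),
        .mode         = 0644,           /* honoured for sysctl(2) and /proc */
        .proc_handler = proc_dointvec,  /* default int read/write handler */
    },
    { .ctl_name = 0 }                   /* terminator */
};

static struct ctl_table_header *my_header;

static int my_sysctl_setup(void)
{
    /* second argument: 0 = insert at tail, nonzero = insert at head */
    my_header = register_sysctl_table(my_table, 0);
    return my_header ? 0 : -ENOMEM;
}

static void my_sysctl_teardown(void)
{
    unregister_sysctl_table(my_header);
}

Because no strategy routine is supplied here, sysctl(2) reads and writes fall back to the automatic path described in the comment above, validated only by the data/maxlen fields and the mode bits.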
diff -Nur xen-ia64-unstable.hg/xen/include/public/xen.h xenoprof-ia64-unstable/xen/include/public/xen.h
--- xen-ia64-unstable.hg/xen/include/public/xen.h       2006-06-22 13:37:14.000000000 +0900
+++ xenoprof-ia64-unstable/xen/include/public/xen.h     2006-06-30 15:25:01.000000000 +0900
@@ -4,6 +4,8 @@
  * Guest OS interface to Xen.
  * 
  * Copyright (c) 2004, K A Fraser
+ *
+ * Modified by KAZ(kaz@xxxxxxxxxxxxxx)
  */
 
 #ifndef __XEN_PUBLIC_XEN_H__
@@ -64,6 +66,7 @@
 #define __HYPERVISOR_xenoprof_op          31
 #define __HYPERVISOR_event_channel_op     32
 #define __HYPERVISOR_physdev_op           33
+#define __HYPERVISOR_perfmon_op           34
 
 /* Architecture-specific hypercall definitions. */
 #define __HYPERVISOR_arch_0               48
diff -Nur xen-ia64-unstable.hg/xen/include/xen/hypercall.h xenoprof-ia64-unstable/xen/include/xen/hypercall.h
--- xen-ia64-unstable.hg/xen/include/xen/hypercall.h    2006-06-22 13:37:14.000000000 +0900
+++ xenoprof-ia64-unstable/xen/include/xen/hypercall.h  2006-06-30 15:25:01.000000000 +0900
@@ -87,4 +87,17 @@
     unsigned int cmd,
     XEN_GUEST_HANDLE(void) arg);
 
+extern long
+do_perfmon_op(
+    unsigned int cmd,
+    XEN_GUEST_HANDLE(void) arg1,
+    XEN_GUEST_HANDLE(void) arg2,
+    unsigned int arg3);
+
+extern long
+do_xenoprof_op(
+    unsigned int cmd,
+    unsigned int arg1,
+    XEN_GUEST_HANDLE(void) arg2);
+
 #endif /* __XEN_HYPERCALL_H__ */
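
Declaring do_perfmon_op here only makes the handler visible; the __HYPERVISOR_perfmon_op number added to xen.h still has to be routed to it by the architecture's hypercall dispatcher (on ia64, a switch on the index). A hypothetical, simplified sketch of that wiring, with the marshalling of arguments out of guest registers omitted; dispatch_hypercall is an illustrative name, not the patched code:

/* Hypothetical sketch, not the patched dispatcher: route a hypercall
 * index to its handler.  Real code extracts cmd and the arguments from
 * guest registers before reaching this point. */
static long dispatch_hypercall(unsigned int index, unsigned int cmd,
                               XEN_GUEST_HANDLE(void) arg1,
                               XEN_GUEST_HANDLE(void) arg2,
                               unsigned int arg3)
{
    switch (index) {
    case __HYPERVISOR_perfmon_op:   /* 34, the number added in xen.h */
        return do_perfmon_op(cmd, arg1, arg2, arg3);
    default:
        return -ENOSYS;             /* unimplemented hypercall */
    }
}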
diff -Nur xen-ia64-unstable.hg/xen/include/xen/perfc_defn.h xenoprof-ia64-unstable/xen/include/xen/perfc_defn.h
--- xen-ia64-unstable.hg/xen/include/xen/perfc_defn.h   2006-06-22 13:37:14.000000000 +0900
+++ xenoprof-ia64-unstable/xen/include/xen/perfc_defn.h 2006-06-30 15:25:01.000000000 +0900
@@ -41,6 +41,10 @@
 PERFCOUNTER_CPU(timer_max,           "timer max error (ns)")
 PERFCOUNTER_CPU(sched_irq,              "sched: timer")
 PERFCOUNTER_CPU(sched_run,              "sched: runs through scheduler")
+PERFCOUNTER_CPU(sched_xen,              "sched: runs hyper through scheduler")
+PERFCOUNTER_CPU(sched_block,            "sched: runs do_block")
+PERFCOUNTER_CPU(sched_poll,             "sched: runs do_poll")
+PERFCOUNTER_CPU(sched_yield,            "sched: runs do_yield")
 PERFCOUNTER_CPU(sched_ctx,              "sched: context switches")
 
 PERFCOUNTER_CPU(domain_page_tlb_flush,  "domain page tlb flushes")
diff -Nur xen-ia64-unstable.hg/xen/include/xen/smp.h xenoprof-ia64-unstable/xen/include/xen/smp.h
--- xen-ia64-unstable.hg/xen/include/xen/smp.h  2006-06-22 13:37:13.000000000 +0900
+++ xenoprof-ia64-unstable/xen/include/xen/smp.h        2006-06-30 15:25:01.000000000 +0900
@@ -111,4 +111,6 @@
 
 #define smp_processor_id() raw_smp_processor_id()
 
+#define put_cpu_no_resched()   preempt_enable_no_resched()
+
 #endif
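
The one-line smp.h shim above exists because the imported Linux perfmon.c uses the get_cpu()/put_cpu_no_resched() pairing, while Xen only provides preempt_enable_no_resched(). A hypothetical usage sketch, assuming get_cpu() is reachable through the linux-xen compatibility layer; the pfm_stats array and pfm_count_overflow function are invented here for illustration:

/* Hypothetical sketch: per-CPU accounting in the style of perfmon.c.
 * get_cpu() disables preemption and returns the current CPU id;
 * put_cpu_no_resched() re-enables preemption without an immediate
 * reschedule check, via the macro defined above. */
static struct { unsigned long ovfl_intr_count; } pfm_stats[NR_CPUS];

static void pfm_count_overflow(void)
{
    int cpu = get_cpu();                  /* pin to the current CPU */
    pfm_stats[cpu].ovfl_intr_count++;     /* touch per-CPU state safely */
    put_cpu_no_resched();                 /* unpin; no resched on release */
}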
diff -Nur xen-ia64-unstable.hg/xen/include/xen/types.h xenoprof-ia64-unstable/xen/include/xen/types.h
--- xen-ia64-unstable.hg/xen/include/xen/types.h        2006-06-22 13:37:14.000000000 +0900
+++ xenoprof-ia64-unstable/xen/include/xen/types.h      2006-06-30 15:25:01.000000000 +0900
@@ -20,6 +20,22 @@
 #define LONG_MIN        (-LONG_MAX - 1)
 #define ULONG_MAX       (~0UL)
 
+typedef unsigned int   __kernel_mode_t;
+
+/* typedef __kernel_fd_set             fd_set; */
+/* typedef __kernel_dev_t              dev_t; */
+/* typedef __kernel_ino_t              ino_t; */
+typedef __kernel_mode_t                mode_t;
+/* typedef __kernel_nlink_t    nlink_t; */
+/* typedef __kernel_off_t              off_t; */
+/* typedef __kernel_pid_t              pid_t; */
+/* typedef __kernel_daddr_t    daddr_t; */
+/* typedef __kernel_key_t              key_t; */
+/* typedef __kernel_suseconds_t        suseconds_t; */
+/* typedef __kernel_timer_t    timer_t; */
+/* typedef __kernel_clockid_t  clockid_t; */
+/* typedef __kernel_mqd_t              mqd_t; */
+
 /* bsd */
 typedef unsigned char           u_char;
 typedef unsigned short          u_short;
_______________________________________________
Xen-ia64-devel mailing list
Xen-ia64-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ia64-devel