diff -r 52f536cda4ba tools/include/Makefile
--- a/tools/include/Makefile	Fri Jul 16 17:36:44 2010 -0700
+++ b/tools/include/Makefile	Wed Aug 25 19:02:24 2010 -0700
@@ -13,7 +13,7 @@ xen/.dir:
 	mkdir -p xen/libelf
 	ln -sf ../$(XEN_ROOT)/xen/include/public/COPYING xen
 	ln -sf $(addprefix ../,$(wildcard $(XEN_ROOT)/xen/include/public/*.h)) xen
-	ln -sf $(addprefix ../$(XEN_ROOT)/xen/include/public/,arch-ia64 arch-x86 hvm io xsm) xen
+	ln -sf $(addprefix ../$(XEN_ROOT)/xen/include/public/,arch-ia64 arch-x86 hvm io xsm perf) xen
 	ln -sf ../xen-sys/$(XEN_OS) xen/sys
 	ln -sf $(addprefix ../../$(XEN_ROOT)/xen/include/xen/,libelf.h elfstructs.h) xen/libelf/
 	ln -s ../xen-foreign xen/foreign
@@ -30,6 +30,7 @@ install: all
 	$(INSTALL_DIR) $(DESTDIR)$(INCLUDEDIR)/xen/io
 	$(INSTALL_DIR) $(DESTDIR)$(INCLUDEDIR)/xen/sys
 	$(INSTALL_DIR) $(DESTDIR)$(INCLUDEDIR)/xen/xsm
+	$(INSTALL_DIR) $(DESTDIR)$(INCLUDEDIR)/xen/perf
 	$(INSTALL_DATA) xen/COPYING $(DESTDIR)$(INCLUDEDIR)/xen
 	$(INSTALL_DATA) xen/*.h $(DESTDIR)$(INCLUDEDIR)/xen
 	$(INSTALL_DATA) xen/arch-ia64/*.h $(DESTDIR)$(INCLUDEDIR)/xen/arch-ia64
@@ -41,6 +42,7 @@ install: all
 	$(INSTALL_DATA) xen/io/*.h $(DESTDIR)$(INCLUDEDIR)/xen/io
 	$(INSTALL_DATA) xen/sys/*.h $(DESTDIR)$(INCLUDEDIR)/xen/sys
 	$(INSTALL_DATA) xen/xsm/*.h $(DESTDIR)$(INCLUDEDIR)/xen/xsm
+	$(INSTALL_DATA) xen/perf/*.h $(DESTDIR)$(INCLUDEDIR)/xen/perf
 
 .PHONY: clean
 clean:
diff -r 52f536cda4ba tools/perf/xmstat/Makefile
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/perf/xmstat/Makefile	Wed Aug 25 19:02:24 2010 -0700
@@ -0,0 +1,49 @@
+# Copyright (C) HP Labs, Palo Alto and Fort Collins, 2005
+# Author: Diwaker Gupta
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; under version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+XEN_ROOT=../../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+CFLAGS += -Werror
+CFLAGS += -I $(XEN_XC)
+CFLAGS += $(CFLAGS_libxenctrl)
+LDFLAGS += $(LDFLAGS_libxenctrl)
+
+CFLAGS-$(debug) := $(filter-out -O% -DNDEBUG -fomit-frame-pointer, $(CFLAGS))
+CFLAGS-$(debug) += -O0
+
+
+BIN = xmstat
+
+.PHONY: all
+all: build
+
+.PHONY: build
+build: $(BIN)
+
+# Install only what this directory actually builds, i.e. $(BIN).
+.PHONY: install
+install: build
+	$(INSTALL_DIR) $(DESTDIR)$(SBINDIR)
+	$(INSTALL_PROG) $(BIN) $(DESTDIR)$(SBINDIR)/$(BIN)
+
+.PHONY: clean
+clean:
+	rm -f $(BIN) $(DEPS)
+
+
+%: %.c Makefile
+	$(CC) $(CFLAGS) $< $(LDFLAGS) -o $@
+xmstat%: %.c Makefile
+	$(CC) $(CFLAGS) $< $(LDFLAGS) -o $@
+
+-include $(DEPS)
diff -r 52f536cda4ba tools/perf/xmstat/xmstat.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/perf/xmstat/xmstat.c	Wed Aug 25 19:02:24 2010 -0700
@@ -0,0 +1,401 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+extern int lock_pages(void *addr, size_t len);
+extern int unlock_pages(void *addr, size_t len);
+static void xmtrc(const char *fn, const char *fmt, ...);
+
+#define MAX_LINE_GUESTS 256 /* max guest we'll print in a line */
+#define XEN_HYP_ID (0x7FFFU)
+#define INVALID_DOMID (0xFFFFFFFFU)
+
+#define XMTRC(...) \
+    do {(xmtrc_on) ? 
(xmtrc(__FUNCTION__,__VA_ARGS__)):0;} while (0)
+
+struct guest_usg {
+    uint32_t domid;
+    uint32_t vcpuid;
+    int count;
+    int cpu_pct;
+};
+
+static int old_idxs[XMST_MAX_CPUS];
+static int cpu_interval_ms; /* interval cpu is taking samples */
+static int our_interval_ms; /* interval we are collecting and printing */
+static int tot_samples; /* tot samples cpu collects between our printing*/
+static int _dom0_fd; /* fd of /dev/privcmd */
+static int xmtrc_on=0; /* debug prints */
+static volatile sig_atomic_t quit_xmstat; /* set by sig_handler to quit */
+
+
+/* go thru the guest_usg[] and calculate guest usage pct for vcpu/domain
+ * and also for hypervisor.
+ * Returns: cpu usage
+ */
+static int calc_usg(struct guest_usg g_usga[])
+{
+    int i, tot_cpu_usg=0;
+
+    for (i=0; g_usga[i].domid != INVALID_DOMID; i++) {
+        g_usga[i].cpu_pct = ((g_usga[i].count+1)*100) / tot_samples;
+        XMTRC("%d: dom:%d vcpu:%d count:%d pct:%d\n", i, g_usga[i].domid,
+              g_usga[i].vcpuid, g_usga[i].count, g_usga[i].cpu_pct);
+
+        tot_cpu_usg += g_usga[i].cpu_pct;
+    }
+    XMTRC("tot_cpu_usg:%d\n", tot_cpu_usg);
+    return tot_cpu_usg;
+}
+
+
+/*
+ * collect guest samples in g_usga[]. start with oldidx and collect samples
+ * till endidx. at most MAX_LINE_GUESTS-1 entries, INVALID_DOMID terminated
+ */
+static void guest_samples(int endidx, int oldidx, struct guest_usg g_usga[],
+                          struct xmst_pcpu_data cpu_dataa[])
+{
+    int i, gi=0, idx = oldidx;
+
+    for (; idx != endidx; idx = (idx+1) & XMST_MAX_IDX) {
+
+        if (cpu_dataa[idx].xmst_domid == INVALID_DOMID)
+            continue;
+
+        for (i=0; i < gi; i++) {
+            if (g_usga[i].domid == cpu_dataa[idx].xmst_domid &&
+                g_usga[i].vcpuid == cpu_dataa[idx].xmst_vcpu)
+            {
+                g_usga[i].count++;
+                break;
+            }
+        }
+        if (i < gi || gi >= MAX_LINE_GUESTS-1) /* known, or no room left */
+            continue;
+
+        g_usga[gi].domid = cpu_dataa[idx].xmst_domid;
+        g_usga[gi].count = 0;
+        g_usga[gi++].vcpuid = cpu_dataa[idx].xmst_vcpu;
+    }
+
+    g_usga[gi].domid = INVALID_DOMID;
+}
+
+/*
+ * Following is what and how we print:
+ *
+ * cpu usg intrps pflts hypercls evntc hyp Guests ------------------>
+ * 6 66 216 765 82 435 10 10:040:20 1:019:30 3:021:05
+ * 16 77 124 105 29 019 11 9:003:03 8:017:40 4:023:05
+ */
+static void print_header(void)
+{
+    printf(
+"cpu usg intrps pflts hypercls evntc hyp Guests ------------------>\n");
+}
+
+/* g_usga sorted, so xen is last if there. ctrs: pflts,intrps,hcalls,evntc */
+static void print_one_line(int cpu, int cpu_usg, int num_ele, int ctrs[],
+                           struct guest_usg g_usga[])
+{
+    int i, hyp_idx = -1;
+
+    if (num_ele && g_usga[num_ele-1].domid == XEN_HYP_ID)
+        hyp_idx = --num_ele;
+
+    printf("%3d %3d %6d %5d %8d %5d %3d", cpu, cpu_usg, ctrs[1], ctrs[0],
+           ctrs[2], ctrs[3], hyp_idx == -1 ? 0 : g_usga[hyp_idx].cpu_pct);
+
+    for (i=0; i < num_ele; i++) {
+        if (g_usga[i].cpu_pct == 0)
+            continue;
+
+        printf(" %3d:%03d:%2d", g_usga[i].domid, g_usga[i].vcpuid,
+               g_usga[i].cpu_pct);
+    }
+    printf("\n");
+}
+
+
+/* sort the array by domid:vcpuid, this results in xen entry being last */
+static int comp_guest_for_sort(const void *p1, const void *p2)
+{
+    struct guest_usg *gp1 = (struct guest_usg*)p1, *gp2 = (struct guest_usg*)p2;
+
+    if (gp1->domid < gp2->domid)
+        return -1;
+    else if (gp1->domid > gp2->domid)
+        return 1;
+    else if (gp1->vcpuid < gp2->vcpuid)
+        return -1;
+    else if (gp1->vcpuid > gp2->vcpuid)
+        return 1;
+    else if (gp1->cpu_pct < gp2->cpu_pct)
+        return -1;
+    else if (gp1->cpu_pct > gp2->cpu_pct)
+        return 1;
+    else
+        return 0;
+}
+
+/* sort the array and return number of elements in the array */
+static int sort_guest_usga(struct guest_usg g_usga[])
+{
+    int i;
+
+    for (i=0; g_usga[i].domid != INVALID_DOMID; i++);
+    if (i < 2)
+        return i;
+
+    qsort(g_usga, i, sizeof(struct guest_usg), comp_guest_for_sort);
+    return i;
+}
+
+static void print_continously(void *pg0p, int numcpus)
+{
+    int i, cpuid, cpu_usg, num_ele, idx, oldidx, size, ctrs[16];
+    struct guest_usg guest_usga[MAX_LINE_GUESTS];
+    struct xmst_meta *metap = pg0p;
+
+    /* bzero(guest_usga, sizeof(guest_usga)); */
+    size = sizeof(struct xmst_pcpu_meta) -
+           offsetof(struct xmst_pcpu_meta, xmst_pfaults);
+    XMTRC("size:%d\n", size);
+
+    for (i=0; i < numcpus; i++) {
+        struct xmst_pcpu_meta *cpu_metap = &metap->xmst_pcpu_meta[i];
+        struct xmst_pcpu_data *cpu_datap = pg0p + XMST_PAGE_SIZE*(i+1);
+
+        memcpy(ctrs, &cpu_metap->xmst_pfaults, size);
+        memset(&cpu_metap->xmst_pfaults, 0, size);
+
+        oldidx = old_idxs[i]; /* last time where we stopped */
+        idx = cpu_metap->xmst_index & XMST_MAX_IDX;
+        idx = idx == 0 ? XMST_MAX_IDX : idx-1; /* not that it matters */
+        old_idxs[i] = idx; /* next time start from here */
+        XMTRC("cpu:%d samples strt:%d end:%d\n", i, oldidx, idx);
+
+        guest_samples(idx, oldidx, guest_usga, cpu_datap);
+        cpu_usg = calc_usg(guest_usga);
+        num_ele = sort_guest_usga(guest_usga);
+
+        cpuid = cpu_metap->xmst_cpuid;
+        print_one_line(cpuid, cpu_usg, num_ele, ctrs, guest_usga);
+    }
+    printf("\n");
+}
+
+static void
+xm_setpriority(void)
+{
+    int rc;
+    if ((rc=setpriority(PRIO_PROCESS, 0, -15)))
+        printf("Could not set priority to -15. rc:%d errno:%d\n", rc, errno);
+}
+
+static int do_hypercall(int xmst_cmd, struct xen_perfop *perfop_p)
+{
+    int rc;
+    privcmd_hypercall_t hypercall;
+
+    perfop_p->cmd = XEN_PERFOP_XMSTAT;
+
+    hypercall.op = (ulong)__HYPERVISOR_perf_op;
+    hypercall.arg[0] = (ulong)perfop_p;
+
+    perfop_p->cmd = XEN_PERFOP_XMSTAT;
+    perfop_p->interface_version = XENPERF_INTERFACE_VERSION;
+    perfop_p->u.perf_xmstat.xmst_cmd = xmst_cmd;
+
+    if (lock_pages(perfop_p, sizeof(struct xen_perfop)) != 0) {
+        printf("Could not lock mem for hypercall. errno:%d\n", errno);
+        return errno;
+    }
+    rc = ioctl(_dom0_fd, IOCTL_PRIVCMD_HYPERCALL, (ulong)&hypercall);
+
+    unlock_pages(perfop_p, sizeof(struct xen_perfop));
+
+    XMTRC("hcall ret: rc:%d errno:%d perfop_p:%p\n", rc, errno, perfop_p);
+
+    return rc;
+}
+
+
+static void * map_pages(struct xmst_init *initp)
+{
+    #define ERRA_SZ 1024
+    void *vaddr;
+    int i, rc=0, numpgs = initp->xmst_num_mfns;
+    int erra[ERRA_SZ] = {0};
+
+    if (ERRA_SZ < numpgs) {
+        printf("Error: Please increase erra size. numpgs:%d, erra_sz:%d\n",
+               numpgs, ERRA_SZ);
+        return NULL;
+    }
+
+    XMTRC("num pages returned:%d. mfns are:\n", numpgs);
+    for (i=0; i < numpgs; i++) {
+        XMTRC(" 0x%lx", initp->xmst_mfna[i]);
+    }
+
+    /* TBD: pass pfna instead of mfna. 32bit dom0 is 32bit array elements */
+
+    vaddr = xc_map_foreign_bulk(_dom0_fd, DOMID_XEN, PROT_WRITE,
+                                initp->xmst_mfna, erra, numpgs);
+    if (vaddr == NULL) {
+        printf("Failed to map %d pages. errno:%d\n", numpgs, errno);
+        return NULL;
+    }
+
+    for (i=0; i < numpgs; i++) {
+        if (erra[i]) {
+            printf("Pagenum %d mfn:0x%lx failed to map. err:%d\n",
+                   i, initp->xmst_mfna[i], erra[i]);
+            rc = 1;
+        }
+    }
+    return (rc == 0 ? vaddr : NULL);
+}
+
+
+static void sig_handler(int signal)
+{
+    quit_xmstat = 1;
+    printf("Cleaning up.... please wait..\n");
+}
+
+static void set_signal_handler(void)
+{
+    struct sigaction sigact;
+
+    sigact.sa_handler = sig_handler;
+    sigact.sa_flags = 0;
+    sigemptyset(&sigact.sa_mask);
+    sigaction(SIGHUP, &sigact, NULL);
+    sigaction(SIGTERM, &sigact, NULL);
+    sigaction(SIGINT, &sigact, NULL);
+    sigaction(SIGALRM, &sigact, NULL);
+}
+
+static void usage_exit(char *argv0)
+{
+    printf("Usage: %s [clean][-d]\n", argv0);
+    exit(1);
+}
+
+int main(int argc, char **argv)
+{
+    struct xen_perfop perfop;
+    int rc;
+    unsigned int i, num_cpus;
+    void *vaddr;
+
+    if ( strcmp(argv[argc-1], "-d") == 0) {
+        xmtrc_on = 1;
+        argc--;
+    }
+    if ( strcmp(argv[argc-1], "clean") == 0) {
+        sig_handler(0);
+        return 0;
+    }
+    if ((strcmp(argv[argc-1], "-h") == 0) ||
+        (strcmp(argv[argc-1], "-?") == 0) ||
+        argc != 1)
+    {
+        usage_exit(argv[0]);
+    }
+
+    xm_setpriority();
+
+    if ( (_dom0_fd=xc_interface_open()) == -1) {
+        printf("failed to open privcmd. errno:%d\n", errno);
+        return errno;
+    }
+
+    set_signal_handler();
+
+    if ((rc = do_hypercall(XMSTAT_INIT, &perfop))) {
+        close(_dom0_fd);
+        return rc;
+    }
+    if ((vaddr = map_pages(&perfop.u.perf_xmstat.u.xmst_init)) == NULL) {
+        do_hypercall(XMSTAT_DEINIT, &perfop);
+        close(_dom0_fd);
+        return 1; /* rc is 0 here; mapping failure must not look like success */
+    }
+
+    cpu_interval_ms = 2;
+    our_interval_ms = 1000;
+    tot_samples = our_interval_ms / cpu_interval_ms;
+    XMTRC("sample interval: %d. Samples per sec:%d\n", cpu_interval_ms,
+          tot_samples);
+    sleep(1);
+
+    num_cpus = perfop.u.perf_xmstat.u.xmst_init.xmst_num_cpus;
+
+    for (i=0; !quit_xmstat; i++) {
+        if (i % 10 == 0)
+            print_header();
+        print_continously(vaddr, num_cpus);
+        fflush(NULL);
+        usleep(900*1000); /* 900 msecs */
+    }
+
+    if ((rc = do_hypercall(XMSTAT_DEINIT, &perfop)))
+        printf("Failed to do cleanup. rc:%d\n", rc);
+
+    close(_dom0_fd);
+    return 0;
+}
+
+/* print trace info with function name pre-pended */
+void
+xmtrc(const char *fn, const char *fmt, ...)
+{
+    char buf[2048];
+    va_list args;
+
+    fprintf(stderr, "%s:", fn);
+    va_start(args, fmt);
+    (void)vsnprintf(buf, sizeof(buf), fmt, args);
+    va_end(args);
+    fprintf(stderr, "%s", buf);
+    fflush (stderr);
+}
+
+/* print error msg with function name pre-pended */
+void
+xmprt(const char *fn, const char *fmt, ...)
+{
+    char buf[2048];
+    va_list args;
+
+    fprintf(stderr, "ERROR:%s:", fn);
+    va_start(args, fmt);
+    (void)vsnprintf(buf, sizeof(buf), fmt, args);
+    va_end(args);
+    fprintf (stderr, "%s", buf);
+    fflush (stderr);
+}
+
diff -r 52f536cda4ba xen/Rules.mk
--- a/xen/Rules.mk	Fri Jul 16 17:36:44 2010 -0700
+++ b/xen/Rules.mk	Wed Aug 25 19:02:24 2010 -0700
@@ -10,6 +10,7 @@ lock_profile ?= n
 crash_debug ?= n
 frame_pointer ?= n
 kdb ?= n
+xmstat ?= y
 
 XEN_ROOT=$(BASEDIR)/..
 include $(XEN_ROOT)/Config.mk
@@ -55,6 +56,7 @@ CFLAGS-$(perfc_arrays) += -DPERF_ARRAYS
 CFLAGS-$(lock_profile) += -DLOCK_PROFILE
 CFLAGS-$(frame_pointer) += -fno-omit-frame-pointer -DCONFIG_FRAME_POINTER
 CFLAGS-$(kdb) += -DXEN_KDB_CONFIG
+CFLAGS-$(xmstat) += -DXEN_PERF_XMSTAT
 
 ifneq ($(max_phys_cpus),)
 CFLAGS-y += -DMAX_PHYS_CPUS=$(max_phys_cpus)
diff -r 52f536cda4ba xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c	Fri Jul 16 17:36:44 2010 -0700
+++ b/xen/arch/x86/irq.c	Wed Aug 25 19:02:24 2010 -0700
@@ -23,6 +23,9 @@
 #include
 #include
 #include
+#ifdef XEN_PERF_XMSTAT
+#include
+#endif
 
 /* opt_noirqbalance: If true, software IRQ balancing/affinity is disabled. 
*/
 int __read_mostly opt_noirqbalance = 0;
@@ -527,7 +530,12 @@ asmlinkage void do_IRQ(struct cpu_user_r
     unsigned int vector = regs->entry_vector;
     int irq = __get_cpu_var(vector_irq[vector]);
     struct cpu_user_regs *old_regs = set_irq_regs(regs);
-    
+
+#ifdef XEN_PERF_XMSTAT
+    if (xperf_active)
+        (this_cpu(xmst_pcpu_mp))->xmst_interrupts++;
+#endif
+
     perfc_incr(irqs);
 
     this_cpu(irq_count)++;
diff -r 52f536cda4ba xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c	Fri Jul 16 17:36:44 2010 -0700
+++ b/xen/arch/x86/traps.c	Wed Aug 25 19:02:24 2010 -0700
@@ -67,6 +67,9 @@
 #include
 #include
 
+#ifdef XEN_PERF_XMSTAT
+#include
+#endif
 /*
  * opt_nmi: one of 'ignore', 'dom0', or 'fatal'.
  * fatal: Xen prints diagnostic message and then hangs.
@@ -1281,6 +1284,10 @@ asmlinkage void do_page_fault(struct cpu
     unsigned long addr, fixup;
     unsigned int error_code;
 
+#ifdef XEN_PERF_XMSTAT
+    if (xperf_active)
+        (this_cpu(xmst_pcpu_mp))->xmst_pfaults++;
+#endif
     addr = read_cr2();
 
     /* fixup_page_fault() might change regs->error_code, so cache it here. */
@@ -3068,16 +3075,16 @@ asmlinkage void do_nmi(struct cpu_user_r
     unsigned int cpu = smp_processor_id();
     unsigned char reason;
 
+    ++nmi_count(cpu);
+
+    if ( nmi_callback(regs, cpu) )
+        return;
+
 #ifdef XEN_KDB_CONFIG
     if (kdb_enabled && kdb_handle_trap_entry(TRAP_nmi, regs))
         return;
 #endif
 
-    ++nmi_count(cpu);
-
-    if ( nmi_callback(regs, cpu) )
-        return;
-
     if ( nmi_watchdog )
         nmi_watchdog_tick(regs);
 
diff -r 52f536cda4ba xen/arch/x86/x86_64/Makefile
--- a/xen/arch/x86/x86_64/Makefile	Fri Jul 16 17:36:44 2010 -0700
+++ b/xen/arch/x86/x86_64/Makefile	Wed Aug 25 19:02:24 2010 -0700
@@ -1,4 +1,5 @@
 subdir-y += compat
+subdir-y += perf
 
 obj-y += entry.o
 obj-y += gpr_switch.o
diff -r 52f536cda4ba xen/arch/x86/x86_64/compat/entry.S
--- a/xen/arch/x86/x86_64/compat/entry.S	Fri Jul 16 17:36:44 2010 -0700
+++ b/xen/arch/x86/x86_64/compat/entry.S	Wed Aug 25 19:02:24 2010 -0700
@@ -415,6 +415,7 @@ ENTRY(compat_hypercall_table)
         .quad do_domctl
         .quad compat_kexec_op
         .quad do_tmem_op
+        .quad do_perf_op
         .rept __HYPERVISOR_arch_0-((.-compat_hypercall_table)/8)
         .quad compat_ni_hypercall
         .endr
@@ -463,6 +464,7 @@ ENTRY(compat_hypercall_args_table)
         .byte 1 /* do_domctl */
         .byte 2 /* compat_kexec_op */
         .byte 1 /* do_tmem_op */
+        .byte 1 /* do_perf_op */
         .rept __HYPERVISOR_arch_0-(.-compat_hypercall_args_table)
         .byte 0 /* compat_ni_hypercall */
         .endr
diff -r 52f536cda4ba xen/arch/x86/x86_64/entry.S
--- a/xen/arch/x86/x86_64/entry.S	Fri Jul 16 17:36:44 2010 -0700
+++ b/xen/arch/x86/x86_64/entry.S	Wed Aug 25 19:02:24 2010 -0700
@@ -188,6 +188,7 @@ ENTRY(syscall_enter)
 #undef SHADOW_BYTES
 1:      leaq hypercall_table(%rip),%r10
         PERFC_INCR(PERFC_hypercalls, %rax, %rbx)
+        PERF_HCALL_ACCT(%rax, %rbx)
         callq *(%r10,%rax,8)
 #ifndef NDEBUG
         /* Deliberately corrupt parameter regs used by this hypercall. */
@@ -233,6 +234,7 @@ test_guest_events:
         jz restore_all_guest
 /*process_guest_events:*/
         sti
+        PERF_EVENTS_ACCT(%rbx)
         leaq VCPU_trap_bounce(%rbx),%rdx
         movq VCPU_event_addr(%rbx),%rax
         movq %rax,TRAPBOUNCE_eip(%rdx)
@@ -717,6 +719,7 @@ ENTRY(hypercall_table)
         .quad do_domctl
         .quad do_kexec_op
         .quad do_tmem_op
+        .quad do_perf_op
         .rept __HYPERVISOR_arch_0-((.-hypercall_table)/8)
         .quad do_ni_hypercall
         .endr
@@ -765,6 +768,7 @@ ENTRY(hypercall_args_table)
         .byte 1 /* do_domctl */
         .byte 2 /* do_kexec */
         .byte 1 /* do_tmem_op */
+        .byte 1 /* do_perf_op */
         .rept __HYPERVISOR_arch_0-(.-hypercall_args_table)
         .byte 0 /* do_ni_hypercall */
         .endr
diff -r 52f536cda4ba xen/arch/x86/x86_64/perf/Makefile
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/x86_64/perf/Makefile	Wed Aug 25 19:02:24 2010 -0700
@@ -0,0 +1,3 @@
+subdir-y += xmstat
+
+obj-y += xenperf.o
diff -r 52f536cda4ba xen/arch/x86/x86_64/perf/xenperf.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/x86_64/perf/xenperf.c	Wed Aug 25 19:02:24 2010 -0700
@@ -0,0 +1,46 @@
+#include
+#include
+#include
+#include
+#include
+
+extern int xmstat_op(struct xen_perfop_xmstat *);
+
+/*
+ * Entry point of the perf hypercall: copy the request in, dispatch on
+ * perfop.cmd, and copy the (possibly updated) request back to the caller.
+ * Returns: the sub-op's result, -EFAULT on copy failure, -EACCES on an
+ * interface version mismatch, -ENOSYS for an unknown cmd.
+ *
+ * NOTE(review): no privilege check is visible here; presumably only a
+ * privileged domain should be allowed in -- confirm and add one.
+ */
+long do_perf_op(XEN_GUEST_HANDLE(xen_perfop_t) u_perfop)
+{
+    int rc;
+    struct xen_perfop perfop;
+
+    if (copy_from_guest(&perfop, u_perfop, 1))
+        return -EFAULT;
+
+    /* refuse callers built against a different version of the interface */
+    if (perfop.interface_version != XENPERF_INTERFACE_VERSION)
+        return -EACCES;
+
+    switch (perfop.cmd)
+    {
+    case XEN_PERFOP_XMSTAT:
+        rc = xmstat_op(&perfop.u.perf_xmstat);
+        if (copy_to_guest(u_perfop, &perfop, 1))
+            rc = -EFAULT;
+
+        break;
+
+    default:
+        rc = -ENOSYS;
+        break;
+    }
+
+    return rc;
+}
+
diff -r 52f536cda4ba xen/arch/x86/x86_64/perf/xmstat/Makefile
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/x86_64/perf/xmstat/Makefile	Wed Aug 25 19:02:24 2010 -0700
@@ -0,0 +1,2 @@
+obj-y += xmstat.o
+obj-y += hwctrs.o
diff -r 52f536cda4ba xen/arch/x86/x86_64/perf/xmstat/hwctrs.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/x86_64/perf/xmstat/hwctrs.c	Wed Aug 25 19:02:24 2010 -0700
@@ -0,0 +1,252 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define XMSTDBGP(...) {(xmstat_trc) ? printk(__VA_ARGS__):0;}
+
+/* BUILD_BUG_ON: offset xmst_hcalls not 16 */
+
+/* PESM: Perf Event Select MSR fields */
+/* PESM: first bit fields */
+#define P6_PESM__USRMODE (1ULL << 16) /* Count while not in ring 0 */
+#define P6_PESM__KERNMODE (1ULL << 17) /* Count while in ring 0 */
+#define P6_PESM__EDGE (1ULL << 18) /* Enable edge detection */
+#define P6_PESM__PC (1ULL << 19) /* Enable pin control */
+#define P6_PESM__INT (1ULL << 20) /* Enable interrupt on overflow */
+#define P6_PESM__ANY (1ULL << 21) /* Any thread */
+#define P6_PESM__ENABLE (1ULL << 22) /* Enable counters */
+#define P6_PESM__INV (1ULL << 23) /* Invert the CMASK comparison */
+
+/* PESM: event select field */
+#define PESM_EVTSEL_UNHALTED_CYCLES_CORE 0x3C
+#define PESM_EVTSEL_UNHALTED_CYCLES_P6 0x79
+
+/* PESM: Umask field */
+#define UMASK_UNHALTED_CORE_CYCLES (0x00 << 8)
+#define UMASK_UNHALTED_REF_CYCLES (0x01 << 8)
+
+
+/* AMD definitions */
+#define K7_EVNTSEL_ENABLE (1 << 22)
+#define K7_EVNTSEL_INT (1 << 20)
+#define K7_EVNTSEL_OS (1 << 17)
+#define K7_EVNTSEL_USR (1 << 16)
+#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
+#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
+
+volatile unsigned int xperf_active;
+
+unsigned int sav_perfctr_msr; /* the MSR to reset in NMI callback */
+int msr_ctrval;
+
+volatile int xmst_timer_debug=0;
+
+extern int xmstat_nmi_callback(struct cpu_user_regs *regs, int cpu);
+
+
+static int stop_amd_ctrs(void)
+{
+    wrmsr(MSR_K7_EVNTSEL0, 0, 0);
+    wrmsr(MSR_K7_PERFCTR0, 0, 0);
+    return 0;
+}
+
+static noinline int setup_k7_ctrs(void)
+{
+    unsigned int msrval;
+    int interval_ms = 2; /* sample interval in milliseconds */
+    int ticks = cpu_khz * interval_ms; /* see __udelay() */
+
+    msr_ctrval = 0 - ticks; /* ctr will roll up and over */
+
+    wrmsr(MSR_K7_EVNTSEL0, 0, 0);
+    wrmsr(MSR_K7_PERFCTR0, 0, 0);
+
+    msrval = K7_EVNTSEL_INT | K7_EVNTSEL_OS | K7_EVNTSEL_USR | K7_NMI_EVENT;
+    wrmsr(MSR_K7_EVNTSEL0, msrval, 0);
+
+    wrmsrl(MSR_K7_PERFCTR0, msr_ctrval);
+    sav_perfctr_msr = MSR_K7_PERFCTR0;
+
+    apic_write(APIC_LVTPC, APIC_DM_NMI);
+
+    msrval |= K7_EVNTSEL_ENABLE;
+    wrmsr(MSR_K7_EVNTSEL0, msrval, 0);
+
+    return 0;
+}
+
+static noinline int setup_amd_ctrs(void)
+{
+    int fam = boot_cpu_data.x86;
+
+    switch (fam) {
+        case 6:
+        case 15 ... 17:
+            return setup_k7_ctrs();
+
+        default:
+            printk("xperf:CPU family %d not supported\n", fam);
+    }
+    return -1;
+}
+
+/* courtesy setup_p6_watchdog() nmi.c
+ * NOTE: wrmsr(msr, low, high)
+ */
+static noinline int setup_p6_ctrs(void)
+{
+    unsigned int msrval;
+    /* int ccpu = smp_processor_id(); */
+    int model = boot_cpu_data.x86_model;
+    unsigned int eventsel = model < 14 ? PESM_EVTSEL_UNHALTED_CYCLES_P6 :
+                                         PESM_EVTSEL_UNHALTED_CYCLES_CORE;
+    int interval_ms = 2; /* sample interval in milliseconds */
+    int ticks = cpu_khz * interval_ms; /* see __udelay() */
+
+    msr_ctrval = 0 - ticks; /* ctr will roll up and over */
+
+    wrmsr(MSR_P6_EVNTSEL0, 0, 0);
+    wrmsr(MSR_P6_PERFCTR0, 0, 0);
+
+    msrval = P6_PESM__INT | P6_PESM__ANY | P6_PESM__KERNMODE | P6_PESM__USRMODE;
+    msrval |= UMASK_UNHALTED_CORE_CYCLES | eventsel;
+    wrmsr(MSR_P6_EVNTSEL0, msrval, 0);
+
+    wrmsrl(MSR_P6_PERFCTR0, msr_ctrval);
+    sav_perfctr_msr = MSR_P6_PERFCTR0;
+
+    apic_write(APIC_LVTPC, APIC_DM_NMI);
+
+    msrval |= P6_PESM__ENABLE;
+    wrmsr(MSR_P6_EVNTSEL0, msrval, 0);
+
+    return 0;
+}
+
+static noinline void stop_p6_ctrs(void)
+{
+    wrmsr(MSR_P6_EVNTSEL0, 0, 0);
+    wrmsr(MSR_P6_PERFCTR0, 0, 0);
+}
+
+static noinline int setup_intel_ctrs(void)
+{
+    int fam = boot_cpu_data.x86;
+
+    if ( fam == 6 )
+        return setup_p6_ctrs();
+    else
+        printk("xperf:CPU family %d not supported\n", fam);
+
+    return -1;
+}
+
+/* returns : 0 == success else the error from the vendor specific setup */
+static int xperf_cpu_init(void)
+{
+    int rc=0;
+
+    if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+        rc = setup_amd_ctrs();
+    else
+        rc = setup_intel_ctrs();
+    return rc;
+}
+
+static void xperf_cpu_deinit(void)
+{
+    if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+        stop_amd_ctrs();
+    else if (boot_cpu_data.x86 == 6)
+        stop_p6_ctrs();
+    else {
+        printk("xperf: invalid cpu family to stop counters\n");
+        return;
+    }
+}
+
+
+struct timer xmstat_timer;
+void xmstat_timer_fn(void *notused)
+{
+    int hwperf_deinit(void);
+
+    hwperf_deinit();
+    stop_timer(&xmstat_timer);
+}
+static void xmstat_dbg_timer(void)
+{
+    if (xmst_timer_debug)
+        init_timer(&xmstat_timer, xmstat_timer_fn, NULL, 0);
+}
+
+int hwperf_init(int sample_interval)
+{
+    int ccpu = smp_processor_id();
+    int irq_disabled = !local_irq_is_enabled();
+    int rc;
+
+    if ((rc=reserve_lapic_nmi())) {
+        printk("xperf: cpu:%d Unable to reserve LAPIC NMI timer.. \n", ccpu);
+        return rc;
+    }
+
+    set_nmi_callback(xmstat_nmi_callback);
+
+    xmstat_dbg_timer();
+
+    local_irq_enable();
+#if XEN_SUBVERSION > 4 || XEN_VERSION == 4 /* xen 3.5.x or above */
+    smp_call_function((void (*)(void *))xperf_cpu_init, NULL, 0);
+#else
+    smp_call_function((void (*)(void *))xperf_cpu_init, NULL, 0, 0);
+#endif
+
+    xperf_cpu_init(); /* turn perf on our own cpu */
+    mdelay(700);
+
+    if (xmst_timer_debug)
+        set_timer(&xmstat_timer, NOW() + MILLISECS(1000));
+
+    xperf_active = 1;
+
+    if (irq_disabled)
+        local_irq_disable();
+
+    printk("xperf engine started.\n");
+    return 0;
+}
+
+int hwperf_deinit(void)
+{
+    if (!xperf_active)
+        return 0;
+
+#if XEN_SUBVERSION > 4 || XEN_VERSION == 4 /* xen 3.5.x or above */
+    smp_call_function((void (*)(void *))xperf_cpu_deinit, NULL, 0);
+#else
+    smp_call_function((void (*)(void *))xperf_cpu_deinit, NULL, 0, 0);
+#endif
+
+    xperf_cpu_deinit();
+
+    mdelay(2000); /* for last counter to clear */
+    unset_nmi_callback();
+    release_lapic_nmi();
+    xperf_active = 0;
+    printk("xperf hw engine stopped.\n");
+
+    return 0;
+}
+
diff -r 52f536cda4ba xen/arch/x86/x86_64/perf/xmstat/xmstat.c
--- /dev/null	Thu Jan 
01 00:00:00 1970 +0000
+++ b/xen/arch/x86/x86_64/perf/xmstat/xmstat.c	Wed Aug 25 19:02:24 2010 -0700
@@ -0,0 +1,210 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+extern int hwperf_init(int sample_interval);
+extern int hwperf_deinit(void);
+extern unsigned int sav_perfctr_msr; /* the MSR to reset in NMI callback */
+extern volatile int mukdbg;
+extern int msr_ctrval;
+extern volatile unsigned int xperf_active;
+
+#define XMST_INVALID_DOMID (0xFFFFFFFFU)
+
+
+static DEFINE_SPINLOCK(xmstat_lock);
+DEFINE_PER_CPU(struct xmst_pcpu_meta *, xmst_pcpu_mp);
+static DEFINE_PER_CPU(struct xmst_pcpu_data *, xmst_pcpu_dp);
+static int _save_num_mfns;
+static void *_save_vaddr;
+static volatile int xmst_dbg=1;
+
+/* store the domain:vcpu this cpu is running into its next sample slot */
+static void xmstat_nmi_sample(struct cpu_user_regs *regs)
+{
+    uint32_t domid, vcpuid, idx;
+    struct xmst_pcpu_meta *metap = this_cpu(xmst_pcpu_mp);
+    struct xmst_pcpu_data *datap = this_cpu(xmst_pcpu_dp);
+
+    if (!xperf_active)
+        return; /* for debug purposes */
+
+    idx = metap->xmst_index++ & XMST_MAX_IDX;
+    if (is_idle_vcpu(current)) {
+        datap[idx].xmst_domid = XMST_INVALID_DOMID;
+        return;
+    }
+
+    /* if hyp doing something on behalf of guest, we charge the hyp */
+    domid = guest_mode(regs) ? current->domain->domain_id : IDLE_DOMAIN_ID;
+    vcpuid = guest_mode(regs) ? current->vcpu_id : -1;
+
+    datap[idx].xmst_domid = domid;
+    datap[idx].xmst_vcpu = vcpuid;
+}
+
+/* Returns: 1 if NMI handled */
+/* NOTE: cpu_khz << 4 is about a second */
+int xmstat_nmi_callback(struct cpu_user_regs *regs, int cpu)
+{
+#if 0
+    static int printctr;
+    int interval_ms = 5; /* sample interval in milliseconds */
+    int ticks = cpu_khz * interval_ms; /* see __udelay() */
+    int ctrval = 0 - ticks; /* ctr will roll up and over */
+    if (mukdbg) {
+        if ( (printctr++ % 200) == 0)
+            printk("MUK: nmicallback cpu:%d ctrval:0x%x\n", cpu, msr_ctrval);
+    }
+#endif
+
+    xmstat_nmi_sample(regs);
+
+    if ( sav_perfctr_msr == MSR_P6_PERFCTR0 )
+        apic_write(APIC_LVTPC, APIC_DM_NMI);
+    wrmsrl(sav_perfctr_msr, msr_ctrval);
+    return 1;
+}
+
+
+/* clear _PGC_allocated on a page owned by dp and, if it was set, put_page */
+static void
+unshare_xen_page_with_guest(struct page_info *page, struct domain *dp)
+{
+    BUG_ON(page_get_owner(page) != dp);
+    if (test_and_clear_bit(_PGC_allocated, &page->count_info))
+        put_page(page);
+}
+
+/* point every cpu in *maskp at its meta slot in page 0 and its data page */
+static noinline void
+xmstat_init_pcpu(cpumask_t *maskp, void *pg0p)
+{
+    int i=0, cpu;
+    struct xmst_meta *metap = pg0p;
+
+    for_each_cpu_mask(cpu , *maskp) {
+        metap->xmst_pcpu_meta[i].xmst_cpuid = cpu;
+        per_cpu(xmst_pcpu_mp, cpu) = &metap->xmst_pcpu_meta[i];
+        per_cpu(xmst_pcpu_dp, cpu) = pg0p + XMST_PAGE_SIZE*(i+1);
+
+        i++;
+        if (xmst_dbg)
+            printk("xmstat: cpu:%d mp:%p dp:%p\n", cpu,
+                   per_cpu(xmst_pcpu_mp, cpu), per_cpu(xmst_pcpu_dp, cpu));
+    }
+}
+
+/*
+ * Allocate one meta page plus one sample page per online cpu, share them
+ * with dom_xen and report their mfns and counts back through argp.
+ * Returns: 0 on success, -E2BIG or -ENOMEM on failure.
+ */
+static noinline int
+xmstat_init(struct xen_perfop_xmstat *argp)
+{
+    int i;
+    void *vaddr;
+    mfn_t mfn;
+
+    struct xmst_init *initp = &argp->u.xmst_init;
+    cpumask_t online_cpus = cpu_online_map;
+    int max_cpuid = last_cpu(online_cpus);
+    int num_pages = cpus_weight(online_cpus) + 1; /* 1 per cpu + 1 for meta */
+
+    if (num_pages > XMST_NUM_MFNS) {
+        printk("xmstat: xmst_mfn array not big enough for %d pgs\n", num_pages);
+        return -E2BIG;
+    }
+
+    vaddr = alloc_xenheap_pages(get_order_from_pages(num_pages), 0);
+    if (vaddr == NULL) {
+        printk("xmstat: failed to alloc %d xenheap pages\n", num_pages);
+        return -ENOMEM;
+    }
+
+    mfn = virt_to_mfn(vaddr);
+    for (i=0; i < num_pages; mfn++, i++) {
+        /* start from zeroed counters/samples, never from stale heap data */
+        clear_page(vaddr + XMST_PAGE_SIZE*i);
+        share_xen_page_with_guest(mfn_to_page(mfn), dom_xen, XENSHARE_writable);
+        initp->xmst_mfna[i] = mfn;
+    }
+
+    initp->xmst_num_cpus = cpus_weight(online_cpus);
+    initp->xmst_num_mfns = num_pages;
+    _save_num_mfns = num_pages;
+    _save_vaddr = vaddr;
+    printk("xmstat: mapped %d pages max_cpuid:%d\n", num_pages, max_cpuid);
+
+    xmstat_init_pcpu(&online_cpus, vaddr);
+    return 0;
+}
+
+/*
+ * Unshare the pages handed out by xmstat_init() and forget about them.
+ * Returns: 0 on success, -EINVAL if the engine is not running.
+ */
+static int
+xmstat_deinit(void)
+{
+    int i;
+    mfn_t mfn;
+
+    if (!_save_vaddr) {
+        printk("xmstat: Calling deinit when xmstat engine is not running\n");
+        return -EINVAL;
+    }
+    mfn = virt_to_mfn(_save_vaddr); /* translate only a known non-NULL vaddr */
+    for (i=0; i < _save_num_mfns; i++, mfn++) {
+        struct page_info *pgp = mfn_to_page(mfn);
+        unshare_xen_page_with_guest(pgp, dom_xen);
+    }
+    _save_vaddr = NULL;
+    _save_num_mfns = 0;
+
+    return 0;
+}
+
+
+/*
+ * Run one xmstat sub-command (argp->xmst_cmd) under xmstat_lock.
+ * Returns: 0 on success, -EEXIST if INIT is issued while already active,
+ * -EINVAL for an unknown command, else the error of the failing step.
+ */
+int xmstat_op(struct xen_perfop_xmstat *argp)
+{
+    int rc = -EINVAL;
+
+    spin_lock(&xmstat_lock);
+
+    if (argp->xmst_cmd == XMSTAT_INIT) {
+        if (_save_vaddr) {
+            printk("xmstat engine already active\n");
+            rc = -EEXIST;
+            goto out;
+        }
+        rc = xmstat_init(argp);
+        if (rc == 0 && (rc = hwperf_init(0)) != 0)
+            xmstat_deinit(); /* roll back so that a later INIT can succeed */
+
+    } else if (argp->xmst_cmd == XMSTAT_DEINIT) {
+        /* stop the samplers first: they write into the pages unshared below */
+        hwperf_deinit();
+        rc = xmstat_deinit();
+    }
+
+out:
+    spin_unlock(&xmstat_lock);
+    return rc;
+}
+
+
diff -r 52f536cda4ba xen/include/asm-x86/x86_64/asm_defns.h
--- a/xen/include/asm-x86/x86_64/asm_defns.h	Fri Jul 16 17:36:44 2010 -0700
+++ b/xen/include/asm-x86/x86_64/asm_defns.h	Wed Aug 25 19:02:24 2010 -0700
@@ -62,6 +62,38 @@ 1: addq $8,%rsp;
         popq %rsi; \
         popq %rdi;
 
+#ifdef XEN_PERF_XMSTAT
+#define PERF_EVENTS_ACCT(rbx_cur) \
+        testl $1, xperf_active(%rip); \
+        jz 2f; \
+        pushq rbx_cur; \
+        pushq %rdx; \
+        movslq VCPU_processor(rbx_cur), rbx_cur; \
+        lea per_cpu__xmst_pcpu_mp(%rip), %rdx; \
+        shl $PERCPU_SHIFT, rbx_cur; \
+        mov (%rdx,rbx_cur), %rdx; \
+        
incl 0x14(%rdx); \
+        popq %rdx; \
+        popq rbx_cur; \
+2:
+#define PERF_HCALL_ACCT(rax_notused, rbx_cur) \
+        testl $1, xperf_active(%rip); \
+        jz 2f; \
+        pushq rbx_cur; \
+        pushq %rdx; \
+        movslq VCPU_processor(rbx_cur), rbx_cur; \
+        lea per_cpu__xmst_pcpu_mp(%rip), %rdx; \
+        shl $PERCPU_SHIFT, rbx_cur; \
+        mov (%rdx,rbx_cur), %rdx; \
+        incl 0x10(%rdx); \
+        popq %rdx; \
+        popq rbx_cur; \
+2:
+#else
+#define PERF_HCALL_ACCT(rax_arg, rbx_cur)
+#define PERF_EVENTS_ACCT(rbx_cur)
+#endif
+
 #ifdef PERF_COUNTERS
 #define PERFC_INCR(_name,_idx,_cur) \
         pushq _cur; \
diff -r 52f536cda4ba xen/include/public/perf/xmstat.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/public/perf/xmstat.h	Wed Aug 25 19:02:24 2010 -0700
@@ -0,0 +1,76 @@
+#ifndef __XEN_PUBLIC_XMSTAT_H__
+#define __XEN_PUBLIC_XMSTAT_H__
+
+#ifdef __XEN__
+#include
+#endif
+
+#ifndef BUILD_BUG_ON
+ #define BUILD_BUG_ON(p) ((void)sizeof(struct { int:-!!(p); }))
+#endif
+
+
+#define XMST_MAX_CPUS 170 /* 4096/24: every xmst_pcpu_meta must fit page 0 */
+#define XMST_NUM_MFNS (XMST_MAX_CPUS+1) /* +1 for meta data */
+
+
+enum {
+    XMSTAT_INIT = 1, /* initialize xmstat */
+    XMSTAT_START, /* start collecting samples */
+    XMSTAT_DEINIT, /* stop, end, and cleanup */
+};
+
+struct xmst_init {
+    int xmst_interval; /* IN/OUT: sample interval */
+    int xmst_num_mfns; /* OUT: num pages mapped */
+    int xmst_num_cpus; /* OUT: number of cpus being traced */
+    uint64_t xmst_mfna[XMST_NUM_MFNS];/* OUT: array of mfns */
+};
+
+struct xen_perfop_xmstat {
+    int xmst_cmd; /* XMSTAT_INIT etc.. */
+    union {
+        struct xmst_init xmst_init;
+    } u;
+};
+
+
+/*
+ * xmstat per cpu data area. PERF_HCALL_ACCT and PERF_EVENTS_ACCT increment
+ * this struct at raw byte offsets 0x10 and 0x14: keep the field order.
+ */
+struct xmst_pcpu_meta {
+    int xmst_cpuid; /* physical cpu id */
+    int xmst_index; /* index of next entry */
+    int xmst_pfaults; /* number of page faults */
+    int xmst_interrupts; /* number of interrupts */
+    int xmst_hcalls; /* number of hypercalls */
+    int xmst_evtchns; /* number of event channel ops */
+};
+
+/* xmst_meta resides on page 0, so it must not outgrow XMST_PAGE_SIZE */
+struct xmst_meta {
+    struct xmst_pcpu_meta xmst_pcpu_meta[XMST_MAX_CPUS];
+};
+
+/* each cpu page has an array of these where samples are taken */
+struct xmst_pcpu_data {
+    uint32_t xmst_domid; /* domain id */
+    uint32_t xmst_vcpu; /* vcpu id */
+};
+
+#define XMST_PAGE_SIZE 4096
+#define XMST_MAX_IDX (XMST_PAGE_SIZE/sizeof(struct xmst_pcpu_data) - 1)
+
+#ifdef __XEN__
+DECLARE_PER_CPU(struct xmst_pcpu_meta *, xmst_pcpu_mp);
+extern volatile unsigned int xperf_active;
+#endif
+
+#if 0
+typedef struct xmst_pcpu_data
+    xmst_pcpu_data_a[XMST_PAGE_SIZE/sizeof(struct xmst_pcpu_data)]
+    xmst_pcpu_data_t;
+#endif
+
+#endif /* __XEN_PUBLIC_XMSTAT_H__ */
diff -r 52f536cda4ba xen/include/public/xen.h
--- a/xen/include/public/xen.h	Fri Jul 16 17:36:44 2010 -0700
+++ b/xen/include/public/xen.h	Wed Aug 25 19:02:24 2010 -0700
@@ -93,6 +93,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
 #define __HYPERVISOR_domctl 36
 #define __HYPERVISOR_kexec_op 37
 #define __HYPERVISOR_tmem_op 38
+#define __HYPERVISOR_perf_op 39
 
 /* Architecture-specific hypercall definitions. */
 #define __HYPERVISOR_arch_0 48
diff -r 52f536cda4ba xen/include/public/xenperf.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/public/xenperf.h	Wed Aug 25 19:02:24 2010 -0700
@@ -0,0 +1,35 @@
+#ifndef __XEN_PUBLIC_XENPERF_H__
+#define __XEN_PUBLIC_XENPERF_H__
+
+#include "xen.h"
+#include "perf/xmstat.h"
+
+#define XENPERF_INTERFACE_VERSION 0x00000001
+
+enum {
+    XEN_PERFOP_XMSTAT=1,
+};
+
+struct xen_perfop_xmstat;
+struct xen_perfop {
+    uint32_t cmd;
+    uint32_t interface_version; /* XENPERF_INTERFACE_VERSION */
+    union {
+        struct xen_perfop_xmstat perf_xmstat;
+    } u;
+};
+
+typedef struct xen_perfop xen_perfop_t;
+DEFINE_XEN_GUEST_HANDLE(xen_perfop_t);
+
+#endif /* __XEN_PUBLIC_XENPERF_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 52f536cda4ba xen/include/xen/hypercall.h
--- a/xen/include/xen/hypercall.h	Fri Jul 16 17:36:44 2010 -0700
+++ b/xen/include/xen/hypercall.h	Wed Aug 25 19:02:24 2010 -0700
@@ -16,6 +16,7 @@
 #include
 #include
 #include
+#include
 
 extern long
 do_ni_hypercall(
@@ -124,6 +125,9 @@ do_tmem_op(
 extern int
 do_xenoprof_op(int op, XEN_GUEST_HANDLE(void) arg);
 
+extern long
+do_perf_op(XEN_GUEST_HANDLE(xen_perfop_t) u_perfop);
+
 #ifdef CONFIG_COMPAT
 
 extern int