diff -r a1d0a575b4ba tools/xentrace/xentrace.c
--- a/tools/xentrace/xentrace.c	Wed Jan 06 10:13:55 2010 +0000
+++ b/tools/xentrace/xentrace.c	Wed Jan 06 18:14:16 2010 +0000
@@ -61,6 +61,12 @@
     disable_tracing:1;
 } settings_t;
 
+struct t_struct {
+    struct t_info *t_info;   /* Structure with information about individual buffers */
+    struct t_buf **meta;     /* Pointers to trace buffer metadata */
+    unsigned char **data;    /* Pointers to trace buffer data areas */
+};
+
 settings_t opts;
 
 int interrupted = 0; /* gets set if we get a SIGHUP */
@@ -446,22 +452,61 @@
  *
  * Maps the Xen trace buffers them into process address space.
  */
-static struct t_buf *map_tbufs(unsigned long tbufs_mfn, unsigned int num,
-                               unsigned long size)
+static struct t_struct *map_tbufs(unsigned long tbufs_mfn, unsigned int num,
+                                  unsigned long tinfo_size)
 {
-    struct t_buf *tbufs_mapped;
+    static struct t_struct tbufs = { 0 };
+    int i;
 
-    tbufs_mapped = xc_map_foreign_range(xc_handle, DOMID_XEN,
-                                        size * num, PROT_READ | PROT_WRITE,
+    /* Map t_info metadata structure */
+    tbufs.t_info = xc_map_foreign_range(xc_handle, DOMID_XEN,
+                                        tinfo_size, PROT_READ | PROT_WRITE,
                                         tbufs_mfn);
 
-    if ( tbufs_mapped == 0 )
+    if ( tbufs.t_info == 0 )
     {
         PERROR("Failed to mmap trace buffers");
         exit(EXIT_FAILURE);
     }
 
-    return tbufs_mapped;
+    if ( tbufs.t_info->tbuf_size == 0 )
+    {
+        fprintf(stderr, "%s: tbuf_size 0!\n", __func__);
+        exit(EXIT_FAILURE);
+    }
+
+    /* Map per-cpu buffers */
+    tbufs.meta = (struct t_buf **)calloc(num, sizeof(struct t_buf *));
+    tbufs.data = (unsigned char **)calloc(num, sizeof(unsigned char *));
+    if ( tbufs.meta == NULL || tbufs.data == NULL )
+    {
+        PERROR( "Failed to allocate memory for buffer pointers\n");
+        exit(EXIT_FAILURE);
+    }
+
+    for(i=0; i<num; i++)
+    {
+        uint32_t *mfn_list = ((uint32_t *)tbufs.t_info) + tbufs.t_info->mfn_offset[i];
+        int j;
+        xen_pfn_t pfn_list[tbufs.t_info->tbuf_size];
+
+        for ( j=0; j<tbufs.t_info->tbuf_size; j++)
+            pfn_list[j] = (xen_pfn_t)mfn_list[j];
+
+        tbufs.meta[i] = xc_map_foreign_batch(xc_handle, DOMID_XEN,
+                                             PROT_READ | PROT_WRITE,
+                                             pfn_list,
+                                             tbufs.t_info->tbuf_size);
+        if ( tbufs.meta[i] == NULL )
+        {
+            PERROR("Failed to map cpu buffer!");
+            exit(EXIT_FAILURE);
+        }
+        tbufs.data[i] = (unsigned char *)(tbufs.meta[i]+1);
+    }
+
+    return &tbufs;
 }
 
 /**
@@ -490,66 +535,6 @@
 }
 
 /**
- * init_bufs_ptrs - initialises an array of pointers to the trace buffers
- * @bufs_mapped:    the userspace address where the trace buffers are mapped
- * @num:            number of trace buffers
- * @size:           trace buffer size
- *
- * Initialises an array of pointers to individual trace buffers within the
- * mapped region containing all trace buffers.
- */
-static struct t_buf **init_bufs_ptrs(void *bufs_mapped, unsigned int num,
-                                     unsigned long size)
-{
-    int i;
-    struct t_buf **user_ptrs;
-
-    user_ptrs = (struct t_buf **)calloc(num, sizeof(struct t_buf *));
-    if ( user_ptrs == NULL )
-    {
-        PERROR( "Failed to allocate memory for buffer pointers\n");
-        exit(EXIT_FAILURE);
-    }
-
-    /* initialise pointers to the trace buffers - given the size of a trace
-     * buffer and the value of bufs_maped, we can easily calculate these */
-    for ( i = 0; i<num; i++ )
-        user_ptrs[i] = (struct t_buf *)((unsigned long)bufs_mapped + size * i);
-
-    return user_ptrs;
-}
-
-/**
- * init_rec_ptrs - initialises data area pointers to locations in user space
- * @meta:           array of user-space pointers to struct t_buf's of metadata
- * @num:            number of trace buffers
- *
- * Initialises data area pointers to the locations that data areas have been
- * mapped in user space.  Note that the trace buffer metadata contains machine
- * pointers - the array returned allows more convenient access to them.
- */
-static unsigned char **init_rec_ptrs(struct t_buf **meta, unsigned int num)
-{
-    int i;
-    unsigned char **data;
-
-    data = calloc(num, sizeof(unsigned char *));
-    if ( data == NULL )
-    {
-        PERROR("Failed to allocate memory for data pointers\n");
-        exit(EXIT_FAILURE);
-    }
-
-    for ( i = 0; i < num; i++ )
-        data[i] = (unsigned char *)(meta[i] + 1);
-
-    return data;
-}
@@ -567,14 +552,15 @@
     num = get_num_cpus();
 
     /* setup access to trace buffers */
-    get_tbufs(&tbufs_mfn, &size);
-    tbufs_mapped = map_tbufs(tbufs_mfn, num, size);
+    get_tbufs(&tbufs_mfn, &tinfo_size);
+    tbufs = map_tbufs(tbufs_mfn, num, tinfo_size);
+
+    size = tbufs->t_info->tbuf_size * PAGE_SIZE;
 
     data_size = size - sizeof(struct t_buf);
 
-    /* build arrays of convenience ptrs */
-    meta  = init_bufs_ptrs(tbufs_mapped, num, size);
-    data  = init_rec_ptrs(meta, num);
+    meta = tbufs->meta;
+    data = tbufs->data;
 
     if ( opts.discard )
         for ( i = 0; i < num; i++ )
diff -r a1d0a575b4ba xen/common/trace.c
--- a/xen/common/trace.c	Wed Jan 06 10:13:55 2010 +0000
+++ b/xen/common/trace.c	Wed Jan 06 18:14:16 2010 +0000
@@ -46,8 +46,11 @@
 integer_param("tbuf_size", opt_tbuf_size);
 
 /* Pointers to the meta-data objects for all system trace buffers */
+static struct t_info *t_info;
+#define T_INFO_PAGES 2  /* Size fixed at 2 pages for now. */
 static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs);
 static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data);
+static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock);
 static int data_size;
 
 /* High water mark for trace buffers; */
@@ -80,41 +83,104 @@
  */
 static int alloc_trace_bufs(void)
 {
-    int i, order;
+    int i, cpu, order;
     unsigned long nr_pages;
-    char *rawbuf;
-    struct t_buf *buf;
+    /* Start after a fixed-size array of NR_CPUS */
+    uint32_t *t_info_mfn_list = (uint32_t *)t_info;
+    int offset = (NR_CPUS * 2 + 1 + 1) / 4;
 
     if ( opt_tbuf_size == 0 )
         return -EINVAL;
 
-    nr_pages = num_online_cpus() * opt_tbuf_size;
-    order = get_order_from_pages(nr_pages);
-    data_size = (opt_tbuf_size * PAGE_SIZE - sizeof(struct t_buf));
-
-    if ( (rawbuf = alloc_xenheap_pages(order, 0)) == NULL )
+    if ( !t_info )
     {
-        printk("Xen trace buffers: memory allocation failed\n");
-        opt_tbuf_size = 0;
+        printk("%s: t_info not allocated, cannot allocate trace buffers!\n",
+               __func__);
         return -EINVAL;
     }
 
-    /* Share pages so that xentrace can map them. */
-    for ( i = 0; i < nr_pages; i++ )
-        share_xen_page_with_privileged_guests(
-            virt_to_page(rawbuf) + i, XENSHARE_writable);
+    t_info->tbuf_size = opt_tbuf_size;
+    printk("tbuf_size %d\n", t_info->tbuf_size);
 
-    for_each_online_cpu ( i )
+    nr_pages = opt_tbuf_size;
+    order = get_order_from_pages(nr_pages);
+
+    /*
+     * First, allocate buffers for all of the cpus.  If any
+     * fails, deallocate what you have so far and exit.
+     */
+    for_each_online_cpu(cpu)
     {
-        buf = per_cpu(t_bufs, i) = (struct t_buf *)
-            &rawbuf[i*opt_tbuf_size*PAGE_SIZE];
+        unsigned long flags;
+        char *rawbuf;
+        struct t_buf *buf;
+
+        if ( (rawbuf = alloc_xenheap_pages(order, 0)) == NULL )
+        {
+            printk("Xen trace buffers: memory allocation failed\n");
+            opt_tbuf_size = 0;
+            goto out_dealloc;
+        }
+
+        spin_lock_irqsave(&per_cpu(t_lock, cpu), flags);
+
+        buf = per_cpu(t_bufs, cpu) = (struct t_buf *)rawbuf;
         buf->cons = buf->prod = 0;
-        per_cpu(t_data, i) = (unsigned char *)(buf + 1);
+        per_cpu(t_data, cpu) = (unsigned char *)(buf + 1);
+
+        spin_unlock_irqrestore(&per_cpu(t_lock, cpu), flags);
     }
+    /*
+     * Now share the pages so that xentrace can map them, and write
+     * them into the global t_info structure.
+     */
+    for_each_online_cpu(cpu)
+    {
+        /* Share pages so that xentrace can map them. */
+        char *rawbuf;
+
+        if ( (rawbuf = (char *)per_cpu(t_bufs, cpu)) )
+        {
+            struct page_info *p = virt_to_page(rawbuf);
+            uint32_t mfn = virt_to_mfn(rawbuf);
+
+            for ( i = 0; i < nr_pages; i++ )
+            {
+                share_xen_page_with_privileged_guests(
+                    p + i, XENSHARE_writable);
+
+                t_info_mfn_list[offset + i]=mfn + i;
+            }
+            /* Write list first, then write per-cpu offset. */
+            wmb();
+            t_info->mfn_offset[cpu]=offset;
+            printk("p%d mfn %"PRIx32" offset %d\n",
+                   cpu, mfn, offset);
+            offset+=i;
+        }
+    }
+
+    data_size = (opt_tbuf_size * PAGE_SIZE - sizeof(struct t_buf));
 
     t_buf_highwater = data_size >> 1; /* 50% high water */
 
     return 0;
+out_dealloc:
+    for_each_online_cpu(cpu)
+    {
+        unsigned long flags;
+        char *rawbuf;
+
+        spin_lock_irqsave(&per_cpu(t_lock, cpu), flags);
+        if ( (rawbuf = (char *)per_cpu(t_bufs, cpu)) )
+        {
+            ASSERT(!(virt_to_page(rawbuf)->count_info & PGC_allocated));
+            free_xenheap_pages(rawbuf, order);
+        }
+        spin_unlock_irqrestore(&per_cpu(t_lock, cpu), flags);
+    }
+    return -EINVAL;
 }
 
@@ -181,6 +247,26 @@
  */
 void __init init_trace_bufs(void)
 {
+    int i;
+    /* t_info size fixed at 2 pages for now. That should be big enough / small enough
+     * until it's worth making it dynamic. */
+    t_info = alloc_xenheap_pages(1, 0);
+
+    if ( t_info == NULL )
+    {
+        printk("Xen trace buffers: t_info allocation failed! Tracing disabled.\n");
+        return;
+    }
+
+    for(i = 0; i < NR_CPUS; i++)
+        spin_lock_init(&per_cpu(t_lock, i));
+
+    for(i=0; i<T_INFO_PAGES; i++)
+        share_xen_page_with_privileged_guests(
+            virt_to_page(t_info) + i, XENSHARE_readonly);
+
     if ( opt_tbuf_size == 0 )
     {
         printk("Xen trace buffers: disabled\n");
@@ -210,8 +296,8 @@
     switch ( tbc->cmd )
     {
     case XEN_SYSCTL_TBUFOP_get_info:
         tbc->evt_mask   = tb_event_mask;
-        tbc->buffer_mfn = opt_tbuf_size ? virt_to_mfn(per_cpu(t_bufs, 0)) : 0;
-        tbc->size = opt_tbuf_size * PAGE_SIZE;
+        tbc->buffer_mfn = t_info ? virt_to_mfn(t_info) : 0;
+        tbc->size = T_INFO_PAGES;
         break;
     case XEN_SYSCTL_TBUFOP_set_cpu_mask:
         xenctl_cpumap_to_cpumask(&tb_cpu_mask, &tbc->cpu_mask);
@@ -220,7 +306,7 @@
         tb_event_mask = tbc->evt_mask;
         break;
     case XEN_SYSCTL_TBUFOP_set_size:
-        rc = !tb_init_done ? tb_set_size(tbc->size) : -EINVAL;
+        rc = tb_set_size(tbc->size);
         break;
     case XEN_SYSCTL_TBUFOP_enable:
         /* Enable trace buffers. Check buffers are already allocated. */
@@ -428,7 +514,7 @@
     unsigned long flags, bytes_to_tail, bytes_to_wrap;
     int rec_size, total_size;
     int extra_word;
-    int started_below_highwater;
+    int started_below_highwater = 0;
 
     if( !tb_init_done )
         return;
@@ -462,9 +548,12 @@
     /* Read tb_init_done /before/ t_bufs. */
     rmb();
 
+    spin_lock_irqsave(&this_cpu(t_lock), flags);
+
     buf = this_cpu(t_bufs);
 
-    local_irq_save(flags);
+    if ( unlikely(!buf) )
+        goto unlock;
 
     started_below_highwater = (calc_unconsumed_bytes(buf) < t_buf_highwater);
@@ -511,8 +600,8 @@
     {
         if ( ++this_cpu(lost_records) == 1 )
             this_cpu(lost_records_first_tsc)=(u64)get_cycles();
-        local_irq_restore(flags);
-        return;
+        started_below_highwater = 0;
+        goto unlock;
     }
 
     /*
@@ -541,7 +630,8 @@
     /* Write the original record */
     __insert_record(buf, event, extra, cycles, rec_size, extra_data);
 
-    local_irq_restore(flags);
+unlock:
+    spin_unlock_irqrestore(&this_cpu(t_lock), flags);
 
     /* Notify trace buffer consumer that we've crossed the high water mark. */
     if ( started_below_highwater &&
diff -r a1d0a575b4ba xen/include/public/sysctl.h
--- a/xen/include/public/sysctl.h	Wed Jan 06 10:13:55 2010 +0000
+++ b/xen/include/public/sysctl.h	Wed Jan 06 18:14:16 2010 +0000
@@ -75,7 +75,7 @@
     uint32_t evt_mask;
     /* OUT variables */
     uint64_aligned_t buffer_mfn;
-    uint32_t size;
+    uint32_t size;  /* Also an IN variable! */
 };
 typedef struct xen_sysctl_tbuf_op xen_sysctl_tbuf_op_t;
 DEFINE_XEN_GUEST_HANDLE(xen_sysctl_tbuf_op_t);
diff -r a1d0a575b4ba xen/include/public/trace.h
--- a/xen/include/public/trace.h	Wed Jan 06 10:13:55 2010 +0000
+++ b/xen/include/public/trace.h	Wed Jan 06 18:14:16 2010 +0000
@@ -195,6 +195,16 @@
     /* Records follow immediately after the meta-data header.    */
 };
 
+/* Structure used to pass the MFNs of the trace buffers back to trace consumers.
+ * mfn_offset[cpu] is the offset into the mapped structure at which that cpu's MFN list is held.
+ * MFNs will be at ((uint32_t *)(t_info)) + (t_info->mfn_offset[cpu]).
+ */
+struct t_info {
+    uint16_t tbuf_size;     /* Size in pages of each trace buffer */
+    uint16_t mfn_offset[];  /* Offset (in uint32_t units) within t_info of each cpu's MFN list */
+    /* MFN lists immediately after the header */
+};
+
 #endif /* __XEN_PUBLIC_TRACE_H__ */
 
 /*
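
For readers following the new t_info layout, here is a small, self-contained sketch (not part of the patch) of the convention the producer and consumer agree on: tbuf_size and the per-cpu mfn_offset[] entries sit at the start of the shared area, each offset counted in uint32_t units from the start of the structure, with the MFN lists packed after the header. NR_CPUS, TBUF_PAGES, the fake MFN values and the file name are made-up demo parameters, and the header round-up is spelled out explicitly here rather than using the patch's hard-coded expression.

/* Demo of the t_info layout: a producer lays out per-cpu MFN lists roughly the
 * way alloc_trace_bufs() does, and a consumer reads them back the way
 * map_tbufs() does.  Build with: gcc -std=c99 -o t_info_demo t_info_demo.c */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS    4   /* demo value only; Xen uses its compile-time NR_CPUS */
#define TBUF_PAGES 2   /* demo per-cpu trace buffer size, in pages */

struct t_info {
    uint16_t tbuf_size;      /* pages per per-cpu trace buffer */
    uint16_t mfn_offset[];   /* per-cpu offset of its MFN list, in uint32_t units */
};

int main(void)
{
    uint32_t *raw = calloc(1024, sizeof(uint32_t));   /* stands in for the shared pages */
    struct t_info *ti = (struct t_info *)raw;
    /* Header (tbuf_size + NR_CPUS offsets) rounded up to whole uint32_t words. */
    unsigned int offset =
        (sizeof(uint16_t) * (NR_CPUS + 1) + sizeof(uint32_t) - 1) / sizeof(uint32_t);
    unsigned int cpu, i;

    /* Producer side (cf. alloc_trace_bufs): write each cpu's MFN list, then
     * publish where it lives via mfn_offset[cpu]. */
    ti->tbuf_size = TBUF_PAGES;
    for ( cpu = 0; cpu < NR_CPUS; cpu++ )
    {
        for ( i = 0; i < TBUF_PAGES; i++ )
            raw[offset + i] = 0x1000 * (cpu + 1) + i;   /* fake MFNs */
        ti->mfn_offset[cpu] = offset;
        offset += TBUF_PAGES;
    }

    /* Consumer side (cf. map_tbufs): recover each cpu's MFN list and print it.
     * A real consumer would hand these frame numbers to xc_map_foreign_batch(). */
    for ( cpu = 0; cpu < NR_CPUS; cpu++ )
    {
        const uint32_t *mfn_list = (const uint32_t *)ti + ti->mfn_offset[cpu];

        printf("cpu%u:", cpu);
        for ( i = 0; i < ti->tbuf_size; i++ )
            printf(" 0x%"PRIx32, mfn_list[i]);
        printf("\n");
    }

    free(raw);
    return 0;
}

Publishing the buffers as per-cpu MFN lists rather than as one physically contiguous region is what lets alloc_trace_bufs() above allocate (and, on failure, free) each cpu's buffer independently, instead of requiring a single large high-order allocation.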