[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v4 4/4] Xentrace: add support for HVM's PI blocking list operation



In order to analyze the frequency of PI blocking list operations and obtain
the list length, add some relevant events to xentrace and some
associated code in xenalyze. Event ASYNC_PI_LIST_DEL may happen in interrupt
context, which means the current assumptions checked in toplevel_assert_check()
are no longer suitable. Thus, this patch extends toplevel_assert_check()
to remove such assumptions for events of type ASYNC_PI_LIST_DEL.

Signed-off-by: Chao Gao <chao.gao@xxxxxxxxx>
---
v4:
 - trace part of Patch 1 in v3

---
 tools/xentrace/formats          |   3 +
 tools/xentrace/xenalyze.c       | 154 +++++++++++++++++++++++++++++++++++++---
 xen/arch/x86/hvm/vmx/vmx.c      |  21 +++++-
 xen/include/asm-x86/hvm/trace.h |   1 +
 xen/include/public/trace.h      |   5 ++
 5 files changed, 172 insertions(+), 12 deletions(-)

diff --git a/tools/xentrace/formats b/tools/xentrace/formats
index 8b31780..54e0b11 100644
--- a/tools/xentrace/formats
+++ b/tools/xentrace/formats
@@ -125,6 +125,9 @@
 0x00082020  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  INTR_WINDOW [ value = 
0x%(1)08x ]
 0x00082021  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  NPF         [ gpa = 
0x%(2)08x%(1)08x mfn = 0x%(4)08x%(3)08x qual = 0x%(5)04x p2mt = 0x%(6)04x ]
 0x00082023  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  TRAP        [ vector = 
0x%(1)02x ]
+0x00082026  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  PI_LIST_ADD [ domid = 
0x%(1)04x vcpu = 0x%(2)04x, pcpu = 0x%(3)04x, #entry = 0x%(4)04x ]
+
+0x00088001  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  ASYNC_PI_LIST_DEL [ domid = 
0x%(1)04x vcpu = 0x%(2)04x ]
 
 0x0010f001  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  page_grant_map      [ domid = 
%(1)d ]
 0x0010f002  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  page_grant_unmap    [ domid = 
%(1)d ]
diff --git a/tools/xentrace/xenalyze.c b/tools/xentrace/xenalyze.c
index fa608ad..fbc2429 100644
--- a/tools/xentrace/xenalyze.c
+++ b/tools/xentrace/xenalyze.c
@@ -296,6 +296,7 @@ struct symbol_struct {
 };
 
 void error(enum error_level l, struct record_info *ri);
+struct vcpu_data * vcpu_find(int did, int vid);
 
 void parse_symbol_file(char *fn) {
     unsigned long long last_addr = 0;
@@ -944,6 +945,7 @@ enum {
     HVM_EVENT_TRAP,
     HVM_EVENT_TRAP_DEBUG,
     HVM_EVENT_VLAPIC,
+    HVM_EVENT_PI_LIST_ADD,
     HVM_EVENT_HANDLER_MAX
 };
 char * hvm_event_handler_name[HVM_EVENT_HANDLER_MAX] = {
@@ -979,13 +981,15 @@ char * hvm_event_handler_name[HVM_EVENT_HANDLER_MAX] = {
     "realmode_emulate",
     "trap",
     "trap_debug",
-    "vlapic"
+    "vlapic",
+    "pi_list_add",
 };
 
 enum {
     HVM_VOL_VMENTRY,
     HVM_VOL_VMEXIT,
     HVM_VOL_HANDLER,
+    HVM_VOL_ASYNC,
     HVM_VOL_MAX
 };
 
@@ -1012,6 +1016,7 @@ char *hvm_vol_name[HVM_VOL_MAX] = {
     [HVM_VOL_VMENTRY]="vmentry",
     [HVM_VOL_VMEXIT] ="vmexit",
     [HVM_VOL_HANDLER]="handler",
+    [HVM_VOL_ASYNC]="async",
 };
 
 enum {
@@ -1337,6 +1342,9 @@ struct hvm_data {
         struct {
             struct io_address *mmio, *pio;
         } io;
+        struct {
+            int pi_list_add, pi_list_del;
+        } pi;
     } summary;
 
     /* In-flight accumulation information */
@@ -1391,6 +1399,9 @@ struct hvm_data {
 
     /* Historical info */
     tsc_t last_rdtsc;
+
+    /* Destination pcpu of posted interrupt's wakeup interrupt */
+    int pi_cpu;
 };
 
 enum {
@@ -1457,6 +1468,8 @@ void init_hvm_data(struct hvm_data *h, struct vcpu_data 
*v) {
     }
     for(i=0; i<GUEST_INTERRUPT_MAX+1; i++)
         h->summary.guest_interrupt[i].count=0;
+
+    h->pi_cpu = -1;
 }
 
 /* PV data */
@@ -1766,6 +1779,14 @@ char * toplevel_name[TOPLEVEL_MAX] = {
     [TOPLEVEL_HW]="hw",
 };
 
+enum {
+    SUBLEVEL_HVM_ENTRYEXIT=1,
+    SUBLEVEL_HVM_HANDLER,
+    SUBLEVEL_HVM_EMUL,
+    SUBLEVEL_HVM_ASYNC,
+    SUBLEVEL_HVM_MAX=SUBLEVEL_HVM_ASYNC+1,
+};
+
 struct trace_volume {
     unsigned long long toplevel[TOPLEVEL_MAX];
     unsigned long long sched_verbose;
@@ -1852,6 +1873,9 @@ struct pcpu_info {
         tsc_t tsc;
         struct cycle_summary idle, running, lost;
     } time;
+
+    /* Posted Interrupt List Length */
+    int pi_list_length;
 };
 
 void __fill_in_record_info(struct pcpu_info *p);
@@ -4726,6 +4750,71 @@ void hvm_generic_dump(struct record_info *ri, char * 
prefix)
     printf(" ]\n");
 }
 
+void hvm_pi_list_add_process(struct record_info *ri, struct hvm_data *h)
+{
+    struct {
+        int did;
+        int vid;
+        int pcpu;
+        int len;
+    } *data;
+    struct vcpu_data *v;
+
+    data = (typeof(data))ri->rec.u.tsc.data;
+    v = vcpu_find(data->did, data->vid);
+    if ( !v->hvm.init )
+        init_hvm_data(&v->hvm, v);
+
+    if ( opt.dump_all )
+        printf("d%uv%u is added to pi blocking list of pcpu%u. "
+               "The list length is now %d\n",
+               data->did, data->vid, data->pcpu, data->len);
+
+    v->hvm.pi_cpu = data->pcpu;
+    v->hvm.summary.pi.pi_list_add++;
+    if ( data->pcpu > P.max_active_pcpu || !P.pcpu[data->pcpu].active )
+        fprintf(stderr, "Strange! pcpu%u is inactive but a vcpu is added "
+                "to it", data->pcpu);
+    else if ( P.pcpu[data->pcpu].pi_list_length == -1 )
+        P.pcpu[data->pcpu].pi_list_length = data->len;
+    else if ( data->len != ++P.pcpu[data->pcpu].pi_list_length )
+        /*
+         * Correct pi list length. Removing one vcpu that is already on the
+         * list before tracing starts would not decrease the pi list length;
+         * the list length would be inaccurate.
+         */
+        P.pcpu[data->pcpu].pi_list_length = data->len;
+}
+
+void hvm_pi_list_del_process(struct record_info *ri)
+{
+    struct {
+        int did;
+        int vid;
+    } *data;
+    struct vcpu_data *v;
+
+    data = (typeof(data))ri->rec.u.tsc.data;
+    v = vcpu_find(data->did, data->vid);
+    if ( !v->hvm.init )
+        init_hvm_data(&v->hvm, v);
+
+    if ( opt.dump_all )
+    {
+        if ( v->hvm.pi_cpu != -1 )
+            printf("d%uv%u is removed from pi blocking list of pcpu%u\n",
+                   data->did, data->vid, v->hvm.pi_cpu);
+        else
+            printf("d%uv%u is removed from pi blocking list\n",
+                   data->did, data->vid);
+    }
+
+    if ( P.pcpu[v->hvm.pi_cpu].pi_list_length != -1 )
+        P.pcpu[v->hvm.pi_cpu].pi_list_length--;
+    v->hvm.pi_cpu = -1;
+    v->hvm.summary.pi.pi_list_del++;
+}
+
 void hvm_handler_process(struct record_info *ri, struct hvm_data *h) {
     /* Wait for first vmexit to initialize */
     if(!h->init)
@@ -4763,6 +4852,9 @@ void hvm_handler_process(struct record_info *ri, struct 
hvm_data *h) {
     case TRC_HVM_INTR_WINDOW:
         hvm_intr_window_process(ri, h);
         break;
+    case TRC_HVM_PI_LIST_ADD:
+        hvm_pi_list_add_process(ri, h);
+        break;
     case TRC_HVM_OP_DESTROY_PROC:
         if(h->v->cr3.data) {
             struct cr3_value_struct *cur = h->v->cr3.data;
@@ -4862,7 +4954,6 @@ needs_vmexit:
 void vcpu_next_update(struct pcpu_info *p, struct vcpu_data *next, tsc_t tsc);
 void vcpu_prev_update(struct pcpu_info *p, struct vcpu_data *prev,
                       tsc_t tsc, int new_runstate);
-struct vcpu_data * vcpu_find(int did, int vid);
 void lose_vcpu(struct vcpu_data *v, tsc_t tsc);
 
 int domain_runstate(struct domain_data *d) {
@@ -5267,6 +5358,21 @@ void hvm_process(struct pcpu_info *p)
     struct vcpu_data *v = p->current;
     struct hvm_data *h = &v->hvm;
 
+    if(ri->evt.sub == 8)
+    {
+        UPDATE_VOLUME(p, hvm[HVM_VOL_ASYNC], ri->size);
+
+        switch(ri->event) {
+        case TRC_HVM_ASYNC_PI_LIST_DEL:
+            hvm_pi_list_del_process(ri);
+            break;
+
+        default:
+            fprintf(warn, "Unknown hvm event: %x\n", ri->event);
+        }
+        return;
+    }
+
     assert(p->current);
 
     if(vcpu_set_data_type(p->current, VCPU_DATA_HVM))
@@ -5359,6 +5465,10 @@ void hvm_summary(struct hvm_data *h) {
                   i, h->summary.ipi_count[i]);
    hvm_io_address_summary(h->summary.io.pio, "IO address summary:");
    hvm_io_address_summary(h->summary.io.mmio, "MMIO address summary:");
+
+   printf("Posted Interrupt:\n");
+   printf(" List Add: %u\n", h->summary.pi.pi_list_add);
+   printf(" List Del: %u\n", h->summary.pi.pi_list_del);
 }
 
 /* ---- Shadow records ---- */
@@ -8962,6 +9072,7 @@ off_t scan_for_new_pcpu(off_t offset) {
 
         p->file_offset = offset;
         p->next_cpu_change_offset = offset;
+        p->pi_list_length = -1;
 
         record_order_insert(p);
 
@@ -9142,7 +9253,6 @@ int find_toplevel_event(struct record_info *ri)
     return toplevel;
 }
 
-
 void process_cpu_change(struct pcpu_info *p) {
     struct record_info *ri = &p->ri;
     struct cpu_change_data *r = (typeof(r))ri->d;
@@ -9190,6 +9300,7 @@ void process_cpu_change(struct pcpu_info *p) {
         p2->ri.d = p2->ri.rec.u.notsc.data;
         p2->file_offset = p->file_offset;
         p2->next_cpu_change_offset = p->file_offset;
+        p2->pi_list_length = -1;
 
         fprintf(warn, "%s: Activating pcpu %d at offset %lld\n",
                 __func__, r->cpu, (unsigned long long)p->file_offset);
@@ -9276,26 +9387,52 @@ void process_cpu_change(struct pcpu_info *p) {
     }
 }
 
-struct tl_assert_mask {
+struct assert_mask {
     unsigned p_current:1,
-        not_idle_domain:1;
+        not_idle_domain:1,
+        check_sublevel:1;
     int vcpu_data_mode;
+    int sub_max;
+    struct assert_mask *sub;
 };
-static struct tl_assert_mask tl_assert_checks[TOPLEVEL_MAX] = {
-    [TRC_HVM_MAIN]={ .p_current=1, .not_idle_domain=1, 
.vcpu_data_mode=VCPU_DATA_HVM },
+
+static struct assert_mask sl_hvm_assert_mask[SUBLEVEL_HVM_MAX] = {
+    [SUBLEVEL_HVM_ENTRYEXIT] = { .p_current=1, .not_idle_domain=1, 
.vcpu_data_mode=VCPU_DATA_HVM },
+    [SUBLEVEL_HVM_HANDLER] = { .p_current=1, .not_idle_domain=1, 
.vcpu_data_mode=VCPU_DATA_HVM },
+    [SUBLEVEL_HVM_EMUL] = { .p_current=1, .not_idle_domain=1, 
.vcpu_data_mode=VCPU_DATA_HVM },
+};
+
+static struct assert_mask tl_assert_checks[TOPLEVEL_MAX] = {
+    [TRC_HVM_MAIN]={ .check_sublevel=1, .sub=sl_hvm_assert_mask, 
.sub_max=SUBLEVEL_HVM_MAX },
     [TRC_SHADOW_MAIN]={ .p_current=1, .not_idle_domain=1, 
.vcpu_data_mode=VCPU_DATA_HVM },
     [TRC_PV_MAIN]={ .p_current=1, .not_idle_domain=1, 
.vcpu_data_mode=VCPU_DATA_PV },
 };
 
+/* Other sub types are reserved */
+static int sublevel_to_index[16] = {
+    [1] = 1,
+    [2] = 2,
+    [4] = 3,
+    [8] = 4,
+};
+
 /* There are a lot of common assumptions for the various processing
  * routines.  Check them all in one place, doing something else if
  * they don't pass. */
 int toplevel_assert_check(int toplevel, struct pcpu_info *p)
 {
-    struct tl_assert_mask mask;
+    struct assert_mask mask;
 
     mask = tl_assert_checks[toplevel];
 
+    if (mask.check_sublevel)
+    {
+        int sub = sublevel_to_index[p->ri.evt.sub];
+
+        assert(mask.sub && (sub < mask.sub_max));
+        mask = mask.sub[sub];
+    }
+
     if (mask.p_current && p->current == NULL)
     {
         fprintf(warn, "WARNING: p->current null!  Not processing\n");
@@ -9362,7 +9499,6 @@ void process_record(struct pcpu_info *p) {
     if(opt.dump_all)
         create_dump_header(ri, p);
 
-
     toplevel = find_toplevel_event(ri);
     if ( toplevel < 0 )
         return;
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 04e9aa6..ccb1c8d 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -190,7 +190,9 @@ static void vmx_vcpu_block(struct vcpu *v)
      */
     ASSERT(old_lock == NULL);
 
-    per_cpu(vmx_pi_blocking, v->processor).counter++;
+    per_cpu(vmx_pi_blocking, pi_cpu).counter++;
+    TRACE_4D(TRC_HVM_PI_LIST_ADD, v->domain->domain_id, v->vcpu_id, pi_cpu,
+             per_cpu(vmx_pi_blocking, pi_cpu).counter);
     list_add_tail(&v->arch.hvm_vmx.pi_blocking.list,
                   &per_cpu(vmx_pi_blocking, pi_cpu).list);
     spin_unlock_irqrestore(pi_blocking_list_lock, flags[1]);
@@ -279,6 +281,7 @@ static void vmx_pi_unblock_vcpu(struct vcpu *v)
         list_del(&v->arch.hvm_vmx.pi_blocking.list);
         container_of(pi_blocking_list_lock,
                      struct vmx_pi_blocking_vcpu, lock)->counter--;
+        TRACE_2D(TRC_HVM_ASYNC_PI_LIST_DEL, v->domain->domain_id, v->vcpu_id);
         v->arch.hvm_vmx.pi_blocking.lock = NULL;
     }
 
@@ -308,6 +311,8 @@ void vmx_pi_desc_fixup(unsigned int cpu)
 
     list_for_each_entry_safe(vmx, tmp, blocked_vcpus, pi_blocking.list)
     {
+        struct vcpu *v = container_of(vmx, struct vcpu, arch.hvm_vmx);
+
         /*
          * Suppress notification or we may miss an interrupt when the
          * target cpu is dying.
@@ -322,9 +327,11 @@ void vmx_pi_desc_fixup(unsigned int cpu)
         if ( pi_test_on(&vmx->pi_desc) )
         {
             list_del(&vmx->pi_blocking.list);
+            TRACE_2D(TRC_HVM_ASYNC_PI_LIST_DEL, v->domain->domain_id,
+                     v->vcpu_id);
             per_cpu(vmx_pi_blocking, cpu).counter--;
             vmx->pi_blocking.lock = NULL;
-            vcpu_unblock(container_of(vmx, struct vcpu, arch.hvm_vmx));
+            vcpu_unblock(v);
         }
         else
         {
@@ -357,6 +364,10 @@ void vmx_pi_desc_fixup(unsigned int cpu)
                       &per_cpu(vmx_pi_blocking, new_cpu).list);
             per_cpu(vmx_pi_blocking, cpu).counter--;
             per_cpu(vmx_pi_blocking, new_cpu).counter++;
+            TRACE_2D(TRC_HVM_ASYNC_PI_LIST_DEL, v->domain->domain_id,
+                     v->vcpu_id);
+            TRACE_4D(TRC_HVM_PI_LIST_ADD, v->domain->domain_id, v->vcpu_id,
+                     new_cpu, per_cpu(vmx_pi_blocking, new_cpu).counter);
             vmx->pi_blocking.lock = new_lock;
 
             spin_unlock(new_lock);
@@ -2475,11 +2486,15 @@ static void pi_wakeup_interrupt(struct cpu_user_regs 
*regs)
     {
         if ( pi_test_on(&vmx->pi_desc) )
         {
+            struct vcpu *v = container_of(vmx, struct vcpu, arch.hvm_vmx);
+
             list_del(&vmx->pi_blocking.list);
+            TRACE_2D(TRC_HVM_ASYNC_PI_LIST_DEL, v->domain->domain_id,
+                     v->vcpu_id);
             per_cpu(vmx_pi_blocking, cpu).counter--;
             ASSERT(vmx->pi_blocking.lock == lock);
             vmx->pi_blocking.lock = NULL;
-            vcpu_unblock(container_of(vmx, struct vcpu, arch.hvm_vmx));
+            vcpu_unblock(v);
         }
     }
 
diff --git a/xen/include/asm-x86/hvm/trace.h b/xen/include/asm-x86/hvm/trace.h
index de802a6..b74ffdd 100644
--- a/xen/include/asm-x86/hvm/trace.h
+++ b/xen/include/asm-x86/hvm/trace.h
@@ -54,6 +54,7 @@
 #define DO_TRC_HVM_TRAP             DEFAULT_HVM_MISC
 #define DO_TRC_HVM_TRAP_DEBUG       DEFAULT_HVM_MISC
 #define DO_TRC_HVM_VLAPIC           DEFAULT_HVM_MISC
+#define DO_TRC_HVM_PI_LIST_ADD      DEFAULT_HVM_MISC
 
 
 #define TRC_PAR_LONG(par) ((par)&0xFFFFFFFF),((par)>>32)
diff --git a/xen/include/public/trace.h b/xen/include/public/trace.h
index 7f2e891..15ea87c 100644
--- a/xen/include/public/trace.h
+++ b/xen/include/public/trace.h
@@ -53,6 +53,7 @@
 #define TRC_HVM_ENTRYEXIT   0x00081000   /* VMENTRY and #VMEXIT       */
 #define TRC_HVM_HANDLER     0x00082000   /* various HVM handlers      */
 #define TRC_HVM_EMUL        0x00084000   /* emulated devices */
+#define TRC_HVM_ASYNC       0x00088000   /* Asynchronous events */
 
 #define TRC_SCHED_MIN       0x00021000   /* Just runstate changes */
 #define TRC_SCHED_CLASS     0x00022000   /* Scheduler-specific    */
@@ -234,6 +235,7 @@
 #define TRC_HVM_TRAP             (TRC_HVM_HANDLER + 0x23)
 #define TRC_HVM_TRAP_DEBUG       (TRC_HVM_HANDLER + 0x24)
 #define TRC_HVM_VLAPIC           (TRC_HVM_HANDLER + 0x25)
+#define TRC_HVM_PI_LIST_ADD      (TRC_HVM_HANDLER + 0x26)
 
 #define TRC_HVM_IOPORT_WRITE    (TRC_HVM_HANDLER + 0x216)
 #define TRC_HVM_IOMEM_WRITE     (TRC_HVM_HANDLER + 0x217)
@@ -257,6 +259,9 @@
 #define TRC_HVM_EMUL_PIC_PEND_IRQ_CALL (TRC_HVM_EMUL + 0x10)
 #define TRC_HVM_EMUL_LAPIC_PIC_INTR    (TRC_HVM_EMUL + 0x11)
 
+/* Trace asynchronous events for HVM */
+#define TRC_HVM_ASYNC_PI_LIST_DEL      (TRC_HVM_ASYNC + 0x1)
+
 /* trace events for per class */
 #define TRC_PM_FREQ_CHANGE      (TRC_HW_PM + 0x01)
 #define TRC_PM_IDLE_ENTRY       (TRC_HW_PM + 0x02)
-- 
1.8.3.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.