[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 1 of 8] xenalyze: Improve record-sorting algorithm



The old method of finding the next record was to search through
all active pcpus to find the one with the lowest tsc value.  This
was simple but incredibly wasteful, especially as frequently the
same cpu had several records to process in a row, and only a small
subset of pcpus was active at any one time.

This patch introduces a sorted list, such that the top of the list is
always the next record to process.  After that record is processed,
the next record is found, and the record is then "bubbled down" to
its appropriate place on the list.

Signed-off-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>

diff -r d8c1c4df4535 -r f8c3c44be309 xenalyze.c
--- a/xenalyze.c        Thu Jan 12 14:27:56 2012 +0100
+++ b/xenalyze.c        Thu Jan 26 17:11:34 2012 +0000
@@ -6522,7 +6522,7 @@ void shadow_process(struct pcpu_info *p)
         if(sevt.minor <= PF_XEN_LAST_FAULT) {
             shadow_fault_generic_process(ri, h);
         } else {
-            fprintf(warn, "Warning: processing shadow as generic\n");
+            warn_once("Warning: processing shadow as generic\n");
             process_generic(ri);
         }
         break;
@@ -8558,6 +8558,10 @@ void base_process(struct pcpu_info *p) {
 
 
 /* Non-compat only */
+void record_order_insert(struct pcpu_info *new);
+void record_order_remove(struct pcpu_info *rem);
+void record_order_bubble(struct pcpu_info *last);
+
 struct cpu_change_data {
     int cpu;
     unsigned window_size;
@@ -8624,6 +8628,8 @@ loff_t scan_for_new_pcpu(loff_t offset) 
         p->file_offset = offset;
         p->next_cpu_change_offset = offset;
 
+        record_order_insert(p);
+
         offset += r + cd->window_size;
 
         sched_default_vcpu_activate(p);
@@ -8675,6 +8681,9 @@ void deactivate_pcpu(struct pcpu_info *p
         lose_vcpu(p->current, p->last_tsc);
     }
     p->active = 0;
+
+    record_order_remove(p);
+
     if ( p->pid == P.max_active_pcpu )
     {
         int i, max_active_pcpu = -1;
@@ -8840,11 +8849,6 @@ void process_cpu_change(struct pcpu_info
         if(r->cpu > P.max_active_pcpu)
             P.max_active_pcpu = r->cpu;
 
-        fprintf(warn, "%s: Activating pcpu %d at offset %lld\n",
-                __func__, r->cpu, (unsigned long long)p->file_offset);
-        
-        sched_default_vcpu_activate(p2);
-
         /* Taking this record as the first record should make everything
          * run swimmingly. */
         p2->ri = *ri;
@@ -8852,6 +8856,13 @@ void process_cpu_change(struct pcpu_info
         p2->ri.d = p2->ri.rec.u.notsc.data;
         p2->file_offset = p->file_offset;
         p2->next_cpu_change_offset = p->file_offset;
+
+        fprintf(warn, "%s: Activating pcpu %d at offset %lld\n",
+                __func__, r->cpu, (unsigned long long)p->file_offset);
+        
+        record_order_insert(p2);
+
+        sched_default_vcpu_activate(p2);
     }
 
     p->last_cpu_change_pid = r->cpu;
@@ -8979,6 +8990,7 @@ int toplevel_assert_check(int toplevel, 
         if ( ! (v->data_type == VCPU_DATA_NONE
                 || v->data_type == mask.vcpu_data_mode) )
         {
+            /* This may happen for track_dirty_vram, which causes a SHADOW_WRMAP_BF trace from dom0 */
             fprintf(warn, "WARNING: Unexpected vcpu data type for d%dv%d on proc %d! Expected %d got %d. Not processing\n",
                     v->d->did, v->vid, p->pid,
                     mask.vcpu_data_mode,
@@ -9317,31 +9329,102 @@ char * pcpu_string(int pcpu)
     return s;
 }
 
+/* Null terminated */
+struct pcpu_info *record_order[MAX_CPUS+1] = { 0 };
+
+/* In the case of identical tsc values, the old algorithm would favor the
+ * pcpu with the lowest number.  By default the new algorithm favors the
+ * pcpu which has been processed most recently.
+ *
+ * I think the second way is better; but it's good to be able to use the
+ * old ordering, at the very least to verify that there are no (other) ordering
+ * differences.  Enabling the below flag will cause the insertion / bubble
+ * routines to order by pcpu id as well as tsc, preserving the old order. */
+//#define PRESERVE_PCPU_ORDERING
+
+/* Steady state:
+ * + Entire list is in order, except (potentially) for the first entry
+ * + last is pointing to the first entry.
+ */
+void record_order_bubble(struct pcpu_info *last)
+{
+    int i;
+
+    /* Find the pcpu to "bubble".  This is usually the
+     * first one, but if other pcpus have been activated, it may
+     * not be. */
+    for(i=0; record_order[i] && record_order[i]!=last; i++);
+
+    assert(record_order[i]);
+
+    /* Now bubble it down */
+    for( ;
+        record_order[i+1]
+             && ( record_order[i+1]->order_tsc < last->order_tsc
+#ifdef PRESERVE_PCPU_ORDERING
+                  || ( record_order[i+1]->order_tsc == last->order_tsc
+                       && record_order[i+1]->pid < last->pid )
+#endif
+                 ) ;
+        i++)
+        record_order[i]=record_order[i+1];
+    record_order[i]=last;
+}
+
+void record_order_insert(struct pcpu_info *new)
+{
+    int i;
+    struct pcpu_info *p=NULL, *t=NULL;
+
+    /* Sanity check: Make sure it's not already in there */
+    for(i=0; record_order[i]; i++)
+        assert(record_order[i]!=new);
+
+    /* Find where to insert it */
+    for(i=0;
+        record_order[i]
+             && ( record_order[i]->order_tsc < new->order_tsc
+#ifdef PRESERVE_PCPU_ORDERING
+                  || ( record_order[i]->order_tsc == new->order_tsc
+                       && record_order[i]->pid < new->pid )
+#endif
+                 ) ;
+        i++)
+        ;
+
+    /* And insert it */
+    for( p=new; p ; i++)
+    {
+        t=record_order[i];
+        record_order[i]=p;
+        p=t;
+    }
+}
+
+void record_order_remove(struct pcpu_info *rem)
+{
+    int i;
+
+    /* Find where the record is */
+    for(i=0; record_order[i] && record_order[i]!=rem; i++)
+        ;
+
+    /* Sanity check: Make sure it's actually there! */
+    assert(record_order[i]);
+
+    /* And move everyone forward */
+    for(; (record_order[i]=record_order[i+1]); i++) 
+        ;
+}
+
 struct pcpu_info * choose_next_record(void)
 {
-    int i;
-    struct pcpu_info * p, *min_p=NULL;
-    loff_t min_offset = 0;
-
-    /* Need to:
-     * - find the pcpu with the lowest order_tsc
-     * - Find the lowest file offset
-     */
-    for(i=0; i<=P.max_active_pcpu; i++)
-    {
-        p = P.pcpu+i;
-        if(!p->active)
-            continue;
-
-        if(!min_p || p->order_tsc < min_p->order_tsc)
-            min_p = p;
-
-        if(!min_offset || p->file_offset < min_offset)
-            min_offset = p->file_offset;
-    }
-
-    if(opt.progress && min_offset >= G.progress.update_offset)
-        progress_update(min_offset);
+    struct pcpu_info *min_p=NULL;
+
+    min_p=record_order[0];
+
+    if(opt.progress && min_p && min_p->file_offset >= G.progress.update_offset)
+        progress_update(min_p->file_offset);
 
     /* If there are active pcpus, make sure we chose one */
     assert(min_p || (P.max_active_pcpu==-1));
@@ -9372,6 +9455,9 @@ void process_records(void) {
         else
             read_record(G.fd, p);
 
+        /* Update this pcpu in the processing order */
+        if ( p->active )
+            record_order_bubble(p);
     }
 }
 

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.