[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] xen: Implement domain runstates



To help simplify the load configuration of servers, I developed the
idea of a "domain runstate".  The "runstate" of a domain is defined
by the runstate of its constituent vcpus.  The "states" are defined
as follows:

Blocked: All vcpus blocked
Partial run: Some vcpus running, some blocked
Partial contention: Some vcpus waiting for the cpu, some blocked
Full run: All vcpus running
Concurrency hazard: Some vcpus running, some waiting for the cpu
Full contention: All vcpus waiting for the cpu

The idea is that by looking at the amount of time in each state
over the last unit of time, an administrator (or an automated
load-balancing program) can determine whether their vcpu and/or
domain configuration needs to be tweaked.  For example:

If a VM spends the majority of its time in "full run", it may
not have enough vcpus to do its work.

If a VM spends a large amount of time in full contention, the system
is probably too busy, and some workloads should be moved onto other
servers.

If a VM spends a large amount of time in concurrency hazard, it means
that the VM may be wasting a lot of time in spinlocks and other
synchronization (like cache misses).  Either work should be moved off
the machine, or the number of vcpus assigned to the VM should be
reduced.

The state is protected by a lock, but to avoid tricky deadlock
issues, if the lock cannot be acquired, the state is simply not
updated.  Number of lost state updates is recorded.

Signed-off-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>

diff -r c0c1f5f0745e -r 692200e7ce1e xen/common/domain.c
--- a/xen/common/domain.c       Mon Nov 22 19:16:34 2010 +0000
+++ b/xen/common/domain.c       Tue Nov 23 11:25:50 2010 +0000
@@ -238,6 +238,7 @@
     spin_lock_init_prof(d, domain_lock);
     spin_lock_init_prof(d, page_alloc_lock);
     spin_lock_init(&d->hypercall_deadlock_mutex);
+    spin_lock_init(&d->runstate_lock);
     INIT_PAGE_LIST_HEAD(&d->page_list);
     INIT_PAGE_LIST_HEAD(&d->xenpage_list);
 
diff -r c0c1f5f0745e -r 692200e7ce1e xen/common/domctl.c
--- a/xen/common/domctl.c       Mon Nov 22 19:16:34 2010 +0000
+++ b/xen/common/domctl.c       Tue Nov 23 11:25:50 2010 +0000
@@ -970,6 +970,24 @@
     }
     break;
 
+    case XEN_DOMCTL_get_runstate_info:
+    {
+        struct domain *d;
+
+        ret = -ESRCH;
+        d = rcu_lock_domain_by_id(op->domain);
+        if ( d != NULL )
+        {
+            domain_runstate_get(d, &op->u.domain_runstate);
+            ret = 0;
+
+            rcu_unlock_domain(d);
+
+            if ( copy_to_guest(u_domctl, op, 1) )
+                ret = -EFAULT;
+        }
+        break;
+    }
     default:
         ret = arch_do_domctl(op, u_domctl);
         break;
diff -r c0c1f5f0745e -r 692200e7ce1e xen/common/schedule.c
--- a/xen/common/schedule.c     Mon Nov 22 19:16:34 2010 +0000
+++ b/xen/common/schedule.c     Tue Nov 23 11:25:50 2010 +0000
@@ -135,10 +135,31 @@
     }
 }
 
+/* Used to quickly map the vcpu runstate mask to a domain runstate */
+static int mask_to_state[] = {
+    /* 000: Nothing in any runstate.  Should never happen. */
+    -1,
+    /* 001: All running */
+    DOMAIN_RUNSTATE_full_run,
+    /* 010: All runnable */
+    DOMAIN_RUNSTATE_full_contention,
+    /* 011: Some running, some runnable */
+    DOMAIN_RUNSTATE_concurrency_hazard,
+    /* 100: All blocked / offline */
+    DOMAIN_RUNSTATE_blocked,
+    /* 101: Some running, some blocked / offline */
+    DOMAIN_RUNSTATE_partial_run,
+    /* 110: Some blocked / offline, some runnable */
+    DOMAIN_RUNSTATE_partial_contention,
+    /* 111: Some in every state.  Mixed running + runnable is most important. 
*/
+    DOMAIN_RUNSTATE_concurrency_hazard
+};
+
 static inline void vcpu_runstate_change(
     struct vcpu *v, int new_state, s_time_t new_entry_time)
 {
     s_time_t delta;
+    struct domain *d = v->domain;
 
     ASSERT(v->runstate.state != new_state);
     ASSERT(spin_is_locked(per_cpu(schedule_data,v->processor).schedule_lock));
@@ -155,6 +176,37 @@
     }
 
     v->runstate.state = new_state;
+
+    /* Update domain runstate */
+    if(spin_trylock(&d->runstate_lock)) {
+        unsigned mask=0;
+        struct vcpu *ov;
+        
+        BUG_ON(d->runstate.state > DOMAIN_RUNSTATE_partial_contention);
+        
+        d->runstate.time[d->runstate.state] +=
+            (new_entry_time - d->runstate.state_entry_time);
+        d->runstate.state_entry_time = new_entry_time;
+        
+        /* Determine new runstate.  First, see what states we have */
+        for_each_vcpu(d, ov) {
+            /* Don't count vcpus that have beent taken offline by the guest */ 
+            if(! (ov->runstate.state == RUNSTATE_offline
+                  && test_bit(_VPF_down, &ov->pause_flags)) )
+                mask |= (1 << ov->runstate.state);
+        }
+        
+        BUG_ON(mask == 0);
+        
+        /* Offline & blocked are the same */
+        mask |= ((1 << RUNSTATE_offline) & mask) >> 1;
+        
+        d->runstate.state = mask_to_state[mask&0x7];
+        
+        spin_unlock(&d->runstate_lock);
+    } else {
+        atomic_inc(&d->runstate_missed_changes);
+    }
 }
 
 void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate)
@@ -173,6 +225,18 @@
         vcpu_schedule_unlock_irq(v);
 }
 
+void domain_runstate_get(struct domain *d,
+                         domain_runstate_info_t *runstate)
+{
+    spin_lock(&d->runstate_lock);
+
+    memcpy(runstate, &d->runstate, sizeof(*runstate));
+    runstate->time[d->runstate.state] += NOW() - runstate->state_entry_time;
+    runstate->missed_changes = atomic_read(&d->runstate_missed_changes);
+
+    spin_unlock(&d->runstate_lock);
+}
+
 uint64_t get_cpu_idle_time(unsigned int cpu)
 {
     struct vcpu_runstate_info state;
diff -r c0c1f5f0745e -r 692200e7ce1e xen/include/public/domctl.h
--- a/xen/include/public/domctl.h       Mon Nov 22 19:16:34 2010 +0000
+++ b/xen/include/public/domctl.h       Tue Nov 23 11:25:50 2010 +0000
@@ -806,6 +806,46 @@
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpuextstate_t);
 #endif
 
+/*
+ * Return information about the state and running time of a domain.
+ * The "domain runstate" is based on the runstates of all the vcpus of the
+ * domain (see below).
+ * @extra_arg == pointer to domain_runstate_info structure.
+ */
+struct xen_domctl_runstate_info {
+    /* Domain's's current state (DOMAIN_RUNSTATE_*). */
+    uint32_t      state;
+    /* Number of times we missed an update due to contention */
+    uint32_t missed_changes;
+    /* When was current state entered (system time, ns)? */
+    uint64_t state_entry_time;
+    /*
+     * Time spent in each RUNSTATE_* (ns). The sum of these times is
+     * NOT guaranteed not to drift from system time.
+     */
+    uint64_t time[6];
+};
+typedef struct xen_domctl_runstate_info xen_domctl_runstate_info_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_runstate_info_t);
+
+/* All vcpus are running */
+#define DOMAIN_RUNSTATE_full_run           0
+
+/* All vcpus are runnable (i.e., waiting for cpu) */
+#define DOMAIN_RUNSTATE_full_contention    1
+
+/* Some vcpus are running, some are runnable */
+#define DOMAIN_RUNSTATE_concurrency_hazard 2
+
+/* All vcpus are blocked / offline */
+#define DOMAIN_RUNSTATE_blocked            3
+
+/* Some vpcus are running, some are blocked */
+#define DOMAIN_RUNSTATE_partial_run        4
+
+/* Some vcpus are runnable, some are blocked */
+#define DOMAIN_RUNSTATE_partial_contention 5
+
 struct xen_domctl {
     uint32_t cmd;
 #define XEN_DOMCTL_createdomain                   1
@@ -868,6 +908,7 @@
 #define XEN_DOMCTL_getpageframeinfo3             61
 #define XEN_DOMCTL_setvcpuextstate               62
 #define XEN_DOMCTL_getvcpuextstate               63
+#define XEN_DOMCTL_get_runstate_info             64
 #define XEN_DOMCTL_gdbsx_guestmemio            1000
 #define XEN_DOMCTL_gdbsx_pausevcpu             1001
 #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
@@ -923,6 +964,7 @@
         struct xen_domctl_gdbsx_memio       gdbsx_guest_memio;
         struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
         struct xen_domctl_gdbsx_domstatus   gdbsx_domstatus;
+        struct xen_domctl_runstate_info     domain_runstate;
         uint8_t                             pad[128];
     } u;
 };
diff -r c0c1f5f0745e -r 692200e7ce1e xen/include/xen/sched.h
--- a/xen/include/xen/sched.h   Mon Nov 22 19:16:34 2010 +0000
+++ b/xen/include/xen/sched.h   Tue Nov 23 11:25:50 2010 +0000
@@ -200,6 +200,8 @@
     int xen_port;
 };
 
+typedef struct xen_domctl_runstate_info domain_runstate_info_t;
+
 struct domain
 {
     domid_t          domain_id;
@@ -332,6 +334,11 @@
     nodemask_t node_affinity;
     unsigned int last_alloc_node;
     spinlock_t node_affinity_lock;
+
+    /* Domain runstate tracking */
+    spinlock_t runstate_lock;
+    atomic_t runstate_missed_changes;
+    domain_runstate_info_t runstate;
 };
 
 struct domain_setup_info
@@ -614,6 +621,7 @@
 int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);
 
 void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);
+void domain_runstate_get(struct domain *d, domain_runstate_info_t *runstate);
 uint64_t get_cpu_idle_time(unsigned int cpu);
 
 /*

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.