# HG changeset patch
# User Emmanuel Ackaouy <ack@xxxxxxxxxxxxx>
# Node ID bb6cd7ba259b7552e2f46f986c1580350af10517
# Parent 32e4952c063866165d9cab913f67b57f8e78aded
[XEN] Initial support for multi-core and multi-threaded CPU scheduling.
In multi-core and multi-threaded systems, not all idling "CPUs" are
equal: when there are idling execution vehicles, it's better to spread
VCPUs across sockets and cores before co-scheduling them on sibling
cores and threads.
Signed-off-by: Emmanuel Ackaouy <ack@xxxxxxxxxxxxx>
---
xen/common/sched_credit.c | 210 ++++++++++++++++++++++++++++++++++++++++-----
xen/common/sched_sedf.c | 9 +
xen/common/schedule.c | 4
xen/include/xen/sched-if.h | 1
4 files changed, 202 insertions(+), 22 deletions(-)
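
The heuristic described above boils down to comparing two idle CPUs by how
many of their hardware neighbours are also idle. The standalone sketch below
is not the hypervisor code; it mirrors the csched_idler_compare() routine
added in the diff, but uses plain 64-bit masks and a hypothetical machine
with 2 sockets x 2 cores x 2 threads (CPUs 0-7) in place of cpumask_t,
cpu_sibling_map and cpu_core_map.

/*
 * Standalone illustration of the idler-comparison heuristic.
 * Assumptions (not from the patch): at most 64 CPUs, topology maps
 * expressed as uint64_t bitmasks, hypothetical 2x2x2 topology.
 */
#include <stdint.h>
#include <stdio.h>

/* Threads sharing a core (analogue of cpu_sibling_map). */
static const uint64_t sibling_map[8] = {
    0x03, 0x03, 0x0c, 0x0c, 0x30, 0x30, 0xc0, 0xc0
};
/* CPUs sharing a socket (analogue of cpu_core_map). */
static const uint64_t core_map[8] = {
    0x0f, 0x0f, 0x0f, 0x0f, 0xf0, 0xf0, 0xf0, 0xf0
};

static int count_bits(uint64_t x) { return __builtin_popcountll(x); }

/*
 * Positive: 'one' is the better place for an extra VCPU.
 * Negative: 'two' is better.  Zero: they are equivalent.
 */
static int idler_compare(uint64_t idlers, int one, int two)
{
    uint64_t rest = idlers & ~((1ULL << one) | (1ULL << two));
    uint64_t one_nbrs, two_nbrs;

    if ( core_map[two] & (1ULL << one) )
    {
        /* Same socket: compare idle thread siblings of each candidate. */
        one_nbrs = rest & sibling_map[one];
        two_nbrs = rest & sibling_map[two];
    }
    else
    {
        /* Different sockets: compare idle CPUs in each candidate's socket. */
        one_nbrs = rest & core_map[one];
        two_nbrs = rest & core_map[two];
    }

    return count_bits(one_nbrs) - count_bits(two_nbrs);
}

int main(void)
{
    uint64_t idlers = 0xfe;  /* everyone idle except CPU 0 */

    /* CPU 1 is the co-thread of busy CPU 0; CPU 2's core is fully idle. */
    printf("compare(1, 2) = %d\n", idler_compare(idlers, 1, 2));
    return 0;
}

The negative result says CPU 2 is the better target: its core sibling
(CPU 3) is idle, while CPU 1 would have to share a core with the already
busy CPU 0.
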
diff -r 32e4952c0638 -r bb6cd7ba259b xen/common/sched_credit.c
--- a/xen/common/sched_credit.c Mon Nov 06 16:55:56 2006 +0000
+++ b/xen/common/sched_credit.c Mon Nov 06 17:32:00 2006 +0000
@@ -115,6 +115,10 @@
_MACRO(steal_peer_idle) \
_MACRO(steal_peer_running) \
_MACRO(steal_peer_pinned) \
+ _MACRO(steal_peer_migrating) \
+ _MACRO(steal_peer_best_idler) \
+ _MACRO(steal_loner_candidate) \
+ _MACRO(steal_loner_signal) \
_MACRO(dom_init) \
_MACRO(dom_destroy) \
_MACRO(vcpu_init) \
@@ -370,8 +374,42 @@ __csched_vcpu_check(struct vcpu *vc)
#define CSCHED_VCPU_CHECK(_vc)
#endif
+/*
+ * Indicates which of two given idlers is most efficient to run
+ * an additional VCPU.
+ *
+ * Returns:
+ * 0: They are the same.
+ * negative: One is less efficient than Two.
+ * positive: One is more efficient than Two.
+ */
+static int
+csched_idler_compare(int one, int two)
+{
+ cpumask_t idlers;
+ cpumask_t one_idlers;
+ cpumask_t two_idlers;
+
+ idlers = csched_priv.idlers;
+ cpu_clear(one, idlers);
+ cpu_clear(two, idlers);
+
+ if ( cpu_isset(one, cpu_core_map[two]) )
+ {
+ cpus_and(one_idlers, idlers, cpu_sibling_map[one]);
+ cpus_and(two_idlers, idlers, cpu_sibling_map[two]);
+ }
+ else
+ {
+ cpus_and(one_idlers, idlers, cpu_core_map[one]);
+ cpus_and(two_idlers, idlers, cpu_core_map[two]);
+ }
+
+ return cpus_weight(one_idlers) - cpus_weight(two_idlers);
+}
+
static inline int
-__csched_vcpu_is_stealable(int local_cpu, struct vcpu *vc)
+__csched_queued_vcpu_is_stealable(int local_cpu, struct vcpu *vc)
{
/*
* Don't pick up work that's in the peer's scheduling tail. Also only pick
@@ -386,6 +424,32 @@ __csched_vcpu_is_stealable(int local_cpu
if ( unlikely(!cpu_isset(local_cpu, vc->cpu_affinity)) )
{
CSCHED_STAT_CRANK(steal_peer_pinned);
+ return 0;
+ }
+
+ return 1;
+}
+
+static inline int
+__csched_running_vcpu_is_stealable(int local_cpu, struct vcpu *vc)
+{
+ BUG_ON( is_idle_vcpu(vc) );
+
+ if ( unlikely(!cpu_isset(local_cpu, vc->cpu_affinity)) )
+ {
+ CSCHED_STAT_CRANK(steal_peer_pinned);
+ return 0;
+ }
+
+ if ( test_bit(_VCPUF_migrating, &vc->vcpu_flags) )
+ {
+ CSCHED_STAT_CRANK(steal_peer_migrating);
+ return 0;
+ }
+
+ if ( csched_idler_compare(local_cpu, vc->processor) <= 0 )
+ {
+ CSCHED_STAT_CRANK(steal_peer_best_idler);
return 0;
}
@@ -652,6 +716,64 @@ csched_dom_destroy(struct domain *dom)
xfree(sdom);
}
+static int
+csched_cpu_pick(struct vcpu *vc)
+{
+ cpumask_t cpus;
+ int cpu, nxt;
+
+ /*
+ * Pick from online CPUs in VCPU's affinity mask, giving a
+ * preference to its current processor if it's in there.
+ */
+ cpus_and(cpus, cpu_online_map, vc->cpu_affinity);
+ ASSERT( !cpus_empty(cpus) );
+ cpu = cpu_isset(vc->processor, cpus) ? vc->processor : first_cpu(cpus);
+
+ /*
+ * Try to find an idle processor within the above constraints.
+ */
+ cpus_and(cpus, cpus, csched_priv.idlers);
+ if ( !cpus_empty(cpus) )
+ {
+ cpu = cpu_isset(cpu, cpus) ? cpu : first_cpu(cpus);
+ cpu_clear(cpu, cpus);
+
+ /*
+ * In multi-core and multi-threaded CPUs, not all idle execution
+ * vehicles are equal!
+ *
+ * We give preference to the idle execution vehicle with the most
+ * idling neighbours in its grouping. This distributes work across
+ * distinct cores first and guarantees we don't do something stupid
+ * like run two VCPUs on co-hyperthreads while there are idle cores
+ * or sockets.
+ */
+ while ( !cpus_empty(cpus) )
+ {
+ nxt = first_cpu(cpus);
+
+ if ( csched_idler_compare(cpu, nxt) < 0 )
+ {
+ cpu = nxt;
+ cpu_clear(nxt, cpus);
+ }
+ else if ( cpu_isset(cpu, cpu_core_map[nxt]) )
+ {
+ cpus_andnot(cpus, cpus, cpu_sibling_map[nxt]);
+ }
+ else
+ {
+ cpus_andnot(cpus, cpus, cpu_core_map[nxt]);
+ }
+
+ ASSERT( !cpu_isset(nxt, cpus) );
+ }
+ }
+
+ return cpu;
+}
+
/*
* This is a O(n) optimized sort of the runq.
*
@@ -939,7 +1061,7 @@ csched_runq_steal(struct csched_pcpu *sp
vc = speer->vcpu;
BUG_ON( is_idle_vcpu(vc) );
- if ( __csched_vcpu_is_stealable(cpu, vc) )
+ if ( __csched_queued_vcpu_is_stealable(cpu, vc) )
{
/* We got a candidate. Grab it! */
__runq_remove(speer);
@@ -959,6 +1081,7 @@ csched_load_balance(int cpu, struct csch
struct csched_pcpu *spc;
struct vcpu *peer_vcpu;
cpumask_t workers;
+ cpumask_t loners;
int peer_cpu;
if ( snext->pri == CSCHED_PRI_IDLE )
@@ -971,6 +1094,7 @@ csched_load_balance(int cpu, struct csch
/*
* Peek at non-idling CPUs in the system
*/
+ cpus_clear(loners);
cpus_andnot(workers, cpu_online_map, csched_priv.idlers);
cpu_clear(cpu, workers);
@@ -999,13 +1123,12 @@ csched_load_balance(int cpu, struct csch
continue;
}
+ peer_vcpu = per_cpu(schedule_data, peer_cpu).curr;
spc = CSCHED_PCPU(peer_cpu);
- peer_vcpu = per_cpu(schedule_data, peer_cpu).curr;
if ( unlikely(spc == NULL) )
{
CSCHED_STAT_CRANK(steal_peer_down);
- speer = NULL;
}
else if ( unlikely(is_idle_vcpu(peer_vcpu)) )
{
@@ -1014,26 +1137,72 @@ csched_load_balance(int cpu, struct csch
* pick up work from it itself.
*/
CSCHED_STAT_CRANK(steal_peer_idle);
- speer = NULL;
+ }
+ else if ( is_idle_vcpu(__runq_elem(spc->runq.next)->vcpu) )
+ {
+ if ( snext->pri == CSCHED_PRI_IDLE &&
+ __csched_running_vcpu_is_stealable(cpu, peer_vcpu) )
+ {
+ CSCHED_STAT_CRANK(steal_loner_candidate);
+ cpu_set(peer_cpu, loners);
+ }
}
else
{
- /* Try to steal work from an online non-idle CPU. */
+ /* Try to steal work from a remote CPU's runq. */
speer = csched_runq_steal(spc, cpu, snext->pri);
+ if ( speer != NULL )
+ {
+ spin_unlock(&per_cpu(schedule_data, peer_cpu).schedule_lock);
+ CSCHED_STAT_CRANK(vcpu_migrate);
+ speer->stats.migrate++;
+ return speer;
+ }
}
spin_unlock(&per_cpu(schedule_data, peer_cpu).schedule_lock);
-
- /* Got one? */
- if ( speer )
- {
- CSCHED_STAT_CRANK(vcpu_migrate);
- speer->stats.migrate++;
- return speer;
- }
- }
-
- /* Failed to find more important work */
+ }
+
+ /*
+ * If we failed to find any remotely queued VCPUs to move here,
+ * see if it would be more efficient to move any of the running
+ * remote VCPUs over here.
+ */
+ while ( !cpus_empty(loners) )
+ {
+ /* For each CPU of interest, starting with our neighbour... */
+ peer_cpu = next_cpu(peer_cpu, loners);
+ if ( peer_cpu == NR_CPUS )
+ peer_cpu = first_cpu(loners);
+
+ cpu_clear(peer_cpu, loners);
+
+ if ( !spin_trylock(&per_cpu(schedule_data, peer_cpu).schedule_lock) )
+ {
+ CSCHED_STAT_CRANK(steal_trylock_failed);
+ continue;
+ }
+
+ peer_vcpu = per_cpu(schedule_data, peer_cpu).curr;
+ spc = CSCHED_PCPU(peer_cpu);
+
+ if ( !is_idle_vcpu(peer_vcpu) &&
+ is_idle_vcpu(__runq_elem(spc->runq.next)->vcpu) &&
+ __csched_running_vcpu_is_stealable(cpu, peer_vcpu) )
+ {
+ set_bit(_VCPUF_migrating, &peer_vcpu->vcpu_flags);
+ spin_unlock(&per_cpu(schedule_data, peer_cpu).schedule_lock);
+
+ CSCHED_STAT_CRANK(steal_loner_signal);
+ cpu_raise_softirq(peer_cpu, SCHEDULE_SOFTIRQ);
+ }
+ else
+ {
+ spin_unlock(&per_cpu(schedule_data, peer_cpu).schedule_lock);
+ }
+ }
+
+ /* Failed to find more important work elsewhere... */
__runq_remove(snext);
return snext;
}
@@ -1139,9 +1308,11 @@ csched_dump_pcpu(int cpu)
spc = CSCHED_PCPU(cpu);
runq = &spc->runq;
- printk(" tick=%lu, sort=%d\n",
+ printk(" tick=%lu, sort=%d, sibling=0x%lx, core=0x%lx\n",
per_cpu(schedule_data, cpu).tick,
- spc->runq_sort_last);
+ spc->runq_sort_last,
+ cpu_sibling_map[cpu].bits[0],
+ cpu_core_map[cpu].bits[0]);
/* current VCPU */
svc = CSCHED_VCPU(per_cpu(schedule_data, cpu).curr);
@@ -1247,6 +1418,7 @@ struct scheduler sched_credit_def = {
.adjust = csched_dom_cntl,
+ .pick_cpu = csched_cpu_pick,
.tick = csched_tick,
.do_schedule = csched_schedule,
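
To see how the selection loop in csched_cpu_pick() prunes the idler set,
here is a second standalone sketch under the same assumptions (64-bit masks,
hypothetical 2x2x2 topology); the comparison helper from the earlier sketch
is repeated in compact form so this example compiles on its own. When a
candidate is no better than the current choice, its whole core (same socket)
or whole socket (different socket) is dropped from consideration.

/*
 * Standalone illustration of csched_cpu_pick's selection loop.
 * Assumptions (not from the patch): <= 64 CPUs, uint64_t masks,
 * hypothetical 2-socket / 2-core / 2-thread topology as before.
 */
#include <stdint.h>
#include <stdio.h>

static const uint64_t sibling_map[8] = {      /* threads per core */
    0x03, 0x03, 0x0c, 0x0c, 0x30, 0x30, 0xc0, 0xc0
};
static const uint64_t core_map[8] = {         /* CPUs per socket */
    0x0f, 0x0f, 0x0f, 0x0f, 0xf0, 0xf0, 0xf0, 0xf0
};

static int count_bits(uint64_t x) { return __builtin_popcountll(x); }
static int first_cpu(uint64_t m)  { return __builtin_ctzll(m); }

/* Positive if 'one' has more idle neighbours than 'two' (see earlier sketch). */
static int idler_compare(uint64_t idlers, int one, int two)
{
    uint64_t rest = idlers & ~((1ULL << one) | (1ULL << two));

    if ( core_map[two] & (1ULL << one) )
        return count_bits(rest & sibling_map[one]) -
               count_bits(rest & sibling_map[two]);
    return count_bits(rest & core_map[one]) -
           count_bits(rest & core_map[two]);
}

/* Pick the idle CPU with the most idle neighbours, starting from 'cpu'. */
static int pick_idle_cpu(uint64_t idlers, int cpu)
{
    uint64_t cpus = idlers & ~(1ULL << cpu);

    while ( cpus != 0 )
    {
        int nxt = first_cpu(cpus);

        if ( idler_compare(idlers, cpu, nxt) < 0 )
        {
            /* 'nxt' is a better idler: adopt it and keep scanning. */
            cpu = nxt;
            cpus &= ~(1ULL << nxt);
        }
        else if ( core_map[nxt] & (1ULL << cpu) )
        {
            /* Same socket and no better: discard nxt's whole core. */
            cpus &= ~sibling_map[nxt];
        }
        else
        {
            /* Different socket and no better: discard nxt's whole socket. */
            cpus &= ~core_map[nxt];
        }
    }

    return cpu;
}

int main(void)
{
    /* CPU 0 busy; its co-thread (CPU 1) and all of socket 1 are idle. */
    uint64_t idlers = 0xf2;

    printf("picked CPU %d\n", pick_idle_cpu(idlers, 1));  /* prints 4 */
    return 0;
}

Starting from CPU 1, the co-thread of busy CPU 0, the loop ends up on
CPU 4 in the fully idle socket, matching the patch's goal of filling idle
sockets and cores before doubling up on hyperthreads.
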
diff -r 32e4952c0638 -r bb6cd7ba259b xen/common/sched_sedf.c
--- a/xen/common/sched_sedf.c Mon Nov 06 16:55:56 2006 +0000
+++ b/xen/common/sched_sedf.c Mon Nov 06 17:32:00 2006 +0000
@@ -409,6 +409,14 @@ static void sedf_destroy_domain(struct d
static void sedf_destroy_domain(struct domain *d)
{
xfree(d->sched_priv);
+}
+
+static int sedf_pick_cpu(struct vcpu *v)
+{
+ cpumask_t online_affinity;
+
+ cpus_and(online_affinity, v->cpu_affinity, cpu_online_map);
+ return first_cpu(online_affinity);
}
/*
@@ -1436,6 +1444,7 @@ struct scheduler sched_sedf_def = {
.destroy_vcpu = sedf_destroy_vcpu,
.do_schedule = sedf_do_schedule,
+ .pick_cpu = sedf_pick_cpu,
.dump_cpu_state = sedf_dump_cpu_state,
.sleep = sedf_sleep,
.wake = sedf_wake,
diff -r 32e4952c0638 -r bb6cd7ba259b xen/common/schedule.c
--- a/xen/common/schedule.c Mon Nov 06 16:55:56 2006 +0000
+++ b/xen/common/schedule.c Mon Nov 06 17:32:00 2006 +0000
@@ -203,7 +203,6 @@ void vcpu_wake(struct vcpu *v)
static void vcpu_migrate(struct vcpu *v)
{
- cpumask_t online_affinity;
unsigned long flags;
int old_cpu;
@@ -218,8 +217,7 @@ static void vcpu_migrate(struct vcpu *v)
/* Switch to new CPU, then unlock old CPU. */
old_cpu = v->processor;
- cpus_and(online_affinity, v->cpu_affinity, cpu_online_map);
- v->processor = first_cpu(online_affinity);
+ v->processor = SCHED_OP(pick_cpu, v);
spin_unlock_irqrestore(
&per_cpu(schedule_data, old_cpu).schedule_lock, flags);
diff -r 32e4952c0638 -r bb6cd7ba259b xen/include/xen/sched-if.h
--- a/xen/include/xen/sched-if.h Mon Nov 06 16:55:56 2006 +0000
+++ b/xen/include/xen/sched-if.h Mon Nov 06 17:32:00 2006 +0000
@@ -74,6 +74,7 @@ struct scheduler {
struct task_slice (*do_schedule) (s_time_t);
+ int (*pick_cpu) (struct vcpu *);
int (*adjust) (struct domain *,
struct xen_domctl_scheduler_op *);
void (*dump_settings) (void);
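
Finally, the shape of the interface change itself: vcpu_migrate() no longer
computes first_cpu(affinity & online) in the generic layer but defers to the
scheduler's new pick_cpu hook, which sedf implements with the old behaviour
and credit implements with the topology-aware csched_cpu_pick(). The sketch
below uses simplified stand-in types (toy_vcpu and toy_scheduler are
hypothetical, not Xen's struct vcpu or struct scheduler) to illustrate that
ops-table pattern.

/*
 * Minimal sketch of the pick_cpu hook wiring, with stand-in types
 * and plain uint64_t masks instead of cpumask_t.
 */
#include <stdint.h>
#include <stdio.h>

struct toy_vcpu {
    int      processor;     /* CPU the VCPU currently runs on */
    uint64_t cpu_affinity;  /* allowed CPUs, one bit per CPU */
};

struct toy_scheduler {
    const char *name;
    int (*pick_cpu)(struct toy_vcpu *v);  /* the hook this patch adds */
};

static const uint64_t cpu_online_map = 0x0f;  /* assume CPUs 0-3 online */

/* Analogue of sedf_pick_cpu(): first online CPU in the affinity mask. */
static int simple_pick_cpu(struct toy_vcpu *v)
{
    uint64_t online_affinity = v->cpu_affinity & cpu_online_map;

    /* Assumes the mask is non-empty (the credit scheduler asserts this). */
    return __builtin_ctzll(online_affinity);
}

static const struct toy_scheduler sched_simple = {
    .name     = "simple",
    .pick_cpu = simple_pick_cpu,
};

/* Analogue of vcpu_migrate(): the generic layer now defers to the hook. */
static void migrate(const struct toy_scheduler *ops, struct toy_vcpu *v)
{
    v->processor = ops->pick_cpu(v);
}

int main(void)
{
    struct toy_vcpu v = { .processor = 5, .cpu_affinity = 0x0c };

    migrate(&sched_simple, &v);
    printf("migrated to CPU %d\n", v.processor);  /* prints 2 */
    return 0;
}

This keeps the common migration path scheduler-agnostic: sedf reproduces the
previous first-online-CPU behaviour, while the credit scheduler plugs in its
topology heuristic without touching the generic code.
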