diff --git a/xen/common/sched_null.c b/xen/common/sched_null.c
index 26c6f0f129..4fc6f3a3c5 100644
--- a/xen/common/sched_null.c
+++ b/xen/common/sched_null.c
@@ -565,50 +565,52 @@ static void null_vcpu_wake(const struct scheduler *ops, struct vcpu *v)
     else
         SCHED_STAT_CRANK(vcpu_wake_not_runnable);
 
+    if ( likely(per_cpu(npc, cpu).vcpu == v) )
+    {
+        cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
+        return;
+    }
+
     /*
      * If a vcpu is neither on a pCPU nor in the waitqueue, it means it was
-     * offline, and that it is now coming back being online.
+     * offline, and that it is now coming back being online. If we're lucky,
+     * and v->processor is free (and affinities match), we can just assign
+     * the vcpu to it (we own the proper lock already) and be done.
      */
-    if ( unlikely(per_cpu(npc, cpu).vcpu != v && list_empty(&nvc->waitq_elem)) )
+    if ( per_cpu(npc, cpu).vcpu == NULL &&
+         vcpu_check_affinity(v, cpu, BALANCE_HARD_AFFINITY) )
     {
-        spin_lock(&prv->waitq_lock);
-        list_add_tail(&nvc->waitq_elem, &prv->waitq);
-        spin_unlock(&prv->waitq_lock);
-
-        cpumask_and(cpumask_scratch_cpu(cpu), v->cpu_hard_affinity,
-                    cpupool_domain_cpumask(v->domain));
-
-        if ( !cpumask_intersects(&prv->cpus_free, cpumask_scratch_cpu(cpu)) )
+        if ( !has_soft_affinity(v) ||
+             vcpu_check_affinity(v, cpu, BALANCE_SOFT_AFFINITY) )
         {
-            dprintk(XENLOG_G_WARNING, "WARNING: d%dv%d not assigned to any CPU!\n",
-                    v->domain->domain_id, v->vcpu_id);
+            vcpu_assign(prv, v, cpu);
+            cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
             return;
         }
+    }
 
-        /*
-         * Now we would want to assign the vcpu to cpu, but we can't, because
-         * we don't have the lock. So, let's do the following:
-         * - try to remove cpu from the list of free cpus, to avoid races with
-         *   other onlining, inserting or migrating operations;
-         * - tickle the cpu, which will pickup work from the waitqueue, and
-         *   assign it to itself;
-         * - if we're racing already, and if there still are free cpus, try
-         *   again.
-         */
-        while ( cpumask_intersects(&prv->cpus_free, cpumask_scratch_cpu(cpu)) )
-        {
-            unsigned int new_cpu = pick_cpu(prv, v);
+    /*
+     * If v->processor is not free (or affinities do not match) we need
+     * to assign v to some other CPU, but we can't do it here, as:
+     * - we don't own the proper lock,
+     * - we can't change v->processor under vcpu_wake()'s feet.
+     * So we add it to the waitqueue, and tickle all the free CPUs (if any)
+     * on which v can run. The first one that schedules will pick it up.
+     */
+    spin_lock(&prv->waitq_lock);
+    list_add_tail(&nvc->waitq_elem, &prv->waitq);
+    spin_unlock(&prv->waitq_lock);
 
-            if ( test_and_clear_bit(new_cpu, &prv->cpus_free) )
-            {
-                cpu_raise_softirq(new_cpu, SCHEDULE_SOFTIRQ);
-                return;
-            }
-        }
-    }
+    cpumask_and(cpumask_scratch_cpu(cpu), v->cpu_hard_affinity,
+                cpupool_domain_cpumask(v->domain));
+    cpumask_and(cpumask_scratch_cpu(cpu), cpumask_scratch_cpu(cpu),
+                &prv->cpus_free);
 
-    /* Note that we get here only for vCPUs assigned to a pCPU */
-    cpu_raise_softirq(v->processor, SCHEDULE_SOFTIRQ);
+    if ( cpumask_empty(cpumask_scratch_cpu(cpu)) )
+        dprintk(XENLOG_G_WARNING, "WARNING: d%dv%d not assigned to any CPU!\n",
+                v->domain->domain_id, v->vcpu_id);
+    else
+        cpumask_raise_softirq(cpumask_scratch_cpu(cpu), SCHEDULE_SOFTIRQ);
 }
 
 static void null_vcpu_sleep(const struct scheduler *ops, struct vcpu *v)
@@ -822,6 +824,8 @@ static struct task_slice null_schedule(const struct scheduler *ops,
      */
     if ( unlikely(ret.task == NULL) )
     {
+        bool vcpu_found;
+
         spin_lock(&prv->waitq_lock);
 
         if ( list_empty(&prv->waitq) )
@@ -834,6 +838,7 @@ static struct task_slice null_schedule(const struct scheduler *ops,
          * it only in cases where a pcpu has no vcpu associated (e.g., as
          * said above, the cpu has just joined a cpupool).
          */
+        vcpu_found = false;
         for_each_affinity_balance_step( bs )
         {
             list_for_each_entry( wvc, &prv->waitq, waitq_elem )
@@ -844,13 +849,44 @@ static struct task_slice null_schedule(const struct scheduler *ops,
 
                 if ( vcpu_check_affinity(wvc->vcpu, cpu, bs) )
                 {
-                    vcpu_assign(prv, wvc->vcpu, cpu);
-                    list_del_init(&wvc->waitq_elem);
-                    ret.task = wvc->vcpu;
-                    goto unlock;
+                    spinlock_t *lock;
+
+                    vcpu_found = true;
+
+                    /*
+                     * If the vcpu in the waitqueue has just come up online,
+                     * we risk racing with vcpu_wake(). To avoid this, sync
+                     * on the spinlock that vcpu_wake() holds, while waking up
+                     * this vcpu (but only with trylock, or we may deadlock).
+                     */
+                    lock = pcpu_schedule_trylock(wvc->vcpu->processor);
+
+                    /*
+                     * We know the vcpu's lock is not this cpu's lock. In
+                     * fact, if it were, since this cpu is free, vcpu_wake()
+                     * would have assigned the vcpu to this cpu directly.
+                     */
+                    ASSERT(lock != per_cpu(schedule_data, cpu).schedule_lock);
+
+                    if ( lock ) {
+                        vcpu_assign(prv, wvc->vcpu, cpu);
+                        list_del_init(&wvc->waitq_elem);
+                        ret.task = wvc->vcpu;
+                        spin_unlock(lock);
+                        goto unlock;
+                    }
                 }
             }
         }
+        /*
+         * If we did find a vcpu with suitable affinity in the waitqueue, but
+         * we could not pick it up (due to lock contention), and hence we are
+         * still free, plan for another try. In fact, we don't want such vcpu
+         * to be stuck in the waitqueue, when there are free cpus where it
+         * could run.
+         */
+        if ( unlikely( vcpu_found && ret.task == NULL && !list_empty(&prv->waitq)) )
+            cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
  unlock:
         spin_unlock(&prv->waitq_lock);
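
Side note on the locking in the null_schedule() hunk: while already holding prv->waitq_lock, it grabs the waiting vcpu's pCPU scheduler lock with pcpu_schedule_trylock() rather than spin_lock(), and if the trylock fails it raises SCHEDULE_SOFTIRQ on itself so the pickup is retried, which is how the ABBA deadlock with null_vcpu_wake() (which holds the pCPU lock and then takes waitq_lock) is avoided. The standalone pthread sketch below illustrates only that trylock-or-defer pattern; all names in it (waitq_lock, remote_cpu_lock, need_retry, waker_side, scheduler_side) are hypothetical and are not Xen APIs.

/*
 * Standalone illustration (NOT part of the patch) of the trylock-or-defer
 * pattern used in null_schedule() above. Build with: gcc -pthread sketch.c
 * Only the locking structure mirrors the patch: the waker takes
 * remote_cpu_lock then waitq_lock, the scheduler takes waitq_lock and then
 * only *trylocks* remote_cpu_lock, deferring on contention.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t waitq_lock = PTHREAD_MUTEX_INITIALIZER;      /* ~ prv->waitq_lock */
static pthread_mutex_t remote_cpu_lock = PTHREAD_MUTEX_INITIALIZER; /* ~ remote pCPU scheduler lock */
static bool need_retry; /* ~ raising SCHEDULE_SOFTIRQ on ourselves */

/* Waker: already owns the remote pCPU lock, then queues on the waitqueue. */
static void *waker_side(void *arg)
{
    (void)arg;
    pthread_mutex_lock(&remote_cpu_lock);
    pthread_mutex_lock(&waitq_lock);
    printf("waker: vcpu queued on the waitqueue\n");
    pthread_mutex_unlock(&waitq_lock);
    pthread_mutex_unlock(&remote_cpu_lock);
    return NULL;
}

/* Scheduler: owns the waitqueue lock, must not block on the remote lock. */
static void *scheduler_side(void *arg)
{
    (void)arg;
    pthread_mutex_lock(&waitq_lock);
    if ( pthread_mutex_trylock(&remote_cpu_lock) == 0 )
    {
        /* Both locks held: safe to pull the vcpu off the waitqueue. */
        printf("scheduler: picked the vcpu up from the waitqueue\n");
        pthread_mutex_unlock(&remote_cpu_lock);
    }
    else
    {
        /* Contended: defer instead of blocking, so no ABBA deadlock. */
        need_retry = true;
        printf("scheduler: remote lock busy, deferring\n");
    }
    pthread_mutex_unlock(&waitq_lock);
    return NULL;
}

int main(void)
{
    pthread_t waker, sched;

    pthread_create(&waker, NULL, waker_side, NULL);
    pthread_create(&sched, NULL, scheduler_side, NULL);
    pthread_join(waker, NULL);
    pthread_join(sched, NULL);

    if ( need_retry )
        printf("main: retrying, as the SCHEDULE_SOFTIRQ handler would\n");
    return 0;
}

The key property is that only one of the two lock orders ever blocks; the other backs off and retries, which is why the hunk re-raises SCHEDULE_SOFTIRQ when vcpu_found is set but ret.task is still NULL.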