Xen project Mailing List

[Xen-devel] [PATCH v2 46/48] xen/sched: support core scheduling for moving cpus to/from cpupools

From: Juergen Gross <jgross@xxxxxxxx>

Date: Fri, 9 Aug 2019 16:58:31 +0200

Cc: Juergen Gross <jgross@xxxxxxxx>, Tim Deegan <tim@xxxxxxx>, Stefano Stabellini <sstabellini@xxxxxxxxxx>, Wei Liu <wl@xxxxxxx>, Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>, George Dunlap <George.Dunlap@xxxxxxxxxxxxx>, Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, Ian Jackson <ian.jackson@xxxxxxxxxxxxx>, Dario Faggioli <dfaggioli@xxxxxxxx>, Julien Grall <julien.grall@xxxxxxx>, Jan Beulich <jbeulich@xxxxxxxx>

Delivery-date: Fri, 09 Aug 2019 14:59:45 +0000

List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

With core scheduling active it is necessary to move multiple cpus at the same time to or from a cpupool in order to avoid splitting scheduling resources in between. Signed-off-by: Juergen Gross <jgross@xxxxxxxx> --- V1: new patch --- xen/common/cpupool.c | 100 +++++++++++++++++++++++++++++++++------------ xen/common/schedule.c | 3 +- xen/include/xen/sched-if.h | 1 + 3 files changed, 76 insertions(+), 28 deletions(-) diff --git a/xen/common/cpupool.c b/xen/common/cpupool.c index 41d594dace..6917488210 100644 --- a/xen/common/cpupool.c +++ b/xen/common/cpupool.c @@ -265,23 +265,30 @@ static int cpupool_assign_cpu_locked(struct cpupool *c, unsigned int cpu) { int ret; struct domain *d; + const cpumask_t *cpus; + + cpus = sched_get_opt_cpumask(c->opt_granularity, cpu); if ( (cpupool_moving_cpu == cpu) && (c != cpupool_cpu_moving) ) return -EADDRNOTAVAIL; - ret = schedule_cpu_add(cpu, c); + ret = schedule_cpu_add(cpumask_first(cpus), c); if ( ret ) return ret; - cpumask_clear_cpu(cpu, &cpupool_free_cpus); + rcu_read_lock(&sched_res_rculock); + + cpumask_andnot(&cpupool_free_cpus, &cpupool_free_cpus, cpus); if (cpupool_moving_cpu == cpu) { cpupool_moving_cpu = -1; cpupool_put(cpupool_cpu_moving); cpupool_cpu_moving = NULL; } - cpumask_set_cpu(cpu, c->cpu_valid); + cpumask_or(c->cpu_valid, c->cpu_valid, cpus); cpumask_and(c->res_valid, c->cpu_valid, sched_res_mask); + rcu_read_unlock(&sched_res_rculock); + rcu_read_lock(&domlist_read_lock); for_each_domain_in_cpupool(d, c) { @@ -295,6 +302,7 @@ static int cpupool_assign_cpu_locked(struct cpupool *c, unsigned int cpu) static int cpupool_unassign_cpu_epilogue(struct cpupool *c) { int cpu = cpupool_moving_cpu; + const cpumask_t *cpus; struct domain *d; int ret; @@ -307,7 +315,10 @@ static int cpupool_unassign_cpu_epilogue(struct cpupool *c) */ rcu_read_lock(&domlist_read_lock); ret = cpu_disable_scheduler(cpu); - cpumask_set_cpu(cpu, &cpupool_free_cpus); + + rcu_read_lock(&sched_res_rculock); + cpus = get_sched_res(cpu)->cpus; + 
cpumask_or(&cpupool_free_cpus, &cpupool_free_cpus, cpus); /* * cpu_disable_scheduler() returning an error doesn't require resetting @@ -320,7 +331,7 @@ static int cpupool_unassign_cpu_epilogue(struct cpupool *c) { ret = schedule_cpu_rm(cpu); if ( ret ) - cpumask_clear_cpu(cpu, &cpupool_free_cpus); + cpumask_andnot(&cpupool_free_cpus, &cpupool_free_cpus, cpus); else { cpupool_moving_cpu = -1; @@ -328,6 +339,7 @@ static int cpupool_unassign_cpu_epilogue(struct cpupool *c) cpupool_cpu_moving = NULL; } } + rcu_read_unlock(&sched_res_rculock); for_each_domain_in_cpupool(d, c) { @@ -342,6 +354,7 @@ static int cpupool_unassign_cpu_prologue(struct cpupool *c, unsigned int cpu) { int ret; struct domain *d; + const cpumask_t *cpus; spin_lock(&cpupool_lock); ret = -EADDRNOTAVAIL; @@ -352,7 +365,11 @@ static int cpupool_unassign_cpu_prologue(struct cpupool *c, unsigned int cpu) if ( !cpumask_test_cpu(cpu, c->cpu_valid) && (cpu != cpupool_moving_cpu) ) goto out; - if ( (c->n_dom > 0) && (cpumask_weight(c->cpu_valid) == 1) && + rcu_read_lock(&sched_res_rculock); + cpus = get_sched_res(cpu)->cpus; + + if ( (c->n_dom > 0) && + (cpumask_weight(c->cpu_valid) == cpumask_weight(cpus)) && (cpu != cpupool_moving_cpu) ) { rcu_read_lock(&domlist_read_lock); @@ -374,9 +391,10 @@ static int cpupool_unassign_cpu_prologue(struct cpupool *c, unsigned int cpu) cpupool_moving_cpu = cpu; atomic_inc(&c->refcnt); cpupool_cpu_moving = c; - cpumask_clear_cpu(cpu, c->cpu_valid); + cpumask_andnot(c->cpu_valid, c->cpu_valid, cpus); cpumask_and(c->res_valid, c->cpu_valid, sched_res_mask); + rcu_read_unlock(&domlist_read_lock); out: spin_unlock(&cpupool_lock); @@ -416,11 +434,13 @@ static int cpupool_unassign_cpu(struct cpupool *c, unsigned int cpu) { int work_cpu; int ret; + unsigned int master_cpu; cpupool_dprintk("cpupool_unassign_cpu(pool=%d,cpu=%d)\n", c->cpupool_id, cpu); - ret = cpupool_unassign_cpu_prologue(c, cpu); + master_cpu = sched_get_resource_cpu(cpu); + ret = 
cpupool_unassign_cpu_prologue(c, master_cpu); if ( ret ) { cpupool_dprintk("cpupool_unassign_cpu(pool=%d,cpu=%d) ret %d\n", @@ -428,12 +448,12 @@ static int cpupool_unassign_cpu(struct cpupool *c, unsigned int cpu) return ret; } - work_cpu = smp_processor_id(); - if ( work_cpu == cpu ) + work_cpu = sched_get_resource_cpu(smp_processor_id()); + if ( work_cpu == master_cpu ) { work_cpu = cpumask_first(cpupool0->cpu_valid); - if ( work_cpu == cpu ) - work_cpu = cpumask_next(cpu, cpupool0->cpu_valid); + if ( work_cpu == master_cpu ) + work_cpu = cpumask_last(cpupool0->cpu_valid); } return continue_hypercall_on_cpu(work_cpu, cpupool_unassign_cpu_helper, c); } @@ -499,6 +519,7 @@ void cpupool_rm_domain(struct domain *d) static int cpupool_cpu_add(unsigned int cpu) { int ret = 0; + const cpumask_t *cpus; spin_lock(&cpupool_lock); cpumask_clear_cpu(cpu, &cpupool_locked_cpus); @@ -512,7 +533,11 @@ static int cpupool_cpu_add(unsigned int cpu) */ rcu_read_lock(&sched_res_rculock); get_sched_res(cpu)->cpupool = NULL; - ret = cpupool_assign_cpu_locked(cpupool0, cpu); + + cpus = sched_get_opt_cpumask(cpupool0->opt_granularity, cpu); + if ( cpumask_subset(cpus, &cpupool_free_cpus) ) + ret = cpupool_assign_cpu_locked(cpupool0, cpu); + rcu_read_unlock(&sched_res_rculock); spin_unlock(&cpupool_lock); @@ -547,27 +572,33 @@ static void cpupool_cpu_remove(unsigned int cpu) static int cpupool_cpu_remove_prologue(unsigned int cpu) { int ret = 0; + cpumask_t *cpus; + unsigned int master_cpu; spin_lock(&cpupool_lock); - if ( cpumask_test_cpu(cpu, &cpupool_locked_cpus) ) + rcu_read_lock(&sched_res_rculock); + cpus = get_sched_res(cpu)->cpus; + master_cpu = sched_get_resource_cpu(cpu); + if ( cpumask_intersects(cpus, &cpupool_locked_cpus) ) ret = -EBUSY; else cpumask_set_cpu(cpu, &cpupool_locked_cpus); + rcu_read_unlock(&sched_res_rculock); spin_unlock(&cpupool_lock); if ( ret ) return ret; - if ( cpumask_test_cpu(cpu, cpupool0->cpu_valid) ) + if ( cpumask_test_cpu(master_cpu, 
cpupool0->cpu_valid) ) { /* Cpupool0 is populated only after all cpus are up. */ ASSERT(system_state == SYS_STATE_active); - ret = cpupool_unassign_cpu_prologue(cpupool0, cpu); + ret = cpupool_unassign_cpu_prologue(cpupool0, master_cpu); } - else if ( !cpumask_test_cpu(cpu, &cpupool_free_cpus) ) + else if ( !cpumask_test_cpu(master_cpu, &cpupool_free_cpus) ) ret = -ENODEV; return ret; @@ -584,12 +615,13 @@ static void cpupool_cpu_remove_forced(unsigned int cpu) { struct cpupool **c; int ret; + unsigned int master_cpu = sched_get_resource_cpu(cpu); for_each_cpupool ( c ) { - if ( cpumask_test_cpu(cpu, (*c)->cpu_valid) ) + if ( cpumask_test_cpu(master_cpu, (*c)->cpu_valid) ) { - ret = cpupool_unassign_cpu_prologue(*c, cpu); + ret = cpupool_unassign_cpu_prologue(*c, master_cpu); BUG_ON(ret); ret = cpupool_unassign_cpu_epilogue(*c); BUG_ON(ret); @@ -657,27 +689,43 @@ int cpupool_do_sysctl(struct xen_sysctl_cpupool_op *op) case XEN_SYSCTL_CPUPOOL_OP_ADDCPU: { unsigned cpu; + const cpumask_t *cpus; cpu = op->cpu; cpupool_dprintk("cpupool_assign_cpu(pool=%d,cpu=%d)\n", op->cpupool_id, cpu); + spin_lock(&cpupool_lock); + + c = cpupool_find_by_id(op->cpupool_id); + ret = -ENOENT; + if ( c == NULL ) + goto addcpu_out; if ( cpu == XEN_SYSCTL_CPUPOOL_PAR_ANY ) - cpu = cpumask_first(&cpupool_free_cpus); + { + for_each_cpu ( cpu, &cpupool_free_cpus ) + { + cpus = sched_get_opt_cpumask(c->opt_granularity, cpu); + if ( cpumask_subset(cpus, &cpupool_free_cpus) ) + break; + } + ret = -ENODEV; + if ( cpu >= nr_cpu_ids ) + goto addcpu_out; + } ret = -EINVAL; if ( cpu >= nr_cpu_ids ) goto addcpu_out; ret = -ENODEV; - if ( !cpumask_test_cpu(cpu, &cpupool_free_cpus) || - cpumask_test_cpu(cpu, &cpupool_locked_cpus) ) - goto addcpu_out; - c = cpupool_find_by_id(op->cpupool_id); - ret = -ENOENT; - if ( c == NULL ) + cpus = sched_get_opt_cpumask(c->opt_granularity, cpu); + if ( !cpumask_subset(cpus, &cpupool_free_cpus) || + cpumask_intersects(cpus, &cpupool_locked_cpus) ) goto addcpu_out; 
ret = cpupool_assign_cpu_locked(c, cpu); + addcpu_out: spin_unlock(&cpupool_lock); + cpupool_dprintk("cpupool_assign_cpu(pool=%d,cpu=%d) ret %d\n", op->cpupool_id, cpu, ret); } diff --git a/xen/common/schedule.c b/xen/common/schedule.c index a4555fd0fa..e0521de8ce 100644 --- a/xen/common/schedule.c +++ b/xen/common/schedule.c @@ -2555,8 +2555,7 @@ static struct notifier_block cpu_schedule_nfb = { .notifier_call = cpu_schedule_callback }; -static const cpumask_t *sched_get_opt_cpumask(enum sched_gran opt, - unsigned int cpu) +const cpumask_t *sched_get_opt_cpumask(enum sched_gran opt, unsigned int cpu) { const cpumask_t *mask; diff --git a/xen/include/xen/sched-if.h b/xen/include/xen/sched-if.h index 5a93ba3686..c152547a94 100644 --- a/xen/include/xen/sched-if.h +++ b/xen/include/xen/sched-if.h @@ -624,5 +624,6 @@ affinity_balance_cpumask(const struct sched_unit *unit, int step, } void sched_rm_cpu(unsigned int cpu); +const cpumask_t *sched_get_opt_cpumask(enum sched_gran opt, unsigned int cpu); #endif /* __XEN_SCHED_IF_H__ */ -- 2.16.4 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxxx https://lists.xenproject.org/mailman/listinfo/xen-devel

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.