Xen project Mailing List

[Xen-devel] [PATCH v2 1/1] ratelimit: Implement rate limit for credit2 scheduler

Rate limit ensures that a vcpu will execute for a minimum amount of time before being put at the back of a queue or being preempted by a higher priority thread.

To: <xen-devel@xxxxxxxxxxxxx>

From: Anshul Makkar <anshul.makkar@xxxxxxxxxx>

Date: Mon, 18 Jul 2016 13:22:24 +0100

Cc: george.dunlap@xxxxxxxxxxxxx, dario.faggioli@xxxxxxxxxx, Anshul Makkar <anshul.makkar@xxxxxxxxxx>

Delivery-date: Mon, 18 Jul 2016 12:22:48 +0000

List-id: Xen developer discussion <xen-devel.lists.xen.org>

It introduces a minimum amount of latency to enable a VM to batch its work, and it also ensures that the system is not spending most of its time in VMEXIT/VMENTRY because of a VM that is waking/sleeping at a high rate. Ratelimit can be disabled by setting it to 0. Signed-off-by: Anshul Makkar <anshul.makkar@xxxxxxxxxx> --- Changes in v2: * algo for time slice calculation based on ratelimit_us has changed. * initial value of prv->ratelimit_us. * other changes based on review comments. --- xen/common/sched_credit2.c | 124 +++++++++++++++++++++++++++++++++------------ 1 file changed, 93 insertions(+), 31 deletions(-) diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c index 8b95a47..68bcdb8 100644 --- a/xen/common/sched_credit2.c +++ b/xen/common/sched_credit2.c @@ -280,6 +280,7 @@ struct csched2_private { struct csched2_runqueue_data rqd[NR_CPUS]; unsigned int load_window_shift; + unsigned ratelimit_us; /* each cpupool can have its onw ratelimit */ }; /* @@ -1601,6 +1602,34 @@ csched2_dom_cntl( return rc; } +static int csched2_sys_cntl(const struct scheduler *ops, + struct xen_sysctl_scheduler_op *sc) +{ + int rc = -EINVAL; + xen_sysctl_credit_schedule_t *params = &sc->u.sched_credit; + struct csched2_private *prv = CSCHED2_PRIV(ops); + unsigned long flags; + + switch (sc->cmd ) + { + case XEN_SYSCTL_SCHEDOP_putinfo: + if ( params->ratelimit_us && + ( params->ratelimit_us < CSCHED2_MIN_TIMER || + params->ratelimit_us > MICROSECS(CSCHED2_MAX_TIMER) )) + return rc; + spin_lock_irqsave(&prv->lock, flags); + prv->ratelimit_us = params->ratelimit_us; + spin_unlock_irqrestore(&prv->lock, flags); + break; + + case XEN_SYSCTL_SCHEDOP_getinfo: + params->ratelimit_us = prv->ratelimit_us; + rc = 0; + break; + } + return rc; +} + static void * csched2_alloc_domdata(const struct scheduler *ops, struct domain *dom) { @@ -1670,12 +1699,14 @@ csched2_dom_destroy(const struct scheduler *ops, struct domain *dom) /* How long should we let this vcpu run for? 
*/ static s_time_t -csched2_runtime(const struct scheduler *ops, int cpu, struct csched2_vcpu *snext) +csched2_runtime(const struct scheduler *ops, int cpu, + struct csched2_vcpu *snext, s_time_t now) { - s_time_t time; + s_time_t time, min_time; int rt_credit; /* Proposed runtime measured in credits */ struct csched2_runqueue_data *rqd = RQD(ops, cpu); struct list_head *runq = &rqd->runq; + struct csched2_private *prv = CSCHED2_PRIV(ops); /* * If we're idle, just stay so. Others (or external events) @@ -1688,9 +1719,20 @@ csched2_runtime(const struct scheduler *ops, int cpu, struct csched2_vcpu *snext * 1) Run until snext's credit will be 0 * 2) But if someone is waiting, run until snext's credit is equal * to his - * 3) But never run longer than MAX_TIMER or shorter than MIN_TIMER. + * 3) But never run longer than MAX_TIMER or shorter than MIN_TIMER or + * run for ratelimit time. */ + /* Calculate mintime */ + min_time = CSCHED2_MIN_TIMER; + if ( prv->ratelimit_us ) { + s_time_t ratelimit_min = prv->ratelimit_us; + ratelimit_min = snext->vcpu->runstate.state_entry_time + + MICROSECS(prv->ratelimit_us) - now; + if ( ratelimit_min > min_time ) + min_time = ratelimit_min; + } + /* 1) Basic time: Run until credit is 0. */ rt_credit = snext->credit; @@ -1707,32 +1749,33 @@ csched2_runtime(const struct scheduler *ops, int cpu, struct csched2_vcpu *snext } } - /* The next guy may actually have a higher credit, if we've tried to - * avoid migrating him from a different cpu. DTRT. */ - if ( rt_credit <= 0 ) + /* + * The next guy ont the runqueue may actually have a higher credit, + * if we've tried to avoid migrating him from a different cpu. + * Setting time=0 will ensure the minimum timeslice is chosen. + * FIXME: See if we can eliminate this conversion if we know time + * will be outside (MIN,MAX). Probably requires pre-calculating + * credit values of MIN,MAX per vcpu, since each vcpu burns credit + * at a different rate. 
+ */ + if (rt_credit > 0) + time = c2t(rqd, rt_credit, snext); + else + time = 0; + + /* + * Never run longer than MAX_TIMER or less than MIN_TIMER or for + * rate_limit time. + */ + if ( time < min_time) { - time = CSCHED2_MIN_TIMER; - SCHED_STAT_CRANK(runtime_min_timer); + time = min_time; + SCHED_STAT_CRANK(runtime_min_timer); } - else + else if (time > CSCHED2_MAX_TIMER) { - /* FIXME: See if we can eliminate this conversion if we know time - * will be outside (MIN,MAX). Probably requires pre-calculating - * credit values of MIN,MAX per vcpu, since each vcpu burns credit - * at a different rate. */ - time = c2t(rqd, rt_credit, snext); - - /* Check limits */ - if ( time < CSCHED2_MIN_TIMER ) - { - time = CSCHED2_MIN_TIMER; - SCHED_STAT_CRANK(runtime_min_timer); - } - else if ( time > CSCHED2_MAX_TIMER ) - { - time = CSCHED2_MAX_TIMER; - SCHED_STAT_CRANK(runtime_max_timer); - } + time = CSCHED2_MAX_TIMER; + SCHED_STAT_CRANK(runtime_max_timer); } return time; @@ -1746,7 +1789,7 @@ void __dump_execstate(void *unused); static struct csched2_vcpu * runq_candidate(struct csched2_runqueue_data *rqd, struct csched2_vcpu *scurr, - int cpu, s_time_t now) + int cpu, s_time_t now, struct csched2_private *prv) { struct list_head *iter; struct csched2_vcpu *snext = NULL; @@ -1757,6 +1800,17 @@ runq_candidate(struct csched2_runqueue_data *rqd, else snext = CSCHED2_VCPU(idle_vcpu[cpu]); + /* + * Return the current vcpu if it has executed for less than ratelimit. + * Adjuststment for the selected vcpu's credit and decision + * for how long it will run will be taken in csched2_runtime. 
+ */ + if ( prv->ratelimit_us && !is_idle_vcpu(scurr->vcpu) && + vcpu_runnable(scurr->vcpu) && + (now - scurr->vcpu->runstate.state_entry_time) < + MICROSECS(prv->ratelimit_us) ) + return scurr; + list_for_each( iter, &rqd->runq ) { struct csched2_vcpu * svc = list_entry(iter, struct csched2_vcpu, runq_elem); @@ -1775,9 +1829,13 @@ runq_candidate(struct csched2_runqueue_data *rqd, } /* If the next one on the list has more credit than current - * (or idle, if current is not runnable), choose it. */ + * (or idle, if current is not runnable) and current one has already + * executed for more than ratelimit. choose it. + * Control has reached here means that current vcpu has executed > + * ratelimit_us or ratelimit is off, so chose the next one. + */ if ( svc->credit > snext->credit ) - snext = svc; + snext = svc; /* In any case, if we got this far, break. */ break; @@ -1800,6 +1858,7 @@ csched2_schedule( struct csched2_vcpu * const scurr = CSCHED2_VCPU(current); struct csched2_vcpu *snext = NULL; struct task_slice ret; + struct csched2_private *prv = CSCHED2_PRIV(ops); SCHED_STAT_CRANK(schedule); CSCHED2_VCPU_CHECK(current); @@ -1870,7 +1929,7 @@ csched2_schedule( snext = CSCHED2_VCPU(idle_vcpu[cpu]); } else - snext=runq_candidate(rqd, scurr, cpu, now); + snext=runq_candidate(rqd, scurr, cpu, now, prv); /* If switching from a non-idle runnable vcpu, put it * back on the runqueue. */ @@ -1934,7 +1993,7 @@ csched2_schedule( /* * Return task to run next... 
*/ - ret.time = csched2_runtime(ops, cpu, snext); + ret.time = csched2_runtime(ops, cpu, snext, now); ret.task = snext->vcpu; CSCHED2_VCPU_CHECK(ret.task); @@ -2366,6 +2425,8 @@ csched2_init(struct scheduler *ops) prv->runq_map[i] = -1; prv->rqd[i].id = -1; } + /* initialize ratelimit */ + prv->ratelimit_us = sched_ratelimit_us; prv->load_window_shift = opt_load_window_shift; @@ -2398,6 +2459,7 @@ static const struct scheduler sched_credit2_def = { .wake = csched2_vcpu_wake, .adjust = csched2_dom_cntl, + .adjust_global = csched2_sys_cntl, .pick_cpu = csched2_cpu_pick, .migrate = csched2_vcpu_migrate, -- 1.9.1 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx https://lists.xen.org/xen-devel

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.