
[Xen-devel] [PATCH 2/4] xen: credit2: allow setting and getting the utilization cap



As the cap is already present in Credit1 as a parameter, all
the wiring is already in place for it to percolate down
to csched2_dom_cntl() too.

In this commit, we actually deal with it, and implement
setting, changing or disabling the cap of a domain.

Signed-off-by: Dario Faggioli <dario.faggioli@xxxxxxxxxx>
---
Cc: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
Cc: Anshul Makkar <anshul.makkar@xxxxxxxxxx>
---
 xen/common/sched_credit2.c  |  119 +++++++++++++++++++++++++++++++++++++++++--
 xen/include/public/domctl.h |    1 
 2 files changed, 115 insertions(+), 5 deletions(-)
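
For reference, here is a minimal, hypothetical toolstack-side sketch (not
part of this patch) of how the new cap field could be driven, assuming the
existing libxc wrappers xc_sched_credit2_domain_get()/_set() simply pass
struct xen_domctl_sched_credit2 through to csched2_dom_cntl():

#include <xenctrl.h>

/* Cap a domain at cap% of one pCPU (cap == 0 disables the cap). */
static int set_credit2_cap(xc_interface *xch, uint32_t domid, uint16_t cap)
{
    struct xen_domctl_sched_credit2 sdom;
    int rc;

    /* Fetch the current parameters first, so the weight is preserved. */
    rc = xc_sched_credit2_domain_get(xch, domid, &sdom);
    if ( rc )
        return rc;

    sdom.cap = cap;

    return xc_sched_credit2_domain_set(xch, domid, &sdom);
}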

diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
index ba4bf4b..3f7b8f0 100644
--- a/xen/common/sched_credit2.c
+++ b/xen/common/sched_credit2.c
@@ -2498,30 +2498,35 @@ csched2_dom_cntl(
     struct csched2_dom * const sdom = csched2_dom(d);
     struct csched2_private *prv = csched2_priv(ops);
     unsigned long flags;
+    struct vcpu *v;
     int rc = 0;
 
     /*
      * Locking:
      *  - we must take the private lock for accessing the weights of the
-     *    vcpus of d,
+     *    vcpus of d, and/or the cap;
      *  - in the putinfo case, we also need the runqueue lock(s), for
     *    updating the max weight of the runqueue(s).
+     *    If changing the cap, we also need the budget_lock, for updating
+     *    the value of the domain budget pool (and the runqueue lock,
+     *    for adjusting the parameters and rescheduling any vCPU that is
+     *    running at the time of the change).
      */
     switch ( op->cmd )
     {
     case XEN_DOMCTL_SCHEDOP_getinfo:
         read_lock_irqsave(&prv->lock, flags);
         op->u.credit2.weight = sdom->weight;
+        op->u.credit2.cap = sdom->cap;
         read_unlock_irqrestore(&prv->lock, flags);
         break;
     case XEN_DOMCTL_SCHEDOP_putinfo:
+        write_lock_irqsave(&prv->lock, flags);
+        /* Weight */
         if ( op->u.credit2.weight != 0 )
         {
-            struct vcpu *v;
             int old_weight;
 
-            write_lock_irqsave(&prv->lock, flags);
-
             old_weight = sdom->weight;
 
             sdom->weight = op->u.credit2.weight;
@@ -2539,9 +2544,113 @@ csched2_dom_cntl(
 
                 vcpu_schedule_unlock(lock, svc->vcpu);
             }
+        }
+        /* Cap */
+        if ( op->u.credit2.cap != 0 )
+        {
+            spin_lock(&sdom->budget_lock);
+            sdom->tot_budget = (CSCHED2_BDGT_REPL_PERIOD / 100) * op->u.credit2.cap;
+            spin_unlock(&sdom->budget_lock);
+
+            if ( sdom->cap == 0 )
+            {
+                /*
+                 * Let's give the domain the budget it is entitled to,
+                 * and queue its first replenishment event.
+                 *
+                 * Since cap is currently disabled for this domain, we
+                 * know no vCPU is messing with the domain's budget, and
+                 * the replenishment timer is still off.
+                 * For these reasons, it is safe to do the following without
+                 * taking the budget_lock.
+                 */
+                sdom->budget = sdom->tot_budget;
+                sdom->next_repl = NOW() + CSCHED2_BDGT_REPL_PERIOD;
+                set_timer(&sdom->repl_timer, sdom->next_repl);
+
+                /*
+                 * Now, let's enable budget accounting for all the vCPUs.
+                 * To make sure that they start honouring the domain's cap,
+                 * we set their budget to 0.
+                 * This way, as soon as they try to run, they will have
+                 * to get some budget.
+                 *
+                 * For the vCPUs that are already running, we trigger the
+                 * scheduler on their pCPU. When, as a consequence of this,
+                 * csched2_schedule() runs, it will figure out there is no
+                 * budget, and the vCPU will try to get some (and be parked,
+                 * if there's none, and we'll switch to someone else).
+                 */
+                for_each_vcpu ( d, v )
+                {
+                    struct csched2_vcpu *svc = csched2_vcpu(v);
+                    spinlock_t *lock = vcpu_schedule_lock(svc->vcpu);
+
+                    if ( v->is_running )
+                    {
+                        unsigned int cpu = v->processor;
+                        struct csched2_runqueue_data *rqd = c2rqd(ops, cpu);
+
+                        ASSERT(curr_on_cpu(cpu) == v);
+
+                        /*
+                         * We are triggering a reschedule on the vCPU's
+                         * pCPU. That will run burn_credits() and, since
+                         * the vCPU is capped now, it would charge all the
+                         * execution time of this last round as budget as
+                         * well. That will make the vCPU budget go negative,
+                         * potentially by a large amount, and it's unfair.
+                         *
+                         * To avoid that, call burn_credits() here, to do the
+                         * accounting of this current running instance now,
+                         * with budgeting still disabled. This does not
+                         * prevent some small amount of budget being charged
+                         * to the vCPU (i.e., the amount of time it runs from
+                         * now to when scheduling happens). The budget will
+                         * also go below 0, but by a lot less than it would
+                         * if we didn't do this.
+                         */
+                        burn_credits(rqd, svc, NOW());
+                        __cpumask_set_cpu(cpu, &rqd->tickled);
+                        ASSERT(!cpumask_test_cpu(cpu, &rqd->smt_idle));
+                        cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
+                    }
+                    svc->budget = 0;
+                    vcpu_schedule_unlock(lock, svc->vcpu);
+                }
+            }
+            sdom->cap = op->u.credit2.cap;
+        }
+        else if ( sdom->cap != 0 )
+        {
+            stop_timer(&sdom->repl_timer);
+
+            /* Disable budget accounting for all the vCPUs. */
+            for_each_vcpu ( d, v )
+            {
+                struct csched2_vcpu *svc = csched2_vcpu(v);
+                spinlock_t *lock = vcpu_schedule_lock(svc->vcpu);
+
+                svc->budget = STIME_MAX;
 
-            write_unlock_irqrestore(&prv->lock, flags);
+                vcpu_schedule_unlock(lock, svc->vcpu);
+            }
+            sdom->cap = 0;
+            /*
+             * We are disabling the cap for this domain, which may have vCPUs
+             * waiting for a replenishment, and we need to unpark them all.
+             * Parked vCPUs sit in the domain's parked_vcpus list, which
+             * would normally need to be manipulated with the budget_lock
+             * held. However, we have already disabled budget accounting for
+             * all the vCPUs of this domain in the loop above, and therefore
+             * no vCPU will run out of budget and need to be added to the
+             * list.
+             *
+             * For this reason, it is safe, in this case, to just go ahead
+             * and drain the list without taking the budget_lock.
+            unpark_parked_vcpus(ops, &sdom->parked_vcpus);
         }
+        write_unlock_irqrestore(&prv->lock, flags);
         break;
     default:
         rc = -EINVAL;
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 515c603..795972e 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -355,6 +355,7 @@ typedef struct xen_domctl_sched_credit {
 
 typedef struct xen_domctl_sched_credit2 {
     uint16_t weight;
+    uint16_t cap;
 } xen_domctl_sched_credit2_t;
 
 typedef struct xen_domctl_sched_rtds {
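
Purely as a numerical illustration of the tot_budget computation in
csched2_dom_cntl() (standalone snippet, not part of the patch; the 10ms
replenishment period is an assumption for the example, the real value is
whatever CSCHED2_BDGT_REPL_PERIOD is defined to):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    const int64_t period = 10000000LL; /* assumed 10ms period, in ns */
    const uint16_t cap = 50;           /* domain capped at 50% of one pCPU */

    /* Same formula as in the patch: (period / 100) * cap. */
    int64_t tot_budget = (period / 100) * cap;

    printf("budget per period: %lld ns (%.1f ms)\n",
           (long long)tot_budget, tot_budget / 1e6);

    return 0;
}

I.e., with cap = 50 the domain is given 5ms worth of budget to spend in
every 10ms period, which is indeed half of one pCPU.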

