
[Xen-devel] [PATCH RFC v1] xen:rtds: towards work conserving RTDS



Make the RTDS scheduler work-conserving so that it utilizes otherwise
idle resources, without breaking the real-time guarantees.

VCPU model:
Each real-time VCPU is extended with a work-conserving flag
and a priority_level field.
When a VCPU's budget is depleted in the current period,
if it has the work-conserving flag set,
its priority_level is increased by 1 and its budget is refilled;
otherwise, the VCPU is moved to the depletedq.
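
As an illustration (field names as introduced by this patch), a
work-conserving VCPU with period 10ms and budget 4ms that exhausts its
4ms keeps running at priority_level 1 with a refilled budget instead of
leaving the core idle; the level is reset to 0 at the next
replenishment. The depletion path, mirroring the burn_budget() change
below, is roughly:

    if ( svc->cur_budget <= 0 )
    {
        if ( svc->is_work_conserving )
        {
            /* Work conserving: drop to the next priority level and
             * refill the budget, so the VCPU stays runnable. */
            svc->priority_level++;
            svc->cur_budget = svc->budget;
        }
        else
        {
            /* Not work conserving: park on the depletedq until the
             * next replenishment. */
            svc->cur_budget = 0;
            __set_bit(__RTDS_depleted, &svc->flags);
        }
    }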

Scheduling policy: modified global EDF.
A VCPU v1 has higher priority than another VCPU v2 if
(i) v1 has a smaller priority_level; or
(ii) v1 has the same priority_level but a smaller (earlier) deadline.
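
For example, a VCPU at priority_level 0 with a 30ms deadline is picked
ahead of a VCPU at priority_level 1 with a 10ms deadline; within the
same priority_level the earlier deadline wins, as in plain EDF. A
sketch of the comparison, matching the compare_vcpu_priority() helper
added below:

    /* Returns 1 if v1's priority is >= v2's priority, -1 otherwise. */
    static int
    compare_vcpu_priority(const struct rt_vcpu *v1, const struct rt_vcpu *v2)
    {
        if ( v1->priority_level < v2->priority_level ||
             ( v1->priority_level == v2->priority_level &&
               v1->cur_deadline <= v2->cur_deadline ) )
            return 1;
        else
            return -1;
    }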

Signed-off-by: Meng Xu <mengxu@xxxxxxxxxxxxx>
---
 xen/common/sched_rt.c | 71 ++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 59 insertions(+), 12 deletions(-)

diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
index 39f6bee..740a712 100644
--- a/xen/common/sched_rt.c
+++ b/xen/common/sched_rt.c
@@ -49,13 +49,16 @@
  * A PCPU is feasible if the VCPU can run on this PCPU and (the PCPU is idle or
  * has a lower-priority VCPU running on it.)
  *
- * Each VCPU has a dedicated period and budget.
+ * Each VCPU has a dedicated period, a budget, and an is_work_conserving flag;
  * The deadline of a VCPU is at the end of each period;
  * A VCPU has its budget replenished at the beginning of each period;
  * While scheduled, a VCPU burns its budget.
  * The VCPU needs to finish its budget before its deadline in each period;
  * The VCPU discards its unused budget at the end of each period.
- * If a VCPU runs out of budget in a period, it has to wait until next period.
+ * A work conserving VCPU has its is_work_conserving flag set to true;
+ * When a VCPU runs out of budget in a period, if it is work conserving,
+ * it increases its priority_level by 1 and refills its budget; otherwise,
+ * it has to wait until the next period.
  *
  * Each VCPU is implemented as a deferable server.
  * When a VCPU has a task running on it, its budget is continuously burned;
@@ -63,7 +66,8 @@
  *
  * Queue scheme:
  * A global runqueue and a global depletedqueue for each CPU pool.
- * The runqueue holds all runnable VCPUs with budget, sorted by deadline;
+ * The runqueue holds all runnable VCPUs with budget,
+ * sorted by priority_level and deadline;
  * The depletedqueue holds all VCPUs without budget, unsorted;
  *
  * Note: cpumask and cpupool is supported.
@@ -191,6 +195,7 @@ struct rt_vcpu {
     /* VCPU parameters, in nanoseconds */
     s_time_t period;
     s_time_t budget;
+    bool_t is_work_conserving;   /* is vcpu work conserving */
 
     /* VCPU current infomation in nanosecond */
     s_time_t cur_budget;         /* current budget */
@@ -201,6 +206,8 @@ struct rt_vcpu {
     struct rt_dom *sdom;
     struct vcpu *vcpu;
 
+    unsigned priority_level;     /* lower value means higher priority */
+
     unsigned flags;              /* mark __RTDS_scheduled, etc.. */
 };
 
@@ -245,6 +252,11 @@ static inline struct list_head *rt_replq(const struct scheduler *ops)
     return &rt_priv(ops)->replq;
 }
 
+static inline bool_t is_work_conserving(const struct rt_vcpu *svc)
+{
+    return svc->is_work_conserving;
+}
+
 /*
  * Helper functions for manipulating the runqueue, the depleted queue,
  * and the replenishment events queue.
@@ -273,6 +285,20 @@ vcpu_on_replq(const struct rt_vcpu *svc)
     return !list_empty(&svc->replq_elem);
 }
 
+/* Returns 1 if v1's priority is higher than or equal to v2's priority;
+ * otherwise, returns -1.
+ */
+static int
+compare_vcpu_priority(const struct rt_vcpu *v1, const struct rt_vcpu *v2)
+{
+    if ( v1->priority_level < v2->priority_level ||
+         ( v1->priority_level == v2->priority_level &&
+           v1->cur_deadline <= v2->cur_deadline ) )
+        return 1;
+    else
+        return -1;
+}
+
 /*
  * Debug related code, dump vcpu/cpu information
  */
@@ -303,6 +329,7 @@ rt_dump_vcpu(const struct scheduler *ops, const struct rt_vcpu *svc)
     cpulist_scnprintf(keyhandler_scratch, sizeof(keyhandler_scratch), mask);
     printk("[%5d.%-2u] cpu %u, (%"PRI_stime", %"PRI_stime"),"
            " cur_b=%"PRI_stime" cur_d=%"PRI_stime" last_start=%"PRI_stime"\n"
+           " \t\t priority_level=%d work_conserving=%d\n"
            " \t\t onQ=%d runnable=%d flags=%x effective hard_affinity=%s\n",
             svc->vcpu->domain->domain_id,
             svc->vcpu->vcpu_id,
@@ -312,6 +339,8 @@ rt_dump_vcpu(const struct scheduler *ops, const struct rt_vcpu *svc)
             svc->cur_budget,
             svc->cur_deadline,
             svc->last_start,
+            svc->priority_level,
+            is_work_conserving(svc),
             vcpu_on_q(svc),
             vcpu_runnable(svc->vcpu),
             svc->flags,
@@ -423,15 +452,18 @@ rt_update_deadline(s_time_t now, struct rt_vcpu *svc)
      */
     svc->last_start = now;
     svc->cur_budget = svc->budget;
+    svc->priority_level = 0;
 
     /* TRACE */
     {
         struct __packed {
             unsigned vcpu:16, dom:16;
+            unsigned priority_level;
             uint64_t cur_deadline, cur_budget;
         } d;
         d.dom = svc->vcpu->domain->domain_id;
         d.vcpu = svc->vcpu->vcpu_id;
+        d.priority_level = svc->priority_level;
         d.cur_deadline = (uint64_t) svc->cur_deadline;
         d.cur_budget = (uint64_t) svc->cur_budget;
         trace_var(TRC_RTDS_BUDGET_REPLENISH, 1,
@@ -477,7 +509,7 @@ deadline_queue_insert(struct rt_vcpu * (*qelem)(struct list_head *),
     list_for_each ( iter, queue )
     {
         struct rt_vcpu * iter_svc = (*qelem)(iter);
-        if ( svc->cur_deadline <= iter_svc->cur_deadline )
+        if ( compare_vcpu_priority(svc, iter_svc) > 0 )
             break;
         pos++;
     }
@@ -537,8 +569,9 @@ runq_insert(const struct scheduler *ops, struct rt_vcpu *svc)
     ASSERT( !vcpu_on_q(svc) );
     ASSERT( vcpu_on_replq(svc) );
 
-    /* add svc to runq if svc still has budget */
-    if ( svc->cur_budget > 0 )
+    /* add svc to runq if svc still has budget or svc is work_conserving */
+    if ( svc->cur_budget > 0 ||
+         is_work_conserving(svc) )
         deadline_runq_insert(svc, &svc->q_elem, runq);
     else
         list_add(&svc->q_elem, &prv->depletedq);
@@ -857,6 +890,8 @@ rt_alloc_vdata(const struct scheduler *ops, struct vcpu *vc, void *dd)
     svc->vcpu = vc;
     svc->last_start = 0;
 
+    svc->is_work_conserving = 1;
+    svc->priority_level = 0;
     svc->period = RTDS_DEFAULT_PERIOD;
     if ( !is_idle_vcpu(vc) )
         svc->budget = RTDS_DEFAULT_BUDGET;
@@ -966,8 +1001,16 @@ burn_budget(const struct scheduler *ops, struct rt_vcpu *svc, s_time_t now)
 
     if ( svc->cur_budget <= 0 )
     {
-        svc->cur_budget = 0;
-        __set_bit(__RTDS_depleted, &svc->flags);
+        if ( is_work_conserving(svc) )
+        {
+            svc->priority_level++;
+            svc->cur_budget = svc->budget;
+        }
+        else
+        {
+            svc->cur_budget = 0;
+            __set_bit(__RTDS_depleted, &svc->flags);
+        }
     }
 
     /* TRACE */
@@ -976,11 +1019,15 @@ burn_budget(const struct scheduler *ops, struct rt_vcpu *svc, s_time_t now)
             unsigned vcpu:16, dom:16;
             uint64_t cur_budget;
             int delta;
+            unsigned priority_level;
+            bool_t is_work_conserving;
         } d;
         d.dom = svc->vcpu->domain->domain_id;
         d.vcpu = svc->vcpu->vcpu_id;
         d.cur_budget = (uint64_t) svc->cur_budget;
         d.delta = delta;
+        d.priority_level = svc->priority_level;
+        d.is_work_conserving = svc->is_work_conserving;
         trace_var(TRC_RTDS_BUDGET_BURN, 1,
                   sizeof(d),
                   (unsigned char *) &d);
@@ -1088,7 +1135,7 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched
              vcpu_runnable(current) &&
              scurr->cur_budget > 0 &&
              ( is_idle_vcpu(snext->vcpu) ||
-               scurr->cur_deadline <= snext->cur_deadline ) )
+               compare_vcpu_priority(scurr, snext) > 0 ) )
             snext = scurr;
     }
 
@@ -1198,13 +1245,13 @@ runq_tickle(const struct scheduler *ops, struct rt_vcpu *new)
         }
         iter_svc = rt_vcpu(iter_vc);
         if ( latest_deadline_vcpu == NULL ||
-             iter_svc->cur_deadline > latest_deadline_vcpu->cur_deadline )
+             compare_vcpu_priority(iter_svc, latest_deadline_vcpu) < 0 )
             latest_deadline_vcpu = iter_svc;
     }
 
     /* 3) candicate has higher priority, kick out lowest priority vcpu */
     if ( latest_deadline_vcpu != NULL &&
-         new->cur_deadline < latest_deadline_vcpu->cur_deadline )
+         compare_vcpu_priority(latest_deadline_vcpu, new) < 0 )
     {
         SCHED_STAT_CRANK(tickled_busy_cpu);
         cpu_to_tickle = latest_deadline_vcpu->vcpu->processor;
@@ -1493,7 +1540,7 @@ static void repl_timer_handler(void *data){
         {
             struct rt_vcpu *next_on_runq = q_elem(runq->next);
 
-            if ( svc->cur_deadline > next_on_runq->cur_deadline )
+            if ( compare_vcpu_priority(svc, next_on_runq) < 0 )
                 runq_tickle(ops, next_on_runq);
         }
         else if ( __test_and_clear_bit(__RTDS_depleted, &svc->flags) &&
-- 
1.9.1

