
Re: [Xen-devel] [PATCH v2] Modified RTDS scheduler to use an event-driven model instead of polling.



All,

Sorry for the repeated send to the mailing list; I forgot to add some
Cc's and wanted to make sure everyone was included.

This is the new patch, which moves toward the event-driven structure
Dario suggested. It keeps the existing timer to drive the rt_schedule
function, which enforces budget depletion and makes scheduling decisions.
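
To make that side concrete, here is a rough standalone sketch of what
the scheduler timer enforces: burning the running vCPU's budget and
flagging it as depleted. The types and names are simplified stand-ins I
made up for illustration, not the real RTDS structures; the patch below
is the authoritative code.

#include <stdbool.h>

typedef long long s_time_t;   /* stand-in for Xen's signed time type */

struct vcpu_model {
    s_time_t cur_budget;      /* budget remaining in the current period */
    s_time_t last_start;      /* when this vCPU last started running    */
};

/* Consume budget for the time run since last_start; returns true when
 * the vCPU is depleted and belongs on the depletedq. */
static bool burn_budget_model(struct vcpu_model *v, s_time_t now)
{
    s_time_t delta = now - v->last_start;

    v->last_start = now;
    v->cur_budget -= delta;
    if ( v->cur_budget <= 0 )
    {
        v->cur_budget = 0;
        return true;
    }
    return false;
}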

There is an added timer that handles only replenishments; it is armed to
fire at the time the next replenishment is due. To support this, the
depletedq is now also kept sorted by replenishment time. When the
replenishment handler moves a vCPU into one of the first [# CPUs] slots
of the runq, it tickles the runq with the added vCPU. If the depletedq
becomes empty the timer is stopped, and if the scheduler moves a vCPU
onto a previously empty depletedq it restarts the timer.
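
In the same spirit, here is a simplified standalone sketch of that
replenishment path: the handler walks the sorted depletedq from the
front, replenishes every vCPU whose time has come, and then either
re-arms the timer for the next pending replenishment or leaves it
stopped. The list and timer types are again stand-ins of my own; the
real handler also re-inserts each vCPU into the runq by deadline and
tickles a pCPU when it lands in one of the first [# CPUs] slots, as the
patch below shows.

#include <stdbool.h>
#include <stddef.h>

typedef long long s_time_t;      /* stand-in for Xen's signed time type */

struct vcpu_model {
    s_time_t cur_deadline;       /* next replenishment instant          */
    struct vcpu_model *next;     /* singly linked, sorted by deadline   */
};

struct timer_model {
    bool armed;
    s_time_t expires;
};

/* Fired by the replenishment timer at time 'now'. */
static void replenishment_handler_model(struct vcpu_model **depletedq,
                                        struct timer_model *timer,
                                        s_time_t now)
{
    /* Replenish every vCPU at the head of the queue whose time has come. */
    while ( *depletedq != NULL && now >= (*depletedq)->cur_deadline )
    {
        struct vcpu_model *svc = *depletedq;

        *depletedq = svc->next;
        /* The real handler calls rt_update_deadline() and __runq_insert()
         * here, tickling a pCPU if svc lands in the first [# CPUs] slots. */
    }

    /* Re-arm for the earliest pending replenishment, or stop if none left. */
    if ( *depletedq != NULL )
    {
        timer->armed   = true;
        timer->expires = (*depletedq)->cur_deadline;
    }
    else
        timer->armed = false;
}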

This may still have issues in the corner cases discussed earlier, such
as unexpected behavior if the two timers are armed for the same time,
but it should be correct for the common case.

Dario, let me know if this is closer to what you envisioned.

~Dagaen

On Sat, Jun 27, 2015 at 3:46 PM, Dagaen Golomb <dgolomb@xxxxxxxxxxxxxx> wrote:
> To do this, we create a new list that holds, for each
> vcpu, the time least into the future that it may need to be
> rescheduled. The scheduler chooses the lowest time off of this
> list and waits until the specified time instead of running every
> 1 ms as it did before.
>
> Signed-off-by: Dagaen Golomb <dgolomb@xxxxxxxxxxxxxx>
> Signed-off-by: Meng Xu <mengxu@xxxxxxxxxxxxx>
> ---
>  xen/common/sched_rt.c |  100 +++++++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 93 insertions(+), 7 deletions(-)
>
> diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
> index 4372486..cce3446 100644
> --- a/xen/common/sched_rt.c
> +++ b/xen/common/sched_rt.c
> @@ -4,6 +4,7 @@
>   *
>   * by Sisu Xi, 2013, Washington University in Saint Louis
>   * and Meng Xu, 2014, University of Pennsylvania
> + * and Dagaen Golomb, 2015, University of Pennsylvania
>   *
>   * based on the code of credit Scheduler
>   */
> @@ -152,6 +153,8 @@ struct rt_private {
>      struct list_head sdom;      /* list of availalbe domains, used for dump */
>      struct list_head runq;      /* ordered list of runnable vcpus */
>      struct list_head depletedq; /* unordered list of depleted vcpus */
> +    struct timer replenishment_timer;
> +    unsigned NUM_CPUS;
>      cpumask_t tickled;          /* cpus been tickled */
>  };
>
> @@ -178,6 +181,8 @@ struct rt_vcpu {
>      unsigned flags;             /* mark __RTDS_scheduled, etc.. */
>  };
>
> +static void runq_tickle(const struct scheduler *, struct rt_vcpu *);
> +
>  /*
>   * Domain
>   */
> @@ -391,13 +396,19 @@ __q_remove(struct rt_vcpu *svc)
>   * Insert svc with budget in RunQ according to EDF:
>   * vcpus with smaller deadlines go first.
>   * Insert svc without budget in DepletedQ unsorted;
> + *
> + * Returns the position, 1-indexed, of where the item was inserted.
> + * Returns a positive index if placed on runq, and a negative index (the index
> + *   in the depletedq * -1) if placed on the depletedq
>   */
> -static void
> +static int
>  __runq_insert(const struct scheduler *ops, struct rt_vcpu *svc)
>  {
>      struct rt_private *prv = rt_priv(ops);
>      struct list_head *runq = rt_runq(ops);
> +    struct list_head *depletedq = rt_depletedq(ops);
>      struct list_head *iter;
> +    unsigned inserted_index = 1;
>
>      ASSERT( spin_is_locked(&prv->lock) );
>
> @@ -411,15 +422,77 @@ __runq_insert(const struct scheduler *ops, struct rt_vcpu *svc)
>              struct rt_vcpu * iter_svc = __q_elem(iter);
>              if ( svc->cur_deadline <= iter_svc->cur_deadline )
>                      break;
> +            else
> +                ++inserted_index;
>           }
>          list_add_tail(&svc->q_elem, iter);
> +        return inserted_index;
>      }
>      else
>      {
> -        list_add(&svc->q_elem, &prv->depletedq);
> +        // If we are inserting into a previously empty depletedq, rearm the
> +        //   replenishment timer. It will handle further scheduling until
> +        //   the depletedq is empty again (if ever)
> +        if( list_empty(depletedq) )
> +            set_timer( &prv->replenishment_timer, svc->cur_deadline );
> +
> +        list_for_each(iter, depletedq)
> +        {
> +            struct rt_vcpu * iter_svc = __q_elem(iter);
> +            if ( svc->cur_deadline <= iter_svc->cur_deadline )
> +                break;
> +            else
> +                ++inserted_index;
> +         }
> +        list_add_tail(&svc->q_elem, iter);
> +        return -inserted_index;
>      }
>  }
>
> +static void replenishment_handler(void* data)
> +{
> +    const struct scheduler *ops = data;
> +    struct rt_private *prv = rt_priv(ops);
> +    struct list_head *depletedq = rt_depletedq(ops);
> +    struct list_head *iter;
> +    struct list_head *tmp;
> +    struct rt_vcpu *svc = NULL;
> +    s_time_t now = NOW();
> +    int new_position = 0;
> +    unsigned long flags;
> +
> +    spin_lock_irqsave(&prv->lock, flags);
> +
> +    // Replenish the vCPU that triggered this, and resort it into runq
> +    list_for_each_safe(iter, tmp, depletedq)
> +    {
> +        svc = __q_elem(iter);
> +        if ( now >= svc->cur_deadline )
> +        {
> +            rt_update_deadline(now, svc);
> +            __q_remove(svc); /* remove from depleted queue */
> +            new_position = __runq_insert(ops, svc); /* add to runq */
> +        }
> +        else break; // leave list_for_each_safe
> +    }
> +
> +    // If we become one of top [# CPUs] in the runq, tickle it
> +    // TODO: make this work when multiple tickles are required
> +    if ( new_position > 0 && new_position <= prv->NUM_CPUS )
> +        runq_tickle(ops, svc);
> +
> +    // Use first item on depletedq to schedule next replenishment.
> +    // If no replenishments are pending, disable timer for now
> +    if( !list_empty(depletedq) )
> +    {
> +        struct rt_vcpu * soonest_to_replenish = __q_elem(depletedq->next);
> +        set_timer( &prv->replenishment_timer,
> +                   soonest_to_replenish->cur_deadline );
> +    }
> +
> +    spin_unlock_irqrestore(&prv->lock, flags);
> +}
> +
>  /*
>   * Init/Free related code
>   */
> @@ -427,6 +500,7 @@ static int
>  rt_init(struct scheduler *ops)
>  {
>      struct rt_private *prv = xzalloc(struct rt_private);
> +    int cpu = 0;
>
>      printk("Initializing RTDS scheduler\n"
>             "WARNING: This is experimental software in development.\n"
> @@ -450,6 +524,15 @@ rt_init(struct scheduler *ops)
>      INIT_LIST_HEAD(&prv->runq);
>      INIT_LIST_HEAD(&prv->depletedq);
>
> +    // Replenishment timer, for now always on CPU 0
> +    init_timer(&prv->replenishment_timer, replenishment_handler, ops, 0);
> +    // Calculate number of pCPUs present.
> +    // Note: assumes the number of pCPUs does not change at runtime.
> +    for_each_present_cpu(cpu)
> +    {
> +        ++(prv->NUM_CPUS);
> +    }
> +
>      cpumask_clear(&prv->tickled);
>
>      ops->sched_data = prv;
> @@ -466,6 +549,8 @@ rt_deinit(const struct scheduler *ops)
>  {
>      struct rt_private *prv = rt_priv(ops);
>
> +    kill_timer(&prv->replenishment_timer);
> +
>      ASSERT( _cpumask_scratch && nr_rt_ops > 0 );
>
>      if ( (--nr_rt_ops) == 0 )
> @@ -653,8 +738,7 @@ rt_vcpu_remove(const struct scheduler *ops, struct vcpu *vc)
>      BUG_ON( sdom == NULL );
>
>      lock = vcpu_schedule_lock_irq(vc);
> -    if ( __vcpu_on_q(svc) )
> -        __q_remove(svc);
> +    __q_remove(svc);
>      vcpu_schedule_unlock_irq(lock, vc);
>
>      if ( !is_idle_vcpu(vc) )
> @@ -838,6 +922,7 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched
>  {
>      const int cpu = smp_processor_id();
>      struct rt_private *prv = rt_priv(ops);
> +    struct list_head *runq = rt_runq(ops);
>      struct rt_vcpu *const scurr = rt_vcpu(current);
>      struct rt_vcpu *snext = NULL;
>      struct task_slice ret = { .migrated = 0 };
> @@ -848,8 +933,6 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched
>      /* burn_budget would return for IDLE VCPU */
>      burn_budget(ops, scurr, now);
>
> -    __repl_update(ops, now);
> -
>      if ( tasklet_work_scheduled )
>      {
>          snext = rt_vcpu(idle_vcpu[cpu]);
> @@ -889,7 +972,10 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched
>          }
>      }
>
> -    ret.time = MIN(snext->budget, MAX_SCHEDULE); /* sched quantum */
> +    /* Use minimum from runq */
> +    ret.time = MAX_SCHEDULE;
> +    if( !list_empty(runq) )
> +        ret.time = __q_elem(runq->next)->cur_deadline;
>      ret.task = snext->vcpu;
>
>      /* TRACE */
> --
> 1.7.9.5
>
