
Re: [Xen-devel] [RFC PATCH v1 1/5] tasklet: Introduce per-cpu tasklet for softirq.



> >  static void tasklet_enqueue(struct tasklet *t)
> >  {
> >      unsigned int cpu = t->scheduled_on;
> >  
> > +    if ( t->is_percpu )
> > +    {
> > +        unsigned long flags;
> > +        struct list_head *list;
> > +
> > +        INIT_LIST_HEAD(&t->list);
> > +        BUG_ON( !t->is_softirq );
> > +        BUG_ON( cpu != smp_processor_id() ); /* Not implemented yet. */
> > +
> > +        local_irq_save(flags);
> > +
> > +        list = &__get_cpu_var(softirq_list);
> > +        list_add_tail(&t->list, list);
> > +        raise_softirq(TASKLET_SOFTIRQ_PERCPU);
> > +
> > +        local_irq_restore(flags);
> 
> The raise_softirq() call can be done with interrupts re-enabled, which
> reduces the critical window.
> 
> __get_cpu_var() does some inspection of the stack pointer behind your
> back.  It would be far more efficient in the critical window to take

Exactly 4 lines of assembler code :-)

> "unsigned int cpu = smp_processor_id();"  outside and use "per_cpu($FOO,
> cpu)" inside.

<nods> That would indeed be better.
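Something like this for v2 then (untested sketch, with the raise_softirq()
also moved out of the interrupts-off window per your earlier comment):

static void tasklet_enqueue(struct tasklet *t)
{
    unsigned int cpu = t->scheduled_on;

    if ( t->is_percpu )
    {
        unsigned long flags;

        BUG_ON( !t->is_softirq );
        BUG_ON( cpu != smp_processor_id() ); /* Not implemented yet. */

        INIT_LIST_HEAD(&t->list);

        /* Only the list manipulation needs interrupts off. */
        local_irq_save(flags);
        list_add_tail(&t->list, &per_cpu(softirq_list, cpu));
        local_irq_restore(flags);

        raise_softirq(TASKLET_SOFTIRQ_PERCPU);
        return;
    }
    ...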
> 
> > +        return;
> > +    }
> >      if ( t->is_softirq )
> >      {
> >          struct list_head *list = &per_cpu(softirq_tasklet_list, cpu);
> > @@ -56,16 +76,25 @@ void tasklet_schedule_on_cpu(struct tasklet *t, unsigned int cpu)
> >  {
> >      unsigned long flags;
> >  
> > -    spin_lock_irqsave(&tasklet_lock, flags);
> > +    if ( !tasklets_initialised || t->is_dead )
> > +        return;
> >  
> > -    if ( tasklets_initialised && !t->is_dead )
> > +    if ( t->is_percpu )
> >      {
> > -        t->scheduled_on = cpu;
> > -        if ( !t->is_running )
> > +        if ( !test_and_set_bit(TASKLET_STATE_SCHED, &t->state) )
> >          {
> > -            list_del(&t->list);
> > +            t->scheduled_on = cpu;
> >              tasklet_enqueue(t);
> >          }
> > +        return;
> > +    }
> > +    spin_lock_irqsave(&tasklet_lock, flags);
> > +
> > +    t->scheduled_on = cpu;
> > +    if ( !t->is_running )
> > +    {
> > +        list_del(&t->list);
> > +        tasklet_enqueue(t);
> >      }
> >  
> >      spin_unlock_irqrestore(&tasklet_lock, flags);
> > @@ -104,6 +133,66 @@ static void do_tasklet_work(unsigned int cpu, struct list_head *list)
> >      }
> >  }
> >  
> > +void do_tasklet_work_percpu(void)
> > +{
> > +    struct tasklet *t = NULL;
> > +    struct list_head *head;
> > +    bool_t poke = 0;
> > +
> > +    local_irq_disable();
> > +    head = &__get_cpu_var(softirq_list);
> > +
> > +    if ( !list_empty(head) )
> > +    {
> > +        t = list_entry(head->next, struct tasklet, list);
> > +
> > +        if ( head->next == head->prev ) /* One singular item. Re-init head. */
> > +            INIT_LIST_HEAD(&__get_cpu_var(softirq_list));
> 
> It would be most efficient to hoist "struct list_head *this_softirq_list
> = &this_cpu(softirq_list);" outside the critical region.

<nods>
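Will do. Roughly (sketch, not tested yet):

void do_tasklet_work_percpu(void)
{
    struct list_head *head = &this_cpu(softirq_list);
    struct tasklet *t = NULL;
    bool_t poke = 0;

    local_irq_disable();
    if ( !list_empty(head) )
    {
        t = list_entry(head->next, struct tasklet, list);
        if ( head->next == head->prev ) /* One singular item. Re-init head. */
            INIT_LIST_HEAD(head);
        else
        {
            /* Multiple items: update head to skip 't'. */
            head->next = head->next->next;
            poke = 1;
        }
    }
    local_irq_enable();
    ... /* rest as before, reusing 'head' instead of re-evaluating
         * __get_cpu_var() inside the interrupts-off region. */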
> 
> > +        else
> > +        {
> > +            /* Multiple items, update head to skip 't'. */
> > +            struct list_head *list;
> > +
> > +            /* One item past 't'. */
> > +            list = head->next->next;
> > +
> > +            BUG_ON(list == NULL);
> > +
> > +            /* And update head to skip 't'. Note that t->list.prev still
> > +             * points to head, but we don't care as we only process one tasklet
> > +             * and once done the tasklet list is re-init one way or another.
> > +             */
> > +            head->next = list;
> > +            poke = 1;
> > +        }
> > +    }
> > +    local_irq_enable();
> > +
> > +    if ( !t )
> > +        return; /* Never saw it happend, but we might have a spurious case? */
> > +
> > +    if ( tasklet_trylock(t) )
> > +    {
> > +        if ( !test_and_clear_bit(TASKLET_STATE_SCHED, &t->state) )
> > +                BUG();
> > +        sync_local_execstate();
> > +        t->func(t->data);
> > +        tasklet_unlock(t);
> > +        if ( poke )
> > +            raise_softirq(TASKLET_SOFTIRQ_PERCPU);
> > +        /* We could reinit the t->list but tasklet_enqueue does it for us. */
> > +        return;
> > +    }
> > +
> > +    local_irq_disable();
> > +
> > +    INIT_LIST_HEAD(&t->list);
> > +    list_add_tail(&t->list, &__get_cpu_var(softirq_list));
> > +    smp_wmb();
> 
> Is this needed? all of this infrastructure is local to the cpu.

Artifact of debugging. Though I do wonder what to do about
ARM. As I understand it, the world of ARM is a wild place
where there is a need for these barriers to exist. But maybe
I have just heard too many stories.

> 
> > +    raise_softirq(TASKLET_SOFTIRQ_PERCPU);
> > +    local_irq_enable();
> > +}
> > +
> >  /* VCPU context work */
> >  void do_tasklet(void)
> >  {
> > @@ -147,10 +236,29 @@ static void tasklet_softirq_action(void)
> >      spin_unlock_irq(&tasklet_lock);
> >  }
> >  
> > +/* Per CPU softirq context work. */
> > +static void tasklet_softirq_percpu_action(void)
> > +{
> > +    do_tasklet_work_percpu();
> > +}
> > +
> >  void tasklet_kill(struct tasklet *t)
> >  {
> >      unsigned long flags;
> >  
> > +    if ( t->is_percpu )
> > +    {
> > +        while ( test_and_set_bit(TASKLET_STATE_SCHED, &t->state) )
> > +        {
> > +            do {
> > +                process_pending_softirqs();
> > +            } while ( test_bit(TASKLET_STATE_SCHED, &t->state) );
> > +        }
> > +        tasklet_unlock_wait(t);
> > +        clear_bit(TASKLET_STATE_SCHED, &t->state);
> > +        t->is_dead = 1;
> > +        return;
> > +    }
> >      spin_lock_irqsave(&tasklet_lock, flags);
> >  
> >      if ( !list_empty(&t->list) )
> > @@ -208,6 +316,14 @@ void softirq_tasklet_init(
> >      t->is_softirq = 1;
> >  }
> >  
> > +void percpu_tasklet_init(
> > +    struct tasklet *t, void (*func)(unsigned long), unsigned long data)
> > +{
> > +    tasklet_init(t, func, data);
> > +    t->is_softirq = 1;
> > +    t->is_percpu = 1;
> > +}
> > +
> >  static int cpu_callback(
> >      struct notifier_block *nfb, unsigned long action, void *hcpu)
> >  {
> > @@ -218,11 +334,13 @@ static int cpu_callback(
> >      case CPU_UP_PREPARE:
> >          INIT_LIST_HEAD(&per_cpu(tasklet_list, cpu));
> >          INIT_LIST_HEAD(&per_cpu(softirq_tasklet_list, cpu));
> > +        INIT_LIST_HEAD(&per_cpu(softirq_list, cpu));
> >          break;
> >      case CPU_UP_CANCELED:
> >      case CPU_DEAD:
> >          migrate_tasklets_from_cpu(cpu, &per_cpu(tasklet_list, cpu));
> > +        migrate_tasklets_from_cpu(cpu, &per_cpu(softirq_tasklet_list, cpu));
> > +        migrate_tasklets_from_cpu(cpu, &per_cpu(softirq_list, cpu));
> >          break;
> >      default:
> >          break;
> > @@ -242,6 +360,7 @@ void __init tasklet_subsys_init(void)
> >      cpu_callback(&cpu_nfb, CPU_UP_PREPARE, hcpu);
> >      register_cpu_notifier(&cpu_nfb);
> >      open_softirq(TASKLET_SOFTIRQ, tasklet_softirq_action);
> > +    open_softirq(TASKLET_SOFTIRQ_PERCPU, tasklet_softirq_percpu_action);
> >      tasklets_initialised = 1;
> >  }
> >  
> > diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
> > index ef75b94..740cee5 100644
> > --- a/xen/drivers/passthrough/io.c
> > +++ b/xen/drivers/passthrough/io.c
> > @@ -114,7 +114,7 @@ int pt_irq_create_bind(
> >              spin_unlock(&d->event_lock);
> >              return -ENOMEM;
> >          }
> > -        softirq_tasklet_init(
> > +        percpu_tasklet_init(
> >              &hvm_irq_dpci->dirq_tasklet,
> >              hvm_dirq_assist, (unsigned long)d);
> >          for ( i = 0; i < NR_HVM_IRQS; i++ )
> > diff --git a/xen/include/xen/softirq.h b/xen/include/xen/softirq.h
> > index 0c0d481..8b15c8c 100644
> > --- a/xen/include/xen/softirq.h
> > +++ b/xen/include/xen/softirq.h
> > @@ -7,6 +7,7 @@ enum {
> >      SCHEDULE_SOFTIRQ,
> >      NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ,
> >      RCU_SOFTIRQ,
> > +    TASKLET_SOFTIRQ_PERCPU,
> >      TASKLET_SOFTIRQ,
> >      NR_COMMON_SOFTIRQS
> >  };
> > diff --git a/xen/include/xen/tasklet.h b/xen/include/xen/tasklet.h
> > index 8c3de7e..9497c47 100644
> > --- a/xen/include/xen/tasklet.h
> > +++ b/xen/include/xen/tasklet.h
> > @@ -17,21 +17,24 @@
> >  struct tasklet
> >  {
> >      struct list_head list;
> > +    unsigned long state;
> >      int scheduled_on;
> >      bool_t is_softirq;
> >      bool_t is_running;
> >      bool_t is_dead;
> > +    bool_t is_percpu;
> >      void (*func)(unsigned long);
> >      unsigned long data;
> >  };
> >  
> > -#define _DECLARE_TASKLET(name, func, data, softirq)                     \
> > +#define _DECLARE_TASKLET(name, func, data, softirq, percpu)             \
> >      struct tasklet name = {                                             \
> > -        LIST_HEAD_INIT(name.list), -1, softirq, 0, 0, func, data }
> > +        LIST_HEAD_INIT(name.list), 0, -1, softirq, 0, 0, percpu,        \
> > +        func, data }
> >  #define DECLARE_TASKLET(name, func, data)               \
> > -    _DECLARE_TASKLET(name, func, data, 0)
> > +    _DECLARE_TASKLET(name, func, data, 0, 0)
> >  #define DECLARE_SOFTIRQ_TASKLET(name, func, data)       \
> > -    _DECLARE_TASKLET(name, func, data, 1)
> > +    _DECLARE_TASKLET(name, func, data, 1, 0)
> >  
> >  /* Indicates status of tasklet work on each CPU. */
> >  DECLARE_PER_CPU(unsigned long, tasklet_work_to_do);
> > @@ -40,6 +43,54 @@ DECLARE_PER_CPU(unsigned long, tasklet_work_to_do);
> >  #define TASKLET_enqueued   (1ul << _TASKLET_enqueued)
> >  #define TASKLET_scheduled  (1ul << _TASKLET_scheduled)
> >  
> > +/* These fancy bit manipulations (bit 0 and bit 1) along with using a lock
> > + * operation allow us to have four stages in tasklet life-time.

s/./:/

> > + *  a) 0x0: Completely empty (not scheduled nor running).
> > + *  b) 0x1: Scheduled but not running. Used to guard in 'tasklet_schedule'

s/to guard/as a guard/
> > + *     such that we will only schedule one. If it is scheduled and had never
> > + *     run (hence never clearing STATE_SCHED bit), tasklet_kill will spin
> > + *     forever on said tasklet. However 'tasklet_schedule' raises the
> > + *     softirq associated with the per-cpu - so it will run, albeit there might
> > + *     be a race (tasklet_kill spinning until the softirq handler runs).
> > + *  c) 0x2: it is running (only on one CPU) and can be scheduled on any
> > + *     CPU. The bit 0 - scheduled is cleared at this stage allowing
> > + *     'tasklet_schedule' to succesfully schedule.
> > + *  d) 0x3: scheduled and running - only possible if the running tasklet calls
> > + *     tasklet_schedule (on same CPU) or the tasklet is scheduled from another
> > + *     CPU while the tasklet is running on another CPU.
> > + *
> > + * The two bits play a vital role in assuring that the tasklet is scheduled
> > + * once and runs only once. The steps are:
> > + *
> > + *  1) tasklet_schedule: STATE_SCHED bit set (0x1), added on the per cpu list.
> > + *  2) tasklet_softirq_percpu_action picks one tasklet from the list. Schedules
> > + *  itself later if there are more tasklets on it. Tries to set STATE_RUN bit
> > + *  (0x3) - if it fails adds tasklet back to the per-cpu list. If it succeeds
> > + *  clears the STATE_SCHED bit (0x2). Once tasklet completed, unsets STATE_RUN

s/completed/completes/

> > + *  (0x0 or 0x1 if tasklet called tasklet_schedule).
> > + */
> > +enum {
> > +    TASKLET_STATE_SCHED, /* Bit 0 */
> > +    TASKLET_STATE_RUN
> > +};
> > +
> > +static inline int tasklet_trylock(struct tasklet *t)
> > +{
> > +    return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state);
> 
> No need for brackets around (t) for these static inlines.

<nods>
> 
> > +}
> > +
> > +static inline void tasklet_unlock(struct tasklet *t)
> > +{
> > +    barrier();
> 
> clear_bit() has a memory clobber.  This barrier() is entirely redundant.
> 
> > +    clear_bit(TASKLET_STATE_RUN, &(t)->state);
> > +}
> > +static inline void tasklet_unlock_wait(struct tasklet *t)
> > +{
> > +    while (test_bit(TASKLET_STATE_RUN, &(t)->state))
> > +    {
> > +        barrier();
> 
> cpu_relax();

Ah yes!
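So for v2 these helpers become (sketch, folding in all three of your
comments - no brackets, no redundant barrier(), and cpu_relax()):

static inline int tasklet_trylock(struct tasklet *t)
{
    return !test_and_set_bit(TASKLET_STATE_RUN, &t->state);
}

static inline void tasklet_unlock(struct tasklet *t)
{
    /* clear_bit() already has a memory clobber, no barrier() needed. */
    clear_bit(TASKLET_STATE_RUN, &t->state);
}

static inline void tasklet_unlock_wait(struct tasklet *t)
{
    while ( test_bit(TASKLET_STATE_RUN, &t->state) )
        cpu_relax();
}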
> 
> ~Andrew

Thank you for your quick review! (Less than an hour after posting?!
Is that the record?)
> 
> > +    }
> > +}
> >  void tasklet_schedule_on_cpu(struct tasklet *t, unsigned int cpu);
> >  void tasklet_schedule(struct tasklet *t);
> >  void do_tasklet(void);
> > @@ -48,6 +99,8 @@ void tasklet_init(
> >      struct tasklet *t, void (*func)(unsigned long), unsigned long data);
> >  void softirq_tasklet_init(
> >      struct tasklet *t, void (*func)(unsigned long), unsigned long data);
> > +void percpu_tasklet_init(
> > +    struct tasklet *t, void (*func)(unsigned long), unsigned long data);
> >  void tasklet_subsys_init(void);
> >  
> >  #endif /* __XEN_TASKLET_H__ */
> 

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
