[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 09/13] xen: implement 3-level event channel routines



On Thu, Jan 31, 2013 at 02:47:03PM +0000, Wei Liu wrote:
> Only do_upcall, debug_interrupt and unmask_evtchn are required.
> 
> Signed-off-by: Wei Liu <wei.liu2@xxxxxxxxxx>
> ---
>  drivers/xen/events.c |  291 
> ++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 291 insertions(+)
> 
> diff --git a/drivers/xen/events.c b/drivers/xen/events.c
> index 30ca620..d953e81 100644
> --- a/drivers/xen/events.c
> +++ b/drivers/xen/events.c
> @@ -51,6 +51,9 @@
>  #include <xen/interface/hvm/hvm_op.h>
>  #include <xen/interface/hvm/params.h>
>  
> +/* Helper macro(s) */
> +#define LONG_BITORDER (BITS_PER_LONG == 64 ? 6 : 5)

That really needs an explanation.
> +
>  /* N-level event channel, starting from 2 */
>  unsigned int evtchn_level = 2;
>  EXPORT_SYMBOL_GPL(evtchn_level);
> @@ -61,6 +64,9 @@ EXPORT_SYMBOL_GPL(nr_event_channels);
>  static unsigned long *evtchn_pending;
>  static unsigned long *evtchn_mask;
>  
> +/* 2nd level selector for 3-level event channel */

And the '8' in the declaration below needs a #define.

> +static DEFINE_PER_CPU(unsigned long[sizeof(unsigned long) * 8], 
> evtchn_sel_l2);
> +
>  /*
>   * This lock protects updates to the following mapping and reference-count
>   * arrays. The lock does not need to be acquired to read the mapping tables.
> @@ -396,6 +402,28 @@ static inline void __unmask_local_port_l2(int port)
>               vcpu_info->evtchn_upcall_pending = 1;
>  }
>  
> +static inline void __unmask_local_port_l3(int port)
> +{
> +     struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
> +     int cpu = smp_processor_id();
> +     unsigned int l1bit = port >> (LONG_BITORDER << 1);
> +     unsigned int l2bit = port >> LONG_BITORDER;
> +
> +     sync_clear_bit(port, &evtchn_mask[0]);
> +
> +     /*
> +      * The following is basically the equivalent of
> +      * 'hw_resend_irq'. Just like a real IO-APIC we 'lose
> +      * the interrupt edge' if the channel is masked.
> +      */
> +     if (sync_test_bit(port, &evtchn_pending[0]) &&
> +         !sync_test_and_set_bit(l2bit,
> +                                &per_cpu(evtchn_sel_l2, cpu)[0]) &&
> +         !sync_test_and_set_bit(l1bit,
> +                                &vcpu_info->evtchn_pending_sel))
> +             vcpu_info->evtchn_upcall_pending = 1;
> +}
> +
>  static void unmask_evtchn(int port)
>  {
>       unsigned int cpu = get_cpu();
> @@ -411,6 +439,9 @@ static void unmask_evtchn(int port)
>               case 2:
>                       __unmask_local_port_l2(port);
>                       break;
> +             case 3:
> +                     __unmask_local_port_l3(port);
> +                     break;
>               default:
>                       BUG();
>               }
> @@ -1185,6 +1216,7 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector 
> vector)
>  }
>  
>  static irqreturn_t xen_debug_interrupt_l2(int irq, void *dev_id);
> +static irqreturn_t xen_debug_interrupt_l3(int irq, void *dev_id);
>  
>  irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
>  {
> @@ -1215,6 +1247,9 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
>       case 2:
>               rc = xen_debug_interrupt_l2(irq, dev_id);
>               break;
> +     case 3:
> +             rc = xen_debug_interrupt_l3(irq, dev_id);
> +             break;
>       default:
>               BUG();
>       }
> @@ -1285,8 +1320,109 @@ static irqreturn_t xen_debug_interrupt_l2(int irq, 
> void *dev_id)
>       return IRQ_HANDLED;
>  }
>  
> +static irqreturn_t xen_debug_interrupt_l3(int irq, void *dev_id)
> +{
> +     int cpu = smp_processor_id();
> +     unsigned long *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu);
> +     unsigned long nr_elems = NR_EVENT_CHANNELS_L3 / BITS_PER_LONG;
> +     int i;
> +     struct vcpu_info *v;
> +
> +     v = per_cpu(xen_vcpu, cpu);
> +
> +     printk(KERN_DEBUG "\npending (only show words which have bits set to 
> 1):\n   ");
> +     for (i = nr_elems-1; i >= 0; i--)
> +             if (evtchn_pending[i] != 0UL) {
> +                     printk(KERN_DEBUG " word index %d %0*lx\n",
> +                            i,
> +                            (int)sizeof(evtchn_pending[0])*2,
> +                            evtchn_pending[i]);
> +             }
> +
> +     printk(KERN_DEBUG "\nglobal mask (only show words which have bits set 
> to 0):\n   ");
> +     for (i = nr_elems-1; i >= 0; i--)
> +             if (evtchn_mask[i] != ~0UL) {
> +                     printk(KERN_DEBUG " word index %d %0*lx\n",
> +                            i,
> +                            (int)sizeof(evtchn_mask[0])*2,
> +                            evtchn_mask[i]);
> +             }
> +
> +     printk(KERN_DEBUG "\nglobally unmasked (only show result words which 
> have bits set to 1):\n   ");
> +     for (i = nr_elems-1; i >= 0; i--)
> +             if ((evtchn_pending[i] & ~evtchn_mask[i]) != 0UL) {
> +                     printk(KERN_DEBUG " word index %d %0*lx\n",
> +                            i,
> +                            (int)(sizeof(evtchn_mask[0])*2),
> +                            evtchn_pending[i] & ~evtchn_mask[i]);
> +             }
> +
> +     printk(KERN_DEBUG "\nlocal cpu%d mask (only show words which have bits 
> set to 1):\n   ", cpu);
> +     for (i = (NR_EVENT_CHANNELS_L3/BITS_PER_LONG)-1; i >= 0; i--)
> +             if (cpu_evtchn[i] != 0UL) {
> +                     printk(KERN_DEBUG " word index %d %0*lx\n",
> +                            i,
> +                            (int)(sizeof(cpu_evtchn[0])*2),
> +                            cpu_evtchn[i]);
> +             }
> +
> +     printk(KERN_DEBUG "\nlocally unmasked (only show result words which 
> have bits set to 1):\n   ");
> +     for (i = nr_elems-1; i >= 0; i--) {
> +             unsigned long pending = evtchn_pending[i]
> +                     & ~evtchn_mask[i]
> +                     & cpu_evtchn[i];
> +             if (pending != 0UL) {
> +                     printk(KERN_DEBUG " word index %d %0*lx\n",
> +                            i,
> +                            (int)(sizeof(evtchn_mask[0])*2),
> +                            pending);
> +             }
> +     }
> +
> +     printk(KERN_DEBUG "\npending list:\n");
> +     for (i = 0; i < NR_EVENT_CHANNELS_L3; i++) {
> +             if (sync_test_bit(i, evtchn_pending)) {
> +                     int word_idx = i / (BITS_PER_LONG * BITS_PER_LONG);
> +                     int word_idx_l2 = i / BITS_PER_LONG;
> +                     printk(KERN_DEBUG "  %d: event %d -> irq %d%s%s%s%s\n",
> +                            cpu_from_evtchn(i), i,
> +                            evtchn_to_irq[i],
> +                            !sync_test_bit(word_idx, &v->evtchn_pending_sel)
> +                            ? "" : " l1-clear",
> +                            !sync_test_bit(word_idx_l2, 
> per_cpu(evtchn_sel_l2, cpu))
> +                            ? "" : " l2-clear",
> +                            sync_test_bit(i, evtchn_mask)
> +                            ? "" : " globally-masked",
> +                            sync_test_bit(i, cpu_evtchn)
> +                            ? "" : " locally-masked");
> +             }
> +     }
> +
> +     return IRQ_HANDLED;

Um, there has to be a way to fold the common parts of the L2 and L3
versions of this function into one?

> +}
> +
> +/* The following per-cpu variables are used to save current state of event
> + * processing loop.
> + *
> + * 2-level event channel:
> + *  current_word_idx is the bit index in L1 selector indicating the currently
> + *  processing word in shared bitmap.
> + *  current_bit_idx is the bit index in the currently processing word in 
> shared
> + *  bitmap.
> + *  N.B. current_word_idx_l2 is not used.
> + *
> + * 3-level event channel:
> + *  current_word_idx is the bit index in L1 selector indicating the currently
> + *  processing word in L2 selector.
> + *  current_word_idx_l2 is the bit index in L2 selector word indicating the
> + *  currently processing word in shared bitmap.
> + *  current_bit_idx is the bit index in the currently processing word in 
> shared
> + *  bitmap.
> + *
> + */
>  static DEFINE_PER_CPU(unsigned, xed_nesting_count);
>  static DEFINE_PER_CPU(unsigned int, current_word_idx);
> +static DEFINE_PER_CPU(unsigned int, current_word_idx_l2);
>  static DEFINE_PER_CPU(unsigned int, current_bit_idx);
>  
>  /*
> @@ -1409,6 +1545,155 @@ out:
>       put_cpu();
>  }
>  
> +/*
> + * In the 3-level event channel implementation, the first level is a
> + * bitset of words which contain pending bits in the second level.
> + * The second level is another bitsets which contain pending bits in
> + * the third level.  The third level is a bit set of pending events
> + * themselves.
> + */
> +static void __xen_evtchn_do_upcall_l3(void)
> +{
> +     struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
> +     unsigned count;
> +     int start_word_idx_l1, start_word_idx_l2, start_bit_idx;
> +     int word_idx_l1, word_idx_l2, bit_idx;
> +     int i, j;
> +     int cpu = get_cpu();
> +
> +     do {
> +             unsigned long pending_words_l1;
> +
> +             vcpu_info->evtchn_upcall_pending = 0;
> +
> +             if (__this_cpu_inc_return(xed_nesting_count) - 1)
> +                     goto out;
> +#ifndef CONFIG_X86
> +             /* No need for a barrier -- XCHG is a barrier on x86. */
> +             /* Clear master flag /before/ clearing selector flag. */
> +             wmb();
> +#endif
> +             /* here we get l1 pending selector */
> +             pending_words_l1 = xchg(&vcpu_info->evtchn_pending_sel, 0);
> +
> +             start_word_idx_l1 = __this_cpu_read(current_word_idx);
> +             start_word_idx_l2 = __this_cpu_read(current_word_idx_l2);
> +             start_bit_idx = __this_cpu_read(current_bit_idx);
> +
> +             word_idx_l1 = start_word_idx_l1;
> +
> +             /* loop through l1, try to pick up l2 */
> +             for (i = 0; pending_words_l1 != 0; i++) {
> +                     unsigned long words_l1;
> +                     unsigned long pending_words_l2;
> +
> +                     words_l1 = MASK_LSBS(pending_words_l1, word_idx_l1);
> +
> +                     if (words_l1 == 0) {
> +                             word_idx_l1 = 0;
> +                             start_word_idx_l2 = 0;
> +                             continue;
> +                     }
> +
> +                     word_idx_l1 = __ffs(words_l1);
> +
> +                     pending_words_l2 =
> +                             xchg(&per_cpu(evtchn_sel_l2, cpu)[word_idx_l1],
> +                                  0);
> +
> +                     word_idx_l2 = 0;
> +                     if (word_idx_l1 == start_word_idx_l1) {
> +                             if (i == 0)
> +                                     word_idx_l2 = start_word_idx_l2;
> +                             else
> +                                     word_idx_l2 &= (1UL << 
> start_word_idx_l2) - 1;
> +                     }
> +
> +                     for (j = 0; pending_words_l2 != 0; j++) {
> +                             unsigned long pending_bits;
> +                             unsigned long words_l2;
> +                             unsigned long idx;
> +
> +                             words_l2 = MASK_LSBS(pending_words_l2,
> +                                                  word_idx_l2);
> +
> +                             if (words_l2 == 0) {
> +                                     word_idx_l2 = 0;
> +                                     bit_idx = 0;
> +                                     continue;
> +                             }
> +
> +                             word_idx_l2 = __ffs(words_l2);
> +
> +                             idx = word_idx_l1*BITS_PER_LONG+word_idx_l2;
> +                             pending_bits =
> +                                     active_evtchns(cpu, idx);
> +
> +                             bit_idx = 0;
> +                             if (word_idx_l2 == start_word_idx_l2) {
> +                                     if (j == 0)
> +                                             bit_idx = start_bit_idx;
> +                                     else
> +                                             bit_idx &= 
> (1UL<<start_bit_idx)-1;
> +                             }
> +
> +                             /* process port */
> +                             do {
> +                                     unsigned long bits;
> +                                     int port, irq;
> +                                     struct irq_desc *desc;
> +
> +                                     bits = MASK_LSBS(pending_bits, bit_idx);
> +
> +                                     if (bits == 0)
> +                                             break;
> +
> +                                     bit_idx = __ffs(bits);
> +
> +                                     port = (word_idx_l1 << (LONG_BITORDER 
> << 1)) +
> +                                             (word_idx_l2 << LONG_BITORDER) +
> +                                             bit_idx;
> +
> +                                     irq = evtchn_to_irq[port];
> +
> +                                     if (irq != -1) {
> +                                             desc = irq_to_desc(irq);
> +                                             if (desc)
> +                                                     
> generic_handle_irq_desc(irq, desc);
> +                                     }
> +
> +                                     bit_idx = (bit_idx + 1) % BITS_PER_LONG;
> +
> +                                     __this_cpu_write(current_bit_idx, 
> bit_idx);
> +                                     __this_cpu_write(current_word_idx_l2,
> +                                                      bit_idx ? word_idx_l2 :
> +                                                      (word_idx_l2+1) % 
> BITS_PER_LONG);
> +                                     __this_cpu_write(current_word_idx_l2,
> +                                                      word_idx_l2 ? 
> word_idx_l1 :
> +                                                      (word_idx_l1+1) % 
> BITS_PER_LONG);
> +                             } while (bit_idx != 0);
> +
> +                             if ((word_idx_l2 != start_word_idx_l2) || (j != 
> 0))
> +                                     pending_words_l2 &= ~(1UL << 
> word_idx_l2);
> +
> +                             word_idx_l2 = (word_idx_l2 + 1) % BITS_PER_LONG;
> +                     }

This is rather complex code. Is there any way you can split it up
into smaller inline functions?
> +
> +                     if ((word_idx_l1 != start_word_idx_l1) || (i != 0))
> +                             pending_words_l1 &= ~(1UL << word_idx_l1);
> +
> +                     word_idx_l1 = (word_idx_l1 + 1) % BITS_PER_LONG;
> +             }
> +
> +             BUG_ON(!irqs_disabled());
> +             count = __this_cpu_read(xed_nesting_count);
> +             __this_cpu_write(xed_nesting_count, 0);
> +     } while (count != 1 || vcpu_info->evtchn_upcall_pending);
> +
> +out:
> +     put_cpu();
> +}
> +
>  void xen_evtchn_do_upcall(struct pt_regs *regs)
>  {
>       struct pt_regs *old_regs = set_irq_regs(regs);
> @@ -1420,6 +1705,9 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
>       case 2:
>               __xen_evtchn_do_upcall_l2();
>               break;
> +     case 3:
> +             __xen_evtchn_do_upcall_l3();
> +             break;
>       default:
>               BUG();
>       }
> @@ -1434,6 +1722,9 @@ void xen_hvm_evtchn_do_upcall(void)
>       case 2:
>               __xen_evtchn_do_upcall_l2();
>               break;
> +     case 3:
> +             __xen_evtchn_do_upcall_l3();
> +             break;
>       default:
>               BUG();
>       }
> -- 
> 1.7.10.4
> 

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.