|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH 09/13] xen: implement 3-level event channel routines
On Thu, Jan 31, 2013 at 02:47:03PM +0000, Wei Liu wrote:
> Only do_upcall, debug_interrupt and unmask_evtchn are required.
>
> Signed-off-by: Wei Liu <wei.liu2@xxxxxxxxxx>
> ---
> drivers/xen/events.c | 291
> ++++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 291 insertions(+)
>
> diff --git a/drivers/xen/events.c b/drivers/xen/events.c
> index 30ca620..d953e81 100644
> --- a/drivers/xen/events.c
> +++ b/drivers/xen/events.c
> @@ -51,6 +51,9 @@
> #include <xen/interface/hvm/hvm_op.h>
> #include <xen/interface/hvm/params.h>
>
> +/* Helper macro(s) */
> +#define LONG_BITORDER (BITS_PER_LONG == 64 ? 6 : 5)
That macro really needs a comment explaining what it is (log2 of BITS_PER_LONG) and what it is used for.
> +
> /* N-level event channel, starting from 2 */
> unsigned int evtchn_level = 2;
> EXPORT_SYMBOL_GPL(evtchn_level);
> @@ -61,6 +64,9 @@ EXPORT_SYMBOL_GPL(nr_event_channels);
> static unsigned long *evtchn_pending;
> static unsigned long *evtchn_mask;
>
> +/* 2nd level selector for 3-level event channel */
And the '8' in the declaration below needs a #define (e.g. BITS_PER_BYTE) rather than a magic number.
> +static DEFINE_PER_CPU(unsigned long[sizeof(unsigned long) * 8],
> evtchn_sel_l2);
> +
> /*
> * This lock protects updates to the following mapping and reference-count
> * arrays. The lock does not need to be acquired to read the mapping tables.
> @@ -396,6 +402,28 @@ static inline void __unmask_local_port_l2(int port)
> vcpu_info->evtchn_upcall_pending = 1;
> }
>
> +static inline void __unmask_local_port_l3(int port)
> +{
> + struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
> + int cpu = smp_processor_id();
> + unsigned int l1bit = port >> (LONG_BITORDER << 1);
> + unsigned int l2bit = port >> LONG_BITORDER;
> +
> + sync_clear_bit(port, &evtchn_mask[0]);
> +
> + /*
> + * The following is basically the equivalent of
> + * 'hw_resend_irq'. Just like a real IO-APIC we 'lose
> + * the interrupt edge' if the channel is masked.
> + */
> + if (sync_test_bit(port, &evtchn_pending[0]) &&
> + !sync_test_and_set_bit(l2bit,
> + &per_cpu(evtchn_sel_l2, cpu)[0]) &&
> + !sync_test_and_set_bit(l1bit,
> + &vcpu_info->evtchn_pending_sel))
> + vcpu_info->evtchn_upcall_pending = 1;
> +}
> +
> static void unmask_evtchn(int port)
> {
> unsigned int cpu = get_cpu();
> @@ -411,6 +439,9 @@ static void unmask_evtchn(int port)
> case 2:
> __unmask_local_port_l2(port);
> break;
> + case 3:
> + __unmask_local_port_l3(port);
> + break;
> default:
> BUG();
> }
> @@ -1185,6 +1216,7 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector
> vector)
> }
>
> static irqreturn_t xen_debug_interrupt_l2(int irq, void *dev_id);
> +static irqreturn_t xen_debug_interrupt_l3(int irq, void *dev_id);
>
> irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
> {
> @@ -1215,6 +1247,9 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
> case 2:
> rc = xen_debug_interrupt_l2(irq, dev_id);
> break;
> + case 3:
> + rc = xen_debug_interrupt_l3(irq, dev_id);
> + break;
> default:
> BUG();
> }
> @@ -1285,8 +1320,109 @@ static irqreturn_t xen_debug_interrupt_l2(int irq,
> void *dev_id)
> return IRQ_HANDLED;
> }
>
> +static irqreturn_t xen_debug_interrupt_l3(int irq, void *dev_id)
> +{
> + int cpu = smp_processor_id();
> + unsigned long *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu);
> + unsigned long nr_elems = NR_EVENT_CHANNELS_L3 / BITS_PER_LONG;
> + int i;
> + struct vcpu_info *v;
> +
> + v = per_cpu(xen_vcpu, cpu);
> +
> + printk(KERN_DEBUG "\npending (only show words which have bits set to
> 1):\n ");
> + for (i = nr_elems-1; i >= 0; i--)
> + if (evtchn_pending[i] != 0UL) {
> + printk(KERN_DEBUG " word index %d %0*lx\n",
> + i,
> + (int)sizeof(evtchn_pending[0])*2,
> + evtchn_pending[i]);
> + }
> +
> + printk(KERN_DEBUG "\nglobal mask (only show words which have bits set
> to 0):\n ");
> + for (i = nr_elems-1; i >= 0; i--)
> + if (evtchn_mask[i] != ~0UL) {
> + printk(KERN_DEBUG " word index %d %0*lx\n",
> + i,
> + (int)sizeof(evtchn_mask[0])*2,
> + evtchn_mask[i]);
> + }
> +
> + printk(KERN_DEBUG "\nglobally unmasked (only show result words which
> have bits set to 1):\n ");
> + for (i = nr_elems-1; i >= 0; i--)
> + if ((evtchn_pending[i] & ~evtchn_mask[i]) != 0UL) {
> + printk(KERN_DEBUG " word index %d %0*lx\n",
> + i,
> + (int)(sizeof(evtchn_mask[0])*2),
> + evtchn_pending[i] & ~evtchn_mask[i]);
> + }
> +
> + printk(KERN_DEBUG "\nlocal cpu%d mask (only show words which have bits
> set to 1):\n ", cpu);
> + for (i = (NR_EVENT_CHANNELS_L3/BITS_PER_LONG)-1; i >= 0; i--)
> + if (cpu_evtchn[i] != 0UL) {
> + printk(KERN_DEBUG " word index %d %0*lx\n",
> + i,
> + (int)(sizeof(cpu_evtchn[0])*2),
> + cpu_evtchn[i]);
> + }
> +
> + printk(KERN_DEBUG "\nlocally unmasked (only show result words which
> have bits set to 1):\n ");
> + for (i = nr_elems-1; i >= 0; i--) {
> + unsigned long pending = evtchn_pending[i]
> + & ~evtchn_mask[i]
> + & cpu_evtchn[i];
> + if (pending != 0UL) {
> + printk(KERN_DEBUG " word index %d %0*lx\n",
> + i,
> + (int)(sizeof(evtchn_mask[0])*2),
> + pending);
> + }
> + }
> +
> + printk(KERN_DEBUG "\npending list:\n");
> + for (i = 0; i < NR_EVENT_CHANNELS_L3; i++) {
> + if (sync_test_bit(i, evtchn_pending)) {
> + int word_idx = i / (BITS_PER_LONG * BITS_PER_LONG);
> + int word_idx_l2 = i / BITS_PER_LONG;
> + printk(KERN_DEBUG " %d: event %d -> irq %d%s%s%s%s\n",
> + cpu_from_evtchn(i), i,
> + evtchn_to_irq[i],
> + !sync_test_bit(word_idx, &v->evtchn_pending_sel)
> + ? "" : " l1-clear",
> + !sync_test_bit(word_idx_l2,
> per_cpu(evtchn_sel_l2, cpu))
> + ? "" : " l2-clear",
> + sync_test_bit(i, evtchn_mask)
> + ? "" : " globally-masked",
> + sync_test_bit(i, cpu_evtchn)
> + ? "" : " locally-masked");
> + }
> + }
> +
> + return IRQ_HANDLED;
Um, there has to be a way to fold the common parts of the L2 and L3
variants of this function into one?
> +}
> +
> +/* The following per-cpu variables are used to save current state of event
> + * processing loop.
> + *
> + * 2-level event channel:
> + * current_word_idx is the bit index in L1 selector indicating the currently
> + * processing word in shared bitmap.
> + * current_bit_idx is the bit index in the currently processing word in
> shared
> + * bitmap.
> + * N.B. current_word_idx_l2 is not used.
> + *
> + * 3-level event channel:
> + * current_word_idx is the bit index in L1 selector indicating the currently
> + * processing word in L2 selector.
> + * current_word_idx_l2 is the bit index in L2 selector word indicating the
> + * currently processing word in shared bitmap.
> + * current_bit_idx is the bit index in the currently processing word in
> shared
> + * bitmap.
> + *
> + */
> static DEFINE_PER_CPU(unsigned, xed_nesting_count);
> static DEFINE_PER_CPU(unsigned int, current_word_idx);
> +static DEFINE_PER_CPU(unsigned int, current_word_idx_l2);
> static DEFINE_PER_CPU(unsigned int, current_bit_idx);
>
> /*
> @@ -1409,6 +1545,155 @@ out:
> put_cpu();
> }
>
> +/*
> + * In the 3-level event channel implementation, the first level is a
> + * bitset of words which contain pending bits in the second level.
> + * The second level is another bitsets which contain pending bits in
> + * the third level. The third level is a bit set of pending events
> + * themselves.
> + */
> +static void __xen_evtchn_do_upcall_l3(void)
> +{
> + struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
> + unsigned count;
> + int start_word_idx_l1, start_word_idx_l2, start_bit_idx;
> + int word_idx_l1, word_idx_l2, bit_idx;
> + int i, j;
> + int cpu = get_cpu();
> +
> + do {
> + unsigned long pending_words_l1;
> +
> + vcpu_info->evtchn_upcall_pending = 0;
> +
> + if (__this_cpu_inc_return(xed_nesting_count) - 1)
> + goto out;
> +#ifndef CONFIG_X86
> + /* No need for a barrier -- XCHG is a barrier on x86. */
> + /* Clear master flag /before/ clearing selector flag. */
> + wmb();
> +#endif
> + /* here we get l1 pending selector */
> + pending_words_l1 = xchg(&vcpu_info->evtchn_pending_sel, 0);
> +
> + start_word_idx_l1 = __this_cpu_read(current_word_idx);
> + start_word_idx_l2 = __this_cpu_read(current_word_idx_l2);
> + start_bit_idx = __this_cpu_read(current_bit_idx);
> +
> + word_idx_l1 = start_word_idx_l1;
> +
> + /* loop through l1, try to pick up l2 */
> + for (i = 0; pending_words_l1 != 0; i++) {
> + unsigned long words_l1;
> + unsigned long pending_words_l2;
> +
> + words_l1 = MASK_LSBS(pending_words_l1, word_idx_l1);
> +
> + if (words_l1 == 0) {
> + word_idx_l1 = 0;
> + start_word_idx_l2 = 0;
> + continue;
> + }
> +
> + word_idx_l1 = __ffs(words_l1);
> +
> + pending_words_l2 =
> + xchg(&per_cpu(evtchn_sel_l2, cpu)[word_idx_l1],
> + 0);
> +
> + word_idx_l2 = 0;
> + if (word_idx_l1 == start_word_idx_l1) {
> + if (i == 0)
> + word_idx_l2 = start_word_idx_l2;
> + else
> + word_idx_l2 &= (1UL <<
> start_word_idx_l2) - 1;
> + }
> +
> + for (j = 0; pending_words_l2 != 0; j++) {
> + unsigned long pending_bits;
> + unsigned long words_l2;
> + unsigned long idx;
> +
> + words_l2 = MASK_LSBS(pending_words_l2,
> + word_idx_l2);
> +
> + if (words_l2 == 0) {
> + word_idx_l2 = 0;
> + bit_idx = 0;
> + continue;
> + }
> +
> + word_idx_l2 = __ffs(words_l2);
> +
> + idx = word_idx_l1*BITS_PER_LONG+word_idx_l2;
> + pending_bits =
> + active_evtchns(cpu, idx);
> +
> + bit_idx = 0;
> + if (word_idx_l2 == start_word_idx_l2) {
> + if (j == 0)
> + bit_idx = start_bit_idx;
> + else
> + bit_idx &=
> (1UL<<start_bit_idx)-1;
> + }
> +
> + /* process port */
> + do {
> + unsigned long bits;
> + int port, irq;
> + struct irq_desc *desc;
> +
> + bits = MASK_LSBS(pending_bits, bit_idx);
> +
> + if (bits == 0)
> + break;
> +
> + bit_idx = __ffs(bits);
> +
> + port = (word_idx_l1 << (LONG_BITORDER
> << 1)) +
> + (word_idx_l2 << LONG_BITORDER) +
> + bit_idx;
> +
> + irq = evtchn_to_irq[port];
> +
> + if (irq != -1) {
> + desc = irq_to_desc(irq);
> + if (desc)
> +
> generic_handle_irq_desc(irq, desc);
> + }
> +
> + bit_idx = (bit_idx + 1) % BITS_PER_LONG;
> +
> + __this_cpu_write(current_bit_idx,
> bit_idx);
> + __this_cpu_write(current_word_idx_l2,
> + bit_idx ? word_idx_l2 :
> + (word_idx_l2+1) %
> BITS_PER_LONG);
> + __this_cpu_write(current_word_idx_l2,
> + word_idx_l2 ?
> word_idx_l1 :
> + (word_idx_l1+1) %
> BITS_PER_LONG);
> + } while (bit_idx != 0);
> +
> + if ((word_idx_l2 != start_word_idx_l2) || (j !=
> 0))
> + pending_words_l2 &= ~(1UL <<
> word_idx_l2);
> +
> + word_idx_l2 = (word_idx_l2 + 1) % BITS_PER_LONG;
> + }
This is rather complex code. Is there any way you can split this up
into smaller inline functions?
> +
> + if ((word_idx_l1 != start_word_idx_l1) || (i != 0))
> + pending_words_l1 &= ~(1UL << word_idx_l1);
> +
> + word_idx_l1 = (word_idx_l1 + 1) % BITS_PER_LONG;
> + }
> +
> + BUG_ON(!irqs_disabled());
> + count = __this_cpu_read(xed_nesting_count);
> + __this_cpu_write(xed_nesting_count, 0);
> + } while (count != 1 || vcpu_info->evtchn_upcall_pending);
> +
> +out:
> + put_cpu();
> +}
> +
> void xen_evtchn_do_upcall(struct pt_regs *regs)
> {
> struct pt_regs *old_regs = set_irq_regs(regs);
> @@ -1420,6 +1705,9 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
> case 2:
> __xen_evtchn_do_upcall_l2();
> break;
> + case 3:
> + __xen_evtchn_do_upcall_l3();
> + break;
> default:
> BUG();
> }
> @@ -1434,6 +1722,9 @@ void xen_hvm_evtchn_do_upcall(void)
> case 2:
> __xen_evtchn_do_upcall_l2();
> break;
> + case 3:
> + __xen_evtchn_do_upcall_l3();
> + break;
> default:
> BUG();
> }
> --
> 1.7.10.4
>
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |