[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 09/13] xen: implement 3-level event channel routines



Only the do_upcall, debug_interrupt and unmask_evtchn routines need 3-level implementations.

Signed-off-by: Wei Liu <wei.liu2@xxxxxxxxxx>
---
 drivers/xen/events.c |  291 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 291 insertions(+)

diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 30ca620..d953e81 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -51,6 +51,9 @@
 #include <xen/interface/hvm/hvm_op.h>
 #include <xen/interface/hvm/params.h>
 
+/*
+ * Helper macro(s)
+ *
+ * LONG_BITORDER is log2(BITS_PER_LONG): 6 when longs are 64 bits wide,
+ * otherwise 5 (i.e. 32-bit longs are assumed).  Shifting right by it
+ * divides by BITS_PER_LONG, shifting left by it multiplies.
+ */
+#define LONG_BITORDER (BITS_PER_LONG == 64 ? 6 : 5)
+
 /* N-level event channel, starting from 2 */
 unsigned int evtchn_level = 2;
 EXPORT_SYMBOL_GPL(evtchn_level);
@@ -61,6 +64,9 @@ EXPORT_SYMBOL_GPL(nr_event_channels);
 static unsigned long *evtchn_pending;
 static unsigned long *evtchn_mask;
 
+/*
+ * 2nd level selector for 3-level event channel.
+ *
+ * Per-cpu array of sizeof(unsigned long) * 8 words.  The upcall handler
+ * indexes it with the bit number found in the vcpu's L1 selector
+ * (evtchn_pending_sel); the bits of each word in turn select words of
+ * the pending bitmap.
+ */
+static DEFINE_PER_CPU(unsigned long[sizeof(unsigned long) * 8], evtchn_sel_l2);
+
 /*
  * This lock protects updates to the following mapping and reference-count
  * arrays. The lock does not need to be acquired to read the mapping tables.
@@ -396,6 +402,28 @@ static inline void __unmask_local_port_l2(int port)
                vcpu_info->evtchn_upcall_pending = 1;
 }
 
+/*
+ * Clear the mask bit of @port and, if the port is already pending,
+ * propagate the pending state up through this CPU's L2 selector and the
+ * vcpu's L1 selector, raising evtchn_upcall_pending when both selector
+ * bits were previously clear.
+ */
+static inline void __unmask_local_port_l3(int port)
+{
+       struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
+       int cpu = smp_processor_id();
+       /* Bit in the per-cpu L2 selector covering this port's bitmap word. */
+       unsigned int sel_l2_bit = port >> LONG_BITORDER;
+       /* Bit in the L1 selector covering that L2 selector word. */
+       unsigned int sel_l1_bit = port >> (LONG_BITORDER << 1);
+
+       sync_clear_bit(port, evtchn_mask);
+
+       /*
+        * The following is basically the equivalent of
+        * 'hw_resend_irq'. Just like a real IO-APIC we 'lose
+        * the interrupt edge' if the channel is masked.
+        */
+       if (!sync_test_bit(port, evtchn_pending))
+               return;
+
+       /* Stop at the first selector level that was already flagged. */
+       if (sync_test_and_set_bit(sel_l2_bit, per_cpu(evtchn_sel_l2, cpu)))
+               return;
+       if (sync_test_and_set_bit(sel_l1_bit, &vcpu_info->evtchn_pending_sel))
+               return;
+
+       vcpu_info->evtchn_upcall_pending = 1;
+}
+
 static void unmask_evtchn(int port)
 {
        unsigned int cpu = get_cpu();
@@ -411,6 +439,9 @@ static void unmask_evtchn(int port)
                case 2:
                        __unmask_local_port_l2(port);
                        break;
+               case 3:
+                       __unmask_local_port_l3(port);
+                       break;
                default:
                        BUG();
                }
@@ -1185,6 +1216,7 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
 }
 
 static irqreturn_t xen_debug_interrupt_l2(int irq, void *dev_id);
+static irqreturn_t xen_debug_interrupt_l3(int irq, void *dev_id);
 
 irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
 {
@@ -1215,6 +1247,9 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
        case 2:
                rc = xen_debug_interrupt_l2(irq, dev_id);
                break;
+       case 3:
+               rc = xen_debug_interrupt_l3(irq, dev_id);
+               break;
        default:
                BUG();
        }
@@ -1285,8 +1320,109 @@ static irqreturn_t xen_debug_interrupt_l2(int irq, void *dev_id)
        return IRQ_HANDLED;
 }
 
+/*
+ * Debug interrupt handler for the 3-level event channel ABI.
+ *
+ * Dumps, at KERN_DEBUG level, the non-trivial words of the pending
+ * bitmap, the global mask and this CPU's mask, followed by one line per
+ * pending event channel.  Each of those lines is annotated with:
+ *   " l1-clear"        - the covering L1 selector bit is not set
+ *   " l2-clear"        - the covering L2 selector bit is not set
+ *   " globally-masked" - the channel's bit in evtchn_mask is set
+ *   " locally-masked"  - the channel's bit in this CPU's mask is not set
+ *
+ * @irq and @dev_id are not used.  Always returns IRQ_HANDLED.
+ */
+static irqreturn_t xen_debug_interrupt_l3(int irq, void *dev_id)
+{
+       int cpu = smp_processor_id();
+       unsigned long *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu);
+       unsigned long nr_elems = NR_EVENT_CHANNELS_L3 / BITS_PER_LONG;
+       /* Number of hex digits needed to print one bitmap word. */
+       int width = (int)(sizeof(evtchn_pending[0]) * 2);
+       int i;
+       struct vcpu_info *v;
+
+       v = per_cpu(xen_vcpu, cpu);
+
+       printk(KERN_DEBUG "\npending (only show words which have bits set to 1):\n   ");
+       for (i = nr_elems-1; i >= 0; i--)
+               if (evtchn_pending[i] != 0UL) {
+                       printk(KERN_DEBUG " word index %d %0*lx\n",
+                              i, width, evtchn_pending[i]);
+               }
+
+       printk(KERN_DEBUG "\nglobal mask (only show words which have bits set to 0):\n   ");
+       for (i = nr_elems-1; i >= 0; i--)
+               if (evtchn_mask[i] != ~0UL) {
+                       printk(KERN_DEBUG " word index %d %0*lx\n",
+                              i, width, evtchn_mask[i]);
+               }
+
+       printk(KERN_DEBUG "\nglobally unmasked (only show result words which have bits set to 1):\n   ");
+       for (i = nr_elems-1; i >= 0; i--)
+               if ((evtchn_pending[i] & ~evtchn_mask[i]) != 0UL) {
+                       printk(KERN_DEBUG " word index %d %0*lx\n",
+                              i, width,
+                              evtchn_pending[i] & ~evtchn_mask[i]);
+               }
+
+       printk(KERN_DEBUG "\nlocal cpu%d mask (only show words which have bits set to 1):\n   ", cpu);
+       for (i = nr_elems-1; i >= 0; i--)
+               if (cpu_evtchn[i] != 0UL) {
+                       printk(KERN_DEBUG " word index %d %0*lx\n",
+                              i, width, cpu_evtchn[i]);
+               }
+
+       printk(KERN_DEBUG "\nlocally unmasked (only show result words which have bits set to 1):\n   ");
+       for (i = nr_elems-1; i >= 0; i--) {
+               unsigned long pending = evtchn_pending[i]
+                       & ~evtchn_mask[i]
+                       & cpu_evtchn[i];
+               if (pending != 0UL) {
+                       printk(KERN_DEBUG " word index %d %0*lx\n",
+                              i, width, pending);
+               }
+       }
+
+       printk(KERN_DEBUG "\npending list:\n");
+       for (i = 0; i < NR_EVENT_CHANNELS_L3; i++) {
+               if (sync_test_bit(i, evtchn_pending)) {
+                       /* Selector bits that cover event channel i. */
+                       int word_idx = i / (BITS_PER_LONG * BITS_PER_LONG);
+                       int word_idx_l2 = i / BITS_PER_LONG;
+
+                       /*
+                        * A "-clear" tag is shown when the selector bit is
+                        * NOT set, "globally-masked" when the mask bit IS
+                        * set, "locally-masked" when this CPU's bit is NOT
+                        * set.
+                        */
+                       printk(KERN_DEBUG "  %d: event %d -> irq %d%s%s%s%s\n",
+                              cpu_from_evtchn(i), i,
+                              evtchn_to_irq[i],
+                              sync_test_bit(word_idx, &v->evtchn_pending_sel)
+                              ? "" : " l1-clear",
+                              sync_test_bit(word_idx_l2, per_cpu(evtchn_sel_l2, cpu))
+                              ? "" : " l2-clear",
+                              !sync_test_bit(i, evtchn_mask)
+                              ? "" : " globally-masked",
+                              sync_test_bit(i, cpu_evtchn)
+                              ? "" : " locally-masked");
+               }
+       }
+
+       return IRQ_HANDLED;
+}
+
+/* The following per-cpu variables save the current state of the event
+ * processing loop.
+ *
+ * 2-level event channel:
+ *  current_word_idx is the bit index in the L1 selector that identifies the
+ *  word of the shared bitmap currently being processed.
+ *  current_bit_idx is the bit index within the word of the shared bitmap
+ *  currently being processed.
+ *  N.B. current_word_idx_l2 is not used.
+ *
+ * 3-level event channel:
+ *  current_word_idx is the bit index in the L1 selector that identifies the
+ *  L2 selector word currently being processed.
+ *  current_word_idx_l2 is the bit index in that L2 selector word identifying
+ *  the word of the shared bitmap currently being processed.
+ *  current_bit_idx is the bit index within the word of the shared bitmap
+ *  currently being processed.
+ *
+ */
 static DEFINE_PER_CPU(unsigned, xed_nesting_count);
 static DEFINE_PER_CPU(unsigned int, current_word_idx);
+static DEFINE_PER_CPU(unsigned int, current_word_idx_l2);
 static DEFINE_PER_CPU(unsigned int, current_bit_idx);
 
 /*
@@ -1409,6 +1545,155 @@ out:
        put_cpu();
 }
 
+/*
+ * Upcall handler for the 3-level event channel ABI.
+ *
+ * In the 3-level event channel implementation, the first level is a
+ * bitset of words which contain pending bits in the second level.
+ * The second level is another set of bitsets, each of which contains
+ * pending bits in the third level.  The third level is a bitset of the
+ * pending events themselves.
+ *
+ * The scan starts at the position held in the per-cpu variables
+ * current_word_idx (L1), current_word_idx_l2 (L2) and current_bit_idx,
+ * and that position is updated after every port handled so that it
+ * always names the slot following the last processed port.
+ */
+static void __xen_evtchn_do_upcall_l3(void)
+{
+       struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
+       unsigned count;
+       int start_word_idx_l1, start_word_idx_l2, start_bit_idx;
+       int word_idx_l1, word_idx_l2, bit_idx;
+       int i, j;
+       int cpu = get_cpu();
+
+       do {
+               unsigned long pending_words_l1;
+
+               vcpu_info->evtchn_upcall_pending = 0;
+
+               /*
+                * Only the outermost invocation scans.  A nested one just
+                * bumps the count, which makes the outer loop go round again.
+                */
+               if (__this_cpu_inc_return(xed_nesting_count) - 1)
+                       goto out;
+#ifndef CONFIG_X86
+               /* No need for a barrier -- XCHG is a barrier on x86. */
+               /* Clear master flag /before/ clearing selector flag. */
+               wmb();
+#endif
+               /* Fetch and clear the L1 pending selector in one step. */
+               pending_words_l1 = xchg(&vcpu_info->evtchn_pending_sel, 0);
+
+               start_word_idx_l1 = __this_cpu_read(current_word_idx);
+               start_word_idx_l2 = __this_cpu_read(current_word_idx_l2);
+               start_bit_idx = __this_cpu_read(current_bit_idx);
+
+               word_idx_l1 = start_word_idx_l1;
+
+               /* Walk the L1 selector; each set bit names an L2 word. */
+               for (i = 0; pending_words_l1 != 0; i++) {
+                       unsigned long words_l1;
+                       unsigned long pending_words_l2;
+
+                       words_l1 = MASK_LSBS(pending_words_l1, word_idx_l1);
+
+                       /* Nothing left above word_idx_l1: wrap around. */
+                       if (words_l1 == 0) {
+                               word_idx_l1 = 0;
+                               start_word_idx_l2 = 0;
+                               continue;
+                       }
+
+                       word_idx_l1 = __ffs(words_l1);
+
+                       /* Fetch and clear the L2 selector word. */
+                       pending_words_l2 =
+                               xchg(&per_cpu(evtchn_sel_l2, cpu)[word_idx_l1],
+                                    0);
+
+                       /*
+                        * Usually scan the L2 word from bit 0.  Only on the
+                        * first visit to the saved L1 word do we resume at
+                        * the saved L2 position.
+                        */
+                       word_idx_l2 = 0;
+                       if (word_idx_l1 == start_word_idx_l1 && i == 0)
+                               word_idx_l2 = start_word_idx_l2;
+
+                       /* Walk the L2 word; each set bit names a bitmap word. */
+                       for (j = 0; pending_words_l2 != 0; j++) {
+                               unsigned long pending_bits;
+                               unsigned long words_l2;
+                               unsigned long idx;
+
+                               words_l2 = MASK_LSBS(pending_words_l2,
+                                                    word_idx_l2);
+
+                               /* Nothing left above word_idx_l2: wrap. */
+                               if (words_l2 == 0) {
+                                       word_idx_l2 = 0;
+                                       bit_idx = 0;
+                                       continue;
+                               }
+
+                               word_idx_l2 = __ffs(words_l2);
+
+                               idx = word_idx_l1*BITS_PER_LONG+word_idx_l2;
+                               pending_bits =
+                                       active_evtchns(cpu, idx);
+
+                               /*
+                                * Usually scan the whole word.  On the first
+                                * visit to the saved L2 index, start at the
+                                * saved bit instead.
+                                */
+                               bit_idx = 0;
+                               if (word_idx_l2 == start_word_idx_l2 && j == 0)
+                                       bit_idx = start_bit_idx;
+
+                               /* process port */
+                               do {
+                                       unsigned long bits;
+                                       int port, irq;
+                                       int next_word_idx_l1, next_word_idx_l2;
+                                       struct irq_desc *desc;
+
+                                       bits = MASK_LSBS(pending_bits, bit_idx);
+
+                                       if (bits == 0)
+                                               break;
+
+                                       bit_idx = __ffs(bits);
+
+                                       port = (word_idx_l1 << (LONG_BITORDER << 1)) +
+                                               (word_idx_l2 << LONG_BITORDER) +
+                                               bit_idx;
+
+                                       irq = evtchn_to_irq[port];
+
+                                       if (irq != -1) {
+                                               desc = irq_to_desc(irq);
+                                               if (desc)
+                                                       generic_handle_irq_desc(irq, desc);
+                                       }
+
+                                       bit_idx = (bit_idx + 1) % BITS_PER_LONG;
+
+                                       /*
+                                        * Save the slot after this port.  When
+                                        * the bit index wraps, carry into the
+                                        * L2 index; when that wraps as well,
+                                        * carry into the L1 index.
+                                        */
+                                       next_word_idx_l2 = bit_idx ? word_idx_l2 :
+                                               (word_idx_l2 + 1) % BITS_PER_LONG;
+                                       next_word_idx_l1 =
+                                               (bit_idx || next_word_idx_l2) ?
+                                               word_idx_l1 :
+                                               (word_idx_l1 + 1) % BITS_PER_LONG;
+
+                                       __this_cpu_write(current_bit_idx, bit_idx);
+                                       __this_cpu_write(current_word_idx_l2,
+                                                        next_word_idx_l2);
+                                       __this_cpu_write(current_word_idx,
+                                                        next_word_idx_l1);
+                               } while (bit_idx != 0);
+
+                               /*
+                                * The word matching the saved L2 index is
+                                * visited twice (from the saved bit, then in
+                                * full), so keep it pending after the first
+                                * visit.
+                                */
+                               if ((word_idx_l2 != start_word_idx_l2) || (j != 0))
+                                       pending_words_l2 &= ~(1UL << word_idx_l2);
+
+                               word_idx_l2 = (word_idx_l2 + 1) % BITS_PER_LONG;
+                       }
+
+                       /* Likewise, keep the saved L1 word for a second visit. */
+                       if ((word_idx_l1 != start_word_idx_l1) || (i != 0))
+                               pending_words_l1 &= ~(1UL << word_idx_l1);
+
+                       word_idx_l1 = (word_idx_l1 + 1) % BITS_PER_LONG;
+               }
+
+               BUG_ON(!irqs_disabled());
+               count = __this_cpu_read(xed_nesting_count);
+               __this_cpu_write(xed_nesting_count, 0);
+       } while (count != 1 || vcpu_info->evtchn_upcall_pending);
+
+out:
+       put_cpu();
+}
+
 void xen_evtchn_do_upcall(struct pt_regs *regs)
 {
        struct pt_regs *old_regs = set_irq_regs(regs);
@@ -1420,6 +1705,9 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
        case 2:
                __xen_evtchn_do_upcall_l2();
                break;
+       case 3:
+               __xen_evtchn_do_upcall_l3();
+               break;
        default:
                BUG();
        }
@@ -1434,6 +1722,9 @@ void xen_hvm_evtchn_do_upcall(void)
        case 2:
                __xen_evtchn_do_upcall_l2();
                break;
+       case 3:
+               __xen_evtchn_do_upcall_l3();
+               break;
        default:
                BUG();
        }
-- 
1.7.10.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.