[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [RFC PATCH V5 09/14] xen: implement 3-level event channel routines



Implement several routines for the 3-level event channel ABI. Some routines
are shared between the 2-level and 3-level ABIs.

For N-level (currently only 2 and 3) event channel ABIs, the active events are
processed top-down, i.e. L1 -> L2 -> .. -> L(n-1) -> bitmap. The selectors are
processed recursively; the event bitmap is processed by a dedicated function
called process_port.

Signed-off-by: Wei Liu <wei.liu2@xxxxxxxxxx>
---
 drivers/xen/events.c |  376 +++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 293 insertions(+), 83 deletions(-)

diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index ee35ff9..fe1831b 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -76,7 +76,12 @@ static const struct evtchn_ops *eops;
 static xen_ulong_t *evtchn_pending;
 static xen_ulong_t *evtchn_mask;
 /* The following per-cpu var points to selector(s). */
-static DEFINE_PER_CPU(xen_ulong_t *[1], evtchn_sel);
+static DEFINE_PER_CPU(xen_ulong_t *[2], evtchn_sel);
+/*
+ * Storage for the 2nd level selector of the 3-level event channel ABI:
+ * one xen_ulong_t word per bit of a xen_ulong_t ('8' stands for 8 bits
+ * per byte). Presumably what evtchn_sel[1] points at - TODO confirm.
+ */
+static DEFINE_PER_CPU(xen_ulong_t [sizeof(xen_ulong_t) * 8], evtchn_sel_l2);
 
 /*
  * This lock protects updates to the following mapping and reference-count
@@ -150,6 +155,11 @@ static bool (*pirq_needs_eoi)(unsigned irq);
  */
 #define BITS_PER_EVTCHN_WORD (sizeof(xen_ulong_t)*8)
 /*
+ * log2 of BITS_PER_EVTCHN_WORD: if xen_ulong_t is 8 bytes it is 64 bits
+ * wide and 2^6 == 64, otherwise it is 32 bits wide and 2^5 == 32.
+ */
+#define EVTCHN_WORD_BITORDER (sizeof(xen_ulong_t) == 8 ? 6 : 5)
+/*
  * Make a bitmask (i.e. unsigned long *) of a xen_ulong_t
  * array. Primarily to avoid long lines (hence the terse name).
  */
@@ -435,6 +445,29 @@ static inline void __unmask_local_port_l2(int port)
                vcpu_info->evtchn_upcall_pending = 1;
 }
 
+/*
+ * Unmask @port in the global mask and, if the port is already pending,
+ * mark it in the 2nd and 1st level selectors of the current cpu and set
+ * evtchn_upcall_pending.
+ */
+static inline void __unmask_local_port_l3(int port)
+{
+       int cpu = smp_processor_id();
+       struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
+       /* One 2nd level selector bit per event word. */
+       unsigned int l2bit = port >> EVTCHN_WORD_BITORDER;
+       /* One 1st level selector bit per 2nd level selector word. */
+       unsigned int l1bit = port >> (2 * EVTCHN_WORD_BITORDER);
+
+       sync_clear_bit(port, BM(&evtchn_mask[0]));
+
+       /*
+        * What follows is basically the equivalent of 'hw_resend_irq'.
+        * Just like a real IO-APIC we 'lose the interrupt edge' if the
+        * channel is masked, so a port that is pending at unmask time
+        * has to be re-signalled by hand.
+        */
+       if (!sync_test_bit(port, BM(&evtchn_pending[0])))
+               return;
+
+       /* Walk bottom-up; stop at the first selector bit already set. */
+       if (sync_test_and_set_bit(l2bit, BM(per_cpu(evtchn_sel, cpu)[1])))
+               return;
+       if (sync_test_and_set_bit(l1bit, BM(per_cpu(evtchn_sel, cpu)[0])))
+               return;
+
+       vcpu_info->evtchn_upcall_pending = 1;
+}
+
 static void unmask_evtchn(int port)
 {
        unsigned int cpu = get_cpu();
@@ -1326,119 +1359,254 @@ static irqreturn_t xen_debug_interrupt_l2(int irq, 
void *dev_id)
        return IRQ_HANDLED;
 }
 
+/* Print one bitmap word, zero-padded to the full width of xen_ulong_t. */
+static void xen_debug_print_word(int i, xen_ulong_t word)
+{
+       printk(KERN_DEBUG " word index %d %0*"PRI_xen_ulong"\n",
+              i, (int)(sizeof(word) * 2), word);
+}
+
+/*
+ * Debug interrupt handler for the 3-level event channel ABI.
+ *
+ * Dumps, for the current cpu, the non-trivial words of the pending
+ * bitmap, the global mask, the per-cpu mask and their combinations,
+ * followed by one line per pending port. Each port line is tagged with:
+ *   " l1-clear" / " l2-clear"  the matching selector bit is NOT set
+ *   " globally-masked"         the port is set in evtchn_mask
+ *   " locally-masked"          the port is not set in this cpu's mask
+ *
+ * @irq and @dev_id are unused. Always returns IRQ_HANDLED.
+ */
+static irqreturn_t xen_debug_interrupt_l3(int irq, void *dev_id)
+{
+       int cpu = smp_processor_id();
+       xen_ulong_t *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu);
+       xen_ulong_t *sel_l1 = per_cpu(evtchn_sel, cpu)[0];
+       xen_ulong_t *sel_l2 = per_cpu(evtchn_sel, cpu)[1];
+       unsigned long nr_elems = NR_EVENT_CHANNELS_L3 / BITS_PER_EVTCHN_WORD;
+       int i;
+
+       printk(KERN_DEBUG
+              "\npending (only show words which have bits set to 1):\n   ");
+       for (i = nr_elems-1; i >= 0; i--) {
+               if (evtchn_pending[i] != 0)
+                       xen_debug_print_word(i, evtchn_pending[i]);
+       }
+
+       printk(KERN_DEBUG
+              "\nglobal mask (only show words which have bits set to 0):\n   ");
+       for (i = nr_elems-1; i >= 0; i--) {
+               /*
+                * Build the all-ones value in xen_ulong_t: ~0UL is too
+                * narrow when xen_ulong_t is wider than unsigned long.
+                */
+               if (evtchn_mask[i] != ~(xen_ulong_t)0)
+                       xen_debug_print_word(i, evtchn_mask[i]);
+       }
+
+       printk(KERN_DEBUG "\nglobally unmasked "
+              "(only show result words which have bits set to 1):\n   ");
+       for (i = nr_elems-1; i >= 0; i--) {
+               xen_ulong_t unmasked = evtchn_pending[i] & ~evtchn_mask[i];
+
+               if (unmasked != 0)
+                       xen_debug_print_word(i, unmasked);
+       }
+
+       printk(KERN_DEBUG "\nlocal cpu%d mask "
+              "(only show words which have bits set to 1):\n   ", cpu);
+       for (i = nr_elems-1; i >= 0; i--) {
+               if (cpu_evtchn[i] != 0)
+                       xen_debug_print_word(i, cpu_evtchn[i]);
+       }
+
+       printk(KERN_DEBUG "\nlocally unmasked "
+              "(only show result words which have bits set to 1):\n   ");
+       for (i = nr_elems-1; i >= 0; i--) {
+               xen_ulong_t pending = evtchn_pending[i]
+                       & ~evtchn_mask[i]
+                       & cpu_evtchn[i];
+
+               if (pending != 0)
+                       xen_debug_print_word(i, pending);
+       }
+
+       printk(KERN_DEBUG "\npending list:\n");
+       for (i = 0; i < NR_EVENT_CHANNELS_L3; i++) {
+               unsigned int l1bit, l2bit;
+
+               if (!sync_test_bit(i, BM(evtchn_pending)))
+                       continue;
+
+               /* Same selector bit positions as __unmask_local_port_l3. */
+               l2bit = i / BITS_PER_EVTCHN_WORD;
+               l1bit = l2bit / BITS_PER_EVTCHN_WORD;
+
+               /* Each suffix is printed only when its condition holds. */
+               printk(KERN_DEBUG "  %d: event %d -> irq %d%s%s%s%s\n",
+                      cpu_from_evtchn(i), i,
+                      evtchn_to_irq[i],
+                      sync_test_bit(l1bit, BM(sel_l1))
+                      ? "" : " l1-clear",
+                      sync_test_bit(l2bit, BM(sel_l2))
+                      ? "" : " l2-clear",
+                      !sync_test_bit(i, BM(evtchn_mask))
+                      ? "" : " globally-masked",
+                      sync_test_bit(i, BM(cpu_evtchn))
+                      ? "" : " locally-masked");
+       }
+
+       return IRQ_HANDLED;
+}
+
 static DEFINE_PER_CPU(unsigned, xed_nesting_count);
-static DEFINE_PER_CPU(unsigned int, current_word_idx);
-static DEFINE_PER_CPU(unsigned int, current_bit_idx);
+static DEFINE_PER_CPU(unsigned int[3], current_idx);
 
 /*
  * Mask out the i least significant bits of w
  */
 #define MASK_LSBS(w, i) (w & ((~((xen_ulong_t)0UL)) << i))
 
+/*
+ * Deliver the active events of a single event word.
+ *
+ * @cpu: cpu whose active events are fetched with active_evtchns()
+ * @base: port number of bit 0 of the event word
+ * @idx: bit to start scanning from; advanced past every handled bit
+ * @idx_array: per-level index array (of which @idx is one element);
+ *             used to update the per-cpu resume position current_idx[]
+ *
+ * Scanning stops when no active bit is left at or above *idx, or when
+ * *idx wraps around to 0.
+ */
+static __always_inline void process_port(int cpu,
+                                        unsigned int base,
+                                        unsigned int *idx,
+                                        unsigned int *idx_array)
+{
+       xen_ulong_t pending_bits =
+               active_evtchns(cpu, base >> EVTCHN_WORD_BITORDER);
+
+       for (;;) {
+               xen_ulong_t bits = MASK_LSBS(pending_bits, *idx);
+               unsigned int resume_l1, resume_l2;
+               struct irq_desc *desc;
+               int port, irq;
+
+               /* Nothing left at or above *idx: this word is done. */
+               if (bits == 0)
+                       break;
+
+               *idx = EVTCHN_FIRST_BIT(bits);
+
+               /* Map the port to its irq and run the handler, if any. */
+               port = base + *idx;
+               irq = evtchn_to_irq[port];
+               if (irq != -1) {
+                       desc = irq_to_desc(irq);
+                       if (desc)
+                               generic_handle_irq_desc(irq, desc);
+               }
+
+               *idx = (*idx + 1) % BITS_PER_EVTCHN_WORD;
+
+               /*
+                * The next caller starts at last processed + 1.
+                *
+                * This routine is shared by the 2-level and 3-level
+                * ABIs, so all three current_idx elements are written.
+                * In the 2-level case the caller /should/ always set
+                * idx_array[2] to ~0U, so current_idx[1] simply
+                * receives idx_array[1].
+                *
+                * NOTE(review): in the 3-level case idx_array[1] is an
+                * index this function never advances, so current_idx[0]
+                * moves on whenever that index is 0 - confirm this is
+                * the intended resume position.
+                */
+               if (idx_array[1])
+                       resume_l1 = idx_array[0];
+               else
+                       resume_l1 = (idx_array[0]+1) % BITS_PER_EVTCHN_WORD;
+
+               if (idx_array[2])
+                       resume_l2 = idx_array[1];
+               else
+                       resume_l2 = (idx_array[1]+1) % BITS_PER_EVTCHN_WORD;
+
+               __this_cpu_write(current_idx[0], resume_l1);
+               __this_cpu_write(current_idx[1], resume_l2);
+               __this_cpu_write(current_idx[2], idx_array[2]);
+
+               /* Wrapped past the last bit of the word. */
+               if (*idx == 0)
+                       break;
+       }
+}
+
 /*
- * Search the CPUs pending events bitmasks.  For each one found, map
- * the event number to an irq, and feed it into do_IRQ() for
- * handling.
+ * This function processes active event channels top-down, L1 -> L2 ->
+ * .. -> L(n-1) -> bitmap. The selectors are processed recursively,
+ * the event bitmap is processed by process_port.
  *
- * Xen uses a two-level bitmap to speed searching.  The first level is
- * a bitset of words which contain pending event bits.  The second
- * level is a bitset of pending events themselves.
+ * @cpu: current cpu id
+ * @base: accumulated offsets along selector processing
+ * @start_idx: array used to resume index
+ * @idx: array of current processing index
+ * @sel_idx: selector word index
+ * @level: current processing level, from 0 to highest_level
+ * @highest_level: highest recursion level
+ *
+ * If level == highest_level, we have reached the event bitmap.  The
+ * level variable starts from 0, so highest_level for the 2-level ABI
+ * is 1, while for the 3-level ABI it is 2.
  */
-static void __xen_evtchn_do_upcall_l2(void)
+static void process(int cpu,
+                   unsigned int base,
+                   unsigned int *start_idx,
+                   unsigned int *idx,
+                   unsigned int sel_idx,
+                   unsigned short level,
+                   unsigned short highest_level)
 {
-       int start_word_idx, start_bit_idx;
-       int word_idx, bit_idx;
        int i;
-       int cpu = get_cpu();
-       struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
-       unsigned count;
+       xen_ulong_t pending_words;
 
-       do {
-               xen_ulong_t pending_words;
+       if (level == highest_level) {
+               process_port(cpu, base, &idx[level], idx);
+               return;
+       }
 
-               vcpu_info->evtchn_upcall_pending = 0;
+       pending_words =
+               xchg_xen_ulong(&per_cpu(evtchn_sel, cpu)[level][sel_idx], 0);
 
-               if (__this_cpu_inc_return(xed_nesting_count) - 1)
-                       goto out;
+       /* This loop is used to process selectors. */
+       for (i = 0; pending_words != 0; i++) {
+               xen_ulong_t words;
+               unsigned int saved_base;
+
+               words = MASK_LSBS(pending_words, idx[level]);
 
                /*
-                * Master flag must be cleared /before/ clearing
-                * selector flag. xchg_xen_ulong must contain an
-                * appropriate barrier.
+                * If we masked out all events, wrap to beginning.
                 */
-               pending_words = xchg_xen_ulong(per_cpu(evtchn_sel, cpu)[0], 0);
-
-               start_word_idx = __this_cpu_read(current_word_idx);
-               start_bit_idx = __this_cpu_read(current_bit_idx);
-
-               word_idx = start_word_idx;
+               if (words == 0) {
+                       idx[level] = 0;
+                       start_idx[level+1] = 0;
+                       continue;
+               }
 
-               for (i = 0; pending_words != 0; i++) {
-                       xen_ulong_t pending_bits;
-                       xen_ulong_t words;
+               idx[level] = EVTCHN_FIRST_BIT(words);
+
+               idx[level+1] = 0; /* usually scan entire word from start */
+               if (idx[level] == start_idx[level]) {
+                       /* We scan the starting word in two parts */
+                       if (i == 0)
+                               /* 1st time: start in the middle */
+                               idx[level+1] = start_idx[level+1];
+                       else
+                               /* 2nd time: mask bits done already */
+                               idx[level+1] &= (1UL << start_idx[level+1]) - 1;
+               }
 
-                       words = MASK_LSBS(pending_words, word_idx);
+               saved_base = base;
+               base += (idx[level] <<
+                        (EVTCHN_WORD_BITORDER * (highest_level-level)));
 
-                       /*
-                        * If we masked out all events, wrap to beginning.
-                        */
-                       if (words == 0) {
-                               word_idx = 0;
-                               bit_idx = 0;
-                               continue;
-                       }
-                       word_idx = EVTCHN_FIRST_BIT(words);
-
-                       pending_bits = active_evtchns(cpu, word_idx);
-                       bit_idx = 0; /* usually scan entire word from start */
-                       if (word_idx == start_word_idx) {
-                               /* We scan the starting word in two parts */
-                               if (i == 0)
-                                       /* 1st time: start in the middle */
-                                       bit_idx = start_bit_idx;
-                               else
-                                       /* 2nd time: mask bits done already */
-                                       bit_idx &= (1UL << start_bit_idx) - 1;
-                       }
+               process(cpu, base, start_idx, idx, idx[level],
+                       level+1, highest_level);
 
-                       do {
-                               xen_ulong_t bits;
-                               int port, irq;
-                               struct irq_desc *desc;
+               base = saved_base;
 
-                               bits = MASK_LSBS(pending_bits, bit_idx);
+               /* Scan start_l1i twice; all others once. */
+               if ((idx[level] != start_idx[level]) || (i != 0))
+                       pending_words &= ~(1UL << idx[level]);
 
-                               /* If we masked out all events, move on. */
-                               if (bits == 0)
-                                       break;
+               idx[level] = (idx[level] + 1) % BITS_PER_EVTCHN_WORD;
+       }
+}
 
-                               bit_idx = EVTCHN_FIRST_BIT(bits);
 
-                               /* Process port. */
-                               port = (word_idx * BITS_PER_EVTCHN_WORD) + 
bit_idx;
-                               irq = evtchn_to_irq[port];
+/* This routine is shared between 2/3-level ABI */
+static void ___xen_evtchn_do_upcall(unsigned int *start_idx,
+                                   unsigned int *idx,
+                                   unsigned short highest_level)
+{
+       int cpu = get_cpu();
+       struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
+       unsigned count;
 
-                               if (irq != -1) {
-                                       desc = irq_to_desc(irq);
-                                       if (desc)
-                                               generic_handle_irq_desc(irq, 
desc);
-                               }
+       do {
+               vcpu_info->evtchn_upcall_pending = 0;
 
-                               bit_idx = (bit_idx + 1) % BITS_PER_EVTCHN_WORD;
+               if (__this_cpu_inc_return(xed_nesting_count) - 1)
+                       goto out;
 
-                               /* Next caller starts at last processed + 1 */
-                               __this_cpu_write(current_word_idx,
-                                                bit_idx ? word_idx :
-                                                (word_idx+1) % 
BITS_PER_EVTCHN_WORD);
-                               __this_cpu_write(current_bit_idx, bit_idx);
-                       } while (bit_idx != 0);
+               start_idx[0] = __this_cpu_read(current_idx[0]);
+               start_idx[1] = __this_cpu_read(current_idx[1]);
+               start_idx[2] = __this_cpu_read(current_idx[2]);
 
-                       /* Scan start_l1i twice; all others once. */
-                       if ((word_idx != start_word_idx) || (i != 0))
-                               pending_words &= ~(1UL << word_idx);
+               idx[0] = start_idx[0];
 
-                       word_idx = (word_idx + 1) % BITS_PER_EVTCHN_WORD;
-               }
+               process(cpu, 0 /* base */, start_idx, idx,
+                       0 /* selector index */,
+                       0 /* starting from L1 (1-1=0) */,
+                       highest_level);
 
                BUG_ON(!irqs_disabled());
 
@@ -1451,6 +1619,42 @@ out:
        put_cpu();
 }
 
+/*
+ * Upcall handler for the 2-level ABI: search the CPU's pending event
+ * bitmasks and, for each event found, map the event number to an irq
+ * and feed it into do_IRQ() for handling.
+ *
+ * Xen uses a two-level bitmap to speed up searching.  The first level
+ * is a bitset of words which contain pending event bits.  The second
+ * level is a bitset of the pending events themselves.
+ */
+static void __xen_evtchn_do_upcall_l2(void)
+{
+       /*
+        * process_port() is handed three-element arrays, but the third
+        * element is never used for the 2-level ABI and should always
+        * be set to ~0U. The other two elements start at 0.
+        */
+       unsigned int resume[3] = { [2] = ~0U };
+       unsigned int cursor[3] = { [2] = ~0U };
+
+       ___xen_evtchn_do_upcall(resume, cursor, 1);
+}
+
+/* Upcall handler for the 3-level ABI (highest recursion level is 2). */
+static void __xen_evtchn_do_upcall_l3(void)
+{
+       /*
+        * Both three-element index arrays start out zeroed here;
+        * ___xen_evtchn_do_upcall() loads the resume position from the
+        * per-cpu current_idx[].
+        */
+       unsigned int resume[3] = { 0 };
+       unsigned int cursor[3] = { 0 };
+
+       ___xen_evtchn_do_upcall(resume, cursor, 2);
+}
+
 void xen_evtchn_do_upcall(struct pt_regs *regs)
 {
        struct pt_regs *old_regs = set_irq_regs(regs);
@@ -1870,6 +2074,12 @@ const struct evtchn_ops evtchn_l2_ops = {
        .do_upcall = __xen_evtchn_do_upcall_l2
 };
 
+/* Event channel operations for the 3-level ABI. */
+const struct evtchn_ops evtchn_l3_ops = {
+       .do_upcall       = __xen_evtchn_do_upcall_l3,
+       .debug_interrupt = xen_debug_interrupt_l3,
+       .unmask          = __unmask_local_port_l3,
+};
+
 static int __cpuinit xen_events_notifier_cb(struct notifier_block *self,
                                            unsigned long action,
                                            void *hcpu)
-- 
1.7.10.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.