
Re: [Xen-devel] [RFC PATCH v1] Replace tasklets with per-cpu implementation.



On Fri, Aug 29, 2014 at 03:10:19PM +0100, Jan Beulich wrote:
> >>> On 29.08.14 at 15:46, <konrad.wilk@xxxxxxxxxx> wrote:
> > On Thu, Aug 28, 2014 at 01:39:54PM +0100, Jan Beulich wrote:
> >> >>> On 27.08.14 at 19:58, <konrad.wilk@xxxxxxxxxx> wrote:
> >> > The 'hvm_do_IRQ_dpci' is the one that is most often scheduled
> >> > and run. The performance bottleneck comes from the fact that
> >> > we take the same spinlock three times: tasklet_schedule,
> >> > when we are about to execute the tasklet, and when we are
> >> > done executing the tasklet.
> >> 
> >> Before starting all the work here, did you investigate alternatives
> >> to this specific use of a tasklet? E.g., since it is a softirq
> >> tasklet anyway, making it have its own softirq?
> > 
> > If I understand you right, you mean implement a tasklet API that
> > would only be used by hvm_do_IRQ_dpci? Its own spinlock,
> > list, and a separate tasklet_schedule?
> 
> No, just a new softirq type, e.g. HVM_DPCI_SOFTIRQ (added to
> the enum in xen/include/xen/softirq.h and all the necessary
> handling put in place).

I typed this prototype up and asked folks with the right hardware to
test it. It _ought_ to work, though I think the tasklet code could
still use an overhaul.
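
For reference, the contention in question looks roughly like this; a
simplified paraphrase of the common/tasklet.c flow, not the literal
code:

    /* Sketch of the global-lock tasklet path (paraphrased, not the
     * literal common/tasklet.c code). One lock serializes everything. */
    static DEFINE_SPINLOCK(tasklet_lock);
    static LIST_HEAD(tasklet_list);

    void tasklet_schedule(struct tasklet *t)
    {
        unsigned long flags;

        spin_lock_irqsave(&tasklet_lock, flags);  /* 1st: enqueue */
        list_add_tail(&t->list, &tasklet_list);
        spin_unlock_irqrestore(&tasklet_lock, flags);
        raise_softirq(TASKLET_SOFTIRQ);
    }

    static void do_tasklet(void)
    {
        struct tasklet *t;

        spin_lock_irq(&tasklet_lock);             /* 2nd: dequeue */
        t = list_entry(tasklet_list.next, struct tasklet, list);
        list_del_init(&t->list);
        spin_unlock_irq(&tasklet_lock);

        t->func(t->data);                         /* the actual work */

        spin_lock_irq(&tasklet_lock);             /* 3rd: post-run state */
        /* ... re-queue if rescheduled while it was running ... */
        spin_unlock_irq(&tasklet_lock);
    }

Every hvm_do_IRQ_dpci ends up hitting that single tasklet_lock from all
CPUs. The per-cpu lists in the patch below only need local IRQs
disabled around manipulation of the local list, which is the whole
point of the exercise.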


From deecf148e0061027c61af30882eee76a66299686 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
Date: Fri, 29 Aug 2014 13:40:09 -0400
Subject: [PATCH] dpci: Replace tasklet with a softirq

PROTOTYPE.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
---
 xen/drivers/passthrough/io.c  | 149 ++++++++++++++++++++++++++++++++++++++++--
 xen/drivers/passthrough/pci.c |   5 +-
 xen/include/xen/hvm/irq.h     |   3 +
 xen/include/xen/sched.h       |   3 +
 xen/include/xen/softirq.h     |   1 +
 5 files changed, 156 insertions(+), 5 deletions(-)

diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
index c83af68..4c8ff3b 100644
--- a/xen/drivers/passthrough/io.c
+++ b/xen/drivers/passthrough/io.c
@@ -26,10 +26,105 @@
 #include <xen/hvm/irq.h>
 #include <xen/tasklet.h>
 
+bool_t use_softirq;
+boolean_param("use_softirq", use_softirq);
+
 struct rangeset *__read_mostly mmio_ro_ranges;
 
 static void hvm_dirq_assist(unsigned long _d);
 
+static DEFINE_PER_CPU(struct list_head, dpci_list);
+
+enum {
+    STATE_SCHED,
+    STATE_RUN
+};
+static void schedule_dpci_for(struct domain *d)
+{
+    if ( !test_and_set_bit(STATE_SCHED, &d->state) )
+    {
+        unsigned long flags;
+        struct list_head *list;
+
+        local_irq_save(flags);
+        INIT_LIST_HEAD(&d->list);
+        list = &__get_cpu_var(dpci_list);
+        list_add_tail(&d->list, list);
+
+        local_irq_restore(flags);
+        raise_softirq(HVM_DPCI_SOFTIRQ);
+    }
+}
+
+void dpci_kill(struct domain *d)
+{
+    s_time_t s, now;
+    int i = 0;
+
+    s = NOW();
+    while ( test_and_set_bit(STATE_SCHED, &d->state) )
+    {
+        do {
+            now = NOW();
+            process_pending_softirqs();
+            if ( ((now - s) >> 30) > 5 )
+            {
+                s = now;
+                printk("%s stuck .. \n", __func__);
+                i++;
+            }
+            if ( i > 12 )
+                BUG();
+        } while ( test_bit(STATE_SCHED, &d->state) );
+    }
+    while ( test_bit(STATE_RUN, &d->state) )
+    {
+        cpu_relax();
+    }
+    clear_bit(STATE_SCHED, &d->state);
+}
+
+static void dpci_softirq(void)
+{
+
+    struct domain *d;
+    struct list_head *list;
+    struct list_head our_list;
+
+    local_irq_disable();
+    list = &__get_cpu_var(dpci_list);
+
+    INIT_LIST_HEAD(&our_list);
+    list_splice(list, &our_list);
+
+    INIT_LIST_HEAD(&__get_cpu_var(dpci_list));
+
+    local_irq_enable();
+
+    while ( !list_empty(&our_list) )
+    {
+        d = list_entry(our_list.next, struct domain, list);
+        list_del(&d->list);
+
+        if ( !test_and_set_bit(STATE_RUN, &d->state) )
+        {
+            if ( !test_and_clear_bit(STATE_SCHED, &d->state) )
+                BUG();
+            hvm_dirq_assist((unsigned long)d);
+            clear_bit(STATE_RUN, &d->state);
+            continue;
+        }
+
+        local_irq_disable();
+
+        INIT_LIST_HEAD(&d->list);
+        list_add_tail(&d->list, &__get_cpu_var(dpci_list));
+        local_irq_enable();
+
+        raise_softirq(HVM_DPCI_SOFTIRQ);
+    }
+}
+
 bool_t pt_irq_need_timer(uint32_t flags)
 {
     return !(flags & (HVM_IRQ_DPCI_GUEST_MSI | HVM_IRQ_DPCI_TRANSLATE));
@@ -119,9 +214,13 @@ int pt_irq_create_bind_vtd(
             return -ENOMEM;
         }
         memset(hvm_irq_dpci, 0, sizeof(*hvm_irq_dpci));
-        percpu_tasklet_init(
-            &hvm_irq_dpci->dirq_tasklet,
-            hvm_dirq_assist, (unsigned long)d);
+        if ( !use_softirq )
+            percpu_tasklet_init(
+                &hvm_irq_dpci->dirq_tasklet,
+                hvm_dirq_assist, (unsigned long)d);
+        else
+            printk("%s: Using HVM_DPCI_SOFTIRQ for d%d.\n", __func__, 
d->domain_id);
+
         hvm_irq_dpci->mirq = xmalloc_array(struct hvm_mirq_dpci_mapping,
                                            d->nr_pirqs);
         hvm_irq_dpci->dirq_mask = xmalloc_array(unsigned long,
@@ -398,7 +497,10 @@ int hvm_do_IRQ_dpci(struct domain *d, unsigned int mirq)
         return 0;
 
     set_bit(mirq, dpci->dirq_mask);
-    tasklet_schedule(&dpci->dirq_tasklet);
+    if ( !use_softirq )
+        tasklet_schedule(&dpci->dirq_tasklet);
+    else
+        schedule_dpci_for(d);
     return 1;
 }
 
@@ -574,11 +676,50 @@ void hvm_dpci_eoi(struct domain *d, unsigned int guest_gsi,
 unlock:
     spin_unlock(&d->event_lock);
 }
+#include <xen/cpu.h>
+static int cpu_callback(
+    struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+    unsigned int cpu = (unsigned long)hcpu;
+
+    printk("%s for CPU%d\n", __func__, cpu);
+
+    switch ( action )
+    {
+    case CPU_UP_PREPARE:
+        INIT_LIST_HEAD(&per_cpu(dpci_list, cpu));
+        break;
+    case CPU_UP_CANCELED:
+    case CPU_DEAD:
+        BUG(); /* To implement. */
+        break;
+    default:
+        break;
+    }
+
+    return NOTIFY_DONE;
+}
+
+static struct notifier_block cpu_nfb = {
+    .notifier_call = cpu_callback,
+    .priority = 99
+};
 
 static int __init setup_mmio_ro_ranges(void)
 {
     mmio_ro_ranges = rangeset_new(NULL, "r/o mmio ranges",
                                   RANGESETF_prettyprint_hex);
+    printk("HVM_DPCI_SOFTIRQ is %s\n", use_softirq ? "active" : "offline");
+    if ( use_softirq )
+    {
+        int cpu;
+
+        for_each_online_cpu(cpu)
+            INIT_LIST_HEAD(&per_cpu(dpci_list, cpu));
+
+        open_softirq(HVM_DPCI_SOFTIRQ, dpci_softirq);
+        register_cpu_notifier(&cpu_nfb);
+    }
     return 0;
 }
 __initcall(setup_mmio_ro_ranges);
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 111ac96..24900da 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -296,7 +296,10 @@ static void pci_clean_dpci_irqs(struct domain *d)
     hvm_irq_dpci = domain_get_irq_dpci(d);
     if ( hvm_irq_dpci != NULL )
     {
-        tasklet_kill(&hvm_irq_dpci->dirq_tasklet);
+        if ( !use_softirq )
+            tasklet_kill(&hvm_irq_dpci->dirq_tasklet);
+        else
+            dpci_kill(d);
 
         for ( i = find_first_bit(hvm_irq_dpci->mapping, d->nr_pirqs);
               i < d->nr_pirqs;
diff --git a/xen/include/xen/hvm/irq.h b/xen/include/xen/hvm/irq.h
index f21b02c..340293c 100644
--- a/xen/include/xen/hvm/irq.h
+++ b/xen/include/xen/hvm/irq.h
@@ -102,6 +102,9 @@ struct hvm_irq_dpci {
     struct tasklet dirq_tasklet;
 };
 
+extern bool_t use_softirq;
+void dpci_kill(struct domain *d);
+
 /* Modify state of a PCI INTx wire. */
 void hvm_pci_intx_assert(
     struct domain *d, unsigned int device, unsigned int intx);
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index c04b25d..ba9982e 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -332,6 +332,9 @@ struct domain
     nodemask_t node_affinity;
     unsigned int last_alloc_node;
     spinlock_t node_affinity_lock;
+    /* For HVM_DPCI_SOFTIRQ. Locking is a bit wonky. */
+    struct list_head list;
+    unsigned long state;
 };
 
 struct domain_setup_info
diff --git a/xen/include/xen/softirq.h b/xen/include/xen/softirq.h
index c5b429c..7134727 100644
--- a/xen/include/xen/softirq.h
+++ b/xen/include/xen/softirq.h
@@ -9,6 +9,7 @@ enum {
     RCU_SOFTIRQ,
     TASKLET_SOFTIRQ_PERCPU,
     TASKLET_SOFTIRQ,
+    HVM_DPCI_SOFTIRQ,
     NR_COMMON_SOFTIRQS
 };
 
-- 
1.9.3
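
To give this a spin, boot Xen with "use_softirq" on the hypervisor
command line; the parameter defaults to off, so existing setups keep
the tasklet path. The __initcall above prints whether HVM_DPCI_SOFTIRQ
is "active" or "offline", and pt_irq_create_bind_vtd logs it per
domain, so it should be easy to confirm which path is in use. With
GRUB the Xen line would be something along these lines (the image path
and other options are just placeholders):

    multiboot /boot/xen.gz use_softirq console=vga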

> 
> Jan
> 
> 
