[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v5 2/3] x86/pt: enable binding of GSIs to a PVH Dom0



Achieve this by expanding pt_irq_create_bind in order to support
mapping interrupts of type PT_IRQ_TYPE_PCI to a PVH Dom0. GSIs bound
to Dom0 are always identity bound, which means the all the fields
inside of the u.pci sub-struct are ignored, and only the machine_irq
is actually used in order to determine which GSI the caller wants to
bind.

Also, the hvm_irq_dpci struct is not used by a PVH Dom0, since that's
used to route interrupts and allow different host to guest GSI
mappings, which is not used by a PVH Dom0.

This requires adding some specific handlers for such directly mapped
GSIs, which bypass the PCI interrupt routing done by Xen for HVM
guests.

Signed-off-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
---
Cc: Jan Beulich <jbeulich@xxxxxxxx>
Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
Changes since v4:
 - Remove the local pirq vairable in pt_irq_time_out (it was used only
   once).
 - Change the == NULL checks to ! instead.
 - Fix comments' grammar errors.
 - Check whether hvm_irq_dpci is NULL instead of checking if d is the
   hardware domain (and add ASSERTs when appropriate).
 - Make vioapic const in pt_irq_create_bind.

Changes since v3:
 - Rewrite the comment in hvm_gsi_assert.
 - Unconditionally set gsi_assert_count to 0 in hvm_gsi_deassert.
 - In the pirq timeout function do not defer the EOI for the identity
   mapped case.
 - Assert that the vIO APIC entry is not masked before checking the
   trigger mode.
 - In the failure path of pt_irq_create_bind check that girq and digl
   are not NULL instead of relying on whether the domain is Dom0.
 - In pt_irq_destroy_bind move a condition to the outer if in order to
   avoid code indentation.

Changes since v2:
 - Turn the assert in hvm_gsi_{de}assert into an assert_unreachable
   (like it's done in __hvm_pci_intx_{de}assert.
 - Do not increase/decrease gsi_assert_count, instead set it to 1/0.
 - Fix a comment grammar error.
 - Convert the pt_irq_create_bind asserts for bind type and pirq range
   into an error path.
 - Reduce the size of the message buffers, 24 should be enough.
 - Allow pt_irq_create_bind to unbind hardware domain GSIs.
 - s/__hvm_pirq_eoi/hvm_pirq_eoi/.
 - Remove ASSERT(pirq_dpci) from hvm_pirq_eoi.
 - Remove pirq_dpci local variable from hvm_gsi_eoi (it's used only
   once).
 - s/__hvm_gsi_eoi/hvm_gsi_eoi/.
 - Add a comment to document hvm_gsi_assert usage of
   gsi_assert_count.

Changes since v1:
 - Remove the PT_IRQ_TYPE_GSI and instead just use PT_IRQ_TYPE_PCI
   with a hardware domain special casing.
 - Check the trigger mode of the Dom0 vIO APIC in order to set the
   shareable flags in pt_irq_create_bind.
---
 xen/arch/x86/hvm/irq.c       |  42 +++++++++
 xen/drivers/passthrough/io.c | 218 ++++++++++++++++++++++++++++++++-----------
 xen/include/xen/hvm/irq.h    |   6 ++
 3 files changed, 214 insertions(+), 52 deletions(-)

diff --git a/xen/arch/x86/hvm/irq.c b/xen/arch/x86/hvm/irq.c
index 86255847a6..e425df913c 100644
--- a/xen/arch/x86/hvm/irq.c
+++ b/xen/arch/x86/hvm/irq.c
@@ -126,6 +126,48 @@ void hvm_pci_intx_deassert(
     spin_unlock(&d->arch.hvm_domain.irq_lock);
 }
 
+void hvm_gsi_assert(struct domain *d, unsigned int gsi)
+{
+    struct hvm_irq *hvm_irq = hvm_domain_irq(d);
+
+    if ( gsi >= hvm_irq->nr_gsis )
+    {
+        ASSERT_UNREACHABLE();
+        return;
+    }
+
+    /*
+     * __hvm_pci_intx_{de}assert uses a bitfield in pci_intx.i to track the
+     * status of each interrupt line, and Xen does the routing and GSI
+     * assertion based on that. The value of the pci_intx.i bitmap prevents the
+     * same line from triggering multiple times. As we don't use that bitmap
+     * for the hardware domain, Xen needs to rely on gsi_assert_count in order
+     * to know if the GSI is pending or not.
+     */
+    spin_lock(&d->arch.hvm_domain.irq_lock);
+    if ( !hvm_irq->gsi_assert_count[gsi] )
+    {
+        hvm_irq->gsi_assert_count[gsi] = 1;
+        assert_gsi(d, gsi);
+    }
+    spin_unlock(&d->arch.hvm_domain.irq_lock);
+}
+
+void hvm_gsi_deassert(struct domain *d, unsigned int gsi)
+{
+    struct hvm_irq *hvm_irq = hvm_domain_irq(d);
+
+    if ( gsi >= hvm_irq->nr_gsis )
+    {
+        ASSERT_UNREACHABLE();
+        return;
+    }
+
+    spin_lock(&d->arch.hvm_domain.irq_lock);
+    hvm_irq->gsi_assert_count[gsi] = 0;
+    spin_unlock(&d->arch.hvm_domain.irq_lock);
+}
+
 void hvm_isa_irq_assert(
     struct domain *d, unsigned int isa_irq)
 {
diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
index 7158afea88..7d09335185 100644
--- a/xen/drivers/passthrough/io.c
+++ b/xen/drivers/passthrough/io.c
@@ -164,6 +164,23 @@ static void pt_irq_time_out(void *data)
 
     spin_lock(&irq_map->dom->event_lock);
 
+    if ( irq_map->flags & HVM_IRQ_DPCI_IDENTITY_GSI )
+    {
+        ASSERT(is_hardware_domain(irq_map->dom));
+        /*
+         * Identity mapped, no need to iterate over the guest GSI list to find
+         * other pirqs sharing the same guest GSI.
+         *
+         * In the identity mapped case the EOI can also be done now, this way
+         * the iteration over the list of domain pirqs is avoided.
+         */
+        hvm_gsi_deassert(irq_map->dom, dpci_pirq(irq_map)->pirq);
+        irq_map->flags |= HVM_IRQ_DPCI_EOI_LATCH;
+        pt_irq_guest_eoi(irq_map->dom, irq_map, NULL);
+        spin_unlock(&irq_map->dom->event_lock);
+        return;
+    }
+
     dpci = domain_get_irq_dpci(irq_map->dom);
     if ( unlikely(!dpci) )
     {
@@ -274,10 +291,16 @@ int pt_irq_create_bind(
     spin_lock(&d->event_lock);
 
     hvm_irq_dpci = domain_get_irq_dpci(d);
-    if ( hvm_irq_dpci == NULL )
+    if ( !hvm_irq_dpci && !is_hardware_domain(d) )
     {
         unsigned int i;
 
+        /*
+         * NB: the hardware domain doesn't use a hvm_irq_dpci struct because
+         * it's only allowed to identity map GSIs, and so the data contained in
+         * that struct (used to map guest GSIs into machine GSIs and perform
+         * interrupt routing) is completely useless to it.
+         */
         hvm_irq_dpci = xzalloc(struct hvm_irq_dpci);
         if ( hvm_irq_dpci == NULL )
         {
@@ -422,35 +445,54 @@ int pt_irq_create_bind(
     case PT_IRQ_TYPE_PCI:
     case PT_IRQ_TYPE_MSI_TRANSLATE:
     {
-        unsigned int bus = pt_irq_bind->u.pci.bus;
-        unsigned int device = pt_irq_bind->u.pci.device;
-        unsigned int intx = pt_irq_bind->u.pci.intx;
-        unsigned int guest_gsi = hvm_pci_intx_gsi(device, intx);
-        unsigned int link = hvm_pci_intx_link(device, intx);
-        struct dev_intx_gsi_link *digl = xmalloc(struct dev_intx_gsi_link);
-        struct hvm_girq_dpci_mapping *girq =
-            xmalloc(struct hvm_girq_dpci_mapping);
+        struct dev_intx_gsi_link *digl = NULL;
+        struct hvm_girq_dpci_mapping *girq = NULL;
+        unsigned int guest_gsi;
 
-        if ( !digl || !girq )
+        /*
+         * Mapping GSIs for the hardware domain is different than doing it for
+         * an unpriviledged guest, the hardware domain is only allowed to
+         * identity map GSIs, and as such all the data in the u.pci union is
+         * discarded.
+         */
+        if ( hvm_irq_dpci )
         {
-            spin_unlock(&d->event_lock);
-            xfree(girq);
-            xfree(digl);
-            return -ENOMEM;
-        }
+            unsigned int link;
+
+            digl = xmalloc(struct dev_intx_gsi_link);
+            girq = xmalloc(struct hvm_girq_dpci_mapping);
+
+            if ( !digl || !girq )
+            {
+                spin_unlock(&d->event_lock);
+                xfree(girq);
+                xfree(digl);
+                return -ENOMEM;
+            }
+
+            girq->bus = digl->bus = pt_irq_bind->u.pci.bus;
+            girq->device = digl->device = pt_irq_bind->u.pci.device;
+            girq->intx = digl->intx = pt_irq_bind->u.pci.intx;
+            list_add_tail(&digl->list, &pirq_dpci->digl_list);
 
-        hvm_irq_dpci->link_cnt[link]++;
+            guest_gsi = hvm_pci_intx_gsi(digl->device, digl->intx);
+            link = hvm_pci_intx_link(digl->device, digl->intx);
 
-        digl->bus = bus;
-        digl->device = device;
-        digl->intx = intx;
-        list_add_tail(&digl->list, &pirq_dpci->digl_list);
+            hvm_irq_dpci->link_cnt[link]++;
 
-        girq->bus = bus;
-        girq->device = device;
-        girq->intx = intx;
-        girq->machine_gsi = pirq;
-        list_add_tail(&girq->list, &hvm_irq_dpci->girq[guest_gsi]);
+            girq->machine_gsi = pirq;
+            list_add_tail(&girq->list, &hvm_irq_dpci->girq[guest_gsi]);
+        }
+        else
+        {
+            ASSERT(is_hardware_domain(d));
+
+            /* MSI_TRANSLATE is not supported for the hardware domain. */
+            if ( pt_irq_bind->irq_type != PT_IRQ_TYPE_PCI ||
+                 pirq >= hvm_domain_irq(d)->nr_gsis )
+                return -EINVAL;
+            guest_gsi = pirq;
+        }
 
         /* Bind the same mirq once in the same domain */
         if ( !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
@@ -472,7 +514,29 @@ int pt_irq_create_bind(
                 pirq_dpci->flags = HVM_IRQ_DPCI_MAPPED |
                                    HVM_IRQ_DPCI_MACH_PCI |
                                    HVM_IRQ_DPCI_GUEST_PCI;
-                share = BIND_PIRQ__WILL_SHARE;
+                if ( !is_hardware_domain(d) )
+                    share = BIND_PIRQ__WILL_SHARE;
+                else
+                {
+                    unsigned int pin;
+                    const struct hvm_vioapic *vioapic = gsi_vioapic(d,
+                                                                    guest_gsi,
+                                                                    &pin);
+
+                    if ( !vioapic )
+                    {
+                        ASSERT_UNREACHABLE();
+                        return -EINVAL;
+                    }
+                    pirq_dpci->flags |= HVM_IRQ_DPCI_IDENTITY_GSI;
+                    /*
+                     * Check if the corresponding vIO APIC pin is configured
+                     * level or edge trigger, level triggered interrupts will
+                     * be marked as shareable.
+                     */
+                    ASSERT(!vioapic->redirtbl[pin].fields.mask);
+                    share = vioapic->redirtbl[pin].fields.trig_mode;
+                }
             }
 
             /* Init timer before binding */
@@ -489,9 +553,16 @@ int pt_irq_create_bind(
                  * IRQ_GUEST is not set. As such we can reset 'dom' directly.
                  */
                 pirq_dpci->dom = NULL;
-                list_del(&girq->list);
-                list_del(&digl->list);
-                hvm_irq_dpci->link_cnt[link]--;
+                if ( hvm_irq_dpci )
+                {
+                    unsigned int link;
+
+                    ASSERT(girq && digl);
+                    list_del(&girq->list);
+                    list_del(&digl->list);
+                    link = hvm_pci_intx_link(digl->device, digl->intx);
+                    hvm_irq_dpci->link_cnt[link]--;
+                }
                 pirq_dpci->flags = 0;
                 pirq_cleanup_check(info, d);
                 spin_unlock(&d->event_lock);
@@ -504,10 +575,17 @@ int pt_irq_create_bind(
         spin_unlock(&d->event_lock);
 
         if ( iommu_verbose )
-            printk(XENLOG_G_INFO
-                   "d%d: bind: m_gsi=%u g_gsi=%u dev=%02x.%02x.%u intx=%u\n",
-                   d->domain_id, pirq, guest_gsi, bus,
-                   PCI_SLOT(device), PCI_FUNC(device), intx);
+        {
+            char buf[24] = "";
+
+            if ( digl )
+                snprintf(buf, ARRAY_SIZE(buf), " dev=%02x.%02x.%u intx=%u",
+                         digl->bus, PCI_SLOT(digl->device),
+                         PCI_FUNC(digl->device), digl->intx);
+
+            printk(XENLOG_G_INFO "d%d: bind: m_gsi=%u g_gsi=%u%s\n",
+                   d->domain_id, pirq, guest_gsi, buf);
+        }
         break;
     }
 
@@ -554,7 +632,7 @@ int pt_irq_destroy_bind(
 
     hvm_irq_dpci = domain_get_irq_dpci(d);
 
-    if ( hvm_irq_dpci == NULL )
+    if ( !hvm_irq_dpci && !is_hardware_domain(d) )
     {
         spin_unlock(&d->event_lock);
         return -EINVAL;
@@ -563,7 +641,7 @@ int pt_irq_destroy_bind(
     pirq = pirq_info(d, machine_gsi);
     pirq_dpci = pirq_dpci(pirq);
 
-    if ( pt_irq_bind->irq_type != PT_IRQ_TYPE_MSI )
+    if ( hvm_irq_dpci && pt_irq_bind->irq_type != PT_IRQ_TYPE_MSI )
     {
         unsigned int bus = pt_irq_bind->u.pci.bus;
         unsigned int device = pt_irq_bind->u.pci.device;
@@ -638,11 +716,15 @@ int pt_irq_destroy_bind(
     if ( what && iommu_verbose )
     {
         unsigned int device = pt_irq_bind->u.pci.device;
+        char buf[24] = "";
+
+        if ( hvm_irq_dpci )
+            snprintf(buf, ARRAY_SIZE(buf), " dev=%02x.%02x.%u intx=%u",
+                     pt_irq_bind->u.pci.bus, PCI_SLOT(device),
+                     PCI_FUNC(device), pt_irq_bind->u.pci.intx);
 
-        printk(XENLOG_G_INFO
-               "d%d %s unmap: m_irq=%u dev=%02x:%02x.%u intx=%u\n",
-               d->domain_id, what, machine_gsi, pt_irq_bind->u.pci.bus,
-               PCI_SLOT(device), PCI_FUNC(device), pt_irq_bind->u.pci.intx);
+        printk(XENLOG_G_INFO "d%d %s unmap: m_irq=%u%s\n",
+               d->domain_id, what, machine_gsi, buf);
     }
 
     return 0;
@@ -698,8 +780,8 @@ int hvm_do_IRQ_dpci(struct domain *d, struct pirq *pirq)
 
     ASSERT(is_hvm_domain(d));
 
-    if ( !iommu_enabled || !dpci || !pirq_dpci ||
-         !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
+    if ( !iommu_enabled || (!is_hardware_domain(d) && !dpci) ||
+         !pirq_dpci || !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
         return 0;
 
     pirq_dpci->masked = 1;
@@ -759,7 +841,7 @@ void hvm_dpci_msi_eoi(struct domain *d, int vector)
 
 static void hvm_dirq_assist(struct domain *d, struct hvm_pirq_dpci *pirq_dpci)
 {
-    if ( unlikely(!hvm_domain_irq(d)->dpci) )
+    if ( unlikely(!hvm_domain_irq(d)->dpci) && !is_hardware_domain(d) )
     {
         ASSERT_UNREACHABLE();
         return;
@@ -791,10 +873,17 @@ static void hvm_dirq_assist(struct domain *d, struct 
hvm_pirq_dpci *pirq_dpci)
 
         list_for_each_entry ( digl, &pirq_dpci->digl_list, list )
         {
+            ASSERT(!(pirq_dpci->flags & HVM_IRQ_DPCI_IDENTITY_GSI));
             hvm_pci_intx_assert(d, digl->device, digl->intx);
             pirq_dpci->pending++;
         }
 
+        if ( pirq_dpci->flags & HVM_IRQ_DPCI_IDENTITY_GSI )
+        {
+            hvm_gsi_assert(d, pirq->pirq);
+            pirq_dpci->pending++;
+        }
+
         if ( pirq_dpci->flags & HVM_IRQ_DPCI_TRANSLATE )
         {
             /* for translated MSI to INTx interrupt, eoi as early as possible 
*/
@@ -816,17 +905,10 @@ static void hvm_dirq_assist(struct domain *d, struct 
hvm_pirq_dpci *pirq_dpci)
     spin_unlock(&d->event_lock);
 }
 
-static void __hvm_dpci_eoi(struct domain *d,
-                           const struct hvm_girq_dpci_mapping *girq,
-                           const union vioapic_redir_entry *ent)
+static void hvm_pirq_eoi(struct pirq *pirq,
+                         const union vioapic_redir_entry *ent)
 {
-    struct pirq *pirq = pirq_info(d, girq->machine_gsi);
-    struct hvm_pirq_dpci *pirq_dpci;
-
-    if ( !hvm_domain_use_pirq(d, pirq) )
-        hvm_pci_intx_deassert(d, girq->device, girq->intx);
-
-    pirq_dpci = pirq_dpci(pirq);
+    struct hvm_pirq_dpci *pirq_dpci = pirq_dpci(pirq);
 
     /*
      * No need to get vector lock for timer
@@ -841,6 +923,31 @@ static void __hvm_dpci_eoi(struct domain *d,
     pirq_guest_eoi(pirq);
 }
 
+static void __hvm_dpci_eoi(struct domain *d,
+                           const struct hvm_girq_dpci_mapping *girq,
+                           const union vioapic_redir_entry *ent)
+{
+    struct pirq *pirq = pirq_info(d, girq->machine_gsi);
+
+    if ( !hvm_domain_use_pirq(d, pirq) )
+        hvm_pci_intx_deassert(d, girq->device, girq->intx);
+
+    hvm_pirq_eoi(pirq, ent);
+}
+
+static void hvm_gsi_eoi(struct domain *d, unsigned int gsi,
+                        const union vioapic_redir_entry *ent)
+{
+    struct pirq *pirq = pirq_info(d, gsi);
+
+    /* Check if GSI is actually mapped. */
+    if ( !pirq_dpci(pirq) )
+        return;
+
+    hvm_gsi_deassert(d, gsi);
+    hvm_pirq_eoi(pirq, ent);
+}
+
 void hvm_dpci_eoi(struct domain *d, unsigned int guest_gsi,
                   const union vioapic_redir_entry *ent)
 {
@@ -850,6 +957,13 @@ void hvm_dpci_eoi(struct domain *d, unsigned int guest_gsi,
     if ( !iommu_enabled )
         return;
 
+    if ( is_hardware_domain(d) )
+    {
+        spin_lock(&d->event_lock);
+        hvm_gsi_eoi(d, guest_gsi, ent);
+        goto unlock;
+    }
+
     if ( guest_gsi < NR_ISAIRQS )
     {
         hvm_dpci_isairq_eoi(d, guest_gsi);
diff --git a/xen/include/xen/hvm/irq.h b/xen/include/xen/hvm/irq.h
index 671a6f2e06..0d2c72c109 100644
--- a/xen/include/xen/hvm/irq.h
+++ b/xen/include/xen/hvm/irq.h
@@ -40,6 +40,7 @@ struct dev_intx_gsi_link {
 #define _HVM_IRQ_DPCI_EOI_LATCH_SHIFT           3
 #define _HVM_IRQ_DPCI_GUEST_PCI_SHIFT           4
 #define _HVM_IRQ_DPCI_GUEST_MSI_SHIFT           5
+#define _HVM_IRQ_DPCI_IDENTITY_GSI_SHIFT        6
 #define _HVM_IRQ_DPCI_TRANSLATE_SHIFT          15
 #define HVM_IRQ_DPCI_MACH_PCI        (1 << _HVM_IRQ_DPCI_MACH_PCI_SHIFT)
 #define HVM_IRQ_DPCI_MACH_MSI        (1 << _HVM_IRQ_DPCI_MACH_MSI_SHIFT)
@@ -47,6 +48,7 @@ struct dev_intx_gsi_link {
 #define HVM_IRQ_DPCI_EOI_LATCH       (1 << _HVM_IRQ_DPCI_EOI_LATCH_SHIFT)
 #define HVM_IRQ_DPCI_GUEST_PCI       (1 << _HVM_IRQ_DPCI_GUEST_PCI_SHIFT)
 #define HVM_IRQ_DPCI_GUEST_MSI       (1 << _HVM_IRQ_DPCI_GUEST_MSI_SHIFT)
+#define HVM_IRQ_DPCI_IDENTITY_GSI    (1 << _HVM_IRQ_DPCI_IDENTITY_GSI_SHIFT)
 #define HVM_IRQ_DPCI_TRANSLATE       (1 << _HVM_IRQ_DPCI_TRANSLATE_SHIFT)
 
 #define VMSI_DEST_ID_MASK 0xff
@@ -123,6 +125,10 @@ void hvm_isa_irq_assert(
 void hvm_isa_irq_deassert(
     struct domain *d, unsigned int isa_irq);
 
+/* Modify state of GSIs. */
+void hvm_gsi_assert(struct domain *d, unsigned int gsi);
+void hvm_gsi_deassert(struct domain *d, unsigned int gsi);
+
 int hvm_set_pci_link_route(struct domain *d, u8 link, u8 isa_irq);
 
 int hvm_inject_msi(struct domain *d, uint64_t addr, uint32_t data);
-- 
2.11.0 (Apple Git-81)


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.