[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v9 5/8] VT-d: Introduce a new function update_irte_for_msi_common



Both pi_update_irte() and msi_msg_to_remap_entry() update the content of IRTE;
besides, the current msi_msg_to_remap_entry is buggy when the live IRTE is in
posted format. This patch try to rework these two functions to make them
clearer by moving their common part to the new function.

Signed-off-by: Feng Wu <feng.wu@xxxxxxxxx>
Signed-off-by: Chao Gao <chao.gao@xxxxxxxxx>
---
v9:
- Newly added.

 xen/drivers/passthrough/vtd/intremap.c | 232 +++++++++++++++++++--------------
 1 file changed, 131 insertions(+), 101 deletions(-)

diff --git a/xen/drivers/passthrough/vtd/intremap.c 
b/xen/drivers/passthrough/vtd/intremap.c
index bfd468b..4269cd4 100644
--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -420,7 +420,8 @@ void io_apic_write_remap_rte(
         __ioapic_write_entry(apic, ioapic_pin, 1, old_rte);
 }
 
-static void set_msi_source_id(struct pci_dev *pdev, struct iremap_entry *ire)
+static void set_msi_source_id(const struct pci_dev *pdev,
+                              struct iremap_entry *ire)
 {
     u16 seg;
     u8 bus, devfn, secbus;
@@ -547,16 +548,116 @@ static int remap_entry_to_msi_msg(
     return 0;
 }
 
+/*
+ * This function is a common interface to update irte for msi case.
+ *
+ * If @pi_desc != NULL and @gvec != 0, the IRTE will be updated to a posted
+ * format. In this case, @msg is ignored because constructing a posted format
+ * IRTE doesn't need any information about the msi address or msi data.
+ *
+ * If @pi_desc == NULL and @gvec == 0, the IRTE will be updated to a remapped
+ * format. In this case, @msg can't be NULL.
+ *
+ * Assume 'ir_ctrl->iremap_lock' has been acquired and the remap_index
+ * of msi_desc has a benign value.
+ */
+static int update_irte_for_msi_common(
+    struct iommu *iommu, const struct pci_dev *pdev,
+    const struct msi_desc *msi_desc, struct msi_msg *msg,
+    const struct pi_desc *pi_desc, const uint8_t gvec)
+{
+    struct iremap_entry *iremap_entry = NULL, *iremap_entries;
+    struct iremap_entry new_ire = {{0}};
+    unsigned int index = msi_desc->remap_index;
+    struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
+
+    ASSERT( ir_ctrl );
+    ASSERT( spin_is_locked(&ir_ctrl->iremap_lock) );
+    ASSERT( (index >= 0) && (index < IREMAP_ENTRY_NR) );
+
+    if ( (!pi_desc && gvec) || (pi_desc && !gvec) )
+        return -EINVAL;
+
+    if ( !pi_desc && !gvec && !msg )
+        return -EINVAL;
+
+    GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, index,
+                     iremap_entries, iremap_entry);
+
+    if ( !pi_desc )
+    {
+       /* Set interrupt remapping table entry */
+        new_ire.remap.dm = msg->address_lo >> MSI_ADDR_DESTMODE_SHIFT;
+        new_ire.remap.tm = msg->data >> MSI_DATA_TRIGGER_SHIFT;
+        new_ire.remap.dlm = msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT;
+        /* Hardware require RH = 1 for LPR delivery mode */
+        new_ire.remap.rh = (new_ire.remap.dlm == dest_LowestPrio);
+        new_ire.remap.vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) &
+                                MSI_DATA_VECTOR_MASK;
+        if ( x2apic_enabled )
+            new_ire.remap.dst = msg->dest32;
+        else
+            new_ire.remap.dst = ((msg->address_lo >> MSI_ADDR_DEST_ID_SHIFT)
+                                 & 0xff) << 8;
+        new_ire.remap.p = 1;
+    }
+    else
+    {
+        new_ire.post.im = 1;
+        new_ire.post.vector = gvec;
+        new_ire.post.pda_l = virt_to_maddr(pi_desc) >> (32 - PDA_LOW_BIT);
+        new_ire.post.pda_h = virt_to_maddr(pi_desc) >> 32;
+        new_ire.post.p = 1;
+    }
+
+    if ( pdev )
+        set_msi_source_id(pdev, &new_ire);
+    else
+        set_hpet_source_id(msi_desc->hpet_id, &new_ire);
+
+    if ( iremap_entry->val != new_ire.val )
+    {
+        if ( cpu_has_cx16 )
+        {
+            __uint128_t ret;
+            struct iremap_entry old_ire;
+
+            old_ire = *iremap_entry;
+            ret = cmpxchg16b(iremap_entry, &old_ire, &new_ire);
+
+            /*
+             * In the above, we use cmpxchg16 to atomically update the 128-bit
+             * IRTE, and the hardware cannot update the IRTE behind us, so
+             * the return value of cmpxchg16 should be the same as old_ire.
+             * This ASSERT validate it.
+             */
+            ASSERT(ret == old_ire.val);
+        }
+        else
+        {
+            iremap_entry->lo = new_ire.lo;
+            iremap_entry->hi = new_ire.hi;
+        }
+
+        iommu_flush_cache_entry(iremap_entry, sizeof(struct iremap_entry));
+        iommu_flush_iec_index(iommu, 0, index);
+    }
+
+    unmap_vtd_domain_page(iremap_entries);
+    return 0;
+}
+
 static int msi_msg_to_remap_entry(
-    struct iommu *iommu, struct pci_dev *pdev,
+    struct iommu *iommu, const struct pci_dev *pdev,
     struct msi_desc *msi_desc, struct msi_msg *msg)
 {
     struct iremap_entry *iremap_entry = NULL, *iremap_entries;
-    struct iremap_entry new_ire;
     struct msi_msg_remap_entry *remap_rte;
     unsigned int index, i, nr = 1;
     unsigned long flags;
     struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
+    void *pi_desc;
+    int gvec;
 
     if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
         nr = msi_desc->msi.nvec;
@@ -592,38 +693,33 @@ static int msi_msg_to_remap_entry(
         return -EFAULT;
     }
 
+    /* Get the IRTE's bind relationship with guest from the live IRTE. */
     GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, index,
                      iremap_entries, iremap_entry);
-
-    memcpy(&new_ire, iremap_entry, sizeof(struct iremap_entry));
-
-    /* Set interrupt remapping table entry */
-    new_ire.remap.fpd = 0;
-    new_ire.remap.dm = (msg->address_lo >> MSI_ADDR_DESTMODE_SHIFT) & 0x1;
-    new_ire.remap.tm = (msg->data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;
-    new_ire.remap.dlm = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x1;
-    /* Hardware require RH = 1 for LPR delivery mode */
-    new_ire.remap.rh = (new_ire.remap.dlm == dest_LowestPrio);
-    new_ire.remap.avail = 0;
-    new_ire.remap.res_1 = 0;
-    new_ire.remap.vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) &
-                            MSI_DATA_VECTOR_MASK;
-    new_ire.remap.res_2 = 0;
-    if ( x2apic_enabled )
-        new_ire.remap.dst = msg->dest32;
+    if ( !iremap_entry->remap.im )
+    {
+        gvec = 0;
+        pi_desc = NULL;
+    }
     else
-        new_ire.remap.dst = ((msg->address_lo >> MSI_ADDR_DEST_ID_SHIFT)
-                             & 0xff) << 8;
+    {
+        gvec = iremap_entry->post.vector;
+        pi_desc = (void *)((((u64)iremap_entry->post.pda_h) << PDA_LOW_BIT )
+                           + iremap_entry->post.pda_l);
+    }
+    unmap_vtd_domain_page(iremap_entries);
 
-    if ( pdev )
-        set_msi_source_id(pdev, &new_ire);
-    else
-        set_hpet_source_id(msi_desc->hpet_id, &new_ire);
-    new_ire.remap.res_3 = 0;
-    new_ire.remap.res_4 = 0;
-    new_ire.remap.p = 1;    /* finally, set present bit */
+    /*
+     * Actually we can just suppress the update when IRTE is already in posted
+     * format. After a msi gets bound to a guest interrupt, changes to the msi
+     * message have no effect to the IRTE.
+     */
+    update_irte_for_msi_common(iommu, pdev, msi_desc, msg, pi_desc, gvec);
 
     /* now construct new MSI/MSI-X rte entry */
+    if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
+        nr = msi_desc->msi.nvec;
+
     remap_rte = (struct msi_msg_remap_entry *)msg;
     remap_rte->address_lo.dontcare = 0;
     i = index;
@@ -637,11 +733,6 @@ static int msi_msg_to_remap_entry(
     remap_rte->address_hi = 0;
     remap_rte->data = index - i;
 
-    memcpy(iremap_entry, &new_ire, sizeof(struct iremap_entry));
-    iommu_flush_cache_entry(iremap_entry, sizeof(struct iremap_entry));
-    iommu_flush_iec_index(iommu, 0, index);
-
-    unmap_vtd_domain_page(iremap_entries);
     spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
     return 0;
 }
@@ -902,42 +993,6 @@ void iommu_disable_x2apic_IR(void)
         disable_qinval(drhd->iommu);
 }
 
-static void setup_posted_irte(
-    struct iremap_entry *new_ire, const struct iremap_entry *old_ire,
-    const struct pi_desc *pi_desc, const uint8_t gvec)
-{
-    memset(new_ire, 0, sizeof(*new_ire));
-
-    /*
-     * 'im' filed decides whether the irte is in posted format (with value 1)
-     * or remapped format (with value 0), if the old irte is in remapped 
format,
-     * we copy things from remapped part in 'struct iremap_entry', otherwise,
-     * we copy from posted part.
-     */
-    if ( !old_ire->remap.im )
-    {
-        new_ire->post.p = old_ire->remap.p;
-        new_ire->post.fpd = old_ire->remap.fpd;
-        new_ire->post.sid = old_ire->remap.sid;
-        new_ire->post.sq = old_ire->remap.sq;
-        new_ire->post.svt = old_ire->remap.svt;
-    }
-    else
-    {
-        new_ire->post.p = old_ire->post.p;
-        new_ire->post.fpd = old_ire->post.fpd;
-        new_ire->post.sid = old_ire->post.sid;
-        new_ire->post.sq = old_ire->post.sq;
-        new_ire->post.svt = old_ire->post.svt;
-        new_ire->post.urg = old_ire->post.urg;
-    }
-
-    new_ire->post.im = 1;
-    new_ire->post.vector = gvec;
-    new_ire->post.pda_l = virt_to_maddr(pi_desc) >> (32 - PDA_LOW_BIT);
-    new_ire->post.pda_h = virt_to_maddr(pi_desc) >> 32;
-}
-
 /*
  * This function is used to update the IRTE for posted-interrupt
  * when guest changes MSI/MSI-X information.
@@ -947,16 +1002,13 @@ int pi_update_irte(const struct vcpu *v, const struct 
pirq *pirq,
 {
     struct irq_desc *desc;
     const struct msi_desc *msi_desc;
-    int remap_index;
     int rc = 0;
     const struct pci_dev *pci_dev;
     const struct acpi_drhd_unit *drhd;
     struct iommu *iommu;
     struct ir_ctrl *ir_ctrl;
-    struct iremap_entry *iremap_entries = NULL, *p = NULL;
-    struct iremap_entry new_ire, old_ire;
+    unsigned long flags;
     const struct pi_desc *pi_desc = &v->arch.hvm_vmx.pi_desc;
-    __uint128_t ret;
 
     desc = pirq_spin_lock_irq_desc(pirq, NULL);
     if ( !desc )
@@ -976,8 +1028,6 @@ int pi_update_irte(const struct vcpu *v, const struct pirq 
*pirq,
         goto unlock_out;
     }
 
-    remap_index = msi_desc->remap_index;
-
     spin_unlock_irq(&desc->lock);
 
     ASSERT(pcidevs_locked());
@@ -996,31 +1046,11 @@ int pi_update_irte(const struct vcpu *v, const struct 
pirq *pirq,
     if ( !ir_ctrl )
         return -ENODEV;
 
-    spin_lock_irq(&ir_ctrl->iremap_lock);
-
-    GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, remap_index, iremap_entries, p);
-
-    old_ire = *p;
-
-    /* Setup/Update interrupt remapping table entry. */
-    setup_posted_irte(&new_ire, &old_ire, pi_desc, gvec);
-    ret = cmpxchg16b(p, &old_ire, &new_ire);
-
-    /*
-     * In the above, we use cmpxchg16 to atomically update the 128-bit IRTE,
-     * and the hardware cannot update the IRTE behind us, so the return value
-     * of cmpxchg16 should be the same as old_ire. This ASSERT validate it.
-     */
-    ASSERT(ret == old_ire.val);
-
-    iommu_flush_cache_entry(p, sizeof(*p));
-    iommu_flush_iec_index(iommu, 0, remap_index);
-
-    unmap_vtd_domain_page(iremap_entries);
-
-    spin_unlock_irq(&ir_ctrl->iremap_lock);
-
-    return 0;
+    spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
+    rc = update_irte_for_msi_common(iommu, pci_dev, msi_desc, NULL, pi_desc,
+                                    gvec);
+    spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
+    return rc;
 
  unlock_out:
     spin_unlock_irq(&desc->lock);
-- 
1.8.3.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.