[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v9 11/17] vt-d: Add API to update IRTE when VT-d PI is used



> From: Wu, Feng
> Sent: Tuesday, November 03, 2015 4:43 PM
> 
> This patch adds an API which is used to update the IRTE
> for posted-interrupt when guest changes MSI/MSI-X information.
> 
> CC: Yang Zhang <yang.z.zhang@xxxxxxxxx>
> CC: Kevin Tian <kevin.tian@xxxxxxxxx>
> CC: Keir Fraser <keir@xxxxxxx>
> CC: Jan Beulich <jbeulich@xxxxxxxx>
> CC: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
> Signed-off-by: Feng Wu <feng.wu@xxxxxxxxx>
> Reviewed-by: Jan Beulich <jbeulich@xxxxxxxx>
> ---
> v8:
> - Some minor adjustment
> 
> v7:
> - Remove __uint128_t cast
> - Remove Kevin's Ack due to a bug fix for v6
> - Reword some comments
> - Setup posted IRTE from zeroed structure
> 
> v6:
> - In some error cases, the desc->lock will be unlocked twice, fix it.
> - Coding style fix.
> - Add some comments.
> 
> v5:
> - Make some function parameters const
> - Call "spin_unlock_irq(&desc->lock);" a little earlier
> - Add "ASSERT(spin_is_locked(&pcidevs_lock))"
> - -EBADSLT -> -ENODEV, EBADSLT is removed in the latest Xen
> 
> v4:
> - Don't inline setup_posted_irte()
> - const struct pi_desc *pi_desc for setup_posted_irte()
> - return -EINVAL when pirq_spin_lock_irq_desc() fails.
> - Make some variables const
> - Release irq desc lock earlier in pi_update_irte()
> - Remove the pointless do-while() loop when doing cmpxchg16b()
> 
> v3:
> - Remove "adding PDA_MASK()" when updating 'pda_l' and 'pda_h' for IRTE.
> - Change the return type of pi_update_irte() to int.
> - Remove some pointless printk message in pi_update_irte().
> - Use structure assignment instead of memcpy() for irte copy.
> 
>  xen/drivers/passthrough/vtd/intremap.c | 120
> +++++++++++++++++++++++++++++++++
>  xen/drivers/passthrough/vtd/iommu.h    |   6 ++
>  xen/include/asm-x86/iommu.h            |   2 +
>  3 files changed, 128 insertions(+)
> 
> diff --git a/xen/drivers/passthrough/vtd/intremap.c
> b/xen/drivers/passthrough/vtd/intremap.c
> index 8f135e1..67e4f6d 100644
> --- a/xen/drivers/passthrough/vtd/intremap.c
> +++ b/xen/drivers/passthrough/vtd/intremap.c
> @@ -899,3 +899,123 @@ void iommu_disable_x2apic_IR(void)
>      for_each_drhd_unit ( drhd )
>          disable_qinval(drhd->iommu);
>  }
> +
> +static void setup_posted_irte(
> +    struct iremap_entry *new_ire, const struct iremap_entry *old_ire,
> +    const struct pi_desc *pi_desc, const uint8_t gvec)
> +{
> +    memset(new_ire, sizeof(*new_ire), 0);
> +
> +    if ( !old_ire->remap.im )
> +    {
> +        new_ire->post.p = old_ire->remap.p;
> +        new_ire->post.fpd = old_ire->remap.fpd;
> +        new_ire->post.sid = old_ire->remap.sid;
> +        new_ire->post.sq = old_ire->remap.sq;
> +        new_ire->post.svt = old_ire->remap.svt;
> +    }
> +    else
> +    {
> +        new_ire->post.p = old_ire->post.p;
> +        new_ire->post.fpd = old_ire->post.fpd;
> +        new_ire->post.sid = old_ire->post.sid;
> +        new_ire->post.sq = old_ire->post.sq;
> +        new_ire->post.svt = old_ire->post.svt;
> +        new_ire->post.urg = old_ire->post.urg;
> +    }

A comment here would be preferred; otherwise it's not straightforward to
know what 'im' is and why the code branches on it.

> +
> +    new_ire->post.im = 1;
> +    new_ire->post.vector = gvec;
> +    new_ire->post.pda_l = virt_to_maddr(pi_desc) >> (32 - PDA_LOW_BIT);
> +    new_ire->post.pda_h = virt_to_maddr(pi_desc) >> 32;
> +}
> +
> +/*
> + * This function is used to update the IRTE for posted-interrupt
> + * when guest changes MSI/MSI-X information.
> + */
> +int pi_update_irte(const struct vcpu *v, const struct pirq *pirq,
> +    const uint8_t gvec)
> +{
> +    struct irq_desc *desc;
> +    const struct msi_desc *msi_desc;
> +    int remap_index;
> +    int rc = 0;
> +    const struct pci_dev *pci_dev;
> +    const struct acpi_drhd_unit *drhd;
> +    struct iommu *iommu;
> +    struct ir_ctrl *ir_ctrl;
> +    struct iremap_entry *iremap_entries = NULL, *p = NULL;
> +    struct iremap_entry new_ire, old_ire;
> +    const struct pi_desc *pi_desc = &v->arch.hvm_vmx.pi_desc;
> +    __uint128_t ret;
> +
> +    desc = pirq_spin_lock_irq_desc(pirq, NULL);
> +    if ( !desc )
> +        return -EINVAL;
> +
> +    msi_desc = desc->msi_desc;
> +    if ( !msi_desc )
> +    {
> +        rc = -ENODEV;
> +        goto unlock_out;
> +    }
> +
> +    pci_dev = msi_desc->dev;
> +    if ( !pci_dev )
> +    {
> +        rc = -ENODEV;
> +        goto unlock_out;
> +    }
> +
> +    remap_index = msi_desc->remap_index;
> +
> +    spin_unlock_irq(&desc->lock);
> +
> +    ASSERT(spin_is_locked(&pcidevs_lock));
> +
> +    /*
> +     * FIXME: For performance reasons we should store the 'iommu' pointer in
> +     * 'struct msi_desc' in some other place, so we don't need to waste
> +     * time searching it here.
> +     */
> +    drhd = acpi_find_matched_drhd_unit(pci_dev);
> +    if ( !drhd )
> +        return -ENODEV;
> +
> +    iommu = drhd->iommu;
> +    ir_ctrl = iommu_ir_ctrl(iommu);
> +    if ( !ir_ctrl )
> +        return -ENODEV;
> +
> +    spin_lock_irq(&ir_ctrl->iremap_lock);
> +
> +    GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, remap_index, iremap_entries, p);
> +
> +    old_ire = *p;
> +
> +    /* Setup/Update interrupt remapping table entry. */
> +    setup_posted_irte(&new_ire, &old_ire, pi_desc, gvec);
> +    ret = cmpxchg16b(p, &old_ire, &new_ire);
> +
> +    /*
> +     * In the above, we use cmpxchg16 to atomically update the 128-bit IRTE,
> +     * and the hardware cannot update the IRTE behind us, so the return value

Hardware can DEFINITELY update the IRTE behind us, right? E.g. after the IRTE
is fully set up, when an interrupt is posted, etc. Perhaps you mean that
hardware cannot update the IRTE at this point?

> +     * of cmpxchg16 should be the same as old_ire. This ASSERT validate it.
> +     */
> +    ASSERT(ret == old_ire.val);
> +
> +    iommu_flush_cache_entry(p, sizeof(*p));
> +    iommu_flush_iec_index(iommu, 0, remap_index);
> +
> +    unmap_vtd_domain_page(iremap_entries);
> +
> +    spin_unlock_irq(&ir_ctrl->iremap_lock);
> +
> +    return 0;
> +
> + unlock_out:
> +    spin_unlock_irq(&desc->lock);
> +
> +    return rc;
> +}
> diff --git a/xen/drivers/passthrough/vtd/iommu.h
> b/xen/drivers/passthrough/vtd/iommu.h
> index b440b69..c55ee08 100644
> --- a/xen/drivers/passthrough/vtd/iommu.h
> +++ b/xen/drivers/passthrough/vtd/iommu.h
> @@ -323,6 +323,12 @@ struct iremap_entry {
>    };
>  };
> 
> +/*
> + * Posted-interrupt descriptor address is 64 bits and 64-byte aligned, so
> + * only the upper 26 bits of the least significant 32 bits are available.
> + */
> +#define PDA_LOW_BIT    26
> +
>  /* Max intr remapping table page order is 8, as max number of IRTEs is 64K */
>  #define IREMAP_PAGE_ORDER  8
> 
> diff --git a/xen/include/asm-x86/iommu.h b/xen/include/asm-x86/iommu.h
> index 29203d7..92f0900 100644
> --- a/xen/include/asm-x86/iommu.h
> +++ b/xen/include/asm-x86/iommu.h
> @@ -31,6 +31,8 @@ int iommu_supports_eim(void);
>  int iommu_enable_x2apic_IR(void);
>  void iommu_disable_x2apic_IR(void);
> 
> +int pi_update_irte(const struct vcpu *v, const struct pirq *pirq, const 
> uint8_t gvec);
> +
>  #endif /* !__ARCH_X86_IOMMU_H__ */
>  /*
>   * Local variables:
> --
> 2.1.0


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.