[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v9 5/8] VT-d: Introduce a new function update_irte_for_msi_common



>>> On 27.02.17 at 02:45, <chao.gao@xxxxxxxxx> wrote:
> @@ -547,16 +548,116 @@ static int remap_entry_to_msi_msg(
>      return 0;
>  }
>  
> +/*
> + * This function is a common interface to update irte for msi case.
> + *
> + * If @pi_desc != NULL and @gvec != 0, the IRTE will be updated to a posted
> + * format. In this case, @msg is ignored because constructing a posted format
> + * IRTE doesn't need any information about the msi address or msi data.
> + *
> + * If @pi_desc == NULL and @gvec == 0, the IRTE will be updated to a remapped
> + * format. In this case, @msg can't be NULL.

This kind of implies that in the other case msg can be NULL. Please
make this explicit or remove the last sentence, to avoid confusing
readers. Plus, if msg can be NULL in that case, why don't you pass
NULL in that case?

> + * Assume 'ir_ctrl->iremap_lock' has been acquired and the remap_index
> + * of msi_desc has a benign value.
> + */
> +static int update_irte_for_msi_common(
> +    struct iommu *iommu, const struct pci_dev *pdev,
> +    const struct msi_desc *msi_desc, struct msi_msg *msg,
> +    const struct pi_desc *pi_desc, const uint8_t gvec)
> +{
> +    struct iremap_entry *iremap_entry = NULL, *iremap_entries;
> +    struct iremap_entry new_ire = {{0}};

I think just "{ }" will do.

> +    unsigned int index = msi_desc->remap_index;
> +    struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
> +
> +    ASSERT( ir_ctrl );
> +    ASSERT( spin_is_locked(&ir_ctrl->iremap_lock) );
> +    ASSERT( (index >= 0) && (index < IREMAP_ENTRY_NR) );

Stray blanks inside parentheses.

> +    if ( (!pi_desc && gvec) || (pi_desc && !gvec) )

gvec == 0 alone is never a valid check: Either all vectors are valid,
or a whole range (e.g. 0x00...0x0f) is invalid. Furthermore I think
such checks are easier to read as either

    if ( !pi_desc != !gvec )

or

    if ( pi_desc ? !gvec : gvec )

> +        return -EINVAL;
> +
> +    if ( !pi_desc && !gvec && !msg )

With the earlier check the first or second part could be omitted
afaict.

> +        return -EINVAL;
> +
> +    GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, index,
> +                     iremap_entries, iremap_entry);
> +
> +    if ( !pi_desc )
> +    {
> +       /* Set interrupt remapping table entry */

Again a request for consistency: Either have a respective comment
also at the top of the else branch, or omit the one here too.

> +        new_ire.remap.dm = msg->address_lo >> MSI_ADDR_DESTMODE_SHIFT;
> +        new_ire.remap.tm = msg->data >> MSI_DATA_TRIGGER_SHIFT;
> +        new_ire.remap.dlm = msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT;
> +        /* Hardware require RH = 1 for LPR delivery mode */
> +        new_ire.remap.rh = (new_ire.remap.dlm == dest_LowestPrio);
> +        new_ire.remap.vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) &
> +                                MSI_DATA_VECTOR_MASK;
> +        if ( x2apic_enabled )
> +            new_ire.remap.dst = msg->dest32;
> +        else
> +            new_ire.remap.dst = ((msg->address_lo >> MSI_ADDR_DEST_ID_SHIFT)
> +                                 & 0xff) << 8;

Please strive to eliminate literal numbers here. At least the 0xff looks
to be easy to deal with (using MASK_EXTR() together with
MSI_ADDR_DEST_ID_MASK).

> +        new_ire.remap.p = 1;
> +    }
> +    else
> +    {
> +        new_ire.post.im = 1;
> +        new_ire.post.vector = gvec;
> +        new_ire.post.pda_l = virt_to_maddr(pi_desc) >> (32 - PDA_LOW_BIT);
> +        new_ire.post.pda_h = virt_to_maddr(pi_desc) >> 32;
> +        new_ire.post.p = 1;
> +    }
> +
> +    if ( pdev )
> +        set_msi_source_id(pdev, &new_ire);
> +    else
> +        set_hpet_source_id(msi_desc->hpet_id, &new_ire);
> +
> +    if ( iremap_entry->val != new_ire.val )
> +    {
> +        if ( cpu_has_cx16 )
> +        {
> +            __uint128_t ret;
> +            struct iremap_entry old_ire;
> +
> +            old_ire = *iremap_entry;
> +            ret = cmpxchg16b(iremap_entry, &old_ire, &new_ire);
> +
> +            /*
> +             * In the above, we use cmpxchg16 to atomically update the 
> 128-bit
> +             * IRTE, and the hardware cannot update the IRTE behind us, so
> +             * the return value of cmpxchg16 should be the same as old_ire.
> +             * This ASSERT validate it.
> +             */
> +            ASSERT(ret == old_ire.val);
> +        }
> +        else
> +        {

This wants a comment added explaining the conditions under which
this is safe. Perhaps also one or more ASSERT()s to that effect.

> +            iremap_entry->lo = new_ire.lo;
> +            iremap_entry->hi = new_ire.hi;
> +        }
> +
> +        iommu_flush_cache_entry(iremap_entry, sizeof(struct iremap_entry));

sizeof(*iremap_entry)

> @@ -592,38 +693,33 @@ static int msi_msg_to_remap_entry(
>          return -EFAULT;
>      }
>  
> +    /* Get the IRTE's bind relationship with guest from the live IRTE. */
>      GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, index,
>                       iremap_entries, iremap_entry);
> -
> -    memcpy(&new_ire, iremap_entry, sizeof(struct iremap_entry));
> -
> -    /* Set interrupt remapping table entry */
> -    new_ire.remap.fpd = 0;
> -    new_ire.remap.dm = (msg->address_lo >> MSI_ADDR_DESTMODE_SHIFT) & 0x1;
> -    new_ire.remap.tm = (msg->data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;
> -    new_ire.remap.dlm = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x1;
> -    /* Hardware require RH = 1 for LPR delivery mode */
> -    new_ire.remap.rh = (new_ire.remap.dlm == dest_LowestPrio);
> -    new_ire.remap.avail = 0;
> -    new_ire.remap.res_1 = 0;
> -    new_ire.remap.vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) &
> -                            MSI_DATA_VECTOR_MASK;
> -    new_ire.remap.res_2 = 0;
> -    if ( x2apic_enabled )
> -        new_ire.remap.dst = msg->dest32;
> +    if ( !iremap_entry->remap.im )
> +    {
> +        gvec = 0;
> +        pi_desc = NULL;
> +    }
>      else
> -        new_ire.remap.dst = ((msg->address_lo >> MSI_ADDR_DEST_ID_SHIFT)
> -                             & 0xff) << 8;
> +    {
> +        gvec = iremap_entry->post.vector;
> +        pi_desc = (void *)((((u64)iremap_entry->post.pda_h) << PDA_LOW_BIT )
> +                           + iremap_entry->post.pda_l);
> +    }
> +    unmap_vtd_domain_page(iremap_entries);

I don't follow: Why does it matter what the entry currently holds?
As I've pointed out more than once before (mainly to Feng), the
goal ought to be to produce the new entry solely based on what
the intended new state is, i.e. function input and global data.

> -    if ( pdev )
> -        set_msi_source_id(pdev, &new_ire);
> -    else
> -        set_hpet_source_id(msi_desc->hpet_id, &new_ire);
> -    new_ire.remap.res_3 = 0;
> -    new_ire.remap.res_4 = 0;
> -    new_ire.remap.p = 1;    /* finally, set present bit */
> +    /*
> +     * Actually we can just suppress the update when IRTE is already in 
> posted
> +     * format. After a msi gets bound to a guest interrupt, changes to the 
> msi
> +     * message have no effect to the IRTE.
> +     */
> +    update_irte_for_msi_common(iommu, pdev, msi_desc, msg, pi_desc, gvec);
>  
>      /* now construct new MSI/MSI-X rte entry */
> +    if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
> +        nr = msi_desc->msi.nvec;

Why do you re-do here what was already done earlier in the function
(code you didn't touch)?

> @@ -996,31 +1046,11 @@ int pi_update_irte(const struct vcpu *v, const struct 
> pirq *pirq,
>      if ( !ir_ctrl )
>          return -ENODEV;
>  
> -    spin_lock_irq(&ir_ctrl->iremap_lock);
> -
> -    GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, remap_index, iremap_entries, p);
> -
> -    old_ire = *p;
> -
> -    /* Setup/Update interrupt remapping table entry. */
> -    setup_posted_irte(&new_ire, &old_ire, pi_desc, gvec);
> -    ret = cmpxchg16b(p, &old_ire, &new_ire);
> -
> -    /*
> -     * In the above, we use cmpxchg16 to atomically update the 128-bit IRTE,
> -     * and the hardware cannot update the IRTE behind us, so the return value
> -     * of cmpxchg16 should be the same as old_ire. This ASSERT validate it.
> -     */
> -    ASSERT(ret == old_ire.val);
> -
> -    iommu_flush_cache_entry(p, sizeof(*p));
> -    iommu_flush_iec_index(iommu, 0, remap_index);
> -
> -    unmap_vtd_domain_page(iremap_entries);
> -
> -    spin_unlock_irq(&ir_ctrl->iremap_lock);
> -
> -    return 0;
> +    spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
> +    rc = update_irte_for_msi_common(iommu, pci_dev, msi_desc, NULL, pi_desc,
> +                                    gvec);
> +    spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
> +    return rc;

Considering the old code use spin_lock_irq() (and there's such left
also earlier in the function), why do you use the irqsave function
here?

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.