|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH v3 9/9] vpci/msix: add MSI-X handlers
>>> On 27.04.17 at 16:35, <roger.pau@xxxxxxxxxx> wrote:
> +static int vpci_msix_control_write(struct pci_dev *pdev, unsigned int reg,
> + union vpci_val val, void *data)
> +{
> + uint8_t seg = pdev->seg, bus = pdev->bus;
> + uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
> + paddr_t table_base =
> pdev->vpci->header.bars[pdev->vpci->msix->bir].paddr;
> + struct vpci_msix *msix = data;
Wouldn't you better use this also to obtain the array index one line
earlier?
> + bool new_masked, new_enabled;
> + unsigned int i;
> + uint32_t data32;
> + int rc;
> +
> + new_masked = val.word & PCI_MSIX_FLAGS_MASKALL;
> + new_enabled = val.word & PCI_MSIX_FLAGS_ENABLE;
> +
> + if ( new_enabled != msix->enabled && new_enabled )
if ( !msix->enabled && new_enabled )
would likely be easier to read (similar for the "else if" below).
> + {
> + /* MSI-X enabled. */
> + for ( i = 0; i < msix->max_entries; i++ )
> + {
> + if ( msix->entries[i].masked )
> + continue;
> +
> + rc = vpci_msix_enable(&msix->entries[i].arch, pdev,
> + msix->entries[i].addr,
> msix->entries[i].data,
> + msix->entries[i].nr, table_base);
> + if ( rc )
> + {
> + gdprintk(XENLOG_ERR,
> + "%04x:%02x:%02x.%u: unable to update entry %u:
> %d\n",
> + seg, bus, slot, func, i, rc);
> + return rc;
> + }
> +
> + vpci_msix_mask(&msix->entries[i].arch, false);
> + }
> + }
> + else if ( new_enabled != msix->enabled && !new_enabled )
> + {
> + /* MSI-X disabled. */
> + for ( i = 0; i < msix->max_entries; i++ )
> + {
> + rc = vpci_msix_disable(&msix->entries[i].arch);
> + if ( rc )
> + {
> + gdprintk(XENLOG_ERR,
> + "%04x:%02x:%02x.%u: unable to disable entry %u:
> %d\n",
> + seg, bus, slot, func, i, rc);
> + return rc;
> + }
> + }
> + }
> +
> + data32 = val.word;
> + if ( (new_enabled != msix->enabled || new_masked != msix->masked) &&
> + pci_msi_conf_write_intercept(pdev, reg, 2, &data32) >= 0 )
> + pci_conf_write16(seg, bus, slot, func, reg, data32);
What's the intermediate variable "data32" good for here? Afaict you
could use val.word in its stead.
> +static struct vpci_msix *vpci_msix_find(struct domain *d, unsigned long addr)
> +{
> + struct vpci_msix *msix;
> +
> + ASSERT(vpci_locked(d));
> + list_for_each_entry ( msix, &d->arch.hvm_domain.msix_tables, next )
> + if ( msix->pdev->vpci->header.command & PCI_COMMAND_MEMORY &&
Please parenthesize & within &&.
> + addr >= msix->addr &&
> + addr < msix->addr + msix->max_entries * PCI_MSIX_ENTRY_SIZE )
> + return msix;
> +
> + return NULL;
> +}
Looking ahead I'm getting the impression that you only allow
accesses to the MSI-X table entries, yet in vpci_modify_bars() you
(correctly) prevent mapping entire pages. While most other
registers are disallowed from sharing a page with the table, the PBA
is specifically named as an exception. Hence you need to support
at least reads from the entire range.
> +static int vpci_msix_table_accept(struct vcpu *v, unsigned long addr)
> +{
> + int found;
> +
> + vpci_lock(v->domain);
> + found = !!vpci_msix_find(v->domain, addr);
At the risk of repeating a comment I gave on an earlier patch: Using
"bool" for "found" allows you to avoid the !! .
> +static int vpci_msix_access_check(struct pci_dev *pdev, unsigned long addr,
> + unsigned int len)
> +{
> + uint8_t seg = pdev->seg, bus = pdev->bus;
> + uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
> +
> +
No double blank lines please.
> + /* Only allow 32/64b accesses. */
> + if ( len != 4 && len != 8 )
> + {
> + gdprintk(XENLOG_ERR,
> + "%04x:%02x:%02x.%u: invalid MSI-X table access size: %u\n",
> + seg, bus, slot, func, len);
> + return -EINVAL;
> + }
> +
> + /* Do no allow accesses that span across multiple entries. */
> + if ( (addr & (PCI_MSIX_ENTRY_SIZE - 1)) + len > PCI_MSIX_ENTRY_SIZE )
> + {
> + gdprintk(XENLOG_ERR,
> + "%04x:%02x:%02x.%u: MSI-X access crosses entry boundary\n",
> + seg, bus, slot, func);
> + return -EINVAL;
> + }
> +
> + /*
> + * Only allow 64b accesses to the low message address field.
> + *
> + * NB: this is more restrictive than the specification, that allows 64b
> + * accesses to other fields under certain circumstances, so this check
> and
> + * the code will have to be fixed in order to fully comply with the
> + * specification.
> + */
> + if ( (addr & (PCI_MSIX_ENTRY_SIZE - 1)) != 0 && len != 4 )
> + {
> + gdprintk(XENLOG_ERR,
> + "%04x:%02x:%02x.%u: 64bit MSI-X table access to 32bit field"
> + " (offset: %#lx len: %u)\n", seg, bus, slot, func,
> + addr & (PCI_MSIX_ENTRY_SIZE - 1), len);
> + return -EINVAL;
> + }
> +
> + return 0;
> +}
So you allow mis-aligned accesses, but you disallow 8-byte ones
to the upper half of an entry? I think both aspects need to be got
right from the beginning, the more that you BUG() in the switch()es
further down in such cases.
> +static struct vpci_msix_entry *vpci_msix_get_entry(struct vpci_msix *msix,
> + unsigned long addr)
> +{
> + return &msix->entries[(addr - msix->addr) / PCI_MSIX_ENTRY_SIZE];
> +}
> +
> +static int vpci_msix_table_read(struct vcpu *v, unsigned long addr,
> + unsigned int len, unsigned long *data)
> +{
> + struct vpci_msix *msix;
> + struct vpci_msix_entry *entry;
> + unsigned int offset;
> +
> + vpci_lock(v->domain);
> + msix = vpci_msix_find(v->domain, addr);
> + if ( !msix )
> + {
> + vpci_unlock(v->domain);
> + return X86EMUL_UNHANDLEABLE;
> + }
> +
> + if ( vpci_msix_access_check(msix->pdev, addr, len) )
> + {
> + vpci_unlock(v->domain);
> + return X86EMUL_UNHANDLEABLE;
> + }
> +
> + /* Get the table entry and offset. */
> + entry = vpci_msix_get_entry(msix, addr);
> + offset = addr & (PCI_MSIX_ENTRY_SIZE - 1);
> +
> + switch ( offset )
> + {
> + case PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET:
> + *data = entry->addr;
You're not clipping off the upper 32 bits here - is that reliably
happening elsewhere?
> + break;
> + case PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET:
> + *data = entry->addr >> 32;
> + break;
> + case PCI_MSIX_ENTRY_DATA_OFFSET:
> + *data = entry->data;
> + break;
> + case PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET:
> + *data = entry->masked ? PCI_MSIX_VECTOR_BITMASK : 0;
What about the other 31 bits?
> +static int vpci_msix_table_write(struct vcpu *v, unsigned long addr,
> + unsigned int len, unsigned long data)
> +{
> + struct vpci_msix *msix;
> + struct vpci_msix_entry *entry;
> + unsigned int offset;
> +
> + vpci_lock(v->domain);
> + msix = vpci_msix_find(v->domain, addr);
> + if ( !msix )
> + {
> + vpci_unlock(v->domain);
> + return X86EMUL_UNHANDLEABLE;
> + }
> +
> + if ( vpci_msix_access_check(msix->pdev, addr, len) )
> + {
> + vpci_unlock(v->domain);
> + return X86EMUL_UNHANDLEABLE;
> + }
> +
> + /* Get the table entry and offset. */
> + entry = vpci_msix_get_entry(msix, addr);
> + offset = addr & (PCI_MSIX_ENTRY_SIZE - 1);
> +
> + switch ( offset )
> + {
> + case PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET:
> + if ( len == 8 )
> + {
> + entry->addr = data;
> + break;
> + }
> + entry->addr &= ~GENMASK(31, 0);
> + entry->addr |= data;
> + break;
> + case PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET:
> + entry->addr &= ~GENMASK(63, 32);
> + entry->addr |= data << 32;
> + break;
> + case PCI_MSIX_ENTRY_DATA_OFFSET:
> + entry->data = data;
> + break;
> + case PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET:
> + {
> + bool new_masked = data & PCI_MSIX_VECTOR_BITMASK;
> + struct pci_dev *pdev = msix->pdev;
> + paddr_t table_base =
> + pdev->vpci->header.bars[pdev->vpci->msix->bir].paddr;
Again simply "msix->bir"?
> + int rc;
> +
> + if ( !msix->enabled )
> + {
> + entry->masked = new_masked;
> + break;
> + }
> +
> + if ( new_masked != entry->masked && !new_masked )
> + {
> + /* Unmasking an entry, update it. */
> + rc = vpci_msix_enable(&entry->arch, msix->pdev, entry->addr,
And simply "pdev" here?
> +static int vpci_init_msix(struct pci_dev *pdev)
> +{
> + struct domain *d = pdev->domain;
> + uint8_t seg = pdev->seg, bus = pdev->bus;
> + uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
> + struct vpci_msix *msix;
> + unsigned int msix_offset, i, max_entries;
> + paddr_t msix_paddr;
> + uint16_t control;
> + int rc;
> +
> + msix_offset = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSIX);
> + if ( !msix_offset )
> + return 0;
> +
> + if ( !vpci_msix_enabled(pdev->domain) )
This is a non-__init function, so it can't use dom0_msix (I'm saying
this just in case there really is a need to retain those command line
options).
> + {
> + xen_vpci_mask_capability(pdev, PCI_CAP_ID_MSIX);
> + return 0;
> + }
> +
> + control = pci_conf_read16(seg, bus, slot, func,
> + msix_control_reg(msix_offset));
> +
> + /* Get the maximum number of vectors the device supports. */
> + max_entries = msix_table_size(control);
> + if ( !max_entries )
> + return 0;
This if() is never going to be true.
> + msix = xzalloc_bytes(MSIX_SIZE(max_entries));
> + if ( !msix )
> + return -ENOMEM;
> +
> + msix->max_entries = max_entries;
> + msix->pdev = pdev;
> +
> + /* Find the MSI-X table address. */
> + msix->offset = pci_conf_read32(seg, bus, slot, func,
> + msix_table_offset_reg(msix_offset));
> + msix->bir = msix->offset & PCI_MSIX_BIRMASK;
> + msix->offset &= ~PCI_MSIX_BIRMASK;
> +
> + ASSERT(pdev->vpci->header.bars[msix->bir].type == VPCI_BAR_MEM ||
> + pdev->vpci->header.bars[msix->bir].type == VPCI_BAR_MEM64_LO);
> + msix->addr = pdev->vpci->header.bars[msix->bir].mapped_addr +
> msix->offset;
> + msix_paddr = pdev->vpci->header.bars[msix->bir].paddr + msix->offset;
I can't seem to find where these addresses get updated in case
the BARs are being relocated by the Dom0 kernel.
> + for ( i = 0; i < msix->max_entries; i++)
> + {
> + msix->entries[i].masked = true;
> + msix->entries[i].nr = i;
> + vpci_msix_arch_init(&msix->entries[i].arch);
> + }
> +
> + if ( list_empty(&d->arch.hvm_domain.msix_tables) )
> + register_mmio_handler(d, &vpci_msix_table_ops);
> +
> + list_add(&msix->next, &d->arch.hvm_domain.msix_tables);
> +
> + rc = xen_vpci_add_register(pdev, vpci_msix_control_read,
> + vpci_msix_control_write,
> + msix_control_reg(msix_offset), 2, msix);
> + if ( rc )
> + {
> + dprintk(XENLOG_ERR,
> + "%04x:%02x:%02x.%u: failed to add handler for MSI-X control:
> %d\n",
> + seg, bus, slot, func, rc);
> + goto error;
> + }
> +
> + if ( pdev->vpci->header.command & PCI_COMMAND_MEMORY )
> + {
> + /* Unmap this memory from the guest. */
> + rc = modify_mmio(pdev->domain, _gfn(PFN_DOWN(msix->addr)),
> + _mfn(PFN_DOWN(msix_paddr)),
> + PFN_UP(msix->max_entries * PCI_MSIX_ENTRY_SIZE),
> + false);
> + if ( rc )
> + {
> + dprintk(XENLOG_ERR,
> + "%04x:%02x:%02x.%u: unable to unmap MSI-X BAR region:
> %d\n",
> + seg, bus, slot, func, rc);
> + goto error;
> + }
> + }
Why is this unmapping conditional upon PCI_COMMAND_MEMORY?
> +static void vpci_dump_msix(unsigned char key)
> +{
> + struct domain *d;
> + struct pci_dev *pdev;
> +
> + printk("Guest MSI-X information:\n");
> +
> + for_each_domain ( d )
> + {
> + if ( !has_vpci(d) )
> + continue;
> +
> + vpci_lock(d);
Dump handlers, even if there are existing examples to the contrary,
should only try-lock any locks they mean to hold (and not dump
anything if they can't get hold of the lock).
> --- a/xen/include/xen/vpci.h
> +++ b/xen/include/xen/vpci.h
> @@ -112,6 +112,33 @@ struct vpci {
> /* Arch-specific data. */
> struct vpci_arch_msi arch;
> } *msi;
> +
> + /* MSI-X data. */
> + struct vpci_msix {
> + struct pci_dev *pdev;
> + /* Maximum number of vectors supported by the device. */
> + unsigned int max_entries;
> + /* MSI-X table offset. */
> + unsigned int offset;
> + /* MSI-X table BIR. */
> + unsigned int bir;
> + /* Table addr. */
> + paddr_t addr;
> + /* MSI-X enabled? */
> + bool enabled;
> + /* Masked? */
> + bool masked;
> + /* List link. */
> + struct list_head next;
> + /* Entries. */
> + struct vpci_msix_entry {
> + unsigned int nr;
> + uint64_t addr;
> + uint32_t data;
> + bool masked;
> + struct vpci_arch_msix_entry arch;
Indentation.
Jan
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |