The new PHYSDEVOP_pci_device_add is intended to be extensible, with a first extension (to pass the proximity domain of a device) added right away. A couple of directly related functions are adjusted at the same time to account for the segment number. Is the way this re-uses PHYSDEVOP_map_pirq acceptable, or should we define a replacement? Should we deprecate the PHYSDEVOP_manage_pci_* sub-hypercalls? Signed-off-by: Jan Beulich --- 2011-08-25.orig/xen/arch/ia64/xen/hypercall.c 2011-08-25 15:32:13.000000000 +0200 +++ 2011-08-25/xen/arch/ia64/xen/hypercall.c 2011-08-25 15:06:35.000000000 +0200 @@ -665,7 +665,7 @@ long do_physdev_op(int cmd, XEN_GUEST_HA if ( copy_from_guest(&manage_pci, arg, 1) != 0 ) break; - ret = pci_add_device(manage_pci.bus, manage_pci.devfn, NULL); + ret = pci_add_device(0, manage_pci.bus, manage_pci.devfn, NULL); break; } @@ -678,7 +678,7 @@ long do_physdev_op(int cmd, XEN_GUEST_HA if ( copy_from_guest(&manage_pci, arg, 1) != 0 ) break; - ret = pci_remove_device(manage_pci.bus, manage_pci.devfn); + ret = pci_remove_device(0, manage_pci.bus, manage_pci.devfn); break; } @@ -698,7 +698,7 @@ long do_physdev_op(int cmd, XEN_GUEST_HA pdev_info.is_virtfn = manage_pci_ext.is_virtfn; pdev_info.physfn.bus = manage_pci_ext.physfn.bus; pdev_info.physfn.devfn = manage_pci_ext.physfn.devfn; - ret = pci_add_device(manage_pci_ext.bus, + ret = pci_add_device(0, manage_pci_ext.bus, manage_pci_ext.devfn, &pdev_info); break; --- 2011-08-25.orig/xen/arch/x86/irq.c 2011-08-25 15:05:26.000000000 +0200 +++ 2011-08-25/xen/arch/x86/irq.c 2011-08-25 15:33:18.000000000 +0200 @@ -1655,7 +1655,7 @@ int map_domain_pirq( if ( !cpu_has_apic ) goto done; - pdev = pci_get_pdev(msi->bus, msi->devfn); + pdev = pci_get_pdev(msi->seg, msi->bus, msi->devfn); ret = pci_enable_msi(msi, &msi_desc); if ( ret ) goto done; --- 2011-08-25.orig/xen/arch/x86/msi.c 2011-08-25 15:05:26.000000000 +0200 +++ 2011-08-25/xen/arch/x86/msi.c 2011-08-25 15:06:35.000000000 +0200 @@ -528,7 +528,7 @@ static 
u64 read_pci_mem_bar(u8 bus, u8 s if ( vf >= 0 ) { - struct pci_dev *pdev = pci_get_pdev(bus, PCI_DEVFN(slot, func)); + struct pci_dev *pdev = pci_get_pdev(0, bus, PCI_DEVFN(slot, func)); unsigned int pos = pci_find_ext_capability(0, bus, PCI_DEVFN(slot, func), PCI_EXT_CAP_ID_SRIOV); @@ -767,7 +767,7 @@ static int __pci_enable_msi(struct msi_i struct msi_desc *old_desc; ASSERT(spin_is_locked(&pcidevs_lock)); - pdev = pci_get_pdev(msi->bus, msi->devfn); + pdev = pci_get_pdev(msi->seg, msi->bus, msi->devfn); if ( !pdev ) return -ENODEV; @@ -775,7 +775,8 @@ static int __pci_enable_msi(struct msi_i if ( old_desc ) { dprintk(XENLOG_WARNING, "irq %d has already mapped to MSI on " - "device %02x:%02x.%01x.\n", msi->irq, msi->bus, + "device %04x:%02x:%02x.%01x\n", + msi->irq, msi->seg, msi->bus, PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn)); *desc = old_desc; return 0; @@ -785,7 +786,7 @@ static int __pci_enable_msi(struct msi_i if ( old_desc ) { dprintk(XENLOG_WARNING, "MSI-X is already in use on " - "device %02x:%02x.%01x\n", msi->bus, + "device %04x:%02x:%02x.%01x\n", msi->seg, msi->bus, PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn)); pci_disable_msi(old_desc); } @@ -830,7 +831,7 @@ static int __pci_enable_msix(struct msi_ struct msi_desc *old_desc; ASSERT(spin_is_locked(&pcidevs_lock)); - pdev = pci_get_pdev(msi->bus, msi->devfn); + pdev = pci_get_pdev(msi->seg, msi->bus, msi->devfn); if ( !pdev ) return -ENODEV; @@ -844,7 +845,8 @@ static int __pci_enable_msix(struct msi_ if ( old_desc ) { dprintk(XENLOG_WARNING, "irq %d has already mapped to MSIX on " - "device %02x:%02x.%01x.\n", msi->irq, msi->bus, + "device %04x:%02x:%02x.%01x\n", + msi->irq, msi->seg, msi->bus, PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn)); *desc = old_desc; return 0; @@ -854,7 +856,7 @@ static int __pci_enable_msix(struct msi_ if ( old_desc ) { dprintk(XENLOG_WARNING, "MSI is already in use on " - "device %02x:%02x.%01x\n", msi->bus, + "device %04x:%02x:%02x.%01x\n", msi->seg, msi->bus, 
PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn)); pci_disable_msi(old_desc); @@ -962,8 +964,10 @@ int pci_restore_msi_state(struct pci_dev if (desc->msi_desc != entry) { - dprintk(XENLOG_ERR, "Restore MSI for dev %x:%x not set before?\n", - pdev->bus, pdev->devfn); + dprintk(XENLOG_ERR, + "Restore MSI for dev %04x:%02x:%02x:%x not set before?\n", + pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn), + PCI_FUNC(pdev->devfn)); spin_unlock_irqrestore(&desc->lock, flags); return -EINVAL; } --- 2011-08-25.orig/xen/arch/x86/physdev.c 2011-08-25 15:32:13.000000000 +0200 +++ 2011-08-25/xen/arch/x86/physdev.c 2011-08-25 15:06:35.000000000 +0200 @@ -360,6 +360,15 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H if ( copy_from_guest(&map, arg, 1) != 0 ) break; + if ( map.type == MAP_PIRQ_TYPE_MSI_SEG ) + { + map.type = MAP_PIRQ_TYPE_MSI; + msi.seg = map.bus >> 16; + } + else + { + msi.seg = 0; + } msi.bus = map.bus; msi.devfn = map.devfn; msi.entry_nr = map.entry_nr; @@ -483,7 +492,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H if ( copy_from_guest(&manage_pci, arg, 1) != 0 ) break; - ret = pci_add_device(manage_pci.bus, manage_pci.devfn, NULL); + ret = pci_add_device(0, manage_pci.bus, manage_pci.devfn, NULL); break; } @@ -496,7 +505,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H if ( copy_from_guest(&manage_pci, arg, 1) != 0 ) break; - ret = pci_remove_device(manage_pci.bus, manage_pci.devfn); + ret = pci_remove_device(0, manage_pci.bus, manage_pci.devfn); break; } @@ -520,12 +529,52 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H pdev_info.is_virtfn = manage_pci_ext.is_virtfn; pdev_info.physfn.bus = manage_pci_ext.physfn.bus; pdev_info.physfn.devfn = manage_pci_ext.physfn.devfn; - ret = pci_add_device(manage_pci_ext.bus, + ret = pci_add_device(0, manage_pci_ext.bus, manage_pci_ext.devfn, &pdev_info); break; } + case PHYSDEVOP_pci_device_add: { + struct physdev_pci_device_add add; + struct pci_dev_info pdev_info; + + ret = -EPERM; + if ( !IS_PRIV(current->domain) ) + break; + + ret = -EFAULT; + 
if ( copy_from_guest(&add, arg, 1) != 0 ) + break; + + pdev_info.is_extfn = !!(add.flags & XEN_PCI_DEV_EXTFN); + if ( add.flags & XEN_PCI_DEV_VIRTFN ) + { + pdev_info.is_virtfn = 1; + pdev_info.physfn.bus = add.physfn.bus; + pdev_info.physfn.devfn = add.physfn.devfn; + } + else + pdev_info.is_virtfn = 0; + ret = pci_add_device(add.seg, add.bus, add.devfn, &pdev_info); + break; + } + + case PHYSDEVOP_pci_device_remove: { + struct physdev_pci_device dev; + + ret = -EPERM; + if ( !IS_PRIV(v->domain) ) + break; + + ret = -EFAULT; + if ( copy_from_guest(&dev, arg, 1) != 0 ) + break; + + ret = pci_remove_device(dev.seg, dev.bus, dev.devfn); + break; + } + #ifdef __x86_64__ case PHYSDEVOP_pci_mmcfg_reserved: { struct physdev_pci_mmcfg_reserved info; @@ -557,11 +606,31 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H break; spin_lock(&pcidevs_lock); - pdev = pci_get_pdev(restore_msi.bus, restore_msi.devfn); + pdev = pci_get_pdev(0, restore_msi.bus, restore_msi.devfn); + ret = pdev ? pci_restore_msi_state(pdev) : -ENODEV; + spin_unlock(&pcidevs_lock); + break; + } + + case PHYSDEVOP_restore_msi_ext: { + struct physdev_pci_device dev; + struct pci_dev *pdev; + + ret = -EPERM; + if ( !IS_PRIV(v->domain) ) + break; + + ret = -EFAULT; + if ( copy_from_guest(&dev, arg, 1) != 0 ) + break; + + spin_lock(&pcidevs_lock); + pdev = pci_get_pdev(dev.seg, dev.bus, dev.devfn); ret = pdev ? 
pci_restore_msi_state(pdev) : -ENODEV; spin_unlock(&pcidevs_lock); break; } + case PHYSDEVOP_setup_gsi: { struct physdev_setup_gsi setup_gsi; --- 2011-08-25.orig/xen/arch/x86/x86_64/physdev.c 2011-08-25 15:32:13.000000000 +0200 +++ 2011-08-25/xen/arch/x86/x86_64/physdev.c 2011-08-25 15:06:35.000000000 +0200 @@ -67,6 +67,14 @@ CHECK_physdev_get_free_pirq; CHECK_physdev_pci_mmcfg_reserved; #undef xen_physdev_pci_mmcfg_reserved +#define xen_physdev_pci_device_add physdev_pci_device_add +CHECK_physdev_pci_device_add +#undef xen_physdev_pci_device_add + +#define xen_physdev_pci_device physdev_pci_device +CHECK_physdev_pci_device +#undef xen_physdev_pci_device + #define COMPAT #undef guest_handle_okay #define guest_handle_okay compat_handle_okay --- 2011-08-25.orig/xen/drivers/passthrough/amd/pci_amd_iommu.c 2011-08-25 15:32:13.000000000 +0200 +++ 2011-08-25/xen/drivers/passthrough/amd/pci_amd_iommu.c 2011-08-25 15:06:35.000000000 +0200 @@ -131,7 +131,7 @@ static void __init amd_iommu_setup_dom0_ { for ( devfn = 0; devfn < 256; devfn++ ) { - pdev = pci_get_pdev(bus, devfn); + pdev = pci_get_pdev(0, bus, devfn); if ( !pdev ) continue; @@ -296,7 +296,7 @@ static int reassign_device( struct domai struct hvm_iommu *t = domain_hvm_iommu(target); ASSERT(spin_is_locked(&pcidevs_lock)); - pdev = pci_get_pdev_by_domain(source, bus, devfn); + pdev = pci_get_pdev_by_domain(source, 0, bus, devfn); if ( !pdev ) return -ENODEV; --- 2011-08-25.orig/xen/drivers/passthrough/iommu.c 2011-08-25 15:32:13.000000000 +0200 +++ 2011-08-25/xen/drivers/passthrough/iommu.c 2011-08-25 15:06:35.000000000 +0200 @@ -282,7 +282,7 @@ int deassign_device(struct domain *d, u8 return -EINVAL; ASSERT(spin_is_locked(&pcidevs_lock)); - pdev = pci_get_pdev(bus, devfn); + pdev = pci_get_pdev(0, bus, devfn); if ( !pdev ) return -ENODEV; --- 2011-08-25.orig/xen/drivers/passthrough/pci.c 2011-08-25 15:06:23.000000000 +0200 +++ 2011-08-25/xen/drivers/passthrough/pci.c 2011-08-25 15:06:35.000000000 +0200 @@ -121,6 
+121,7 @@ static struct pci_dev *alloc_pdev(struct return NULL; memset(pdev, 0, sizeof(struct pci_dev)); + *(u16*) &pdev->seg = pseg->nr; *((u8*) &pdev->bus) = bus; *((u8*) &pdev->devfn) = devfn; pdev->domain = NULL; @@ -137,41 +138,59 @@ static void free_pdev(struct pci_dev *pd xfree(pdev); } -struct pci_dev *pci_get_pdev(int bus, int devfn) +struct pci_dev *pci_get_pdev(int seg, int bus, int devfn) { - struct pci_seg *pseg = get_pseg(0); + struct pci_seg *pseg = get_pseg(seg); struct pci_dev *pdev = NULL; ASSERT(spin_is_locked(&pcidevs_lock)); + ASSERT(seg != -1 || bus == -1); + ASSERT(bus != -1 || devfn == -1); if ( !pseg ) - return NULL; + { + if ( seg == -1 ) + radix_tree_gang_lookup(&pci_segments, (void **)&pseg, 0, 1); + if ( !pseg ) + return NULL; + } - list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list ) - if ( (pdev->bus == bus || bus == -1) && - (pdev->devfn == devfn || devfn == -1) ) - { - return pdev; - } + do { + list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list ) + if ( (pdev->bus == bus || bus == -1) && + (pdev->devfn == devfn || devfn == -1) ) + return pdev; + } while ( radix_tree_gang_lookup(&pci_segments, (void **)&pseg, + pseg->nr + 1, 1) ); return NULL; } -struct pci_dev *pci_get_pdev_by_domain(struct domain *d, int bus, int devfn) +struct pci_dev *pci_get_pdev_by_domain( + struct domain *d, int seg, int bus, int devfn) { - struct pci_seg *pseg = get_pseg(0); + struct pci_seg *pseg = get_pseg(seg); struct pci_dev *pdev = NULL; + ASSERT(seg != -1 || bus == -1); + ASSERT(bus != -1 || devfn == -1); + if ( !pseg ) - return NULL; + { + if ( seg == -1 ) + radix_tree_gang_lookup(&pci_segments, (void **)&pseg, 0, 1); + if ( !pseg ) + return NULL; + } - list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list ) - if ( (pdev->bus == bus || bus == -1) && - (pdev->devfn == devfn || devfn == -1) && - (pdev->domain == d) ) - { - return pdev; - } + do { + list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list ) + if ( 
(pdev->bus == bus || bus == -1) && + (pdev->devfn == devfn || devfn == -1) && + (pdev->domain == d) ) + return pdev; + } while ( radix_tree_gang_lookup(&pci_segments, (void **)&pseg, + pseg->nr + 1, 1) ); return NULL; } @@ -215,7 +234,7 @@ void pci_enable_acs(struct pci_dev *pdev pci_conf_write16(bus, dev, func, pos + PCI_ACS_CTRL, ctrl); } -int pci_add_device(u8 bus, u8 devfn, const struct pci_dev_info *info) +int pci_add_device(u16 seg, u8 bus, u8 devfn, const struct pci_dev_info *info) { struct pci_seg *pseg; struct pci_dev *pdev; @@ -230,17 +249,20 @@ int pci_add_device(u8 bus, u8 devfn, con else if (info->is_virtfn) { spin_lock(&pcidevs_lock); - pdev = pci_get_pdev(info->physfn.bus, info->physfn.devfn); + pdev = pci_get_pdev(seg, info->physfn.bus, info->physfn.devfn); spin_unlock(&pcidevs_lock); if ( !pdev ) - pci_add_device(info->physfn.bus, info->physfn.devfn, NULL); + pci_add_device(seg, info->physfn.bus, info->physfn.devfn, NULL); pdev_type = "virtual function"; } else - return -EINVAL; + { + info = NULL; + pdev_type = "device"; + } spin_lock(&pcidevs_lock); - pseg = alloc_pseg(0); + pseg = alloc_pseg(seg); if ( !pseg ) goto out; pdev = alloc_pdev(pseg, bus, devfn); @@ -251,7 +273,7 @@ int pci_add_device(u8 bus, u8 devfn, con pdev->info = *info; else if ( !pdev->vf_rlen[0] ) { - unsigned int pos = pci_find_ext_capability(0, bus, devfn, + unsigned int pos = pci_find_ext_capability(seg, bus, devfn, PCI_EXT_CAP_ID_SRIOV); u16 ctrl = pci_conf_read16(bus, slot, func, pos + PCI_SRIOV_CTRL); @@ -271,9 +293,10 @@ int pci_add_device(u8 bus, u8 devfn, con if ( (bar & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO ) { - printk(XENLOG_WARNING "SR-IOV device %02x:%02x.%x with vf" - " BAR%u in IO space\n", - bus, slot, func, i); + printk(XENLOG_WARNING + "SR-IOV device %04x:%02x:%02x.%u with vf BAR%u" + " in IO space\n", + seg, bus, slot, func, i); continue; } pci_conf_write32(bus, slot, func, idx, ~0); @@ -282,9 +305,10 @@ int pci_add_device(u8 bus, u8 devfn, con 
{ if ( i >= PCI_SRIOV_NUM_BARS ) { - printk(XENLOG_WARNING "SR-IOV device %02x:%02x.%x with" - " 64-bit vf BAR in last slot\n", - bus, slot, func); + printk(XENLOG_WARNING + "SR-IOV device %04x:%02x:%02x.%u with 64-bit" + " vf BAR in last slot\n", + seg, bus, slot, func); break; } hi = pci_conf_read32(bus, slot, func, idx + 4); @@ -309,9 +333,10 @@ int pci_add_device(u8 bus, u8 devfn, con } } else - printk(XENLOG_WARNING "SR-IOV device %02x:%02x.%x has its virtual" - " functions already enabled (%04x)\n", - bus, slot, func, ctrl); + printk(XENLOG_WARNING + "SR-IOV device %04x:%02x:%02x.%u has its virtual" + " functions already enabled (%04x)\n", + seg, bus, slot, func, ctrl); } ret = 0; @@ -331,14 +356,14 @@ int pci_add_device(u8 bus, u8 devfn, con out: spin_unlock(&pcidevs_lock); - printk(XENLOG_DEBUG "PCI add %s %02x:%02x.%x\n", pdev_type, - bus, slot, func); + printk(XENLOG_DEBUG "PCI add %s %04x:%02x:%02x.%u\n", pdev_type, + seg, bus, slot, func); return ret; } -int pci_remove_device(u8 bus, u8 devfn) +int pci_remove_device(u16 seg, u8 bus, u8 devfn) { - struct pci_seg *pseg = get_pseg(0); + struct pci_seg *pseg = get_pseg(seg); struct pci_dev *pdev; int ret = -ENODEV; @@ -354,8 +379,8 @@ int pci_remove_device(u8 bus, u8 devfn) list_del(&pdev->domain_list); pci_cleanup_msi(pdev); free_pdev(pdev); - printk(XENLOG_DEBUG "PCI remove device %02x:%02x.%x\n", bus, - PCI_SLOT(devfn), PCI_FUNC(devfn)); + printk(XENLOG_DEBUG "PCI remove device %04x:%02x:%02x.%u\n", + seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); break; } @@ -413,7 +438,7 @@ void pci_release_devices(struct domain * spin_lock(&pcidevs_lock); pci_clean_dpci_irqs(d); - while ( (pdev = pci_get_pdev_by_domain(d, -1, -1)) ) + while ( (pdev = pci_get_pdev_by_domain(d, -1, -1, -1)) ) { pci_cleanup_msi(pdev); bus = pdev->bus; devfn = pdev->devfn; --- 2011-08-25.orig/xen/drivers/passthrough/vtd/iommu.c 2011-08-25 15:32:13.000000000 +0200 +++ 2011-08-25/xen/drivers/passthrough/vtd/iommu.c 2011-08-25 
15:06:35.000000000 +0200 @@ -259,7 +259,7 @@ static u64 addr_to_dma_page_maddr(struct * just get any passthrough device in the domainr - assume user * assigns only devices from same node to a given guest. */ - pdev = pci_get_pdev_by_domain(domain, -1, -1); + pdev = pci_get_pdev_by_domain(domain, -1, -1, -1); drhd = acpi_find_matched_drhd_unit(pdev); if ( !alloc || ((hd->pgd_maddr = alloc_pgtable_maddr(drhd, 1)) == 0) ) goto out; @@ -276,7 +276,7 @@ static u64 addr_to_dma_page_maddr(struct if ( !alloc ) break; - pdev = pci_get_pdev_by_domain(domain, -1, -1); + pdev = pci_get_pdev_by_domain(domain, -1, -1, -1); drhd = acpi_find_matched_drhd_unit(pdev); maddr = alloc_pgtable_maddr(drhd, 1); if ( !maddr ) @@ -1250,7 +1250,7 @@ int domain_context_mapping_one( { int res = 0; - pdev = pci_get_pdev(bus, devfn); + pdev = pci_get_pdev(0, bus, devfn); if (!pdev) res = -ENODEV; else if (pdev->domain != domain) @@ -1343,7 +1343,7 @@ static int domain_context_mapping(struct int ret = 0; u32 type; u8 secbus; - struct pci_dev *pdev = pci_get_pdev(bus, devfn); + struct pci_dev *pdev = pci_get_pdev(0, bus, devfn); drhd = acpi_find_matched_drhd_unit(pdev); if ( !drhd ) @@ -1468,7 +1468,7 @@ static int domain_context_unmap(struct d int ret = 0; u32 type; u8 tmp_bus, tmp_devfn, secbus; - struct pci_dev *pdev = pci_get_pdev(bus, devfn); + struct pci_dev *pdev = pci_get_pdev(0, bus, devfn); int found = 0; BUG_ON(!pdev); @@ -1579,7 +1579,7 @@ static int reassign_device_ownership( int ret; ASSERT(spin_is_locked(&pcidevs_lock)); - pdev = pci_get_pdev_by_domain(source, bus, devfn); + pdev = pci_get_pdev_by_domain(source, 0, bus, devfn); if (!pdev) return -ENODEV; @@ -1888,7 +1888,7 @@ static void __init setup_dom0_devices(st { for ( devfn = 0; devfn < 256; devfn++ ) { - pdev = pci_get_pdev(bus, devfn); + pdev = pci_get_pdev(0, bus, devfn); if ( !pdev ) continue; @@ -2122,7 +2122,7 @@ int device_assigned(u8 bus, u8 devfn) struct pci_dev *pdev; spin_lock(&pcidevs_lock); - pdev = 
pci_get_pdev_by_domain(dom0, bus, devfn); + pdev = pci_get_pdev_by_domain(dom0, 0, bus, devfn); if (!pdev) { spin_unlock(&pcidevs_lock); @@ -2144,7 +2144,7 @@ static int intel_iommu_assign_device(str return -ENODEV; ASSERT(spin_is_locked(&pcidevs_lock)); - pdev = pci_get_pdev(bus, devfn); + pdev = pci_get_pdev(0, bus, devfn); if (!pdev) return -ENODEV; --- 2011-08-25.orig/xen/drivers/passthrough/vtd/quirks.c 2011-08-25 15:32:13.000000000 +0200 +++ 2011-08-25/xen/drivers/passthrough/vtd/quirks.c 2011-08-25 15:06:35.000000000 +0200 @@ -286,7 +286,7 @@ static void map_me_phantom_function(stru struct pci_dev *pdev; /* find ME VT-d engine base on a real ME device */ - pdev = pci_get_pdev(0, PCI_DEVFN(dev, 0)); + pdev = pci_get_pdev(0, 0, PCI_DEVFN(dev, 0)); drhd = acpi_find_matched_drhd_unit(pdev); /* map or unmap ME phantom function */ --- 2011-08-25.orig/xen/drivers/passthrough/vtd/x86/ats.c 2011-08-25 15:32:13.000000000 +0200 +++ 2011-08-25/xen/drivers/passthrough/vtd/x86/ats.c 2011-08-25 15:06:35.000000000 +0200 @@ -37,6 +37,7 @@ static LIST_HEAD(ats_dev_drhd_units); struct pci_ats_dev { struct list_head list; + u16 seg; u8 bus; u8 devfn; u16 ats_queue_depth; /* ATS device invalidation queue depth */ @@ -91,7 +92,7 @@ int ats_device(int seg, int bus, int dev if ( !ats_enabled || !iommu_qinval ) return 0; - pdev = pci_get_pdev(bus, devfn); + pdev = pci_get_pdev(seg, bus, devfn); if ( !pdev ) return 0; @@ -130,8 +131,9 @@ int enable_ats_device(int seg, int bus, BUG_ON(!pos); if ( iommu_verbose ) - dprintk(XENLOG_INFO VTDPREFIX, "%x:%x.%x: ATS capability found\n", - bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); + dprintk(XENLOG_INFO VTDPREFIX, + "%04x:%02x:%02x.%u: ATS capability found\n", + seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); /* BUGBUG: add back seg when multi-seg platform support is enabled */ value = pci_conf_read16(bus, PCI_SLOT(devfn), @@ -140,7 +142,7 @@ int enable_ats_device(int seg, int bus, { list_for_each_entry ( pdev, &ats_devices, list ) { - if ( 
pdev->bus == bus && pdev->devfn == devfn ) + if ( pdev->seg == seg && pdev->bus == bus && pdev->devfn == devfn ) { pos = 0; break; @@ -161,6 +163,7 @@ int enable_ats_device(int seg, int bus, if ( pos ) { + pdev->seg = seg; pdev->bus = bus; pdev->devfn = devfn; value = pci_conf_read16(bus, PCI_SLOT(devfn), @@ -170,8 +173,10 @@ int enable_ats_device(int seg, int bus, } if ( iommu_verbose ) - dprintk(XENLOG_INFO VTDPREFIX, "%x:%x.%x: ATS %s enabled\n", - bus, PCI_SLOT(devfn), PCI_FUNC(devfn), pos ? "is" : "was"); + dprintk(XENLOG_INFO VTDPREFIX, + "%04x:%02x:%02x.%u: ATS %s enabled\n", + seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), + pos ? "is" : "was"); return pos; } @@ -194,7 +199,7 @@ void disable_ats_device(int seg, int bus list_for_each_entry ( pdev, &ats_devices, list ) { - if ( pdev->bus == bus && pdev->devfn == devfn ) + if ( pdev->seg == seg && pdev->bus == bus && pdev->devfn == devfn ) { list_del(&pdev->list); xfree(pdev); @@ -203,8 +208,9 @@ void disable_ats_device(int seg, int bus } if ( iommu_verbose ) - dprintk(XENLOG_INFO VTDPREFIX, "%x:%x.%x: ATS is disabled\n", - bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); + dprintk(XENLOG_INFO VTDPREFIX, + "%04x:%02x:%02x.%u: ATS is disabled\n", + seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); } --- 2011-08-25.orig/xen/include/asm-x86/msi.h 2011-08-25 15:05:26.000000000 +0200 +++ 2011-08-25/xen/include/asm-x86/msi.h 2011-08-25 15:06:35.000000000 +0200 @@ -59,8 +59,9 @@ #endif struct msi_info { - int bus; - int devfn; + u16 seg; + u8 bus; + u8 devfn; int irq; int entry_nr; uint64_t table_base; --- 2011-08-25.orig/xen/include/public/physdev.h 2011-08-25 15:32:13.000000000 +0200 +++ 2011-08-25/xen/include/public/physdev.h 2011-08-25 15:06:35.000000000 +0200 @@ -142,6 +142,7 @@ DEFINE_XEN_GUEST_HANDLE(physdev_irq_t); #define MAP_PIRQ_TYPE_MSI 0x0 #define MAP_PIRQ_TYPE_GSI 0x1 #define MAP_PIRQ_TYPE_UNKNOWN 0x2 +#define MAP_PIRQ_TYPE_MSI_SEG 0x3 #define PHYSDEVOP_map_pirq 13 struct physdev_map_pirq { @@ -152,7 +153,7 @@ struct 
physdev_map_pirq { int index; /* IN or OUT */ int pirq; - /* IN */ + /* IN - high 16 bits hold segment for MAP_PIRQ_TYPE_MSI_SEG */ int bus; /* IN */ int devfn; @@ -268,6 +269,41 @@ struct physdev_pci_mmcfg_reserved { typedef struct physdev_pci_mmcfg_reserved physdev_pci_mmcfg_reserved_t; DEFINE_XEN_GUEST_HANDLE(physdev_pci_mmcfg_reserved_t); +#define XEN_PCI_DEV_EXTFN 0x1 +#define XEN_PCI_DEV_VIRTFN 0x2 +#define XEN_PCI_DEV_PXM 0x4 + +#define PHYSDEVOP_pci_device_add 25 +struct physdev_pci_device_add { + /* IN */ + uint16_t seg; + uint8_t bus; + uint8_t devfn; + uint32_t flags; + struct { + uint8_t bus; + uint8_t devfn; + } physfn; +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + uint32_t optarr[]; +#elif defined(__GNUC__) + uint32_t optarr[0]; +#endif +}; +typedef struct physdev_pci_device_add physdev_pci_device_add_t; +DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_add_t); + +#define PHYSDEVOP_pci_device_remove 26 +#define PHYSDEVOP_restore_msi_ext 27 +struct physdev_pci_device { + /* IN */ + uint16_t seg; + uint8_t bus; + uint8_t devfn; +}; +typedef struct physdev_pci_device physdev_pci_device_t; +DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_t); + /* * Notify that some PIRQ-bound event channels have been unmasked. 
* ** This command is obsolete since interface version 0x00030202 and is ** --- 2011-08-25.orig/xen/include/xen/pci.h 2011-08-25 15:06:23.000000000 +0200 +++ 2011-08-25/xen/include/xen/pci.h 2011-08-25 15:06:35.000000000 +0200 @@ -56,6 +56,7 @@ struct pci_dev { spinlock_t msix_table_lock; struct domain *domain; + const u16 seg; const u8 bus; const u8 devfn; struct pci_dev_info info; @@ -90,10 +91,11 @@ struct pci_dev *pci_lock_domain_pdev(str void pci_release_devices(struct domain *d); int pci_add_segment(u16 seg); -int pci_add_device(u8 bus, u8 devfn, const struct pci_dev_info *); -int pci_remove_device(u8 bus, u8 devfn); -struct pci_dev *pci_get_pdev(int bus, int devfn); -struct pci_dev *pci_get_pdev_by_domain(struct domain *d, int bus, int devfn); +int pci_add_device(u16 seg, u8 bus, u8 devfn, const struct pci_dev_info *); +int pci_remove_device(u16 seg, u8 bus, u8 devfn); +struct pci_dev *pci_get_pdev(int seg, int bus, int devfn); +struct pci_dev *pci_get_pdev_by_domain( + struct domain *, int seg, int bus, int devfn); void disconnect_pci_devices(void); --- 2011-08-25.orig/xen/include/xlat.lst 2011-08-25 15:32:13.000000000 +0200 +++ 2011-08-25/xen/include/xlat.lst 2011-08-25 15:06:35.000000000 +0200 @@ -65,6 +65,8 @@ ? physdev_irq_status_query physdev.h ? physdev_manage_pci physdev.h ? physdev_manage_pci_ext physdev.h +? physdev_pci_device physdev.h +? physdev_pci_device_add physdev.h ? physdev_pci_mmcfg_reserved physdev.h ? physdev_unmap_pirq physdev.h ? physdev_restore_msi physdev.h