... and make some so far global data per-segment. Segments are tracked in a radix tree that never gets deleted from, so there should not be any race conditions. Signed-off-by: Jan Beulich --- 2011-08-25.orig/xen/arch/x86/setup.c 2011-08-08 08:29:50.000000000 +0200 +++ 2011-08-25/xen/arch/x86/setup.c 2011-08-25 15:06:23.000000000 +0200 @@ -1246,6 +1246,8 @@ void __init __start_xen(unsigned long mb local_irq_enable(); + pt_pci_init(); + #ifdef CONFIG_X86_64 vesa_mtrr_init(); --- 2011-08-25.orig/xen/arch/x86/x86_64/acpi_mmcfg.c 2011-08-19 17:08:35.000000000 +0200 +++ 2011-08-25/xen/arch/x86/x86_64/acpi_mmcfg.c 2011-08-25 15:06:23.000000000 +0200 @@ -111,6 +111,7 @@ int __init acpi_parse_mcfg(struct acpi_t pci_mmcfg_config_num = 0; return -ENODEV; } + pci_add_segment(pci_mmcfg_config[i].pci_segment); } return 0; --- 2011-08-25.orig/xen/arch/x86/x86_64/mmconfig-shared.c 2011-08-08 08:29:50.000000000 +0200 +++ 2011-08-25/xen/arch/x86/x86_64/mmconfig-shared.c 2011-08-25 15:06:23.000000000 +0200 @@ -171,6 +171,7 @@ static const char __init *pci_mmcfg_amd_ pci_mmcfg_config[i].pci_segment = i; pci_mmcfg_config[i].start_bus_number = 0; pci_mmcfg_config[i].end_bus_number = (1 << busnbits) - 1; + pci_add_segment(i); } return "AMD Family 10h NB"; --- 2011-08-25.orig/xen/drivers/passthrough/pci.c 2011-08-16 08:15:46.000000000 +0200 +++ 2011-08-25/xen/drivers/passthrough/pci.c 2011-08-25 15:06:23.000000000 +0200 @@ -26,29 +26,93 @@ #include #include #include +#include #include #ifdef CONFIG_X86 #include #endif -LIST_HEAD(alldevs_list); +struct pci_seg { + struct list_head alldevs_list; + u16 nr; + /* bus2bridge_lock protects bus2bridge array */ + spinlock_t bus2bridge_lock; +#define MAX_BUSES 256 + struct { + u8 map; + u8 bus; + u8 devfn; + } bus2bridge[MAX_BUSES]; +}; + spinlock_t pcidevs_lock = SPIN_LOCK_UNLOCKED; +static struct radix_tree_root pci_segments; -#define MAX_BUSES 256 -static struct { - u8 map; - u8 bus; - u8 devfn; -} bus2bridge[MAX_BUSES]; +static inline struct pci_seg *get_pseg(u16 seg) +{ + return radix_tree_lookup(&pci_segments, seg); +} + +static struct pci_seg *alloc_pseg(u16 seg) +{ + struct pci_seg *pseg = get_pseg(seg); + + if ( pseg ) + return pseg; + + pseg = xmalloc(struct pci_seg); + if ( !pseg ) + return NULL; + + pseg->nr = seg; + INIT_LIST_HEAD(&pseg->alldevs_list); + spin_lock_init(&pseg->bus2bridge_lock); + memset(pseg->bus2bridge, 0, sizeof(pseg->bus2bridge)); + + if ( radix_tree_insert(&pci_segments, seg, pseg) ) + { + xfree(pseg); + pseg = NULL; + } + + return pseg; +} -/* bus2bridge_lock protects bus2bridge array */ -static DEFINE_SPINLOCK(bus2bridge_lock); +static int pci_segments_iterate( + int (*handler)(struct pci_seg *, void *), void *arg) +{ + u16 seg = 0; + int rc = 0; + + do { + struct pci_seg *pseg; + + if ( !radix_tree_gang_lookup(&pci_segments, (void **)&pseg, seg, 1) ) + break; + rc = handler(pseg, arg); + seg = pseg->nr + 1; + } while (!rc && seg); + + return rc; +} + +void __init pt_pci_init(void) +{ + radix_tree_init(&pci_segments); + if ( !alloc_pseg(0) ) + panic("Could not initialize PCI segment 0\n"); +} -static struct pci_dev *alloc_pdev(u8 bus, u8 devfn) +int __init pci_add_segment(u16 seg) +{ + return alloc_pseg(seg) ? 0 : -ENOMEM; +} + +static struct pci_dev *alloc_pdev(struct pci_seg *pseg, u8 bus, u8 devfn) { struct pci_dev *pdev; - list_for_each_entry ( pdev, &alldevs_list, alldevs_list ) + list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list ) if ( pdev->bus == bus && pdev->devfn == devfn ) return pdev; @@ -61,7 +125,7 @@ static struct pci_dev *alloc_pdev(u8 bus *((u8*) &pdev->devfn) = devfn; pdev->domain = NULL; INIT_LIST_HEAD(&pdev->msi_list); - list_add(&pdev->alldevs_list, &alldevs_list); + list_add(&pdev->alldevs_list, &pseg->alldevs_list); spin_lock_init(&pdev->msix_table_lock); return pdev; @@ -75,11 +139,15 @@ static void free_pdev(struct pci_dev *pd struct pci_dev *pci_get_pdev(int bus, int devfn) { + struct pci_seg *pseg = get_pseg(0); struct pci_dev *pdev = NULL; ASSERT(spin_is_locked(&pcidevs_lock)); - list_for_each_entry ( pdev, &alldevs_list, alldevs_list ) + if ( !pseg ) + return NULL; + + list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list ) if ( (pdev->bus == bus || bus == -1) && (pdev->devfn == devfn || devfn == -1) ) { @@ -91,9 +159,13 @@ struct pci_dev *pci_get_pdev(int bus, in struct pci_dev *pci_get_pdev_by_domain(struct domain *d, int bus, int devfn) { + struct pci_seg *pseg = get_pseg(0); struct pci_dev *pdev = NULL; - list_for_each_entry ( pdev, &alldevs_list, alldevs_list ) + if ( !pseg ) + return NULL; + + list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list ) if ( (pdev->bus == bus || bus == -1) && (pdev->devfn == devfn || devfn == -1) && (pdev->domain == d) ) @@ -145,6 +217,7 @@ void pci_enable_acs(struct pci_dev *pdev int pci_add_device(u8 bus, u8 devfn, const struct pci_dev_info *info) { + struct pci_seg *pseg; struct pci_dev *pdev; unsigned int slot = PCI_SLOT(devfn), func = PCI_FUNC(devfn); const char *pdev_type; @@ -167,7 +240,10 @@ int pci_add_device(u8 bus, u8 devfn, con return -EINVAL; spin_lock(&pcidevs_lock); - pdev = alloc_pdev(bus, devfn); + pseg = alloc_pseg(0); + if ( !pseg ) + goto out; + pdev = alloc_pdev(pseg, bus, devfn); if ( !pdev ) goto out; @@ -262,11 +338,15 @@ out: int pci_remove_device(u8 bus, u8 devfn) { + struct pci_seg *pseg = get_pseg(0); struct pci_dev *pdev; int ret = -ENODEV; + if ( !pseg ) + return -ENODEV; + spin_lock(&pcidevs_lock); - list_for_each_entry ( pdev, &alldevs_list, alldevs_list ) + list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list ) if ( pdev->bus == bus && pdev->devfn == devfn ) { ret = iommu_remove_device(pdev); @@ -384,22 +464,26 @@ int pdev_type(u8 bus, u8 devfn) */ int find_upstream_bridge(u8 *bus, u8 *devfn, u8 *secbus) { + struct pci_seg *pseg = get_pseg(0); int ret = 0; int cnt = 0; if ( *bus == 0 ) return 0; - if ( !bus2bridge[*bus].map ) + if ( !pseg ) + return -1; + + if ( !pseg->bus2bridge[*bus].map ) return 0; ret = 1; - spin_lock(&bus2bridge_lock); - while ( bus2bridge[*bus].map ) + spin_lock(&pseg->bus2bridge_lock); + while ( pseg->bus2bridge[*bus].map ) { *secbus = *bus; - *devfn = bus2bridge[*bus].devfn; - *bus = bus2bridge[*bus].bus; + *devfn = pseg->bus2bridge[*bus].devfn; + *bus = pseg->bus2bridge[*bus].bus; if ( cnt++ >= MAX_BUSES ) { ret = -1; @@ -408,7 +492,7 @@ int find_upstream_bridge(u8 *bus, u8 *de } out: - spin_unlock(&bus2bridge_lock); + spin_unlock(&pseg->bus2bridge_lock); return ret; } @@ -431,14 +515,13 @@ int __init pci_device_detect(u8 bus, u8 * scan pci devices to add all existed PCI devices to alldevs_list, * and setup pci hierarchy in array bus2bridge. */ -int __init scan_pci_devices(void) +static int __init _scan_pci_devices(struct pci_seg *pseg, void *arg) { struct pci_dev *pdev; int bus, dev, func; u8 sec_bus, sub_bus; int type; - spin_lock(&pcidevs_lock); for ( bus = 0; bus < 256; bus++ ) { for ( dev = 0; dev < 32; dev++ ) @@ -448,11 +531,10 @@ int __init scan_pci_devices(void) if ( pci_device_detect(bus, dev, func) == 0 ) continue; - pdev = alloc_pdev(bus, PCI_DEVFN(dev, func)); + pdev = alloc_pdev(pseg, bus, PCI_DEVFN(dev, func)); if ( !pdev ) { printk("%s: alloc_pdev failed.\n", __func__); - spin_unlock(&pcidevs_lock); return -ENOMEM; } @@ -470,14 +552,15 @@ int __init scan_pci_devices(void) sub_bus = pci_conf_read8(bus, dev, func, PCI_SUBORDINATE_BUS); - spin_lock(&bus2bridge_lock); + spin_lock(&pseg->bus2bridge_lock); for ( sub_bus &= 0xff; sec_bus <= sub_bus; sec_bus++ ) { - bus2bridge[sec_bus].map = 1; - bus2bridge[sec_bus].bus = bus; - bus2bridge[sec_bus].devfn = PCI_DEVFN(dev, func); + pseg->bus2bridge[sec_bus].map = 1; + pseg->bus2bridge[sec_bus].bus = bus; + pseg->bus2bridge[sec_bus].devfn = + PCI_DEVFN(dev, func); } - spin_unlock(&bus2bridge_lock); + spin_unlock(&pseg->bus2bridge_lock); break; case DEV_TYPE_PCIe_ENDPOINT: @@ -487,7 +570,6 @@ int __init scan_pci_devices(void) default: printk("%s: unknown type: bdf = %x:%x.%x\n", __func__, bus, dev, func); - spin_unlock(&pcidevs_lock); return -EINVAL; } @@ -498,39 +580,53 @@ int __init scan_pci_devices(void) } } - spin_unlock(&pcidevs_lock); return 0; } +int __init scan_pci_devices(void) +{ + int ret; + + spin_lock(&pcidevs_lock); + ret = pci_segments_iterate(_scan_pci_devices, NULL); + spin_unlock(&pcidevs_lock); + + return ret; +} + /* Disconnect all PCI devices from the PCI buses. >From the PCI spec: * "When a 0 is written to [the COMMAND] register, the device is * logically disconnected from the PCI bus for all accesses except * configuration accesses. All devices are required to support * this base level of functionality." */ -void disconnect_pci_devices(void) +static int _disconnect_pci_devices(struct pci_seg *pseg, void *arg) { struct pci_dev *pdev; - spin_lock(&pcidevs_lock); - - list_for_each_entry ( pdev, &alldevs_list, alldevs_list ) + list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list ) pci_conf_write16(pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), PCI_COMMAND, 0); + return 0; +} + +void disconnect_pci_devices(void) +{ + spin_lock(&pcidevs_lock); + pci_segments_iterate(_disconnect_pci_devices, NULL); spin_unlock(&pcidevs_lock); } #ifdef SUPPORT_MSI_REMAPPING -static void dump_pci_devices(unsigned char ch) +static int _dump_pci_devices(struct pci_seg *pseg, void *arg) { struct pci_dev *pdev; struct msi_desc *msi; - printk("==== PCI devices ====\n"); - spin_lock(&pcidevs_lock); + printk("==== segment %04x ====\n", pseg->nr); - list_for_each_entry ( pdev, &alldevs_list, alldevs_list ) + list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list ) { printk("%02x:%02x.%x - dom %-3d - MSIs < ", pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), @@ -540,6 +636,14 @@ static void dump_pci_devices(unsigned ch printk(">\n"); } + return 0; +} + +static void dump_pci_devices(unsigned char ch) +{ + printk("==== PCI devices ====\n"); + spin_lock(&pcidevs_lock); + pci_segments_iterate(_dump_pci_devices, NULL); spin_unlock(&pcidevs_lock); } --- 2011-08-25.orig/xen/include/xen/iommu.h 2011-08-25 08:21:53.000000000 +0200 +++ 2011-08-25/xen/include/xen/iommu.h 2011-08-25 15:06:23.000000000 +0200 @@ -92,6 +92,8 @@ void iommu_pte_flush(struct domain *d, u void iommu_set_pgd(struct domain *d); void iommu_domain_teardown(struct domain *d); +void pt_pci_init(void); + struct pirq; int hvm_do_IRQ_dpci(struct domain *, struct pirq *); int dpci_ioport_intercept(ioreq_t *p); --- 2011-08-25.orig/xen/include/xen/pci.h 2011-08-16 08:15:46.000000000 +0200 +++ 2011-08-25/xen/include/xen/pci.h 2011-08-25 15:06:23.000000000 +0200 @@ -89,6 +89,7 @@ struct pci_dev *pci_lock_pdev(int bus, i struct pci_dev *pci_lock_domain_pdev(struct domain *d, int bus, int devfn); void pci_release_devices(struct domain *d); +int pci_add_segment(u16 seg); int pci_add_device(u8 bus, u8 devfn, const struct pci_dev_info *); int pci_remove_device(u8 bus, u8 devfn); struct pci_dev *pci_get_pdev(int bus, int devfn);