[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 3/7] PCI device register/unregister + pci_dev cleanups



Add management and locking of PCI device structures

Add functions for managing pci_dev structures.  Create a list
containing all current pci_devs.  Remove msi_pdev_list.  Create a
read-write lock protecting all pci_dev lists.  Add spinlocks for
pci_dev access.  Do necessary modifications to MSI code.

Signed-off-by: Espen Skoglund <espen.skoglund@xxxxxxxxxxxxx>


--
 b/xen/drivers/passthrough/pci.c             |  124 ++++++++++++++++++++
 xen/arch/x86/i8259.c                        |    3 
 xen/arch/x86/msi.c                          |  172 +++++++++++-----------------
 xen/arch/x86/physdev.c                      |    4 
 xen/drivers/passthrough/Makefile            |    1 
 xen/drivers/passthrough/amd/pci_amd_iommu.c |   72 ++++++-----
 xen/drivers/passthrough/iommu.c             |    5 
 xen/drivers/passthrough/vtd/iommu.c         |   73 ++++++-----
 xen/include/asm-x86/msi.h                   |    4 
 xen/include/xen/iommu.h                     |    9 -
 xen/include/xen/pci.h                       |   23 +++
 11 files changed, 304 insertions(+), 186 deletions(-)
--
diff -r 5b0699fb81a5 xen/arch/x86/i8259.c
--- a/xen/arch/x86/i8259.c      Fri Jul 04 17:04:47 2008 +0100
+++ b/xen/arch/x86/i8259.c      Fri Jul 04 17:19:39 2008 +0100
@@ -382,7 +382,6 @@
 
 static struct irqaction cascade = { no_action, "cascade", NULL};
 
-extern struct list_head msi_pdev_list;
 void __init init_IRQ(void)
 {
     int i;
@@ -419,7 +418,5 @@
     outb(LATCH >> 8, PIT_CH0);     /* MSB */
 
     setup_irq(2, &cascade);
-
-    INIT_LIST_HEAD(&msi_pdev_list);
 }
 
diff -r 5b0699fb81a5 xen/arch/x86/msi.c
--- a/xen/arch/x86/msi.c        Fri Jul 04 17:04:47 2008 +0100
+++ b/xen/arch/x86/msi.c        Fri Jul 04 17:19:39 2008 +0100
@@ -28,21 +28,6 @@
 #include <xen/iommu.h>
 
 extern int msi_irq_enable;
-
-/* PCI-dev list with MSI/MSIX capabilities */
-DEFINE_SPINLOCK(msi_pdev_lock);
-struct list_head msi_pdev_list;
-
-struct pci_dev *get_msi_pdev(u8 bus, u8 devfn)
-{
-    struct pci_dev *pdev = NULL;
-
-    list_for_each_entry(pdev, &msi_pdev_list, msi_dev_list)
-        if ( pdev->bus == bus && pdev->devfn == devfn )
-            return pdev;
-
-    return NULL;
-}
 
 /* bitmap indicate which fixed map is free */
 DEFINE_SPINLOCK(msix_fixmap_lock);
@@ -112,10 +97,8 @@
     }
 }
 
-void read_msi_msg(unsigned int irq, struct msi_msg *msg)
+static void read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
 {
-    struct msi_desc *entry = irq_desc[irq].msi_desc;
-
     switch ( entry->msi_attrib.type )
     {
     case PCI_CAP_ID_MSI:
@@ -147,7 +130,7 @@
     {
         void __iomem *base;
         base = entry->mask_base +
-            entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
+           entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
 
         msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
         msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
@@ -164,9 +147,6 @@
 
 static int set_vector_msi(struct msi_desc *entry)
 {
-    irq_desc_t *desc;
-    unsigned long flags;
-
     if ( entry->vector >= NR_VECTORS )
     {
         dprintk(XENLOG_ERR, "Trying to install msi data for Vector %d\n",
@@ -174,19 +154,12 @@
         return -EINVAL;
     }
 
-    desc = &irq_desc[entry->vector];
-    spin_lock_irqsave(&desc->lock, flags);
-    desc->msi_desc = entry;
-    spin_unlock_irqrestore(&desc->lock, flags);
-
+    irq_desc[entry->vector].msi_desc = entry;
     return 0;
 }
 
 static int unset_vector_msi(int vector)
 {
-    irq_desc_t *desc;
-    unsigned long flags;
-
     if ( vector >= NR_VECTORS )
     {
         dprintk(XENLOG_ERR, "Trying to uninstall msi data for Vector %d\n",
@@ -194,18 +167,12 @@
         return -EINVAL;
     }
 
-    desc = &irq_desc[vector];
-    spin_lock_irqsave(&desc->lock, flags);
-    desc->msi_desc = NULL;
-    spin_unlock_irqrestore(&desc->lock, flags);
-
+    irq_desc[vector].msi_desc = NULL;
     return 0;
 }
 
-void write_msi_msg(unsigned int irq, struct msi_msg *msg)
+static void write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
 {
-    struct msi_desc *entry = irq_desc[irq].msi_desc;
-
     if ( vtd_enabled )
         msi_msg_write_remap_rte(entry, msg);
 
@@ -254,6 +221,7 @@
 
 void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 {
+    struct msi_desc *desc = irq_desc[irq].msi_desc;
     struct msi_msg msg;
     unsigned int dest;
 
@@ -263,12 +231,18 @@
         mask = TARGET_CPUS;
     dest = cpu_mask_to_apicid(mask);
 
-    read_msi_msg(irq, &msg);
+    if ( !desc )
+       return;
+
+    ASSERT(spin_is_locked(&irq_desc[vector].lock));
+    spin_lock(&desc->dev->lock);
+    read_msi_msg(desc, &msg);
 
     msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
     msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
-    write_msi_msg(irq, &msg);
+    write_msi_msg(desc, &msg);
+    spin_unlock(&desc->dev->lock);
 }
 
 static void msi_set_enable(struct pci_dev *dev, int enable)
@@ -290,7 +264,7 @@
     }
 }
 
-void msix_set_enable(struct pci_dev *dev, int enable)
+static void msix_set_enable(struct pci_dev *dev, int enable)
 {
     int pos;
     u16 control;
@@ -335,6 +309,7 @@
 {
     struct msi_desc *entry = irq_desc[irq].msi_desc;
 
+    ASSERT(spin_is_locked(&irq_desc[vector].lock));
     BUG_ON(!entry || !entry->dev);
     switch (entry->msi_attrib.type) {
     case PCI_CAP_ID_MSI:
@@ -401,7 +376,7 @@
 
     msi_compose_msg(dev, desc->vector, &msg);
     set_vector_msi(desc);
-    write_msi_msg(desc->vector, &msg);
+    write_msi_msg(irq_desc[desc->vector].msi_desc, &msg);
 
     return 0;
 }
@@ -415,8 +390,8 @@
 {
     struct msi_desc *entry;
 
+    ASSERT(spin_is_locked(&irq_desc[vector].lock));
     entry = irq_desc[vector].msi_desc;
-
     teardown_msi_vector(vector);
 
     if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
@@ -619,35 +594,22 @@
 static int __pci_enable_msi(u8 bus, u8 devfn, int vector)
 {
     int status;
-    struct pci_dev *dev;
+    struct pci_dev *pdev;
 
-    dev = get_msi_pdev(bus, devfn);
-    if ( !dev )
+    pdev = pci_lock_pdev(bus, devfn);
+    if ( !pdev )
+       return -ENODEV;
+
+    if ( find_msi_entry(pdev, vector, PCI_CAP_ID_MSI) )
     {
-        dev = xmalloc(struct pci_dev);
-        if ( !dev )
-            return -ENOMEM;
-        dev->bus = bus;
-        dev->devfn = devfn;
-        INIT_LIST_HEAD(&dev->msi_list);
-    }
-
-    if ( find_msi_entry(dev, vector, PCI_CAP_ID_MSI) )
-    {
+       spin_unlock(&pdev->lock);
         dprintk(XENLOG_WARNING, "vector %d has already mapped to MSI on device 
\
             %02x:%02x.%01x.\n", vector, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
         return 0;
     }
 
-    status = msi_capability_init(dev, vector);
-
-    if ( dev != get_msi_pdev(bus, devfn) )
-    {
-        spin_lock(&msi_pdev_lock);
-        list_add_tail(&dev->msi_dev_list, &msi_pdev_list);
-        spin_unlock(&msi_pdev_lock);
-    }
-
+    status = msi_capability_init(pdev, vector);
+    spin_unlock(&pdev->lock);
     return status;
 }
 
@@ -660,6 +622,13 @@
     u8 bus, slot, func;
 
     entry = irq_desc[vector].msi_desc;
+    if ( !entry )
+       return;
+    /*
+     * Lock here is safe.  msi_desc can not be removed without holding
+     * both irq_desc[].lock (which we do) and pdev->lock.
+     */
+    spin_lock(&entry->dev->lock);
     dev = entry->dev;
     bus = dev->bus;
     slot = PCI_SLOT(dev->devfn);
@@ -674,6 +643,7 @@
     msi_free_vector(vector);
 
     pci_conf_write16(bus, slot, func, msi_control_reg(pos), control);
+    spin_unlock(&dev->lock);
 }
 
 /**
@@ -694,47 +664,35 @@
 static int __pci_enable_msix(u8 bus, u8 devfn, int vector, int entry_nr)
 {
     int status, pos, nr_entries;
-    struct pci_dev *dev;
+    struct pci_dev *pdev;
     u16 control;
     u8 slot = PCI_SLOT(devfn);
     u8 func = PCI_FUNC(devfn);
+
+    pdev = pci_lock_pdev(bus, devfn);
+    if ( !pdev )
+       return -ENODEV;
 
     pos = pci_find_cap_offset(bus, slot, func, PCI_CAP_ID_MSIX);
     control = pci_conf_read16(bus, slot, func, msi_control_reg(pos));
     nr_entries = multi_msix_capable(control);
     if (entry_nr > nr_entries)
+    {
+       spin_unlock(&pdev->lock);
         return -EINVAL;
-
-    /* Check whether driver already requested for MSI-X irqs */
-    dev = get_msi_pdev(bus, devfn);
-
-    if ( !dev )
-    {
-        dev = xmalloc(struct pci_dev);
-        if ( !dev )
-            return -ENOMEM;
-        dev->bus = bus;
-        dev->devfn = devfn;
-        INIT_LIST_HEAD(&dev->msi_list);
     }
 
-    if ( find_msi_entry(dev, vector, PCI_CAP_ID_MSIX) )
+    if ( find_msi_entry(pdev, vector, PCI_CAP_ID_MSIX) )
     {
+       spin_unlock(&pdev->lock);
         dprintk(XENLOG_WARNING, "vector %d has already mapped to MSIX on \
                 device %02x:%02x.%01x.\n", vector, bus,
                 PCI_SLOT(devfn), PCI_FUNC(devfn));
         return 0;
     }
 
-    status = msix_capability_init(dev, vector, entry_nr);
-
-    if ( dev != get_msi_pdev(bus, devfn) )
-    {
-        spin_lock(&msi_pdev_lock);
-        list_add_tail(&dev->msi_dev_list, &msi_pdev_list);
-        spin_unlock(&msi_pdev_lock);
-    }
-
+    status = msix_capability_init(pdev, vector, entry_nr);
+    spin_unlock(&pdev->lock);
     return status;
 }
 
@@ -747,6 +705,13 @@
     u8 bus, slot, func;
 
     entry = irq_desc[vector].msi_desc;
+    if ( !entry )
+       return;
+    /*
+     * Lock here is safe.  msi_desc can not be removed without holding
+     * both irq_desc[].lock (which we do) and pdev->lock.
+     */
+    spin_lock(&entry->dev->lock);
     dev = entry->dev;
     bus = dev->bus;
     slot = PCI_SLOT(dev->devfn);
@@ -761,10 +726,12 @@
     msi_free_vector(vector);
 
     pci_conf_write16(bus, slot, func, msix_control_reg(pos), control);
+    spin_unlock(&dev->lock);
 }
 
 int pci_enable_msi(u8 bus, u8 devfn, int vector, int entry_nr, int msi)
 {
+    ASSERT(spin_is_locked(&irq_desc[vector].lock));
     if ( msi )
         return __pci_enable_msi(bus, devfn, vector);
     else
@@ -773,9 +740,11 @@
 
 void pci_disable_msi(int vector)
 {
-    irq_desc_t *desc;
+    irq_desc_t *desc = &irq_desc[vector];
+    ASSERT(spin_is_locked(&desc->lock));
+    if ( !desc->msi_desc )
+       return;
 
-    desc = &irq_desc[vector];
     if ( desc->msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
         __pci_disable_msi(vector);
     else if ( desc->msi_desc->msi_attrib.type == PCI_CAP_ID_MSIX )
@@ -789,9 +758,17 @@
     irq_desc_t *desc;
     unsigned long flags;
 
+retry:
     list_for_each_entry_safe( entry, tmp, &dev->msi_list, list )
     {
         desc = &irq_desc[entry->vector];
+
+       local_irq_save(flags);
+       if ( !spin_trylock(&desc->lock) )
+       {
+           local_irq_restore(flags);
+           goto retry;
+       }
 
         spin_lock_irqsave(&desc->lock, flags);
         if ( desc->handler == &pci_msi_type )
@@ -800,22 +777,17 @@
             BUG_ON(desc->status & IRQ_GUEST);
             desc->handler = &no_irq_type;
         }
-        spin_unlock_irqrestore(&desc->lock, flags);
 
         msi_free_vector(entry->vector);
+        spin_unlock_irqrestore(&desc->lock, flags);
     }
 }
 
-void pci_cleanup_msi(u8 bus, u8 devfn)
+void pci_cleanup_msi(struct pci_dev *pdev)
 {
-    struct pci_dev *dev = get_msi_pdev(bus, devfn);
-
-    if ( !dev )
-        return;
-
     /* Disable MSI and/or MSI-X */
-    msi_set_enable(dev, 0);
-    msix_set_enable(dev, 0);
-    msi_free_vectors(dev);
+    msi_set_enable(pdev, 0);
+    msix_set_enable(pdev, 0);
+    msi_free_vectors(pdev);
 }
 
diff -r 5b0699fb81a5 xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c    Fri Jul 04 17:04:47 2008 +0100
+++ b/xen/arch/x86/physdev.c    Fri Jul 04 17:19:39 2008 +0100
@@ -114,12 +114,12 @@
             gdprintk(XENLOG_G_ERR, "Map vector %x to msi while it is in use\n",
                      vector);
         desc->handler = &pci_msi_type;
-        spin_unlock_irqrestore(&desc->lock, flags);
 
         ret = pci_enable_msi(map->msi_info.bus,
                                     map->msi_info.devfn, vector,
                                                         map->msi_info.entry_nr,
                                                         map->msi_info.msi);
+        spin_unlock_irqrestore(&desc->lock, flags);
         if ( ret )
             goto done;
     }
@@ -161,10 +161,10 @@
         irq_desc_t *desc;
 
         desc = &irq_desc[vector];
+        spin_lock_irqsave(&desc->lock, flags);
         if ( desc->msi_desc )
             pci_disable_msi(vector);
 
-        spin_lock_irqsave(&desc->lock, flags);
         if ( desc->handler == &pci_msi_type )
         {
             /* MSI is not shared, so should be released already */
diff -r 5b0699fb81a5 xen/drivers/passthrough/Makefile
--- a/xen/drivers/passthrough/Makefile  Fri Jul 04 17:04:47 2008 +0100
+++ b/xen/drivers/passthrough/Makefile  Fri Jul 04 17:19:39 2008 +0100
@@ -3,3 +3,4 @@
 
 obj-y += iommu.o
 obj-y += io.o
+obj-y += pci.o
diff -r 5b0699fb81a5 xen/drivers/passthrough/amd/pci_amd_iommu.c
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c       Fri Jul 04 17:04:47 
2008 +0100
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c       Fri Jul 04 17:19:39 
2008 +0100
@@ -298,6 +298,7 @@
     u32 l;
     int bdf;
 
+    write_lock(&pcidevs_lock);
     for ( bus = 0; bus < 256; bus++ )
     {
         for ( dev = 0; dev < 32; dev++ )
@@ -310,10 +311,9 @@
                      (l == 0x0000ffff) || (l == 0xffff0000) )
                     continue;
 
-                pdev = xmalloc(struct pci_dev);
-                pdev->bus = bus;
-                pdev->devfn = PCI_DEVFN(dev, func);
-                list_add_tail(&pdev->domain_list, &d->arch.pdev_list);
+                pdev = alloc_pdev(bus, PCI_DEVFN(dev, func));
+                pdev->domain = d;
+                list_add(&pdev->domain_list, &d->arch.pdev_list);
 
                 bdf = (bus << 8) | pdev->devfn;
                 /* supported device? */
@@ -325,6 +325,7 @@
             }
         }
     }
+    write_unlock(&pcidevs_lock);
 }
 
 int amd_iov_detect(void)
@@ -493,38 +494,37 @@
     struct amd_iommu *iommu;
     int bdf;
 
-    for_each_pdev ( source, pdev )
+    pdev = pci_lock_domain_pdev(source, bus, devfn);
+    if ( !pdev )
+       return -ENODEV;
+
+    bdf = (bus << 8) | devfn;
+    /* supported device? */
+    iommu = (bdf < ivrs_bdf_entries) ?
+       find_iommu_for_device(bus, pdev->devfn) : NULL;
+
+    if ( !iommu )
     {
-        if ( (pdev->bus != bus) || (pdev->devfn != devfn) )
-            continue;
+       spin_unlock(&pdev->lock);
+       amd_iov_error("Fail to find iommu."
+                     " %x:%x.%x cannot be assigned to domain %d\n", 
+                     bus, PCI_SLOT(devfn), PCI_FUNC(devfn), target->domain_id);
+       return -ENODEV;
+    }
 
-        pdev->bus = bus;
-        pdev->devfn = devfn;
+    amd_iommu_disable_domain_device(source, iommu, bdf);
 
-        bdf = (bus << 8) | devfn;
-        /* supported device? */
-        iommu = (bdf < ivrs_bdf_entries) ?
-            find_iommu_for_device(bus, pdev->devfn) : NULL;
+    write_lock(&pcidevs_lock);
+    list_move(&pdev->domain_list, &target->arch.pdev_list);
+    write_unlock(&pcidevs_lock);
+    pdev->domain = target;
 
-        if ( !iommu )
-        {
-            amd_iov_error("Fail to find iommu."
-                     " %x:%x.%x cannot be assigned to domain %d\n", 
-                     bus, PCI_SLOT(devfn), PCI_FUNC(devfn), target->domain_id);
-            return -ENODEV;
-        }
-
-        amd_iommu_disable_domain_device(source, iommu, bdf);
-        /* Move pci device from the source domain to target domain. */
-        list_move(&pdev->domain_list, &target->arch.pdev_list);
-
-        amd_iommu_setup_domain_device(target, iommu, bdf);
-        amd_iov_info("reassign %x:%x.%x domain %d -> domain %d\n",
+    amd_iommu_setup_domain_device(target, iommu, bdf);
+    amd_iov_info("reassign %x:%x.%x domain %d -> domain %d\n",
                  bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
                  source->domain_id, target->domain_id);
 
-        break;
-    }
+    spin_unlock(&pdev->lock);
     return 0;
 }
 
@@ -552,14 +552,16 @@
 static void release_domain_devices(struct domain *d)
 {
     struct pci_dev *pdev;
+    u8 bus, devfn;
 
-    while ( has_arch_pdevs(d) )
+    while ( (pdev = pci_lock_domain_pdev(d, -1, -1)) )
     {
-        pdev = list_entry(d->arch.pdev_list.next, typeof(*pdev), domain_list);
         pdev_flr(pdev->bus, pdev->devfn);
+       bus = pdev->bus; devfn = pdev->devfn;
+       spin_unlock(&pdev->lock);
         amd_iov_info("release domain %d devices %x:%x.%x\n", d->domain_id,
-                 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
-        reassign_device(d, dom0, pdev->bus, pdev->devfn);
+                    bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+        reassign_device(d, dom0, bus, devfn);
     }
 }
 
@@ -619,11 +621,11 @@
     release_domain_devices(d);
 }
 
-static void amd_iommu_return_device(
+static int amd_iommu_return_device(
     struct domain *s, struct domain *t, u8 bus, u8 devfn)
 {
     pdev_flr(bus, devfn);
-    reassign_device(s, t, bus, devfn);
+    return reassign_device(s, t, bus, devfn);
 }
 
 static int amd_iommu_group_id(u8 bus, u8 devfn)
diff -r 5b0699fb81a5 xen/drivers/passthrough/iommu.c
--- a/xen/drivers/passthrough/iommu.c   Fri Jul 04 17:04:47 2008 +0100
+++ b/xen/drivers/passthrough/iommu.c   Fri Jul 04 17:19:39 2008 +0100
@@ -240,6 +240,7 @@
 
     group_id = ops->get_device_group_id(bus, devfn);
 
+    read_lock(&pcidevs_lock);
     for_each_pdev( d, pdev )
     {
         if ( (pdev->bus == bus) && (pdev->devfn == devfn) )
@@ -252,10 +253,14 @@
             bdf |= (pdev->bus & 0xff) << 16;
             bdf |= (pdev->devfn & 0xff) << 8;
             if ( unlikely(copy_to_guest_offset(buf, i, &bdf, 1)) )
+            {
+                read_unlock(&pcidevs_lock);
                 return -1;
+            }
             i++;
         }
     }
+    read_unlock(&pcidevs_lock);
 
     return i;
 }
diff -r 5b0699fb81a5 xen/drivers/passthrough/pci.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/drivers/passthrough/pci.c     Fri Jul 04 17:19:39 2008 +0100
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2008,  Netronome Systems, Inc.
+ *                
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <xen/sched.h>
+#include <xen/pci.h>
+#include <xen/list.h>
+#include <xen/prefetch.h>
+#include <xen/keyhandler.h>
+
+
+LIST_HEAD(alldevs_list);
+rwlock_t pcidevs_lock = RW_LOCK_UNLOCKED;
+
+struct pci_dev *alloc_pdev(u8 bus, u8 devfn)
+{
+    struct pci_dev *pdev;
+
+    list_for_each_entry ( pdev, &alldevs_list, alldevs_list )
+        if ( pdev->bus == bus && pdev->devfn == devfn )
+           return pdev;
+
+    pdev = xmalloc(struct pci_dev);
+    if ( !pdev )
+       return NULL;
+
+    *((u8*) &pdev->bus) = bus;
+    *((u8*) &pdev->devfn) = devfn;
+    pdev->domain = NULL;
+    spin_lock_init(&pdev->lock);
+    INIT_LIST_HEAD(&pdev->msi_list);
+    list_add(&pdev->alldevs_list, &alldevs_list);
+
+    return pdev;
+}
+
+void free_pdev(struct pci_dev *pdev)
+{
+    list_del(&pdev->alldevs_list);
+    xfree(pdev);
+}
+
+struct pci_dev *pci_lock_pdev(int bus, int devfn)
+{
+    struct pci_dev *pdev;
+
+    read_lock(&pcidevs_lock);
+    list_for_each_entry ( pdev, &alldevs_list, alldevs_list )
+        if ( (pdev->bus == bus || bus == -1) &&
+            (pdev->devfn == devfn || devfn == -1) )
+       {
+           spin_lock(&pdev->lock);
+           read_unlock(&pcidevs_lock);
+           return pdev;
+       }
+    read_unlock(&pcidevs_lock);
+
+    return NULL;
+}
+
+struct pci_dev *pci_lock_domain_pdev(struct domain *d, int bus, int devfn)
+{
+    struct pci_dev *pdev;
+
+    read_lock(&pcidevs_lock);
+    list_for_each_entry ( pdev, &d->arch.pdev_list, domain_list )
+    {
+       spin_lock(&pdev->lock);
+        if ( (pdev->bus == bus || bus == -1) &&
+            (pdev->devfn == devfn || devfn == -1) &&
+            (pdev->domain == d) )
+       {
+           read_unlock(&pcidevs_lock);
+           return pdev;
+       }
+       spin_unlock(&pdev->lock);
+    }
+    read_unlock(&pcidevs_lock);
+
+    return NULL;
+}
+
+static void dump_pci_devices(unsigned char ch)
+{
+    struct pci_dev *pdev;
+    struct msi_desc *msi;
+
+    printk("==== PCI devices ====\n");
+    read_lock(&pcidevs_lock);
+
+    list_for_each_entry ( pdev, &alldevs_list, alldevs_list )
+    {
+       spin_lock(&pdev->lock);
+        printk("%02x:%02x.%x - dom %-3d - MSIs < ",
+               pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
+               pdev->domain ? pdev->domain->domain_id : -1);
+       list_for_each_entry ( msi, &pdev->msi_list, list )
+           printk("%d ", msi->vector);
+       printk(">\n");
+       spin_unlock(&pdev->lock);
+    }
+
+    read_unlock(&pcidevs_lock);
+}
+
+static int __init setup_dump_pcidevs(void)
+{
+    register_keyhandler('P', dump_pci_devices, "dump PCI devices");
+    return 0;
+}
+__initcall(setup_dump_pcidevs);
diff -r 5b0699fb81a5 xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c       Fri Jul 04 17:04:47 2008 +0100
+++ b/xen/drivers/passthrough/vtd/iommu.c       Fri Jul 04 17:19:39 2008 +0100
@@ -1345,7 +1345,7 @@
     return ret;
 }
 
-void reassign_device_ownership(
+static int reassign_device_ownership(
     struct domain *source,
     struct domain *target,
     u8 bus, u8 devfn)
@@ -1353,61 +1353,62 @@
     struct hvm_iommu *source_hd = domain_hvm_iommu(source);
     struct pci_dev *pdev;
     struct acpi_drhd_unit *drhd;
-    struct iommu *iommu;
-    int status;
-    int found = 0;
+    struct iommu *pdev_iommu;
+    int ret, found = 0;
+
+    if ( !(pdev = pci_lock_domain_pdev(source, bus, devfn)) )
+        return -ENODEV;
 
     pdev_flr(bus, devfn);
-
-    for_each_pdev( source, pdev )
-        if ( (pdev->bus == bus) && (pdev->devfn == devfn) )
-            goto found;
-
-    return;
-
-found:
     drhd = acpi_find_matched_drhd_unit(bus, devfn);
-    iommu = drhd->iommu;
+    pdev_iommu = drhd->iommu;
     domain_context_unmap(bus, devfn);
 
-    /* Move pci device from the source domain to target domain. */
+    write_lock(&pcidevs_lock);
     list_move(&pdev->domain_list, &target->arch.pdev_list);
+    write_unlock(&pcidevs_lock);
+    pdev->domain = target;
 
+    ret = domain_context_mapping(target, bus, devfn);
+    spin_unlock(&pdev->lock);
+
+    read_lock(&pcidevs_lock);
     for_each_pdev ( source, pdev )
     {
         drhd = acpi_find_matched_drhd_unit(pdev->bus, pdev->devfn);
-        if ( drhd->iommu == iommu )
+        if ( drhd->iommu == pdev_iommu )
         {
             found = 1;
             break;
         }
     }
+    read_unlock(&pcidevs_lock);
 
     if ( !found )
-        clear_bit(iommu->index, &source_hd->iommu_bitmap);
+        clear_bit(pdev_iommu->index, &source_hd->iommu_bitmap);
 
-    status = domain_context_mapping(target, bus, devfn);
-    if ( status != 0 )
-        gdprintk(XENLOG_ERR VTDPREFIX, "domain_context_mapping failed\n");
+    return ret;
 }
 
 void return_devices_to_dom0(struct domain *d)
 {
     struct pci_dev *pdev;
 
-    while ( has_arch_pdevs(d) )
+    while ( (pdev = pci_lock_domain_pdev(d, -1, -1)) )
     {
-        pdev = list_entry(d->arch.pdev_list.next, typeof(*pdev), domain_list);
-        pci_cleanup_msi(pdev->bus, pdev->devfn);
+        pci_cleanup_msi(pdev);
+        spin_unlock(&pdev->lock);
         reassign_device_ownership(d, dom0, pdev->bus, pdev->devfn);
     }
 
 #ifdef VTD_DEBUG
+    read_lock(&pcidevs_lock);
     for_each_pdev ( dom0, pdev )
         dprintk(XENLOG_INFO VTDPREFIX,
                 "return_devices_to_dom0:%x: bdf = %x:%x:%x\n",
                 dom0->domain_id, pdev->bus,
                 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+    read_unlock(&pcidevs_lock);
 #endif
 }
 
@@ -1568,11 +1569,7 @@
         return ret;
 
     if ( domain_context_mapped(bus, devfn) == 0 )
-    {
         ret = domain_context_mapping(d, bus, devfn);
-        if ( !ret )
-            return 0;
-    }
 
     return ret;
 }
@@ -1586,6 +1583,7 @@
 
     hd = domain_hvm_iommu(d);
 
+    write_lock(&pcidevs_lock);
     for ( bus = 0; bus < 256; bus++ )
     {
         for ( dev = 0; dev < 32; dev++ )
@@ -1597,10 +1595,10 @@
                 if ( (l == 0xffffffff) || (l == 0x00000000) ||
                      (l == 0x0000ffff) || (l == 0xffff0000) )
                     continue;
-                pdev = xmalloc(struct pci_dev);
-                pdev->bus = bus;
-                pdev->devfn = PCI_DEVFN(dev, func);
-                list_add_tail(&pdev->domain_list, &d->arch.pdev_list);
+
+                pdev = alloc_pdev(bus, PCI_DEVFN(dev, func));
+                pdev->domain = d;
+                list_add(&pdev->domain_list, &d->arch.pdev_list);
 
                 ret = domain_context_mapping(d, pdev->bus, pdev->devfn);
                 if ( ret != 0 )
@@ -1609,6 +1607,7 @@
             }
         }
     }
+    write_unlock(&pcidevs_lock);
 }
 
 void clear_fault_bits(struct iommu *iommu)
@@ -1737,9 +1736,11 @@
 {
     struct pci_dev *pdev;
 
-    for_each_pdev( dom0, pdev )
-        if ( (pdev->bus == bus ) && (pdev->devfn == devfn) )
-            return 0;
+    if ( (pdev = pci_lock_domain_pdev(dom0, bus, devfn)) )
+    {
+        spin_unlock(&pdev->lock);
+        return 0;
+    }
 
     return 1;
 }
@@ -1751,9 +1752,11 @@
     u16 bdf;
 
     if ( list_empty(&acpi_drhd_units) )
+        return -ENODEV;
+
+    ret = reassign_device_ownership(dom0, d, bus, devfn);
+    if ( ret )
         return ret;
-
-    reassign_device_ownership(dom0, d, bus, devfn);
 
     /* Setup rmrr identity mapping */
     for_each_rmrr_device( rmrr, bdf, i )
diff -r 5b0699fb81a5 xen/include/asm-x86/msi.h
--- a/xen/include/asm-x86/msi.h Fri Jul 04 17:04:47 2008 +0100
+++ b/xen/include/asm-x86/msi.h Fri Jul 04 17:19:39 2008 +0100
@@ -63,12 +63,10 @@
 /* Helper functions */
 extern void mask_msi_irq(unsigned int irq);
 extern void unmask_msi_irq(unsigned int irq);
-extern void read_msi_msg(unsigned int irq, struct msi_msg *msg);
-extern void write_msi_msg(unsigned int irq, struct msi_msg *msg);
 extern void set_msi_irq_affinity(unsigned int irq, cpumask_t mask);
 extern int pci_enable_msi(u8 bus, u8 devfn, int vector, int entry_nr, int msi);
 extern void pci_disable_msi(int vector);
-extern void pci_cleanup_msi(u8 bus, u8 devfn);
+extern void pci_cleanup_msi(struct pci_dev *pdev);
 
 struct msi_desc {
        struct {
diff -r 5b0699fb81a5 xen/include/xen/iommu.h
--- a/xen/include/xen/iommu.h   Fri Jul 04 17:04:47 2008 +0100
+++ b/xen/include/xen/iommu.h   Fri Jul 04 17:19:39 2008 +0100
@@ -56,6 +56,8 @@
     struct intel_iommu *intel;
 };
 
+int iommu_add_device(u8 bus, u8 devfn);
+void iommu_remove_device(u8 bus, u8 devfn);
 int iommu_domain_init(struct domain *d);
 void iommu_domain_destroy(struct domain *d);
 int device_assigned(u8 bus, u8 devfn);
@@ -63,9 +65,6 @@
 void deassign_device(struct domain *d, u8 bus, u8 devfn);
 int iommu_get_device_group(struct domain *d, u8 bus, u8 devfn, 
     XEN_GUEST_HANDLE_64(uint32) buf, int max_sdevs);
-void reassign_device_ownership(struct domain *source,
-                               struct domain *target,
-                               u8 bus, u8 devfn);
 int iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn);
 int iommu_unmap_page(struct domain *d, unsigned long gfn);
 void iommu_domain_teardown(struct domain *d);
@@ -99,8 +98,8 @@
     void (*teardown)(struct domain *d);
     int (*map_page)(struct domain *d, unsigned long gfn, unsigned long mfn);
     int (*unmap_page)(struct domain *d, unsigned long gfn);
-    void (*reassign_device)(struct domain *s, struct domain *t,
-                            u8 bus, u8 devfn);
+    int (*reassign_device)(struct domain *s, struct domain *t,
+                          u8 bus, u8 devfn);
     int (*get_device_group_id)(u8 bus, u8 devfn);
 };
 
diff -r 5b0699fb81a5 xen/include/xen/pci.h
--- a/xen/include/xen/pci.h     Fri Jul 04 17:04:47 2008 +0100
+++ b/xen/include/xen/pci.h     Fri Jul 04 17:19:39 2008 +0100
@@ -10,6 +10,7 @@
 #include <xen/config.h>
 #include <xen/types.h>
 #include <xen/list.h>
+#include <xen/spinlock.h>
 
 /*
  * The PCI interface treats multi-function devices as independent
@@ -29,15 +30,31 @@
 #define PCI_BDF2(b,df)  (((b & 0xff) << 8) | (df & 0xff))
 
 struct pci_dev {
+    struct list_head alldevs_list;
     struct list_head domain_list;
-    struct list_head msi_dev_list;
-    u8 bus;
-    u8 devfn;
     struct list_head msi_list;
+    struct domain *domain;
+    const u8 bus;
+    const u8 devfn;
+    spinlock_t lock;
 };
 
 #define for_each_pdev(domain, pdev) \
     list_for_each_entry(pdev, &(domain->arch.pdev_list), domain_list)
+
+/*
+ * The pcidevs_lock write-lock must be held when doing alloc_pdev() or
+ * free_pdev().  Never de-reference pdev without holding pdev->lock or
+ * pcidevs_lock.  Always aquire pcidevs_lock before pdev->lock when
+ * doing free_pdev().
+ */
+
+extern rwlock_t pcidevs_lock;
+
+struct pci_dev *alloc_pdev(u8 bus, u8 devfn);
+void free_pdev(struct pci_dev *pdev);
+struct pci_dev *pci_lock_pdev(int bus, int devfn);
+struct pci_dev *pci_lock_domain_pdev(struct domain *d, int bus, int devfn);
 
 
 uint8_t pci_conf_read8(

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.