WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] [xen-unstable] Add management and locking of PCI device

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] Add management and locking of PCI device structures
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Fri, 04 Jul 2008 16:20:13 -0700
Delivery-date: Fri, 04 Jul 2008 16:20:29 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1215190344 -3600
# Node ID 7f7d0e7aa01bbd51f4c2045fe6b99a3f0bcd963e
# Parent  bd7f2a120f9446337d1b6a0417eae157e0abe291
Add management and locking of PCI device structures

Add functions for managing pci_dev structures.  Create a list
containing all current pci_devs.  Remove msi_pdev_list.  Create a
read-write lock protecting all pci_dev lists.  Add spinlocks for
pci_dev access.  Do necessary modifications to MSI code.

Signed-off-by: Espen Skoglund <espen.skoglund@xxxxxxxxxxxxx>
---
 xen/arch/x86/i8259.c                        |    7 
 xen/arch/x86/msi.c                          |  202 ++++++++++++----------------
 xen/arch/x86/physdev.c                      |    4 
 xen/drivers/passthrough/Makefile            |    1 
 xen/drivers/passthrough/amd/pci_amd_iommu.c |   84 +++++------
 xen/drivers/passthrough/iommu.c             |    5 
 xen/drivers/passthrough/pci.c               |  124 +++++++++++++++++
 xen/drivers/passthrough/vtd/iommu.c         |   79 +++++-----
 xen/include/asm-x86/msi.h                   |    4 
 xen/include/xen/iommu.h                     |    9 -
 xen/include/xen/pci.h                       |   23 ++-
 11 files changed, 330 insertions(+), 212 deletions(-)

diff -r bd7f2a120f94 -r 7f7d0e7aa01b xen/arch/x86/i8259.c
--- a/xen/arch/x86/i8259.c      Fri Jul 04 17:51:42 2008 +0100
+++ b/xen/arch/x86/i8259.c      Fri Jul 04 17:52:24 2008 +0100
@@ -382,7 +382,6 @@ void __devinit init_8259A(int auto_eoi)
 
 static struct irqaction cascade = { no_action, "cascade", NULL};
 
-extern struct list_head msi_pdev_list;
 void __init init_IRQ(void)
 {
     int i;
@@ -419,7 +418,5 @@ void __init init_IRQ(void)
     outb(LATCH >> 8, PIT_CH0);     /* MSB */
 
     setup_irq(2, &cascade);
-
-    INIT_LIST_HEAD(&msi_pdev_list);
-}
-
+}
+
diff -r bd7f2a120f94 -r 7f7d0e7aa01b xen/arch/x86/msi.c
--- a/xen/arch/x86/msi.c        Fri Jul 04 17:51:42 2008 +0100
+++ b/xen/arch/x86/msi.c        Fri Jul 04 17:52:24 2008 +0100
@@ -29,21 +29,6 @@
 
 extern int msi_irq_enable;
 
-/* PCI-dev list with MSI/MSIX capabilities */
-DEFINE_SPINLOCK(msi_pdev_lock);
-struct list_head msi_pdev_list;
-
-struct pci_dev *get_msi_pdev(u8 bus, u8 devfn)
-{
-    struct pci_dev *pdev = NULL;
-
-    list_for_each_entry(pdev, &msi_pdev_list, msi_dev_list)
-        if ( pdev->bus == bus && pdev->devfn == devfn )
-            return pdev;
-
-    return NULL;
-}
-
 /* bitmap indicate which fixed map is free */
 DEFINE_SPINLOCK(msix_fixmap_lock);
 DECLARE_BITMAP(msix_fixmap_pages, MAX_MSIX_PAGES);
@@ -112,10 +97,8 @@ static void msi_compose_msg(struct pci_d
     }
 }
 
-void read_msi_msg(unsigned int irq, struct msi_msg *msg)
-{
-    struct msi_desc *entry = irq_desc[irq].msi_desc;
-
+static void read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
+{
     switch ( entry->msi_attrib.type )
     {
     case PCI_CAP_ID_MSI:
@@ -147,7 +130,7 @@ void read_msi_msg(unsigned int irq, stru
     {
         void __iomem *base;
         base = entry->mask_base +
-            entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
+           entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
 
         msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
         msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
@@ -164,9 +147,6 @@ void read_msi_msg(unsigned int irq, stru
 
 static int set_vector_msi(struct msi_desc *entry)
 {
-    irq_desc_t *desc;
-    unsigned long flags;
-
     if ( entry->vector >= NR_VECTORS )
     {
         dprintk(XENLOG_ERR, "Trying to install msi data for Vector %d\n",
@@ -174,19 +154,12 @@ static int set_vector_msi(struct msi_des
         return -EINVAL;
     }
 
-    desc = &irq_desc[entry->vector];
-    spin_lock_irqsave(&desc->lock, flags);
-    desc->msi_desc = entry;
-    spin_unlock_irqrestore(&desc->lock, flags);
-
+    irq_desc[entry->vector].msi_desc = entry;
     return 0;
 }
 
 static int unset_vector_msi(int vector)
 {
-    irq_desc_t *desc;
-    unsigned long flags;
-
     if ( vector >= NR_VECTORS )
     {
         dprintk(XENLOG_ERR, "Trying to uninstall msi data for Vector %d\n",
@@ -194,18 +167,12 @@ static int unset_vector_msi(int vector)
         return -EINVAL;
     }
 
-    desc = &irq_desc[vector];
-    spin_lock_irqsave(&desc->lock, flags);
-    desc->msi_desc = NULL;
-    spin_unlock_irqrestore(&desc->lock, flags);
-
+    irq_desc[vector].msi_desc = NULL;
     return 0;
 }
 
-void write_msi_msg(unsigned int irq, struct msi_msg *msg)
-{
-    struct msi_desc *entry = irq_desc[irq].msi_desc;
-
+static void write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
+{
     if ( vtd_enabled )
         msi_msg_write_remap_rte(entry, msg);
 
@@ -254,6 +221,7 @@ void write_msi_msg(unsigned int irq, str
 
 void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 {
+    struct msi_desc *desc = irq_desc[irq].msi_desc;
     struct msi_msg msg;
     unsigned int dest;
 
@@ -263,12 +231,18 @@ void set_msi_irq_affinity(unsigned int i
         mask = TARGET_CPUS;
     dest = cpu_mask_to_apicid(mask);
 
-    read_msi_msg(irq, &msg);
+    if ( !desc )
+       return;
+
+    ASSERT(spin_is_locked(&irq_desc[vector].lock));
+    spin_lock(&desc->dev->lock);
+    read_msi_msg(desc, &msg);
 
     msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
     msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
-    write_msi_msg(irq, &msg);
+    write_msi_msg(desc, &msg);
+    spin_unlock(&desc->dev->lock);
 }
 
 static void msi_set_enable(struct pci_dev *dev, int enable)
@@ -290,7 +264,7 @@ static void msi_set_enable(struct pci_de
     }
 }
 
-void msix_set_enable(struct pci_dev *dev, int enable)
+static void msix_set_enable(struct pci_dev *dev, int enable)
 {
     int pos;
     u16 control;
@@ -335,6 +309,7 @@ static void msi_set_mask_bit(unsigned in
 {
     struct msi_desc *entry = irq_desc[irq].msi_desc;
 
+    ASSERT(spin_is_locked(&irq_desc[vector].lock));
     BUG_ON(!entry || !entry->dev);
     switch (entry->msi_attrib.type) {
     case PCI_CAP_ID_MSI:
@@ -401,7 +376,7 @@ static int setup_msi_irq(struct pci_dev 
 
     msi_compose_msg(dev, desc->vector, &msg);
     set_vector_msi(desc);
-    write_msi_msg(desc->vector, &msg);
+    write_msi_msg(irq_desc[desc->vector].msi_desc, &msg);
 
     return 0;
 }
@@ -415,8 +390,8 @@ static void msi_free_vector(int vector)
 {
     struct msi_desc *entry;
 
+    ASSERT(spin_is_locked(&irq_desc[vector].lock));
     entry = irq_desc[vector].msi_desc;
-
     teardown_msi_vector(vector);
 
     if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
@@ -619,35 +594,22 @@ static int __pci_enable_msi(u8 bus, u8 d
 static int __pci_enable_msi(u8 bus, u8 devfn, int vector)
 {
     int status;
-    struct pci_dev *dev;
-
-    dev = get_msi_pdev(bus, devfn);
-    if ( !dev )
-    {
-        dev = xmalloc(struct pci_dev);
-        if ( !dev )
-            return -ENOMEM;
-        dev->bus = bus;
-        dev->devfn = devfn;
-        INIT_LIST_HEAD(&dev->msi_list);
-    }
-
-    if ( find_msi_entry(dev, vector, PCI_CAP_ID_MSI) )
-    {
+    struct pci_dev *pdev;
+
+    pdev = pci_lock_pdev(bus, devfn);
+    if ( !pdev )
+       return -ENODEV;
+
+    if ( find_msi_entry(pdev, vector, PCI_CAP_ID_MSI) )
+    {
+       spin_unlock(&pdev->lock);
         dprintk(XENLOG_WARNING, "vector %d has already mapped to MSI on device 
\
             %02x:%02x.%01x.\n", vector, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
         return 0;
     }
 
-    status = msi_capability_init(dev, vector);
-
-    if ( dev != get_msi_pdev(bus, devfn) )
-    {
-        spin_lock(&msi_pdev_lock);
-        list_add_tail(&dev->msi_dev_list, &msi_pdev_list);
-        spin_unlock(&msi_pdev_lock);
-    }
-
+    status = msi_capability_init(pdev, vector);
+    spin_unlock(&pdev->lock);
     return status;
 }
 
@@ -660,6 +622,13 @@ static void __pci_disable_msi(int vector
     u8 bus, slot, func;
 
     entry = irq_desc[vector].msi_desc;
+    if ( !entry )
+       return;
+    /*
+     * Lock here is safe.  msi_desc can not be removed without holding
+     * both irq_desc[].lock (which we do) and pdev->lock.
+     */
+    spin_lock(&entry->dev->lock);
     dev = entry->dev;
     bus = dev->bus;
     slot = PCI_SLOT(dev->devfn);
@@ -674,6 +643,7 @@ static void __pci_disable_msi(int vector
     msi_free_vector(vector);
 
     pci_conf_write16(bus, slot, func, msi_control_reg(pos), control);
+    spin_unlock(&dev->lock);
 }
 
 /**
@@ -694,47 +664,35 @@ static int __pci_enable_msix(u8 bus, u8 
 static int __pci_enable_msix(u8 bus, u8 devfn, int vector, int entry_nr)
 {
     int status, pos, nr_entries;
-    struct pci_dev *dev;
+    struct pci_dev *pdev;
     u16 control;
     u8 slot = PCI_SLOT(devfn);
     u8 func = PCI_FUNC(devfn);
+
+    pdev = pci_lock_pdev(bus, devfn);
+    if ( !pdev )
+       return -ENODEV;
 
     pos = pci_find_cap_offset(bus, slot, func, PCI_CAP_ID_MSIX);
     control = pci_conf_read16(bus, slot, func, msi_control_reg(pos));
     nr_entries = multi_msix_capable(control);
     if (entry_nr > nr_entries)
+    {
+       spin_unlock(&pdev->lock);
         return -EINVAL;
-
-    /* Check whether driver already requested for MSI-X irqs */
-    dev = get_msi_pdev(bus, devfn);
-
-    if ( !dev )
-    {
-        dev = xmalloc(struct pci_dev);
-        if ( !dev )
-            return -ENOMEM;
-        dev->bus = bus;
-        dev->devfn = devfn;
-        INIT_LIST_HEAD(&dev->msi_list);
-    }
-
-    if ( find_msi_entry(dev, vector, PCI_CAP_ID_MSIX) )
-    {
+    }
+
+    if ( find_msi_entry(pdev, vector, PCI_CAP_ID_MSIX) )
+    {
+       spin_unlock(&pdev->lock);
         dprintk(XENLOG_WARNING, "vector %d has already mapped to MSIX on \
                 device %02x:%02x.%01x.\n", vector, bus,
                 PCI_SLOT(devfn), PCI_FUNC(devfn));
         return 0;
     }
 
-    status = msix_capability_init(dev, vector, entry_nr);
-
-    if ( dev != get_msi_pdev(bus, devfn) )
-    {
-        spin_lock(&msi_pdev_lock);
-        list_add_tail(&dev->msi_dev_list, &msi_pdev_list);
-        spin_unlock(&msi_pdev_lock);
-    }
-
+    status = msix_capability_init(pdev, vector, entry_nr);
+    spin_unlock(&pdev->lock);
     return status;
 }
 
@@ -747,6 +705,13 @@ static void __pci_disable_msix(int vecto
     u8 bus, slot, func;
 
     entry = irq_desc[vector].msi_desc;
+    if ( !entry )
+       return;
+    /*
+     * Lock here is safe.  msi_desc can not be removed without holding
+     * both irq_desc[].lock (which we do) and pdev->lock.
+     */
+    spin_lock(&entry->dev->lock);
     dev = entry->dev;
     bus = dev->bus;
     slot = PCI_SLOT(dev->devfn);
@@ -761,10 +726,12 @@ static void __pci_disable_msix(int vecto
     msi_free_vector(vector);
 
     pci_conf_write16(bus, slot, func, msix_control_reg(pos), control);
+    spin_unlock(&dev->lock);
 }
 
 int pci_enable_msi(u8 bus, u8 devfn, int vector, int entry_nr, int msi)
 {
+    ASSERT(spin_is_locked(&irq_desc[vector].lock));
     if ( msi )
         return __pci_enable_msi(bus, devfn, vector);
     else
@@ -773,9 +740,11 @@ int pci_enable_msi(u8 bus, u8 devfn, int
 
 void pci_disable_msi(int vector)
 {
-    irq_desc_t *desc;
-
-    desc = &irq_desc[vector];
+    irq_desc_t *desc = &irq_desc[vector];
+    ASSERT(spin_is_locked(&desc->lock));
+    if ( !desc->msi_desc )
+       return;
+
     if ( desc->msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
         __pci_disable_msi(vector);
     else if ( desc->msi_desc->msi_attrib.type == PCI_CAP_ID_MSIX )
@@ -789,9 +758,17 @@ static void msi_free_vectors(struct pci_
     irq_desc_t *desc;
     unsigned long flags;
 
+retry:
     list_for_each_entry_safe( entry, tmp, &dev->msi_list, list )
     {
         desc = &irq_desc[entry->vector];
+
+       local_irq_save(flags);
+       if ( !spin_trylock(&desc->lock) )
+       {
+           local_irq_restore(flags);
+           goto retry;
+       }
 
         spin_lock_irqsave(&desc->lock, flags);
         if ( desc->handler == &pci_msi_type )
@@ -800,22 +777,17 @@ static void msi_free_vectors(struct pci_
             BUG_ON(desc->status & IRQ_GUEST);
             desc->handler = &no_irq_type;
         }
+
+        msi_free_vector(entry->vector);
         spin_unlock_irqrestore(&desc->lock, flags);
-
-        msi_free_vector(entry->vector);
-    }
-}
-
-void pci_cleanup_msi(u8 bus, u8 devfn)
-{
-    struct pci_dev *dev = get_msi_pdev(bus, devfn);
-
-    if ( !dev )
-        return;
-
+    }
+}
+
+void pci_cleanup_msi(struct pci_dev *pdev)
+{
     /* Disable MSI and/or MSI-X */
-    msi_set_enable(dev, 0);
-    msix_set_enable(dev, 0);
-    msi_free_vectors(dev);
-}
-
+    msi_set_enable(pdev, 0);
+    msix_set_enable(pdev, 0);
+    msi_free_vectors(pdev);
+}
+
diff -r bd7f2a120f94 -r 7f7d0e7aa01b xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c    Fri Jul 04 17:51:42 2008 +0100
+++ b/xen/arch/x86/physdev.c    Fri Jul 04 17:52:24 2008 +0100
@@ -114,12 +114,12 @@ static int map_domain_pirq(struct domain
             gdprintk(XENLOG_G_ERR, "Map vector %x to msi while it is in use\n",
                      vector);
         desc->handler = &pci_msi_type;
-        spin_unlock_irqrestore(&desc->lock, flags);
 
         ret = pci_enable_msi(map->msi_info.bus,
                                     map->msi_info.devfn, vector,
                                                         map->msi_info.entry_nr,
                                                         map->msi_info.msi);
+        spin_unlock_irqrestore(&desc->lock, flags);
         if ( ret )
             goto done;
     }
@@ -161,10 +161,10 @@ static int unmap_domain_pirq(struct doma
         irq_desc_t *desc;
 
         desc = &irq_desc[vector];
+        spin_lock_irqsave(&desc->lock, flags);
         if ( desc->msi_desc )
             pci_disable_msi(vector);
 
-        spin_lock_irqsave(&desc->lock, flags);
         if ( desc->handler == &pci_msi_type )
         {
             /* MSI is not shared, so should be released already */
diff -r bd7f2a120f94 -r 7f7d0e7aa01b xen/drivers/passthrough/Makefile
--- a/xen/drivers/passthrough/Makefile  Fri Jul 04 17:51:42 2008 +0100
+++ b/xen/drivers/passthrough/Makefile  Fri Jul 04 17:52:24 2008 +0100
@@ -3,3 +3,4 @@ subdir-$(x86) += amd
 
 obj-y += iommu.o
 obj-y += io.o
+obj-y += pci.o
diff -r bd7f2a120f94 -r 7f7d0e7aa01b xen/drivers/passthrough/amd/pci_amd_iommu.c
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c       Fri Jul 04 17:51:42 
2008 +0100
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c       Fri Jul 04 17:52:24 
2008 +0100
@@ -298,6 +298,7 @@ static void amd_iommu_setup_dom0_devices
     u32 l;
     int bdf;
 
+    write_lock(&pcidevs_lock);
     for ( bus = 0; bus < 256; bus++ )
     {
         for ( dev = 0; dev < 32; dev++ )
@@ -310,10 +311,9 @@ static void amd_iommu_setup_dom0_devices
                      (l == 0x0000ffff) || (l == 0xffff0000) )
                     continue;
 
-                pdev = xmalloc(struct pci_dev);
-                pdev->bus = bus;
-                pdev->devfn = PCI_DEVFN(dev, func);
-                list_add_tail(&pdev->domain_list, &d->arch.pdev_list);
+                pdev = alloc_pdev(bus, PCI_DEVFN(dev, func));
+                pdev->domain = d;
+                list_add(&pdev->domain_list, &d->arch.pdev_list);
 
                 bdf = (bus << 8) | pdev->devfn;
                 /* supported device? */
@@ -325,6 +325,7 @@ static void amd_iommu_setup_dom0_devices
             }
         }
     }
+    write_unlock(&pcidevs_lock);
 }
 
 int amd_iov_detect(void)
@@ -493,38 +494,37 @@ static int reassign_device( struct domai
     struct amd_iommu *iommu;
     int bdf;
 
-    for_each_pdev ( source, pdev )
-    {
-        if ( (pdev->bus != bus) || (pdev->devfn != devfn) )
-            continue;
-
-        pdev->bus = bus;
-        pdev->devfn = devfn;
-
-        bdf = (bus << 8) | devfn;
-        /* supported device? */
-        iommu = (bdf < ivrs_bdf_entries) ?
-            find_iommu_for_device(bus, pdev->devfn) : NULL;
-
-        if ( !iommu )
-        {
-            amd_iov_error("Fail to find iommu."
-                     " %x:%x.%x cannot be assigned to domain %d\n", 
-                     bus, PCI_SLOT(devfn), PCI_FUNC(devfn), target->domain_id);
-            return -ENODEV;
-        }
-
-        amd_iommu_disable_domain_device(source, iommu, bdf);
-        /* Move pci device from the source domain to target domain. */
-        list_move(&pdev->domain_list, &target->arch.pdev_list);
-
-        amd_iommu_setup_domain_device(target, iommu, bdf);
-        amd_iov_info("reassign %x:%x.%x domain %d -> domain %d\n",
+    pdev = pci_lock_domain_pdev(source, bus, devfn);
+    if ( !pdev )
+       return -ENODEV;
+
+    bdf = (bus << 8) | devfn;
+    /* supported device? */
+    iommu = (bdf < ivrs_bdf_entries) ?
+       find_iommu_for_device(bus, pdev->devfn) : NULL;
+
+    if ( !iommu )
+    {
+       spin_unlock(&pdev->lock);
+       amd_iov_error("Fail to find iommu."
+                     " %x:%x.%x cannot be assigned to domain %d\n", 
+                     bus, PCI_SLOT(devfn), PCI_FUNC(devfn), target->domain_id);
+       return -ENODEV;
+    }
+
+    amd_iommu_disable_domain_device(source, iommu, bdf);
+
+    write_lock(&pcidevs_lock);
+    list_move(&pdev->domain_list, &target->arch.pdev_list);
+    write_unlock(&pcidevs_lock);
+    pdev->domain = target;
+
+    amd_iommu_setup_domain_device(target, iommu, bdf);
+    amd_iov_info("reassign %x:%x.%x domain %d -> domain %d\n",
                  bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
                  source->domain_id, target->domain_id);
 
-        break;
-    }
+    spin_unlock(&pdev->lock);
     return 0;
 }
 
@@ -552,14 +552,16 @@ static void release_domain_devices(struc
 static void release_domain_devices(struct domain *d)
 {
     struct pci_dev *pdev;
-
-    while ( has_arch_pdevs(d) )
-    {
-        pdev = list_entry(d->arch.pdev_list.next, typeof(*pdev), domain_list);
+    u8 bus, devfn;
+
+    while ( (pdev = pci_lock_domain_pdev(d, -1, -1)) )
+    {
         pdev_flr(pdev->bus, pdev->devfn);
+       bus = pdev->bus; devfn = pdev->devfn;
+       spin_unlock(&pdev->lock);
         amd_iov_info("release domain %d devices %x:%x.%x\n", d->domain_id,
-                 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
-        reassign_device(d, dom0, pdev->bus, pdev->devfn);
+                    bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+        reassign_device(d, dom0, bus, devfn);
     }
 }
 
@@ -619,11 +621,11 @@ static void amd_iommu_domain_destroy(str
     release_domain_devices(d);
 }
 
-static void amd_iommu_return_device(
+static int amd_iommu_return_device(
     struct domain *s, struct domain *t, u8 bus, u8 devfn)
 {
     pdev_flr(bus, devfn);
-    reassign_device(s, t, bus, devfn);
+    return reassign_device(s, t, bus, devfn);
 }
 
 static int amd_iommu_group_id(u8 bus, u8 devfn)
diff -r bd7f2a120f94 -r 7f7d0e7aa01b xen/drivers/passthrough/iommu.c
--- a/xen/drivers/passthrough/iommu.c   Fri Jul 04 17:51:42 2008 +0100
+++ b/xen/drivers/passthrough/iommu.c   Fri Jul 04 17:52:24 2008 +0100
@@ -240,6 +240,7 @@ int iommu_get_device_group(struct domain
 
     group_id = ops->get_device_group_id(bus, devfn);
 
+    read_lock(&pcidevs_lock);
     for_each_pdev( d, pdev )
     {
         if ( (pdev->bus == bus) && (pdev->devfn == devfn) )
@@ -252,10 +253,14 @@ int iommu_get_device_group(struct domain
             bdf |= (pdev->bus & 0xff) << 16;
             bdf |= (pdev->devfn & 0xff) << 8;
             if ( unlikely(copy_to_guest_offset(buf, i, &bdf, 1)) )
+            {
+                read_unlock(&pcidevs_lock);
                 return -1;
+            }
             i++;
         }
     }
+    read_unlock(&pcidevs_lock);
 
     return i;
 }
diff -r bd7f2a120f94 -r 7f7d0e7aa01b xen/drivers/passthrough/pci.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/drivers/passthrough/pci.c     Fri Jul 04 17:52:24 2008 +0100
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2008,  Netronome Systems, Inc.
+ *                
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <xen/sched.h>
+#include <xen/pci.h>
+#include <xen/list.h>
+#include <xen/prefetch.h>
+#include <xen/keyhandler.h>
+
+
+LIST_HEAD(alldevs_list);
+rwlock_t pcidevs_lock = RW_LOCK_UNLOCKED;
+
+struct pci_dev *alloc_pdev(u8 bus, u8 devfn)
+{
+    struct pci_dev *pdev;
+
+    list_for_each_entry ( pdev, &alldevs_list, alldevs_list )
+        if ( pdev->bus == bus && pdev->devfn == devfn )
+           return pdev;
+
+    pdev = xmalloc(struct pci_dev);
+    if ( !pdev )
+       return NULL;
+
+    *((u8*) &pdev->bus) = bus;
+    *((u8*) &pdev->devfn) = devfn;
+    pdev->domain = NULL;
+    spin_lock_init(&pdev->lock);
+    INIT_LIST_HEAD(&pdev->msi_list);
+    list_add(&pdev->alldevs_list, &alldevs_list);
+
+    return pdev;
+}
+
+void free_pdev(struct pci_dev *pdev)
+{
+    list_del(&pdev->alldevs_list);
+    xfree(pdev);
+}
+
+struct pci_dev *pci_lock_pdev(int bus, int devfn)
+{
+    struct pci_dev *pdev;
+
+    read_lock(&pcidevs_lock);
+    list_for_each_entry ( pdev, &alldevs_list, alldevs_list )
+        if ( (pdev->bus == bus || bus == -1) &&
+            (pdev->devfn == devfn || devfn == -1) )
+       {
+           spin_lock(&pdev->lock);
+           read_unlock(&pcidevs_lock);
+           return pdev;
+       }
+    read_unlock(&pcidevs_lock);
+
+    return NULL;
+}
+
+struct pci_dev *pci_lock_domain_pdev(struct domain *d, int bus, int devfn)
+{
+    struct pci_dev *pdev;
+
+    read_lock(&pcidevs_lock);
+    list_for_each_entry ( pdev, &d->arch.pdev_list, domain_list )
+    {
+       spin_lock(&pdev->lock);
+        if ( (pdev->bus == bus || bus == -1) &&
+            (pdev->devfn == devfn || devfn == -1) &&
+            (pdev->domain == d) )
+       {
+           read_unlock(&pcidevs_lock);
+           return pdev;
+       }
+       spin_unlock(&pdev->lock);
+    }
+    read_unlock(&pcidevs_lock);
+
+    return NULL;
+}
+
+static void dump_pci_devices(unsigned char ch)
+{
+    struct pci_dev *pdev;
+    struct msi_desc *msi;
+
+    printk("==== PCI devices ====\n");
+    read_lock(&pcidevs_lock);
+
+    list_for_each_entry ( pdev, &alldevs_list, alldevs_list )
+    {
+       spin_lock(&pdev->lock);
+        printk("%02x:%02x.%x - dom %-3d - MSIs < ",
+               pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
+               pdev->domain ? pdev->domain->domain_id : -1);
+       list_for_each_entry ( msi, &pdev->msi_list, list )
+           printk("%d ", msi->vector);
+       printk(">\n");
+       spin_unlock(&pdev->lock);
+    }
+
+    read_unlock(&pcidevs_lock);
+}
+
+static int __init setup_dump_pcidevs(void)
+{
+    register_keyhandler('P', dump_pci_devices, "dump PCI devices");
+    return 0;
+}
+__initcall(setup_dump_pcidevs);
diff -r bd7f2a120f94 -r 7f7d0e7aa01b xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c       Fri Jul 04 17:51:42 2008 +0100
+++ b/xen/drivers/passthrough/vtd/iommu.c       Fri Jul 04 17:52:24 2008 +0100
@@ -1345,7 +1345,7 @@ static int domain_context_unmap(u8 bus, 
     return ret;
 }
 
-void reassign_device_ownership(
+static int reassign_device_ownership(
     struct domain *source,
     struct domain *target,
     u8 bus, u8 devfn)
@@ -1353,61 +1353,62 @@ void reassign_device_ownership(
     struct hvm_iommu *source_hd = domain_hvm_iommu(source);
     struct pci_dev *pdev;
     struct acpi_drhd_unit *drhd;
-    struct iommu *iommu;
-    int status;
-    int found = 0;
+    struct iommu *pdev_iommu;
+    int ret, found = 0;
+
+    if ( !(pdev = pci_lock_domain_pdev(source, bus, devfn)) )
+        return -ENODEV;
 
     pdev_flr(bus, devfn);
-
-    for_each_pdev( source, pdev )
-        if ( (pdev->bus == bus) && (pdev->devfn == devfn) )
-            goto found;
-
-    return;
-
-found:
     drhd = acpi_find_matched_drhd_unit(bus, devfn);
-    iommu = drhd->iommu;
+    pdev_iommu = drhd->iommu;
     domain_context_unmap(bus, devfn);
 
-    /* Move pci device from the source domain to target domain. */
+    write_lock(&pcidevs_lock);
     list_move(&pdev->domain_list, &target->arch.pdev_list);
-
+    write_unlock(&pcidevs_lock);
+    pdev->domain = target;
+
+    ret = domain_context_mapping(target, bus, devfn);
+    spin_unlock(&pdev->lock);
+
+    read_lock(&pcidevs_lock);
     for_each_pdev ( source, pdev )
     {
         drhd = acpi_find_matched_drhd_unit(pdev->bus, pdev->devfn);
-        if ( drhd->iommu == iommu )
+        if ( drhd->iommu == pdev_iommu )
         {
             found = 1;
             break;
         }
     }
+    read_unlock(&pcidevs_lock);
 
     if ( !found )
-        clear_bit(iommu->index, &source_hd->iommu_bitmap);
-
-    status = domain_context_mapping(target, bus, devfn);
-    if ( status != 0 )
-        gdprintk(XENLOG_ERR VTDPREFIX, "domain_context_mapping failed\n");
+        clear_bit(pdev_iommu->index, &source_hd->iommu_bitmap);
+
+    return ret;
 }
 
 void return_devices_to_dom0(struct domain *d)
 {
     struct pci_dev *pdev;
 
-    while ( has_arch_pdevs(d) )
-    {
-        pdev = list_entry(d->arch.pdev_list.next, typeof(*pdev), domain_list);
-        pci_cleanup_msi(pdev->bus, pdev->devfn);
+    while ( (pdev = pci_lock_domain_pdev(d, -1, -1)) )
+    {
+        pci_cleanup_msi(pdev);
+        spin_unlock(&pdev->lock);
         reassign_device_ownership(d, dom0, pdev->bus, pdev->devfn);
     }
 
 #ifdef VTD_DEBUG
+    read_lock(&pcidevs_lock);
     for_each_pdev ( dom0, pdev )
         dprintk(XENLOG_INFO VTDPREFIX,
                 "return_devices_to_dom0:%x: bdf = %x:%x:%x\n",
                 dom0->domain_id, pdev->bus,
                 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+    read_unlock(&pcidevs_lock);
 #endif
 }
 
@@ -1568,11 +1569,7 @@ static int iommu_prepare_rmrr_dev(struct
         return ret;
 
     if ( domain_context_mapped(bus, devfn) == 0 )
-    {
         ret = domain_context_mapping(d, bus, devfn);
-        if ( !ret )
-            return 0;
-    }
 
     return ret;
 }
@@ -1586,6 +1583,7 @@ static void setup_dom0_devices(struct do
 
     hd = domain_hvm_iommu(d);
 
+    write_lock(&pcidevs_lock);
     for ( bus = 0; bus < 256; bus++ )
     {
         for ( dev = 0; dev < 32; dev++ )
@@ -1597,10 +1595,10 @@ static void setup_dom0_devices(struct do
                 if ( (l == 0xffffffff) || (l == 0x00000000) ||
                      (l == 0x0000ffff) || (l == 0xffff0000) )
                     continue;
-                pdev = xmalloc(struct pci_dev);
-                pdev->bus = bus;
-                pdev->devfn = PCI_DEVFN(dev, func);
-                list_add_tail(&pdev->domain_list, &d->arch.pdev_list);
+
+                pdev = alloc_pdev(bus, PCI_DEVFN(dev, func));
+                pdev->domain = d;
+                list_add(&pdev->domain_list, &d->arch.pdev_list);
 
                 ret = domain_context_mapping(d, pdev->bus, pdev->devfn);
                 if ( ret != 0 )
@@ -1609,6 +1607,7 @@ static void setup_dom0_devices(struct do
             }
         }
     }
+    write_unlock(&pcidevs_lock);
 }
 
 void clear_fault_bits(struct iommu *iommu)
@@ -1737,9 +1736,11 @@ int device_assigned(u8 bus, u8 devfn)
 {
     struct pci_dev *pdev;
 
-    for_each_pdev( dom0, pdev )
-        if ( (pdev->bus == bus ) && (pdev->devfn == devfn) )
-            return 0;
+    if ( (pdev = pci_lock_domain_pdev(dom0, bus, devfn)) )
+    {
+        spin_unlock(&pdev->lock);
+        return 0;
+    }
 
     return 1;
 }
@@ -1751,9 +1752,11 @@ int intel_iommu_assign_device(struct dom
     u16 bdf;
 
     if ( list_empty(&acpi_drhd_units) )
+        return -ENODEV;
+
+    ret = reassign_device_ownership(dom0, d, bus, devfn);
+    if ( ret )
         return ret;
-
-    reassign_device_ownership(dom0, d, bus, devfn);
 
     /* Setup rmrr identity mapping */
     for_each_rmrr_device( rmrr, bdf, i )
diff -r bd7f2a120f94 -r 7f7d0e7aa01b xen/include/asm-x86/msi.h
--- a/xen/include/asm-x86/msi.h Fri Jul 04 17:51:42 2008 +0100
+++ b/xen/include/asm-x86/msi.h Fri Jul 04 17:52:24 2008 +0100
@@ -63,12 +63,10 @@ struct msi_msg {
 /* Helper functions */
 extern void mask_msi_irq(unsigned int irq);
 extern void unmask_msi_irq(unsigned int irq);
-extern void read_msi_msg(unsigned int irq, struct msi_msg *msg);
-extern void write_msi_msg(unsigned int irq, struct msi_msg *msg);
 extern void set_msi_irq_affinity(unsigned int irq, cpumask_t mask);
 extern int pci_enable_msi(u8 bus, u8 devfn, int vector, int entry_nr, int msi);
 extern void pci_disable_msi(int vector);
-extern void pci_cleanup_msi(u8 bus, u8 devfn);
+extern void pci_cleanup_msi(struct pci_dev *pdev);
 
 struct msi_desc {
        struct {
diff -r bd7f2a120f94 -r 7f7d0e7aa01b xen/include/xen/iommu.h
--- a/xen/include/xen/iommu.h   Fri Jul 04 17:51:42 2008 +0100
+++ b/xen/include/xen/iommu.h   Fri Jul 04 17:52:24 2008 +0100
@@ -56,6 +56,8 @@ struct iommu {
     struct intel_iommu *intel;
 };
 
+int iommu_add_device(u8 bus, u8 devfn);
+void iommu_remove_device(u8 bus, u8 devfn);
 int iommu_domain_init(struct domain *d);
 void iommu_domain_destroy(struct domain *d);
 int device_assigned(u8 bus, u8 devfn);
@@ -63,9 +65,6 @@ void deassign_device(struct domain *d, u
 void deassign_device(struct domain *d, u8 bus, u8 devfn);
 int iommu_get_device_group(struct domain *d, u8 bus, u8 devfn, 
     XEN_GUEST_HANDLE_64(uint32) buf, int max_sdevs);
-void reassign_device_ownership(struct domain *source,
-                               struct domain *target,
-                               u8 bus, u8 devfn);
 int iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn);
 int iommu_unmap_page(struct domain *d, unsigned long gfn);
 void iommu_domain_teardown(struct domain *d);
@@ -99,8 +98,8 @@ struct iommu_ops {
     void (*teardown)(struct domain *d);
     int (*map_page)(struct domain *d, unsigned long gfn, unsigned long mfn);
     int (*unmap_page)(struct domain *d, unsigned long gfn);
-    void (*reassign_device)(struct domain *s, struct domain *t,
-                            u8 bus, u8 devfn);
+    int (*reassign_device)(struct domain *s, struct domain *t,
+                          u8 bus, u8 devfn);
     int (*get_device_group_id)(u8 bus, u8 devfn);
 };
 
diff -r bd7f2a120f94 -r 7f7d0e7aa01b xen/include/xen/pci.h
--- a/xen/include/xen/pci.h     Fri Jul 04 17:51:42 2008 +0100
+++ b/xen/include/xen/pci.h     Fri Jul 04 17:52:24 2008 +0100
@@ -10,6 +10,7 @@
 #include <xen/config.h>
 #include <xen/types.h>
 #include <xen/list.h>
+#include <xen/spinlock.h>
 
 /*
  * The PCI interface treats multi-function devices as independent
@@ -29,15 +30,31 @@
 #define PCI_BDF2(b,df)  (((b & 0xff) << 8) | (df & 0xff))
 
 struct pci_dev {
+    struct list_head alldevs_list;
     struct list_head domain_list;
-    struct list_head msi_dev_list;
-    u8 bus;
-    u8 devfn;
     struct list_head msi_list;
+    struct domain *domain;
+    const u8 bus;
+    const u8 devfn;
+    spinlock_t lock;
 };
 
 #define for_each_pdev(domain, pdev) \
     list_for_each_entry(pdev, &(domain->arch.pdev_list), domain_list)
+
+/*
+ * The pcidevs_lock write-lock must be held when doing alloc_pdev() or
+ * free_pdev().  Never de-reference pdev without holding pdev->lock or
+ * pcidevs_lock.  Always aquire pcidevs_lock before pdev->lock when
+ * doing free_pdev().
+ */
+
+extern rwlock_t pcidevs_lock;
+
+struct pci_dev *alloc_pdev(u8 bus, u8 devfn);
+void free_pdev(struct pci_dev *pdev);
+struct pci_dev *pci_lock_pdev(int bus, int devfn);
+struct pci_dev *pci_lock_domain_pdev(struct domain *d, int bus, int devfn);
 
 
 uint8_t pci_conf_read8(

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-changelog] [xen-unstable] Add management and locking of PCI device structures, Xen patchbot-unstable <=