# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1215190344 -3600
# Node ID 7f7d0e7aa01bbd51f4c2045fe6b99a3f0bcd963e
# Parent bd7f2a120f9446337d1b6a0417eae157e0abe291
Add management and locking of PCI device structures
Add functions for managing pci_dev structures. Create a list
containing all current pci_devs. Remove msi_pdev_list. Create a
read-write lock protecting all pci_dev lists. Add spinlocks for
pci_dev access. Make the necessary modifications to the MSI code.
Signed-off-by: Espen Skoglund <espen.skoglund@xxxxxxxxxxxxx>
---
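As background for the changes below, here is a minimal sketch of how the new
pci_dev interface is meant to be used, based on the functions added in
xen/drivers/passthrough/pci.c and the locking rules documented in
xen/include/xen/pci.h (hypothetical caller code, not part of this patch;
error handling elided):

    /* Device enumeration: alloc_pdev() requires the pcidevs_lock write-lock. */
    write_lock(&pcidevs_lock);
    pdev = alloc_pdev(bus, devfn);
    write_unlock(&pcidevs_lock);

    /* Later lookup: pci_lock_pdev() returns the device with pdev->lock held,
     * or NULL if no matching device exists. */
    pdev = pci_lock_pdev(bus, devfn);
    if ( !pdev )
        return -ENODEV;
    /* ... access pdev fields, e.g. walk pdev->msi_list ... */
    spin_unlock(&pdev->lock);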
xen/arch/x86/i8259.c | 7
xen/arch/x86/msi.c | 202 ++++++++++++----------------
xen/arch/x86/physdev.c | 4
xen/drivers/passthrough/Makefile | 1
xen/drivers/passthrough/amd/pci_amd_iommu.c | 84 +++++------
xen/drivers/passthrough/iommu.c | 5
xen/drivers/passthrough/pci.c | 124 +++++++++++++++++
xen/drivers/passthrough/vtd/iommu.c | 79 +++++-----
xen/include/asm-x86/msi.h | 4
xen/include/xen/iommu.h | 9 -
xen/include/xen/pci.h | 23 ++-
11 files changed, 330 insertions(+), 212 deletions(-)
diff -r bd7f2a120f94 -r 7f7d0e7aa01b xen/arch/x86/i8259.c
--- a/xen/arch/x86/i8259.c Fri Jul 04 17:51:42 2008 +0100
+++ b/xen/arch/x86/i8259.c Fri Jul 04 17:52:24 2008 +0100
@@ -382,7 +382,6 @@ void __devinit init_8259A(int auto_eoi)
static struct irqaction cascade = { no_action, "cascade", NULL};
-extern struct list_head msi_pdev_list;
void __init init_IRQ(void)
{
int i;
@@ -419,7 +418,5 @@ void __init init_IRQ(void)
outb(LATCH >> 8, PIT_CH0); /* MSB */
setup_irq(2, &cascade);
-
- INIT_LIST_HEAD(&msi_pdev_list);
-}
-
+}
+
diff -r bd7f2a120f94 -r 7f7d0e7aa01b xen/arch/x86/msi.c
--- a/xen/arch/x86/msi.c Fri Jul 04 17:51:42 2008 +0100
+++ b/xen/arch/x86/msi.c Fri Jul 04 17:52:24 2008 +0100
@@ -29,21 +29,6 @@
extern int msi_irq_enable;
-/* PCI-dev list with MSI/MSIX capabilities */
-DEFINE_SPINLOCK(msi_pdev_lock);
-struct list_head msi_pdev_list;
-
-struct pci_dev *get_msi_pdev(u8 bus, u8 devfn)
-{
- struct pci_dev *pdev = NULL;
-
- list_for_each_entry(pdev, &msi_pdev_list, msi_dev_list)
- if ( pdev->bus == bus && pdev->devfn == devfn )
- return pdev;
-
- return NULL;
-}
-
/* bitmap indicate which fixed map is free */
DEFINE_SPINLOCK(msix_fixmap_lock);
DECLARE_BITMAP(msix_fixmap_pages, MAX_MSIX_PAGES);
@@ -112,10 +97,8 @@ static void msi_compose_msg(struct pci_d
}
}
-void read_msi_msg(unsigned int irq, struct msi_msg *msg)
-{
- struct msi_desc *entry = irq_desc[irq].msi_desc;
-
+static void read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
+{
switch ( entry->msi_attrib.type )
{
case PCI_CAP_ID_MSI:
@@ -147,7 +130,7 @@ void read_msi_msg(unsigned int irq, stru
{
void __iomem *base;
base = entry->mask_base +
- entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
+ entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
@@ -164,9 +147,6 @@ void read_msi_msg(unsigned int irq, stru
static int set_vector_msi(struct msi_desc *entry)
{
- irq_desc_t *desc;
- unsigned long flags;
-
if ( entry->vector >= NR_VECTORS )
{
dprintk(XENLOG_ERR, "Trying to install msi data for Vector %d\n",
@@ -174,19 +154,12 @@ static int set_vector_msi(struct msi_des
return -EINVAL;
}
- desc = &irq_desc[entry->vector];
- spin_lock_irqsave(&desc->lock, flags);
- desc->msi_desc = entry;
- spin_unlock_irqrestore(&desc->lock, flags);
-
+ irq_desc[entry->vector].msi_desc = entry;
return 0;
}
static int unset_vector_msi(int vector)
{
- irq_desc_t *desc;
- unsigned long flags;
-
if ( vector >= NR_VECTORS )
{
dprintk(XENLOG_ERR, "Trying to uninstall msi data for Vector %d\n",
@@ -194,18 +167,12 @@ static int unset_vector_msi(int vector)
return -EINVAL;
}
- desc = &irq_desc[vector];
- spin_lock_irqsave(&desc->lock, flags);
- desc->msi_desc = NULL;
- spin_unlock_irqrestore(&desc->lock, flags);
-
+ irq_desc[vector].msi_desc = NULL;
return 0;
}
-void write_msi_msg(unsigned int irq, struct msi_msg *msg)
-{
- struct msi_desc *entry = irq_desc[irq].msi_desc;
-
+static void write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
+{
if ( vtd_enabled )
msi_msg_write_remap_rte(entry, msg);
@@ -254,6 +221,7 @@ void write_msi_msg(unsigned int irq, str
void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
{
+ struct msi_desc *desc = irq_desc[irq].msi_desc;
struct msi_msg msg;
unsigned int dest;
@@ -263,12 +231,18 @@ void set_msi_irq_affinity(unsigned int i
mask = TARGET_CPUS;
dest = cpu_mask_to_apicid(mask);
- read_msi_msg(irq, &msg);
+ if ( !desc )
+ return;
+
+ ASSERT(spin_is_locked(&irq_desc[irq].lock));
+ spin_lock(&desc->dev->lock);
+ read_msi_msg(desc, &msg);
msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
- write_msi_msg(irq, &msg);
+ write_msi_msg(desc, &msg);
+ spin_unlock(&desc->dev->lock);
}
static void msi_set_enable(struct pci_dev *dev, int enable)
@@ -290,7 +264,7 @@ static void msi_set_enable(struct pci_de
}
}
-void msix_set_enable(struct pci_dev *dev, int enable)
+static void msix_set_enable(struct pci_dev *dev, int enable)
{
int pos;
u16 control;
@@ -335,6 +309,7 @@ static void msi_set_mask_bit(unsigned in
{
struct msi_desc *entry = irq_desc[irq].msi_desc;
+ ASSERT(spin_is_locked(&irq_desc[irq].lock));
BUG_ON(!entry || !entry->dev);
switch (entry->msi_attrib.type) {
case PCI_CAP_ID_MSI:
@@ -401,7 +376,7 @@ static int setup_msi_irq(struct pci_dev
msi_compose_msg(dev, desc->vector, &msg);
set_vector_msi(desc);
- write_msi_msg(desc->vector, &msg);
+ write_msi_msg(irq_desc[desc->vector].msi_desc, &msg);
return 0;
}
@@ -415,8 +390,8 @@ static void msi_free_vector(int vector)
{
struct msi_desc *entry;
+ ASSERT(spin_is_locked(&irq_desc[vector].lock));
entry = irq_desc[vector].msi_desc;
-
teardown_msi_vector(vector);
if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
@@ -619,35 +594,22 @@ static int __pci_enable_msi(u8 bus, u8 d
static int __pci_enable_msi(u8 bus, u8 devfn, int vector)
{
int status;
- struct pci_dev *dev;
-
- dev = get_msi_pdev(bus, devfn);
- if ( !dev )
- {
- dev = xmalloc(struct pci_dev);
- if ( !dev )
- return -ENOMEM;
- dev->bus = bus;
- dev->devfn = devfn;
- INIT_LIST_HEAD(&dev->msi_list);
- }
-
- if ( find_msi_entry(dev, vector, PCI_CAP_ID_MSI) )
- {
+ struct pci_dev *pdev;
+
+ pdev = pci_lock_pdev(bus, devfn);
+ if ( !pdev )
+ return -ENODEV;
+
+ if ( find_msi_entry(pdev, vector, PCI_CAP_ID_MSI) )
+ {
+ spin_unlock(&pdev->lock);
dprintk(XENLOG_WARNING, "vector %d has already mapped to MSI on device
\
%02x:%02x.%01x.\n", vector, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
return 0;
}
- status = msi_capability_init(dev, vector);
-
- if ( dev != get_msi_pdev(bus, devfn) )
- {
- spin_lock(&msi_pdev_lock);
- list_add_tail(&dev->msi_dev_list, &msi_pdev_list);
- spin_unlock(&msi_pdev_lock);
- }
-
+ status = msi_capability_init(pdev, vector);
+ spin_unlock(&pdev->lock);
return status;
}
@@ -660,6 +622,13 @@ static void __pci_disable_msi(int vector
u8 bus, slot, func;
entry = irq_desc[vector].msi_desc;
+ if ( !entry )
+ return;
+ /*
+ * Lock here is safe. msi_desc can not be removed without holding
+ * both irq_desc[].lock (which we do) and pdev->lock.
+ */
+ spin_lock(&entry->dev->lock);
dev = entry->dev;
bus = dev->bus;
slot = PCI_SLOT(dev->devfn);
@@ -674,6 +643,7 @@ static void __pci_disable_msi(int vector
msi_free_vector(vector);
pci_conf_write16(bus, slot, func, msi_control_reg(pos), control);
+ spin_unlock(&dev->lock);
}
/**
@@ -694,47 +664,35 @@ static int __pci_enable_msix(u8 bus, u8
static int __pci_enable_msix(u8 bus, u8 devfn, int vector, int entry_nr)
{
int status, pos, nr_entries;
- struct pci_dev *dev;
+ struct pci_dev *pdev;
u16 control;
u8 slot = PCI_SLOT(devfn);
u8 func = PCI_FUNC(devfn);
+
+ pdev = pci_lock_pdev(bus, devfn);
+ if ( !pdev )
+ return -ENODEV;
pos = pci_find_cap_offset(bus, slot, func, PCI_CAP_ID_MSIX);
control = pci_conf_read16(bus, slot, func, msi_control_reg(pos));
nr_entries = multi_msix_capable(control);
if (entry_nr > nr_entries)
+ {
+ spin_unlock(&pdev->lock);
return -EINVAL;
-
- /* Check whether driver already requested for MSI-X irqs */
- dev = get_msi_pdev(bus, devfn);
-
- if ( !dev )
- {
- dev = xmalloc(struct pci_dev);
- if ( !dev )
- return -ENOMEM;
- dev->bus = bus;
- dev->devfn = devfn;
- INIT_LIST_HEAD(&dev->msi_list);
- }
-
- if ( find_msi_entry(dev, vector, PCI_CAP_ID_MSIX) )
- {
+ }
+
+ if ( find_msi_entry(pdev, vector, PCI_CAP_ID_MSIX) )
+ {
+ spin_unlock(&pdev->lock);
dprintk(XENLOG_WARNING, "vector %d has already mapped to MSIX on \
device %02x:%02x.%01x.\n", vector, bus,
PCI_SLOT(devfn), PCI_FUNC(devfn));
return 0;
}
- status = msix_capability_init(dev, vector, entry_nr);
-
- if ( dev != get_msi_pdev(bus, devfn) )
- {
- spin_lock(&msi_pdev_lock);
- list_add_tail(&dev->msi_dev_list, &msi_pdev_list);
- spin_unlock(&msi_pdev_lock);
- }
-
+ status = msix_capability_init(pdev, vector, entry_nr);
+ spin_unlock(&pdev->lock);
return status;
}
@@ -747,6 +705,13 @@ static void __pci_disable_msix(int vecto
u8 bus, slot, func;
entry = irq_desc[vector].msi_desc;
+ if ( !entry )
+ return;
+ /*
+ * Lock here is safe. msi_desc can not be removed without holding
+ * both irq_desc[].lock (which we do) and pdev->lock.
+ */
+ spin_lock(&entry->dev->lock);
dev = entry->dev;
bus = dev->bus;
slot = PCI_SLOT(dev->devfn);
@@ -761,10 +726,12 @@ static void __pci_disable_msix(int vecto
msi_free_vector(vector);
pci_conf_write16(bus, slot, func, msix_control_reg(pos), control);
+ spin_unlock(&dev->lock);
}
int pci_enable_msi(u8 bus, u8 devfn, int vector, int entry_nr, int msi)
{
+ ASSERT(spin_is_locked(&irq_desc[vector].lock));
if ( msi )
return __pci_enable_msi(bus, devfn, vector);
else
@@ -773,9 +740,11 @@ int pci_enable_msi(u8 bus, u8 devfn, int
void pci_disable_msi(int vector)
{
- irq_desc_t *desc;
-
- desc = &irq_desc[vector];
+ irq_desc_t *desc = &irq_desc[vector];
+ ASSERT(spin_is_locked(&desc->lock));
+ if ( !desc->msi_desc )
+ return;
+
if ( desc->msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
__pci_disable_msi(vector);
else if ( desc->msi_desc->msi_attrib.type == PCI_CAP_ID_MSIX )
@@ -789,9 +758,17 @@ static void msi_free_vectors(struct pci_
irq_desc_t *desc;
unsigned long flags;
+retry:
list_for_each_entry_safe( entry, tmp, &dev->msi_list, list )
{
desc = &irq_desc[entry->vector];
+
+ local_irq_save(flags);
+ if ( !spin_trylock(&desc->lock) )
+ {
+ local_irq_restore(flags);
+ goto retry;
+ }
- spin_lock_irqsave(&desc->lock, flags);
if ( desc->handler == &pci_msi_type )
@@ -800,22 +777,17 @@ static void msi_free_vectors(struct pci_
BUG_ON(desc->status & IRQ_GUEST);
desc->handler = &no_irq_type;
}
+
+ msi_free_vector(entry->vector);
spin_unlock_irqrestore(&desc->lock, flags);
-
- msi_free_vector(entry->vector);
- }
-}
-
-void pci_cleanup_msi(u8 bus, u8 devfn)
-{
- struct pci_dev *dev = get_msi_pdev(bus, devfn);
-
- if ( !dev )
- return;
-
+ }
+}
+
+void pci_cleanup_msi(struct pci_dev *pdev)
+{
/* Disable MSI and/or MSI-X */
- msi_set_enable(dev, 0);
- msix_set_enable(dev, 0);
- msi_free_vectors(dev);
-}
-
+ msi_set_enable(pdev, 0);
+ msix_set_enable(pdev, 0);
+ msi_free_vectors(pdev);
+}
+
diff -r bd7f2a120f94 -r 7f7d0e7aa01b xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c Fri Jul 04 17:51:42 2008 +0100
+++ b/xen/arch/x86/physdev.c Fri Jul 04 17:52:24 2008 +0100
@@ -114,12 +114,12 @@ static int map_domain_pirq(struct domain
gdprintk(XENLOG_G_ERR, "Map vector %x to msi while it is in use\n",
vector);
desc->handler = &pci_msi_type;
- spin_unlock_irqrestore(&desc->lock, flags);
ret = pci_enable_msi(map->msi_info.bus,
map->msi_info.devfn, vector,
map->msi_info.entry_nr,
map->msi_info.msi);
+ spin_unlock_irqrestore(&desc->lock, flags);
if ( ret )
goto done;
}
@@ -161,10 +161,10 @@ static int unmap_domain_pirq(struct doma
irq_desc_t *desc;
desc = &irq_desc[vector];
+ spin_lock_irqsave(&desc->lock, flags);
if ( desc->msi_desc )
pci_disable_msi(vector);
- spin_lock_irqsave(&desc->lock, flags);
if ( desc->handler == &pci_msi_type )
{
/* MSI is not shared, so should be released already */
diff -r bd7f2a120f94 -r 7f7d0e7aa01b xen/drivers/passthrough/Makefile
--- a/xen/drivers/passthrough/Makefile Fri Jul 04 17:51:42 2008 +0100
+++ b/xen/drivers/passthrough/Makefile Fri Jul 04 17:52:24 2008 +0100
@@ -3,3 +3,4 @@ subdir-$(x86) += amd
obj-y += iommu.o
obj-y += io.o
+obj-y += pci.o
diff -r bd7f2a120f94 -r 7f7d0e7aa01b xen/drivers/passthrough/amd/pci_amd_iommu.c
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c Fri Jul 04 17:51:42 2008 +0100
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c Fri Jul 04 17:52:24 2008 +0100
@@ -298,6 +298,7 @@ static void amd_iommu_setup_dom0_devices
u32 l;
int bdf;
+ write_lock(&pcidevs_lock);
for ( bus = 0; bus < 256; bus++ )
{
for ( dev = 0; dev < 32; dev++ )
@@ -310,10 +311,9 @@ static void amd_iommu_setup_dom0_devices
(l == 0x0000ffff) || (l == 0xffff0000) )
continue;
- pdev = xmalloc(struct pci_dev);
- pdev->bus = bus;
- pdev->devfn = PCI_DEVFN(dev, func);
- list_add_tail(&pdev->domain_list, &d->arch.pdev_list);
+ pdev = alloc_pdev(bus, PCI_DEVFN(dev, func));
+ pdev->domain = d;
+ list_add(&pdev->domain_list, &d->arch.pdev_list);
bdf = (bus << 8) | pdev->devfn;
/* supported device? */
@@ -325,6 +325,7 @@ static void amd_iommu_setup_dom0_devices
}
}
}
+ write_unlock(&pcidevs_lock);
}
int amd_iov_detect(void)
@@ -493,38 +494,37 @@ static int reassign_device( struct domai
struct amd_iommu *iommu;
int bdf;
- for_each_pdev ( source, pdev )
- {
- if ( (pdev->bus != bus) || (pdev->devfn != devfn) )
- continue;
-
- pdev->bus = bus;
- pdev->devfn = devfn;
-
- bdf = (bus << 8) | devfn;
- /* supported device? */
- iommu = (bdf < ivrs_bdf_entries) ?
- find_iommu_for_device(bus, pdev->devfn) : NULL;
-
- if ( !iommu )
- {
- amd_iov_error("Fail to find iommu."
- " %x:%x.%x cannot be assigned to domain %d\n",
- bus, PCI_SLOT(devfn), PCI_FUNC(devfn), target->domain_id);
- return -ENODEV;
- }
-
- amd_iommu_disable_domain_device(source, iommu, bdf);
- /* Move pci device from the source domain to target domain. */
- list_move(&pdev->domain_list, &target->arch.pdev_list);
-
- amd_iommu_setup_domain_device(target, iommu, bdf);
- amd_iov_info("reassign %x:%x.%x domain %d -> domain %d\n",
+ pdev = pci_lock_domain_pdev(source, bus, devfn);
+ if ( !pdev )
+ return -ENODEV;
+
+ bdf = (bus << 8) | devfn;
+ /* supported device? */
+ iommu = (bdf < ivrs_bdf_entries) ?
+ find_iommu_for_device(bus, pdev->devfn) : NULL;
+
+ if ( !iommu )
+ {
+ spin_unlock(&pdev->lock);
+ amd_iov_error("Fail to find iommu."
+ " %x:%x.%x cannot be assigned to domain %d\n",
+ bus, PCI_SLOT(devfn), PCI_FUNC(devfn), target->domain_id);
+ return -ENODEV;
+ }
+
+ amd_iommu_disable_domain_device(source, iommu, bdf);
+
+ write_lock(&pcidevs_lock);
+ list_move(&pdev->domain_list, &target->arch.pdev_list);
+ write_unlock(&pcidevs_lock);
+ pdev->domain = target;
+
+ amd_iommu_setup_domain_device(target, iommu, bdf);
+ amd_iov_info("reassign %x:%x.%x domain %d -> domain %d\n",
bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
source->domain_id, target->domain_id);
- break;
- }
+ spin_unlock(&pdev->lock);
return 0;
}
@@ -552,14 +552,16 @@ static void release_domain_devices(struc
static void release_domain_devices(struct domain *d)
{
struct pci_dev *pdev;
-
- while ( has_arch_pdevs(d) )
- {
- pdev = list_entry(d->arch.pdev_list.next, typeof(*pdev), domain_list);
+ u8 bus, devfn;
+
+ while ( (pdev = pci_lock_domain_pdev(d, -1, -1)) )
+ {
pdev_flr(pdev->bus, pdev->devfn);
+ bus = pdev->bus; devfn = pdev->devfn;
+ spin_unlock(&pdev->lock);
amd_iov_info("release domain %d devices %x:%x.%x\n", d->domain_id,
- pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
- reassign_device(d, dom0, pdev->bus, pdev->devfn);
+ bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+ reassign_device(d, dom0, bus, devfn);
}
}
@@ -619,11 +621,11 @@ static void amd_iommu_domain_destroy(str
release_domain_devices(d);
}
-static void amd_iommu_return_device(
+static int amd_iommu_return_device(
struct domain *s, struct domain *t, u8 bus, u8 devfn)
{
pdev_flr(bus, devfn);
- reassign_device(s, t, bus, devfn);
+ return reassign_device(s, t, bus, devfn);
}
static int amd_iommu_group_id(u8 bus, u8 devfn)
diff -r bd7f2a120f94 -r 7f7d0e7aa01b xen/drivers/passthrough/iommu.c
--- a/xen/drivers/passthrough/iommu.c Fri Jul 04 17:51:42 2008 +0100
+++ b/xen/drivers/passthrough/iommu.c Fri Jul 04 17:52:24 2008 +0100
@@ -240,6 +240,7 @@ int iommu_get_device_group(struct domain
group_id = ops->get_device_group_id(bus, devfn);
+ read_lock(&pcidevs_lock);
for_each_pdev( d, pdev )
{
if ( (pdev->bus == bus) && (pdev->devfn == devfn) )
@@ -252,10 +253,14 @@ int iommu_get_device_group(struct domain
bdf |= (pdev->bus & 0xff) << 16;
bdf |= (pdev->devfn & 0xff) << 8;
if ( unlikely(copy_to_guest_offset(buf, i, &bdf, 1)) )
+ {
+ read_unlock(&pcidevs_lock);
return -1;
+ }
i++;
}
}
+ read_unlock(&pcidevs_lock);
return i;
}
diff -r bd7f2a120f94 -r 7f7d0e7aa01b xen/drivers/passthrough/pci.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/drivers/passthrough/pci.c Fri Jul 04 17:52:24 2008 +0100
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2008, Netronome Systems, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <xen/sched.h>
+#include <xen/pci.h>
+#include <xen/list.h>
+#include <xen/prefetch.h>
+#include <xen/keyhandler.h>
+
+
+LIST_HEAD(alldevs_list);
+rwlock_t pcidevs_lock = RW_LOCK_UNLOCKED;
+
+struct pci_dev *alloc_pdev(u8 bus, u8 devfn)
+{
+ struct pci_dev *pdev;
+
+ list_for_each_entry ( pdev, &alldevs_list, alldevs_list )
+ if ( pdev->bus == bus && pdev->devfn == devfn )
+ return pdev;
+
+ pdev = xmalloc(struct pci_dev);
+ if ( !pdev )
+ return NULL;
+
+ *((u8*) &pdev->bus) = bus;
+ *((u8*) &pdev->devfn) = devfn;
+ pdev->domain = NULL;
+ spin_lock_init(&pdev->lock);
+ INIT_LIST_HEAD(&pdev->msi_list);
+ list_add(&pdev->alldevs_list, &alldevs_list);
+
+ return pdev;
+}
+
+void free_pdev(struct pci_dev *pdev)
+{
+ list_del(&pdev->alldevs_list);
+ xfree(pdev);
+}
+
+struct pci_dev *pci_lock_pdev(int bus, int devfn)
+{
+ struct pci_dev *pdev;
+
+ read_lock(&pcidevs_lock);
+ list_for_each_entry ( pdev, &alldevs_list, alldevs_list )
+ if ( (pdev->bus == bus || bus == -1) &&
+ (pdev->devfn == devfn || devfn == -1) )
+ {
+ spin_lock(&pdev->lock);
+ read_unlock(&pcidevs_lock);
+ return pdev;
+ }
+ read_unlock(&pcidevs_lock);
+
+ return NULL;
+}
+
+struct pci_dev *pci_lock_domain_pdev(struct domain *d, int bus, int devfn)
+{
+ struct pci_dev *pdev;
+
+ read_lock(&pcidevs_lock);
+ list_for_each_entry ( pdev, &d->arch.pdev_list, domain_list )
+ {
+ spin_lock(&pdev->lock);
+ if ( (pdev->bus == bus || bus == -1) &&
+ (pdev->devfn == devfn || devfn == -1) &&
+ (pdev->domain == d) )
+ {
+ read_unlock(&pcidevs_lock);
+ return pdev;
+ }
+ spin_unlock(&pdev->lock);
+ }
+ read_unlock(&pcidevs_lock);
+
+ return NULL;
+}
+
+static void dump_pci_devices(unsigned char ch)
+{
+ struct pci_dev *pdev;
+ struct msi_desc *msi;
+
+ printk("==== PCI devices ====\n");
+ read_lock(&pcidevs_lock);
+
+ list_for_each_entry ( pdev, &alldevs_list, alldevs_list )
+ {
+ spin_lock(&pdev->lock);
+ printk("%02x:%02x.%x - dom %-3d - MSIs < ",
+ pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
+ pdev->domain ? pdev->domain->domain_id : -1);
+ list_for_each_entry ( msi, &pdev->msi_list, list )
+ printk("%d ", msi->vector);
+ printk(">\n");
+ spin_unlock(&pdev->lock);
+ }
+
+ read_unlock(&pcidevs_lock);
+}
+
+static int __init setup_dump_pcidevs(void)
+{
+ register_keyhandler('P', dump_pci_devices, "dump PCI devices");
+ return 0;
+}
+__initcall(setup_dump_pcidevs);
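The per-domain variant pci_lock_domain_pdev() added above follows the same
convention: it holds the pcidevs_lock read-lock only while walking the
domain's device list and returns with the matching pdev->lock held (or NULL).
A sketch of the caller pattern used elsewhere in this patch, e.g. by
return_devices_to_dom0() in the VT-d code (hypothetical fragment, error
handling elided):

    struct pci_dev *pdev = pci_lock_domain_pdev(d, bus, devfn);

    if ( !pdev )
        return -ENODEV;
    /* pdev->lock is held here; pcidevs_lock has already been dropped. */
    pci_cleanup_msi(pdev);
    spin_unlock(&pdev->lock);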
diff -r bd7f2a120f94 -r 7f7d0e7aa01b xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c Fri Jul 04 17:51:42 2008 +0100
+++ b/xen/drivers/passthrough/vtd/iommu.c Fri Jul 04 17:52:24 2008 +0100
@@ -1345,7 +1345,7 @@ static int domain_context_unmap(u8 bus,
return ret;
}
-void reassign_device_ownership(
+static int reassign_device_ownership(
struct domain *source,
struct domain *target,
u8 bus, u8 devfn)
@@ -1353,61 +1353,62 @@ void reassign_device_ownership(
struct hvm_iommu *source_hd = domain_hvm_iommu(source);
struct pci_dev *pdev;
struct acpi_drhd_unit *drhd;
- struct iommu *iommu;
- int status;
- int found = 0;
+ struct iommu *pdev_iommu;
+ int ret, found = 0;
+
+ if ( !(pdev = pci_lock_domain_pdev(source, bus, devfn)) )
+ return -ENODEV;
pdev_flr(bus, devfn);
-
- for_each_pdev( source, pdev )
- if ( (pdev->bus == bus) && (pdev->devfn == devfn) )
- goto found;
-
- return;
-
-found:
drhd = acpi_find_matched_drhd_unit(bus, devfn);
- iommu = drhd->iommu;
+ pdev_iommu = drhd->iommu;
domain_context_unmap(bus, devfn);
- /* Move pci device from the source domain to target domain. */
+ write_lock(&pcidevs_lock);
list_move(&pdev->domain_list, &target->arch.pdev_list);
-
+ write_unlock(&pcidevs_lock);
+ pdev->domain = target;
+
+ ret = domain_context_mapping(target, bus, devfn);
+ spin_unlock(&pdev->lock);
+
+ read_lock(&pcidevs_lock);
for_each_pdev ( source, pdev )
{
drhd = acpi_find_matched_drhd_unit(pdev->bus, pdev->devfn);
- if ( drhd->iommu == iommu )
+ if ( drhd->iommu == pdev_iommu )
{
found = 1;
break;
}
}
+ read_unlock(&pcidevs_lock);
if ( !found )
- clear_bit(iommu->index, &source_hd->iommu_bitmap);
-
- status = domain_context_mapping(target, bus, devfn);
- if ( status != 0 )
- gdprintk(XENLOG_ERR VTDPREFIX, "domain_context_mapping failed\n");
+ clear_bit(pdev_iommu->index, &source_hd->iommu_bitmap);
+
+ return ret;
}
void return_devices_to_dom0(struct domain *d)
{
struct pci_dev *pdev;
- while ( has_arch_pdevs(d) )
- {
- pdev = list_entry(d->arch.pdev_list.next, typeof(*pdev), domain_list);
- pci_cleanup_msi(pdev->bus, pdev->devfn);
+ while ( (pdev = pci_lock_domain_pdev(d, -1, -1)) )
+ {
+ pci_cleanup_msi(pdev);
+ spin_unlock(&pdev->lock);
reassign_device_ownership(d, dom0, pdev->bus, pdev->devfn);
}
#ifdef VTD_DEBUG
+ read_lock(&pcidevs_lock);
for_each_pdev ( dom0, pdev )
dprintk(XENLOG_INFO VTDPREFIX,
"return_devices_to_dom0:%x: bdf = %x:%x:%x\n",
dom0->domain_id, pdev->bus,
PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+ read_unlock(&pcidevs_lock);
#endif
}
@@ -1568,11 +1569,7 @@ static int iommu_prepare_rmrr_dev(struct
return ret;
if ( domain_context_mapped(bus, devfn) == 0 )
- {
ret = domain_context_mapping(d, bus, devfn);
- if ( !ret )
- return 0;
- }
return ret;
}
@@ -1586,6 +1583,7 @@ static void setup_dom0_devices(struct do
hd = domain_hvm_iommu(d);
+ write_lock(&pcidevs_lock);
for ( bus = 0; bus < 256; bus++ )
{
for ( dev = 0; dev < 32; dev++ )
@@ -1597,10 +1595,10 @@ static void setup_dom0_devices(struct do
if ( (l == 0xffffffff) || (l == 0x00000000) ||
(l == 0x0000ffff) || (l == 0xffff0000) )
continue;
- pdev = xmalloc(struct pci_dev);
- pdev->bus = bus;
- pdev->devfn = PCI_DEVFN(dev, func);
- list_add_tail(&pdev->domain_list, &d->arch.pdev_list);
+
+ pdev = alloc_pdev(bus, PCI_DEVFN(dev, func));
+ pdev->domain = d;
+ list_add(&pdev->domain_list, &d->arch.pdev_list);
ret = domain_context_mapping(d, pdev->bus, pdev->devfn);
if ( ret != 0 )
@@ -1609,6 +1607,7 @@ static void setup_dom0_devices(struct do
}
}
}
+ write_unlock(&pcidevs_lock);
}
void clear_fault_bits(struct iommu *iommu)
@@ -1737,9 +1736,11 @@ int device_assigned(u8 bus, u8 devfn)
{
struct pci_dev *pdev;
- for_each_pdev( dom0, pdev )
- if ( (pdev->bus == bus ) && (pdev->devfn == devfn) )
- return 0;
+ if ( (pdev = pci_lock_domain_pdev(dom0, bus, devfn)) )
+ {
+ spin_unlock(&pdev->lock);
+ return 0;
+ }
return 1;
}
@@ -1751,9 +1752,11 @@ int intel_iommu_assign_device(struct dom
u16 bdf;
if ( list_empty(&acpi_drhd_units) )
+ return -ENODEV;
+
+ ret = reassign_device_ownership(dom0, d, bus, devfn);
+ if ( ret )
return ret;
-
- reassign_device_ownership(dom0, d, bus, devfn);
/* Setup rmrr identity mapping */
for_each_rmrr_device( rmrr, bdf, i )
diff -r bd7f2a120f94 -r 7f7d0e7aa01b xen/include/asm-x86/msi.h
--- a/xen/include/asm-x86/msi.h Fri Jul 04 17:51:42 2008 +0100
+++ b/xen/include/asm-x86/msi.h Fri Jul 04 17:52:24 2008 +0100
@@ -63,12 +63,10 @@ struct msi_msg {
/* Helper functions */
extern void mask_msi_irq(unsigned int irq);
extern void unmask_msi_irq(unsigned int irq);
-extern void read_msi_msg(unsigned int irq, struct msi_msg *msg);
-extern void write_msi_msg(unsigned int irq, struct msi_msg *msg);
extern void set_msi_irq_affinity(unsigned int irq, cpumask_t mask);
extern int pci_enable_msi(u8 bus, u8 devfn, int vector, int entry_nr, int msi);
extern void pci_disable_msi(int vector);
-extern void pci_cleanup_msi(u8 bus, u8 devfn);
+extern void pci_cleanup_msi(struct pci_dev *pdev);
struct msi_desc {
struct {
diff -r bd7f2a120f94 -r 7f7d0e7aa01b xen/include/xen/iommu.h
--- a/xen/include/xen/iommu.h Fri Jul 04 17:51:42 2008 +0100
+++ b/xen/include/xen/iommu.h Fri Jul 04 17:52:24 2008 +0100
@@ -56,6 +56,8 @@ struct iommu {
struct intel_iommu *intel;
};
+int iommu_add_device(u8 bus, u8 devfn);
+void iommu_remove_device(u8 bus, u8 devfn);
int iommu_domain_init(struct domain *d);
void iommu_domain_destroy(struct domain *d);
int device_assigned(u8 bus, u8 devfn);
@@ -63,9 +65,6 @@ void deassign_device(struct domain *d, u
void deassign_device(struct domain *d, u8 bus, u8 devfn);
int iommu_get_device_group(struct domain *d, u8 bus, u8 devfn,
XEN_GUEST_HANDLE_64(uint32) buf, int max_sdevs);
-void reassign_device_ownership(struct domain *source,
- struct domain *target,
- u8 bus, u8 devfn);
int iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn);
int iommu_unmap_page(struct domain *d, unsigned long gfn);
void iommu_domain_teardown(struct domain *d);
@@ -99,8 +98,8 @@ struct iommu_ops {
void (*teardown)(struct domain *d);
int (*map_page)(struct domain *d, unsigned long gfn, unsigned long mfn);
int (*unmap_page)(struct domain *d, unsigned long gfn);
- void (*reassign_device)(struct domain *s, struct domain *t,
- u8 bus, u8 devfn);
+ int (*reassign_device)(struct domain *s, struct domain *t,
+ u8 bus, u8 devfn);
int (*get_device_group_id)(u8 bus, u8 devfn);
};
diff -r bd7f2a120f94 -r 7f7d0e7aa01b xen/include/xen/pci.h
--- a/xen/include/xen/pci.h Fri Jul 04 17:51:42 2008 +0100
+++ b/xen/include/xen/pci.h Fri Jul 04 17:52:24 2008 +0100
@@ -10,6 +10,7 @@
#include <xen/config.h>
#include <xen/types.h>
#include <xen/list.h>
+#include <xen/spinlock.h>
/*
* The PCI interface treats multi-function devices as independent
@@ -29,15 +30,31 @@
#define PCI_BDF2(b,df) (((b & 0xff) << 8) | (df & 0xff))
struct pci_dev {
+ struct list_head alldevs_list;
struct list_head domain_list;
- struct list_head msi_dev_list;
- u8 bus;
- u8 devfn;
struct list_head msi_list;
+ struct domain *domain;
+ const u8 bus;
+ const u8 devfn;
+ spinlock_t lock;
};
#define for_each_pdev(domain, pdev) \
list_for_each_entry(pdev, &(domain->arch.pdev_list), domain_list)
+
+/*
+ * The pcidevs_lock write-lock must be held when doing alloc_pdev() or
+ * free_pdev(). Never de-reference pdev without holding pdev->lock or
+ * pcidevs_lock. Always acquire pcidevs_lock before pdev->lock when
+ * doing free_pdev().
+ */
+
+extern rwlock_t pcidevs_lock;
+
+struct pci_dev *alloc_pdev(u8 bus, u8 devfn);
+void free_pdev(struct pci_dev *pdev);
+struct pci_dev *pci_lock_pdev(int bus, int devfn);
+struct pci_dev *pci_lock_domain_pdev(struct domain *d, int bus, int devfn);
uint8_t pci_conf_read8(