[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 6/8] IOMMU: add phantom function support



Apart from generating device context entries for the base function,
all phantom functions also need context entries to be generated for
them.

In order to distinguish different use cases, a variant of
pci_get_pdev() is being introduced that, even when passed a phantom
function number, would return the underlying actual device.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/xen/drivers/passthrough/amd/iommu_cmd.c
+++ b/xen/drivers/passthrough/amd/iommu_cmd.c
@@ -339,7 +339,15 @@ static void amd_iommu_flush_all_iotlbs(s
         return;
 
     for_each_pdev( d, pdev )
-        amd_iommu_flush_iotlb(pdev->devfn, pdev, gaddr, order);
+    {
+        u8 devfn = pdev->devfn;
+
+        do {
+            amd_iommu_flush_iotlb(devfn, pdev, gaddr, order);
+            devfn += pdev->phantom_stride;
+        } while ( devfn != pdev->devfn &&
+                  PCI_SLOT(devfn) == PCI_SLOT(pdev->devfn) );
+    }
 }
 
 /* Flush iommu cache after p2m changes. */
--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -667,7 +667,7 @@ void parse_ppr_log_entry(struct amd_iomm
     devfn = PCI_DEVFN2(device_id);
 
     spin_lock(&pcidevs_lock);
-    pdev = pci_get_pdev(iommu->seg, bus, devfn);
+    pdev = pci_get_real_pdev(iommu->seg, bus, devfn);
     spin_unlock(&pcidevs_lock);
 
     if ( pdev )
--- a/xen/drivers/passthrough/amd/iommu_map.c
+++ b/xen/drivers/passthrough/amd/iommu_map.c
@@ -598,7 +598,6 @@ static int update_paging_mode(struct dom
         for_each_pdev( d, pdev )
         {
             bdf = PCI_BDF2(pdev->bus, pdev->devfn);
-            req_id = get_dma_requestor_id(pdev->seg, bdf);
             iommu = find_iommu_for_device(pdev->seg, bdf);
             if ( !iommu )
             {
@@ -607,16 +606,21 @@ static int update_paging_mode(struct dom
             }
 
             spin_lock_irqsave(&iommu->lock, flags);
-            device_entry = iommu->dev_table.buffer +
-                           (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
-
-            /* valid = 0 only works for dom0 passthrough mode */
-            amd_iommu_set_root_page_table((u32 *)device_entry,
-                                          page_to_maddr(hd->root_table),
-                                          hd->domain_id,
-                                          hd->paging_mode, 1);
-
-            amd_iommu_flush_device(iommu, req_id);
+            do {
+                req_id = get_dma_requestor_id(pdev->seg, bdf);
+                device_entry = iommu->dev_table.buffer +
+                               (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
+
+                /* valid = 0 only works for dom0 passthrough mode */
+                amd_iommu_set_root_page_table((u32 *)device_entry,
+                                              page_to_maddr(hd->root_table),
+                                              hd->domain_id,
+                                              hd->paging_mode, 1);
+
+                amd_iommu_flush_device(iommu, req_id);
+                bdf += pdev->phantom_stride;
+            } while ( PCI_DEVFN2(bdf) != pdev->devfn &&
+                      PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) );
             spin_unlock_irqrestore(&iommu->lock, flags);
         }
 
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -158,6 +158,8 @@ void __init iommu_dom0_init(struct domai
 int iommu_add_device(struct pci_dev *pdev)
 {
     struct hvm_iommu *hd;
+    int rc;
+    u8 devfn;
 
     if ( !pdev->domain )
         return -EINVAL;
@@ -168,7 +170,20 @@ int iommu_add_device(struct pci_dev *pde
     if ( !iommu_enabled || !hd->platform_ops )
         return 0;
 
-    return hd->platform_ops->add_device(pdev->devfn, pdev);
+    rc = hd->platform_ops->add_device(pdev->devfn, pdev);
+    if ( rc || !pdev->phantom_stride )
+        return rc;
+
+    for ( devfn = pdev->devfn ; ; )
+    {
+        devfn += pdev->phantom_stride;
+        if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
+            return 0;
+        rc = hd->platform_ops->add_device(devfn, pdev);
+        if ( rc )
+            printk(XENLOG_WARNING "IOMMU: add %04x:%02x:%02x.%u failed (%d)\n",
+                   pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), rc);
+    }
 }
 
 int iommu_enable_device(struct pci_dev *pdev)
@@ -191,6 +206,8 @@ int iommu_enable_device(struct pci_dev *
 int iommu_remove_device(struct pci_dev *pdev)
 {
     struct hvm_iommu *hd;
+    u8 devfn;
+
     if ( !pdev->domain )
         return -EINVAL;
 
@@ -198,6 +215,22 @@ int iommu_remove_device(struct pci_dev *
     if ( !iommu_enabled || !hd->platform_ops )
         return 0;
 
+    for ( devfn = pdev->devfn ; pdev->phantom_stride; )
+    {
+        int rc;
+
+        devfn += pdev->phantom_stride;
+        if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
+            break;
+        rc = hd->platform_ops->remove_device(devfn, pdev);
+        if ( !rc )
+            continue;
+
+        printk(XENLOG_ERR "IOMMU: remove %04x:%02x:%02x.%u failed (%d)\n",
+               pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), rc);
+        return rc;
+    }
+
     return hd->platform_ops->remove_device(pdev->devfn, pdev);
 }
 
@@ -245,6 +278,18 @@ static int assign_device(struct domain *
     if ( (rc = hd->platform_ops->assign_device(d, devfn, pdev)) )
         goto done;
 
+    for ( ; pdev->phantom_stride; rc = 0 )
+    {
+        devfn += pdev->phantom_stride;
+        if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
+            break;
+        rc = hd->platform_ops->assign_device(d, devfn, pdev);
+        if ( rc )
+            printk(XENLOG_G_WARNING "d%d: assign %04x:%02x:%02x.%u failed 
(%d)\n",
+                   d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+                   rc);
+    }
+
     if ( has_arch_pdevs(d) && !need_iommu(d) )
     {
         d->need_iommu = 1;
@@ -377,6 +422,21 @@ int deassign_device(struct domain *d, u1
     if ( !pdev )
         return -ENODEV;
 
+    while ( pdev->phantom_stride )
+    {
+        devfn += pdev->phantom_stride;
+        if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
+            break;
+        ret = hd->platform_ops->reassign_device(d, dom0, devfn, pdev);
+        if ( !ret )
+            continue;
+
+        printk(XENLOG_G_ERR "d%d: deassign %04x:%02x:%02x.%u failed (%d)\n",
+               d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), ret);
+        return ret;
+    }
+
+    devfn = pdev->devfn;
     ret = hd->platform_ops->reassign_device(d, dom0, devfn, pdev);
     if ( ret )
     {
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -144,6 +144,8 @@ static struct pci_dev *alloc_pdev(struct
     /* update bus2bridge */
     switch ( pdev->type = pdev_type(pseg->nr, bus, devfn) )
     {
+        int pos;
+        u16 cap;
         u8 sec_bus, sub_bus;
 
         case DEV_TYPE_PCIe_BRIDGE:
@@ -167,6 +169,20 @@ static struct pci_dev *alloc_pdev(struct
             break;
 
         case DEV_TYPE_PCIe_ENDPOINT:
+            pos = pci_find_cap_offset(pseg->nr, bus, PCI_SLOT(devfn),
+                                      PCI_FUNC(devfn), PCI_CAP_ID_EXP);
+            BUG_ON(!pos);
+            cap = pci_conf_read16(pseg->nr, bus, PCI_SLOT(devfn),
+                                  PCI_FUNC(devfn), pos + PCI_EXP_DEVCAP);
+            if ( cap & PCI_EXP_DEVCAP_PHANTOM )
+            {
+                pdev->phantom_stride = 8 >> MASK_EXTR(cap,
+                                                      PCI_EXP_DEVCAP_PHANTOM);
+                if ( PCI_FUNC(devfn) >= pdev->phantom_stride )
+                    pdev->phantom_stride = 0;
+            }
+            break;
+
         case DEV_TYPE_PCI:
             break;
 
@@ -290,6 +306,27 @@ struct pci_dev *pci_get_pdev(int seg, in
     return NULL;
 }
 
+struct pci_dev *pci_get_real_pdev(int seg, int bus, int devfn)
+{
+    struct pci_dev *pdev;
+    int stride;
+
+    if ( seg < 0 || bus < 0 || devfn < 0 )
+        return NULL;
+
+    for ( pdev = pci_get_pdev(seg, bus, devfn), stride = 4;
+          !pdev && stride; stride >>= 1 )
+    {
+        if ( !(devfn & (8 - stride)) )
+            continue;
+        pdev = pci_get_pdev(seg, bus, devfn & ~(8 - stride));
+        if ( pdev && stride != pdev->phantom_stride )
+            pdev = NULL;
+    }
+
+    return pdev;
+}
+
 struct pci_dev *pci_get_pdev_by_domain(
     struct domain *d, int seg, int bus, int devfn)
 {
@@ -488,8 +525,19 @@ int pci_add_device(u16 seg, u8 bus, u8 d
 
 out:
     spin_unlock(&pcidevs_lock);
-    printk(XENLOG_DEBUG "PCI add %s %04x:%02x:%02x.%u\n", pdev_type,
-           seg, bus, slot, func);
+    if ( !ret )
+    {
+        printk(XENLOG_DEBUG "PCI add %s %04x:%02x:%02x.%u\n", pdev_type,
+               seg, bus, slot, func);
+        while ( pdev->phantom_stride )
+        {
+            func += pdev->phantom_stride;
+            if ( PCI_SLOT(func) )
+                break;
+            printk(XENLOG_DEBUG "PCI phantom %04x:%02x:%02x.%u\n",
+                   seg, bus, slot, func);
+        }
+    }
     return ret;
 }
 
@@ -681,7 +729,7 @@ void pci_check_disable_device(u16 seg, u
     u16 cword;
 
     spin_lock(&pcidevs_lock);
-    pdev = pci_get_pdev(seg, bus, devfn);
+    pdev = pci_get_real_pdev(seg, bus, devfn);
     if ( pdev )
     {
         if ( now < pdev->fault.time ||
@@ -698,6 +746,7 @@ void pci_check_disable_device(u16 seg, u
 
     /* Tell the device to stop DMAing; we can't rely on the guest to
      * control it for us. */
+    devfn = pdev->devfn;
     cword = pci_conf_read16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
                             PCI_COMMAND);
     pci_conf_write16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
@@ -759,6 +808,27 @@ struct setup_dom0 {
     int (*handler)(u8 devfn, struct pci_dev *);
 };
 
+static void setup_one_dom0_device(const struct setup_dom0 *ctxt,
+                                  struct pci_dev *pdev)
+{
+    u8 devfn = pdev->devfn;
+
+    do {
+        int err = ctxt->handler(devfn, pdev);
+
+        if ( err )
+        {
+            printk(XENLOG_ERR "setup %04x:%02x:%02x.%u for d%d failed (%d)\n",
+                   pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+                   ctxt->d->domain_id, err);
+            if ( devfn == pdev->devfn )
+                return;
+        }
+        devfn += pdev->phantom_stride;
+    } while ( devfn != pdev->devfn &&
+              PCI_SLOT(devfn) == PCI_SLOT(pdev->devfn) );
+}
+
 static int __init _setup_dom0_pci_devices(struct pci_seg *pseg, void *arg)
 {
     struct setup_dom0 *ctxt = arg;
@@ -777,12 +847,12 @@ static int __init _setup_dom0_pci_device
             {
                 pdev->domain = ctxt->d;
                 list_add(&pdev->domain_list, &ctxt->d->arch.pdev_list);
-                ctxt->handler(devfn, pdev);
+                setup_one_dom0_device(ctxt, pdev);
             }
             else if ( pdev->domain == dom_xen )
             {
                 pdev->domain = ctxt->d;
-                ctxt->handler(devfn, pdev);
+                setup_one_dom0_device(ctxt, pdev);
                 pdev->domain = dom_xen;
             }
             else if ( pdev->domain != ctxt->d )
--- a/xen/include/xen/lib.h
+++ b/xen/include/xen/lib.h
@@ -58,6 +58,9 @@ do {                                    
 
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]) + __must_be_array(x))
 
+#define MASK_EXTR(v, m) (((v) & (m)) / ((m) & -(m)))
+#define MASK_INSR(v, m) (((v) * ((m) & -(m))) & (m))
+
 #define reserve_bootmem(_p,_l) ((void)0)
 
 struct domain;
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -63,6 +63,8 @@ struct pci_dev {
     const u8 bus;
     const u8 devfn;
 
+    u8 phantom_stride;
+
     enum pdev_type {
         DEV_TYPE_PCI_UNKNOWN,
         DEV_TYPE_PCIe_ENDPOINT,
@@ -114,6 +116,7 @@ int pci_ro_device(int seg, int bus, int 
 void arch_pci_ro_device(int seg, int bdf);
 int pci_hide_device(int bus, int devfn);
 struct pci_dev *pci_get_pdev(int seg, int bus, int devfn);
+struct pci_dev *pci_get_real_pdev(int seg, int bus, int devfn);
 struct pci_dev *pci_get_pdev_by_domain(
     struct domain *, int seg, int bus, int devfn);
 void pci_check_disable_device(u16 seg, u8 bus, u8 devfn);


Attachment: IOMMU-phantom-dev.patch
Description: Text document

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.