WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH, RFC 2/7] PCI multi-seg: add new physdevop-s

To: "xen-devel@xxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxx>
Subject: [Xen-devel] [PATCH, RFC 2/7] PCI multi-seg: add new physdevop-s
From: "Jan Beulich" <JBeulich@xxxxxxxxxx>
Date: Thu, 25 Aug 2011 15:56:34 +0100
Delivery-date: Thu, 25 Aug 2011 07:56:57 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
The new PHYSDEVOP_pci_device_add is intended to be extensible, with a
first extension (to pass the proximity domain of a device) added right
away.

A couple of directly related functions get adjusted at the same time to
account for the segment number.

Is the way this re-uses PHYSDEVOP_map_pirq acceptable, or should we
define a replacement one?

Should we deprecate the PHYSDEVOP_manage_pci_* sub-hypercalls?

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>

--- 2011-08-25.orig/xen/arch/ia64/xen/hypercall.c       2011-08-25 15:32:13.000000000 +0200
+++ 2011-08-25/xen/arch/ia64/xen/hypercall.c    2011-08-25 15:06:35.000000000 +0200
@@ -665,7 +665,7 @@ long do_physdev_op(int cmd, XEN_GUEST_HA
         if ( copy_from_guest(&manage_pci, arg, 1) != 0 )
             break;
 
-        ret = pci_add_device(manage_pci.bus, manage_pci.devfn, NULL);
+        ret = pci_add_device(0, manage_pci.bus, manage_pci.devfn, NULL);
         break;
     }
 
@@ -678,7 +678,7 @@ long do_physdev_op(int cmd, XEN_GUEST_HA
         if ( copy_from_guest(&manage_pci, arg, 1) != 0 )
             break;
 
-        ret = pci_remove_device(manage_pci.bus, manage_pci.devfn);
+        ret = pci_remove_device(0, manage_pci.bus, manage_pci.devfn);
             break;
     }
 
@@ -698,7 +698,7 @@ long do_physdev_op(int cmd, XEN_GUEST_HA
         pdev_info.is_virtfn = manage_pci_ext.is_virtfn;
         pdev_info.physfn.bus = manage_pci_ext.physfn.bus;
         pdev_info.physfn.devfn = manage_pci_ext.physfn.devfn;
-        ret = pci_add_device(manage_pci_ext.bus,
+        ret = pci_add_device(0, manage_pci_ext.bus,
                              manage_pci_ext.devfn,
                              &pdev_info);
         break;
--- 2011-08-25.orig/xen/arch/x86/irq.c  2011-08-25 15:05:26.000000000 +0200
+++ 2011-08-25/xen/arch/x86/irq.c       2011-08-25 15:33:18.000000000 +0200
@@ -1655,7 +1655,7 @@ int map_domain_pirq(
         if ( !cpu_has_apic )
             goto done;
 
-        pdev = pci_get_pdev(msi->bus, msi->devfn);
+        pdev = pci_get_pdev(msi->seg, msi->bus, msi->devfn);
         ret = pci_enable_msi(msi, &msi_desc);
         if ( ret )
             goto done;
--- 2011-08-25.orig/xen/arch/x86/msi.c  2011-08-25 15:05:26.000000000 +0200
+++ 2011-08-25/xen/arch/x86/msi.c       2011-08-25 15:06:35.000000000 +0200
@@ -528,7 +528,7 @@ static u64 read_pci_mem_bar(u8 bus, u8 s
 
     if ( vf >= 0 )
     {
-        struct pci_dev *pdev = pci_get_pdev(bus, PCI_DEVFN(slot, func));
+        struct pci_dev *pdev = pci_get_pdev(0, bus, PCI_DEVFN(slot, func));
         unsigned int pos = pci_find_ext_capability(0, bus,
                                                    PCI_DEVFN(slot, func),
                                                    PCI_EXT_CAP_ID_SRIOV);
@@ -767,7 +767,7 @@ static int __pci_enable_msi(struct msi_i
     struct msi_desc *old_desc;
 
     ASSERT(spin_is_locked(&pcidevs_lock));
-    pdev = pci_get_pdev(msi->bus, msi->devfn);
+    pdev = pci_get_pdev(msi->seg, msi->bus, msi->devfn);
     if ( !pdev )
         return -ENODEV;
 
@@ -775,7 +775,8 @@ static int __pci_enable_msi(struct msi_i
     if ( old_desc )
     {
         dprintk(XENLOG_WARNING, "irq %d has already mapped to MSI on "
-                "device %02x:%02x.%01x.\n", msi->irq, msi->bus,
+                "device %04x:%02x:%02x.%01x\n",
+                msi->irq, msi->seg, msi->bus,
                 PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
         *desc = old_desc;
         return 0;
@@ -785,7 +786,7 @@ static int __pci_enable_msi(struct msi_i
     if ( old_desc )
     {
         dprintk(XENLOG_WARNING, "MSI-X is already in use on "
-                "device %02x:%02x.%01x\n", msi->bus,
+                "device %04x:%02x:%02x.%01x\n", msi->seg, msi->bus,
                 PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
         pci_disable_msi(old_desc);
     }
@@ -830,7 +831,7 @@ static int __pci_enable_msix(struct msi_
     struct msi_desc *old_desc;
 
     ASSERT(spin_is_locked(&pcidevs_lock));
-    pdev = pci_get_pdev(msi->bus, msi->devfn);
+    pdev = pci_get_pdev(msi->seg, msi->bus, msi->devfn);
     if ( !pdev )
         return -ENODEV;
 
@@ -844,7 +845,8 @@ static int __pci_enable_msix(struct msi_
     if ( old_desc )
     {
         dprintk(XENLOG_WARNING, "irq %d has already mapped to MSIX on "
-                "device %02x:%02x.%01x.\n", msi->irq, msi->bus,
+                "device %04x:%02x:%02x.%01x\n",
+                msi->irq, msi->seg, msi->bus,
                 PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
         *desc = old_desc;
         return 0;
@@ -854,7 +856,7 @@ static int __pci_enable_msix(struct msi_
     if ( old_desc )
     {
         dprintk(XENLOG_WARNING, "MSI is already in use on "
-                "device %02x:%02x.%01x\n", msi->bus,
+                "device %04x:%02x:%02x.%01x\n", msi->seg, msi->bus,
                 PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
         pci_disable_msi(old_desc);
 
@@ -962,8 +964,10 @@ int pci_restore_msi_state(struct pci_dev
 
         if (desc->msi_desc != entry)
         {
-            dprintk(XENLOG_ERR, "Restore MSI for dev %x:%x not set before?\n",
-                                pdev->bus, pdev->devfn);
+            dprintk(XENLOG_ERR,
+                    "Restore MSI for dev %04x:%02x:%02x:%x not set before?\n",
+                    pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
+                    PCI_FUNC(pdev->devfn));
             spin_unlock_irqrestore(&desc->lock, flags);
             return -EINVAL;
         }
--- 2011-08-25.orig/xen/arch/x86/physdev.c      2011-08-25 15:32:13.000000000 +0200
+++ 2011-08-25/xen/arch/x86/physdev.c   2011-08-25 15:06:35.000000000 +0200
@@ -360,6 +360,15 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
         if ( copy_from_guest(&map, arg, 1) != 0 )
             break;
 
+        if ( map.type == MAP_PIRQ_TYPE_MSI_SEG )
+        {
+            map.type = MAP_PIRQ_TYPE_MSI;
+            msi.seg = map.bus >> 16;
+        }
+        else
+        {
+            msi.seg = 0;
+        }
         msi.bus = map.bus;
         msi.devfn = map.devfn;
         msi.entry_nr = map.entry_nr;
@@ -483,7 +492,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
         if ( copy_from_guest(&manage_pci, arg, 1) != 0 )
             break;
 
-        ret = pci_add_device(manage_pci.bus, manage_pci.devfn, NULL);
+        ret = pci_add_device(0, manage_pci.bus, manage_pci.devfn, NULL);
         break;
     }
 
@@ -496,7 +505,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
         if ( copy_from_guest(&manage_pci, arg, 1) != 0 )
             break;
 
-        ret = pci_remove_device(manage_pci.bus, manage_pci.devfn);
+        ret = pci_remove_device(0, manage_pci.bus, manage_pci.devfn);
         break;
     }
 
@@ -520,12 +529,52 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
         pdev_info.is_virtfn = manage_pci_ext.is_virtfn;
         pdev_info.physfn.bus = manage_pci_ext.physfn.bus;
         pdev_info.physfn.devfn = manage_pci_ext.physfn.devfn;
-        ret = pci_add_device(manage_pci_ext.bus,
+        ret = pci_add_device(0, manage_pci_ext.bus,
                              manage_pci_ext.devfn,
                              &pdev_info);
         break;
     }
 
+    case PHYSDEVOP_pci_device_add: {
+        struct physdev_pci_device_add add;
+        struct pci_dev_info pdev_info;
+
+        ret = -EPERM;
+        if ( !IS_PRIV(current->domain) )
+            break;
+
+        ret = -EFAULT;
+        if ( copy_from_guest(&add, arg, 1) != 0 )
+            break;
+
+        pdev_info.is_extfn = !!(add.flags & XEN_PCI_DEV_EXTFN);
+        if ( add.flags & XEN_PCI_DEV_VIRTFN )
+        {
+            pdev_info.is_virtfn = 1;
+            pdev_info.physfn.bus = add.physfn.bus;
+            pdev_info.physfn.devfn = add.physfn.devfn;
+        }
+        else
+            pdev_info.is_virtfn = 0;
+        ret = pci_add_device(add.seg, add.bus, add.devfn, &pdev_info);
+        break;
+    }
+
+    case PHYSDEVOP_pci_device_remove: {
+        struct physdev_pci_device dev;
+
+        ret = -EPERM;
+        if ( !IS_PRIV(v->domain) )
+            break;
+
+        ret = -EFAULT;
+        if ( copy_from_guest(&dev, arg, 1) != 0 )
+            break;
+
+        ret = pci_remove_device(dev.seg, dev.bus, dev.devfn);
+        break;
+    }
+
 #ifdef __x86_64__
     case PHYSDEVOP_pci_mmcfg_reserved: {
         struct physdev_pci_mmcfg_reserved info;
@@ -557,11 +606,31 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
             break;
 
         spin_lock(&pcidevs_lock);
-        pdev = pci_get_pdev(restore_msi.bus, restore_msi.devfn);
+        pdev = pci_get_pdev(0, restore_msi.bus, restore_msi.devfn);
+        ret = pdev ? pci_restore_msi_state(pdev) : -ENODEV;
+        spin_unlock(&pcidevs_lock);
+        break;
+    }
+
+    case PHYSDEVOP_restore_msi_ext: {
+        struct physdev_pci_device dev;
+        struct pci_dev *pdev;
+
+        ret = -EPERM;
+        if ( !IS_PRIV(v->domain) )
+            break;
+
+        ret = -EFAULT;
+        if ( copy_from_guest(&dev, arg, 1) != 0 )
+            break;
+
+        spin_lock(&pcidevs_lock);
+        pdev = pci_get_pdev(dev.seg, dev.bus, dev.devfn);
         ret = pdev ? pci_restore_msi_state(pdev) : -ENODEV;
         spin_unlock(&pcidevs_lock);
         break;
     }
+
     case PHYSDEVOP_setup_gsi: {
         struct physdev_setup_gsi setup_gsi;
 
--- 2011-08-25.orig/xen/arch/x86/x86_64/physdev.c       2011-08-25 15:32:13.000000000 +0200
+++ 2011-08-25/xen/arch/x86/x86_64/physdev.c    2011-08-25 15:06:35.000000000 +0200
@@ -67,6 +67,14 @@ CHECK_physdev_get_free_pirq;
 CHECK_physdev_pci_mmcfg_reserved;
 #undef xen_physdev_pci_mmcfg_reserved
 
+#define xen_physdev_pci_device_add physdev_pci_device_add
+CHECK_physdev_pci_device_add
+#undef xen_physdev_pci_device_add
+
+#define xen_physdev_pci_device physdev_pci_device
+CHECK_physdev_pci_device
+#undef xen_physdev_pci_device
+
 #define COMPAT
 #undef guest_handle_okay
 #define guest_handle_okay          compat_handle_okay
--- 2011-08-25.orig/xen/drivers/passthrough/amd/pci_amd_iommu.c 2011-08-25 15:32:13.000000000 +0200
+++ 2011-08-25/xen/drivers/passthrough/amd/pci_amd_iommu.c      2011-08-25 15:06:35.000000000 +0200
@@ -131,7 +131,7 @@ static void __init amd_iommu_setup_dom0_
     {
         for ( devfn = 0; devfn < 256; devfn++ )
         {
-            pdev = pci_get_pdev(bus, devfn);
+            pdev = pci_get_pdev(0, bus, devfn);
             if ( !pdev )
                 continue;
 
@@ -296,7 +296,7 @@ static int reassign_device( struct domai
     struct hvm_iommu *t = domain_hvm_iommu(target);
 
     ASSERT(spin_is_locked(&pcidevs_lock));
-    pdev = pci_get_pdev_by_domain(source, bus, devfn);
+    pdev = pci_get_pdev_by_domain(source, 0, bus, devfn);
     if ( !pdev )
         return -ENODEV;
 
--- 2011-08-25.orig/xen/drivers/passthrough/iommu.c     2011-08-25 15:32:13.000000000 +0200
+++ 2011-08-25/xen/drivers/passthrough/iommu.c  2011-08-25 15:06:35.000000000 +0200
@@ -282,7 +282,7 @@ int deassign_device(struct domain *d, u8
         return -EINVAL;
 
     ASSERT(spin_is_locked(&pcidevs_lock));
-    pdev = pci_get_pdev(bus, devfn);
+    pdev = pci_get_pdev(0, bus, devfn);
     if ( !pdev )
         return -ENODEV;
 
--- 2011-08-25.orig/xen/drivers/passthrough/pci.c       2011-08-25 15:06:23.000000000 +0200
+++ 2011-08-25/xen/drivers/passthrough/pci.c    2011-08-25 15:06:35.000000000 +0200
@@ -121,6 +121,7 @@ static struct pci_dev *alloc_pdev(struct
         return NULL;
     memset(pdev, 0, sizeof(struct pci_dev));
 
+    *(u16*) &pdev->seg = pseg->nr;
     *((u8*) &pdev->bus) = bus;
     *((u8*) &pdev->devfn) = devfn;
     pdev->domain = NULL;
@@ -137,41 +138,59 @@ static void free_pdev(struct pci_dev *pd
     xfree(pdev);
 }
 
-struct pci_dev *pci_get_pdev(int bus, int devfn)
+struct pci_dev *pci_get_pdev(int seg, int bus, int devfn)
 {
-    struct pci_seg *pseg = get_pseg(0);
+    struct pci_seg *pseg = get_pseg(seg);
     struct pci_dev *pdev = NULL;
 
     ASSERT(spin_is_locked(&pcidevs_lock));
+    ASSERT(seg != -1 || bus == -1);
+    ASSERT(bus != -1 || devfn == -1);
 
     if ( !pseg )
-        return NULL;
+    {
+        if ( seg == -1 )
+            radix_tree_gang_lookup(&pci_segments, (void **)&pseg, 0, 1);
+        if ( !pseg )
+            return NULL;
+    }
 
-    list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
-        if ( (pdev->bus == bus || bus == -1) &&
-             (pdev->devfn == devfn || devfn == -1) )
-        {
-            return pdev;
-        }
+    do {
+        list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
+            if ( (pdev->bus == bus || bus == -1) &&
+                 (pdev->devfn == devfn || devfn == -1) )
+                return pdev;
+    } while ( radix_tree_gang_lookup(&pci_segments, (void **)&pseg,
+                                     pseg->nr + 1, 1) );
 
     return NULL;
 }
 
-struct pci_dev *pci_get_pdev_by_domain(struct domain *d, int bus, int devfn)
+struct pci_dev *pci_get_pdev_by_domain(
+    struct domain *d, int seg, int bus, int devfn)
 {
-    struct pci_seg *pseg = get_pseg(0);
+    struct pci_seg *pseg = get_pseg(seg);
     struct pci_dev *pdev = NULL;
 
+    ASSERT(seg != -1 || bus == -1);
+    ASSERT(bus != -1 || devfn == -1);
+
     if ( !pseg )
-        return NULL;
+    {
+        if ( seg == -1 )
+            radix_tree_gang_lookup(&pci_segments, (void **)&pseg, 0, 1);
+        if ( !pseg )
+            return NULL;
+    }
 
-    list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
-         if ( (pdev->bus == bus || bus == -1) &&
-              (pdev->devfn == devfn || devfn == -1) &&
-              (pdev->domain == d) )
-         {
-             return pdev;
-         }
+    do {
+        list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
+            if ( (pdev->bus == bus || bus == -1) &&
+                 (pdev->devfn == devfn || devfn == -1) &&
+                 (pdev->domain == d) )
+                return pdev;
+    } while ( radix_tree_gang_lookup(&pci_segments, (void **)&pseg,
+                                     pseg->nr + 1, 1) );
 
     return NULL;
 }
@@ -215,7 +234,7 @@ void pci_enable_acs(struct pci_dev *pdev
     pci_conf_write16(bus, dev, func, pos + PCI_ACS_CTRL, ctrl);
 }
 
-int pci_add_device(u8 bus, u8 devfn, const struct pci_dev_info *info)
+int pci_add_device(u16 seg, u8 bus, u8 devfn, const struct pci_dev_info *info)
 {
     struct pci_seg *pseg;
     struct pci_dev *pdev;
@@ -230,17 +249,20 @@ int pci_add_device(u8 bus, u8 devfn, con
     else if (info->is_virtfn)
     {
         spin_lock(&pcidevs_lock);
-        pdev = pci_get_pdev(info->physfn.bus, info->physfn.devfn);
+        pdev = pci_get_pdev(seg, info->physfn.bus, info->physfn.devfn);
         spin_unlock(&pcidevs_lock);
         if ( !pdev )
-            pci_add_device(info->physfn.bus, info->physfn.devfn, NULL);
+            pci_add_device(seg, info->physfn.bus, info->physfn.devfn, NULL);
         pdev_type = "virtual function";
     }
     else
-        return -EINVAL;
+    {
+        info = NULL;
+        pdev_type = "device";
+    }
 
     spin_lock(&pcidevs_lock);
-    pseg = alloc_pseg(0);
+    pseg = alloc_pseg(seg);
     if ( !pseg )
         goto out;
     pdev = alloc_pdev(pseg, bus, devfn);
@@ -251,7 +273,7 @@ int pci_add_device(u8 bus, u8 devfn, con
         pdev->info = *info;
     else if ( !pdev->vf_rlen[0] )
     {
-        unsigned int pos = pci_find_ext_capability(0, bus, devfn,
+        unsigned int pos = pci_find_ext_capability(seg, bus, devfn,
                                                    PCI_EXT_CAP_ID_SRIOV);
         u16 ctrl = pci_conf_read16(bus, slot, func, pos + PCI_SRIOV_CTRL);
 
@@ -271,9 +293,10 @@ int pci_add_device(u8 bus, u8 devfn, con
                 if ( (bar & PCI_BASE_ADDRESS_SPACE) ==
                      PCI_BASE_ADDRESS_SPACE_IO )
                 {
-                    printk(XENLOG_WARNING "SR-IOV device %02x:%02x.%x with vf"
-                                          " BAR%u in IO space\n",
-                           bus, slot, func, i);
+                    printk(XENLOG_WARNING
+                           "SR-IOV device %04x:%02x:%02x.%u with vf BAR%u"
+                           " in IO space\n",
+                           seg, bus, slot, func, i);
                     continue;
                 }
                 pci_conf_write32(bus, slot, func, idx, ~0);
@@ -282,9 +305,10 @@ int pci_add_device(u8 bus, u8 devfn, con
                 {
                     if ( i >= PCI_SRIOV_NUM_BARS )
                     {
-                        printk(XENLOG_WARNING "SR-IOV device %02x:%02x.%x with"
-                                              " 64-bit vf BAR in last slot\n",
-                               bus, slot, func);
+                        printk(XENLOG_WARNING
+                               "SR-IOV device %04x:%02x:%02x.%u with 64-bit"
+                               " vf BAR in last slot\n",
+                               seg, bus, slot, func);
                         break;
                     }
                     hi = pci_conf_read32(bus, slot, func, idx + 4);
@@ -309,9 +333,10 @@ int pci_add_device(u8 bus, u8 devfn, con
             }
         }
         else
-            printk(XENLOG_WARNING "SR-IOV device %02x:%02x.%x has its virtual"
-                                  " functions already enabled (%04x)\n",
-                   bus, slot, func, ctrl);
+            printk(XENLOG_WARNING
+                   "SR-IOV device %04x:%02x:%02x.%u has its virtual"
+                   " functions already enabled (%04x)\n",
+                   seg, bus, slot, func, ctrl);
     }
 
     ret = 0;
@@ -331,14 +356,14 @@ int pci_add_device(u8 bus, u8 devfn, con
 
 out:
     spin_unlock(&pcidevs_lock);
-    printk(XENLOG_DEBUG "PCI add %s %02x:%02x.%x\n", pdev_type,
-           bus, slot, func);
+    printk(XENLOG_DEBUG "PCI add %s %04x:%02x:%02x.%u\n", pdev_type,
+           seg, bus, slot, func);
     return ret;
 }
 
-int pci_remove_device(u8 bus, u8 devfn)
+int pci_remove_device(u16 seg, u8 bus, u8 devfn)
 {
-    struct pci_seg *pseg = get_pseg(0);
+    struct pci_seg *pseg = get_pseg(seg);
     struct pci_dev *pdev;
     int ret = -ENODEV;
 
@@ -354,8 +379,8 @@ int pci_remove_device(u8 bus, u8 devfn)
                 list_del(&pdev->domain_list);
             pci_cleanup_msi(pdev);
             free_pdev(pdev);
-            printk(XENLOG_DEBUG "PCI remove device %02x:%02x.%x\n", bus,
-                   PCI_SLOT(devfn), PCI_FUNC(devfn));
+            printk(XENLOG_DEBUG "PCI remove device %04x:%02x:%02x.%u\n",
+                   seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
             break;
         }
 
@@ -413,7 +438,7 @@ void pci_release_devices(struct domain *
 
     spin_lock(&pcidevs_lock);
     pci_clean_dpci_irqs(d);
-    while ( (pdev = pci_get_pdev_by_domain(d, -1, -1)) )
+    while ( (pdev = pci_get_pdev_by_domain(d, -1, -1, -1)) )
     {
         pci_cleanup_msi(pdev);
         bus = pdev->bus; devfn = pdev->devfn;
--- 2011-08-25.orig/xen/drivers/passthrough/vtd/iommu.c 2011-08-25 15:32:13.000000000 +0200
+++ 2011-08-25/xen/drivers/passthrough/vtd/iommu.c      2011-08-25 15:06:35.000000000 +0200
@@ -259,7 +259,7 @@ static u64 addr_to_dma_page_maddr(struct
          * just get any passthrough device in the domainr - assume user
          * assigns only devices from same node to a given guest.
          */
-        pdev = pci_get_pdev_by_domain(domain, -1, -1);
+        pdev = pci_get_pdev_by_domain(domain, -1, -1, -1);
         drhd = acpi_find_matched_drhd_unit(pdev);
         if ( !alloc || ((hd->pgd_maddr = alloc_pgtable_maddr(drhd, 1)) == 0) )
             goto out;
@@ -276,7 +276,7 @@ static u64 addr_to_dma_page_maddr(struct
             if ( !alloc )
                 break;
 
-            pdev = pci_get_pdev_by_domain(domain, -1, -1);
+            pdev = pci_get_pdev_by_domain(domain, -1, -1, -1);
             drhd = acpi_find_matched_drhd_unit(pdev);
             maddr = alloc_pgtable_maddr(drhd, 1);
             if ( !maddr )
@@ -1250,7 +1250,7 @@ int domain_context_mapping_one(
     {
         int res = 0;
 
-        pdev = pci_get_pdev(bus, devfn);
+        pdev = pci_get_pdev(0, bus, devfn);
         if (!pdev)
             res = -ENODEV;
         else if (pdev->domain != domain)
@@ -1343,7 +1343,7 @@ static int domain_context_mapping(struct
     int ret = 0;
     u32 type;
     u8 secbus;
-    struct pci_dev *pdev = pci_get_pdev(bus, devfn);
+    struct pci_dev *pdev = pci_get_pdev(0, bus, devfn);
 
     drhd = acpi_find_matched_drhd_unit(pdev);
     if ( !drhd )
@@ -1468,7 +1468,7 @@ static int domain_context_unmap(struct d
     int ret = 0;
     u32 type;
     u8 tmp_bus, tmp_devfn, secbus;
-    struct pci_dev *pdev = pci_get_pdev(bus, devfn);
+    struct pci_dev *pdev = pci_get_pdev(0, bus, devfn);
     int found = 0;
 
     BUG_ON(!pdev);
@@ -1579,7 +1579,7 @@ static int reassign_device_ownership(
     int ret;
 
     ASSERT(spin_is_locked(&pcidevs_lock));
-    pdev = pci_get_pdev_by_domain(source, bus, devfn);
+    pdev = pci_get_pdev_by_domain(source, 0, bus, devfn);
 
     if (!pdev)
         return -ENODEV;
@@ -1888,7 +1888,7 @@ static void __init setup_dom0_devices(st
     {
         for ( devfn = 0; devfn < 256; devfn++ )
         {
-            pdev = pci_get_pdev(bus, devfn);
+            pdev = pci_get_pdev(0, bus, devfn);
             if ( !pdev )
                 continue;
 
@@ -2122,7 +2122,7 @@ int device_assigned(u8 bus, u8 devfn)
     struct pci_dev *pdev;
 
     spin_lock(&pcidevs_lock);
-    pdev = pci_get_pdev_by_domain(dom0, bus, devfn);
+    pdev = pci_get_pdev_by_domain(dom0, 0, bus, devfn);
     if (!pdev)
     {
         spin_unlock(&pcidevs_lock);
@@ -2144,7 +2144,7 @@ static int intel_iommu_assign_device(str
         return -ENODEV;
 
     ASSERT(spin_is_locked(&pcidevs_lock));
-    pdev = pci_get_pdev(bus, devfn);
+    pdev = pci_get_pdev(0, bus, devfn);
     if (!pdev)
         return -ENODEV;
 
--- 2011-08-25.orig/xen/drivers/passthrough/vtd/quirks.c        2011-08-25 15:32:13.000000000 +0200
+++ 2011-08-25/xen/drivers/passthrough/vtd/quirks.c     2011-08-25 15:06:35.000000000 +0200
@@ -286,7 +286,7 @@ static void map_me_phantom_function(stru
     struct pci_dev *pdev;
 
     /* find ME VT-d engine base on a real ME device */
-    pdev = pci_get_pdev(0, PCI_DEVFN(dev, 0));
+    pdev = pci_get_pdev(0, 0, PCI_DEVFN(dev, 0));
     drhd = acpi_find_matched_drhd_unit(pdev);
 
     /* map or unmap ME phantom function */
--- 2011-08-25.orig/xen/drivers/passthrough/vtd/x86/ats.c       2011-08-25 15:32:13.000000000 +0200
+++ 2011-08-25/xen/drivers/passthrough/vtd/x86/ats.c    2011-08-25 15:06:35.000000000 +0200
@@ -37,6 +37,7 @@ static LIST_HEAD(ats_dev_drhd_units);
 
 struct pci_ats_dev {
     struct list_head list;
+    u16 seg;
     u8 bus;
     u8 devfn;
     u16 ats_queue_depth;    /* ATS device invalidation queue depth */
@@ -91,7 +92,7 @@ int ats_device(int seg, int bus, int dev
     if ( !ats_enabled || !iommu_qinval )
         return 0;
 
-    pdev = pci_get_pdev(bus, devfn);
+    pdev = pci_get_pdev(seg, bus, devfn);
     if ( !pdev )
         return 0;
 
@@ -130,8 +131,9 @@ int enable_ats_device(int seg, int bus, 
     BUG_ON(!pos);
 
     if ( iommu_verbose )
-        dprintk(XENLOG_INFO VTDPREFIX, "%x:%x.%x: ATS capability found\n",
-                bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+        dprintk(XENLOG_INFO VTDPREFIX,
+                "%04x:%02x:%02x.%u: ATS capability found\n",
+                seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
 
     /* BUGBUG: add back seg when multi-seg platform support is enabled */
     value = pci_conf_read16(bus, PCI_SLOT(devfn),
@@ -140,7 +142,7 @@ int enable_ats_device(int seg, int bus, 
     {
         list_for_each_entry ( pdev, &ats_devices, list )
         {
-            if ( pdev->bus == bus && pdev->devfn == devfn )
+            if ( pdev->seg == seg && pdev->bus == bus && pdev->devfn == devfn )
             {
                 pos = 0;
                 break;
@@ -161,6 +163,7 @@ int enable_ats_device(int seg, int bus, 
 
     if ( pos )
     {
+        pdev->seg = seg;
         pdev->bus = bus;
         pdev->devfn = devfn;
         value = pci_conf_read16(bus, PCI_SLOT(devfn),
@@ -170,8 +173,10 @@ int enable_ats_device(int seg, int bus, 
     }
 
     if ( iommu_verbose )
-        dprintk(XENLOG_INFO VTDPREFIX, "%x:%x.%x: ATS %s enabled\n",
-                bus, PCI_SLOT(devfn), PCI_FUNC(devfn), pos ? "is" : "was");
+        dprintk(XENLOG_INFO VTDPREFIX,
+                "%04x:%02x:%02x.%u: ATS %s enabled\n",
+                seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+                pos ? "is" : "was");
 
     return pos;
 }
@@ -194,7 +199,7 @@ void disable_ats_device(int seg, int bus
 
     list_for_each_entry ( pdev, &ats_devices, list )
     {
-        if ( pdev->bus == bus && pdev->devfn == devfn )
+        if ( pdev->seg == seg && pdev->bus == bus && pdev->devfn == devfn )
         {
             list_del(&pdev->list);
             xfree(pdev);
@@ -203,8 +208,9 @@ void disable_ats_device(int seg, int bus
     }
 
     if ( iommu_verbose )
-        dprintk(XENLOG_INFO VTDPREFIX, "%x:%x.%x: ATS is disabled\n",
-                bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+        dprintk(XENLOG_INFO VTDPREFIX,
+                "%04x:%02x:%02x.%u: ATS is disabled\n",
+                seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
 }
 
 
--- 2011-08-25.orig/xen/include/asm-x86/msi.h   2011-08-25 15:05:26.000000000 +0200
+++ 2011-08-25/xen/include/asm-x86/msi.h        2011-08-25 15:06:35.000000000 +0200
@@ -59,8 +59,9 @@
 #endif
 
 struct msi_info {
-    int bus;
-    int devfn;
+    u16 seg;
+    u8 bus;
+    u8 devfn;
     int irq;
     int entry_nr;
     uint64_t table_base;
--- 2011-08-25.orig/xen/include/public/physdev.h        2011-08-25 15:32:13.000000000 +0200
+++ 2011-08-25/xen/include/public/physdev.h     2011-08-25 15:06:35.000000000 +0200
@@ -142,6 +142,7 @@ DEFINE_XEN_GUEST_HANDLE(physdev_irq_t);
 #define MAP_PIRQ_TYPE_MSI               0x0
 #define MAP_PIRQ_TYPE_GSI               0x1
 #define MAP_PIRQ_TYPE_UNKNOWN           0x2
+#define MAP_PIRQ_TYPE_MSI_SEG           0x3
 
 #define PHYSDEVOP_map_pirq               13
 struct physdev_map_pirq {
@@ -152,7 +153,7 @@ struct physdev_map_pirq {
     int index;
     /* IN or OUT */
     int pirq;
-    /* IN */
+    /* IN - high 16 bits hold segment for MAP_PIRQ_TYPE_MSI_SEG */
     int bus;
     /* IN */
     int devfn;
@@ -268,6 +269,41 @@ struct physdev_pci_mmcfg_reserved {
 typedef struct physdev_pci_mmcfg_reserved physdev_pci_mmcfg_reserved_t;
 DEFINE_XEN_GUEST_HANDLE(physdev_pci_mmcfg_reserved_t);
 
+#define XEN_PCI_DEV_EXTFN              0x1
+#define XEN_PCI_DEV_VIRTFN             0x2
+#define XEN_PCI_DEV_PXM                0x4
+
+#define PHYSDEVOP_pci_device_add        25
+struct physdev_pci_device_add {
+    /* IN */
+    uint16_t seg;
+    uint8_t bus;
+    uint8_t devfn;
+    uint32_t flags;
+    struct {
+        uint8_t bus;
+        uint8_t devfn;
+    } physfn;
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+    uint32_t optarr[];
+#elif defined(__GNUC__)
+    uint32_t optarr[0];
+#endif
+};
+typedef struct physdev_pci_device_add physdev_pci_device_add_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_add_t);
+
+#define PHYSDEVOP_pci_device_remove     26
+#define PHYSDEVOP_restore_msi_ext       27
+struct physdev_pci_device {
+    /* IN */
+    uint16_t seg;
+    uint8_t bus;
+    uint8_t devfn;
+};
+typedef struct physdev_pci_device physdev_pci_device_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_t);
+
 /*
  * Notify that some PIRQ-bound event channels have been unmasked.
  * ** This command is obsolete since interface version 0x00030202 and is **
--- 2011-08-25.orig/xen/include/xen/pci.h       2011-08-25 15:06:23.000000000 +0200
+++ 2011-08-25/xen/include/xen/pci.h    2011-08-25 15:06:35.000000000 +0200
@@ -56,6 +56,7 @@ struct pci_dev {
     spinlock_t msix_table_lock;
 
     struct domain *domain;
+    const u16 seg;
     const u8 bus;
     const u8 devfn;
     struct pci_dev_info info;
@@ -90,10 +91,11 @@ struct pci_dev *pci_lock_domain_pdev(str
 
 void pci_release_devices(struct domain *d);
 int pci_add_segment(u16 seg);
-int pci_add_device(u8 bus, u8 devfn, const struct pci_dev_info *);
-int pci_remove_device(u8 bus, u8 devfn);
-struct pci_dev *pci_get_pdev(int bus, int devfn);
-struct pci_dev *pci_get_pdev_by_domain(struct domain *d, int bus, int devfn);
+int pci_add_device(u16 seg, u8 bus, u8 devfn, const struct pci_dev_info *);
+int pci_remove_device(u16 seg, u8 bus, u8 devfn);
+struct pci_dev *pci_get_pdev(int seg, int bus, int devfn);
+struct pci_dev *pci_get_pdev_by_domain(
+    struct domain *, int seg, int bus, int devfn);
 
 void disconnect_pci_devices(void);
 
--- 2011-08-25.orig/xen/include/xlat.lst        2011-08-25 15:32:13.000000000 +0200
+++ 2011-08-25/xen/include/xlat.lst     2011-08-25 15:06:35.000000000 +0200
@@ -65,6 +65,8 @@
 ?      physdev_irq_status_query        physdev.h
 ?      physdev_manage_pci              physdev.h
 ?      physdev_manage_pci_ext          physdev.h
+?      physdev_pci_device              physdev.h
+?      physdev_pci_device_add          physdev.h
 ?      physdev_pci_mmcfg_reserved      physdev.h
 ?      physdev_unmap_pirq              physdev.h
 ?      physdev_restore_msi             physdev.h


Attachment: pci-multi-seg-physdevop.patch
Description: Text document

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-devel] [PATCH, RFC 2/7] PCI multi-seg: add new physdevop-s, Jan Beulich <=