WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

Re: [Xen-devel] Xen 3.4 code freeze

To: Keir Fraser <keir.fraser@xxxxxxxxxxxxx>
Subject: Re: [Xen-devel] Xen 3.4 code freeze
From: "Zhao, Yu" <yu.zhao@xxxxxxxxx>
Date: Wed, 18 Mar 2009 17:22:35 +0800
Cc: "xen-devel@xxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxx>
Delivery-date: Wed, 18 Mar 2009 02:24:39 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
In-reply-to: <C5E65404.56B6%keir.fraser@xxxxxxxxxxxxx>
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
References: <C5E65404.56B6%keir.fraser@xxxxxxxxxxxxx>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Thunderbird 2.0.0.17 (Windows/20080914)
Hi Keir,

There are some native kernel SR-IOV patches that should be accepted by the maintainer soon, and I had planned to backport them to Xen/Dom0 once they were. However, it looks like they can't be in the native kernel tree before the code freeze. I have attached the backported patches; if it's possible, can you please take them for 3.4?

Thanks,
Yu

Keir Fraser wrote:
Folks,

My plan is to take no further large feature patchsets into xen-unstable until 3.4 is 
branched, the only exception possibly being Dan’s tmem patches. Furthermore, after 
Friday I’m away for a week and when I get back I intend to shake the trees into better 
shape and accept bug-fix patches only.

 -- Keir



# HG changeset patch
# User Yu Zhao <yu.zhao@xxxxxxxxx>
# Date 1237366099 14400
# Node ID cdf29bb6c74d5e075099855e9ddffb27e633079a
# Parent  9fc957e63f8dc0fdb2400eb424da4c1122b7ac65
Xen: use proper device ID to search VT-d unit for ARI and SR-IOV device

PCIe Alternative Routing-ID Interpretation (ARI) ECN defines the Extended
Function -- a function whose function number is greater than 7 within an
ARI Device. Intel VT-d spec 1.2 section 8.3.2 specifies that the Extended
Function is under the scope of the same remapping unit as the traditional
function. The hypervisor needs to know if a function is an Extended Function
so it can find the proper DMAR for it.

And section 8.3.3 specifies that the SR-IOV Virtual Function is under the
scope of the same remapping unit as the Physical Function. The hypervisor
also needs to know if a function is a Virtual Function and which Physical
Function it's associated with, for the same reason.

diff -r 9fc957e63f8d -r cdf29bb6c74d xen/arch/ia64/xen/hypercall.c
--- a/xen/arch/ia64/xen/hypercall.c     Tue Mar 17 15:40:25 2009 +0000
+++ b/xen/arch/ia64/xen/hypercall.c     Wed Mar 18 04:48:19 2009 -0400
@@ -650,6 +650,7 @@
 
     case PHYSDEVOP_manage_pci_add: {
         struct physdev_manage_pci manage_pci;
+        struct pci_dev dev;
         ret = -EPERM;
         if ( !IS_PRIV(current->domain) )
             break;
@@ -657,7 +658,13 @@
         if ( copy_from_guest(&manage_pci, arg, 1) != 0 )
             break;
 
-        ret = pci_add_device(manage_pci.bus, manage_pci.devfn);
+        dev.bus = manage_pci.bus;
+        dev.devfn = manage_pci.devfn;
+        dev.is_extfn = manage_pci.is_extfn;
+        dev.is_virtfn = manage_pci.is_virtfn;
+        dev.physfn.bus = manage_pci.physfn.bus;
+        dev.physfn.devfn = manage_pci.physfn.devfn;
+        ret = pci_add_device(&dev);
             break;
     }
 
diff -r 9fc957e63f8d -r cdf29bb6c74d xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c    Tue Mar 17 15:40:25 2009 +0000
+++ b/xen/arch/x86/physdev.c    Wed Mar 18 04:48:19 2009 -0400
@@ -397,6 +397,7 @@
 
     case PHYSDEVOP_manage_pci_add: {
         struct physdev_manage_pci manage_pci;
+        struct pci_dev dev;
         ret = -EPERM;
         if ( !IS_PRIV(v->domain) )
             break;
@@ -404,7 +405,13 @@
         if ( copy_from_guest(&manage_pci, arg, 1) != 0 )
             break;
 
-        ret = pci_add_device(manage_pci.bus, manage_pci.devfn);
+        dev.bus = manage_pci.bus;
+        dev.devfn = manage_pci.devfn;
+        dev.is_extfn = manage_pci.is_extfn;
+        dev.is_virtfn = manage_pci.is_virtfn;
+        dev.physfn.bus = manage_pci.physfn.bus;
+        dev.physfn.devfn = manage_pci.physfn.devfn;
+        ret = pci_add_device(&dev);
         break;
     }
 
diff -r 9fc957e63f8d -r cdf29bb6c74d xen/drivers/passthrough/pci.c
--- a/xen/drivers/passthrough/pci.c     Tue Mar 17 15:40:25 2009 +0000
+++ b/xen/drivers/passthrough/pci.c     Wed Mar 18 04:48:19 2009 -0400
@@ -43,8 +43,8 @@
         return NULL;
     memset(pdev, 0, sizeof(struct pci_dev));
 
-    *((u8*) &pdev->bus) = bus;
-    *((u8*) &pdev->devfn) = devfn;
+    pdev->bus = bus;
+    pdev->devfn = devfn;
     pdev->domain = NULL;
     INIT_LIST_HEAD(&pdev->msi_list);
     list_add(&pdev->alldevs_list, &alldevs_list);
@@ -92,15 +92,20 @@
     return NULL;
 }
 
-int pci_add_device(u8 bus, u8 devfn)
+int pci_add_device(struct pci_dev *dev)
 {
     struct pci_dev *pdev;
     int ret = -ENOMEM;
 
     spin_lock(&pcidevs_lock);
-    pdev = alloc_pdev(bus, devfn);
+    pdev = alloc_pdev(dev->bus, dev->devfn);
     if ( !pdev )
         goto out;
+
+    pdev->is_extfn = dev->is_extfn;
+    pdev->is_virtfn = dev->is_virtfn;
+    pdev->physfn.bus = dev->physfn.bus;
+    pdev->physfn.devfn = dev->physfn.devfn;
 
     ret = 0;
     if ( !pdev->domain )
@@ -115,8 +120,8 @@
 
 out:
     spin_unlock(&pcidevs_lock);
-    printk(XENLOG_DEBUG "PCI add device %02x:%02x.%x\n", bus,
-           PCI_SLOT(devfn), PCI_FUNC(devfn));
+    printk(XENLOG_DEBUG "PCI add device %02x:%02x.%x\n", dev->bus,
+           PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
     return ret;
 }
 
diff -r 9fc957e63f8d -r cdf29bb6c74d xen/drivers/passthrough/vtd/dmar.c
--- a/xen/drivers/passthrough/vtd/dmar.c        Tue Mar 17 15:40:25 2009 +0000
+++ b/xen/drivers/passthrough/vtd/dmar.c        Wed Mar 18 04:48:19 2009 -0400
@@ -157,6 +157,17 @@
     struct acpi_drhd_unit *drhd;
     struct acpi_drhd_unit *found = NULL, *include_all = NULL;
     int i;
+    struct pci_dev *dev;
+
+    dev = pci_get_pdev(bus, devfn);
+    BUG_ON(!dev);
+
+    if (dev->is_extfn) {
+        devfn = 0;
+    } else if (dev->is_virtfn) {
+        bus = dev->physfn.bus;
+        devfn = PCI_SLOT(dev->physfn.devfn) ? 0 : dev->physfn.devfn;
+    }
 
     list_for_each_entry ( drhd, &acpi_drhd_units, list )
     {
diff -r 9fc957e63f8d -r cdf29bb6c74d xen/include/public/physdev.h
--- a/xen/include/public/physdev.h      Tue Mar 17 15:40:25 2009 +0000
+++ b/xen/include/public/physdev.h      Wed Mar 18 04:48:19 2009 -0400
@@ -178,6 +178,12 @@
     /* IN */
     uint8_t bus;
     uint8_t devfn;
+    unsigned is_extfn:1;
+    unsigned is_virtfn:1;
+    struct {
+        uint8_t bus;
+        uint8_t devfn;
+    } physfn;
 }; 
 
 typedef struct physdev_manage_pci physdev_manage_pci_t;
diff -r 9fc957e63f8d -r cdf29bb6c74d xen/include/xen/pci.h
--- a/xen/include/xen/pci.h     Tue Mar 17 15:40:25 2009 +0000
+++ b/xen/include/xen/pci.h     Wed Mar 18 04:48:19 2009 -0400
@@ -41,8 +41,14 @@
     spinlock_t msix_table_lock;
 
     struct domain *domain;
-    const u8 bus;
-    const u8 devfn;
+    u8 bus;
+    u8 devfn;
+    unsigned is_extfn:1;
+    unsigned is_virtfn:1;
+    struct {
+        u8 bus;
+        u8 devfn;
+    } physfn;
 };
 
 #define for_each_pdev(domain, pdev) \
@@ -62,7 +68,7 @@
 struct pci_dev *pci_lock_domain_pdev(struct domain *d, int bus, int devfn);
 
 void pci_release_devices(struct domain *d);
-int pci_add_device(u8 bus, u8 devfn);
+int pci_add_device(struct pci_dev *dev);
 int pci_remove_device(u8 bus, u8 devfn);
 struct pci_dev *pci_get_pdev(int bus, int devfn);
 struct pci_dev *pci_get_pdev_by_domain(struct domain *d, int bus, int devfn);
# HG changeset patch
# User Yu Zhao <yu.zhao@xxxxxxxxx>
# Date 1237268585 14400
# Node ID 92730fa710446b2502809faa72bb29fda95ba878
# Parent  e8a9f8910a3f113759906e493eaa211e2c43cd85
PCI: initialize and release SR-IOV capability

If a device has the SR-IOV capability, initialize it (set the ARI
Capable Hierarchy in the lowest numbered PF if necessary; calculate
the System Page Size for the VF MMIO, probe the VF Offset, Stride
and BARs). A lock for the VF bus allocation is also initialized if
a PF is the lowest numbered PF.

Signed-off-by: Yu Zhao <yu.zhao@xxxxxxxxx>

diff -r e8a9f8910a3f -r 92730fa71044 drivers/pci/Kconfig
--- a/drivers/pci/Kconfig       Fri Mar 13 10:08:22 2009 +0000
+++ b/drivers/pci/Kconfig       Tue Mar 17 01:43:05 2009 -0400
@@ -37,3 +37,12 @@
        help
          Say Y here if you want to reserve PCI device for passthrough.
 
+config PCI_IOV
+       bool "PCI IOV support"
+       depends on PCI
+       help
+         PCI-SIG I/O Virtualization (IOV) Specifications support.
+         Single Root IOV: allows the creation of virtual PCI devices
+         that share the physical resources from a real device.
+
+         When in doubt, say N.
diff -r e8a9f8910a3f -r 92730fa71044 drivers/pci/Makefile
--- a/drivers/pci/Makefile      Fri Mar 13 10:08:22 2009 +0000
+++ b/drivers/pci/Makefile      Tue Mar 17 01:43:05 2009 -0400
@@ -15,6 +15,8 @@
 
 # Build the PCI Hotplug drivers if we were asked to
 obj-$(CONFIG_HOTPLUG_PCI) += hotplug/
+
+obj-$(CONFIG_PCI_IOV) += iov.o
 
 #
 # Some architectures use the generic PCI setup functions
diff -r e8a9f8910a3f -r 92730fa71044 drivers/pci/iov.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/pci/iov.c Tue Mar 17 01:43:05 2009 -0400
@@ -0,0 +1,175 @@
+/*
+ * drivers/pci/iov.c
+ *
+ * Copyright (C) 2009 Intel Corporation, Yu Zhao <yu.zhao@xxxxxxxxx>
+ *
+ * PCI Express I/O Virtualization (IOV) support.
+ *   Single Root IOV 1.0
+ */
+
+#include <linux/pci.h>
+#include <linux/mutex.h>
+#include <linux/string.h>
+#include <linux/delay.h>
+#include "pci.h"
+
+
+static int sriov_init(struct pci_dev *dev, int pos)
+{
+       int i;
+       int rc;
+       int nres;
+       u32 pgsz;
+       u16 ctrl, total, offset, stride;
+       struct pci_sriov *iov;
+       struct resource *res;
+       struct pci_dev *pdev;
+
+       pci_read_config_word(dev, pos + PCI_SRIOV_CTRL, &ctrl);
+       if (ctrl & PCI_SRIOV_CTRL_VFE) {
+               pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, 0);
+               ssleep(1);
+       }
+
+       pci_read_config_word(dev, pos + PCI_SRIOV_TOTAL_VF, &total);
+       if (!total)
+               return 0;
+
+       list_for_each_entry(pdev, &dev->bus->devices, bus_list)
+               if (pdev->is_physfn)
+                       break;
+       if (list_empty(&dev->bus->devices) || !pdev->is_physfn)
+               pdev = NULL;
+
+       ctrl = 0;
+       if (!pdev && pci_ari_enabled(dev->bus))
+               ctrl |= PCI_SRIOV_CTRL_ARI;
+
+       pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, ctrl);
+       pci_write_config_word(dev, pos + PCI_SRIOV_NUM_VF, total);
+       pci_read_config_word(dev, pos + PCI_SRIOV_VF_OFFSET, &offset);
+       pci_read_config_word(dev, pos + PCI_SRIOV_VF_STRIDE, &stride);
+       if (!offset || (total > 1 && !stride))
+               return -EIO;
+
+       pci_read_config_dword(dev, pos + PCI_SRIOV_SUP_PGSIZE, &pgsz);
+       i = PAGE_SHIFT > 12 ? PAGE_SHIFT - 12 : 0;
+       pgsz &= ~((1 << i) - 1);
+       if (!pgsz)
+               return -EIO;
+
+       pgsz &= ~(pgsz - 1);
+       pci_write_config_dword(dev, pos + PCI_SRIOV_SYS_PGSIZE, pgsz);
+
+       nres = 0;
+       for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+               res = dev->resource + PCI_IOV_RESOURCES + i;
+               i += __pci_read_base(dev, pci_bar_unknown, res,
+                                    pos + PCI_SRIOV_BAR + i * 4);
+               if (!res->flags)
+                       continue;
+               if ((res->end - res->start + 1) & (PAGE_SIZE - 1)) {
+                       rc = -EIO;
+                       goto failed;
+               }
+               res->end = res->start + (res->end - res->start + 1) * total - 1;
+               nres++;
+       }
+
+       iov = kzalloc(sizeof(*iov), GFP_KERNEL);
+       if (!iov) {
+               rc = -ENOMEM;
+               goto failed;
+       }
+
+       iov->pos = pos;
+       iov->nres = nres;
+       iov->ctrl = ctrl;
+       iov->total = total;
+       iov->offset = offset;
+       iov->stride = stride;
+       iov->pgsz = pgsz;
+       iov->self = dev;
+       pci_read_config_dword(dev, pos + PCI_SRIOV_CAP, &iov->cap);
+       pci_read_config_byte(dev, pos + PCI_SRIOV_FUNC_LINK, &iov->link);
+
+       if (pdev)
+               iov->dev = pci_dev_get(pdev);
+       else {
+               iov->dev = dev;
+               mutex_init(&iov->lock);
+       }
+
+       dev->sriov = iov;
+       dev->is_physfn = 1;
+
+       return 0;
+
+failed:
+       for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+               res = dev->resource + PCI_IOV_RESOURCES + i;
+               res->flags = 0;
+       }
+
+       return rc;
+}
+
+static void sriov_release(struct pci_dev *dev)
+{
+       if (dev == dev->sriov->dev)
+               mutex_destroy(&dev->sriov->lock);
+       else
+               pci_dev_put(dev->sriov->dev);
+
+       kfree(dev->sriov);
+       dev->sriov = NULL;
+}
+
+/**
+ * pci_iov_init - initialize the IOV capability
+ * @dev: the PCI device
+ *
+ * Returns 0 on success, or negative on failure.
+ */
+int pci_iov_init(struct pci_dev *dev)
+{
+       int pos;
+
+       pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV);
+       if (pos)
+               return sriov_init(dev, pos);
+
+       return -ENODEV;
+}
+
+/**
+ * pci_iov_release - release resources used by the IOV capability
+ * @dev: the PCI device
+ */
+void pci_iov_release(struct pci_dev *dev)
+{
+       if (dev->is_physfn)
+               sriov_release(dev);
+}
+
+/**
+ * pci_iov_resource_bar - get position of the SR-IOV BAR
+ * @dev: the PCI device
+ * @resno: the resource number
+ * @type: the BAR type to be filled in
+ *
+ * Returns position of the BAR encapsulated in the SR-IOV capability.
+ */
+int pci_iov_resource_bar(struct pci_dev *dev, int resno,
+                        enum pci_bar_type *type)
+{
+       if (resno < PCI_IOV_RESOURCES || resno > PCI_IOV_RESOURCE_END)
+               return 0;
+
+       BUG_ON(!dev->is_physfn);
+
+       *type = pci_bar_unknown;
+
+       return dev->sriov->pos + PCI_SRIOV_BAR +
+               4 * (resno - PCI_IOV_RESOURCES);
+}
diff -r e8a9f8910a3f -r 92730fa71044 drivers/pci/pci.c
--- a/drivers/pci/pci.c Fri Mar 13 10:08:22 2009 +0000
+++ b/drivers/pci/pci.c Tue Mar 17 01:43:05 2009 -0400
@@ -1048,12 +1048,19 @@
  */
 int pci_resource_bar(struct pci_dev *dev, int resno, enum pci_bar_type *type)
 {
+       int reg;
+
        if (resno < PCI_ROM_RESOURCE) {
                *type = pci_bar_unknown;
                return PCI_BASE_ADDRESS_0 + 4 * resno;
        } else if (resno == PCI_ROM_RESOURCE) {
                *type = pci_bar_mem32;
                return dev->rom_base_reg;
+       } else if (resno < PCI_BRIDGE_RESOURCES) {
+               /* device specific resource */
+               reg = pci_iov_resource_bar(dev, resno, type);
+               if (reg)
+                       return reg;
        }
 
        dev_err(&dev->dev, "BAR: invalid resource #%d\n", resno);
diff -r e8a9f8910a3f -r 92730fa71044 drivers/pci/pci.h
--- a/drivers/pci/pci.h Fri Mar 13 10:08:22 2009 +0000
+++ b/drivers/pci/pci.h Tue Mar 17 01:43:05 2009 -0400
@@ -132,3 +132,40 @@
 {
        return bus->self && bus->self->ari_enabled;
 }
+
+/* Single Root I/O Virtualization */
+struct pci_sriov {
+       int pos;                /* capability position */
+       int nres;               /* number of resources */
+       u32 cap;                /* SR-IOV Capabilities */
+       u16 ctrl;               /* SR-IOV Control */
+       u16 total;              /* total VFs associated with the PF */
+       u16 offset;             /* first VF Routing ID offset */
+       u16 stride;             /* following VF stride */
+       u32 pgsz;               /* page size for BAR alignment */
+       u8 link;                /* Function Dependency Link */
+       struct pci_dev *dev;    /* lowest numbered PF */
+       struct pci_dev *self;   /* this PF */
+       struct mutex lock;      /* lock for VF bus */
+};
+
+#ifdef CONFIG_PCI_IOV
+extern int pci_iov_init(struct pci_dev *dev);
+extern void pci_iov_release(struct pci_dev *dev);
+extern int pci_iov_resource_bar(struct pci_dev *dev, int resno,
+                               enum pci_bar_type *type);
+#else
+static inline int pci_iov_init(struct pci_dev *dev)
+{
+       return -ENODEV;
+}
+static inline void pci_iov_release(struct pci_dev *dev)
+
+{
+}
+static inline int pci_iov_resource_bar(struct pci_dev *dev, int resno,
+                                      enum pci_bar_type *type)
+{
+       return 0;
+}
+#endif /* CONFIG_PCI_IOV */
diff -r e8a9f8910a3f -r 92730fa71044 drivers/pci/probe.c
--- a/drivers/pci/probe.c       Fri Mar 13 10:08:22 2009 +0000
+++ b/drivers/pci/probe.c       Tue Mar 17 01:43:05 2009 -0400
@@ -765,6 +765,9 @@
        struct pci_dev *pci_dev;
 
        pci_dev = to_pci_dev(dev);
+
+       pci_iov_release(pci_dev);
+
        kfree(pci_dev);
 }
 
@@ -891,6 +894,9 @@
 
        /* Alternative Routing-ID Forwarding */
        pci_enable_ari(dev);
+
+       /* Single Root I/O Virtualization */
+       pci_iov_init(dev);
 
        /*
         * Add the device to our list of discovered devices
diff -r e8a9f8910a3f -r 92730fa71044 include/linux/pci.h
--- a/include/linux/pci.h       Fri Mar 13 10:08:22 2009 +0000
+++ b/include/linux/pci.h       Tue Mar 17 01:43:05 2009 -0400
@@ -77,6 +77,12 @@
        /* #6: expansion ROM resource */
        PCI_ROM_RESOURCE,
 
+       /* device specific resources */
+#ifdef CONFIG_PCI_IOV
+       PCI_IOV_RESOURCES,
+       PCI_IOV_RESOURCE_END = PCI_IOV_RESOURCES + PCI_SRIOV_NUM_BARS - 1,
+#endif
+
        /* resources assigned to buses behind the bridge */
 #define PCI_BRIDGE_RESOURCE_NUM 4
 
@@ -127,6 +133,8 @@
        char cap_nr;
        u32 data[0];
 };
+
+struct pci_sriov;
 
 /*
  * The pci_dev structure is used to describe PCI devices.
@@ -189,13 +197,17 @@
        unsigned int    broken_parity_status:1; /* Device generates false 
positive parity */
        unsigned int    msi_enabled:1;
        unsigned int    msix_enabled:1;
+       unsigned int    ari_enabled:1;  /* ARI forwarding */
+       unsigned int    is_physfn:1;
 
        u32             saved_config_space[16]; /* config space saved at 
suspend time */
        struct hlist_head saved_cap_space;
        struct bin_attribute *rom_attr; /* attribute descriptor for sysfs ROM 
entry */
        int rom_attr_enabled;           /* has display of the rom attribute 
been enabled? */
        struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file 
for resources */
-       unsigned int    ari_enabled:1;  /* ARI forwarding */
+#ifdef CONFIG_PCI_IOV
+       struct pci_sriov *sriov;        /* SR-IOV capability related */
+#endif
 };
 
 #define pci_dev_g(n) list_entry(n, struct pci_dev, global_list)
diff -r e8a9f8910a3f -r 92730fa71044 include/linux/pci_regs.h
--- a/include/linux/pci_regs.h  Fri Mar 13 10:08:22 2009 +0000
+++ b/include/linux/pci_regs.h  Tue Mar 17 01:43:05 2009 -0400
@@ -332,6 +332,7 @@
 #define  PCI_EXP_TYPE_UPSTREAM 0x5     /* Upstream Port */
 #define  PCI_EXP_TYPE_DOWNSTREAM 0x6   /* Downstream Port */
 #define  PCI_EXP_TYPE_PCI_BRIDGE 0x7   /* PCI/PCI-X Bridge */
+#define  PCI_EXP_TYPE_RC_END   0x9     /* Root Complex Integrated Endpoint */
 #define PCI_EXP_FLAGS_SLOT     0x0100  /* Slot implemented */
 #define PCI_EXP_FLAGS_IRQ      0x3e00  /* Interrupt message number */
 #define PCI_EXP_DEVCAP         4       /* Device capabilities */
@@ -393,6 +394,7 @@
 #define PCI_EXT_CAP_ID_DSN     3
 #define PCI_EXT_CAP_ID_PWR     4
 #define PCI_EXT_CAP_ID_ARI     14
+#define PCI_EXT_CAP_ID_SRIOV   16
 
 /* Advanced Error Reporting */
 #define PCI_ERR_UNCOR_STATUS   4       /* Uncorrectable Error Status */
@@ -478,4 +480,35 @@
 #define  PCI_ARI_CTRL_ACS      0x0002  /* ACS Function Groups Enable */
 #define  PCI_ARI_CTRL_FG(x)    (((x) >> 4) & 7) /* Function Group */
 
+/* Single Root I/O Virtualization */
+#define PCI_SRIOV_CAP          0x04    /* SR-IOV Capabilities */
+#define  PCI_SRIOV_CAP_VFM     0x01    /* VF Migration Capable */
+#define  PCI_SRIOV_CAP_INTR(x) ((x) >> 21) /* Interrupt Message Number */
+#define PCI_SRIOV_CTRL         0x08    /* SR-IOV Control */
+#define  PCI_SRIOV_CTRL_VFE    0x01    /* VF Enable */
+#define  PCI_SRIOV_CTRL_VFM    0x02    /* VF Migration Enable */
+#define  PCI_SRIOV_CTRL_INTR   0x04    /* VF Migration Interrupt Enable */
+#define  PCI_SRIOV_CTRL_MSE    0x08    /* VF Memory Space Enable */
+#define  PCI_SRIOV_CTRL_ARI    0x10    /* ARI Capable Hierarchy */
+#define PCI_SRIOV_STATUS       0x0a    /* SR-IOV Status */
+#define  PCI_SRIOV_STATUS_VFM  0x01    /* VF Migration Status */
+#define PCI_SRIOV_INITIAL_VF   0x0c    /* Initial VFs */
+#define PCI_SRIOV_TOTAL_VF     0x0e    /* Total VFs */
+#define PCI_SRIOV_NUM_VF       0x10    /* Number of VFs */
+#define PCI_SRIOV_FUNC_LINK    0x12    /* Function Dependency Link */
+#define PCI_SRIOV_VF_OFFSET    0x14    /* First VF Offset */
+#define PCI_SRIOV_VF_STRIDE    0x16    /* Following VF Stride */
+#define PCI_SRIOV_VF_DID       0x1a    /* VF Device ID */
+#define PCI_SRIOV_SUP_PGSIZE   0x1c    /* Supported Page Sizes */
+#define PCI_SRIOV_SYS_PGSIZE   0x20    /* System Page Size */
+#define PCI_SRIOV_BAR          0x24    /* VF BAR0 */
+#define  PCI_SRIOV_NUM_BARS    6       /* Number of VF BARs */
+#define PCI_SRIOV_VFM          0x3c    /* VF Migration State Array Offset*/
+#define  PCI_SRIOV_VFM_BIR(x)  ((x) & 7)       /* State BIR */
+#define  PCI_SRIOV_VFM_OFFSET(x) ((x) & ~7)    /* State Offset */
+#define  PCI_SRIOV_VFM_UA      0x0     /* Inactive.Unavailable */
+#define  PCI_SRIOV_VFM_MI      0x1     /* Dormant.MigrateIn */
+#define  PCI_SRIOV_VFM_MO      0x2     /* Active.MigrateOut */
+#define  PCI_SRIOV_VFM_AV      0x3     /* Active.Available */
+
 #endif /* LINUX_PCI_REGS_H */
# HG changeset patch
# User Yu Zhao <yu.zhao@xxxxxxxxx>
# Date 1237268742 14400
# Node ID 2629935bf356bb7118f8691a46e90daed77c3b48
# Parent  92730fa710446b2502809faa72bb29fda95ba878
PCI: restore saved SR-IOV state

Restore the volatile registers in the SR-IOV capability after the
D3->D0 transition.

Signed-off-by: Yu Zhao <yu.zhao@xxxxxxxxx>

diff -r 92730fa71044 -r 2629935bf356 drivers/pci/iov.c
--- a/drivers/pci/iov.c Tue Mar 17 01:43:05 2009 -0400
+++ b/drivers/pci/iov.c Tue Mar 17 01:45:42 2009 -0400
@@ -125,6 +125,25 @@
        dev->sriov = NULL;
 }
 
+static void sriov_restore_state(struct pci_dev *dev)
+{
+       int i;
+       u16 ctrl;
+       struct pci_sriov *iov = dev->sriov;
+
+       pci_read_config_word(dev, iov->pos + PCI_SRIOV_CTRL, &ctrl);
+       if (ctrl & PCI_SRIOV_CTRL_VFE)
+               return;
+
+       for (i = PCI_IOV_RESOURCES; i <= PCI_IOV_RESOURCE_END; i++)
+               pci_update_resource(dev, i);
+
+       pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz);
+       pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
+       if (iov->ctrl & PCI_SRIOV_CTRL_VFE)
+               msleep(100);
+}
+
 /**
  * pci_iov_init - initialize the IOV capability
  * @dev: the PCI device
@@ -173,3 +192,13 @@
        return dev->sriov->pos + PCI_SRIOV_BAR +
                4 * (resno - PCI_IOV_RESOURCES);
 }
+
+/**
+ * pci_restore_iov_state - restore the state of the IOV capability
+ * @dev: the PCI device
+ */
+void pci_restore_iov_state(struct pci_dev *dev)
+{
+       if (dev->is_physfn)
+               sriov_restore_state(dev);
+}
diff -r 92730fa71044 -r 2629935bf356 drivers/pci/pci.c
--- a/drivers/pci/pci.c Tue Mar 17 01:43:05 2009 -0400
+++ b/drivers/pci/pci.c Tue Mar 17 01:45:42 2009 -0400
@@ -562,6 +562,8 @@
        pci_restore_pcix_state(dev);
        pci_restore_msi_state(dev);
        pci_restore_msix_state(dev);
+       pci_restore_iov_state(dev);
+
        return 0;
 }
 
diff -r 92730fa71044 -r 2629935bf356 drivers/pci/pci.h
--- a/drivers/pci/pci.h Tue Mar 17 01:43:05 2009 -0400
+++ b/drivers/pci/pci.h Tue Mar 17 01:45:42 2009 -0400
@@ -154,6 +154,7 @@
 extern void pci_iov_release(struct pci_dev *dev);
 extern int pci_iov_resource_bar(struct pci_dev *dev, int resno,
                                enum pci_bar_type *type);
+extern void pci_restore_iov_state(struct pci_dev *dev);
 #else
 static inline int pci_iov_init(struct pci_dev *dev)
 {
@@ -168,4 +169,7 @@
 {
        return 0;
 }
+static inline void pci_restore_iov_state(struct pci_dev *dev)
+{
+}
 #endif /* CONFIG_PCI_IOV */
# HG changeset patch
# User Yu Zhao <yu.zhao@xxxxxxxxx>
# Date 1237268873 14400
# Node ID 6b776c705e444562dda66dc0b33fd80eaceb1bfb
# Parent  2629935bf356bb7118f8691a46e90daed77c3b48
PCI: reserve bus range for SR-IOV device

Reserve the bus number range used by the Virtual Function when
pcibios_assign_all_busses() returns true.

Signed-off-by: Yu Zhao <yu.zhao@xxxxxxxxx>

diff -r 2629935bf356 -r 6b776c705e44 drivers/pci/iov.c
--- a/drivers/pci/iov.c Tue Mar 17 01:45:42 2009 -0400
+++ b/drivers/pci/iov.c Tue Mar 17 01:47:53 2009 -0400
@@ -13,6 +13,18 @@
 #include <linux/delay.h>
 #include "pci.h"
 
+
+static inline u8 virtfn_bus(struct pci_dev *dev, int id)
+{
+       return dev->bus->number + ((dev->devfn + dev->sriov->offset +
+                                   dev->sriov->stride * id) >> 8);
+}
+
+static inline u8 virtfn_devfn(struct pci_dev *dev, int id)
+{
+       return (dev->devfn + dev->sriov->offset +
+               dev->sriov->stride * id) & 0xff;
+}
 
 static int sriov_init(struct pci_dev *dev, int pos)
 {
@@ -202,3 +214,27 @@
        if (dev->is_physfn)
                sriov_restore_state(dev);
 }
+
+/**
+ * pci_iov_bus_range - find bus range used by Virtual Function
+ * @bus: the PCI bus
+ *
+ * Returns max number of buses (exclude current one) used by Virtual
+ * Functions.
+ */
+int pci_iov_bus_range(struct pci_bus *bus)
+{
+       int max = 0;
+       u8 busnr;
+       struct pci_dev *dev;
+
+       list_for_each_entry(dev, &bus->devices, bus_list) {
+               if (!dev->is_physfn)
+                       continue;
+               busnr = virtfn_bus(dev, dev->sriov->total - 1);
+               if (busnr > max)
+                       max = busnr;
+       }
+
+       return max ? max - bus->number : 0;
+}
diff -r 2629935bf356 -r 6b776c705e44 drivers/pci/pci.h
--- a/drivers/pci/pci.h Tue Mar 17 01:45:42 2009 -0400
+++ b/drivers/pci/pci.h Tue Mar 17 01:47:53 2009 -0400
@@ -155,6 +155,7 @@
 extern int pci_iov_resource_bar(struct pci_dev *dev, int resno,
                                enum pci_bar_type *type);
 extern void pci_restore_iov_state(struct pci_dev *dev);
+extern int pci_iov_bus_range(struct pci_bus *bus);
 #else
 static inline int pci_iov_init(struct pci_dev *dev)
 {
@@ -172,4 +173,8 @@
 static inline void pci_restore_iov_state(struct pci_dev *dev)
 {
 }
+static inline int pci_iov_bus_range(struct pci_bus *bus)
+{
+       return 0;
+}
 #endif /* CONFIG_PCI_IOV */
diff -r 2629935bf356 -r 6b776c705e44 drivers/pci/probe.c
--- a/drivers/pci/probe.c       Tue Mar 17 01:45:42 2009 -0400
+++ b/drivers/pci/probe.c       Tue Mar 17 01:47:53 2009 -0400
@@ -976,6 +976,9 @@
        for (devfn = 0; devfn < 0x100; devfn += 8)
                pci_scan_slot(bus, devfn);
 
+       /* Reserve buses for SR-IOV capability. */
+       max += pci_iov_bus_range(bus);
+
        /*
         * After performing arch-dependent fixup of the bus, look behind
         * all PCI-to-PCI bridges on this bus.
# HG changeset patch
# User Yu Zhao <yu.zhao@xxxxxxxxx>
# Date 1237270918 14400
# Node ID 3a2d0f486f533f0ef21267b9a1682997e0caf463
# Parent  6b776c705e444562dda66dc0b33fd80eaceb1bfb
PCI: centralize device setup code

Move the device setup code into pci_setup_device(), which will be used
to set up the Virtual Function later.

Signed-off-by: Yu Zhao <yu.zhao@xxxxxxxxx>

diff -r 6b776c705e44 -r 3a2d0f486f53 drivers/pci/pci.h
--- a/drivers/pci/pci.h Tue Mar 17 01:47:53 2009 -0400
+++ b/drivers/pci/pci.h Tue Mar 17 02:21:58 2009 -0400
@@ -117,6 +117,7 @@
        pci_bar_mem64,          /* A 64-bit memory BAR */
 };
 
+extern int pci_setup_device(struct pci_dev *dev);
 extern int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
                                struct resource *res, unsigned int reg);
 extern int pci_resource_bar(struct pci_dev *dev, int resno,
diff -r 6b776c705e44 -r 3a2d0f486f53 drivers/pci/probe.c
--- a/drivers/pci/probe.c       Tue Mar 17 01:47:53 2009 -0400
+++ b/drivers/pci/probe.c       Tue Mar 17 02:21:58 2009 -0400
@@ -683,13 +683,28 @@
  * Initialize the device structure with information about the device's 
  * vendor,class,memory and IO-space addresses,IRQ lines etc.
  * Called at initialisation of the PCI subsystem and by CardBus services.
- * Returns 0 on success and -1 if unknown type of device (not normal, bridge
- * or CardBus).
+ * Returns 0 on success and negative if unknown type of device (not normal,
+ * bridge or CardBus).
  */
-static int pci_setup_device(struct pci_dev * dev)
+int pci_setup_device(struct pci_dev *dev)
 {
        u32 class;
+       u8 hdr_type;
 
+       if (pci_read_config_byte(dev, PCI_HEADER_TYPE, &hdr_type))
+               return -EIO;
+
+       dev->sysdata = dev->bus->sysdata;
+       dev->dev.parent = dev->bus->bridge;
+       dev->dev.bus = &pci_bus_type;
+       dev->hdr_type = hdr_type & 0x7f;
+       dev->multifunction = !!(hdr_type & 0x80);
+       dev->cfg_size = pci_cfg_space_size(dev);
+       dev->error_state = pci_channel_io_normal;
+
+       /* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer)
+          set this higher, assuming the system even supports it.  */
+       dev->dma_mask = 0xffffffff;
        sprintf(pci_name(dev), "%04x:%02x:%02x.%d", pci_domain_nr(dev->bus),
                dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
 
@@ -706,7 +721,6 @@
 
        /* Early fixups, before probing the BARs */
        pci_fixup_device(pci_fixup_early, dev);
-       class = dev->class >> 8;
 
        switch (dev->hdr_type) {                    /* header type */
        case PCI_HEADER_TYPE_NORMAL:                /* standard header */
@@ -741,7 +755,7 @@
        default:                                    /* unknown header */
                printk(KERN_ERR "PCI: device %s has unknown header type %02x, 
ignoring.\n",
                        pci_name(dev), dev->hdr_type);
-               return -1;
+               return -EIO;
 
        bad:
                printk(KERN_ERR "PCI: %s: class %x doesn't match header type 
%02x. Ignoring class.\n",
@@ -823,7 +837,6 @@
 {
        struct pci_dev *dev;
        u32 l;
-       u8 hdr_type;
        int delay = 1;
 
        if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, &l))
@@ -850,29 +863,16 @@
                }
        }
 
-       if (pci_bus_read_config_byte(bus, devfn, PCI_HEADER_TYPE, &hdr_type))
-               return NULL;
-
        dev = kzalloc(sizeof(struct pci_dev), GFP_KERNEL);
        if (!dev)
                return NULL;
 
        dev->bus = bus;
-       dev->sysdata = bus->sysdata;
-       dev->dev.parent = bus->bridge;
-       dev->dev.bus = &pci_bus_type;
        dev->devfn = devfn;
-       dev->hdr_type = hdr_type & 0x7f;
-       dev->multifunction = !!(hdr_type & 0x80);
        dev->vendor = l & 0xffff;
        dev->device = (l >> 16) & 0xffff;
-       dev->cfg_size = pci_cfg_space_size(dev);
-       dev->error_state = pci_channel_io_normal;
 
-       /* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer)
-          set this higher, assuming the system even supports it.  */
-       dev->dma_mask = 0xffffffff;
-       if (pci_setup_device(dev) < 0) {
+       if (pci_setup_device(dev)) {
                kfree(dev);
                return NULL;
        }
# HG changeset patch
# User Yu Zhao <yu.zhao@xxxxxxxxx>
# Date 1237270982 14400
# Node ID 577169901110eb89ff36f1460e152a5c96297bde
# Parent  3a2d0f486f533f0ef21267b9a1682997e0caf463
PCI: add SR-IOV API for Physical Function driver

Add or remove the Virtual Functions when SR-IOV is enabled or disabled
by the device driver. This can happen at any time rather than only at
the device probe stage.

Signed-off-by: Yu Zhao <yu.zhao@xxxxxxxxx>

diff -r 3a2d0f486f53 -r 577169901110 drivers/pci/iov.c
--- a/drivers/pci/iov.c Tue Mar 17 02:21:58 2009 -0400
+++ b/drivers/pci/iov.c Tue Mar 17 02:23:02 2009 -0400
@@ -13,6 +13,7 @@
 #include <linux/delay.h>
 #include "pci.h"
 
+#define VIRTFN_ID_LEN  16
 
 static inline u8 virtfn_bus(struct pci_dev *dev, int id)
 {
@@ -24,6 +25,306 @@
 {
        return (dev->devfn + dev->sriov->offset +
                dev->sriov->stride * id) & 0xff;
+}
+
+/*
+ * Find or create the bus numbered @busnr for a Virtual Function: @bus itself
+ * if its number matches, else a bus already known to pci_find_bus(), else a
+ * new child of @bus.  Returns NULL if the new bus cannot be added.
+ */
+static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr)
+{
+       struct pci_bus *child;
+
+       if (bus->number == busnr)
+               return bus;
+
+       child = pci_find_bus(pci_domain_nr(bus), busnr);
+       if (child)
+               return child;
+
+       child = pci_add_new_bus(bus, NULL, busnr);
+       if (!child)
+               return NULL;
+
+       child->subordinate = busnr;
+
+       return child;
+}
+
+/*
+ * Remove the bus numbered @busnr if no device is left on it.  @bus itself
+ * (the bus the callers pass as the Physical Function's bus) is never removed.
+ */
+static void virtfn_remove_bus(struct pci_bus *bus, int busnr)
+{
+       struct pci_bus *child;
+
+       if (bus->number == busnr)
+               return;
+
+       child = pci_find_bus(pci_domain_nr(bus), busnr);
+       BUG_ON(!child);
+
+       if (list_empty(&child->devices))
+               pci_remove_bus(child);
+}
+
+/*
+ * Create Virtual Function @id of the Physical Function @dev: set up its
+ * pci_dev, give it its slice of each of the PF's IOV resources, add it to
+ * the bus and link PF and VF in sysfs ("virtfnN" and "physfn").
+ * Returns 0 on success or a negative errno.
+ */
+static int virtfn_add(struct pci_dev *dev, int id)
+{
+       int i;
+       int rc;
+       u64 size;
+       char buf[VIRTFN_ID_LEN];
+       struct pci_dev *virtfn;
+       struct resource *res;
+       struct pci_sriov *iov = dev->sriov;
+
+       virtfn = kzalloc(sizeof(struct pci_dev), GFP_KERNEL);
+       if (!virtfn)
+               return -ENOMEM;
+
+       mutex_lock(&iov->dev->sriov->lock);
+       virtfn->bus = virtfn_add_bus(dev->bus, virtfn_bus(dev, id));
+       if (!virtfn->bus) {
+               kfree(virtfn);
+               mutex_unlock(&iov->dev->sriov->lock);
+               return -ENOMEM;
+       }
+       virtfn->devfn = virtfn_devfn(dev, id);
+       virtfn->vendor = dev->vendor;
+       pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_DID, &virtfn->device);
+       rc = pci_setup_device(virtfn);
+       if (rc) {
+               /* virtfn was never added to the bus: undo by hand */
+               virtfn_remove_bus(dev->bus, virtfn_bus(dev, id));
+               kfree(virtfn);
+               mutex_unlock(&iov->dev->sriov->lock);
+               return rc;
+       }
+       virtfn->dev.parent = dev->dev.parent;
+
+       for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+               res = dev->resource + PCI_IOV_RESOURCES + i;
+               if (!res->parent)
+                       continue;
+               virtfn->resource[i].name = pci_name(virtfn);
+               virtfn->resource[i].flags = res->flags;
+               size = res->end - res->start + 1;
+               do_div(size, iov->total);
+               virtfn->resource[i].start = res->start + size * id;
+               virtfn->resource[i].end = virtfn->resource[i].start + size - 1;
+               rc = request_resource(res, &virtfn->resource[i]);
+               BUG_ON(rc);
+       }
+
+       pci_device_add(virtfn, virtfn->bus);
+       mutex_unlock(&iov->dev->sriov->lock);
+
+       virtfn->physfn = pci_dev_get(dev);
+       virtfn->is_virtfn = 1;
+
+       pci_bus_add_device(virtfn);
+       sprintf(buf, "virtfn%u", id);
+       rc = sysfs_create_link(&dev->dev.kobj, &virtfn->dev.kobj, buf);
+       if (rc)
+               goto failed1;
+       rc = sysfs_create_link(&virtfn->dev.kobj, &dev->dev.kobj, "physfn");
+       if (rc)
+               goto failed2;
+
+       kobject_uevent(&virtfn->dev.kobj, KOBJ_CHANGE);
+
+       return 0;
+
+failed2:
+       sysfs_remove_link(&dev->dev.kobj, buf);
+failed1:
+       pci_dev_put(dev);
+       mutex_lock(&iov->dev->sriov->lock);
+       pci_remove_bus_device(virtfn);
+       virtfn_remove_bus(dev->bus, virtfn_bus(dev, id));
+       mutex_unlock(&iov->dev->sriov->lock);
+
+       return rc;
+}
+
+/*
+ * Remove Virtual Function @id of the Physical Function @dev: delete the
+ * sysfs links, remove the VF from its bus and drop the PF reference held
+ * via virtfn->physfn.  Does nothing if the VF or its bus is not found.
+ */
+static void virtfn_remove(struct pci_dev *dev, int id)
+{
+       char buf[VIRTFN_ID_LEN];
+       struct pci_bus *bus;
+       struct pci_dev *virtfn;
+       struct pci_sriov *iov = dev->sriov;
+
+       bus = pci_find_bus(pci_domain_nr(dev->bus), virtfn_bus(dev, id));
+       if (!bus)
+               return;
+
+       virtfn = pci_get_slot(bus, virtfn_devfn(dev, id));
+       if (!virtfn)
+               return;
+
+       sprintf(buf, "virtfn%u", id);
+       sysfs_remove_link(&dev->dev.kobj, buf);
+       sysfs_remove_link(&virtfn->dev.kobj, "physfn");
+
+       mutex_lock(&iov->dev->sriov->lock);
+       pci_remove_bus_device(virtfn);
+       virtfn_remove_bus(dev->bus, virtfn_bus(dev, id));
+       mutex_unlock(&iov->dev->sriov->lock);
+
+       /* balance pci_get_slot(), only now that virtfn is no longer used */
+       pci_dev_put(virtfn);
+       pci_dev_put(dev);
+}
+
+/*
+ * Enable SR-IOV on the Physical Function @dev with @nr_virtfn Virtual
+ * Functions.  Returns 0 on success or when @nr_virtfn is 0, -EINVAL if VFs
+ * are already enabled or @nr_virtfn is out of range, and another negative
+ * errno for any other failure.
+ */
+static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
+{
+       int rc;
+       int i, j;
+       int nres;
+       u16 offset, stride, initial;
+       struct resource *res;
+       struct pci_dev *pdev;
+       struct pci_sriov *iov = dev->sriov;
+
+       if (!nr_virtfn)
+               return 0;
+
+       if (iov->nr_virtfn)
+               return -EINVAL;
+
+       pci_read_config_word(dev, iov->pos + PCI_SRIOV_INITIAL_VF, &initial);
+       if (initial > iov->total ||
+           (!(iov->cap & PCI_SRIOV_CAP_VFM) && (initial != iov->total)))
+               return -EIO;
+
+       if (nr_virtfn < 0 || nr_virtfn > iov->total ||
+           (!(iov->cap & PCI_SRIOV_CAP_VFM) && (nr_virtfn > initial)))
+               return -EINVAL;
+
+       pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn);
+       pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &offset);
+       pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &stride);
+       if (!offset || (nr_virtfn > 1 && !stride))
+               return -EIO;
+
+       nres = 0;
+       for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+               res = dev->resource + PCI_IOV_RESOURCES + i;
+               if (res->parent)
+                       nres++;
+       }
+       if (nres != iov->nres) {
+               dev_err(&dev->dev, "not enough MMIO resources for SR-IOV\n");
+               return -ENOMEM;
+       }
+
+       iov->offset = offset;
+       iov->stride = stride;
+
+       if (virtfn_bus(dev, nr_virtfn - 1) > dev->bus->subordinate) {
+               dev_err(&dev->dev, "SR-IOV: bus number out of range\n");
+               return -ENOMEM;
+       }
+
+       if (iov->link != dev->devfn) {
+               pdev = pci_get_slot(dev->bus, iov->link);
+               if (!pdev)
+                       return -ENODEV;
+
+               /* hold the pci_get_slot() reference while pdev is in use */
+               if (!pdev->is_physfn) {
+                       pci_dev_put(pdev);
+                       return -ENODEV;
+               }
+
+               rc = sysfs_create_link(&dev->dev.kobj,
+                                       &pdev->dev.kobj, "dep_link");
+               pci_dev_put(pdev);
+               if (rc)
+                       return rc;
+       }
+
+       iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE;
+       pci_block_user_cfg_access(dev);
+       pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
+       msleep(100);
+       pci_unblock_user_cfg_access(dev);
+
+       iov->initial = initial;
+       if (nr_virtfn < initial)
+               initial = nr_virtfn;
+
+       for (i = 0; i < initial; i++) {
+               rc = virtfn_add(dev, i);
+               if (rc)
+                       goto failed;
+       }
+
+       kobject_uevent(&dev->dev.kobj, KOBJ_CHANGE);
+       iov->nr_virtfn = nr_virtfn;
+
+       return 0;
+
+failed:
+       for (j = 0; j < i; j++)
+               virtfn_remove(dev, j);
+
+       iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
+       pci_block_user_cfg_access(dev);
+       pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
+       ssleep(1);
+       pci_unblock_user_cfg_access(dev);
+
+       if (iov->link != dev->devfn)
+               sysfs_remove_link(&dev->dev.kobj, "dep_link");
+
+       return rc;
+}
+
+/*
+ * Disable SR-IOV on @dev: remove all Virtual Functions, clear
+ * PCI_SRIOV_CTRL_VFE and PCI_SRIOV_CTRL_MSE, and remove "dep_link" if any.
+ */
+static void sriov_disable(struct pci_dev *dev)
+{
+       int i;
+       struct pci_sriov *iov = dev->sriov;
+
+       if (!iov->nr_virtfn)
+               return;
+
+       for (i = 0; i < iov->nr_virtfn; i++)
+               virtfn_remove(dev, i);
+
+       iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
+       pci_block_user_cfg_access(dev);
+       pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
+       ssleep(1);
+       pci_unblock_user_cfg_access(dev);
+
+       if (iov->link != dev->devfn)
+               sysfs_remove_link(&dev->dev.kobj, "dep_link");
+
+       iov->nr_virtfn = 0;
 }
 
 static int sriov_init(struct pci_dev *dev, int pos)
@@ -128,6 +390,8 @@
 
 static void sriov_release(struct pci_dev *dev)
 {
+       BUG_ON(dev->sriov->nr_virtfn);
+
        if (dev == dev->sriov->dev)
                mutex_destroy(&dev->sriov->lock);
        else
@@ -151,6 +415,7 @@
                pci_update_resource(dev, i);
 
        pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz);
+       pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, iov->nr_virtfn);
        pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
        if (iov->ctrl & PCI_SRIOV_CTRL_VFE)
                msleep(100);
@@ -238,3 +503,36 @@
 
        return max ? max - bus->number : 0;
 }
+
+/**
+ * pci_enable_sriov - enable the SR-IOV capability
+ * @dev: the PCI device
+ * @nr_virtfn: number of Virtual Functions to enable
+ *
+ * Returns 0 on success, or negative on failure.
+ */
+int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn)
+{
+       might_sleep();
+
+       if (!dev->is_physfn)
+               return -ENODEV;
+
+       return sriov_enable(dev, nr_virtfn);
+}
+EXPORT_SYMBOL_GPL(pci_enable_sriov);
+
+/**
+ * pci_disable_sriov - disable the SR-IOV capability
+ * @dev: the PCI device
+ */
+void pci_disable_sriov(struct pci_dev *dev)
+{
+       might_sleep();
+
+       if (!dev->is_physfn)
+               return;
+
+       sriov_disable(dev);
+}
+EXPORT_SYMBOL_GPL(pci_disable_sriov);
diff -r 3a2d0f486f53 -r 577169901110 drivers/pci/pci.h
--- a/drivers/pci/pci.h Tue Mar 17 02:21:58 2009 -0400
+++ b/drivers/pci/pci.h Tue Mar 17 02:23:02 2009 -0400
@@ -141,6 +141,8 @@
        u32 cap;                /* SR-IOV Capabilities */
        u16 ctrl;               /* SR-IOV Control */
        u16 total;              /* total VFs associated with the PF */
+       u16 initial;            /* initial VFs associated with the PF */
+       u16 nr_virtfn;          /* number of VFs available */
        u16 offset;             /* first VF Routing ID offset */
        u16 stride;             /* following VF stride */
        u32 pgsz;               /* page size for BAR alignment */
diff -r 3a2d0f486f53 -r 577169901110 include/linux/pci.h
--- a/include/linux/pci.h       Tue Mar 17 02:21:58 2009 -0400
+++ b/include/linux/pci.h       Tue Mar 17 02:23:02 2009 -0400
@@ -199,6 +199,7 @@
        unsigned int    msix_enabled:1;
        unsigned int    ari_enabled:1;  /* ARI forwarding */
        unsigned int    is_physfn:1;
+       unsigned int    is_virtfn:1;
 
        u32             saved_config_space[16]; /* config space saved at 
suspend time */
        struct hlist_head saved_cap_space;
@@ -206,7 +207,10 @@
        int rom_attr_enabled;           /* has display of the rom attribute 
been enabled? */
        struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file 
for resources */
 #ifdef CONFIG_PCI_IOV
-       struct pci_sriov *sriov;        /* SR-IOV capability related */
+       union {
+               struct pci_sriov *sriov;        /* SR-IOV capability related */
+               struct pci_dev *physfn; /* the PF this VF is associated with */
+       };
 #endif
 };
 
@@ -829,5 +833,18 @@
 int pci_is_guestdev(struct pci_dev *dev);
 #endif /* CONFIG_PCI_GUESTDEV */
 
+#ifdef CONFIG_PCI_IOV
+extern int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn);
+extern void pci_disable_sriov(struct pci_dev *dev);
+#else /* !CONFIG_PCI_IOV: stubs; enabling returns -ENODEV, disabling is a no-op */
+static inline int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn)
+{
+       return -ENODEV;
+}
+static inline void pci_disable_sriov(struct pci_dev *dev)
+{
+}
+#endif /* CONFIG_PCI_IOV */
+
 #endif /* __KERNEL__ */
 #endif /* LINUX_PCI_H */
# HG changeset patch
# User Yu Zhao <yu.zhao@xxxxxxxxx>
# Date 1237353055 14400
# Node ID 582ec8e86ffff64834e8c77ef6790774352ddc7a
# Parent  577169901110eb89ff36f1460e152a5c96297bde
PCI: pass ARI and SR-IOV device information to the hypervisor

PCIe Alternative Routing-ID Interpretation (ARI) ECN defines the Extended
Function -- a function whose function number is greater than 7 within an
ARI Device. Intel VT-d spec 1.2 section 8.3.2 specifies that the Extended
Function is under the scope of the same remapping unit as the traditional
function. The hypervisor needs to know if a function is an Extended Function
so it can find the proper DMAR for it.

Section 8.3.3 specifies that an SR-IOV Virtual Function is under the
scope of the same remapping unit as its Physical Function. The hypervisor
also needs to know if a function is a Virtual Function and which Physical
Function it is associated with, for the same reason.

diff -r 577169901110 -r 582ec8e86fff drivers/xen/core/pci.c
--- a/drivers/xen/core/pci.c    Tue Mar 17 02:23:02 2009 -0400
+++ b/drivers/xen/core/pci.c    Wed Mar 18 01:10:55 2009 -0400
@@ -6,6 +6,7 @@
 #include <linux/init.h>
 #include <linux/pci.h>
 #include <xen/interface/physdev.h>
+#include "../../pci/pci.h"
 
 static int (*pci_bus_probe)(struct device *dev);
 static int (*pci_bus_remove)(struct device *dev);
@@ -15,8 +16,16 @@
        int r;
        struct pci_dev *pci_dev = to_pci_dev(dev);
        struct physdev_manage_pci manage_pci;
+
+       memset(&manage_pci, 0, sizeof(manage_pci));
        manage_pci.bus = pci_dev->bus->number;
        manage_pci.devfn = pci_dev->devfn;
+       if (pci_dev->is_virtfn) {
+               manage_pci.is_virtfn = 1;
+               manage_pci.physfn.bus = pci_dev->physfn->bus->number;
+               manage_pci.physfn.devfn = pci_dev->physfn->devfn;
+       } else if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn))
+               manage_pci.is_extfn = 1;
 
        r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add, &manage_pci);
        if (r && r != -ENOSYS)
diff -r 577169901110 -r 582ec8e86fff include/xen/interface/physdev.h
--- a/include/xen/interface/physdev.h   Tue Mar 17 02:23:02 2009 -0400
+++ b/include/xen/interface/physdev.h   Wed Mar 18 01:10:55 2009 -0400
@@ -178,6 +178,12 @@
     /* IN */
     uint8_t bus;
     uint8_t devfn;
+    unsigned is_extfn:1;
+    unsigned is_virtfn:1;
+    struct {
+        uint8_t bus;
+        uint8_t devfn;
+    } physfn;
 }; 
 
 typedef struct physdev_manage_pci physdev_manage_pci_t;
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
<Prev in Thread] Current Thread [Next in Thread>