WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH 4/9] dom0 PCI: support SR-IOV capability

To: "xen-devel@xxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxx>
Subject: [Xen-devel] [PATCH 4/9] dom0 PCI: support SR-IOV capability
From: "Zhao, Yu" <yu.zhao@xxxxxxxxx>
Date: Sat, 27 Sep 2008 16:59:14 +0800
Accept-language: en-US
Acceptlanguage: en-US
Cc: Keir Fraser <keir.fraser@xxxxxxxxxxxxx>
Delivery-date: Sat, 27 Sep 2008 02:02:32 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
Thread-index: Ackgf1C0FMmUCxjARzGIxU8qJxBB1w==
Thread-topic: [PATCH 4/9] dom0 PCI: support SR-IOV capability
Add Single Root I/O Virtualization (SR-IOV) support.

Signed-off-by: Yu Zhao <yu.zhao@xxxxxxxxx>

diff -r 040046b91eb7 -r 75504b97c0ab drivers/pci/Kconfig
--- a/drivers/pci/Kconfig       Sat Sep 27 01:25:31 2008 -0400
+++ b/drivers/pci/Kconfig       Sat Sep 27 01:27:01 2008 -0400
@@ -27,3 +27,14 @@

          When in doubt, say N.

+config PCI_IOV
+       bool "PCI SR-IOV support"
+       depends on PCI
+       select PCI_MSI
+       default n
+       help
+         This option allows device drivers to enable Single Root I/O
+         Virtualization. Each Virtual Function's PCI configuration
+         space can be accessed using its own Bus, Device and Function
+         Number (Routing ID). Each Virtual Function also has PCI Memory
+         Space, which is used to map its own register set.
diff -r 040046b91eb7 -r 75504b97c0ab drivers/pci/Makefile
--- a/drivers/pci/Makefile      Sat Sep 27 01:25:31 2008 -0400
+++ b/drivers/pci/Makefile      Sat Sep 27 01:27:01 2008 -0400
@@ -47,3 +47,5 @@
 ifeq ($(CONFIG_PCI_DEBUG),y)
 EXTRA_CFLAGS += -DDEBUG
 endif
+
+obj-$(CONFIG_PCI_IOV) += iov.o
diff -r 040046b91eb7 -r 75504b97c0ab drivers/pci/iov.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/pci/iov.c Sat Sep 27 01:27:01 2008 -0400
@@ -0,0 +1,832 @@
+/*
+ * drivers/pci/iov.c
+ *
+ * Copyright (C) 2008 Intel Corporation
+ *
+ * PCI Express Single Root I/O Virtualization capability support.
+ */
+
+#include <linux/ctype.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <asm/page.h>
+#include "pci.h"
+
+#define VF_NAME_LEN    8
+
+
+/*
+ * A sysfs attribute used by the SR-IOV kobjects, bundled with the
+ * callbacks that iov_attr_show()/iov_attr_store() dispatch to.  Either
+ * callback may be NULL, in which case the dispatcher returns -EIO.
+ */
+struct iov_attr {
+       struct attribute attr;
+       ssize_t (*show)(struct kobject *,
+                       struct iov_attr *, char *);
+       ssize_t (*store)(struct kobject *,
+                       struct iov_attr *, const char *, size_t);
+};
+
+/*
+ * iov_config_attr(field) defines field##_show(), a show callback that
+ * prints the member of struct pci_iov named "field" as a decimal number
+ * followed by a newline.  The kobject passed in must be the one embedded
+ * in struct pci_iov.
+ */
+#define iov_config_attr(field)                                         \
+static ssize_t field##_show(struct kobject *kobj,                      \
+               struct iov_attr *attr, char *buf)                       \
+{                                                                      \
+       struct pci_iov *iov = container_of(kobj, struct pci_iov, kobj); \
+                                                                       \
+       return sprintf(buf, "%d\n", iov->field);                        \
+}
+
+/* show callbacks for the attributes listed in iov_attr[] */
+iov_config_attr(is_enabled);
+iov_config_attr(totalvfs);
+iov_config_attr(initialvfs);
+iov_config_attr(numvfs);
+
+/*
+ * Per Virtual Function sysfs state.  One entry per VF is allocated by
+ * pci_iov_create_sysfs(); attr and param are filled in later by
+ * pci_iov_register() when the PF driver supplies its own entries.
+ */
+struct vf_entry {
+       int vfn;                        /* index of this VF (0-based) */
+       struct kobject kobj;            /* sysfs directory of this VF */
+       struct pci_iov *iov;            /* owning SR-IOV capability */
+       struct iov_attr *attr;          /* array of PF driver entries */
+       char name[VF_NAME_LEN];         /* kobject name, the VF index */
+       char (*param)[PCI_IOV_PARAM_LEN]; /* one value string per entry */
+};
+
+/* sysfs read dispatcher: forward to the attribute's show callback */
+static ssize_t iov_attr_show(struct kobject *kobj,
+               struct attribute *attr, char *buf)
+{
+       struct iov_attr *entry = container_of(attr, struct iov_attr, attr);
+
+       if (!entry->show)
+               return -EIO;
+
+       return entry->show(kobj, entry, buf);
+}
+
+/* sysfs write dispatcher: forward to the attribute's store callback */
+static ssize_t iov_attr_store(struct kobject *kobj,
+               struct attribute *attr, const char *buf, size_t len)
+{
+       struct iov_attr *entry = container_of(attr, struct iov_attr, attr);
+
+       if (!entry->store)
+               return -EIO;
+
+       return entry->store(kobj, entry, buf, len);
+}
+
+/* sysfs operations shared by the "iov" kobject and the per-VF kobjects */
+static struct sysfs_ops iov_attr_ops = {
+       .show = iov_attr_show,
+       .store = iov_attr_store,
+};
+
+/*
+ * kobject type used for every kobject registered by this file.
+ * NOTE(review): no .release callback is provided; the memory holding
+ * the kobjects is freed explicitly by pci_iov_release() and
+ * pci_iov_remove_sysfs() -- confirm this is safe if a reference is
+ * still held at that point.
+ */
+static struct kobj_type iov_ktype = {
+       .sysfs_ops = &iov_attr_ops,
+};
+
+/*
+ * Compute the Routing ID of Virtual Function @vfn of Physical Function
+ * @dev and return it split into bus number and devfn.
+ */
+static inline void vf_rid(struct pci_dev *dev, int vfn, u8 *busnr, u8 *devfn)
+{
+       u16 pf_rid = (dev->bus->number << 8) + dev->devfn;
+       u16 vf = pf_rid + dev->iov->offset + dev->iov->stride * vfn;
+
+       *devfn = vf & 0xff;
+       *busnr = vf >> 8;
+}
+
+/*
+ * vf_add - create and register the pci_dev of one Virtual Function
+ * @dev: the Physical Function
+ * @vfn: Virtual Function Number
+ *
+ * Most fields are inherited from the PF or hard-coded because the VF's
+ * own configuration space does not carry them; the memory resources are
+ * equal slices of the PF's SR-IOV BAR resources.
+ *
+ * Returns 0 on success, -ENODEV if no pci_bus exists for the VF's bus
+ * number, -ENOMEM on allocation failure, or the error returned by
+ * request_resource().
+ */
+static int vf_add(struct pci_dev *dev, int vfn)
+{
+       int i;
+       int rc;
+       u8 busnr, devfn;
+       unsigned long size;
+       struct pci_dev *new;
+       struct pci_bus *bus, *child;
+       struct resource *res;
+
+       vf_rid(dev, vfn, &busnr, &devfn);
+
+       /*
+        * The VF may sit on the PF's own bus; iov_alloc_bus() only creates
+        * child buses for numbers above it, so check that case first.
+        */
+       bus = NULL;
+       if (busnr == dev->bus->number)
+               bus = dev->bus;
+       else
+               list_for_each_entry(child, &dev->bus->children, node)
+                       if (child->number == busnr) {
+                               bus = child;
+                               break;
+                       }
+       if (!bus)
+               return -ENODEV;
+
+       new = kzalloc(sizeof(*new), GFP_KERNEL);
+       if (!new)
+               return -ENOMEM;
+
+       new->bus = bus;
+       new->sysdata = bus->sysdata;
+       new->dev.parent = dev->dev.parent;
+       new->dev.bus = dev->dev.bus;
+       new->devfn = devfn;
+       new->hdr_type = PCI_HEADER_TYPE_NORMAL;
+       new->multifunction = 0;
+       new->vendor = dev->vendor;
+       /* the VF Device ID is published in the PF's SR-IOV capability */
+       pci_read_config_word(dev, dev->iov->cap + PCI_IOV_VF_DID, &new->device);
+       new->cfg_size = PCI_CFG_SPACE_EXP_SIZE;
+       new->error_state = pci_channel_io_normal;
+       new->dma_mask = 0xffffffff;
+
+       sprintf(pci_name(new), "%04x:%02x:%02x.%d", pci_domain_nr(bus),
+               busnr, PCI_SLOT(devfn), PCI_FUNC(devfn));
+
+       new->class = dev->class;
+       new->current_state = PCI_UNKNOWN;
+       new->irq = 0;
+
+       for (i = 0; i < PCI_IOV_NUM_BAR; i++) {
+               res = dev->resource + PCI_IOV_RESOURCES + i;
+               if (!res->parent)
+                       continue;
+               new->resource[i].name = pci_name(new);
+               new->resource[i].flags = res->flags;
+               /* each VF owns an equal share of the PF's SR-IOV BAR */
+               size = (res->end - res->start + 1) / dev->iov->totalvfs;
+               new->resource[i].start = res->start + size * vfn;
+               new->resource[i].end = new->resource[i].start + size - 1;
+               rc = request_resource(res, &new->resource[i]);
+               if (rc)
+                       goto failed;
+       }
+
+       new->subsystem_vendor = dev->subsystem_vendor;
+       pci_read_config_word(new, PCI_SUBSYSTEM_ID, &new->subsystem_device);
+
+       pci_device_add(new, bus);
+       pci_bus_add_device(new);
+       return 0;
+
+failed:
+       /* give back the slices claimed so far; skipped BARs have no parent */
+       while (i--)
+               if (new->resource[i].parent)
+                       release_resource(&new->resource[i]);
+       kfree(new);
+       return rc;
+}
+
+/* Remove the pci_dev of Virtual Function @vfn of @dev, if it exists. */
+static void vf_remove(struct pci_dev *dev, int vfn)
+{
+       u8 bus_nr, dev_fn;
+       struct pci_dev *vf;
+
+       vf_rid(dev, vfn, &bus_nr, &dev_fn);
+
+       vf = pci_find_slot(bus_nr, dev_fn);
+       if (vf)
+               pci_remove_bus_device(vf);
+}
+
+/*
+ * iov_enable - turn on the Virtual Functions of a Physical Function
+ * @iov: the SR-IOV capability state
+ *
+ * Callers in this file hold iov->mutex.  Returns 0 on success (or if
+ * already enabled), -ENODEV if no PF driver has registered a notify
+ * callback, or the error from vf_add().
+ */
+static int iov_enable(struct pci_iov *iov)
+{
+       int rc;
+       int i, j;
+       u16 ctrl;
+
+       if (!iov->notify)
+               return -ENODEV;
+
+       if (iov->is_enabled)
+               return 0;
+
+       /* pre-event: tell the PF driver how many VFs are about to appear */
+       iov->notify(iov->dev, iov->numvfs | PCI_IOV_ENABLE);
+       pci_read_config_word(iov->dev, iov->cap + PCI_IOV_CTRL, &ctrl);
+       ctrl |= (PCI_IOV_CTRL_VFE | PCI_IOV_CTRL_MSE);
+       pci_write_config_word(iov->dev, iov->cap + PCI_IOV_CTRL, ctrl);
+       /* wait one second after changing the enable bits before touching VFs */
+       ssleep(1);
+
+       for (i = 0; i < iov->numvfs; i++) {
+               rc = vf_add(iov->dev, i);
+               if (rc)
+                       goto failed;
+       }
+
+       /* post-event: all VFs have been added */
+       iov->notify(iov->dev, iov->numvfs |
+                               PCI_IOV_ENABLE | PCI_IOV_POST_EVENT);
+       iov->is_enabled = 1;
+       return 0;
+
+failed:
+       /* undo the VFs added so far, then clear the enable bits again */
+       for (j = 0; j < i; j++)
+               vf_remove(iov->dev, j);
+
+       pci_read_config_word(iov->dev, iov->cap + PCI_IOV_CTRL, &ctrl);
+       ctrl &= ~(PCI_IOV_CTRL_VFE | PCI_IOV_CTRL_MSE);
+       pci_write_config_word(iov->dev, iov->cap + PCI_IOV_CTRL, ctrl);
+       ssleep(1);
+
+       /*
+        * NOTE(review): the PF driver received the pre-event above but is
+        * not told that enabling failed -- confirm this is intended.
+        */
+       return rc;
+}
+
+/*
+ * iov_disable - turn off the Virtual Functions of a Physical Function
+ * @iov: the SR-IOV capability state
+ *
+ * Callers in this file hold iov->mutex.  Returns 0 on success (or if
+ * already disabled), -ENODEV if no PF driver has registered a notify
+ * callback.
+ */
+static int iov_disable(struct pci_iov *iov)
+{
+       int i;
+       u16 ctrl;
+
+       if (!iov->notify)
+               return -ENODEV;
+
+       if (!iov->is_enabled)
+               return 0;
+
+       /* pre-event, then remove the VF devices before the hardware goes */
+       iov->notify(iov->dev, PCI_IOV_DISABLE);
+       for (i = 0; i < iov->numvfs; i++)
+               vf_remove(iov->dev, i);
+
+       pci_read_config_word(iov->dev, iov->cap + PCI_IOV_CTRL, &ctrl);
+       ctrl &= ~(PCI_IOV_CTRL_VFE | PCI_IOV_CTRL_MSE);
+       pci_write_config_word(iov->dev, iov->cap + PCI_IOV_CTRL, ctrl);
+       /* wait one second after clearing the enable bits */
+       ssleep(1);
+
+       /* post-event: VFs are gone and the capability is disabled */
+       iov->notify(iov->dev, PCI_IOV_DISABLE | PCI_IOV_POST_EVENT);
+       iov->is_enabled = 0;
+       return 0;
+}
+
+/*
+ * iov_set_numvfs - program the number of Virtual Functions to expose
+ * @iov: the SR-IOV capability state
+ * @numvfs: requested number of VFs
+ *
+ * Returns 0 on success or when @numvfs is already in effect, -ENODEV if
+ * no PF driver is registered, -EINVAL if @numvfs is out of range or
+ * SR-IOV is currently enabled, and -EIO if the device then reports an
+ * unusable VF offset or stride.
+ */
+static int iov_set_numvfs(struct pci_iov *iov, int numvfs)
+{
+       u16 vf_offset, vf_stride;
+       int base = iov->cap;
+       struct pci_dev *pf = iov->dev;
+
+       if (!iov->notify)
+               return -ENODEV;
+
+       if (iov->numvfs == numvfs)
+               return 0;
+
+       if (iov->is_enabled || numvfs < 0 || numvfs > iov->initialvfs)
+               return -EINVAL;
+
+       /* offset and stride are re-read after NumVFs has been written */
+       pci_write_config_word(pf, base + PCI_IOV_NUM_VF, numvfs);
+       pci_read_config_word(pf, base + PCI_IOV_VF_OFFSET, &vf_offset);
+       pci_read_config_word(pf, base + PCI_IOV_VF_STRIDE, &vf_stride);
+
+       if (numvfs && !vf_offset)
+               return -EIO;
+       if (numvfs > 1 && !vf_stride)
+               return -EIO;
+
+       iov->numvfs = numvfs;
+       iov->stride = vf_stride;
+       iov->offset = vf_offset;
+       return 0;
+}
+
+/*
+ * Store callback of the "enable" attribute: "1" enables SR-IOV, "0"
+ * disables it.  Anything that does not start with a number, or any
+ * other value, is rejected with -EINVAL instead of being treated as 0.
+ * Returns @count on success or a negative error code.
+ */
+static ssize_t is_enabled_store(struct kobject *kobj, struct iov_attr *attr,
+                               const char *buf, size_t count)
+{
+       int rc;
+       char *end;
+       long long enable;
+       struct pci_iov *iov = container_of(kobj, struct pci_iov, kobj);
+
+       enable = simple_strtoll(buf, &end, 0);
+       /* no digits consumed: do not let garbage silently disable SR-IOV */
+       if (end == buf)
+               return -EINVAL;
+
+       mutex_lock(&iov->mutex);
+       switch (enable) {
+       case 0:
+               rc = iov_disable(iov);
+               break;
+       case 1:
+               rc = iov_enable(iov);
+               break;
+       default:
+               rc = -EINVAL;
+       }
+       mutex_unlock(&iov->mutex);
+
+       return rc ? rc : count;
+}
+
+/*
+ * Store callback of the "numvfs" attribute.  The value must be a number
+ * that fits the 16-bit NumVFs register; iov_set_numvfs() performs the
+ * remaining range and state checks.  Returns @count on success or a
+ * negative error code.
+ */
+static ssize_t numvfs_store(struct kobject *kobj, struct iov_attr *attr,
+                               const char *buf, size_t count)
+{
+       int rc;
+       char *end;
+       long long numvfs;
+       struct pci_iov *iov = container_of(kobj, struct pci_iov, kobj);
+
+       numvfs = simple_strtoll(buf, &end, 0);
+       /*
+        * Reject non-numeric input and values that would change when
+        * narrowed to the int taken by iov_set_numvfs().
+        */
+       if (end == buf || numvfs < 0 || numvfs > 0xffff)
+               return -EINVAL;
+
+       mutex_lock(&iov->mutex);
+       rc = iov_set_numvfs(iov, numvfs);
+       mutex_unlock(&iov->mutex);
+
+       return rc ? rc : count;
+}
+
+
+/*
+ * Attributes of the "iov" kobject created by pci_iov_create_sysfs():
+ * totalvfs and initialvfs are read-only, numvfs and enable are also
+ * writable by the owner.
+ */
+static struct iov_attr iov_attr[] = {
+       __ATTR_RO(totalvfs),
+       __ATTR_RO(initialvfs),
+       __ATTR(numvfs, S_IWUSR | S_IRUGO, numvfs_show, numvfs_store),
+       __ATTR(enable, S_IWUSR | S_IRUGO, is_enabled_show, is_enabled_store),
+};
+
+/*
+ * Show callback of a PF-driver-defined entry of one Virtual Function.
+ * The PF driver is sent a PCI_IOV_RD_CONF event carrying the Virtual
+ * Function Number before the stored value of the entry is printed.
+ */
+static ssize_t vf_show(struct kobject *kobj, struct iov_attr *attr,
+                               char *buf)
+{
+       int idx;
+       struct vf_entry *ve = container_of(kobj, struct vf_entry, kobj);
+
+       /* position of this entry in the VF's attr[]/param[] arrays */
+       idx = attr - ve->attr;
+       /* the event identifies the VF, not the entry within it */
+       ve->iov->notify(ve->iov->dev, ve->vfn | PCI_IOV_RD_CONF);
+
+       return sprintf(buf, "%s\n", ve->param[idx]);
+}
+
+/*
+ * Store callback of a PF-driver-defined entry of one Virtual Function.
+ * The first whitespace-delimited token of @buf (at most
+ * PCI_IOV_PARAM_LEN - 1 characters) becomes the new value, then the PF
+ * driver is sent a PCI_IOV_WR_CONF event carrying the Virtual Function
+ * Number.  Returns @count, or -EINVAL if @buf holds no token.
+ */
+static ssize_t vf_store(struct kobject *kobj, struct iov_attr *attr,
+                               const char *buf, size_t count)
+{
+       int idx;
+       struct vf_entry *ve = container_of(kobj, struct vf_entry, kobj);
+
+       /* the "%63s" width below must leave room for the terminating NUL */
+       BUILD_BUG_ON(PCI_IOV_PARAM_LEN != 64);
+
+       /* position of this entry in the VF's attr[]/param[] arrays */
+       idx = attr - ve->attr;
+       /* nothing parsed: keep the old value and do not notify */
+       if (sscanf(buf, "%63s", ve->param[idx]) != 1)
+               return -EINVAL;
+       /* the event identifies the VF, not the entry within it */
+       ve->iov->notify(ve->iov->dev, ve->vfn | PCI_IOV_WR_CONF);
+
+       return count;
+}
+
+/* Show callback of "rid": the VF's address as domain:bus:slot.function */
+static ssize_t rid_show(struct kobject *kobj, struct iov_attr *attr,
+                               char *buf)
+{
+       u8 bus_nr, dev_fn;
+       struct vf_entry *ve = container_of(kobj, struct vf_entry, kobj);
+       struct pci_dev *pf = ve->iov->dev;
+
+       vf_rid(pf, ve->vfn, &bus_nr, &dev_fn);
+
+       return sprintf(buf, "%04x:%02x:%02x.%d\n", pci_domain_nr(pf->bus),
+                       bus_nr, PCI_SLOT(dev_fn), PCI_FUNC(dev_fn));
+}
+
+/* read-only attribute present in every per-VF directory */
+static struct iov_attr vf_attr = __ATTR_RO(rid);
+
+/*
+ * iov_alloc_bus - make sure child buses exist up to a bus number
+ * @bus: the bus the Physical Function sits on
+ * @busnr: highest bus number used by the Virtual Functions
+ *
+ * For every bus number above @bus->number up to @busnr that has no
+ * child bus yet, a new child bus is allocated.  The new buses are only
+ * linked into @bus->children when all allocations succeeded.
+ *
+ * Returns 0 on success or -ENOMEM.
+ */
+int iov_alloc_bus(struct pci_bus *bus, int busnr)
+{
+       int i;
+       int rc = 0;
+       int found;
+       struct pci_bus *child, *next;
+       struct list_head head;
+
+       INIT_LIST_HEAD(&head);
+
+       down_write(&pci_bus_sem);
+
+       for (i = bus->number + 1; i <= busnr; i++) {
+               /*
+                * Track the match explicitly: after a full traversal the
+                * cursor does not point at a real pci_bus and must not be
+                * dereferenced.
+                */
+               found = 0;
+               list_for_each_entry(child, &bus->children, node)
+                       if (child->number == i) {
+                               found = 1;
+                               break;
+                       }
+               if (found)
+                       continue;
+               child = pci_alloc_child_bus(bus, NULL, i);
+               if (!child) {
+                       rc = -ENOMEM;
+                       break;
+               }
+               child->subordinate = i;
+               list_add_tail(&child->node, &head);
+       }
+
+       if (rc)
+               list_for_each_entry_safe(child, next, &head, node)
+                       kfree(child);
+       else
+               list_for_each_entry_safe(child, next, &head, node)
+                       list_move_tail(&child->node, &bus->children);
+
+       up_write(&pci_bus_sem);
+
+       return rc;
+}
+
+/*
+ * iov_release_bus - drop the bridge-less child buses of @bus
+ * @bus: the bus the Physical Function sits on
+ *
+ * Nothing is released while any device on @bus still has an SR-IOV
+ * notify callback registered.  The buses are unlinked under pci_bus_sem
+ * and removed after the semaphore has been dropped.
+ */
+void iov_release_bus(struct pci_bus *bus)
+{
+       int in_use = 0;
+       struct pci_dev *pdev;
+       struct pci_bus *cur, *tmp;
+       struct list_head orphans;
+
+       INIT_LIST_HEAD(&orphans);
+
+       down_write(&pci_bus_sem);
+
+       list_for_each_entry(pdev, &bus->devices, bus_list) {
+               if (pdev->iov && pdev->iov->notify) {
+                       in_use = 1;
+                       break;
+               }
+       }
+
+       if (!in_use) {
+               list_for_each_entry_safe(cur, tmp, &bus->children, node) {
+                       if (!cur->bridge)
+                               list_move(&cur->node, &orphans);
+               }
+       }
+
+       up_write(&pci_bus_sem);
+
+       list_for_each_entry_safe(cur, tmp, &orphans, node)
+               pci_remove_bus(cur);
+}
+
+/**
+ * pci_iov_init - initialize device's SR-IOV capability
+ * @dev: the PCI device
+ *
+ * Returns 0 on success, or negative on failure: -ENODEV if the device
+ * has no SR-IOV extended capability, -EIO if the capability reports
+ * unusable values, -ENOMEM if the state cannot be allocated.
+ *
+ * The major differences between Virtual Function and PCI device are:
+ * 1) the device with multiple bus numbers uses internal routing, so
+ *    there is no explicit bridge device in this case.
+ * 2) Virtual Function memory spaces are designated by BARs encapsulated
+ *    in the capability structure, and the BARs in Virtual Function PCI
+ *    configuration space are read-only zero.
+ */
+int pci_iov_init(struct pci_dev *dev)
+{
+       int i;
+       int pos;
+       u32 pgsz;
+       u16 ctrl, total, initial, offset, stride;
+       struct pci_iov *iov;
+       struct resource *res;
+
+       pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_IOV);
+       if (!pos)
+               return -ENODEV;
+
+       /*
+        * The control register is overwritten, not modified: every bit
+        * except the optional ARI bit ends up cleared.
+        */
+       ctrl = pci_ari_enabled(dev) ? PCI_IOV_CTRL_ARI : 0;
+       pci_write_config_word(dev, pos + PCI_IOV_CTRL, ctrl);
+       ssleep(1);
+
+       pci_read_config_word(dev, pos + PCI_IOV_TOTAL_VF, &total);
+       pci_read_config_word(dev, pos + PCI_IOV_INITIAL_VF, &initial);
+       /*
+        * NumVFs is programmed with InitialVFs before offset and stride
+        * are read.
+        * NOTE(review): iov->numvfs below stays 0 (kzalloc), so the cached
+        * value differs from the register until iov_set_numvfs() runs --
+        * confirm this is intended.
+        */
+       pci_write_config_word(dev, pos + PCI_IOV_NUM_VF, initial);
+       pci_read_config_word(dev, pos + PCI_IOV_VF_OFFSET, &offset);
+       pci_read_config_word(dev, pos + PCI_IOV_VF_STRIDE, &stride);
+       if (!total || initial > total || (initial && !offset) ||
+           (initial > 1 && !stride))
+               return -EIO;
+
+       /*
+        * Drop the low (PAGE_SHIFT - 12) bits of the supported page size
+        * mask, then fail if nothing is left.  Presumably bit n stands for
+        * a page size of 2^(n + 12) bytes -- TODO confirm against the spec.
+        */
+       pci_read_config_dword(dev, pos + PCI_IOV_SUP_PGSIZE, &pgsz);
+       i = PAGE_SHIFT > 12 ? PAGE_SHIFT - 12 : 0;
+       pgsz &= ~((1 << i) - 1);
+       if (!pgsz)
+               return -EIO;
+
+       /* keep only the lowest remaining bit and select it */
+       pgsz &= ~(pgsz - 1);
+       pci_write_config_dword(dev, pos + PCI_IOV_SYS_PGSIZE, pgsz);
+
+       iov = kzalloc(sizeof(*iov), GFP_KERNEL);
+       if (!iov)
+               return -ENOMEM;
+
+       iov->dev = dev;
+       iov->cap = pos;
+       iov->totalvfs = total;
+       iov->initialvfs = initial;
+       iov->offset = offset;
+       iov->stride = stride;
+       iov->align = pgsz << 12;
+       mutex_init(&iov->mutex);
+
+       for (i = 0; i < PCI_IOV_NUM_BAR; i++) {
+               res = dev->resource + PCI_IOV_RESOURCES + i;
+               pos = iov->cap + PCI_IOV_BAR_0 + i * 4;
+               /*
+                * The return value is added to the loop index; presumably
+                * non-zero when the BAR also occupies the next slot --
+                * TODO confirm against pci_read_base().
+                */
+               i += pci_read_base(dev, pci_bar_unknown, res, pos);
+               if (!res->flags)
+                       continue;
+               /* grow the resource to hold one BAR-sized region per VF */
+               res->end = res->start + (res->end - res->start + 1) * total - 1;
+       }
+
+       dev->iov = iov;
+       dev_info(&dev->dev, "SR-IOV capability is initialized\n");
+
+       return 0;
+}
+
+/**
+ * pci_iov_release - release resources used by SR-IOV capability
+ * @dev: the PCI device
+ *
+ * Does nothing when the device has no SR-IOV state attached.
+ */
+void pci_iov_release(struct pci_dev *dev)
+{
+       struct pci_iov *iov = dev->iov;
+
+       if (iov) {
+               mutex_destroy(&iov->mutex);
+               kfree(iov);
+               dev->iov = NULL;
+       }
+}
+
+/**
+ * pci_iov_create_sysfs - create sysfs for SR-IOV capability
+ * @dev: the PCI device
+ *
+ * Creates an "iov" directory below the device with the attributes of
+ * iov_attr[], plus one sub-directory per possible Virtual Function
+ * holding the "rid" attribute.  On any failure everything created so
+ * far is removed again and dev->iov->ve is left NULL.
+ */
+void pci_iov_create_sysfs(struct pci_dev *dev)
+{
+       int rc;
+       int i, j;
+       struct pci_iov *iov = dev->iov;
+
+       if (!iov)
+               return;
+
+       iov->ve = kzalloc(sizeof(*iov->ve) * iov->totalvfs, GFP_KERNEL);
+       if (!iov->ve)
+               return;
+
+       for (i = 0; i < iov->totalvfs; i++) {
+               iov->ve[i].vfn = i;
+               iov->ve[i].iov = iov;
+       }
+
+       iov->kobj.ktype = &iov_ktype;
+       iov->kobj.parent = &dev->dev.kobj;
+       kobject_set_name(&iov->kobj, "iov");
+       rc = kobject_register(&iov->kobj);
+       if (rc)
+               goto failed1;
+
+       for (i = 0; i < ARRAY_SIZE(iov_attr); i++) {
+               rc = sysfs_create_file(&iov->kobj, &iov_attr[i].attr);
+               if (rc)
+                       goto failed2;
+       }
+
+       for (i = 0; i < iov->totalvfs; i++) {
+               iov->ve[i].kobj.ktype = &iov_ktype;
+               iov->ve[i].kobj.parent = &iov->kobj;
+               sprintf(iov->ve[i].name, "%d", i);
+               kobject_set_name(&iov->ve[i].kobj, iov->ve[i].name);
+               rc = kobject_register(&iov->ve[i].kobj);
+               if (rc)
+                       goto failed3;
+               rc = sysfs_create_file(&iov->ve[i].kobj, &vf_attr.attr);
+               if (rc) {
+                       kobject_unregister(&iov->ve[i].kobj);
+                       goto failed3;
+               }
+       }
+
+       return;
+
+failed3:
+       for (j = 0; j < i; j++) {
+               sysfs_remove_file(&iov->ve[j].kobj, &vf_attr.attr);
+               kobject_unregister(&iov->ve[j].kobj);
+       }
+       /*
+        * Here i counts VF directories, not attributes.  Every iov_attr[]
+        * file exists at this point, so remove all of them below.
+        */
+       i = ARRAY_SIZE(iov_attr);
+failed2:
+       for (j = 0; j < i; j++)
+               sysfs_remove_file(&dev->iov->kobj, &iov_attr[j].attr);
+       kobject_unregister(&iov->kobj);
+failed1:
+       kfree(iov->ve);
+       iov->ve = NULL;
+
+       dev_err(&dev->dev, "can't create sysfs for SR-IOV.\n");
+}
+
+/**
+ * pci_iov_remove_sysfs - remove sysfs of SR-IOV capability
+ * @dev: the PCI device
+ *
+ * Removes what pci_iov_create_sysfs() created.  Does nothing if the
+ * device has no SR-IOV state or the sysfs entries were never created.
+ */
+void pci_iov_remove_sysfs(struct pci_dev *dev)
+{
+       int i;
+       struct pci_iov *iov = dev->iov;
+
+       if (!iov || !iov->ve)
+               return;
+
+       for (i = 0; i < iov->totalvfs; i++) {
+               sysfs_remove_file(&iov->ve[i].kobj, &vf_attr.attr);
+               kobject_unregister(&iov->ve[i].kobj);
+       }
+
+       for (i = 0; i < ARRAY_SIZE(iov_attr); i++)
+               sysfs_remove_file(&dev->iov->kobj, &iov_attr[i].attr);
+
+       kobject_unregister(&iov->kobj);
+       kfree(iov->ve);
+       /* other entry points test iov->ve; never leave it dangling */
+       iov->ve = NULL;
+}
+
+/*
+ * Return the alignment of SR-IOV resource @resno of @dev, or 0 if
+ * @resno is not one of the SR-IOV resources.
+ */
+int pci_iov_resource_align(struct pci_dev *dev, int resno)
+{
+       int is_iov = resno >= PCI_IOV_RESOURCES &&
+                    resno <= PCI_IOV_RESOURCES_END;
+
+       if (!is_iov)
+               return 0;
+
+       BUG_ON(!dev->iov);
+
+       return dev->iov->align;
+}
+
+/*
+ * Return the configuration space offset of the BAR behind SR-IOV
+ * resource @resno and store its type in @type, or return 0 (leaving
+ * @type untouched) if @resno is not one of the SR-IOV resources.
+ */
+int pci_iov_resource_bar(struct pci_dev *dev, int resno,
+                        enum pci_bar_type *type)
+{
+       int bar;
+
+       if (resno < PCI_IOV_RESOURCES)
+               return 0;
+       if (resno > PCI_IOV_RESOURCES_END)
+               return 0;
+
+       BUG_ON(!dev->iov);
+
+       bar = resno - PCI_IOV_RESOURCES;
+       *type = pci_bar_unknown;
+       return dev->iov->cap + PCI_IOV_BAR_0 + 4 * bar;
+}
+
+/**
+ * pci_iov_register - register SR-IOV service
+ * @dev: the PCI device
+ * @notify: callback function for SR-IOV events
+ * @entries: sysfs entries used by Physical Function driver; a NULL
+ *     pointer or an empty string ends the list, and @entries itself
+ *     may be NULL
+ *
+ * Allocates the buses needed by the Virtual Functions and creates one
+ * sysfs file per entry in every VF directory.  The strings in @entries
+ * are referenced, not copied.
+ *
+ * Returns 0 on success, or negative on failure.  On failure the device
+ * is left unregistered: no callback is installed and nothing allocated
+ * here is kept.
+ */
+int pci_iov_register(struct pci_dev *dev, int (*notify)(struct pci_dev *, u32),
+                       char **entries)
+{
+       int rc;
+       int n, i, j, k;
+       u8 busnr, devfn;
+       struct iov_attr *attr;
+       struct pci_iov *iov = dev->iov;
+
+       if (!iov || !iov->ve)
+               return -ENODEV;
+
+       if (!notify)
+               return -EINVAL;
+
+       /* the last possible VF must still be reachable through this bus */
+       vf_rid(dev, iov->totalvfs - 1, &busnr, &devfn);
+       if (busnr > dev->bus->subordinate)
+               return -EIO;
+
+       rc = iov_alloc_bus(dev->bus, busnr);
+       if (rc)
+               return rc;
+
+       /*
+        * Install the callback only once the buses exist, so a failure
+        * above does not leave the device looking registered.
+        */
+       iov->notify = notify;
+
+       for (n = 0; entries && entries[n] && *entries[n]; n++)
+               ;
+       if (!n)
+               return 0;
+
+       for (i = 0; i < iov->totalvfs; i++) {
+               rc = -ENOMEM;
+               iov->ve[i].param = kzalloc(PCI_IOV_PARAM_LEN * n, GFP_KERNEL);
+               attr = kzalloc(sizeof(*attr) * n, GFP_KERNEL);
+               iov->ve[i].attr = attr;
+               if (!iov->ve[i].param || !attr)
+                       goto failed;
+               for (j = 0; j < n; j++) {
+                       attr[j].attr.name = entries[j];
+                       attr[j].attr.mode = S_IWUSR | S_IRUGO;
+                       attr[j].show = vf_show;
+                       attr[j].store = vf_store;
+                       rc = sysfs_create_file(&iov->ve[i].kobj, &attr[j].attr);
+                       if (rc) {
+                               while (j--)
+                                       sysfs_remove_file(&iov->ve[i].kobj,
+                                                               &attr[j].attr);
+                               goto failed;
+                       }
+               }
+       }
+
+       iov->nentries = n;
+       return 0;
+
+failed:
+       /* VF i is only partly set up and has no sysfs files left */
+       kfree(iov->ve[i].attr);
+       kfree(iov->ve[i].param);
+       iov->ve[i].attr = NULL;
+       iov->ve[i].param = NULL;
+
+       /* VFs below i are fully set up */
+       for (k = 0; k < i; k++) {
+               for (j = 0; j < n; j++)
+                       sysfs_remove_file(&iov->ve[k].kobj,
+                                       &iov->ve[k].attr[j].attr);
+               kfree(iov->ve[k].attr);
+               kfree(iov->ve[k].param);
+               iov->ve[k].attr = NULL;
+               iov->ve[k].param = NULL;
+       }
+
+       /* unregister again; iov_release_bus() looks at iov->notify */
+       iov->notify = NULL;
+       iov_release_bus(dev->bus);
+
+       return rc;
+}
+EXPORT_SYMBOL_GPL(pci_iov_register);
+
+/**
+ * pci_iov_unregister - unregister SR-IOV service
+ * @dev: the PCI device
+ *
+ * Undoes pci_iov_register(): removes the Physical Function driver's
+ * sysfs entries (if any were registered), clears the event callback and
+ * releases the buses allocated for the Virtual Functions.  Must only be
+ * called for a device that is currently registered.
+ */
+void pci_iov_unregister(struct pci_dev *dev)
+{
+       int i, j;
+       struct pci_iov *iov = dev->iov;
+
+       BUG_ON(!iov || !iov->notify);
+
+       /*
+        * A driver may have registered without sysfs entries; it still
+        * needs the callback cleared and the buses released below.
+        */
+       if (iov->nentries) {
+               for (i = 0; i < iov->totalvfs; i++) {
+                       for (j = 0; j < iov->nentries; j++)
+                               sysfs_remove_file(&iov->ve[i].kobj,
+                                               &iov->ve[i].attr[j].attr);
+                       kfree(iov->ve[i].attr);
+                       kfree(iov->ve[i].param);
+                       iov->ve[i].attr = NULL;
+                       iov->ve[i].param = NULL;
+               }
+               iov->nentries = 0;
+       }
+
+       iov->notify = NULL;
+       iov_release_bus(dev->bus);
+}
+EXPORT_SYMBOL_GPL(pci_iov_unregister);
+
+/**
+ * pci_iov_enable - enable SR-IOV capability
+ * @dev: the PCI device
+ * @numvfs: number of VFs to be available
+ *
+ * Sets the number of Virtual Functions and enables them, both under the
+ * SR-IOV mutex.
+ *
+ * Returns 0 on success, or negative on failure: -ENODEV without SR-IOV
+ * state, -EINVAL without a registered callback, otherwise the error of
+ * the failing step.
+ */
+int pci_iov_enable(struct pci_dev *dev, int numvfs)
+{
+       struct pci_iov *iov = dev->iov;
+       int rc;
+
+       if (!iov)
+               return -ENODEV;
+
+       if (!iov->notify)
+               return -EINVAL;
+
+       mutex_lock(&iov->mutex);
+       rc = iov_set_numvfs(iov, numvfs);
+       if (!rc)
+               rc = iov_enable(iov);
+       mutex_unlock(&iov->mutex);
+
+       return rc;
+}
+EXPORT_SYMBOL_GPL(pci_iov_enable);
+
+/**
+ * pci_iov_disable - disable SR-IOV capability
+ * @dev: the PCI device
+ *
+ * Should be called upon Physical Function driver removal, and power
+ * state change. All previous allocated Virtual Functions are reclaimed.
+ * The device must have SR-IOV state and a registered callback.
+ */
+void pci_iov_disable(struct pci_dev *dev)
+{
+       struct pci_iov *iov = dev->iov;
+
+       BUG_ON(!iov);
+       BUG_ON(!iov->notify);
+
+       mutex_lock(&iov->mutex);
+       iov_disable(iov);
+       mutex_unlock(&iov->mutex);
+}
+EXPORT_SYMBOL_GPL(pci_iov_disable);
+
+/**
+ * pci_iov_read_config - read SR-IOV configurations
+ * @dev: the PCI device
+ * @vfn: Virtual Function Number
+ * @entry: the entry to be read
+ * @buf: the buffer to be filled
+ * @size: size of the buffer, must be positive
+ *
+ * Copies the stored value of @entry of Virtual Function @vfn into @buf,
+ * truncated to fit and always NUL-terminated.
+ *
+ * Returns 0 on success, or negative on failure: -ENODEV without SR-IOV
+ * state, -EINVAL if nothing is registered, @vfn or @size is out of
+ * range, or @entry is unknown.
+ */
+int pci_iov_read_config(struct pci_dev *dev, int vfn,
+                       char *entry, char *buf, int size)
+{
+       int i;
+       struct pci_iov *iov = dev->iov;
+
+       if (!iov)
+               return -ENODEV;
+
+       if (!iov->notify || !iov->ve || !iov->nentries)
+               return -EINVAL;
+
+       if (vfn < 0 || vfn >= iov->totalvfs)
+               return -EINVAL;
+
+       /* buf[size - 1] is written below; size must leave room for it */
+       if (size <= 0)
+               return -EINVAL;
+
+       for (i = 0; i < iov->nentries; i++)
+               if (!strcmp(iov->ve[vfn].attr[i].attr.name, entry)) {
+                       strncpy(buf, iov->ve[vfn].param[i], size);
+                       buf[size - 1] = '\0';
+                       return 0;
+               }
+
+       return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(pci_iov_read_config);
+
+/**
+ * pci_iov_write_config - write SR-IOV configurations
+ * @dev: the PCI device
+ * @vfn: Virtual Function Number
+ * @entry: the entry to be written
+ * @buf: the buffer contains configurations
+ *
+ * Stores @buf as the value of @entry of Virtual Function @vfn,
+ * truncated to PCI_IOV_PARAM_LEN - 1 characters.
+ *
+ * Returns 0 on success, or negative on failure.
+ */
+int pci_iov_write_config(struct pci_dev *dev, int vfn,
+                       char *entry, char *buf)
+{
+       int idx;
+       char *slot;
+       struct pci_iov *iov = dev->iov;
+
+       if (!iov)
+               return -ENODEV;
+
+       if (!iov->notify || !iov->ve || !iov->nentries)
+               return -EINVAL;
+
+       if (vfn < 0 || vfn >= iov->totalvfs)
+               return -EINVAL;
+
+       for (idx = 0; idx < iov->nentries; idx++) {
+               if (strcmp(iov->ve[vfn].attr[idx].attr.name, entry))
+                       continue;
+
+               slot = iov->ve[vfn].param[idx];
+               strncpy(slot, buf, PCI_IOV_PARAM_LEN);
+               slot[PCI_IOV_PARAM_LEN - 1] = '\0';
+               return 0;
+       }
+
+       return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(pci_iov_write_config);
diff -r 040046b91eb7 -r 75504b97c0ab drivers/pci/pci-sysfs.c
--- a/drivers/pci/pci-sysfs.c   Sat Sep 27 01:25:31 2008 -0400
+++ b/drivers/pci/pci-sysfs.c   Sat Sep 27 01:27:01 2008 -0400
@@ -559,6 +559,9 @@
        }
        /* add platform-specific attributes */
        pcibios_add_platform_entries(pdev);
+
+       /* Single Root I/O Virtualization */
+       pci_iov_create_sysfs(pdev);

        return 0;
 }
@@ -587,6 +590,8 @@
                        kfree(pdev->rom_attr);
                }
        }
+
+       pci_iov_remove_sysfs(pdev);
 }

 static int __init pci_sysfs_init(void)
diff -r 040046b91eb7 -r 75504b97c0ab drivers/pci/pci.c
--- a/drivers/pci/pci.c Sat Sep 27 01:25:31 2008 -0400
+++ b/drivers/pci/pci.c Sat Sep 27 01:27:01 2008 -0400
@@ -921,11 +921,17 @@
  */
 int pci_resource_alignment(struct pci_dev *dev, int resno)
 {
+       resource_size_t align;
        struct resource *res = dev->resource + resno;

        if (resno <= PCI_ROM_RESOURCE)
                return res->end - res->start + 1;
-       else if (resno <= PCI_BRIDGE_RES_END)
+       else if (resno < PCI_BRIDGE_RESOURCES) {
+               /* may be device specific resource */
+               align = pci_iov_resource_align(dev, resno);
+               if (align)
+                       return align;
+       } else if (resno <= PCI_BRIDGE_RES_END)
                return res->start;

        dev_err(&dev->dev, "alignment: invalid resource #%d\n", resno);
@@ -942,12 +948,19 @@
  */
 int pci_resource_bar(struct pci_dev *dev, int resno, enum pci_bar_type *type)
 {
+       int reg;
+
        if (resno < PCI_ROM_RESOURCE) {
                *type = pci_bar_unknown;
                return PCI_BASE_ADDRESS_0 + 4 * resno;
        } else if (resno == PCI_ROM_RESOURCE) {
                *type = pci_bar_rom;
                return dev->rom_base_reg;
+       } else if (resno < PCI_BRIDGE_RESOURCES) {
+               /* may be device specific resource */
+               reg = pci_iov_resource_bar(dev, resno, type);
+               if (reg)
+                       return reg;
        }

        dev_err(&dev->dev, "BAR: invalid resource #%d\n", resno);
diff -r 040046b91eb7 -r 75504b97c0ab drivers/pci/pci.h
--- a/drivers/pci/pci.h Sat Sep 27 01:25:31 2008 -0400
+++ b/drivers/pci/pci.h Sat Sep 27 01:27:01 2008 -0400
@@ -133,4 +133,59 @@
        return dev->ari_enabled;
 }

+/* Single Root I/O Virtualization */
+#define PCI_IOV_PARAM_LEN      64
+
+struct vf_entry;
+
+/* State of the SR-IOV capability of one Physical Function */
+struct pci_iov {
+       int cap;                /* capability position */
+       int align;              /* page size used to map memory space */
+       int is_enabled;         /* status of SR-IOV */
+       int nentries;           /* number of sysfs entries used by PF driver */
+       u16 totalvfs;           /* total VFs associated with the PF */
+       u16 initialvfs;         /* initial VFs associated with the PF */
+       u16 numvfs;             /* number of VFs available */
+       u16 offset;             /* first VF Routing ID offset */
+       u16 stride;             /* following VF stride */
+       struct mutex mutex;     /* lock for SR-IOV */
+       struct kobject kobj;    /* kobject for IOV */
+       struct pci_dev *dev;    /* Physical Function */
+       struct vf_entry *ve;    /* Virtual Function related */
+       int (*notify)(struct pci_dev *, u32);   /* event callback function */
+};
+
+/*
+ * Entry points used by the PCI core.  Without CONFIG_PCI_IOV they are
+ * replaced by the inline stubs below: pci_iov_init() fails with -EIO
+ * and the resource helpers return 0.
+ */
+#ifdef CONFIG_PCI_IOV
+extern int pci_iov_init(struct pci_dev *dev);
+extern void pci_iov_release(struct pci_dev *dev);
+void pci_iov_create_sysfs(struct pci_dev *dev);
+void pci_iov_remove_sysfs(struct pci_dev *dev);
+extern int pci_iov_resource_align(struct pci_dev *dev, int resno);
+extern int pci_iov_resource_bar(struct pci_dev *dev, int resno,
+                               enum pci_bar_type *type);
+#else
+static inline int pci_iov_init(struct pci_dev *dev)
+{
+       return -EIO;
+}
+static inline void pci_iov_release(struct pci_dev *dev)
+{
+}
+static inline void pci_iov_create_sysfs(struct pci_dev *dev)
+{
+}
+static inline void pci_iov_remove_sysfs(struct pci_dev *dev)
+{
+}
+static inline int pci_iov_resource_align(struct pci_dev *dev, int resno)
+{
+       return 0;
+}
+static inline int pci_iov_resource_bar(struct pci_dev *dev, int resno,
+                                      enum pci_bar_type *type)
+{
+       return 0;
+}
+#endif /* CONFIG_PCI_IOV */
+
 #endif /* DRIVERS_PCI_H */
diff -r 040046b91eb7 -r 75504b97c0ab drivers/pci/probe.c
--- a/drivers/pci/probe.c       Sat Sep 27 01:25:31 2008 -0400
+++ b/drivers/pci/probe.c       Sat Sep 27 01:27:01 2008 -0400
@@ -760,6 +760,7 @@
        struct pci_dev *pci_dev;

        pci_dev = to_pci_dev(dev);
+       pci_iov_release(pci_dev);
        kfree(pci_dev);
 }

@@ -886,6 +887,9 @@

        /* Alternative Routing-ID Forwarding */
        pci_ari_init(dev);
+
+       /* Single Root I/O Virtualization */
+       pci_iov_init(dev);

        /*
         * Add the device to our list of discovered devices
diff -r 040046b91eb7 -r 75504b97c0ab include/linux/pci.h
--- a/include/linux/pci.h       Sat Sep 27 01:25:31 2008 -0400
+++ b/include/linux/pci.h       Sat Sep 27 01:27:01 2008 -0400
@@ -77,6 +77,12 @@
        /* #6: expansion ROM */
        PCI_ROM_RESOURCE,

+       /* device specific resources */
+#ifdef CONFIG_PCI_IOV
+       PCI_IOV_RESOURCES,
+       PCI_IOV_RESOURCES_END = PCI_IOV_RESOURCES + PCI_IOV_NUM_BAR - 1,
+#endif
+
        /* address space assigned to buses behind the bridge */
 #ifndef PCI_BRIDGE_RES_NUM
 #define PCI_BRIDGE_RES_NUM 4
@@ -128,6 +134,8 @@
        char cap_nr;
        u32 data[0];
 };
+
+struct pci_iov;

 /*
  * The pci_dev structure is used to describe PCI devices.
@@ -200,6 +208,7 @@
        struct bin_attribute *rom_attr; /* attribute descriptor for sysfs ROM entry */
        int rom_attr_enabled;           /* has display of the rom attribute been enabled? */
        struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */
+       struct pci_iov *iov;
 };

 #define pci_dev_g(n) list_entry(n, struct pci_dev, global_list)
@@ -811,5 +820,54 @@
 #define PCIPCI_VSFX            16
 #define PCIPCI_ALIMAGIK                32

+/* SR-IOV events masks */
+#define PCI_IOV_VIRTFN_ID      0x0000FFFFU     /* Virtual Function Number */
+#define PCI_IOV_NUM_VIRTFN     0x0000FFFFU     /* num of Virtual Functions */
+#define PCI_IOV_EVENT_TYPE     0x80000000U     /* event type (pre/post) */
+/* SR-IOV events values */
+#define PCI_IOV_ENABLE         0x00010000U     /* SR-IOV enable request */
+#define PCI_IOV_DISABLE                0x00020000U     /* SR-IOV disable request */
+#define PCI_IOV_RD_CONF                0x00040000U     /* read configuration */
+#define PCI_IOV_WR_CONF                0x00080000U     /* write configuration */
+#define PCI_IOV_POST_EVENT     0x80000000U     /* post event */
+
+#ifdef CONFIG_PCI_IOV
+extern int pci_iov_enable(struct pci_dev *dev, int numvfs);
+extern void pci_iov_disable(struct pci_dev *dev);
+extern int pci_iov_register(struct pci_dev *dev,
+       int (*notify)(struct pci_dev *dev, u32 event), char **entries);
+extern void pci_iov_unregister(struct pci_dev *dev);
+extern int pci_iov_read_config(struct pci_dev *dev, int id,
+                       char *entry, char *buf, int size);
+extern int pci_iov_write_config(struct pci_dev *dev, int id,
+                       char *entry, char *buf);
+#else
+static inline int pci_iov_enable(struct pci_dev *dev, int numvfs)
+{
+       return -EIO;
+}
+static inline void pci_iov_disable(struct pci_dev *dev)
+{
+}
+static inline int pci_iov_register(struct pci_dev *dev,
+       int (*notify)(struct pci_dev *dev, u32 event), char **entries)
+{
+       return -EIO;
+}
+static inline void pci_iov_unregister(struct pci_dev *dev)
+{
+}
+static inline int pci_iov_read_config(struct pci_dev *dev, int id,
+                       char *entry, char *buf, int size)
+{
+       return -EIO;
+}
+static inline int pci_iov_write_config(struct pci_dev *dev, int id,
+                       char *entry, char *buf)
+{
+       return -EIO;
+}
+#endif /* CONFIG_PCI_IOV */
+
 #endif /* __KERNEL__ */
 #endif /* LINUX_PCI_H */
diff -r 040046b91eb7 -r 75504b97c0ab include/linux/pci_regs.h
--- a/include/linux/pci_regs.h  Sat Sep 27 01:25:31 2008 -0400
+++ b/include/linux/pci_regs.h  Sat Sep 27 01:27:01 2008 -0400
@@ -332,6 +332,7 @@
 #define  PCI_EXP_TYPE_UPSTREAM 0x5     /* Upstream Port */
 #define  PCI_EXP_TYPE_DOWNSTREAM 0x6   /* Downstream Port */
 #define  PCI_EXP_TYPE_PCI_BRIDGE 0x7   /* PCI/PCI-X Bridge */
+#define  PCI_EXP_TYPE_RC_END   0x9     /* Root Complex Integrated Endpoint */
 #define PCI_EXP_FLAGS_SLOT     0x0100  /* Slot implemented */
 #define PCI_EXP_FLAGS_IRQ      0x3e00  /* Interrupt message number */
 #define PCI_EXP_DEVCAP         4       /* Device capabilities */
@@ -393,6 +394,7 @@
 #define PCI_EXT_CAP_ID_DSN     3
 #define PCI_EXT_CAP_ID_PWR     4
 #define PCI_EXT_CAP_ID_ARI     14
+#define PCI_EXT_CAP_ID_IOV     16

 /* Advanced Error Reporting */
 #define PCI_ERR_UNCOR_STATUS   4       /* Uncorrectable Error Status */
@@ -478,4 +480,23 @@
 #define  PCI_ARI_CTRL_ACS      0x0002  /* ACS Function Groups Enable */
 #define  PCI_ARI_CTRL_FG(x)    (((x) >> 4) & 7) /* Function Group */

+/* Single Root I/O Virtualization */
+#define PCI_IOV_CAP            0x04    /* SR-IOV Capabilities */
+#define PCI_IOV_CTRL           0x08    /* SR-IOV Control */
+#define  PCI_IOV_CTRL_VFE      0x01    /* VF Enable */
+#define  PCI_IOV_CTRL_MSE      0x08    /* VF Memory Space Enable */
+#define  PCI_IOV_CTRL_ARI      0x10    /* ARI Capable Hierarchy */
+#define PCI_IOV_STATUS         0x0a    /* SR-IOV Status */
+#define PCI_IOV_INITIAL_VF     0x0c    /* Initial VFs */
+#define PCI_IOV_TOTAL_VF       0x0e    /* Total VFs */
+#define PCI_IOV_NUM_VF         0x10    /* Number of VFs */
+#define PCI_IOV_FUNC_LINK      0x12    /* Function Dependency Link */
+#define PCI_IOV_VF_OFFSET      0x14    /* First VF Offset */
+#define PCI_IOV_VF_STRIDE      0x16    /* Following VF Stride */
+#define PCI_IOV_VF_DID         0x1a    /* VF Device ID */
+#define PCI_IOV_SUP_PGSIZE     0x1c    /* Supported Page Sizes */
+#define PCI_IOV_SYS_PGSIZE     0x20    /* System Page Size */
+#define PCI_IOV_BAR_0          0x24    /* VF BAR0 */
+#define PCI_IOV_NUM_BAR                6       /* Number of VF BARs */
+
 #endif /* LINUX_PCI_REGS_H */

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-devel] [PATCH 4/9] dom0 PCI: support SR-IOV capability, Zhao, Yu <=