WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH 1/3] PCIe IO space multiplexing: Linux part

To: xen-devel@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-devel] [PATCH 1/3] PCIe IO space multiplexing: Linux part
From: Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
Date: Thu, 2 Apr 2009 13:01:28 +0900
Cc: Isaku Yamahata <yamahata@xxxxxxxxxxxxx>, Ian.Jackson@xxxxxxxxxxxxx
Delivery-date: Wed, 01 Apr 2009 21:06:30 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Mutt/1.5.18 (2008-05-17)
PCI pass through: PCIe IO space multiplexing

This patch is for PCIe IO space multiplexing.
This is required for more than 16 HVM domains to boot from
PCIe pass-through devices.

Linux as dom0 exclusively assigns IO space to downstream PCI bridges,
and the assignment unit of PCI bridge IO space is 4K. So only up to
16 PCIe devices can be accessed via IO space within the 64K IO ports.
PCI expansion ROM BIOSes often use IO port access to boot from the device,
so in a virtualized environment this means only up to 16 guest domains
can boot from a pass-through device.

This patch allows PCIe IO space sharing of pass-through device.
- Reassign the IO space of the PCIe devices specified by
  "guestiomuldev=[<segment>:]<bus>:<dev>[,[<segment>:]<bus>:<dev>[,...]]"
  so that it is shared.
  This is implemented as Linux PCI quirk fixup.

  The sharing unit is a PCIe switch, i.e. the IO space of the endpoint
  devices under the same switch will be shared.
  If there is more than one switch, a separate area of IO space is used
  for each of them (e.g. two areas for two switches).

- Add a driver which arbitrates the accesses to the multiplexed PCIe IO
  space. qemu-dm will use this later.

Limitation:
The IO ports of IO-shared devices can't be accessed from a dom0 Linux
device driver. But this shouldn't be a big issue, because the PCIe
specification discourages the use of IO space and recommends that IO space
be used only for bootable devices with ROM code. OS device drivers should
work without IO space access.

Signed-off-by: Isaku Yamahata <yamahata@xxxxxxxxxxxxx>

diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -37,6 +37,12 @@ config PCI_GUESTDEV
        help
          Say Y here if you want to reserve PCI device for passthrough.
 
+config PCI_IOMULTI
+       bool "PCI Device IO Multiplex for Passthrough"
+       depends on PCI && ACPI
+       default y
+       help
+         Say Y here if you need io multiplexing.
 config PCI_IOV
        bool "PCI IOV support"
        depends on PCI
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -4,6 +4,7 @@
 
 obj-y          += access.o bus.o probe.o remove.o pci.o quirks.o \
                        pci-driver.o search.o pci-sysfs.o rom.o setup-res.o
+obj-$(CONFIG_PCI_IOMULTI) += iomulti.o
 obj-$(CONFIG_PCI_REASSIGN) += reassigndev.o
 obj-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_PCI_GUESTDEV) += guestdev.o
diff --git a/drivers/pci/iomulti.c b/drivers/pci/iomulti.c
new file mode 100644
--- /dev/null
+++ b/drivers/pci/iomulti.c
@@ -0,0 +1,1121 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright (c) 2009 Isaku Yamahata
+ *                    VA Linux Systems Japan K.K.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/miscdevice.h>
+#include <linux/pci.h>
+#include <linux/sort.h>
+
+#include <asm/setup.h>
+#include <asm/uaccess.h>
+
+#include "iomulti.h"
+
+#define PCI_NUM_BARS           6
+#define PCI_BUS_MAX            255
+#define PCI_DEV_MAX            31
+#define PCI_FUNC_MAX           7
+#define PCI_NUM_FUNC           8
+
+/*
+ * Length of a resource, modelled on pci_resource_len(): a resource whose
+ * start and end are both zero is reported as empty (length 0).
+ */
+static inline resource_size_t pci_iomul_len(const struct resource* r)
+{
+       const int unset = (r->start == 0 && r->end == 0);
+
+       return unset ? 0 : r->end - r->start + 1;
+}
+
+#define ROUND_UP(x, a)         (((x) + (a) - 1) & ~((a) - 1))
+/*
+ * Total IO space needed by one device, rounded up to a multiple of 4096.
+ * Derived from pbus_size_io().
+ */
+static unsigned long pdev_size_io(struct pci_dev *pdev)
+{
+       unsigned long small = 0;        /* sum of IO resources < 0x400 bytes */
+       unsigned long large = 0;        /* sum of all other IO resources */
+       int idx;
+
+       for (idx = 0; idx < PCI_NUM_RESOURCES; idx++) {
+               struct resource *res = &pdev->resource[idx];
+               unsigned long len;
+
+               if (res->flags & IORESOURCE_IO) {
+                       len = res->end - res->start + 1;
+                       if (len >= 0x400)
+                               large += len;
+                       else
+                               /* Might be re-aligned for ISA */
+                               small += len;
+               }
+       }
+
+/* To be fixed in 2.5: we should have sort of HAVE_ISA
+   flag in the struct pci_bus. */
+#if defined(CONFIG_ISA) || defined(CONFIG_EISA)
+       small = (small & 0xff) + ((small & ~0xffUL) << 2);
+#endif
+       return ROUND_UP(small + large, 4096);
+}
+
+/*
+ * Bus number used to identify the switch on which this device sits.
+ *
+ * For a device with a PCI Express capability this is the primary bus
+ * number of its bus,
+ * i.e. the primary bus number of the PCI-PCI bridge of the downstream port
+ *      or root port in the switch;
+ *      the secondary bus number of the PCI-PCI bridge of the upstream port
+ *      in the switch.
+ * For any other device it is the number of the bus the device is on.
+ */
+static inline unsigned char pci_dev_switch_busnr(struct pci_dev *pdev)
+{
+       struct pci_bus *bus = pdev->bus;
+
+       if (!pci_find_capability(pdev, PCI_CAP_ID_EXP))
+               return bus->number;
+       return bus->primary;
+}
+
+struct pci_iomul_func {        /* one PCI function taking part in IO sharing */
+       int             segment;        /* PCI domain (segment) number */
+       uint8_t         bus;            /* bus number */
+       uint8_t         devfn;          /* PCI_DEVFN(dev, func) */
+
+       /*
+        * io_size:  IO space size of the function, from pdev_size_io().
+        * io_bar:   bitmask of BARs (bit i for BAR i) that are IO BARs
+        *           with non-zero length.
+        * resource: copy of the device resources for the BARs set in
+        *           io_bar; only start and end are used.
+        */
+       unsigned long   io_size;
+       uint8_t         io_bar;
+       struct resource resource[PCI_NUM_BARS];
+};
+
+struct pci_iomul_switch {      /* one switch whose devices share IO space */
+       struct list_head        list;   /* entry in switch_list;
+                                        * switch_list_lock protects */
+
+       /*
+        * This lock protects the following entries and the
+        * pci_iomul_slot/pci_iomul_func linked below this switch.
+        */
+       struct mutex            lock;
+
+       struct resource         *io_region;     /* from request_region() */
+       unsigned int            count;          /* successful setups that
+                                                * are not yet released */
+       struct pci_dev          *current_pdev;  /* device whose IO decoding
+                                                * is enabled, or NULL */
+
+       int                     segment;        /* PCI domain (segment) */
+       uint8_t                 bus;            /* switch bus number, see
+                                                * pci_dev_switch_busnr() */
+
+       uint32_t                io_base;        /* shared IO window: first */
+       uint32_t                io_limit;       /* and last port address */
+
+       /* func which has the largest io size */
+       struct pci_iomul_func   *func;
+
+       struct list_head        slots;          /* list of pci_iomul_slot */
+};
+
+struct pci_iomul_slot {        /* one device (slot) registered under a switch */
+       struct list_head        sibling;        /* entry in the slots list
+                                                * of pci_iomul_switch */
+
+       /*
+        * busnr of the switch, see pci_dev_switch_busnr():
+        * when PCIe, the primary busnr of the PCI-PCI bridge on which
+        * this device sits.
+        */
+       uint8_t                 switch_busnr;
+
+       /* device: segment, bus number and device (slot) number */
+       int                     segment;
+       uint8_t                 bus;
+       uint8_t                 dev;
+
+       struct pci_iomul_func   *func[PCI_NUM_FUNC];    /* per function
+                                                        * number; may be
+                                                        * NULL */
+};
+
+static LIST_HEAD(switch_list);         /* all pci_iomul_switch entries */
+static DEFINE_MUTEX(switch_list_lock); /* protects switch_list */
+
+/*****************************************************************************/
+/*
+ * Walk switch_list looking for the switch with the given segment and bus.
+ * Takes no lock itself; the callers in this file hold switch_list_lock.
+ * Returns the matching entry, or NULL when there is none.
+ */
+struct pci_iomul_switch *__pci_iomul_find_switch_locked(int segment,
+                                                       uint8_t bus)
+{
+       struct pci_iomul_switch *cur;
+       struct pci_iomul_switch *match = NULL;
+
+       list_for_each_entry(cur, &switch_list, list) {
+               if (cur->segment != segment || cur->bus != bus)
+                       continue;
+               match = cur;
+               break;
+       }
+       return match;
+}
+
+/*
+ * Look up the switch for (segment, bus) while holding switch_list_lock.
+ * Never creates an entry; returns NULL when no switch matches.
+ */
+struct pci_iomul_switch *__pci_iomul_find_switch(int segment, uint8_t bus)
+{
+       struct pci_iomul_switch *sw;
+
+       mutex_lock(&switch_list_lock);
+       sw = __pci_iomul_find_switch_locked(segment, bus);
+       mutex_unlock(&switch_list_lock);
+
+       return sw;
+}
+
+/*
+ * Find the switch for (segment, bus), creating and registering a new
+ * entry when none exists yet.
+ * Returns the switch, or NULL if a new one was needed but could not be
+ * allocated.
+ */
+struct pci_iomul_switch *pci_iomul_find_switch(int segment, uint8_t bus)
+{
+       struct pci_iomul_switch *fresh;
+       struct pci_iomul_switch *sw;
+
+       /* allocate up front, before switch_list_lock is taken */
+       fresh = kzalloc(sizeof(*fresh), GFP_KERNEL);
+
+       mutex_lock(&switch_list_lock);
+       sw = __pci_iomul_find_switch_locked(segment, bus);
+       if (sw == NULL && fresh != NULL) {
+               fresh->segment = segment;
+               fresh->bus = bus;
+               fresh->func = NULL;
+               mutex_init(&fresh->lock);
+               INIT_LIST_HEAD(&fresh->slots);
+               list_add(&fresh->list, &switch_list);
+       }
+       mutex_unlock(&switch_list_lock);
+
+       if (sw != NULL) {
+               /* already registered: the spare allocation is not needed */
+               kfree(fresh);
+               return sw;
+       }
+       return fresh;
+}
+
+/*
+ * Search the slot list of @sw for the slot with the given bus and device
+ * number. The caller must hold sw->lock (checked with BUG_ON).
+ * Returns the slot, or NULL if it is not on the list.
+ */
+struct pci_iomul_slot *pci_iomul_find_slot_locked(struct pci_iomul_switch *sw,
+                                                 uint8_t busnr, uint8_t dev)
+{
+       struct pci_iomul_slot *cur;
+       struct pci_iomul_slot *match = NULL;
+
+       BUG_ON(!mutex_is_locked(&sw->lock));
+       list_for_each_entry(cur, &sw->slots, sibling) {
+               if (cur->bus != busnr || cur->dev != dev)
+                       continue;
+               match = cur;
+               break;
+       }
+       return match;
+}
+
+/*
+ * Same lookup as pci_iomul_find_slot_locked(), but takes and drops
+ * sw->lock around the search.
+ */
+struct pci_iomul_slot *pci_iomul_find_slot(struct pci_iomul_switch *sw,
+                                          uint8_t busnr, uint8_t dev)
+{
+       struct pci_iomul_slot *slot;
+
+       mutex_lock(&sw->lock);
+       slot = pci_iomul_find_slot_locked(sw, busnr, dev);
+       mutex_unlock(&sw->lock);
+
+       return slot;
+}
+
+/*
+ * Find the registered slot for (segment, bus, dev) together with the
+ * switch it belongs to. Every switch of the segment is searched under
+ * switch_list_lock.
+ * On return *swp and *slot are either both set or both NULL.
+ */
+void pci_iomul_find_switch_slot(int segment, uint8_t bus, uint8_t dev,
+                               struct pci_iomul_switch **swp,
+                               struct pci_iomul_slot **slot)
+{
+       struct pci_iomul_switch *cur;
+
+       *swp = NULL;
+       *slot = NULL;
+
+       mutex_lock(&switch_list_lock);
+       list_for_each_entry(cur, &switch_list, list) {
+               struct pci_iomul_slot *hit;
+
+               if (cur->segment == segment) {
+                       hit = pci_iomul_find_slot(cur, bus, dev);
+                       if (hit == NULL)
+                               continue;
+                       *swp = cur;
+                       *slot = hit;
+                       break;
+               }
+       }
+       mutex_unlock(&switch_list_lock);
+}
+
+/*
+ * Fill in @slot from @pdev and link it onto the slot list of its switch,
+ * creating the switch entry if it does not exist yet.
+ *
+ * Only PCIe endpoints and legacy endpoints are accepted.
+ *
+ * Deliberately not marked __init: this function is reached through
+ * pci_iomul_slot_alloc() from the __devinit header fixup, so it must not
+ * be placed in the init section that is discarded after boot.
+ *
+ * Returns 0 on success, -ENOSYS for non-PCIe devices and root complex
+ * integrated endpoints, -EINVAL for other non-endpoint devices, -ENOMEM
+ * when the switch cannot be allocated and -EEXIST when the slot is
+ * already registered.
+ */
+static int pci_iomul_slot_init(struct pci_dev *pdev,
+                              struct pci_iomul_slot *slot)
+{
+       u16 rpcap;
+       u16 cap;
+       struct pci_iomul_switch *sw;
+
+       rpcap = pci_find_capability(pdev, PCI_CAP_ID_EXP);
+       if (!rpcap) {
+               /* pci device isn't supported */
+               printk(KERN_INFO
+                      "PCI: sharing io port of non PCIe device %s "
+                      "isn't supported. ignoring.\n",
+                      pci_name(pdev));
+               return -ENOSYS;
+       }
+
+       /* the device/port type lives in the PCIe capability flags */
+       pci_read_config_word(pdev, rpcap + PCI_CAP_FLAGS, &cap);
+       switch ((cap & PCI_EXP_FLAGS_TYPE) >> 4) {
+       case PCI_EXP_TYPE_RC_END:
+               printk(KERN_INFO
+                      "PCI: io port sharing of root complex integrated "
+                      "endpoint %s isn't supported. ignoring.\n",
+                      pci_name(pdev));
+               return -ENOSYS;
+       case PCI_EXP_TYPE_ENDPOINT:
+       case PCI_EXP_TYPE_LEG_END:
+               break;
+       default:
+               printk(KERN_INFO
+                      "PCI: io port sharing of non endpoint %s "
+                      "doesn't make sense. ignoring.\n",
+                      pci_name(pdev));
+               return -EINVAL;
+       }
+
+       slot->segment = pci_domain_nr(pdev->bus);
+       slot->bus = pdev->bus->number;
+       slot->dev = PCI_SLOT(pdev->devfn);
+       slot->switch_busnr = pci_dev_switch_busnr(pdev);
+
+       sw = pci_iomul_find_switch(slot->segment, slot->switch_busnr);
+       if (sw == NULL)
+               return -ENOMEM;
+
+       mutex_lock(&sw->lock);
+       if (pci_iomul_find_slot_locked(sw, slot->bus, slot->dev)) {
+               mutex_unlock(&sw->lock);
+               return -EEXIST;
+       }
+       list_add(&slot->sibling, &sw->slots);
+       mutex_unlock(&sw->lock);
+       return 0;
+}
+
+/*
+ * Allocate a zeroed slot for @pdev and register it through
+ * pci_iomul_slot_init().
+ * Returns the new slot, or NULL if allocation or initialisation failed
+ * (the slot is freed again in the latter case).
+ */
+static struct pci_iomul_slot *pci_iomul_slot_alloc(struct pci_dev *pdev)
+{
+       struct pci_iomul_slot *new_slot;
+
+       new_slot = kzalloc(sizeof(*new_slot), GFP_KERNEL);
+       if (new_slot != NULL && pci_iomul_slot_init(pdev, new_slot) != 0) {
+               kfree(new_slot);
+               new_slot = NULL;
+       }
+       return new_slot;
+}
+
+/*****************************************************************************/
+/*
+ * Parse "[<segment>:]<bus>:<dev>" (all hexadecimal) from @str.
+ * A missing segment is taken as 0.
+ *
+ * Deliberately not marked __init: it is called from
+ * pci_is_iomul_dev_param(), which is not init-only code, so it must not
+ * be placed in the init section that is discarded after boot.
+ *
+ * Returns 0 and stores the values through @segment__, @bus__ and @dev__
+ * on success; returns -EINVAL if the string does not parse or a value is
+ * out of range (segment >= INT_MAX, bus > PCI_BUS_MAX, dev > PCI_DEV_MAX).
+ */
+static int pci_get_sbd(const char *str,
+                      int *segment__, uint8_t *bus__, uint8_t *dev__)
+{
+       /* %x stores through unsigned int pointers */
+       unsigned int segment;
+       unsigned int bus;
+       unsigned int dev;
+
+       if (sscanf(str, "%x:%x:%x", &segment, &bus, &dev) != 3) {
+               if (sscanf(str, "%x:%x", &bus, &dev) != 2)
+                       return -EINVAL;
+               segment = 0;
+       }
+
+       /* the segment is handed back as a non-negative int below INT_MAX */
+       if (segment >= INT_MAX)
+               return -EINVAL;
+       if (bus > PCI_BUS_MAX)
+               return -EINVAL;
+       if (dev > PCI_DEV_MAX)
+               return -EINVAL;
+
+       *segment__ = segment;
+       *bus__ = bus;
+       *dev__ = dev;
+       return 0;
+}
+
+static char iomul_param[COMMAND_LINE_SIZE];
+#define TOKEN_MAX      10      /* SSSS:BB:DD length is 10 */
+/*
+ * Check whether @pdev is named in the saved "guestiomuldev=" parameter.
+ * The parameter is a comma separated list of [<segment>:]<bus>:<dev>
+ * tokens; empty tokens, tokens longer than TOKEN_MAX and tokens that
+ * fail to parse are skipped.
+ * Returns 1 if a token matches the segment, bus and slot of the device,
+ * 0 otherwise.
+ */
+static int pci_is_iomul_dev_param(struct pci_dev *pdev)
+{
+       char *cur = &iomul_param[0];
+
+       while (*cur != '\0') {
+               char *comma = strchr(cur, ',');
+               int tok_len;
+
+               if (comma == NULL)
+                       tok_len = strlen(cur);
+               else
+                       tok_len = comma - cur;
+
+               if (tok_len > 0 && tok_len <= TOKEN_MAX) {
+                       char tok[TOKEN_MAX+1];
+                       int seg;
+                       uint8_t bus;
+                       uint8_t dev;
+
+                       /* isolate the token as a NUL terminated string */
+                       strncpy(tok, cur, tok_len);
+                       tok[tok_len] = '\0';
+                       if (pci_get_sbd(tok, &seg, &bus, &dev) == 0 &&
+                           pci_domain_nr(pdev->bus) == seg &&
+                           pdev->bus->number == bus &&
+                           PCI_SLOT(pdev->devfn) == dev)
+                               return 1;
+               }
+
+               if (comma == NULL)
+                       break;
+               cur = comma + 1;
+       }
+
+       return 0;
+}
+
+/*
+ * Handler for the "guestiomuldev=" boot parameter.
+ * Format: [<segment>:]<bus>:<dev>[,[<segment>:]<bus>:<dev>[,...]]
+ *
+ * The string is only stored here. Returns 1 when it was stored, 0 when
+ * it is too long for iomul_param.
+ */
+static int __init pci_iomul_param_setup(char *str)
+{
+       if (strlen(str) < COMMAND_LINE_SIZE) {
+               /* parse it after pci bus scanning */
+               strncpy(iomul_param, str, sizeof(iomul_param));
+               return 1;
+       }
+       return 0;
+}
+__setup("guestiomuldev=", pci_iomul_param_setup);
+
+/*****************************************************************************/
+/*
+ * Program the IO base/limit registers of @bridge from the port addresses
+ * @io_base and @io_limit. The low 12 bits of both addresses are dropped
+ * before they are encoded.
+ */
+static void pci_iomul_set_bridge_io_window(struct pci_dev *bridge,
+                                          uint32_t io_base, uint32_t io_limit)
+{
+       /* address bits 12 and up, moved to start at bit 4 */
+       const uint32_t base = (io_base >> 12) << 4;
+       const uint32_t limit = (io_limit >> 12) << 4;
+       /* low byte: base, high byte: limit */
+       const uint16_t lower16 = (base & 0xff) | ((limit & 0xff) << 8);
+       /* low half: upper base bits, high half: upper limit bits */
+       const uint32_t upper16 = ((base & 0xffff00) >> 8) |
+               (((limit & 0xffff00) >> 8) << 16);
+
+       /* Temporarily disable the I/O range before updating PCI_IO_BASE. */
+       pci_write_config_dword(bridge, PCI_IO_BASE_UPPER16, 0x0000ffff);
+       /* Update lower 16 bits of I/O base/limit. */
+       pci_write_config_word(bridge, PCI_IO_BASE, lower16);
+       /* Update upper 16 bits of I/O base/limit. */
+       pci_write_config_dword(bridge, PCI_IO_BASE_UPPER16, upper16);
+}
+
+/*
+ * Disable the IO window of @bridge by programming a base (0xffffff)
+ * that lies above the limit (0x0).
+ */
+static void pci_disable_bridge_io_window(struct pci_dev *bridge)
+{
+       const uint32_t closed_base = 0xffffff;
+       const uint32_t closed_limit = 0;
+
+       pci_iomul_set_bridge_io_window(bridge, closed_base, closed_limit);
+}
+
+/*
+ * Record the IO BARs of function @func of @pdev in @slot.
+ * A pci_iomul_func is stored in slot->func[func] only when the device
+ * has at least one IO BAR of non-zero length; otherwise nothing is kept.
+ * Returns 0 on success (with or without IO BARs), -ENOMEM on allocation
+ * failure.
+ */
+static int __devinit pci_iomul_func_scan(struct pci_dev *pdev,
+                                        struct pci_iomul_slot *slot,
+                                        uint8_t func)
+{
+       struct pci_iomul_func *info;
+       unsigned int bar;
+
+       info = kzalloc(sizeof(*info), GFP_KERNEL);
+       if (info == NULL)
+               return -ENOMEM;
+
+       info->segment = slot->segment;
+       info->bus = slot->bus;
+       info->devfn = PCI_DEVFN(slot->dev, func);
+       info->io_size = pdev_size_io(pdev);
+
+       for (bar = 0; bar < PCI_NUM_BARS; bar++) {
+               if ((pci_resource_flags(pdev, bar) & IORESOURCE_IO) &&
+                   pci_resource_len(pdev, bar) != 0) {
+                       info->io_bar |= 1 << bar;
+                       info->resource[bar] = pdev->resource[bar];
+               }
+       }
+
+       if (!info->io_bar) {
+               /* no IO BAR at all: nothing worth remembering */
+               kfree(info);
+               return 0;
+       }
+       slot->func[func] = info;
+       return 0;
+}
+
+/*
+ * Adjust the IO BARs of @pdev that are recorded in @func.
+ * @reassign: rebase each recorded BAR to start at 0 (length kept), write
+ *            it to the device and save it in @func; afterwards disable
+ *            the IO window of the parent bridge and rebase the bridge's
+ *            IO resources as well (skipped for a host bridge).
+ * @dealloc:  clear the flags of each recorded BAR so that it is not
+ *            allocated.
+ */
+static void __devinit pci_iomul_fixup_ioresource(struct pci_dev *pdev,
+                                                struct pci_iomul_func *func,
+                                                int reassign, int dealloc)
+{
+       struct pci_dev *bridge;
+       struct resource *res;
+       uint8_t idx;
+
+       printk(KERN_INFO "PCI: deallocating io resource[%s]. io size 0x%lx\n",
+              pci_name(pdev), func->io_size);
+       for (idx = 0; idx < PCI_NUM_BARS; idx++) {
+               if (!(func->io_bar & (1 << idx)))
+                       continue;
+               res = &pdev->resource[idx];
+
+               if (reassign) {
+                       /* rebase to 0, keeping the length */
+                       res->end -= res->start;
+                       res->start = 0;
+                       pci_update_resource(pdev, idx);
+                       func->resource[idx] = *res;
+               }
+
+               if (dealloc)
+                       res->flags = 0; /* don't allocate this resource */
+       }
+
+       if (!reassign)
+               return;
+
+       /* parent PCI-PCI bridge */
+       bridge = pdev->bus->self;
+       if ((bridge->class >> 8) == PCI_CLASS_BRIDGE_HOST)
+               return;
+       pci_disable_bridge_io_window(bridge);
+       for (idx = 0; idx < PCI_NUM_RESOURCES; idx++) {
+               res = &bridge->resource[idx];
+               if (res->flags & IORESOURCE_IO) {
+                       res->end -= res->start;
+                       res->start = 0;
+                       if (idx < PCI_BRIDGE_RESOURCES)
+                               pci_update_resource(bridge, idx);
+               }
+       }
+}
+
+/*
+ * Header fixup run for every PCI device: for a normal-header, non host
+ * bridge device listed in "guestiomuldev=", register its slot, disable
+ * the device and take its IO BARs out of normal resource allocation.
+ * The switch remembers the function with the largest IO size in
+ * sw->func; that function keeps its resource flags, every other one has
+ * them cleared.
+ */
+static void __devinit quirk_iomul_dealloc_ioresource(struct pci_dev *pdev)
+{
+       struct pci_iomul_switch *sw;
+       struct pci_iomul_slot *slot;
+       struct pci_iomul_func *cand;    /* function handled by this call */
+       struct pci_iomul_func *best;    /* largest function seen so far */
+       const uint8_t fn = PCI_FUNC(pdev->devfn);
+
+       if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL ||
+           /* PCI Host Bridge isn't a target device */
+           (pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST ||
+           !pci_is_iomul_dev_param(pdev))
+               return;
+
+       pci_iomul_find_switch_slot(pci_domain_nr(pdev->bus), pdev->bus->number,
+                                  PCI_SLOT(pdev->devfn), &sw, &slot);
+       if (sw == NULL || slot == NULL) {
+               /* first function seen on this slot: register it */
+               slot = pci_iomul_slot_alloc(pdev);
+               if (slot == NULL)
+                       return;
+               sw = __pci_iomul_find_switch(pci_domain_nr(pdev->bus),
+                                            pci_dev_switch_busnr(pdev));
+               BUG_ON(sw == NULL);
+       }
+
+       printk(KERN_INFO "PCI: disable device and release io resource[%s].\n",
+              pci_name(pdev));
+       pci_disable_device(pdev);
+
+       if (pci_iomul_func_scan(pdev, slot, fn) != 0)
+               return;
+
+       cand = slot->func[fn];
+       if (cand == NULL)
+               return;
+
+       best = sw->func;
+       if (best != NULL && best->io_size >= cand->io_size) {
+               /* not larger than the current largest function */
+               pci_iomul_fixup_ioresource(pdev, cand, 1, 1);
+               return;
+       }
+
+       if (best != NULL) {
+               /* the previous largest function loses that role */
+               struct pci_bus *old_bus;
+               struct pci_dev *old_dev;
+
+               old_bus = pci_find_bus(best->segment, best->bus);
+               BUG_ON(old_bus == NULL);
+               old_dev = pci_get_slot(old_bus, best->devfn);
+               BUG_ON(old_dev == NULL);
+               pci_iomul_fixup_ioresource(old_dev, best, 0, 1);
+               pci_dev_put(old_dev);
+       }
+
+       pci_iomul_fixup_ioresource(pdev, cand, 1, 0);
+       sw->func = cand;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID,
+                        quirk_iomul_dealloc_ioresource);
+
+
+/*
+ * Read the IO window of the bridge above sw->func and store it in
+ * sw->io_base / sw->io_limit. The low 12 bits of the limit are forced
+ * to 0xfff.
+ * sw->func is dereferenced unconditionally, so it must not be NULL.
+ */
+static void pci_iomul_read_bridge_io(struct pci_iomul_switch *sw)
+{
+       struct pci_iomul_func *largest = sw->func;
+       struct pci_bus *bus;
+       struct pci_dev *dev;
+       struct pci_dev *bridge;
+       uint16_t base_limit;    /* PCI_IO_BASE word: base low, limit high */
+       uint16_t base_hi;
+       uint16_t limit_hi;
+
+       bus = pci_find_bus(largest->segment, largest->bus);
+       BUG_ON(bus == NULL);
+
+       dev = pci_get_slot(bus, largest->devfn);
+       BUG_ON(dev == NULL);
+
+       bridge = dev->bus->self;
+       pci_read_config_word(bridge, PCI_IO_BASE, &base_limit);
+       pci_read_config_word(bridge, PCI_IO_BASE_UPPER16, &base_hi);
+       pci_read_config_word(bridge, PCI_IO_LIMIT_UPPER16, &limit_hi);
+
+       sw->io_base = ((base_limit & 0xf0) | ((uint32_t)base_hi << 8)) << 8;
+       sw->io_limit =
+               (((base_limit >> 8) | ((uint32_t)limit_hi << 8)) << 8) | 0xfff;
+
+       pci_dev_put(dev);
+       printk(KERN_INFO "PCI: bridge %s base 0x%x limit 0x%x\n",
+              pci_name(bridge), sw->io_base, sw->io_limit);
+}
+
+/*
+ * Program the IO window [io_base, io_limit] into @bridge and make sure
+ * IO decoding is enabled in its command register.
+ * Host bridges are left untouched.
+ */
+static void pci_iomul_setup_brige(struct pci_dev *bridge,
+                                 uint32_t io_base, uint32_t io_limit)
+{
+       uint16_t command;
+
+       if ((bridge->class >> 8) == PCI_CLASS_BRIDGE_HOST)
+               return;
+
+       pci_iomul_set_bridge_io_window(bridge, io_base, io_limit);
+
+       /* and forcibly enables IO */
+       pci_read_config_word(bridge, PCI_COMMAND, &command);
+       if (command & PCI_COMMAND_IO)
+               return;
+
+       printk(KERN_INFO "PCI: Forcibly Enabling IO %s\n",
+              pci_name(bridge));
+       pci_write_config_word(bridge, PCI_COMMAND, command | PCI_COMMAND_IO);
+}
+
+struct __bar {         /* (size, BAR index) pair used to sort IO BARs */
+       unsigned long size;     /* BAR length, from pci_iomul_len() */
+       uint8_t bar;            /* BAR index */
+};
+
+/*
+ * sort() comparator ordering struct __bar entries by size, descending.
+ *
+ * The sizes are compared explicitly rather than subtracted: the
+ * difference of two unsigned long values converted to int can carry the
+ * wrong sign.
+ *
+ * Returns a negative value when @lhs__ sorts before @rhs__ (larger
+ * size), a positive value when it sorts after, and 0 for equal sizes.
+ */
+static int pci_iomul_bar_cmp(const void *lhs__, const void *rhs__)
+{
+       const struct __bar *lhs = lhs__;
+       const struct __bar *rhs = rhs__;
+
+       if (lhs->size > rhs->size)
+               return -1;
+       if (lhs->size < rhs->size)
+               return 1;
+       return 0;
+}
+
+/*
+ * Place the IO BARs of @pdev recorded in @f back to back starting at
+ * @io_base, largest BAR first, and write them to the device.
+ * Every BAR is expected to have been rebased to start 0 beforehand
+ * (BUG_ON otherwise). The new addresses are stored in f->resource too.
+ */
+static void pci_iomul_setup_dev(struct pci_dev *pdev,
+                               struct pci_iomul_func *f,
+                               uint32_t io_base)
+{
+       struct __bar sorted[PCI_NUM_BARS];
+       uint8_t count = 0;
+       int idx;
+
+       printk(KERN_INFO "PCI: Forcibly assign IO %s from 0x%x\n",
+              pci_name(pdev), io_base);
+
+       /* collect the IO BARs recorded for this function */
+       for (idx = 0; idx < PCI_NUM_BARS; idx++) {
+               if (f->io_bar & (1 << idx)) {
+                       sorted[count].size = pci_iomul_len(&f->resource[idx]);
+                       sorted[count].bar = idx;
+                       count++;
+               }
+       }
+
+       sort(sorted, count, sizeof(sorted[0]), &pci_iomul_bar_cmp, NULL);
+
+       for (idx = 0; idx < count; idx++) {
+               const uint8_t bar = sorted[idx].bar;
+               struct resource *saved = &f->resource[bar];
+               struct resource *live = &pdev->resource[bar];
+
+               BUG_ON(live->start != 0);
+               live->start += io_base;
+               live->end += io_base;
+               live->flags = saved->flags;
+
+               saved->start = live->start;
+               saved->end = live->end;
+
+               /* allocate the io region */
+               pci_update_resource(pdev, bar);
+
+               io_base += sorted[idx].size;
+       }
+}
+
+/*
+ * Release the IO BARs of @pdev that are linked into the resource tree
+ * (non-NULL parent), saving a copy of each in @f first. The linked IO
+ * resources of the parent bridge in the range PCI_BRIDGE_RESOURCES ..
+ * PCI_NUM_RESOURCES - 1 are released too, unless it is a host bridge.
+ */
+static void __devinit pci_iomul_release_io_resource(struct pci_dev *pdev,
+                                                   struct pci_iomul_func *f)
+{
+       struct pci_dev *bridge;
+       int idx;
+
+       for (idx = 0; idx < PCI_NUM_BARS; idx++) {
+               struct resource *res = &pdev->resource[idx];
+
+               if (!(pci_resource_flags(pdev, idx) & IORESOURCE_IO))
+                       continue;
+               if (res->parent == NULL)
+                       continue;
+               f->resource[idx] = *res;
+               release_resource(res);
+       }
+
+       /* parent PCI-PCI bridge */
+       bridge = pdev->bus->self;
+       if ((bridge->class >> 8) == PCI_CLASS_BRIDGE_HOST)
+               return;
+
+       for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_NUM_RESOURCES; idx++) {
+               struct resource *res = &bridge->resource[idx];
+
+               if ((pci_resource_flags(bridge, idx) & IORESOURCE_IO) &&
+                   res->parent != NULL)
+                       release_resource(res);
+       }
+}
+
+/*
+ * Final fixup run for every PCI device: for a device registered by the
+ * header fixup, point it at the IO window shared by its switch.
+ *
+ * The window is taken from the bridge above sw->func, the function with
+ * the largest IO size. That function has its IO resources released;
+ * every other registered function gets its BARs placed at the start of
+ * the window. For function 0 of a slot other than the slot of sw->func,
+ * the parent bridge is also programmed with the shared window.
+ *
+ * sw->func is checked for NULL before the bridge window is read, since
+ * pci_iomul_read_bridge_io() dereferences it.
+ */
+static void __devinit quirk_iomul_reassign_ioresource(struct pci_dev *pdev)
+{
+       struct pci_iomul_switch *sw;
+       struct pci_iomul_slot *slot;
+       struct pci_iomul_func *sf;
+       struct pci_iomul_func *f;
+
+       pci_iomul_find_switch_slot(pci_domain_nr(pdev->bus), pdev->bus->number,
+                                  PCI_SLOT(pdev->devfn), &sw, &slot);
+       if (sw == NULL || slot == NULL)
+               return;
+
+       sf = sw->func;
+       if (sf == NULL)
+               /*
+                * this case can happen when all the specified devices
+                * don't have io space
+                */
+               return;
+
+       if (sw->io_base == 0)
+               pci_iomul_read_bridge_io(sw);
+       if (sw->io_base == 0 || sw->io_base > sw->io_limit)
+               return;
+
+       f = slot->func[PCI_FUNC(pdev->devfn)];
+       if (f == NULL)
+               /* this function has no io space recorded */
+               return;
+
+       if ((pci_domain_nr(pdev->bus) != sf->segment ||
+            pdev->bus->number != sf->bus ||
+            PCI_SLOT(pdev->devfn) != PCI_SLOT(sf->devfn)) &&
+           PCI_FUNC(pdev->devfn) == 0) {
+               pci_iomul_setup_brige(pdev->bus->self,
+                                     sw->io_base, sw->io_limit);
+       }
+
+       BUG_ON(f->io_size > sw->io_limit - sw->io_base + 1);
+       if (/* f == sf */
+           pci_domain_nr(pdev->bus) == sf->segment &&
+           pdev->bus->number == sf->bus &&
+           pdev->devfn == sf->devfn)
+               pci_iomul_release_io_resource(pdev, f);
+       else
+               pci_iomul_setup_dev(pdev, f, sw->io_base);
+}
+
+DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID,
+                       quirk_iomul_reassign_ioresource);
+
+/*****************************************************************************/
+struct pci_iomul_data {        /* per-open state kept in filp->private_data */
+       struct mutex lock;      /* taken around use of the fields below */
+
+       struct pci_dev *pdev;           /* set by pci_iomul_setup() */
+       struct pci_iomul_switch *sw;    /* switch of pdev; NULL until setup */
+       struct pci_iomul_slot *slot;    /* slot of pdev */
+       struct pci_iomul_func *func;    /* IO BAR record of pdev */
+};
+
+/*
+ * Translate (@bar, @offset) of @func into an IO port number.
+ *
+ * @bar is validated against PCI_NUM_BARS before it is used as a shift
+ * count and array index, and @offset is validated against the BAR
+ * length before the addition, so a 64-bit offset can no longer alias a
+ * valid port after truncation to int.
+ *
+ * Returns 0 and stores the port in *@port on success. Returns -EINVAL,
+ * leaving *@port untouched, if @bar is not a recorded IO BAR of @func
+ * or @offset lies outside of it.
+ */
+static int pci_iomul_func_ioport(struct pci_iomul_func *func,
+                                uint8_t bar, uint64_t offset, int *port)
+{
+       const struct resource *res;
+
+       if (bar >= PCI_NUM_BARS)
+               return -EINVAL;
+       if (!(func->io_bar & (1 << bar)))
+               return -EINVAL;
+
+       res = &func->resource[bar];
+       if (res->end < res->start || offset > res->end - res->start)
+               return -EINVAL;
+
+       *port = res->start + offset;
+       return 0;
+}
+
+/*
+ * Take a reference on @pdev and set PCI_COMMAND_IO in its command
+ * register. The reference is dropped by __pci_iomul_disable_io().
+ */
+static void __pci_iomul_enable_io(struct pci_dev *pdev)
+{
+       uint16_t command;
+
+       pci_dev_get(pdev);
+       pci_read_config_word(pdev, PCI_COMMAND, &command);
+       pci_write_config_word(pdev, PCI_COMMAND, command | PCI_COMMAND_IO);
+}
+
+/*
+ * Clear PCI_COMMAND_IO in the command register of @pdev and drop the
+ * reference taken by __pci_iomul_enable_io().
+ */
+static void __pci_iomul_disable_io(struct pci_dev *pdev)
+{
+       uint16_t command;
+
+       pci_read_config_word(pdev, PCI_COMMAND, &command);
+       pci_write_config_word(pdev, PCI_COMMAND,
+                             (uint16_t)(command & ~PCI_COMMAND_IO));
+       pci_dev_put(pdev);
+}
+
+/*
+ * open() handler: allocate the per-file state and attach it to
+ * filp->private_data.
+ *
+ * The state is allocated zeroed so that pdev, sw, slot and func all
+ * start out NULL; the slot pointer used to be left uninitialized.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails.
+ */
+static int pci_iomul_open(struct inode *inode, struct file *filp)
+{
+       struct pci_iomul_data *iomul;
+
+       iomul = kzalloc(sizeof(*iomul), GFP_KERNEL);
+       if (iomul == NULL)
+               return -ENOMEM;
+
+       mutex_init(&iomul->lock);
+       filp->private_data = iomul;
+
+       return 0;
+}
+
+/*
+ * release() handler: undo pci_iomul_setup() for this open file and free
+ * the per-file state.
+ *
+ * If a device was set up, its IO decoding is switched off when it is
+ * the device currently selected on the switch, the use count of the
+ * switch is dropped, the shared IO region is released when the count
+ * reaches zero, and the device reference taken at setup time is put.
+ *
+ * Always returns 0.
+ */
+static int pci_iomul_release(struct inode *inode, struct file *filp)
+{
+       struct pci_iomul_data *iomul =
+               (struct pci_iomul_data*)filp->private_data;
+       struct pci_iomul_switch *sw;
+
+       mutex_lock(&iomul->lock);
+       sw = iomul->sw;
+       if (iomul->pdev != NULL) {
+               if (sw != NULL) {
+                       mutex_lock(&sw->lock);
+                       if (sw->current_pdev == iomul->pdev) {
+                               __pci_iomul_disable_io(sw->current_pdev);
+                               sw->current_pdev = NULL;
+                       }
+                       sw->count--;
+                       if (sw->count == 0) {
+                               /* last user gone: give the region back */
+                               release_region(sw->io_region->start,
+                                              sw->io_region->end -
+                                              sw->io_region->start + 1);
+                               sw->io_region = NULL;
+                       }
+                       mutex_unlock(&sw->lock);
+               }
+               pci_dev_put(iomul->pdev);
+       }
+       mutex_unlock(&iomul->lock);
+
+       kfree(iomul);
+       return 0;
+}
+
+/*
+ * Bind this open file to the device named by the user supplied
+ * struct pci_iomul_setup (segment, bus, dev, func).
+ *
+ * On the first successful setup for a switch the shared IO region
+ * [io_base, io_limit] is reserved with request_region(); every
+ * successful setup increments the use count of the switch. A reference
+ * to the device is held until release.
+ *
+ * setup.func comes from user space and is range checked before it is
+ * used to index slot->func[].
+ *
+ * Returns 0 on success, -EFAULT if @arg cannot be copied, -EBUSY if the
+ * file is already set up or the IO region cannot be reserved, -EINVAL
+ * for a function number >= PCI_NUM_FUNC, and -ENODEV if the device is
+ * not registered for IO sharing or cannot be found.
+ */
+static long pci_iomul_setup(struct pci_iomul_data *iomul,
+                           struct pci_iomul_setup __user *arg)
+{
+       long error = 0;
+       struct pci_iomul_setup setup;
+       struct pci_iomul_switch *sw;
+       struct pci_iomul_slot *slot;
+       struct pci_bus *pbus;
+       struct pci_dev *pdev = NULL;
+
+       if (copy_from_user(&setup, arg, sizeof(setup)))
+               return -EFAULT;
+
+       mutex_lock(&iomul->lock);
+       if (iomul->sw != NULL) {
+               error = -EBUSY;
+               goto out;
+       }
+
+       pci_iomul_find_switch_slot(setup.segment, setup.bus, setup.dev,
+                                  &sw, &slot);
+       if (sw == NULL || slot == NULL) {
+               error = -ENODEV;
+               goto out;
+       }
+
+       /* never index slot->func[] with an unchecked user value */
+       if (setup.func >= PCI_NUM_FUNC) {
+               error = -EINVAL;
+               goto out;
+       }
+
+       if (slot->func[setup.func] == NULL) {
+               error = -ENODEV;
+               goto out;
+       }
+
+       pbus = pci_find_bus(slot->segment, slot->bus);
+       if (pbus == NULL) {
+               error = -ENODEV;
+               goto out;
+       }
+
+       pdev = pci_get_slot(pbus, PCI_DEVFN(setup.dev, setup.func));
+       if (pdev == NULL) {
+               error = -ENODEV;
+               goto out;
+       }
+
+       mutex_lock(&sw->lock);
+       if (sw->count == 0) {
+               /* first user of this switch: reserve the shared window */
+               BUG_ON(sw->io_region != NULL);
+               sw->io_region =
+                       request_region(sw->io_base,
+                                      sw->io_limit - sw->io_base + 1,
+                                      "PCI IO multiplexer");
+               if (sw->io_region == NULL) {
+                       mutex_unlock(&sw->lock);
+                       error = -EBUSY;
+                       goto out;
+               }
+       }
+       sw->count++;
+       mutex_unlock(&sw->lock);
+
+       iomul->pdev = pdev;
+       iomul->sw = sw;
+       iomul->slot = slot;
+       iomul->func = slot->func[setup.func];
+
+out:
+       mutex_unlock(&iomul->lock);
+       /* on failure the device reference is not kept */
+       if (error != 0 && pdev != NULL)
+               pci_dev_put(pdev);
+       return error;
+}
+
+/*
+ * PCI_IOMUL_DISABLE_IO handler.
+ *
+ * If the device bound to this file is the switch's current device,
+ * call __pci_iomul_disable_io() on it and leave the switch with no
+ * current device.  Both iomul->lock and sw->lock are held while doing so.
+ *
+ * Returns -ENODEV when no switch/device has been set up on this file,
+ * 0 otherwise.
+ */
+static long pci_iomul_disable_io(struct pci_iomul_data *iomul)
+{
+       long ret = -ENODEV;
+       struct pci_iomul_switch *sw;
+       struct pci_dev *owner;
+
+       mutex_lock(&iomul->lock);
+       sw = iomul->sw;
+       owner = iomul->pdev;
+       if (sw != NULL && owner != NULL) {
+               mutex_lock(&sw->lock);
+               if (sw->current_pdev == owner) {
+                       __pci_iomul_disable_io(owner);
+                       sw->current_pdev = NULL;
+               }
+               mutex_unlock(&sw->lock);
+               ret = 0;
+       }
+       mutex_unlock(&iomul->lock);
+
+       return ret;
+}
+
+/*
+ * Make next_pdev the switch's current device.
+ *
+ * Nothing happens when next_pdev is already current.  Otherwise the
+ * previous current device (if any) goes through
+ * __pci_iomul_disable_io(), next_pdev goes through
+ * __pci_iomul_enable_io(), and sw->current_pdev is updated.
+ * The callers visible in this file invoke it with sw->lock held.
+ */
+static void pci_iomul_switch(struct pci_iomul_switch *sw,
+                            struct pci_dev *next_pdev)
+{
+       struct pci_dev *prev = sw->current_pdev;
+
+       if (prev != next_pdev) {
+               if (prev != NULL)
+                       __pci_iomul_disable_io(prev);
+
+               __pci_iomul_enable_io(next_pdev);
+               sw->current_pdev = next_pdev;
+       }
+}
+
+/*
+ * PCI_IOMUL_IN handler: read 1, 2 or 4 bytes from an IO port of the
+ * device bound to this file and store the result in arg->value.
+ *
+ * The port is derived from (bar, offset) by pci_iomul_func_ioport().
+ * The access size is validated before the switch is touched, so a
+ * malformed request does not move the switch away from its current
+ * device.
+ *
+ * Returns 0 on success, -EFAULT if the request cannot be copied in or
+ * the value cannot be copied out, -ENODEV if no device has been set up
+ * on this file, the error from pci_iomul_func_ioport() if it fails,
+ * and -EINVAL for an unsupported size.
+ *
+ * NOTE(review): pci_iomul_func_ioport() is given bar and offset but
+ * not size - confirm that it leaves room for a multi-byte access at
+ * the end of the BAR.
+ */
+static long pci_iomul_in(struct pci_iomul_data *iomul,
+                        struct pci_iomul_in __user *arg)
+{
+       struct pci_iomul_in in;
+       struct pci_iomul_switch *sw;
+       struct pci_iomul_func *func;
+
+       long error = 0;
+       int port;
+       uint32_t value = 0;
+
+       if (copy_from_user(&in, arg, sizeof(in)))
+               return -EFAULT;
+
+       mutex_lock(&iomul->lock);
+       sw = iomul->sw;
+       func = iomul->func;
+       if (sw == NULL || func == NULL) {
+               mutex_unlock(&iomul->lock);
+               return -ENODEV;
+       }
+
+       mutex_lock(&sw->lock);
+
+       error = pci_iomul_func_ioport(func, in.bar, in.offset, &port);
+       if (error)
+               goto out;
+
+       /* Reject a bad size before switching devices on the shared switch. */
+       if (in.size != 1 && in.size != 2 && in.size != 4) {
+               error = -EINVAL;
+               goto out;
+       }
+
+       pci_iomul_switch(sw, iomul->pdev);
+       if (in.size == 4)
+               value = inl(port);
+       else if (in.size == 2)
+               value = inw(port);
+       else
+               value = inb(port);
+
+out:
+       mutex_unlock(&sw->lock);
+       mutex_unlock(&iomul->lock);
+
+       /* Copy the result out only after both locks have been released. */
+       if (error == 0 && put_user(value, &arg->value))
+               return -EFAULT;
+       return error;
+}
+
+/*
+ * PCI_IOMUL_OUT handler: write 1, 2 or 4 bytes of out.value to an IO
+ * port of the device bound to this file.
+ *
+ * The port is derived from (bar, offset) by pci_iomul_func_ioport().
+ * The access size is validated before the switch is touched, so a
+ * malformed request does not move the switch away from its current
+ * device.
+ *
+ * Returns 0 on success, -EFAULT if the request cannot be copied in,
+ * -ENODEV if no device has been set up on this file, the error from
+ * pci_iomul_func_ioport() if it fails, and -EINVAL for an unsupported
+ * size.
+ *
+ * NOTE(review): pci_iomul_func_ioport() is given bar and offset but
+ * not size - confirm that it leaves room for a multi-byte access at
+ * the end of the BAR.
+ */
+static long pci_iomul_out(struct pci_iomul_data *iomul,
+                         struct pci_iomul_out __user *arg)
+{
+       /* Use the request type that matches the __user argument. */
+       struct pci_iomul_out out;
+       struct pci_iomul_switch *sw;
+       struct pci_iomul_func *func;
+
+       long error = 0;
+       int port;
+
+       if (copy_from_user(&out, arg, sizeof(out)))
+               return -EFAULT;
+
+       mutex_lock(&iomul->lock);
+       sw = iomul->sw;
+       func = iomul->func;
+       if (sw == NULL || func == NULL) {
+               mutex_unlock(&iomul->lock);
+               return -ENODEV;
+       }
+
+       mutex_lock(&sw->lock);
+
+       error = pci_iomul_func_ioport(func, out.bar, out.offset, &port);
+       if (error)
+               goto unlock;
+
+       /* Reject a bad size before switching devices on the shared switch. */
+       if (out.size != 1 && out.size != 2 && out.size != 4) {
+               error = -EINVAL;
+               goto unlock;
+       }
+
+       pci_iomul_switch(sw, iomul->pdev);
+       if (out.size == 4)
+               outl(out.value, port);
+       else if (out.size == 2)
+               outw(out.value, port);
+       else
+               outb(out.value, port);
+
+unlock:
+       mutex_unlock(&sw->lock);
+       mutex_unlock(&iomul->lock);
+       return error;
+}
+
+/*
+ * unlocked_ioctl entry point: dispatch a PCI_IOMUL_* command to its
+ * handler, passing the per-file state kept in filp->private_data.
+ * Returns the handler's result, or -ENOSYS for an unknown command.
+ */
+static long pci_iomul_ioctl(struct file *filp,
+                           unsigned int cmd, unsigned long arg)
+{
+       struct pci_iomul_data *iomul = filp->private_data;
+       void __user *argp = (void __user *)arg;
+
+       switch (cmd) {
+       case PCI_IOMUL_SETUP:
+               return pci_iomul_setup(iomul, argp);
+       case PCI_IOMUL_DISABLE_IO:
+               return pci_iomul_disable_io(iomul);
+       case PCI_IOMUL_IN:
+               return pci_iomul_in(iomul, argp);
+       case PCI_IOMUL_OUT:
+               return pci_iomul_out(iomul, argp);
+       default:
+               return -ENOSYS;
+       }
+}
+
+/*
+ * File operations of the pci_iomul misc device.  Only open, release
+ * and unlocked_ioctl are provided; every request goes through
+ * pci_iomul_ioctl().
+ */
+static const struct file_operations pci_iomul_fops = {
+       .owner = THIS_MODULE,
+
+       .open = pci_iomul_open, /* nonseekable_open */
+       .release = pci_iomul_release,
+
+       .unlocked_ioctl = pci_iomul_ioctl,
+};
+
+/*
+ * Misc device descriptor registered by pci_iomul_init().  The minor
+ * number is not fixed (MISC_DYNAMIC_MINOR); the device is named
+ * "pci_iomul" and served by pci_iomul_fops.
+ */
+static struct miscdevice pci_iomul_miscdev = {
+       .minor = MISC_DYNAMIC_MINOR,
+       .name = "pci_iomul",
+       .fops = &pci_iomul_fops,
+};
+
+/*
+ * Register the pci_iomul misc device.
+ * Returns 0 on success or the error reported by misc_register().
+ */
+static int pci_iomul_init(void)
+{
+       int error;
+
+       error = misc_register(&pci_iomul_miscdev);
+       if (error != 0) {
+               /* Terminate the message so it is not merged with the next one. */
+               printk(KERN_ALERT "Couldn't register /dev/misc/pci_iomul\n");
+               return error;
+       }
+       printk(KERN_INFO "PCI IO multiplexer device installed.\n");
+       return 0;
+}
+
+/*
+ * Counterpart of pci_iomul_init(): unregisters the misc device.
+ * Compiled out with #if 0, so it is never built or called.
+ */
+#if 0
+static void pci_iomul_cleanup(void)
+{
+       misc_deregister(&pci_iomul_miscdev);
+}
+#endif
+
+/*
+ * Registered as a late_initcall on purpose: pci_iomul_init() must run
+ * after the PCI final fixups, which are invoked from
+ * device_initcall(pci_init).
+ */
+late_initcall(pci_iomul_init);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Isaku Yamahata <yamahata@xxxxxxxxxxxxx>");
+MODULE_DESCRIPTION("PCI IO space multiplexing driver");
diff --git a/drivers/pci/iomulti.h b/drivers/pci/iomulti.h
new file mode 100644
--- /dev/null
+++ b/drivers/pci/iomulti.h
@@ -0,0 +1,51 @@
+#ifndef PCI_IOMULTI_H
+#define PCI_IOMULTI_H
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright (c) 2009 Isaku Yamahata
+ *                    VA Linux Systems Japan K.K.
+ *
+ */
+
+/*
+ * Argument of PCI_IOMUL_SETUP: the PCI address of the function to bind
+ * to the open file.  This is part of the ioctl interface, so the field
+ * order and types must not change.
+ */
+struct pci_iomul_setup {
+       uint16_t        segment;        /* PCI segment */
+       uint8_t         bus;            /* bus number */
+       uint8_t         dev;            /* device (slot) number */
+       uint8_t         func;           /* function number */
+};
+
+/*
+ * Argument of PCI_IOMUL_IN: describes one IO port read.  The caller
+ * fills in bar, offset and size; the driver stores the result in value.
+ * This is part of the ioctl interface, so the layout must not change.
+ * NOTE(review): narrow members are followed by wider ones, so the
+ * layout contains compiler-inserted padding, and uint64_t alignment
+ * can differ between 32-bit and 64-bit ABIs - confirm whether a 32-bit
+ * caller on a 64-bit kernel needs to be supported.
+ */
+struct pci_iomul_in {
+       uint8_t         bar;            /* which BAR of the function */
+       uint64_t        offset;         /* offset within that BAR */
+
+       uint8_t         size;           /* access width in bytes: 1, 2 or 4 */
+       uint32_t        value;          /* out: the value read */
+};
+
+/*
+ * Argument of PCI_IOMUL_OUT: describes one IO port write.  The caller
+ * fills in every field.  Same member layout as struct pci_iomul_in.
+ * This is part of the ioctl interface, so the layout must not change.
+ * NOTE(review): narrow members are followed by wider ones, so the
+ * layout contains compiler-inserted padding, and uint64_t alignment
+ * can differ between 32-bit and 64-bit ABIs - confirm whether a 32-bit
+ * caller on a 64-bit kernel needs to be supported.
+ */
+struct pci_iomul_out {
+       uint8_t         bar;            /* which BAR of the function */
+       uint64_t        offset;         /* offset within that BAR */
+
+       uint8_t         size;           /* access width in bytes: 1, 2 or 4 */
+       uint32_t        value;          /* the value to write */
+};
+
+/*
+ * ioctl commands of the pci_iomul device, all in the 'P' group.
+ * PCI_IOMUL_IN is _IOWR because the request is read from user space
+ * and the value read from the port is written back into it; the other
+ * commands only pass data to the driver, or carry no argument at all.
+ */
+#define PCI_IOMUL_SETUP                _IOW ('P', 0, struct pci_iomul_setup)
+#define PCI_IOMUL_DISABLE_IO   _IO  ('P', 1)
+#define PCI_IOMUL_IN           _IOWR('P', 2, struct pci_iomul_in)
+#define PCI_IOMUL_OUT          _IOW ('P', 3, struct pci_iomul_out)
+
+#endif /* PCI_IOMULTI_H */

Attachment: pci-quirk-iomulti.patch
Description: Text Data

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-devel] [PATCH 1/3] PCIe IO space multiplexing: Linux part, Isaku Yamahata <=