This patch adds the function that reassign page-aligned memory
resources, to dom0 linux. The function is useful when we assign I/O
device to HVM domain using pci passthrough.
When we assign a device to HVM domain using pci passthrough,
the device needs to be assigned page-aligned memory resources. If the
memory resource is not page-aligned, following error occurs.
Error: pci: 0000:00:1d.7: non-page-aligned MMIO BAR found.
On many system, BIOS assigns memory resources to the device and
enables it. So my patch disables the device, and releases resources.
Then it assigns page-aligned memory resource to the device.
Note: we don't need to re-enable device, because guest software will
enable it.
To reassign resources, please add boot parameters of dom0 linux as follows.
reassign_resources reassigndev=00:1d.7,01:00.0
reassign_resources
Enables reassigning resources.
reassigndev= Specifies devices include I/O device and PCI-PCI
bridge to reassign resources. PCI-PCI bridge
can be specified, if resource windows need to
be expanded.
You can easily improve the way of specifying device to reassign,
changing the code of reassigndev.c.
There is a similar function enabled by pci-mem-align boot
parameter. Currently it is kept. But if many people agree with me, I'd
like to remove it from dom0 linux, because there are two problems.
- pci-mem-align reassigns all devices' memory resources if they are
not page-aligned. This is not safe, because some devices are
used by firmware.
- pci-mem-align can't expand resource window of PCI-PCI bridge.
Thanks,
--
Yuji Shimada
Signed-off-by: Yuji Shimada <shimada-yxb@xxxxxxxxxxxxxxx>
diff -r b54652ee29ef drivers/pci/Makefile
--- a/drivers/pci/Makefile Thu Oct 02 11:29:02 2008 +0100
+++ b/drivers/pci/Makefile Wed Oct 08 12:12:27 2008 +0900
@@ -3,7 +3,8 @@
#
obj-y += access.o bus.o probe.o remove.o pci.o quirks.o \
- pci-driver.o search.o pci-sysfs.o rom.o setup-res.o
+ pci-driver.o search.o pci-sysfs.o rom.o setup-res.o \
+ reassigndev.o
obj-$(CONFIG_PROC_FS) += proc.o
# Build PCI Express stuff if needed
diff -r b54652ee29ef drivers/pci/pci.h
--- a/drivers/pci/pci.h Thu Oct 02 11:29:02 2008 +0100
+++ b/drivers/pci/pci.h Wed Oct 08 12:12:27 2008 +0900
@@ -99,3 +99,8 @@
return NULL;
}
+#define ROUND_UP_TO_PAGESIZE(size) ((size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
+
+extern int reassign_resources;
+extern int is_reassigndev(struct pci_dev *dev);
+extern void pci_update_bridge(struct pci_dev *dev, int resno);
diff -r b54652ee29ef drivers/pci/quirks.c
--- a/drivers/pci/quirks.c Thu Oct 02 11:29:02 2008 +0100
+++ b/drivers/pci/quirks.c Wed Oct 08 12:12:27 2008 +0900
@@ -33,6 +33,19 @@
}
__setup("pci-mem-align", set_pci_mem_align);
+
+int reassign_resources = 0;
+
+static int __init set_reassign_resources(char *str)
+{
+ /* resources reassign on */
+ reassign_resources = 1;
+ printk(KERN_DEBUG "PCI: resource reassign ON.\n");
+
+ return 1;
+}
+__setup("reassign_resources", set_reassign_resources);
+
/* This quirk function enables us to force all memory resources which are
* assigned to PCI devices, to be page-aligned.
*/
@@ -41,6 +54,42 @@
int i;
struct resource *r;
resource_size_t old_start;
+
+ if (reassign_resources) {
+ if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
+ (dev->class >> 8) == PCI_CLASS_BRIDGE_HOST) {
+ /* PCI Host Bridge isn't a target device */
+ return;
+ }
+ if (is_reassigndev(dev)) {
+ printk(KERN_INFO
+ "PCI: Disable device and release resources"
+ " [%s].\n", pci_name(dev));
+ pci_disable_device(dev);
+
+ for (i=0; i < PCI_NUM_RESOURCES; i++) {
+ r = &dev->resource[i];
+ if ((r == NULL) ||
+ !(r->flags & IORESOURCE_MEM))
+ continue;
+
+ r->end = r->end - r->start;
+ r->start = 0;
+
+ if (i < PCI_BRIDGE_RESOURCES) {
+ pci_update_resource(dev, r, i);
+ } else if (i == 8 || i == 9) {
+ /* need to update(clear) the Base/Limit
+ * register also, because PCI bridge is
+ * disabled and the resource is
+ * released.
+ */
+ pci_update_bridge(dev, i);
+ }
+ }
+ }
+ return;
+ }
if (!pci_mem_align)
return;
diff -r b54652ee29ef drivers/pci/reassigndev.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/pci/reassigndev.c Wed Oct 08 12:12:27 2008 +0900
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2008, NEC Corporation.
+ * Taro Nichiden <t-nichiden at ab jp nec com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include "pci.h"
+
+
+#define REASSIGNDEV_PARAM_MAX (2048)
+#define TOKEN_MAX (12) /* "SSSS:BB:DD.F" length is 12 */
+
+static char param_reassigndev[REASSIGNDEV_PARAM_MAX] = {0};
+
+static int __init reassigndev_setup(char *str)
+{
+ strncpy(param_reassigndev, str, REASSIGNDEV_PARAM_MAX);
+ param_reassigndev[REASSIGNDEV_PARAM_MAX - 1] = '\0';
+ return 1;
+}
+__setup("reassigndev=", reassigndev_setup);
+
+int is_reassigndev(struct pci_dev *dev)
+{
+ char dev_str[TOKEN_MAX+1];
+ int seg, bus, slot, func;
+ int len;
+ char *p, *next_str;
+
+ p = param_reassigndev;
+ for (; p; p = next_str + 1) {
+ next_str = strpbrk(p, ",");
+ if (next_str) {
+ len = next_str - p;
+ } else {
+ len = strlen(p);
+ }
+ if (len > 0 && len <= TOKEN_MAX) {
+ strncpy(dev_str, p, len);
+ *(dev_str + len) = '\0';
+
+ if (sscanf(dev_str, "%x:%x:%x.%x",
+ &seg, &bus, &slot, &func) != 4) {
+ if (sscanf(dev_str, "%x:%x.%x",
+ &bus, &slot, &func) == 3) {
+ seg = 0;
+ } else {
+ /* failed to scan strings */
+ seg = -1;
+ bus = -1;
+ }
+ }
+ if (seg == pci_domain_nr(dev->bus) &&
+ bus == dev->bus->number &&
+ slot == PCI_SLOT(dev->devfn) &&
+ func == PCI_FUNC(dev->devfn)) {
+ /* It's a target device */
+ return 1;
+ }
+ }
+ if (!next_str)
+ break;
+ }
+
+ return 0;
+}
diff -r b54652ee29ef drivers/pci/setup-bus.c
--- a/drivers/pci/setup-bus.c Thu Oct 02 11:29:02 2008 +0100
+++ b/drivers/pci/setup-bus.c Wed Oct 08 12:12:27 2008 +0900
@@ -26,6 +26,7 @@
#include <linux/cache.h>
#include <linux/slab.h>
+#include "pci.h"
#define DEBUG_CONFIG 1
#if DEBUG_CONFIG
@@ -344,7 +345,8 @@
list_for_each_entry(dev, &bus->devices, bus_list) {
int i;
-
+ int reassign = reassign_resources ? is_reassigndev(dev) : 0;
+
for (i = 0; i < PCI_NUM_RESOURCES; i++) {
struct resource *r = &dev->resource[i];
unsigned long r_size;
@@ -352,6 +354,11 @@
if (r->parent || (r->flags & mask) != type)
continue;
r_size = r->end - r->start + 1;
+
+ if (reassign) {
+ r_size = ROUND_UP_TO_PAGESIZE(r_size);
+ }
+
/* For bridges size != alignment */
align = (i < PCI_BRIDGE_RESOURCES) ? r_size : r->start;
order = __ffs(align) - 20;
diff -r b54652ee29ef drivers/pci/setup-res.c
--- a/drivers/pci/setup-res.c Thu Oct 02 11:29:02 2008 +0100
+++ b/drivers/pci/setup-res.c Wed Oct 08 12:12:27 2008 +0900
@@ -117,19 +117,96 @@
}
EXPORT_SYMBOL_GPL(pci_claim_resource);
+void
+pci_update_bridge(struct pci_dev *dev, int resno)
+{
+ struct resource *res = &dev->resource[resno];
+ struct pci_bus_region region;
+ u32 l, dw, base_up32, limit_up32;
+
+ if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE ||
+ (dev->class >> 8) != PCI_CLASS_BRIDGE_PCI) {
+ return;
+ }
+
+ if (!res->flags)
+ return;
+
+ switch (resno) {
+ case 8 : /* MMIO Base/Limit */
+ pcibios_resource_to_bus(dev, ®ion, res);
+ if (res->flags & IORESOURCE_MEM &&
+ !(res->flags & IORESOURCE_PREFETCH)) {
+ l = (region.start >> 16) & 0xfff0;
+ l |= region.end & 0xfff00000;
+ } else {
+ l = 0x0000fff0;
+ }
+ pci_write_config_dword(dev, PCI_MEMORY_BASE, l);
+
+ break;
+
+ case 9 : /* Prefetchable MMIO Base/Limit */
+ /* Clear out the upper 32 bits of PREF limit.
+ * If PCI_PREF_BASE_UPPER32 was non-zero, this temporarily
+ * disables PREF range, which is ok.
+ */
+ pci_write_config_dword(dev, PCI_PREF_LIMIT_UPPER32, 0);
+
+ /* Get PREF 32/64 bits Addressing mode */
+ pci_read_config_dword(dev, PCI_PREF_MEMORY_BASE, &dw);
+
+ pcibios_resource_to_bus(dev, ®ion, res);
+ if (res->flags & IORESOURCE_MEM &&
+ res->flags & IORESOURCE_PREFETCH) {
+ l = (region.start >> 16) & 0xfff0;
+ l |= region.end & 0xfff00000;
+
+ if (dw & PCI_PREF_RANGE_TYPE_64) {
+ base_up32 = (region.start >> 32) & 0xffffffff;
+ limit_up32 = (region.end >> 32) & 0xffffffff;
+ } else {
+ base_up32 = 0;
+ limit_up32 = 0;
+ }
+ } else {
+ l = 0x0000fff0;
+ base_up32 = 0xffffffff;
+ limit_up32 = 0;
+ }
+ pci_write_config_dword(dev, PCI_PREF_MEMORY_BASE, l);
+ /* Set up the upper 32 bits of PREF base/limit. */
+ pci_write_config_dword(dev, PCI_PREF_BASE_UPPER32, base_up32);
+ pci_write_config_dword(dev, PCI_PREF_LIMIT_UPPER32, limit_up32);
+ break;
+ default :
+ BUG();
+ break;
+ }
+}
+
int pci_assign_resource(struct pci_dev *dev, int resno)
{
struct pci_bus *bus = dev->bus;
struct resource *res = dev->resource + resno;
resource_size_t size, min, align;
int ret;
+ int reassigndev = reassign_resources ? is_reassigndev(dev) : 0;
size = res->end - res->start + 1;
min = (res->flags & IORESOURCE_IO) ? PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM;
/* The bridge resources are special, as their
size != alignment. Sizing routines return
required alignment in the "start" field. */
- align = (resno < PCI_BRIDGE_RESOURCES) ? size : res->start;
+ if (resno < PCI_BRIDGE_RESOURCES) {
+ align = size;
+ if ((reassigndev) &&
+ (res->flags & IORESOURCE_MEM)) {
+ align = ROUND_UP_TO_PAGESIZE(align);
+ }
+ } else {
+ align = res->start;
+ }
/* First, try exact prefetching match.. */
ret = pci_bus_alloc_resource(bus, res, size, align, min,
@@ -154,6 +231,9 @@
resno, (unsigned long long)size,
(unsigned long long)res->start, pci_name(dev));
} else if (resno < PCI_BRIDGE_RESOURCES) {
+ printk(KERN_DEBUG "PCI: Assign resource(%d) on %s "
+ "%016llx - %016llx\n", resno, pci_name(dev),
+ (u64)res->start, (u64)res->end);
pci_update_resource(dev, res, resno);
}
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|