WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH 07/10] Xen/x86/PCI: Add support for the Xen PCI subsystem

To: Ingo Molnar <mingo@xxxxxxx>
Subject: [Xen-devel] [PATCH 07/10] Xen/x86/PCI: Add support for the Xen PCI subsystem
From: Jeremy Fitzhardinge <jeremy@xxxxxxxx>
Date: Wed, 27 May 2009 00:24:39 -0700
Cc: Chris Wright <chrisw@xxxxxxxxxx>, Xen-devel <xen-devel@xxxxxxxxxxxxxxxxxxx>, Ian Campbell <ian.campbell@xxxxxxxxxx>, Matthew Wilcox <matthew@xxxxxx>, Ky Srinivasan <ksrinivasan@xxxxxxxxxx>, kurt.hackel@xxxxxxxxxx, the arch/x86 maintainers <x86@xxxxxxxxxx>, Linux Kernel Mailing List <linux-kernel@xxxxxxxxxxxxxxx>, Alex Nixon <alex.nixon@xxxxxxxxxx>, Jeremy Fitzhardinge <jeremy.fitzhardinge@xxxxxxxxxx>, Jesse Barnes <jbarnes@xxxxxxxxxxxxxxxx>, Jens Axboe <jens.axboe@xxxxxxxxxx>, Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>, Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>, Greg KH <gregkh@xxxxxxx>
Delivery-date: Wed, 27 May 2009 00:42:30 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
In-reply-to: <1243409082-21349-1-git-send-email-jeremy@xxxxxxxx>
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
References: <1243409082-21349-1-git-send-email-jeremy@xxxxxxxx>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
From: Alex Nixon <alex.nixon@xxxxxxxxxx>

On boot, the system will search to see if a Xen iommu/pci subsystem is
available.  If the kernel detects it's running in a domain rather than
on bare hardware, this subsystem will be used.  Otherwise, it falls
back to using hardware as usual.

The frontend stub lives in arch/x86/pci/xen.c, alongside other
sub-arch PCI init code (e.g. olpc.c)

(All subsequent fixes, API changes and swiotlb operations folded in.)

[ Impact: add core of Xen PCI support ]

Signed-off-by: Alex Nixon <alex.nixon@xxxxxxxxxx>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@xxxxxxxxxx>
Signed-off-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
Reviewed-by: "H. Peter Anvin" <hpa@xxxxxxxxx>
Reviewed-by: Matthew Wilcox <willy@xxxxxxxxxxxxxxx>
---
 arch/x86/Kconfig                 |    4 +
 arch/x86/include/asm/io.h        |    2 +
 arch/x86/include/asm/pci_x86.h   |    1 +
 arch/x86/include/asm/xen/iommu.h |   12 ++
 arch/x86/kernel/pci-dma.c        |    3 +
 arch/x86/pci/Makefile            |    1 +
 arch/x86/pci/init.c              |    6 +
 arch/x86/pci/xen.c               |   51 +++++++
 drivers/pci/Makefile             |    2 +
 drivers/pci/xen-iommu.c          |  271 ++++++++++++++++++++++++++++++++++++++
 10 files changed, 353 insertions(+), 0 deletions(-)
 create mode 100644 arch/x86/include/asm/xen/iommu.h
 create mode 100644 arch/x86/pci/xen.c
 create mode 100644 drivers/pci/xen-iommu.c

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index df9e885..15cc23a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1829,6 +1829,10 @@ config PCI_OLPC
        def_bool y
        depends on PCI && OLPC && (PCI_GOOLPC || PCI_GOANY)
 
+config PCI_XEN
+       def_bool y
+       depends on XEN_PCI_PASSTHROUGH || XEN_DOM0_PCI
+
 config PCI_DOMAINS
        def_bool y
        depends on PCI
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 7373932..57c7b26 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -7,6 +7,8 @@
 #include <asm-generic/int-ll64.h>
 #include <asm/page.h>
 
+extern int isapnp_disable;
+
 #define build_mmio_read(name, size, type, reg, barrier) \
 static inline type name(const volatile void __iomem *addr) \
 { type ret; asm volatile("mov" size " %1,%0":reg (ret) \
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index 5401ca2..34f03a4 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -107,6 +107,7 @@ extern int pci_direct_probe(void);
 extern void pci_direct_init(int type);
 extern void pci_pcbios_init(void);
 extern int pci_olpc_init(void);
+extern int pci_xen_init(void);
 extern void __init dmi_check_pciprobe(void);
 extern void __init dmi_check_skip_isa_align(void);
 
diff --git a/arch/x86/include/asm/xen/iommu.h b/arch/x86/include/asm/xen/iommu.h
new file mode 100644
index 0000000..75df312
--- /dev/null
+++ b/arch/x86/include/asm/xen/iommu.h
@@ -0,0 +1,13 @@
+#ifndef ASM_X86__XEN_IOMMU_H
+#define ASM_X86__XEN_IOMMU_H
+
+#ifdef CONFIG_PCI_XEN
+extern void xen_iommu_init(void);
+#else
+static inline void xen_iommu_init(void)
+{
+}
+#endif
+
+#endif /* ASM_X86__XEN_IOMMU_H */
+
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 745579b..e486c40 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -10,6 +10,7 @@
 #include <asm/gart.h>
 #include <asm/calgary.h>
 #include <asm/amd_iommu.h>
+#include <asm/xen/iommu.h>
 
 static int forbid_dac __read_mostly;
 
@@ -275,6 +276,8 @@ static int __init pci_iommu_init(void)
        dma_debug_add_bus(&pci_bus_type);
 #endif
 
+       xen_iommu_init();
+
        calgary_iommu_init();
 
        intel_iommu_init();
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile
index d49202e..64182c5 100644
--- a/arch/x86/pci/Makefile
+++ b/arch/x86/pci/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_PCI_BIOS)          += pcbios.o
 obj-$(CONFIG_PCI_MMCONFIG)     += mmconfig_$(BITS).o direct.o mmconfig-shared.o
 obj-$(CONFIG_PCI_DIRECT)       += direct.o
 obj-$(CONFIG_PCI_OLPC)         += olpc.o
+obj-$(CONFIG_PCI_XEN)          += xen.o
 
 obj-y                          += fixup.o
 obj-$(CONFIG_ACPI)             += acpi.o
diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c
index 25a1f8e..4e2f90a 100644
--- a/arch/x86/pci/init.c
+++ b/arch/x86/pci/init.c
@@ -15,10 +15,16 @@ static __init int pci_arch_init(void)
        if (!(pci_probe & PCI_PROBE_NOEARLY))
                pci_mmcfg_early_init();
 
+#ifdef CONFIG_PCI_XEN
+       if (!pci_xen_init())
+               return 0;
+#endif
+
 #ifdef CONFIG_PCI_OLPC
        if (!pci_olpc_init())
                return 0;       /* skip additional checks if it's an XO */
 #endif
+
 #ifdef CONFIG_PCI_BIOS
        pci_pcbios_init();
 #endif
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
new file mode 100644
index 0000000..1b922aa
--- /dev/null
+++ b/arch/x86/pci/xen.c
@@ -0,0 +1,55 @@
+/*
+ * Xen PCI Frontend Stub - puts some "dummy" functions in to the Linux
+ *                        x86 PCI core to support the Xen PCI Frontend
+ *
+ *   Author: Ryan Wilson <hap9@xxxxxxxxxxxxxx>
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/acpi.h>
+
+#include <asm/io.h>
+#include <asm/pci_x86.h>
+
+#include <asm/xen/hypervisor.h>
+
+/* IRQ setup is handled by pcifront and Xen event channels, so there is
+ * nothing for the x86 PCI core to do per device. */
+static int xen_pcifront_enable_irq(struct pci_dev *dev)
+{
+       return 0;
+}
+
+/* Install the Xen PCI frontend hooks.  Returns 0 when running as a Xen
+ * PV domU (the caller then skips further PCI probing); -ENODEV on bare
+ * metal or in the initial domain, where normal probing should continue. */
+int __init pci_xen_init(void)
+{
+       if (!xen_pv_domain() || xen_initial_domain())
+               return -ENODEV;
+
+       printk(KERN_INFO "PCI: setting up Xen PCI frontend stub\n");
+
+       pcibios_set_cache_line_size();
+
+       pcibios_enable_irq = xen_pcifront_enable_irq;
+       pcibios_disable_irq = NULL;
+
+#ifdef CONFIG_ACPI
+       /* Keep ACPI out of the picture */
+       acpi_noirq = 1;
+#endif
+
+#ifdef CONFIG_ISAPNP
+       /* Stop isapnp from probing */
+       isapnp_disable = 1;
+#endif
+
+       /* Ensure a device still gets scanned even if its fn number
+        * is non-zero.
+        */
+       pci_scan_all_fns = 1;
+
+       return 0;
+}
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index ba6af16..8db0cb5 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -27,6 +27,8 @@ obj-$(CONFIG_HT_IRQ) += htirq.o
 # Build Intel IOMMU support
 obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o
 
+# Build Xen IOMMU support
+obj-$(CONFIG_PCI_XEN) += xen-iommu.o
 obj-$(CONFIG_INTR_REMAP) += dmar.o intr_remapping.o
 
 obj-$(CONFIG_PCI_IOV) += iov.o
diff --git a/drivers/pci/xen-iommu.c b/drivers/pci/xen-iommu.c
new file mode 100644
index 0000000..ac6bcdb
--- /dev/null
+++ b/drivers/pci/xen-iommu.c
@@ -0,0 +1,271 @@
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/scatterlist.h>
+#include <linux/io.h>
+#include <linux/bug.h>
+
+#include <xen/interface/xen.h>
+#include <xen/grant_table.h>
+#include <xen/page.h>
+#include <xen/xen-ops.h>
+
+#include <asm/iommu.h>
+#include <asm/swiotlb.h>
+#include <asm/tlbflush.h>
+
+#define IOMMU_BUG_ON(test)                             \
+do {                                                   \
+       if (unlikely(test)) {                           \
+               printk(KERN_ALERT "Fatal DMA error! "   \
+                      "Please use 'swiotlb=force'\n"); \
+               BUG();                                  \
+       }                                               \
+} while (0)
+
+/* Print address range with message (args parenthesized for macro safety) */
+#define PAR(msg, addr, size)                                   \
+do {                                                   \
+       printk(msg "[%#llx - %#llx]\n",                 \
+       (unsigned long long)(addr),                     \
+       (unsigned long long)(addr) + (size));           \
+} while (0)
+
+/* Return non-zero if addr lies outside hwdev's DMA mask (i.e. the device
+ * cannot reach it directly and it would need an IOMMU/bounce mapping). */
+static inline int address_needs_mapping(struct device *hwdev,
+                                               dma_addr_t addr)
+{
+       dma_addr_t mask = DMA_BIT_MASK(32);
+       int ret;
+
+       /* If the device has a mask, use it, otherwise default to 32 bits */
+       if (hwdev)
+               mask = *hwdev->dma_mask;
+
+       ret = (addr & ~mask) != 0;
+
+       if (ret)
+               printk(KERN_ERR "dma address %#llx needs mapping (mask %#llx)\n",
+                      (unsigned long long)addr,
+                      (unsigned long long)mask);
+       return ret;
+}
+
+/* Return 1 if the pfns backing [offset, offset+length) map to consecutive
+ * machine frames, 0 otherwise. */
+static int check_pages_physically_contiguous(unsigned long pfn,
+                                            unsigned int offset,
+                                            size_t length)
+{
+       unsigned long first_mfn = pfn_to_mfn(pfn);
+       int nr_pages = (offset + length + PAGE_SIZE-1) >> PAGE_SHIFT;
+       int idx;
+
+       for (idx = 1; idx < nr_pages; idx++) {
+               if (pfn_to_mfn(pfn + idx) != first_mfn + idx)
+                       return 0;
+       }
+       return 1;
+}
+
+/* A range "straddles" only if it spans more than one page AND the backing
+ * machine frames are not contiguous. */
+static int range_straddles_page_boundary(phys_addr_t p, size_t size)
+{
+       unsigned int offset = p & ~PAGE_MASK;
+
+       if (offset + size <= PAGE_SIZE)
+               return 0;
+
+       return !check_pages_physically_contiguous(PFN_DOWN(p), offset, size);
+}
+
+/* Placeholder: the 2.6.18 Xen tree called __gnttab_dma_unmap_page here
+ * to deal with foreign pages.  Similar logic will be needed here at
+ * some point.
+ */
+static inline void xen_dma_unmap_page(struct page *page)
+{
+}
+
+/* Machine (bus) address of a page: its mfn shifted into an address.
+ * Foreign-page handling (__gnttab_dma_map_page in the 2.6.18 Xen tree)
+ * is still a TODO, as above.
+ */
+static inline dma_addr_t xen_dma_map_page(struct page *page)
+{
+       unsigned long mfn = pfn_to_mfn(page_to_pfn(page));
+
+       return (dma_addr_t)mfn << PAGE_SHIFT;
+}
+
+/* Fill in the bus address of every scatterlist entry; each entry must not
+ * straddle a discontiguous machine-frame boundary (IOMMU_BUG_ON). */
+static int xen_map_sg(struct device *hwdev, struct scatterlist *sg,
+                     int nents,
+                     enum dma_data_direction direction,
+                     struct dma_attrs *attrs)
+{
+       struct scatterlist *cur;
+       int idx;
+
+       BUG_ON(direction == DMA_NONE);
+       WARN_ON(nents == 0 || sg[0].length == 0);
+
+       for_each_sg(sg, cur, nents, idx) {
+               struct page *pg = sg_page(cur);
+
+               BUG_ON(!pg);
+               cur->dma_address = xen_dma_map_page(pg) + cur->offset;
+               cur->dma_length = cur->length;
+               IOMMU_BUG_ON(range_straddles_page_boundary(
+                               page_to_phys(pg), cur->length));
+       }
+
+       flush_write_buffers();
+       return nents;
+}
+
+/* Translate each entry's dma address back to its page and release it. */
+static void xen_unmap_sg(struct device *hwdev, struct scatterlist *sg,
+                        int nents,
+                        enum dma_data_direction direction,
+                        struct dma_attrs *attrs)
+{
+       struct scatterlist *cur;
+       int idx;
+
+       for_each_sg(sg, cur, nents, idx)
+               xen_dma_unmap_page(pfn_to_page(
+                       mfn_to_pfn(PFN_DOWN(cur->dma_address))));
+}
+
+/* Allocate coherent DMA memory: take pages from the kernel allocator and
+ * exchange their backing frames for machine-contiguous ones below the
+ * device's coherent mask. */
+static void *xen_alloc_coherent(struct device *dev, size_t size,
+                               dma_addr_t *dma_handle, gfp_t gfp)
+{
+       unsigned int order = get_order(size);
+       unsigned long vstart;
+       void *ret;
+       u64 mask;
+
+       /* ignore region specifiers */
+       gfp &= ~(__GFP_DMA | __GFP_HIGHMEM);
+
+       /* A per-device coherent pool takes precedence. */
+       if (dma_alloc_from_coherent(dev, size, dma_handle, &ret))
+               return ret;
+
+       if (dev == NULL || (dev->coherent_dma_mask < DMA_BIT_MASK(32)))
+               gfp |= GFP_DMA;
+
+       if (dev != NULL && dev->coherent_dma_mask)
+               mask = dev->coherent_dma_mask;
+       else
+               mask = DMA_BIT_MASK(32);
+
+       vstart = __get_free_pages(gfp, order);
+       ret = (void *)vstart;
+       if (ret == NULL)
+               return NULL;
+
+       if (xen_create_contiguous_region(vstart, order, fls64(mask)) != 0) {
+               free_pages(vstart, order);
+               return NULL;
+       }
+
+       memset(ret, 0, size);
+       *dma_handle = virt_to_machine(ret).maddr;
+       return ret;
+}
+
+/* Release memory from xen_alloc_coherent: undo the contiguous-region
+ * exchange, then hand the pages back to the kernel allocator. */
+static void xen_free_coherent(struct device *dev, size_t size,
+                             void *vaddr, dma_addr_t dma_addr)
+{
+       unsigned long addr = (unsigned long)vaddr;
+       int order = get_order(size);
+
+       /* Memory that came from a per-device pool goes back there. */
+       if (dma_release_from_coherent(dev, order, vaddr))
+               return;
+
+       xen_destroy_contiguous_region(addr, order);
+       free_pages(addr, order);
+}
+
+/* Bus address of (page + offset); the result must be reachable through
+ * the device's DMA mask (IOMMU_BUG_ON otherwise). */
+static dma_addr_t xen_map_page(struct device *dev, struct page *page,
+                              unsigned long offset, size_t size,
+                              enum dma_data_direction direction,
+                              struct dma_attrs *attrs)
+{
+       dma_addr_t bus_addr;
+
+       BUG_ON(direction == DMA_NONE);
+       WARN_ON(size == 0);
+
+       bus_addr = xen_dma_map_page(page) + offset;
+       IOMMU_BUG_ON(address_needs_mapping(dev, bus_addr));
+
+       flush_write_buffers();
+       return bus_addr;
+}
+
+/* Recover the struct page behind a bus address and release it. */
+static void xen_unmap_page(struct device *dev, dma_addr_t dma_addr,
+                          size_t size,
+                          enum dma_data_direction direction,
+                          struct dma_attrs *attrs)
+{
+       struct page *page;
+
+       BUG_ON(direction == DMA_NONE);
+
+       page = pfn_to_page(mfn_to_pfn(PFN_DOWN(dma_addr)));
+       xen_dma_unmap_page(page);
+}
+
+/* DMA ops used when no swiotlb is available; members left out of the
+ * designated initializer (dma_supported, mapping_error) default to NULL. */
+static struct dma_map_ops xen_dma_ops = {
+       .alloc_coherent = xen_alloc_coherent,
+       .free_coherent  = xen_free_coherent,
+       .map_page       = xen_map_page,
+       .unmap_page     = xen_unmap_page,
+       .map_sg         = xen_map_sg,
+       .unmap_sg       = xen_unmap_sg,
+       .is_phys        = 0,
+};
+
+/* Variant that bounces mappings through swiotlb; allocation still goes
+ * through the Xen coherent allocator above. */
+static struct dma_map_ops xen_swiotlb_dma_ops = {
+       .dma_supported  = swiotlb_dma_supported,
+       .alloc_coherent = xen_alloc_coherent,
+       .free_coherent  = xen_free_coherent,
+       .map_page       = swiotlb_map_page,
+       .unmap_page     = swiotlb_unmap_page,
+       .map_sg         = swiotlb_map_sg_attrs,
+       .unmap_sg       = swiotlb_unmap_sg_attrs,
+       .mapping_error  = swiotlb_dma_mapping_error,
+       .is_phys        = 0,
+};
+
+/* Install the Xen DMA ops on PV domains; prefer the swiotlb-backed
+ * variant when a swiotlb has been set up. */
+void __init xen_iommu_init(void)
+{
+       if (!xen_pv_domain())
+               return;
+
+       printk(KERN_INFO "Xen: Initializing Xen DMA ops\n");
+
+       force_iommu = 0;
+
+       if (swiotlb) {
+               printk(KERN_INFO "Xen: Enabling DMA fallback to swiotlb\n");
+               dma_ops = &xen_swiotlb_dma_ops;
+       } else {
+               dma_ops = &xen_dma_ops;
+       }
+}
+
-- 
1.6.0.6


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel