To: Jeremy Fitzhardinge <jeremy@xxxxxxxx>
Subject: [Xen-devel] Re: [PATCH 07/11] Xen/x86/PCI: Add support for the Xen PCI subsystem
From: Joerg Roedel <joro@xxxxxxxxxx>
Date: Mon, 11 May 2009 11:40:28 +0200
Cc: Xen-devel <xen-devel@xxxxxxxxxxxxxxxxxxx>, Ian Campbell <ian.campbell@xxxxxxxxxx>, the arch/x86 maintainers <x86@xxxxxxxxxx>, Linux Kernel Mailing List <linux-kernel@xxxxxxxxxxxxxxx>, Alex Nixon <alex.nixon@xxxxxxxxxx>, Jeremy Fitzhardinge <jeremy.fitzhardinge@xxxxxxxxxx>, Ingo Molnar <mingo@xxxxxxx>
Delivery-date: Mon, 11 May 2009 06:53:53 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
In-reply-to: <1241732737-7669-8-git-send-email-jeremy@xxxxxxxx>
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
References: <1241732737-7669-1-git-send-email-jeremy@xxxxxxxx> <1241732737-7669-8-git-send-email-jeremy@xxxxxxxx>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Mutt/1.5.18 (2008-05-17)
On Thu, May 07, 2009 at 02:45:33PM -0700, Jeremy Fitzhardinge wrote:
> From: Alex Nixon <alex.nixon@xxxxxxxxxx>
> 
> Impact: add core of Xen PCI support
> 
> On boot, the system will search to see if a Xen iommu/pci subsystem is
> available.  If the kernel detects it's running in a domain rather than
> on bare hardware, this subsystem will be used.  Otherwise, it falls
> back to using hardware as usual.
> 
> The frontend stub lives in arch/x86/pci-xen.c, alongside other
> sub-arch PCI init code (e.g. olpc.c)
> 
> (All subsequent fixes, API changes and swiotlb operations folded in.)
> 
> Signed-off-by: Alex Nixon <alex.nixon@xxxxxxxxxx>
> Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@xxxxxxxxxx>
> Signed-off-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
> Reviewed-by: "H. Peter Anvin" <hpa@xxxxxxxxx>
> ---
>  arch/x86/Kconfig                 |    4 +
>  arch/x86/include/asm/pci_x86.h   |    1 +
>  arch/x86/include/asm/xen/iommu.h |   12 ++
>  arch/x86/kernel/pci-dma.c        |    3 +
>  arch/x86/pci/Makefile            |    1 +
>  arch/x86/pci/init.c              |    6 +
>  arch/x86/pci/xen.c               |   52 +++++++
>  drivers/pci/Makefile             |    2 +
>  drivers/pci/xen-iommu.c          |  302 ++++++++++++++++++++++++++++++++++++++
>  9 files changed, 383 insertions(+), 0 deletions(-)
>  create mode 100644 arch/x86/include/asm/xen/iommu.h
>  create mode 100644 arch/x86/pci/xen.c
>  create mode 100644 drivers/pci/xen-iommu.c
> 
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 4b34082..4a62659 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -1830,6 +1830,10 @@ config PCI_OLPC
>       def_bool y
>       depends on PCI && OLPC && (PCI_GOOLPC || PCI_GOANY)
>  
> +config PCI_XEN
> +     def_bool y
> +     depends on XEN_PCI_PASSTHROUGH || XEN_DOM0_PCI
> +
>  config PCI_DOMAINS
>       def_bool y
>       depends on PCI
> diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
> index 5401ca2..34f03a4 100644
> --- a/arch/x86/include/asm/pci_x86.h
> +++ b/arch/x86/include/asm/pci_x86.h
> @@ -107,6 +107,7 @@ extern int pci_direct_probe(void);
>  extern void pci_direct_init(int type);
>  extern void pci_pcbios_init(void);
>  extern int pci_olpc_init(void);
> +extern int pci_xen_init(void);
>  extern void __init dmi_check_pciprobe(void);
>  extern void __init dmi_check_skip_isa_align(void);
>  
> diff --git a/arch/x86/include/asm/xen/iommu.h b/arch/x86/include/asm/xen/iommu.h
> new file mode 100644
> index 0000000..75df312
> --- /dev/null
> +++ b/arch/x86/include/asm/xen/iommu.h
> @@ -0,0 +1,12 @@
> +#ifndef ASM_X86__XEN_IOMMU_H
> +
> +#ifdef CONFIG_PCI_XEN
> +extern void xen_iommu_init(void);
> +#else
> +static inline void xen_iommu_init(void)
> +{
> +}
> +#endif
> +
> +#endif
> +
> diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
> index 745579b..2fffc22 100644
> --- a/arch/x86/kernel/pci-dma.c
> +++ b/arch/x86/kernel/pci-dma.c
> @@ -10,6 +10,7 @@
>  #include <asm/gart.h>
>  #include <asm/calgary.h>
>  #include <asm/amd_iommu.h>
> +#include <asm/xen/iommu.h>
>  
>  static int forbid_dac __read_mostly;
>  
> @@ -275,6 +276,8 @@ static int __init pci_iommu_init(void)
>       dma_debug_add_bus(&pci_bus_type);
>  #endif
>  
> +     xen_iommu_init();
> +     
>       calgary_iommu_init();
>  
>       intel_iommu_init();
> diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile
> index d49202e..64182c5 100644
> --- a/arch/x86/pci/Makefile
> +++ b/arch/x86/pci/Makefile
> @@ -4,6 +4,7 @@ obj-$(CONFIG_PCI_BIOS)                += pcbios.o
>  obj-$(CONFIG_PCI_MMCONFIG)   += mmconfig_$(BITS).o direct.o mmconfig-shared.o
>  obj-$(CONFIG_PCI_DIRECT)     += direct.o
>  obj-$(CONFIG_PCI_OLPC)               += olpc.o
> +obj-$(CONFIG_PCI_XEN)                += xen.o
>  
>  obj-y                                += fixup.o
>  obj-$(CONFIG_ACPI)           += acpi.o
> diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c
> index 25a1f8e..4e2f90a 100644
> --- a/arch/x86/pci/init.c
> +++ b/arch/x86/pci/init.c
> @@ -15,10 +15,16 @@ static __init int pci_arch_init(void)
>       if (!(pci_probe & PCI_PROBE_NOEARLY))
>               pci_mmcfg_early_init();
>  
> +#ifdef CONFIG_PCI_XEN
> +     if (!pci_xen_init())
> +             return 0;
> +#endif
> +
>  #ifdef CONFIG_PCI_OLPC
>       if (!pci_olpc_init())
>               return 0;       /* skip additional checks if it's an XO */
>  #endif
> +
>  #ifdef CONFIG_PCI_BIOS
>       pci_pcbios_init();
>  #endif
> diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
> new file mode 100644
> index 0000000..76f803f
> --- /dev/null
> +++ b/arch/x86/pci/xen.c
> @@ -0,0 +1,52 @@
> +/*
> + * Xen PCI Frontend Stub - puts some "dummy" functions in to the Linux
> + *                      x86 PCI core to support the Xen PCI Frontend
> + *
> + *   Author: Ryan Wilson <hap9@xxxxxxxxxxxxxx>
> + */
> +#include <linux/module.h>
> +#include <linux/init.h>
> +#include <linux/pci.h>
> +#include <linux/acpi.h>
> +
> +#include <asm/pci_x86.h>
> +
> +#include <asm/xen/hypervisor.h>
> +
> +static int xen_pcifront_enable_irq(struct pci_dev *dev)
> +{
> +     return 0;
> +}
> +
> +extern int isapnp_disable;
> +
> +int __init pci_xen_init(void)
> +{
> +     if (!xen_pv_domain() || xen_initial_domain())
> +             return -ENODEV;
> +
> +     printk(KERN_INFO "PCI: setting up Xen PCI frontend stub\n");
> +
> +     pcibios_set_cache_line_size();
> +
> +     pcibios_enable_irq = xen_pcifront_enable_irq;
> +     pcibios_disable_irq = NULL;
> +
> +#ifdef CONFIG_ACPI
> +     /* Keep ACPI out of the picture */
> +     acpi_noirq = 1;
> +#endif
> +
> +#ifdef CONFIG_ISAPNP
> +     /* Stop isapnp from probing */
> +     isapnp_disable = 1;
> +#endif
> +
> +     /* Ensure a device still gets scanned even if it's fn number
> +      * is non-zero.
> +      */
> +     pci_scan_all_fns = 1;
> +
> +     return 0;
> +}
> +
> diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
> index ba6af16..8db0cb5 100644
> --- a/drivers/pci/Makefile
> +++ b/drivers/pci/Makefile
> @@ -27,6 +27,8 @@ obj-$(CONFIG_HT_IRQ) += htirq.o
>  # Build Intel IOMMU support
>  obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o
>  
> +# Build Xen IOMMU support
> +obj-$(CONFIG_PCI_XEN) += xen-iommu.o
>  obj-$(CONFIG_INTR_REMAP) += dmar.o intr_remapping.o
>  
>  obj-$(CONFIG_PCI_IOV) += iov.o
> diff --git a/drivers/pci/xen-iommu.c b/drivers/pci/xen-iommu.c
> new file mode 100644
> index 0000000..e7a22f1
> --- /dev/null
> +++ b/drivers/pci/xen-iommu.c
> @@ -0,0 +1,302 @@
> +#include <linux/types.h>
> +#include <linux/mm.h>
> +#include <linux/string.h>
> +#include <linux/pci.h>
> +#include <linux/module.h>
> +#include <linux/version.h>
> +#include <linux/scatterlist.h>
> +#include <linux/io.h>
> +#include <linux/bug.h>
> +
> +#include <xen/interface/xen.h>
> +#include <xen/grant_table.h>
> +#include <xen/page.h>
> +#include <xen/xen-ops.h>
> +
> +#include <asm/iommu.h>
> +#include <asm/swiotlb.h>
> +#include <asm/tlbflush.h>
> +
> +#define IOMMU_BUG_ON(test)                           \
> +do {                                                 \
> +     if (unlikely(test)) {                           \
> +             printk(KERN_ALERT "Fatal DMA error! "   \
> +                    "Please use 'swiotlb=force'\n"); \
> +             BUG();                                  \
> +     }                                               \
> +} while (0)
> +
> +/* Print address range with message */
> +#define PAR(msg, addr, size)                                 \
> +do {                                                 \
> +     printk(msg "[%#llx - %#llx]\n",                 \
> +     (unsigned long long)addr,                       \
> +     (unsigned long long)addr + size);               \
> +} while (0)
> +
> +struct dma_coherent_mem {
> +     void            *virt_base;
> +     u32             device_base;
> +     int             size;
> +     int             flags;
> +     unsigned long   *bitmap;
> +};
> +
> +static inline int address_needs_mapping(struct device *hwdev,
> +                                             dma_addr_t addr)
> +{
> +     dma_addr_t mask = 0xffffffff;

You can use DMA_32BIT_MASK here.
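Something along these lines (untested; DMA_32BIT_MASK comes from
<linux/dma-mapping.h>):

	dma_addr_t mask = DMA_32BIT_MASK;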

> +     int ret;
> +
> +     /* If the device has a mask, use it, otherwise default to 32 bits */
> +     if (hwdev && hwdev->dma_mask)
> +             mask = *hwdev->dma_mask;

I think the check for a valid hwdev->dma_mask is not necessary; other
IOMMU drivers don't check for it either.

> +
> +     ret = (addr & ~mask) != 0;
> +
> +     if (ret) {
> +             printk(KERN_ERR "dma address needs mapping\n");
> +             printk(KERN_ERR "mask: %#llx\n address: [%#llx]\n", mask, addr);
> +     }
> +     return ret;
> +}
> +
> +static int check_pages_physically_contiguous(unsigned long pfn,
> +                                          unsigned int offset,
> +                                          size_t length)
> +{
> +     unsigned long next_mfn;
> +     int i;
> +     int nr_pages;
> +
> +     next_mfn = pfn_to_mfn(pfn);
> +     nr_pages = (offset + length + PAGE_SIZE-1) >> PAGE_SHIFT;
> +
> +     for (i = 1; i < nr_pages; i++) {
> +             if (pfn_to_mfn(++pfn) != ++next_mfn)
> +                     return 0;
> +     }
> +     return 1;
> +}
> +
> +static int range_straddles_page_boundary(phys_addr_t p, size_t size)
> +{
> +     unsigned long pfn = PFN_DOWN(p);
> +     unsigned int offset = p & ~PAGE_MASK;
> +
> +     if (offset + size <= PAGE_SIZE)
> +             return 0;

You can use iommu_num_pages() from lib/iommu-helper.c here.
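For instance (untested; the helper is declared in <linux/iommu-helper.h>):

	nr_pages = iommu_num_pages(offset, length, PAGE_SIZE);

in check_pages_physically_contiguous(), instead of the open-coded
round-up.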

> +     if (check_pages_physically_contiguous(pfn, offset, size))
> +             return 0;
> +     return 1;
> +}
> +
> +static inline void xen_dma_unmap_page(struct page *page)
> +{
> +     /* Xen TODO: 2.6.18 xen calls __gnttab_dma_unmap_page here
> +      * to deal with foreign pages.  We'll need similar logic here at
> +      * some point.
> +      */
> +}
> +
> +/* Gets dma address of a page */
> +static inline dma_addr_t xen_dma_map_page(struct page *page)
> +{
> +     /* Xen TODO: 2.6.18 xen calls __gnttab_dma_map_page here to deal
> +      * with foreign pages.  We'll need similar logic here at some
> +      * point.
> +      */
> +     return ((dma_addr_t)pfn_to_mfn(page_to_pfn(page))) << PAGE_SHIFT;
> +}
> +
> +static int xen_map_sg(struct device *hwdev, struct scatterlist *sg,
> +                     int nents, int direction)
> +{
> +     struct scatterlist *s;
> +     struct page *page;
> +     int i, rc;
> +
> +     BUG_ON(direction == DMA_NONE);
> +     WARN_ON(nents == 0 || sg[0].length == 0);
> +
> +     for_each_sg(sg, s, nents, i) {
> +             BUG_ON(!sg_page(s));
> +             page = sg_page(s);
> +             s->dma_address = xen_dma_map_page(page) + s->offset;
> +             s->dma_length = s->length;
> +             IOMMU_BUG_ON(range_straddles_page_boundary(
> +                             page_to_phys(page), s->length));

I have a question on this: how do you make sure that the memory to map
does not cross page boundaries? I have a stats counter for cross-page
requests in the AMD IOMMU code, and around 10% of the requests are
actually cross-page for me.

> +     }
> +
> +     rc = nents;
> +
> +     flush_write_buffers();
> +     return rc;
> +}
> +
> +static void xen_unmap_sg(struct device *hwdev, struct scatterlist *sg,
> +                      int nents, int direction)
> +{
> +     struct scatterlist *s;
> +     struct page *page;
> +     int i;
> +
> +     for_each_sg(sg, s, nents, i) {
> +             page = pfn_to_page(mfn_to_pfn(PFN_DOWN(s->dma_address)));
> +             xen_dma_unmap_page(page);
> +     }
> +}
> +
> +static void *xen_alloc_coherent(struct device *dev, size_t size,
> +                             dma_addr_t *dma_handle, gfp_t gfp)
> +{
> +     void *ret;
> +     struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
> +     unsigned int order = get_order(size);
> +     unsigned long vstart;
> +     u64 mask;
> +
> +     /* ignore region specifiers */
> +     gfp &= ~(__GFP_DMA | __GFP_HIGHMEM);
> +
> +     if (mem) {
> +             int page = bitmap_find_free_region(mem->bitmap, mem->size,
> +                                                  order);

Can you use iommu_area_alloc here?
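Roughly like this (untested sketch; the boundary_size and align_mask
arguments are guesses on my part):

	unsigned long page;

	page = iommu_area_alloc(mem->bitmap, mem->size, 0, 1 << order,
				0, mem->size, (1 << order) - 1);
	if (page != -1) {
		*dma_handle = mem->device_base + (page << PAGE_SHIFT);
		/* ... rest as before ... */
	}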

> +             if (page >= 0) {
> +                     *dma_handle = mem->device_base + (page << PAGE_SHIFT);
> +                     ret = mem->virt_base + (page << PAGE_SHIFT);
> +                     memset(ret, 0, size);
> +                     return ret;
> +             }
> +             if (mem->flags & DMA_MEMORY_EXCLUSIVE)
> +                     return NULL;
> +     }
> +
> +     if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff))

DMA_32BIT_MASK

> +             gfp |= GFP_DMA;
> +
> +     vstart = __get_free_pages(gfp, order);
> +     ret = (void *)vstart;
> +
> +     if (dev != NULL && dev->coherent_dma_mask)
> +             mask = dev->coherent_dma_mask;
> +     else
> +             mask = 0xffffffff;

DMA_32BIT_MASK
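i.e. the same substitution here and in the coherent_dma_mask check a
few lines up:

	mask = DMA_32BIT_MASK;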

> +
> +     if (ret != NULL) {
> +             if (xen_create_contiguous_region(vstart, order,
> +                                              fls64(mask)) != 0) {
> +                     free_pages(vstart, order);
> +                     return NULL;
> +             }
> +             memset(ret, 0, size);
> +             *dma_handle = virt_to_machine(ret).maddr;
> +     }
> +     return ret;
> +}
> +
> +static void xen_free_coherent(struct device *dev, size_t size,
> +                      void *vaddr, dma_addr_t dma_addr)
> +{
> +     struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
> +     int order = get_order(size);
> +
> +     if (mem && vaddr >= mem->virt_base &&
> +         vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) {
> +             int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
> +             bitmap_release_region(mem->bitmap, page, order);

iommu_area_free
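That is, something like (untested):

	iommu_area_free(mem->bitmap, page, 1 << order);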

> +     } else {
> +             xen_destroy_contiguous_region((unsigned long)vaddr, order);
> +             free_pages((unsigned long)vaddr, order);
> +     }
> +}
> +
> +static dma_addr_t xen_swiotlb_map_single(struct device *dev, phys_addr_t paddr,
> +                                      size_t size, int direction)
> +{
> +     dma_addr_t dma;
> +     BUG_ON(direction == DMA_NONE);
> +
> +     WARN_ON(size == 0);
> +     dma = swiotlb_map_single(dev, phys_to_virt(paddr), size, direction);
> +
> +     flush_write_buffers();
> +     return dma;
> +}
> +
> +static dma_addr_t xen_map_single(struct device *dev, phys_addr_t paddr,
> +                                     size_t size, int direction)
> +{
> +     struct page *page;
> +     dma_addr_t dma;
> +
> +     BUG_ON(direction == DMA_NONE);
> +
> +     WARN_ON(size == 0);
> +     page = pfn_to_page(PFN_DOWN(paddr));
> +
> +     dma = xen_dma_map_page(page) + offset_in_page(paddr);
> +
> +     IOMMU_BUG_ON(address_needs_mapping(dev, dma));
> +     IOMMU_BUG_ON(range_straddles_page_boundary(paddr, size));
> +     flush_write_buffers();
> +     return dma;
> +}
> +
> +static void xen_unmap_single(struct device *dev, dma_addr_t dma_addr,
> +                             size_t size, int direction)
> +{
> +     BUG_ON(direction == DMA_NONE);
> +     xen_dma_unmap_page(pfn_to_page(mfn_to_pfn(PFN_DOWN(dma_addr))));
> +}
> +
> +static struct dma_mapping_ops xen_dma_ops = {
> +     .dma_supported = NULL,
> +
> +     .alloc_coherent = xen_alloc_coherent,
> +     .free_coherent = xen_free_coherent,
> +
> +     .map_single = xen_map_single,
> +     .unmap_single = xen_unmap_single,
> +
> +     .map_sg = xen_map_sg,
> +     .unmap_sg = xen_unmap_sg,
> +
> +     .mapping_error = NULL,
> +
> +     .is_phys = 0,
> +};
> +
> +static struct dma_mapping_ops xen_swiotlb_dma_ops = {
> +     .dma_supported = swiotlb_dma_supported,
> +
> +     .alloc_coherent = xen_alloc_coherent,
> +     .free_coherent = xen_free_coherent,
> +
> +     .map_single = xen_swiotlb_map_single,   /* swiotlb_map_single has a different prototype */
> +     .unmap_single = swiotlb_unmap_single,
> +
> +     .map_sg = swiotlb_map_sg,
> +     .unmap_sg = swiotlb_unmap_sg,
> +
> +     .mapping_error = swiotlb_dma_mapping_error,
> +
> +     .is_phys = 0,
> +};
> +
> +void __init xen_iommu_init(void)
> +{
> +     if (!xen_pv_domain())
> +             return;
> +
> +     printk(KERN_INFO "Xen: Initializing Xen DMA ops\n");
> +
> +     force_iommu = 0;
> +     dma_ops = &xen_dma_ops;
> +
> +     if (swiotlb) {
> +             printk(KERN_INFO "Xen: Enabling DMA fallback to swiotlb\n");
> +             dma_ops = &xen_swiotlb_dma_ops;
> +     }
> +}
> +
> -- 
> 1.6.0.6
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
