
Re: [RFC PATCH 1/2] xen/unpopulated-alloc: Introduce helpers for DMA allocations




On 15.06.22 03:45, Stefano Stabellini wrote:

Hello Stefano


On Tue, 14 Jun 2022, Oleksandr wrote:
On 11.06.22 03:12, Stefano Stabellini wrote:
On Wed, 8 Jun 2022, Oleksandr wrote:
2. Drop the "page_list" entirely and use "dma_pool" for all (contiguous and
non-contiguous) allocations. After all, all pages are initially contiguous in
fill_list() as they are built from the resource. This changes behavior for all
users of xen_alloc_unpopulated_pages().

Below is the diff for unpopulated-alloc.c. The patch is also available at:

https://github.com/otyshchenko1/linux/commit/7be569f113a4acbdc4bcb9b20cb3995b3151387a


diff --git a/drivers/xen/unpopulated-alloc.c b/drivers/xen/unpopulated-alloc.c
index a39f2d3..ab5c7bd 100644
--- a/drivers/xen/unpopulated-alloc.c
+++ b/drivers/xen/unpopulated-alloc.c
@@ -1,5 +1,7 @@
   // SPDX-License-Identifier: GPL-2.0
+#include <linux/dma-mapping.h>
   #include <linux/errno.h>
+#include <linux/genalloc.h>
   #include <linux/gfp.h>
   #include <linux/kernel.h>
   #include <linux/mm.h>
@@ -13,8 +15,8 @@
   #include <xen/xen.h>

   static DEFINE_MUTEX(list_lock);
-static struct page *page_list;
-static unsigned int list_count;
+
+static struct gen_pool *dma_pool;

   static struct resource *target_resource;

@@ -36,7 +38,7 @@ static int fill_list(unsigned int nr_pages)
          struct dev_pagemap *pgmap;
          struct resource *res, *tmp_res = NULL;
          void *vaddr;
-       unsigned int i, alloc_pages = round_up(nr_pages, PAGES_PER_SECTION);
+       unsigned int alloc_pages = round_up(nr_pages, PAGES_PER_SECTION);
          struct range mhp_range;
          int ret;

@@ -106,6 +108,7 @@ static int fill_list(unsigned int nr_pages)
            * conflict with any devices.
            */
          if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+               unsigned int i;
                  xen_pfn_t pfn = PFN_DOWN(res->start);

                  for (i = 0; i < alloc_pages; i++) {
@@ -125,16 +128,17 @@ static int fill_list(unsigned int nr_pages)
                  goto err_memremap;
          }

-       for (i = 0; i < alloc_pages; i++) {
-               struct page *pg = virt_to_page(vaddr + PAGE_SIZE * i);
-
-               pg->zone_device_data = page_list;
-               page_list = pg;
-               list_count++;
+       ret = gen_pool_add_virt(dma_pool, (unsigned long)vaddr, res->start,
+                       alloc_pages * PAGE_SIZE, NUMA_NO_NODE);
+       if (ret) {
+               pr_err("Cannot add memory range to the pool\n");
+               goto err_pool;
          }

          return 0;

+err_pool:
+       memunmap_pages(pgmap);
   err_memremap:
          kfree(pgmap);
   err_pgmap:
@@ -149,51 +153,49 @@ static int fill_list(unsigned int nr_pages)
          return ret;
   }

-/**
- * xen_alloc_unpopulated_pages - alloc unpopulated pages
- * @nr_pages: Number of pages
- * @pages: pages returned
- * @return 0 on success, error otherwise
- */
-int xen_alloc_unpopulated_pages(unsigned int nr_pages, struct page **pages)
+static int alloc_unpopulated_pages(unsigned int nr_pages, struct page **pages,
+               bool contiguous)
   {
          unsigned int i;
          int ret = 0;
+       void *vaddr;
+       bool filled = false;

           /*
            * Fallback to default behavior if we do not have any suitable resource
            * to allocate required region from and as the result we won't be able to
            * construct pages.
            */
-       if (!target_resource)
+       if (!target_resource) {
+               if (contiguous)
+                       return -ENODEV;
+
                  return xen_alloc_ballooned_pages(nr_pages, pages);
+       }

          mutex_lock(&list_lock);
-       if (list_count < nr_pages) {
-               ret = fill_list(nr_pages - list_count);
+
+       while (!(vaddr = (void *)gen_pool_alloc(dma_pool, nr_pages * PAGE_SIZE))) {
+               if (filled)
+                       ret = -ENOMEM;
+               else {
+                       ret = fill_list(nr_pages);
+                       filled = true;
+               }
                  if (ret)
                          goto out;
          }

          for (i = 0; i < nr_pages; i++) {
-               struct page *pg = page_list;
-
-               BUG_ON(!pg);
-               page_list = pg->zone_device_data;
-               list_count--;
-               pages[i] = pg;
+               pages[i] = virt_to_page(vaddr + PAGE_SIZE * i);

   #ifdef CONFIG_XEN_HAVE_PVMMU
                  if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-                       ret = xen_alloc_p2m_entry(page_to_pfn(pg));
+                       ret = xen_alloc_p2m_entry(page_to_pfn(pages[i]));
                          if (ret < 0) {
-                               unsigned int j;
-
-                               for (j = 0; j <= i; j++) {
-                                       pages[j]->zone_device_data = page_list;
-                                       page_list = pages[j];
-                                       list_count++;
-                               }
+                               /* XXX Do we need to zero pages[i]? */
+                               gen_pool_free(dma_pool, (unsigned long)vaddr,
+                                               nr_pages * PAGE_SIZE);
                                  goto out;
                          }
                  }
@@ -204,32 +206,89 @@ int xen_alloc_unpopulated_pages(unsigned int nr_pages, struct page **pages)
          mutex_unlock(&list_lock);
          return ret;
   }
-EXPORT_SYMBOL(xen_alloc_unpopulated_pages);

-/**
- * xen_free_unpopulated_pages - return unpopulated pages
- * @nr_pages: Number of pages
- * @pages: pages to return
- */
-void xen_free_unpopulated_pages(unsigned int nr_pages, struct page **pages)
+static void free_unpopulated_pages(unsigned int nr_pages, struct page **pages,
+               bool contiguous)
   {
-       unsigned int i;
-
          if (!target_resource) {
+               if (contiguous)
+                       return;
+
                  xen_free_ballooned_pages(nr_pages, pages);
                  return;
          }

          mutex_lock(&list_lock);
-       for (i = 0; i < nr_pages; i++) {
-               pages[i]->zone_device_data = page_list;
-               page_list = pages[i];
-               list_count++;
+
+       /* XXX Do we need to check the range (gen_pool_has_addr)? */
+       if (contiguous)
+               gen_pool_free(dma_pool, (unsigned long)page_to_virt(pages[0]),
+                               nr_pages * PAGE_SIZE);
+       else {
+               unsigned int i;
+
+               for (i = 0; i < nr_pages; i++)
+                       gen_pool_free(dma_pool, (unsigned long)page_to_virt(pages[i]),
+                                       PAGE_SIZE);
          }
+
          mutex_unlock(&list_lock);
   }
+
+/**
+ * xen_alloc_unpopulated_pages - alloc unpopulated pages
+ * @nr_pages: Number of pages
+ * @pages: pages returned
+ * @return 0 on success, error otherwise
+ */
+int xen_alloc_unpopulated_pages(unsigned int nr_pages, struct page **pages)
+{
+       return alloc_unpopulated_pages(nr_pages, pages, false);
+}
+EXPORT_SYMBOL(xen_alloc_unpopulated_pages);
+
+/**
+ * xen_free_unpopulated_pages - return unpopulated pages
+ * @nr_pages: Number of pages
+ * @pages: pages to return
+ */
+void xen_free_unpopulated_pages(unsigned int nr_pages, struct page **pages)
+{
+       free_unpopulated_pages(nr_pages, pages, false);
+}
   EXPORT_SYMBOL(xen_free_unpopulated_pages);

+/**
+ * xen_alloc_unpopulated_dma_pages - alloc unpopulated DMAable pages
+ * @dev: valid struct device pointer
+ * @nr_pages: Number of pages
+ * @pages: pages returned
+ * @return 0 on success, error otherwise
+ */
+int xen_alloc_unpopulated_dma_pages(struct device *dev, unsigned int nr_pages,
+               struct page **pages)
+{
+       /* XXX Handle devices which support 64-bit DMA address only for now */
+       if (dma_get_mask(dev) != DMA_BIT_MASK(64))
+               return -EINVAL;
+
+       return alloc_unpopulated_pages(nr_pages, pages, true);
+}
+EXPORT_SYMBOL(xen_alloc_unpopulated_dma_pages);
+
+/**
+ * xen_free_unpopulated_dma_pages - return unpopulated DMAable pages
+ * @dev: valid struct device pointer
+ * @nr_pages: Number of pages
+ * @pages: pages to return
+ */
+void xen_free_unpopulated_dma_pages(struct device *dev, unsigned int nr_pages,
+               struct page **pages)
+{
+       free_unpopulated_pages(nr_pages, pages, true);
+}
+EXPORT_SYMBOL(xen_free_unpopulated_dma_pages);
+
   static int __init unpopulated_init(void)
   {
          int ret;
@@ -237,9 +296,19 @@ static int __init unpopulated_init(void)
          if (!xen_domain())
                  return -ENODEV;

+       dma_pool = gen_pool_create(PAGE_SHIFT, NUMA_NO_NODE);
+       if (!dma_pool) {
+               pr_err("xen:unpopulated: Cannot create DMA pool\n");
+               return -ENOMEM;
+       }
+
+       gen_pool_set_algo(dma_pool, gen_pool_best_fit, NULL);
+
          ret = arch_xen_unpopulated_init(&target_resource);
          if (ret) {
                  pr_err("xen:unpopulated: Cannot initialize target
resource\n");
+               gen_pool_destroy(dma_pool);
+               dma_pool = NULL;
                  target_resource = NULL;
          }

[snip]
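For reference, the genalloc pattern the diff builds on, reduced to a minimal
standalone sketch (the helper name and parameters here are hypothetical, not
part of the patch): create a pool with page granularity, pick the best-fit
algorithm so large contiguous runs survive small allocations, and register the
memremapped range together with its physical base.

#include <linux/genalloc.h>
#include <linux/mm.h>

static struct gen_pool *pool;

/* Hypothetical helper: hand a mapped range [vaddr, vaddr + size) to genalloc */
static int pool_demo_init(void *vaddr, phys_addr_t phys, size_t size)
{
        int ret;

        /* Minimum allocation order is PAGE_SHIFT, i.e. page granularity */
        pool = gen_pool_create(PAGE_SHIFT, NUMA_NO_NODE);
        if (!pool)
                return -ENOMEM;

        /* Best-fit keeps large contiguous runs intact for later big requests */
        gen_pool_set_algo(pool, gen_pool_best_fit, NULL);

        /* Track the virtual and the physical base of the chunk together */
        ret = gen_pool_add_virt(pool, (unsigned long)vaddr, phys, size,
                        NUMA_NO_NODE);
        if (ret)
                gen_pool_destroy(pool);

        return ret;
}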


I think, depending on the approach, we would likely need to do some renaming
for fill_list, page_list, list_lock, etc.


Both options work in my Arm64-based environment; I am not sure regarding x86.
Or do we have another option here?
I would be happy to go either route. What do you think?
The second option (use "dma_pool" for all) looks great, thank you for
looking into it!

ok, great


May I please clarify a few points before starting to prepare the non-RFC version:


1. According to the discussion at "[RFC PATCH 2/2] xen/grant-table: Use
unpopulated DMAable pages instead of real RAM ones", we decided to stay away
from "dma" in the names. Also, the second option (use "dma_pool" for all)
implies dropping the "page_list" entirely, so I am going to do some renaming:

- s/xen_alloc_unpopulated_dma_pages()/xen_alloc_unpopulated_contiguous_pages()
- s/dma_pool/unpopulated_pool
- s/list_lock/pool_lock
- s/fill_list()/fill_pool()

Any objections?
Looks good


thank you for the clarification



2. I don't much like the fact that in free_unpopulated_pages() we have to free
"page by page" if contiguous is false, but unfortunately we cannot avoid doing
that.
I noticed that many users of unpopulated pages retain the initially allocated
pages[] array, so it is passed here unmodified since being allocated, but
there is code (for example, gnttab_page_cache_shrink() in grant-table.c) that
can pass a pages[] array containing arbitrary pages.

static void free_unpopulated_pages(unsigned int nr_pages, struct page **pages,
         bool contiguous)
{

[snip]

     /* XXX Do we need to check the range (gen_pool_has_addr)? */
     if (contiguous)
         gen_pool_free(dma_pool, (unsigned long)page_to_virt(pages[0]),
                 nr_pages * PAGE_SIZE);
     else {
         unsigned int i;

         for (i = 0; i < nr_pages; i++)
             gen_pool_free(dma_pool, (unsigned long)page_to_virt(pages[i]),
                     PAGE_SIZE);
     }

[snip]

}

I think it wouldn't be a big deal for small allocations, but for big
allocations it might not be optimal for speed.

What do you think about updating some places which always require big
allocations to allocate (and free) contiguous pages instead?
A possible candidate is
gem_create()/xen_drm_front_gem_free_object_unlocked() in
drivers/gpu/drm/xen/xen_drm_front_gem.c.
OTOH, I realize this might be an inefficient use of resources. Or is it better not to?
Yes I think it is a good idea, more on this below.

thanks
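On the gen_pool_has_addr() question from the XXX comment above, a minimal
sketch of what such a defensive check could look like in the non-contiguous
free path (whether to skip or WARN on a page the pool does not own is an open
design choice, not something the patch decides):

                unsigned int i;

                for (i = 0; i < nr_pages; i++) {
                        unsigned long vaddr = (unsigned long)page_to_virt(pages[i]);

                        /* Refuse to free addresses the pool does not own */
                        if (WARN_ON_ONCE(!gen_pool_has_addr(dma_pool, vaddr, PAGE_SIZE)))
                                continue;

                        gen_pool_free(dma_pool, vaddr, PAGE_SIZE);
                }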



3. alloc_unpopulated_pages() might be optimized for non-contiguous
allocations; currently we always try to allocate a single chunk even if
contiguous is false.

static int alloc_unpopulated_pages(unsigned int nr_pages, struct page **pages,
         bool contiguous)
{

[snip]

     /* XXX: Optimize for non-contiguous allocations */
     while (!(vaddr = (void *)gen_pool_alloc(dma_pool, nr_pages * PAGE_SIZE)))
{
         if (filled)
             ret = -ENOMEM;
         else {
             ret = fill_list(nr_pages);
             filled = true;
         }
         if (ret)
             goto out;
     }

[snip]

}


But we could allocate "page by page" for non-contiguous allocations; it
might not be optimal for speed, but it would be optimal for resource
usage. What do you think?

static int alloc_unpopulated_pages(unsigned int nr_pages, struct page **pages,
         bool contiguous)
{

[snip]

     if (contiguous) {
         while (!(vaddr = (void *)gen_pool_alloc(dma_pool, nr_pages * PAGE_SIZE))) {
             if (filled)
                 ret = -ENOMEM;
             else {
                 ret = fill_list(nr_pages);
                 filled = true;
             }
             if (ret)
                 goto out;
         }

         for (i = 0; i < nr_pages; i++)
             pages[i] = virt_to_page(vaddr + PAGE_SIZE * i);
     } else {
         if (gen_pool_avail(dma_pool) < nr_pages) {
             ret = fill_list(nr_pages - gen_pool_avail(dma_pool));
             if (ret)
                 goto out;
         }

         for (i = 0; i < nr_pages; i++) {
             vaddr = (void *)gen_pool_alloc(dma_pool, PAGE_SIZE);
             if (!vaddr) {
                 while (i--)
                     gen_pool_free(dma_pool, (unsigned long)page_to_virt(pages[i]),
                             PAGE_SIZE);

                 ret = -ENOMEM;
                 goto out;
             }

             pages[i] = virt_to_page(vaddr);
         }
     }

[snip]

}
Basically, if we allocate (and free) page-by-page it leads to more
efficient resource utilization but it is slower.

yes, I think the same


  If we allocate larger
contiguous chunks it is faster but it leads to less efficient resource
utilization.

yes, I think the same



Given that on both x86 and ARM the unpopulated memory resource is
arbitrarily large, I don't think we need to worry about resource
utilization. It is not backed by real memory. The only limitation is the
address space size which is very large.

I agree



So I would say optimize for speed and use larger contiguous chunks even
when continuity is not strictly required.

thank you for the clarification, will do
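For completeness, a hypothetical caller-side sketch of the contiguous API as
posted in this RFC (using the xen_*_unpopulated_dma_pages() names from the
patch, which per point 1 will become xen_*_unpopulated_contiguous_pages();
"dev" is assumed to be a valid, 64-bit-DMA-capable struct device):

        struct page *pages[16];
        int ret;

        /* Request 16 physically contiguous unpopulated pages */
        ret = xen_alloc_unpopulated_dma_pages(dev, ARRAY_SIZE(pages), pages);
        if (ret)
                return ret;

        /* pages[0..15] back one physically contiguous region here */

        xen_free_unpopulated_dma_pages(dev, ARRAY_SIZE(pages), pages);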


--
Regards,

Oleksandr Tyshchenko




 

