[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] tools: libxl/xl: run NUMA placement even when a hard-affinity is set



Right now, if either a hard or a soft affinity is explicitly specified
in a domain's config file, automatic NUMA placement is skipped. However,
automatic NUMA placement affects only the soft affinity of the domain
which is being created.

Therefore, it is ok to let it run if a hard affinity is specified. The
semantics are that the best placement candidate is found while
respecting the specified hard affinity, i.e., using only the nodes that
contain the pcpus in the hard-affinity mask.

This is particularly helpful if global xl pinning masks are defined, as
made possible by commit aa67b97ed34279c43 ("xl.conf: Add global affinity
masks"). In fact, without this patch, defining a global affinity mask
would also mean disabling automatic placement, but that does not
necessarily have to be the case (especially on large systems).

Signed-off-by: Dario Faggioli <dfaggioli@xxxxxxxx>
---
Cc: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
Cc: Wei Liu <wei.liu2@xxxxxxxxxx>
Cc: George Dunlap <george.dunlap@xxxxxxxxxx>
---
 tools/libxl/libxl_dom.c |   46 ++++++++++++++++++++++++++++++++++++++++------
 tools/xl/xl_parse.c     |    6 ++++--
 2 files changed, 44 insertions(+), 8 deletions(-)

diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index eb401cf1d6..e30e2dca9a 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -27,6 +27,8 @@
 
 #include "_paths.h"
 
+//#define DEBUG 1
+
 libxl_domain_type libxl__domain_type(libxl__gc *gc, uint32_t domid)
 {
     libxl_ctx *ctx = libxl__gc_owner(gc);
@@ -142,12 +144,13 @@ static int numa_place_domain(libxl__gc *gc, uint32_t domid,
 {
     int found;
     libxl__numa_candidate candidate;
-    libxl_bitmap cpupool_nodemap;
+    libxl_bitmap cpumap, cpupool_nodemap, *map;
     libxl_cpupoolinfo cpupool_info;
     int i, cpupool, rc = 0;
     uint64_t memkb;
 
     libxl__numa_candidate_init(&candidate);
+    libxl_bitmap_init(&cpumap);
     libxl_bitmap_init(&cpupool_nodemap);
     libxl_cpupoolinfo_init(&cpupool_info);
 
@@ -162,6 +165,38 @@ static int numa_place_domain(libxl__gc *gc, uint32_t domid,
     rc = libxl_cpupool_info(CTX, &cpupool_info, cpupool);
     if (rc)
         goto out;
+    map = &cpupool_info.cpumap;
+
+    /*
+     * If there's a well defined hard affinity mask (i.e., the same one for all
+     * the vcpus), we can try to run the placement considering only the pcpus
+     * within such mask.
+     */
+    if (info->num_vcpu_hard_affinity)
+    {
+#ifdef DEBUG
+        int j;
+
+        for (j = 0; j < info->num_vcpu_hard_affinity; j++)
+            assert(libxl_bitmap_equal(&info->vcpu_hard_affinity[0],
+                                      &info->vcpu_hard_affinity[j], 0));
+#endif /* DEBUG */
+
+        rc = libxl_bitmap_and(CTX, &cpumap, &info->vcpu_hard_affinity[0],
+                              &cpupool_info.cpumap);
+        if (rc)
+            goto out;
+
+        /*
+         * Hard affinity should _really_ contain cpus that are inside our
+         * cpupool. Anyway, if it does not, log a warning and only use the
+         * cpupool's cpus for placement.
+         */
+        if (!libxl_bitmap_is_empty(&cpumap))
+            map = &cpumap;
+        else
+            LOG(WARN, "Hard affinity completely outside of domain's cpupool?");
+    }
 
     rc = libxl_domain_need_memory(CTX, info, &memkb);
     if (rc)
@@ -174,8 +209,7 @@ static int numa_place_domain(libxl__gc *gc, uint32_t domid,
     /* Find the best candidate with enough free memory and at least
      * as much pcpus as the domain has vcpus.  */
     rc = libxl__get_numa_candidate(gc, memkb, info->max_vcpus,
-                                   0, 0, &cpupool_info.cpumap,
-                                   numa_cmpf, &candidate, &found);
+                                   0, 0, map, numa_cmpf, &candidate, &found);
     if (rc)
         goto out;
 
@@ -206,6 +240,7 @@ static int numa_place_domain(libxl__gc *gc, uint32_t domid,
  out:
     libxl__numa_candidate_dispose(&candidate);
     libxl_bitmap_dispose(&cpupool_nodemap);
+    libxl_bitmap_dispose(&cpumap);
     libxl_cpupoolinfo_dispose(&cpupool_info);
     return rc;
 }
@@ -379,9 +414,8 @@ int libxl__build_pre(libxl__gc *gc, uint32_t domid,
      * reflect the placement result if that is the case
      */
     if (libxl_defbool_val(info->numa_placement)) {
-        if (info->cpumap.size || info->num_vcpu_hard_affinity ||
-            info->num_vcpu_soft_affinity)
-            LOG(WARN, "Can't run NUMA placement, as an (hard or soft) "
+        if (info->cpumap.size || info->num_vcpu_soft_affinity)
+            LOG(WARN, "Can't run NUMA placement, as a soft "
                       "affinity has been specified explicitly");
         else if (info->nodemap.size)
             LOG(WARN, "Can't run NUMA placement, as the domain has "
diff --git a/tools/xl/xl_parse.c b/tools/xl/xl_parse.c
index 971ec1bc56..ad6774a7f7 100644
--- a/tools/xl/xl_parse.c
+++ b/tools/xl/xl_parse.c
@@ -356,7 +356,7 @@ static void parse_vcpu_affinity(libxl_domain_build_info *b_info,
             j++;
         }
 
-        /* We have a list of cpumaps, disable automatic placement */
+        /* When we have a list of cpumaps, always disable automatic placement */
         libxl_defbool_set(&b_info->numa_placement, false);
     } else {
         int i;
@@ -380,7 +380,9 @@ static void parse_vcpu_affinity(libxl_domain_build_info *b_info,
                               &vcpu_affinity_array[0]);
         }
 
-        libxl_defbool_set(&b_info->numa_placement, false);
+        /* We have soft affinity already, disable automatic placement */
+        if (!is_hard)
+            libxl_defbool_set(&b_info->numa_placement, false);
     }
 }
 


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.