[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 09 of 10 [RFC]] xl: Introduce Best and Worst Fit guest placement algorithms



Besides than "auto" and "ffit", as per the previous patch, enable
Best and Worst Fit placement heuristics to `xl`, for the user to
chose them in the config file. Implementation just sits on top
of the First Fit algorithm code, with just the proper reordering
of the nodes and their free memory before invoking it (and after
it finishes).

TODO: * As usual for this series, benchmarking and testing,
        as much thorough and comprehensive as possible!

Signed-off-by: Dario Faggioli <dario.faggioli@xxxxxxxxxx>

diff --git a/docs/man/xl.cfg.pod.5 b/docs/man/xl.cfg.pod.5
--- a/docs/man/xl.cfg.pod.5
+++ b/docs/man/xl.cfg.pod.5
@@ -146,6 +146,16 @@ to try automatically fitting the guest o
 use the First Fit algorithm to try automatically fitting the
 guest on the host's NUMA nodes
 
+=item B<bfit>
+
+use the Best Fit algorithm to try automatically fitting the
+guest on the host's NUMA nodes
+
+=item B<wfit>
+
+use the Worst Fit algorithm to try automatically fitting the
+guest on the host's NUMA nodes
+
 =back
 
 This option interacts with `nodes=` such that, if the number of
diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
--- a/tools/libxl/xl_cmdimpl.c
+++ b/tools/libxl/xl_cmdimpl.c
@@ -117,6 +117,8 @@ static const char *action_on_shutdown_na
  *  NONE   : no automatic placement at all;
  *  STATIC : explicit nodes specification.
  *  FFIT   : use the First Fit algorithm for automatic placement;
+ *  BFIT   : use the Best Fit algorithm for automatic placement;
+ *  WFIT   : use the Worst Fit algorithm for automatic placement;
  *  AUTO   : an not better specified automatic placement, likely
  *           to be implemented as a short circuit to (one of)
  *           the above(s);
@@ -124,7 +126,9 @@ static const char *action_on_shutdown_na
 #define NODES_POLICY_NONE    0
 #define NODES_POLICY_STATIC  1
 #define NODES_POLICY_FFIT    2
-#define NODES_POLICY_AUTO    3
+#define NODES_POLICY_BFIT    3
+#define NODES_POLICY_WFIT    4
+#define NODES_POLICY_AUTO    5
 
 /* Default behaviour wrt to automatic domain placement on nodes,
  * maximum number of attempts made for applying the desired
@@ -153,6 +157,8 @@ static const char *nodes_policy_names[] 
     [NODES_POLICY_NONE]   = "none",
     [NODES_POLICY_STATIC] = "static",
     [NODES_POLICY_FFIT]   = "ffit",
+    [NODES_POLICY_BFIT]   = "bfit",
+    [NODES_POLICY_WFIT]   = "wfit",
     [NODES_POLICY_AUTO]   = "auto",
 };
 
@@ -595,10 +601,17 @@ static inline void __add_nodes_to_nodema
 /*
  * First Fit automatic placement. Just scan all the nodes in the
  * provided map (@nodemap) linearly and pick up the fist @nodes
- * that fit the memory requirements (@memkb) of the domain.
+ * that fit the memory requirements (@memb) of the domain.
  * This also returns the actual number of used nodes (still via
  * @nodes) and the actual nodes map to be used (still via @nodemap),
  * as they both can change depending on the retry policy (@retry).
+ *
+ * The idea behing First Fit is to be efficient and quick, and it
+ * generally works better than Best Fit wrt fragmentation, although
+ * it tends to occupy "early" nodes more than "late" ones.
+ *
+ * This function is also used as the ground for implementing Best
+ * and Worst Fit placement solutions too.
  */
 static int place_domain_ffit(const libxl_numainfo *numa, uint64_t memb,
                              int retry, int nr_nodes, int *nodes,
@@ -678,6 +691,133 @@ static int place_domain_ffit(const libxl
     return rc;
 }
 
+typedef struct {
+    int idx;
+    uint32_t memfree;
+} numa_reorderer;
+
+typedef int (*reorderer_cmp)(const void *, const void *);
+
+static int reorderer_cmp_asc(const void *r1,
+                             const void *r2)
+{
+    return ((const numa_reorderer*) r1)->memfree -
+        ((const numa_reorderer*) r2)->memfree;
+}
+
+/* Turns the comparison the other way around for descending ordering */
+static int reorderer_cmp_desc(const void *r1,
+                              const void *r2)
+{
+    return ((const numa_reorderer*) r2)->memfree -
+        ((const numa_reorderer*) r1)->memfree;
+}
+
+static int __place_domain_bw_fit(const libxl_numainfo *numa, uint64_t memb,
+                                 int retry, int nr_nodes, int *nodes,
+                                 libxl_nodemap *nodemap, reorderer_cmp cmpr)
+{
+    numa_reorderer *n;
+    libxl_numainfo *numa_ord;
+    libxl_nodemap nodemap_ord;
+    int i, rc;
+
+    n = malloc(sizeof(*n) * nr_nodes);
+    if (!n) {
+        fprintf(stderr, "malloc failed\n");
+        return ENOMEM;
+    }
+    numa_ord = malloc(sizeof(*numa) * nr_nodes);
+    if (!numa_ord) {
+        fprintf(stderr, "malloc failed\n");
+        rc = ENOMEM;
+        goto out_n;
+    }
+    if (libxl_nodemap_alloc(ctx, &nodemap_ord) < 0) {
+        fprintf(stderr, "libxl_nodemap_alloc failed\n");
+        rc = ENOMEM;
+        goto out_numaord;
+    }
+
+    /* Initialize the reordering structure so that we can
+     * 'sort' the idx basing on the values of memfree, and
+     * thus have the full trace of the permutations applied
+     * by the sorting algorithm. */
+    for (i = 0; i < nr_nodes; i++) {
+        n[i].idx = i;
+        n[i].memfree = numa[i].free;
+   } 
+
+    qsort(n, nr_nodes, sizeof(*n), cmpr);
+
+    /* Apply the permutation to the numainfo array as
+     * well as to the nodemap. */
+    for (i = 0; i < nr_nodes; i++) {
+        numa_ord[i] = numa[n[i].idx];
+        if (libxl_nodemap_test(nodemap, n[i].idx))
+            libxl_nodemap_set(&nodemap_ord, i);
+    }
+
+    /* Now let First Fit do his job on the ordered data */
+    rc = place_domain_ffit(numa_ord, memb, retry, nr_nodes,
+                           nodes, &nodemap_ord);
+
+    /* `Rollback` the permutation of the node map */
+    libxl_nodemap_set_none(nodemap);
+    for (i = 0; i < nr_nodes; i++) {
+        if (libxl_nodemap_test(&nodemap_ord, i))
+            libxl_nodemap_set(nodemap, n[i].idx);
+    }
+
+    libxl_nodemap_dispose(&nodemap_ord);
+out_numaord:
+    free(numa_ord);
+out_n:
+    free(n);
+
+    return rc;
+}
+
+/* Best Fit automatic placement. It will (try to) find the node(s)
+ * with the smallest possible amount of free memory that also satisfies
+ * the domain memory requirements (@memb).
+ *
+ * The idea behing Best Fit is to optimize memory usage, although it
+ * might introduce quite a bit of fragmentation, by leaving a large
+ * amount of small free areas.
+ *
+ * This is easily implemented by sorting the nodes array in
+ * ascending order (of free memory on each node) and then
+ * asking First Fit to run on the now-ordered data.
+ */
+static int place_domain_bfit(const libxl_numainfo *numa, uint64_t memb,
+                             int retry, int nr_nodes, int *nodes,
+                             libxl_nodemap *nodemap)
+{
+    return __place_domain_bw_fit(numa, memb, retry, nr_nodes,
+                                 nodes, nodemap, reorderer_cmp_asc);
+}
+
+/* Worst Fit automatic placement. It will (try to) find the node(s)
+ * with the highest possible amount of free memory that also satisfies
+ * domain memory requirements (@memb).
+ *
+ * The idea behind Worst Fit is that it will leave big enough free
+ * memory areas to limit the amount of fragmentation, especially
+ * compared to Best Fit.
+ *
+ * This is easily implemented by sorting the nodes array in
+ * ascending order (of free memory on each node) and then
+ * asking First Fit to run on the now-ordered data.
+ */
+static int place_domain_wfit(const libxl_numainfo *numa, uint64_t memb,
+                             int retry, int nr_nodes, int *nodes,
+                             libxl_nodemap *nodemap)
+{
+    return __place_domain_bw_fit(numa, memb, retry, nr_nodes,
+                                 nodes, nodemap, reorderer_cmp_desc);
+}
+
 /* Try to achieve optimal node-affinity for the domain */
 static int place_domain(libxl_domain_build_info *b_info)
 {
@@ -804,6 +944,16 @@ static int place_domain(libxl_domain_bui
                                    retry_policy, nr_nodes, &use_nodes,
                                    &new_nodemap);
             break;
+        case NODES_POLICY_BFIT:
+            rc = place_domain_bfit(numa, (uint64_t) need_memkb * 1024,
+                                   retry_policy, nr_nodes, &use_nodes,
+                                   &new_nodemap);
+            break;
+        case NODES_POLICY_WFIT:
+            rc = place_domain_wfit(numa, (uint64_t) need_memkb * 1024,
+                                   retry_policy, nr_nodes, &use_nodes,
+                                   &new_nodemap);
+            break;
         }
 
         if (rc)

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.