[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v20210601 32/38] tools: add --min_remaining to libxl_domain_suspend


  • To: xen-devel@xxxxxxxxxxxxxxxxxxxx
  • From: Olaf Hering <olaf@xxxxxxxxx>
  • Date: Tue, 1 Jun 2021 18:11:12 +0200
  • Arc-authentication-results: i=1; strato.com; dkim=none
  • Arc-message-signature: i=1; a=rsa-sha256; c=relaxed/relaxed; t=1622563900; s=strato-dkim-0002; d=strato.com; h=References:In-Reply-To:Message-Id:Date:Subject:Cc:To:From:Cc:Date: From:Subject:Sender; bh=z1lmf0xePHwyesyqjBFkPKIgt7yTBfVYJQlZCg92Pig=; b=Ui/N5dcIi30X7j3unjmJJR0yBmG7TDkeBhos7LZDGabwet4KhNstuaiNwJyRVPeV8/ ZHi6YMufr7c1fyUoohYHo9FvJPMmxZGuJSSuWj5u3lv0Bczv5Wc3/max9djUTSJFmIhA uGZnH7y6swi4ePLz9u0JGFnPsz57gvhatxVZygXmuobmmES1YO7MTkBJ5QNuLy+r5MQ2 7KGuss9KN2PMo9FmC9ZU5KXBeuJeWQfxahLi9fQO6d5LSIjwa9xFrEpgIY0HVOUn6Sys 4/2SxEOlLVFKZBJY53UhA8NF4tjlXgCa3w8q7DiUOnHh2rtJ/07fA5UwhsGl97XLpQ/l cQAQ==
  • Arc-seal: i=1; a=rsa-sha256; t=1622563900; cv=none; d=strato.com; s=strato-dkim-0002; b=q4QpgJ61zpOybvUvhUr09ZmeltRRHXUJPMawiTvc9JFN5FTIFY5oP+64k6xDO1D0s5 UV04M/mB5Hi/+GidhhR4hIyGNA/HRQvyAdRue4BOPs+/PR96QxUyLbSNHvqdr9hL9GQu EFAU9dhkSJIbLNjkMaPBE1yd4XHuax7yhS5ThiG+xqBgMbjoqQeFFifH43qJSc9HnguL 96nBvelGvVr27kzdU39w7y0s0nQivTOHmVwVKCyPdLi31+n5kgoNSmmJ8BJiJinuvO8x kKMaeAGIvjreAuP2dQogevYgTNgdMN2LH7J6yvucvfDOFNcYkkm+q0sTeUyOYsmv/3Tx qj4A==
  • Authentication-results: strato.com; dkim=none
  • Cc: Olaf Hering <olaf@xxxxxxxxx>, Ian Jackson <iwj@xxxxxxxxxxxxxx>, Wei Liu <wl@xxxxxxx>, Anthony PERARD <anthony.perard@xxxxxxxxxx>
  • Delivery-date: Tue, 01 Jun 2021 16:24:37 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

The decision to stop+move a domU to the new host must be based on two factors:
- the available network bandwidth for the migration stream
- the maximum time a workload within a domU can be savely suspended

Both values define how many dirty pages a workload may produce prior the
final stop+move.

The default value of 50 pages is much too low with todays network bandwidths.
On an idle 1GiB link these 200K will be transferred within ~2ms.

Give the admin a knob to adjust the point when the final stop+move will
be done, so he can base this decision on his own needs.

This patch adjusts xl(1) and the libxl API.
External users check LIBXL_HAVE_DOMAIN_SUSPEND_PROPS for the availibility
of the new .min_remaining property.

Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
---
 docs/man/xl.1.pod.in              |  8 ++++++++
 tools/include/libxl.h             |  1 +
 tools/libs/light/libxl_dom_save.c |  2 +-
 tools/libs/light/libxl_domain.c   |  1 +
 tools/libs/light/libxl_internal.h |  1 +
 tools/xl/xl_cmdtable.c            | 23 ++++++++++++-----------
 tools/xl/xl_migrate.c             |  9 ++++++++-
 7 files changed, 32 insertions(+), 13 deletions(-)

diff --git a/docs/man/xl.1.pod.in b/docs/man/xl.1.pod.in
index b13e09c0ee..43609f6cdd 100644
--- a/docs/man/xl.1.pod.in
+++ b/docs/man/xl.1.pod.in
@@ -500,6 +500,14 @@ possible to use this option for a 'localhost' migration.
 
 Number of copy iterations before final suspend+move (default: 5)
 
+=item B<--min_remaing> I<pages>
+
+Number of remaining dirty pages. If the number of dirty pages drops that
+low, the guest is suspended and the domU will finally be moved to I<host>.
+
+This allows the host admin to control for how long the domU will likely
+be suspended during transit.
+
 =back
 
 =item B<remus> [I<OPTIONS>] I<domain-id> I<host>
diff --git a/tools/include/libxl.h b/tools/include/libxl.h
index bf77da0524..28d70b1078 100644
--- a/tools/include/libxl.h
+++ b/tools/include/libxl.h
@@ -1715,6 +1715,7 @@ static inline int 
libxl_retrieve_domain_configuration_0x041200(
 typedef struct {
     uint32_t flags; /* LIBXL_SUSPEND_* */
     uint32_t max_iters;
+    uint32_t min_remaining;
 } libxl_domain_suspend_props;
 #define LIBXL_SUSPEND_DEBUG 1
 #define LIBXL_SUSPEND_LIVE 2
diff --git a/tools/libs/light/libxl_dom_save.c 
b/tools/libs/light/libxl_dom_save.c
index 938c0127f3..ad5df89b2c 100644
--- a/tools/libs/light/libxl_dom_save.c
+++ b/tools/libs/light/libxl_dom_save.c
@@ -381,7 +381,7 @@ static int 
libxl__domain_save_precopy_policy(precopy_stats_t stats, void *user)
 
     LOGD(DEBUG, shs->domid, "iteration %u dirty_count %ld total_written %lu",
          stats.iteration, stats.dirty_count, stats.total_written);
-    if (stats.dirty_count >= 0 && stats.dirty_count < 
LIBXL_XGS_POLICY_TARGET_DIRTY_COUNT)
+    if (stats.dirty_count >= 0 && stats.dirty_count < dss->min_remaining)
         goto stop_copy;
     if (stats.iteration >= dss->max_iters)
         goto stop_copy;
diff --git a/tools/libs/light/libxl_domain.c b/tools/libs/light/libxl_domain.c
index 612d3dc4ea..ae4dc9ad01 100644
--- a/tools/libs/light/libxl_domain.c
+++ b/tools/libs/light/libxl_domain.c
@@ -528,6 +528,7 @@ int libxl_domain_suspend(libxl_ctx *ctx, uint32_t domid, 
int fd,
     dss->fd = fd;
     dss->type = type;
     dss->max_iters = props->max_iters ?: LIBXL_XGS_POLICY_MAX_ITERATIONS;
+    dss->min_remaining = props->min_remaining ?: 
LIBXL_XGS_POLICY_TARGET_DIRTY_COUNT;
     dss->live = props->flags & LIBXL_SUSPEND_LIVE;
     dss->debug = props->flags & LIBXL_SUSPEND_DEBUG;
     dss->checkpointed_stream = LIBXL_CHECKPOINTED_STREAM_NONE;
diff --git a/tools/libs/light/libxl_internal.h 
b/tools/libs/light/libxl_internal.h
index 82b9dca5a0..63028586fe 100644
--- a/tools/libs/light/libxl_internal.h
+++ b/tools/libs/light/libxl_internal.h
@@ -3642,6 +3642,7 @@ struct libxl__domain_save_state {
     int debug;
     int checkpointed_stream;
     uint32_t max_iters;
+    uint32_t min_remaining;
     const libxl_domain_remus_info *remus;
     /* private */
     int rc;
diff --git a/tools/xl/xl_cmdtable.c b/tools/xl/xl_cmdtable.c
index 8f8fa72760..acb84e3486 100644
--- a/tools/xl/xl_cmdtable.c
+++ b/tools/xl/xl_cmdtable.c
@@ -165,17 +165,18 @@ const struct cmd_spec cmd_table[] = {
       &main_migrate, 0, 1,
       "Migrate a domain to another host",
       "[options] <Domain> <host>",
-      "-h              Print this help.\n"
-      "-C <config>     Send <config> instead of config file from creation.\n"
-      "-s <sshcommand> Use <sshcommand> instead of ssh.  String will be 
passed\n"
-      "                to sh. If empty, run <host> instead of ssh <host> xl\n"
-      "                migrate-receive [-d -e]\n"
-      "-e              Do not wait in the background (on <host>) for the 
death\n"
-      "                of the domain.\n"
-      "--debug         Verify transferred domU page data.\n"
-      "-p              Do not unpause domain after migrating it.\n"
-      "-D              Preserve the domain id\n"
-      "--max_iters N   Number of copy iterations before final stop+move"
+      "-h                Print this help.\n"
+      "-C <config>       Send <config> instead of config file from creation.\n"
+      "-s <sshcommand>   Use <sshcommand> instead of ssh.  String will be 
passed\n"
+      "                  to sh. If empty, run <host> instead of ssh <host> 
xl\n"
+      "                  migrate-receive [-d -e]\n"
+      "-e                Do not wait in the background (on <host>) for the 
death\n"
+      "                  of the domain.\n"
+      "--debug           Verify transferred domU page data.\n"
+      "-p                Do not unpause domain after migrating it.\n"
+      "-D                Preserve the domain id\n"
+      "--max_iters N     Number of copy iterations before final stop+move\n"
+      "--min_remaining N Number of remaining dirty pages before final 
stop+move"
     },
     { "restore",
       &main_restore, 0, 1,
diff --git a/tools/xl/xl_migrate.c b/tools/xl/xl_migrate.c
index af117d4d56..14feb2b7ec 100644
--- a/tools/xl/xl_migrate.c
+++ b/tools/xl/xl_migrate.c
@@ -179,6 +179,7 @@ static void migrate_do_preamble(int send_fd, int recv_fd, 
pid_t child,
 static void migrate_domain(uint32_t domid, int preserve_domid,
                            const char *rune, int debug,
                            uint32_t max_iters,
+                           uint32_t min_remaining,
                            const char *override_config_file)
 {
     pid_t child = -1;
@@ -191,6 +192,7 @@ static void migrate_domain(uint32_t domid, int 
preserve_domid,
     libxl_domain_suspend_props props = {
         .flags = LIBXL_SUSPEND_LIVE,
         .max_iters = max_iters,
+        .min_remaining = min_remaining,
         };
 
     save_domain_core_begin(domid, preserve_domid, override_config_file,
@@ -545,9 +547,11 @@ int main_migrate(int argc, char **argv)
     int opt, daemonize = 1, monitor = 1, debug = 0, pause_after_migration = 0;
     int preserve_domid = 0;
     uint32_t max_iters = 0;
+    uint32_t min_remaining = 0;
     static struct option opts[] = {
         {"debug", 0, 0, 0x100},
         {"max_iters", 1, 0, 0x101},
+        {"min_remaining", 1, 0, 0x102},
         {"live", 0, 0, 0x200},
         COMMON_LONG_OPTS
     };
@@ -578,6 +582,9 @@ int main_migrate(int argc, char **argv)
     case 0x101: /* --max_iters */
         max_iters = atoi(optarg);
         break;
+    case 0x102: /* --min_remaining */
+        min_remaining = atoi(optarg);
+        break;
     case 0x200: /* --live */
         /* ignored for compatibility with xm */
         break;
@@ -613,7 +620,7 @@ int main_migrate(int argc, char **argv)
     }
 
     migrate_domain(domid, preserve_domid, rune, debug,
-                   max_iters, config_filename);
+                   max_iters, min_remaining, config_filename);
     return EXIT_SUCCESS;
 }
 



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.