[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH RFC v2 20/23] tools: expose postcopy live migration support in libxl and xl
From: Joshua Otto <jtotto@xxxxxxxxxxxx> - Add a 'memory_strategy' parameter to libxl_domain_live_migrate(), which specifies how the remainder of the memory migration should be approached after the iterative precopy phase is completed. - Plug this parameter into the libxl migration precopy policy implementation. - Add --postcopy to xl migrate, and skip the xl-level handshaking at both sides when postcopy migration occurs. Signed-off-by: Joshua Otto <jtotto@xxxxxxxxxxxx> --- tools/libxl/libxl.h | 5 ++++ tools/libxl/libxl_dom_save.c | 17 ++++++++---- tools/libxl/libxl_domain.c | 8 ++++-- tools/libxl/libxl_internal.h | 1 + tools/xl/xl.h | 7 ++++- tools/xl/xl_cmdtable.c | 3 ++ tools/xl/xl_migrate.c | 65 ++++++++++++++++++++++++++++++++++++++++---- tools/xl/xl_vmcontrol.c | 8 ++++-- 8 files changed, 97 insertions(+), 17 deletions(-) diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h index 70441cf..b569734 100644 --- a/tools/libxl/libxl.h +++ b/tools/libxl/libxl.h @@ -1413,9 +1413,14 @@ int libxl_domain_live_migrate(libxl_ctx *ctx, uint32_t domid, int send_fd, int flags, /* LIBXL_SUSPEND_* */ int recv_fd, bool *postcopy_transitioned, /* OUT */ + int memory_strategy, const libxl_asyncop_how *ao_how) LIBXL_EXTERNAL_CALLERS_ONLY; +#define LIBXL_LM_MEMORY_STOP_AND_COPY 0 +#define LIBXL_LM_MEMORY_POSTCOPY 1 +#define LIBXL_LM_MEMORY_DEFAULT LIBXL_LM_MEMORY_STOP_AND_COPY + /* @param suspend_cancel [from xenctrl.h:xc_domain_resume( @param fast )] * If this parameter is true, use co-operative resume. The guest * must support this. diff --git a/tools/libxl/libxl_dom_save.c b/tools/libxl/libxl_dom_save.c index 75ab523..c54f728 100644 --- a/tools/libxl/libxl_dom_save.c +++ b/tools/libxl/libxl_dom_save.c @@ -338,14 +338,19 @@ int libxl__save_emulator_xenstore_data(libxl__domain_save_state *dss, * the live migration when there are either fewer than 50 dirty pages, or more * than 5 precopy rounds have completed. */ -static int libxl__save_live_migration_precopy_policy( - struct precopy_stats stats, void *user) +static int libxl__save_live_migration_precopy_policy(struct precopy_stats stats, + void *user) { - if (stats.dirty_count >= 0 && stats.dirty_count < 50) - return XGS_POLICY_STOP_AND_COPY; + libxl__save_helper_state *shs = user; + libxl__domain_save_state *dss = shs->caller_state; - if (stats.iteration >= 5) - return XGS_POLICY_STOP_AND_COPY; + if ((stats.dirty_count >= 0 && + stats.dirty_count <= 50) || + (stats.iteration >= 5)) { + return (dss->memory_strategy == LIBXL_LM_MEMORY_POSTCOPY) + ? XGS_POLICY_POSTCOPY + : XGS_POLICY_STOP_AND_COPY; + } return XGS_POLICY_CONTINUE_PRECOPY; } diff --git a/tools/libxl/libxl_domain.c b/tools/libxl/libxl_domain.c index fc37f47..e211b88 100644 --- a/tools/libxl/libxl_domain.c +++ b/tools/libxl/libxl_domain.c @@ -488,6 +488,7 @@ static void domain_suspend_cb(libxl__egc *egc, static int do_domain_suspend(libxl_ctx *ctx, uint32_t domid, int fd, int flags, int recv_fd, bool *postcopy_transitioned, + int memory_strategy, const libxl_asyncop_how *ao_how) { AO_CREATE(ctx, domid, ao_how); @@ -509,6 +510,7 @@ static int do_domain_suspend(libxl_ctx *ctx, uint32_t domid, int fd, int flags, dss->fd = fd; dss->recv_fd = recv_fd; dss->postcopy_transitioned = postcopy_transitioned; + dss->memory_strategy = memory_strategy; dss->type = type; dss->live = flags & LIBXL_SUSPEND_LIVE; dss->debug = flags & LIBXL_SUSPEND_DEBUG; @@ -529,12 +531,14 @@ static int do_domain_suspend(libxl_ctx *ctx, uint32_t domid, int fd, int flags, int libxl_domain_suspend(libxl_ctx *ctx, uint32_t domid, int fd, int flags, const libxl_asyncop_how *ao_how) { - return do_domain_suspend(ctx, domid, fd, flags, -1, NULL, ao_how); + return do_domain_suspend(ctx, domid, fd, flags, -1, NULL, + LIBXL_LM_MEMORY_DEFAULT, ao_how); } int libxl_domain_live_migrate(libxl_ctx *ctx, uint32_t domid, int send_fd, int flags, int recv_fd, bool *postcopy_transitioned, + int memory_strategy, const libxl_asyncop_how *ao_how) { if (!postcopy_transitioned) { @@ -545,7 +549,7 @@ int libxl_domain_live_migrate(libxl_ctx *ctx, uint32_t domid, int send_fd, flags |= LIBXL_SUSPEND_LIVE; return do_domain_suspend(ctx, domid, send_fd, flags, recv_fd, - postcopy_transitioned, ao_how); + postcopy_transitioned, memory_strategy, ao_how); } int libxl_domain_pause(libxl_ctx *ctx, uint32_t domid) diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h index 54ad16a..5c4f139 100644 --- a/tools/libxl/libxl_internal.h +++ b/tools/libxl/libxl_internal.h @@ -3324,6 +3324,7 @@ struct libxl__domain_save_state { int fdfl; /* original flags on fd */ int recv_fd; bool *postcopy_transitioned; + int memory_strategy; libxl_domain_type type; int live; int debug; diff --git a/tools/xl/xl.h b/tools/xl/xl.h index aa95b77..279c716 100644 --- a/tools/xl/xl.h +++ b/tools/xl/xl.h @@ -48,6 +48,7 @@ struct domain_create { bool userspace_colo_proxy; int migrate_fd; /* -1 means none */ int send_back_fd; /* -1 means none */ + bool *postcopy_resumed; char **migration_domname_r; /* from malloc */ }; @@ -66,7 +67,6 @@ static const char migrate_permission_to_go[]= "domain is yours, you are cleared to unpause"; static const char migrate_report[]= "my copy unpause results are as follows"; -#endif /* followed by one byte: * 0: everything went well, domain is running @@ -76,6 +76,11 @@ static const char migrate_report[]= * from target to source */ +static const char migrate_postcopy_sync[]= + "postcopy migration completed successfully"; + +#endif + #define XL_MANDATORY_FLAG_JSON (1U << 0) /* config data is in JSON format */ #define XL_MANDATORY_FLAG_STREAMv2 (1U << 1) /* stream is v2 */ #define XL_MANDATORY_FLAG_ALL (XL_MANDATORY_FLAG_JSON | \ diff --git a/tools/xl/xl_cmdtable.c b/tools/xl/xl_cmdtable.c index 30eb93c..9e7ec83 100644 --- a/tools/xl/xl_cmdtable.c +++ b/tools/xl/xl_cmdtable.c @@ -166,6 +166,9 @@ struct cmd_spec cmd_table[] = { " of the domain.\n" "--debug Print huge (!) amount of debug during the migration process.\n" "-p Do not unpause domain after migrating it." + "--postcopy At the end of the iterative precopy phase, transition to a\n" + " postcopy memory migration rather than performing a stop-and-copy\n" + " migration of the outstanding dirty pages.\n" }, { "restore", &main_restore, 0, 1, diff --git a/tools/xl/xl_migrate.c b/tools/xl/xl_migrate.c index 9656204..80d7321 100644 --- a/tools/xl/xl_migrate.c +++ b/tools/xl/xl_migrate.c @@ -177,7 +177,8 @@ static void migrate_do_preamble(int send_fd, int recv_fd, pid_t child, } static void migrate_domain(uint32_t domid, const char *rune, int debug, - const char *override_config_file) + const char *override_config_file, + int memory_strategy) { pid_t child = -1; int rc; @@ -207,18 +208,34 @@ static void migrate_domain(uint32_t domid, const char *rune, int debug, if (debug) flags |= LIBXL_SUSPEND_DEBUG; rc = libxl_domain_live_migrate(ctx, domid, send_fd, flags, - recv_fd, &postcopy_transitioned, NULL); - assert(!postcopy_transitioned); - + recv_fd, &postcopy_transitioned, + memory_strategy, NULL); if (rc) { fprintf(stderr, "migration sender: libxl_domain_suspend failed" " (rc=%d)\n", rc); - if (rc == ERROR_GUEST_TIMEDOUT) + if (postcopy_transitioned) + goto failed_postcopy; + else if (rc == ERROR_GUEST_TIMEDOUT) goto failed_suspend; else goto failed_resume; } + /* + * No need for additional ceremony if we already resumed the guest as part + * of a postcopy live migration. + */ + if (postcopy_transitioned) { + /* It doesn't matter if something happens to the pipe after we get to + * this point - we only bother to synchronize here for tidiness. */ + migrate_read_fixedmessage(recv_fd, migrate_postcopy_sync, + sizeof(migrate_postcopy_sync), + "postcopy sync", rune); + libxl_domain_destroy(ctx, domid, 0); + fprintf(stderr, "Migration successful.\n"); + exit(EXIT_SUCCESS); + } + //fprintf(stderr, "migration sender: Transfer complete.\n"); // Should only be printed when debugging as it's a bit messy with // progress indication. @@ -317,6 +334,21 @@ static void migrate_domain(uint32_t domid, const char *rune, int debug, close(send_fd); migration_child_report(recv_fd); exit(EXIT_FAILURE); + + failed_postcopy: + if (common_domname) { + xasprintf(&away_domname, "%s--postcopy-inconsistent", common_domname); + libxl_domain_rename(ctx, domid, common_domname, away_domname); + } + + fprintf(stderr, + "** Migration failed during memory postcopy **\n" + "It's possible that the guest has executed/is executing at the destination,\n" + " so resuming it here now may be unsafe.\n"); + + close(send_fd); + migration_child_report(recv_fd); + exit(EXIT_FAILURE); } static void migrate_receive(int debug, int daemonize, int monitor, @@ -330,6 +362,7 @@ static void migrate_receive(int debug, int daemonize, int monitor, int rc, rc2; char rc_buf; char *migration_domname; + bool postcopy_resumed; struct domain_create dom_info; signal(SIGPIPE, SIG_IGN); @@ -349,6 +382,7 @@ static void migrate_receive(int debug, int daemonize, int monitor, dom_info.paused = 1; dom_info.migrate_fd = recv_fd; dom_info.send_back_fd = send_fd; + dom_info.postcopy_resumed = &postcopy_resumed; dom_info.migration_domname_r = &migration_domname; dom_info.checkpointed_stream = checkpointed; dom_info.colo_proxy_script = colo_proxy_script; @@ -411,6 +445,20 @@ static void migrate_receive(int debug, int daemonize, int monitor, break; } + /* + * No need for additional ceremony if we already resumed the guest as part + * of a postcopy live migration. + */ + if (postcopy_resumed) { + libxl_write_exactly(ctx, send_fd, migrate_postcopy_sync, + sizeof(migrate_postcopy_sync), + "migration ack stream", "postcopy sync"); + fprintf(stderr, "migration target: Domain started successsfully.\n"); + libxl_domain_rename(ctx, domid, migration_domname, common_domname); + exit(EXIT_SUCCESS); + } + + fprintf(stderr, "migration target: Transfer complete," " requesting permission to start domain.\n"); @@ -541,9 +589,11 @@ int main_migrate(int argc, char **argv) char *rune = NULL; char *host; int opt, daemonize = 1, monitor = 1, debug = 0, pause_after_migration = 0; + int memory_strategy = LIBXL_LM_MEMORY_DEFAULT; static struct option opts[] = { {"debug", 0, 0, 0x100}, {"live", 0, 0, 0x200}, + {"postcopy", 0, 0, 0x400}, COMMON_LONG_OPTS }; @@ -570,6 +620,9 @@ int main_migrate(int argc, char **argv) case 0x200: /* --live */ /* ignored for compatibility with xm */ break; + case 0x400: /* --postcopy */ + memory_strategy = LIBXL_LM_MEMORY_POSTCOPY; + break; } domid = find_domain(argv[optind]); @@ -600,7 +653,7 @@ int main_migrate(int argc, char **argv) pause_after_migration ? " -p" : ""); } - migrate_domain(domid, rune, debug, config_filename); + migrate_domain(domid, rune, debug, config_filename, memory_strategy); return EXIT_SUCCESS; } diff --git a/tools/xl/xl_vmcontrol.c b/tools/xl/xl_vmcontrol.c index 47ba9f3..62e09c1 100644 --- a/tools/xl/xl_vmcontrol.c +++ b/tools/xl/xl_vmcontrol.c @@ -655,6 +655,7 @@ int create_domain(struct domain_create *dom_info) const char *config_source = NULL; const char *restore_source = NULL; int migrate_fd = dom_info->migrate_fd; + bool *postcopy_resumed = dom_info->postcopy_resumed; bool config_in_json; int i; @@ -675,6 +676,9 @@ int create_domain(struct domain_create *dom_info) int restoring = (restore_file || (migrate_fd >= 0)); + if (postcopy_resumed) + *postcopy_resumed = false; + libxl_domain_config_init(&d_config); if (restoring) { @@ -882,8 +886,8 @@ start: ret = libxl_domain_create_restore(ctx, &d_config, &domid, restore_fd, - send_back_fd, NULL, ¶ms, - 0, autoconnect_console_how); + send_back_fd, postcopy_resumed, + ¶ms, 0, autoconnect_console_how); libxl_domain_restore_params_dispose(¶ms); -- 2.7.4 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxxx https://lists.xenproject.org/mailman/listinfo/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |