[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH RFC 18/20] libxl/migration: implement the sender side of postcopy live migration



To make the libxl sender capable of supporting postcopy live migration:
- Add a postcopy transition callback chain through the stream writer (this
  callback chain is nearly identical to the checkpoint callback chain, and
  differs meaningfully only in its failure/completion behaviour)
- Wire this callback chain up to the xc postcopy callback entries in the domain
  save logic.
- Add parameters to libxl_domain_live_migrate() to permit bidirectional
  communication between the sender and receiver and enable the caller to reason
  about the safety of recovery from a postcopy failure.

No mechanism is introduced yet to enable library clients to induce a postcopy
live migration - this will follow after the libxl postcopy receiver logic.

Signed-off-by: Joshua Otto <jtotto@xxxxxxxxxxxx>
---
 docs/specs/libxl-migration-stream.pandoc | 19 ++++++++-
 tools/libxl/libxl.h                      |  4 +-
 tools/libxl/libxl_dom_save.c             | 25 +++++++++++-
 tools/libxl/libxl_domain.c               | 25 ++++++++----
 tools/libxl/libxl_internal.h             | 21 ++++++++--
 tools/libxl/libxl_sr_stream_format.h     | 13 +++---
 tools/libxl/libxl_stream_write.c         | 69 ++++++++++++++++++++++++++++++--
 tools/xl/xl_migrate.c                    |  5 ++-
 8 files changed, 155 insertions(+), 26 deletions(-)

diff --git a/docs/specs/libxl-migration-stream.pandoc b/docs/specs/libxl-migration-stream.pandoc
index a1ba1ac..8d00cd7 100644
--- a/docs/specs/libxl-migration-stream.pandoc
+++ b/docs/specs/libxl-migration-stream.pandoc
@@ -2,7 +2,8 @@
 % Andrew Cooper <<andrew.cooper3@xxxxxxxxxx>>
   Wen Congyang <<wency@xxxxxxxxxxxxxx>>
   Yang Hongyang <<hongyang.yang@xxxxxxxxxxxx>>
-% Revision 2
+  Joshua Otto <<jtotto@xxxxxxxxxxxx>>
+% Revision 3
 
 Introduction
 ============
@@ -123,7 +124,9 @@ type         0x00000000: END
 
              0x00000005: CHECKPOINT_STATE
 
-             0x00000006 - 0x7FFFFFFF: Reserved for future _mandatory_
+             0x00000006: POSTCOPY_TRANSITION_END
+
+             0x00000007 - 0x7FFFFFFF: Reserved for future _mandatory_
              records.
 
              0x80000000 - 0xFFFFFFFF: Reserved for future _optional_
@@ -304,6 +307,18 @@ While Secondary is running in below loop:
     b. Send _CHECKPOINT\_SVM\_SUSPENDED_ to primary
 4. Checkpoint
 
+POSTCOPY\_TRANSITION\_END
+-------------------------
+
+A postcopy transition end record marks the end of a postcopy transition in a
+libxl live migration stream.  It indicates that control of the stream should be
+returned to libxc for the postcopy memory migration phase.
+
+     0     1     2     3     4     5     6     7 octet
+    +-------------------------------------------------+
+
+The postcopy transition end record contains no fields; its body_length is 0.
+
 Future Extensions
 =================
 
diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h
index 84ac96a..99d187b 100644
--- a/tools/libxl/libxl.h
+++ b/tools/libxl/libxl.h
@@ -1375,10 +1375,12 @@ int libxl_domain_suspend(libxl_ctx *ctx, uint32_t domid, int fd,
 #define LIBXL_SUSPEND_DEBUG 1
 #define LIBXL_SUSPEND_LIVE 2
 
-int libxl_domain_live_migrate(libxl_ctx *ctx, uint32_t domid, int fd,
+int libxl_domain_live_migrate(libxl_ctx *ctx, uint32_t domid, int send_fd,
                               int flags, /* LIBXL_SUSPEND_* */
                               unsigned int precopy_iterations,
                               unsigned int precopy_dirty_threshold,
+                              int recv_fd,
+                              bool *postcopy_transitioned, /* OUT */
                               const libxl_asyncop_how *ao_how)
                               LIBXL_EXTERNAL_CALLERS_ONLY;
 
diff --git a/tools/libxl/libxl_dom_save.c b/tools/libxl/libxl_dom_save.c
index 4ef9ca5..9e565ae 100644
--- a/tools/libxl/libxl_dom_save.c
+++ b/tools/libxl/libxl_dom_save.c
@@ -349,10 +349,31 @@ static int libxl__save_live_migration_simple_precopy_policy(
     return XGS_POLICY_CONTINUE_PRECOPY;
 }
 
+static void postcopy_transition_done(libxl__egc *egc,
+                                     libxl__stream_write_state *sws, int rc);
+
 static void libxl__save_live_migration_postcopy_transition_callback(void *user)
 {
-    /* XXX we're not yet ready to deal with this */
-    assert(0);
+    libxl__save_helper_state *shs = user;
+    libxl__stream_write_state *sws = CONTAINER_OF(shs, *sws, shs);
+    sws->postcopy_transition_callback = postcopy_transition_done;
+    libxl__stream_write_start_postcopy_transition(shs->egc, sws);
+}
+
+static void postcopy_transition_done(libxl__egc *egc,
+                                     libxl__stream_write_state *sws,
+                                     int rc)
+{
+    libxl__domain_save_state *dss = sws->dss;
+
+    /* Past here, it's _possible_ that the domain may execute at the
+     * destination, so - unless we're given positive confirmation by the
+     * destination that it failed to resume there - we must assume it has. */
+    assert(dss->postcopy_transitioned);
+    *dss->postcopy_transitioned = !rc;
+
+    /* Return control to libxc. */
+    libxl__xc_domain_saverestore_async_callback_done(egc, &sws->shs, !rc);
 }
 
 /*----- main code for saving, in order of execution -----*/
diff --git a/tools/libxl/libxl_domain.c b/tools/libxl/libxl_domain.c
index b1cf643..ea778a6 100644
--- a/tools/libxl/libxl_domain.c
+++ b/tools/libxl/libxl_domain.c
@@ -488,7 +488,8 @@ static void domain_suspend_cb(libxl__egc *egc,
 
 static int do_domain_suspend(libxl_ctx *ctx, uint32_t domid, int fd, int flags,
                              unsigned int precopy_iterations,
-                             unsigned int precopy_dirty_threshold,
+                             unsigned int precopy_dirty_threshold, int recv_fd,
+                             bool *postcopy_transitioned,
                              const libxl_asyncop_how *ao_how)
 {
     AO_CREATE(ctx, domid, ao_how);
@@ -508,6 +509,8 @@ static int do_domain_suspend(libxl_ctx *ctx, uint32_t domid, int fd, int flags,
 
     dss->domid = domid;
     dss->fd = fd;
+    dss->recv_fd = recv_fd;
+    dss->postcopy_transitioned = postcopy_transitioned;
     dss->type = type;
     dss->live = flags & LIBXL_SUSPEND_LIVE;
     dss->debug = flags & LIBXL_SUSPEND_DEBUG;
@@ -532,18 +535,26 @@ int libxl_domain_suspend(libxl_ctx *ctx, uint32_t domid, int fd, int flags,
 {
     return do_domain_suspend(ctx, domid, fd, flags,
                              LIBXL_LM_PRECOPY_ITERATIONS_DEFAULT,
-                             LIBXL_LM_DIRTY_THRESHOLD_DEFAULT, ao_how);
+                             LIBXL_LM_DIRTY_THRESHOLD_DEFAULT, -1,
+                             NULL, ao_how);
 }
 
-int libxl_domain_live_migrate(libxl_ctx *ctx, uint32_t domid, int fd, int flags,
-                              unsigned int precopy_iterations,
-                              unsigned int precopy_dirty_threshold,
+int libxl_domain_live_migrate(libxl_ctx *ctx, uint32_t domid, int send_fd,
+                              int flags, unsigned int precopy_iterations,
+                              unsigned int precopy_dirty_threshold, int recv_fd,
+                              bool *postcopy_transitioned,
                               const libxl_asyncop_how *ao_how)
 {
+    if (!postcopy_transitioned) {
+        errno = EINVAL;
+        return -1;
+    }
+
     flags |= LIBXL_SUSPEND_LIVE;
 
-    return do_domain_suspend(ctx, domid, fd, flags, precopy_iterations,
-                             precopy_dirty_threshold, ao_how);
+    return do_domain_suspend(ctx, domid, send_fd, flags, precopy_iterations,
+                             precopy_dirty_threshold, recv_fd,
+                             postcopy_transitioned, ao_how);
 }
 
 int libxl_domain_pause(libxl_ctx *ctx, uint32_t domid)
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index c754706..ae272d7 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -3194,17 +3194,25 @@ struct libxl__stream_write_state {
     void (*completion_callback)(libxl__egc *egc,
                                 libxl__stream_write_state *sws,
                                 int rc);
-    void (*checkpoint_callback)(libxl__egc *egc,
-                                libxl__stream_write_state *sws,
-                                int rc);
+    /* Checkpointing and postcopy live migration are mutually exclusive. */
+    union {
+        void (*checkpoint_callback)(libxl__egc *egc,
+                                    libxl__stream_write_state *sws,
+                                    int rc);
+        void (*postcopy_transition_callback)(libxl__egc *egc,
+                                             libxl__stream_write_state *sws,
+                                             int rc);
+    };
     /* Private */
     int rc;
     bool running;
     enum {
         SWS_PHASE_NORMAL,
         SWS_PHASE_CHECKPOINT,
-        SWS_PHASE_CHECKPOINT_STATE
+        SWS_PHASE_CHECKPOINT_STATE,
+        SWS_PHASE_POSTCOPY_TRANSITION
     } phase;
+    bool postcopy_transitioned;
     bool sync_teardown;  /* Only used to coordinate shutdown on error path. */
     libxl__save_helper_state shs;
 
@@ -3227,6 +3235,10 @@ _hidden void libxl__stream_write_init(libxl__stream_write_state *stream);
 _hidden void libxl__stream_write_start(libxl__egc *egc,
                                        libxl__stream_write_state *stream);
 _hidden void
+libxl__stream_write_start_postcopy_transition(
+    libxl__egc *egc,
+    libxl__stream_write_state *stream);
+_hidden void
 libxl__stream_write_start_checkpoint(libxl__egc *egc,
                                      libxl__stream_write_state *stream);
 _hidden void
@@ -3290,6 +3302,7 @@ struct libxl__domain_save_state {
     int fd;
     int fdfl; /* original flags on fd */
     int recv_fd;
+    bool *postcopy_transitioned;
     libxl_domain_type type;
     int live;
     int debug;
diff --git a/tools/libxl/libxl_sr_stream_format.h b/tools/libxl/libxl_sr_stream_format.h
index 75f5190..a789126 100644
--- a/tools/libxl/libxl_sr_stream_format.h
+++ b/tools/libxl/libxl_sr_stream_format.h
@@ -31,12 +31,13 @@ typedef struct libxl__sr_rec_hdr
 /* All records must be aligned up to an 8 octet boundary */
 #define REC_ALIGN_ORDER              3U
 
-#define REC_TYPE_END                    0x00000000U
-#define REC_TYPE_LIBXC_CONTEXT          0x00000001U
-#define REC_TYPE_EMULATOR_XENSTORE_DATA 0x00000002U
-#define REC_TYPE_EMULATOR_CONTEXT       0x00000003U
-#define REC_TYPE_CHECKPOINT_END         0x00000004U
-#define REC_TYPE_CHECKPOINT_STATE       0x00000005U
+#define REC_TYPE_END                     0x00000000U
+#define REC_TYPE_LIBXC_CONTEXT           0x00000001U
+#define REC_TYPE_EMULATOR_XENSTORE_DATA  0x00000002U
+#define REC_TYPE_EMULATOR_CONTEXT        0x00000003U
+#define REC_TYPE_CHECKPOINT_END          0x00000004U
+#define REC_TYPE_CHECKPOINT_STATE        0x00000005U
+#define REC_TYPE_POSTCOPY_TRANSITION_END 0x00000006U
 
 typedef struct libxl__sr_emulator_hdr
 {
diff --git a/tools/libxl/libxl_stream_write.c b/tools/libxl/libxl_stream_write.c
index 8f2a1c9..1c4b1f1 100644
--- a/tools/libxl/libxl_stream_write.c
+++ b/tools/libxl/libxl_stream_write.c
@@ -22,6 +22,9 @@
  * Entry points from outside:
  *  - libxl__stream_write_start()
  *     - Start writing a stream from the start.
+ *  - libxl__stream_write_start_postcopy_transition()
+ *     - Write the records required to permit postcopy resumption at the
+ *       migration target.
  *  - libxl__stream_write_start_checkpoint()
  *     - Write the records which form a checkpoint into a stream.
  *
@@ -65,6 +68,9 @@ static void stream_complete(libxl__egc *egc,
                             libxl__stream_write_state *stream, int rc);
 static void stream_done(libxl__egc *egc,
                         libxl__stream_write_state *stream, int rc);
+static void postcopy_transition_done(libxl__egc *egc,
+                                     libxl__stream_write_state *stream,
+                                     int rc);
 static void checkpoint_done(libxl__egc *egc,
                             libxl__stream_write_state *stream,
                             int rc);
@@ -91,7 +97,9 @@ static void emulator_context_record_done(libxl__egc *egc,
                                          libxl__stream_write_state *stream);
 static void write_phase_end_record(libxl__egc *egc,
                                    libxl__stream_write_state *stream);
-
+static void postcopy_transition_end_record_done(
+    libxl__egc *egc,
+    libxl__stream_write_state *stream);
 static void checkpoint_end_record_done(libxl__egc *egc,
                                        libxl__stream_write_state *stream);
 
@@ -211,6 +219,7 @@ void libxl__stream_write_init(libxl__stream_write_state *stream)
     stream->rc = 0;
     stream->running = false;
     stream->phase = SWS_PHASE_NORMAL;
+    stream->postcopy_transitioned = false;
     stream->sync_teardown = false;
     FILLZERO(stream->dc);
     stream->record_done_callback = NULL;
@@ -287,6 +296,22 @@ void libxl__stream_write_start(libxl__egc *egc,
     stream_complete(egc, stream, rc);
 }
 
+void libxl__stream_write_start_postcopy_transition(
+    libxl__egc *egc,
+    libxl__stream_write_state *stream)
+{
+    libxl__domain_save_state *dss = stream->dss;
+
+    assert(stream->running);
+    assert(dss->checkpointed_stream == LIBXL_CHECKPOINTED_STREAM_NONE);
+    assert(stream->phase == SWS_PHASE_NORMAL);
+    assert(!stream->postcopy_transitioned);
+
+    stream->phase = SWS_PHASE_POSTCOPY_TRANSITION;
+
+    write_emulator_xenstore_record(egc, stream);
+}
+
 void libxl__stream_write_start_checkpoint(libxl__egc *egc,
                                           libxl__stream_write_state *stream)
 {
@@ -369,7 +394,7 @@ void libxl__xc_domain_save_done(libxl__egc *egc, void *dss_void,
      * If the stream is not still alive, we must not continue any work.
      */
     if (libxl__stream_write_inuse(stream)) {
-        if (dss->checkpointed_stream != LIBXL_CHECKPOINTED_STREAM_NONE)
+        if (dss->checkpointed_stream != LIBXL_CHECKPOINTED_STREAM_NONE) {
             /*
              * For remus, if libxl__xc_domain_save_done() completes,
              * there was an error sending data to the secondary.
@@ -377,8 +402,17 @@ void libxl__xc_domain_save_done(libxl__egc *egc, void *dss_void,
              * return value (Please refer to libxl__remus_teardown())
              */
             stream_complete(egc, stream, 0);
-        else
+        } else if (stream->postcopy_transitioned) {
+            /*
+             * If, on the other hand, this is a normal migration that had a
+             * postcopy migration stage, we're completely done at this point and
+             * want to report any error received here to our caller.
+             */
+            assert(stream->phase == SWS_PHASE_NORMAL);
+            write_phase_end_record(egc, stream);
+        } else {
             write_emulator_xenstore_record(egc, stream);
+        }
     }
 }
 
@@ -550,6 +584,11 @@ static void write_phase_end_record(libxl__egc *egc,
         what     = "checkpoint end record";
         cb       = checkpoint_end_record_done;
         break;
+    case SWS_PHASE_POSTCOPY_TRANSITION:
+        rec.type = REC_TYPE_POSTCOPY_TRANSITION_END;
+        what     = "postcopy transition end record";
+        cb       = postcopy_transition_end_record_done;
+        break;
     default:
         /* SWS_PHASE_CHECKPOINT_STATE has no end record */
         assert(false);
@@ -558,6 +597,13 @@ static void write_phase_end_record(libxl__egc *egc,
     setup_write(egc, stream, what, &rec, NULL, cb);
 }
 
+static void postcopy_transition_end_record_done(
+    libxl__egc *egc,
+    libxl__stream_write_state *stream)
+{
+    postcopy_transition_done(egc, stream, 0);
+}
+
 static void checkpoint_end_record_done(libxl__egc *egc,
                                        libxl__stream_write_state *stream)
 {
@@ -600,6 +646,13 @@ static void stream_complete(libxl__egc *egc,
          */
         checkpoint_state_done(egc, stream, rc);
         break;
+    case SWS_PHASE_POSTCOPY_TRANSITION:
+        /*
+         * To deal with errors during the postcopy transition, we use the same
+         * strategy as during checkpoints.
+         */
+        postcopy_transition_done(egc, stream, rc);
+        break;
     }
 }
 
@@ -627,6 +680,16 @@ static void stream_done(libxl__egc *egc,
     }
 }
 
+static void postcopy_transition_done(libxl__egc *egc,
+                                     libxl__stream_write_state *stream,
+                                     int rc)
+{
+    assert(stream->phase == SWS_PHASE_POSTCOPY_TRANSITION);
+    stream->postcopy_transitioned = true;
+    stream->phase = SWS_PHASE_NORMAL;
+    stream->postcopy_transition_callback(egc, stream, rc);
+}
+
 static void checkpoint_done(libxl__egc *egc,
                             libxl__stream_write_state *stream,
                             int rc)
diff --git a/tools/xl/xl_migrate.c b/tools/xl/xl_migrate.c
index 1bb3fb4..1ffc32b 100644
--- a/tools/xl/xl_migrate.c
+++ b/tools/xl/xl_migrate.c
@@ -188,6 +188,7 @@ static void migrate_domain(uint32_t domid, const char *rune, int debug,
     char rc_buf;
     uint8_t *config_data;
     int config_len, flags = LIBXL_SUSPEND_LIVE;
+    bool postcopy_transitioned = false;
 
     save_domain_core_begin(domid, override_config_file,
                            &config_data, &config_len);
@@ -209,7 +210,9 @@ static void migrate_domain(uint32_t domid, const char *rune, int debug,
         flags |= LIBXL_SUSPEND_DEBUG;
     rc = libxl_domain_live_migrate(ctx, domid, send_fd, flags,
                                    precopy_iterations, precopy_dirty_threshold,
-                                   NULL);
+                                   recv_fd, &postcopy_transitioned, NULL);
+    assert(!postcopy_transitioned);
+
     if (rc) {
         fprintf(stderr, "migration sender: libxl_domain_suspend failed"
                 " (rc=%d)\n", rc);
-- 
2.7.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.