|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH v2 25/27] tools/libxl: Handle checkpoint records in a libxl migration v2 stream
This is the final bit of untangling for Remus.
Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
CC: Ian Campbell <Ian.Campbell@xxxxxxxxxx>
CC: Ian Jackson <Ian.Jackson@xxxxxxxxxxxxx>
CC: Wei Liu <wei.liu2@xxxxxxxxxx>
---
As before, Remus functionality is untested, but the new logic here should
handle failovers correctly. The patch has changed greatly from v1, both in a
functional sense, and because of the knock-on effects from earlier changes.
---
tools/libxl/libxl_create.c | 27 +++++++++++
tools/libxl/libxl_internal.h | 8 ++++
tools/libxl/libxl_stream_read.c | 97 +++++++++++++++++++++++++++++++++++++++
3 files changed, 132 insertions(+)
diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
index 2a0063a..0325bf1 100644
--- a/tools/libxl/libxl_create.c
+++ b/tools/libxl/libxl_create.c
@@ -672,6 +672,29 @@ static int store_libxl_entry(libxl__gc *gc, uint32_t domid,
libxl_device_model_version_to_string(b_info->device_model_version));
}
+/*----- remus asynchronous checkpoint callback -----*/
+
+static void remus_checkpoint_stream_done(
+ libxl__egc *egc, libxl__stream_read_state *srs, int rc);
+
+static void libxl__remus_domain_checkpoint_callback(void *data)
+{
+ libxl__save_helper_state *shs = data; /* opaque argument is the save helper state */
+ libxl__domain_create_state *dcs = CONTAINER_OF(shs, *dcs, shs); /* shs is embedded in dcs */
+ libxl__egc *egc = dcs->shs.egc;
+ STATE_AO_GC(dcs->ao);
+ /* Begin the stream's checkpoint handling; the result arrives via srs.checkpoint_callback. */
+ libxl__stream_read_start_checkpoint(egc, &dcs->srs);
+}
+
+static void remus_checkpoint_stream_done(
+ libxl__egc *egc, libxl__stream_read_state *srs, int rc)
+{
+ libxl__domain_create_state *dcs = CONTAINER_OF(srs, *dcs, srs); /* srs is embedded in dcs */
+ /* Forward the stream's checkpoint result (rc) to the save helper's async-callback completion. */
+ libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->shs, rc);
+}
+
/*----- main domain creation -----*/
/* We have a linear control flow; only one event callback is
@@ -939,6 +962,8 @@ static void domcreate_bootloader_done(libxl__egc *egc,
libxl_domain_config *const d_config = dcs->guest_config;
const int restore_fd = dcs->restore_fd;
libxl__domain_build_state *const state = &dcs->build_state;
+ libxl__srm_restore_autogen_callbacks *const callbacks =
+ &dcs->shs.callbacks.restore.a;
if (rc) {
domcreate_rebuild_done(egc, dcs, rc);
@@ -966,6 +991,7 @@ static void domcreate_bootloader_done(libxl__egc *egc,
}
/* Restore */
+ callbacks->checkpoint = libxl__remus_domain_checkpoint_callback;
rc = libxl__build_pre(gc, domid, d_config, state);
if (rc)
@@ -975,6 +1001,7 @@ static void domcreate_bootloader_done(libxl__egc *egc,
dcs->srs.fd = restore_fd;
dcs->srs.legacy = (dcs->restore_params.stream_version == 1);
dcs->srs.completion_callback = domcreate_stream_done;
+ dcs->srs.checkpoint_callback = remus_checkpoint_stream_done;
libxl__stream_read_start(egc, &dcs->srs);
return;
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 84e22c2..1b62f25 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -3332,9 +3332,13 @@ struct libxl__stream_read_state {
void (*completion_callback)(libxl__egc *egc,
libxl__stream_read_state *srs,
int rc);
+ void (*checkpoint_callback)(libxl__egc *egc,
+ libxl__stream_read_state *srs,
+ int rc);
/* Private */
int rc;
bool running;
+ bool in_checkpoint;
/* Active-stuff handling */
int joined_rc;
@@ -3349,6 +3353,8 @@ struct libxl__stream_read_state {
LIBXL_STAILQ_HEAD(, libxl__sr_record_buf) record_queue;
enum {
SRS_PHASE_NORMAL,
+ SRS_PHASE_BUFFERING,
+ SRS_PHASE_UNBUFFERING,
} phase;
bool recursion_guard;
@@ -3362,6 +3368,8 @@ _hidden void libxl__stream_read_start(libxl__egc *egc,
_hidden void libxl__stream_read_continue(libxl__egc *egc,
libxl__stream_read_state *stream);
+_hidden void libxl__stream_read_start_checkpoint(
+ libxl__egc *egc, libxl__stream_read_state *stream);
_hidden void libxl__stream_read_abort(libxl__egc *egc,
libxl__stream_read_state *stream, int
rc);
diff --git a/tools/libxl/libxl_stream_read.c b/tools/libxl/libxl_stream_read.c
index 81095cd..6cfa05c 100644
--- a/tools/libxl/libxl_stream_read.c
+++ b/tools/libxl/libxl_stream_read.c
@@ -123,6 +123,10 @@ static int setup_read(libxl__stream_read_state *stream,
return libxl__datacopier_start(dc);
}
+/* Error handling for checkpoint mini-loop. */
+static void checkpoint_done(libxl__egc *egc,
+ libxl__stream_read_state *stream, int rc);
+
void libxl__stream_read_start(libxl__egc *egc,
libxl__stream_read_state *stream)
{
@@ -186,6 +190,18 @@ void libxl__stream_read_start(libxl__egc *egc,
stream_failed(egc, stream, ret);
}
+void libxl__stream_read_start_checkpoint(libxl__egc *egc,
+ libxl__stream_read_state *stream)
+{
+ assert(stream->running); /* only valid on a stream that has been started */
+ assert(!stream->in_checkpoint); /* a checkpoint must not already be in progress */
+ /* Enter the checkpoint mini-loop in its buffering phase. */
+ stream->in_checkpoint = true;
+ stream->phase = SRS_PHASE_BUFFERING;
+ /* Request the next record from the stream; buffering continues from stream_continue(). */
+ setup_read_record(egc, stream);
+}
+
void libxl__stream_read_abort(libxl__egc *egc,
libxl__stream_read_state *stream, int rc)
{
@@ -206,6 +222,16 @@ static void stream_failed(libxl__egc *egc,
stream->rc = rc;
if (stream->running) {
+
+ /*
+ * If we are in a checkpoint, pass the failure to libxc, which will
+ * come back around to us via libxl__xc_domain_restore_done().
+ */
+ if (stream->in_checkpoint) {
+ checkpoint_done(egc, stream, rc);
+ return;
+ }
+
stream_done(egc, stream);
}
}
@@ -215,6 +241,7 @@ static void stream_done(libxl__egc *egc,
{
libxl__sr_record_buf *rec, *trec;
+ assert(!stream->in_checkpoint);
assert(stream->running);
stream->running = false;
@@ -296,6 +323,8 @@ static void stream_continue(libxl__egc *egc,
* processing the record. At no point should there ever be two
* records in the queue.
*/
+ assert(!stream->in_checkpoint);
+
if (LIBXL_STAILQ_EMPTY(&stream->record_queue))
setup_read_record(egc, stream);
else {
@@ -304,6 +333,45 @@ static void stream_continue(libxl__egc *egc,
}
break;
+ case SRS_PHASE_BUFFERING: {
+ /*
+ * Buffer phase of a checkpoint in the stream. Collect records read
+ * from the stream without processing them. We need to peek at the
+ * tail to spot the CHECKPOINT_END record, and switch to the
+ * unbuffering phase.
+ */
+ libxl__sr_record_buf *rec = LIBXL_STAILQ_LAST(
+ &stream->record_queue, libxl__sr_record_buf, entry);
+
+ assert(stream->in_checkpoint);
+
+ if ( !rec || rec->hdr.type != REC_TYPE_CHECKPOINT_END ) {
+ setup_read_record(egc, stream);
+ break;
+ }
+
+ /*
+ * There are now some number of buffered records, with a
+ * CHECKPOINT_END at the end. Start processing them all.
+ */
+ stream->phase = SRS_PHASE_UNBUFFERING;
+ }
+ /* FALLTHROUGH */
+ case SRS_PHASE_UNBUFFERING:
+ /*
+ * Unbuffering phase of a checkpoint in the stream. Process all
+ * records collected during the buffering phase.
+ */
+ assert(stream->in_checkpoint);
+
+ while (process_record(egc, stream))
+ ; /*
+ * Nothing! process_record() helpfully tells us if no specific
+ * further actions have been set up, in which case we want to go
+ * ahead and process the next record.
+ */
+ break;
+
default:
abort();
}
@@ -528,6 +596,15 @@ static bool process_record(libxl__egc *egc,
write_emulator_blob(egc, stream, rec);
break;
+ case REC_TYPE_CHECKPOINT_END:
+ if (!stream->in_checkpoint) {
+ LOG(ERROR, "Unexpected CHECKPOINT_END record in stream");
+ ret = ERROR_FAIL;
+ goto err;
+ }
+ checkpoint_done(egc, stream, 0);
+ break;
+
default:
LOG(ERROR, "Unrecognised record 0x%08x", rec->hdr.type);
ret = ERROR_FAIL;
@@ -625,6 +702,26 @@ static void write_emulator_done(libxl__egc *egc,
stream_failed(egc, stream, ret);
}
+static void checkpoint_done(libxl__egc *egc,
+ libxl__stream_read_state *stream, int rc)
+{
+ int ret;
+ /* Finish the current checkpoint: translate rc into the 0/1/2 result given to the callback. */
+ assert(stream->in_checkpoint);
+ /* NOTE(review): failure while buffering maps to failover, presumably as no buffered record was applied yet - confirm. */
+ if (rc == 0)
+ ret = 0; /* Success */
+ else if (stream->phase == SRS_PHASE_BUFFERING)
+ ret = 2; /* Failover */
+ else
+ ret = 1; /* Error (fatal) */
+ /* The callback runs while in_checkpoint is still set and phase is still the checkpoint phase. */
+ stream->checkpoint_callback(egc, stream, ret);
+ /* Leave the checkpoint mini-loop and return to normal record processing. */
+ stream->in_checkpoint = false;
+ stream->phase = SRS_PHASE_NORMAL;
+}
+
/*
* Local variables:
* mode: C
--
1.7.10.4
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |