[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v8 --for 4.6 COLO 19/25] COLO: use qemu block replication



From: Wen Congyang <wency@xxxxxxxxxxxxxx>

Use qemu block replication as our block replication solution.
Note that the guest must be paused before starting COLO; otherwise,
the disks won't be consistent between the primary and the secondary.

Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx>
for commit message,
Signed-off-by: Yang Hongyang <yanghy@xxxxxxxxxxxxxx>
---
 tools/libxl/Makefile             |   1 +
 tools/libxl/libxl_colo_qdisk.c   | 209 +++++++++++++++++++++++++++++++++++++++
 tools/libxl/libxl_colo_restore.c |  20 +++-
 tools/libxl/libxl_colo_save.c    |  36 ++++++-
 tools/libxl/libxl_internal.h     |  18 ++++
 tools/libxl/libxl_qmp.c          |  31 ++++++
 6 files changed, 311 insertions(+), 4 deletions(-)
 create mode 100644 tools/libxl/libxl_colo_qdisk.c

diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index 71bf7a2..e91ae79 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -64,6 +64,7 @@ endif
 
 LIBXL_OBJS-y += libxl_remus.o libxl_checkpoint_device.o libxl_remus_disk_drbd.o
 LIBXL_OBJS-y += libxl_colo_restore.o libxl_colo_save.o
+LIBXL_OBJS-y += libxl_colo_qdisk.o
 
 LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o libxl_psr.o
 LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o libxl_libfdt_compat.o
diff --git a/tools/libxl/libxl_colo_qdisk.c b/tools/libxl/libxl_colo_qdisk.c
new file mode 100644
index 0000000..d73572e
--- /dev/null
+++ b/tools/libxl/libxl_colo_qdisk.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright (C) 2015 FUJITSU LIMITED
+ * Author: Wen Congyang <wency@xxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#include "libxl_osdeps.h" /* must come before any other headers */
+
+#include "libxl_internal.h"
+
+typedef struct libxl__colo_qdisk {
+    libxl__checkpoint_device *dev; /* NOTE(review): type unused in this file */
+} libxl__colo_qdisk;
+
+/* ========== init() and cleanup() ========== */
+int init_subkind_qdisk(libxl__checkpoint_devices_state *cds)
+{
+    /*
+     * Deliberately a no-op that always returns 0: at this point we don't
+     * know whether qemu block replication is used, so it can't be started.
+     */
+    return 0;
+}
+
+void cleanup_subkind_qdisk(libxl__checkpoint_devices_state *cds)
+{ /* Intentionally empty: init_subkind_qdisk() acquires nothing to release. */
+}
+
+/* ========== setup() and teardown() ========== */
+/* Match a colo-enabled qdisk whose colo_params is "<addr>:exportname=<name>"
+ * (NULL or malformed params => DOES_NOT_MATCH); on the secondary, start block
+ * replication once.  Reports via dev->aodev.rc and dev->aodev.callback(). */
+static void colo_qdisk_setup(libxl__egc *egc, libxl__checkpoint_device *dev,
+                             bool primary)
+{
+    const libxl_device_disk *disk = dev->backend_dev;
+    const char *addr = NULL;
+    const char *export_name;
+    int ret, rc = 0;
+
+    /* Convenience aliases */
+    libxl__checkpoint_devices_state *const cds = dev->cds;
+    const char *colo_params = disk->colo_params;
+    const int domid = cds->domid;
+
+    EGC_GC;
+
+    if (disk->backend != LIBXL_DISK_BACKEND_QDISK ||
+        !libxl_defbool_val(disk->colo_enable) || !colo_params) {
+        rc = ERROR_CHECKPOINT_DEVOPS_DOES_NOT_MATCH;
+        goto out;
+    }
+
+    export_name = strstr(colo_params, ":exportname=");
+    if (export_name)
+        export_name += strlen(":exportname=");
+    if (!export_name || export_name[0] == 0) {
+        rc = ERROR_CHECKPOINT_DEVOPS_DOES_NOT_MATCH;
+        goto out;
+    }
+
+    dev->matched = 1;
+
+    if (primary) {
+        /* NBD server is not ready, so we cannot start block replication now */
+        goto out;
+    } else {
+        libxl__colo_restore_state *crs = CONTAINER_OF(cds, *crs, cds);
+        int len;
+
+        if (crs->qdisk_setuped)
+            goto out;
+
+        crs->qdisk_setuped = true;
+
+        len = export_name - strlen(":exportname=") - colo_params;
+        addr = libxl__strndup(gc, colo_params, len);
+    }
+
+    ret = libxl__qmp_block_start_replication(gc, domid, primary, addr);
+    if (ret)
+        rc = ERROR_FAIL;
+
+out:
+    dev->aodev.rc = rc;
+    dev->aodev.callback(egc, &dev->aodev);
+}
+
+static void colo_qdisk_teardown(libxl__egc *egc, libxl__checkpoint_device *dev,
+                                bool primary)
+{ /* Stop block replication if it was set up; report via dev->aodev. */
+    int ret, rc = 0;
+
+    /* Convenience aliases */
+    libxl__checkpoint_devices_state *const cds = dev->cds;
+    const int domid = cds->domid;
+
+    EGC_GC;
+
+    if (primary) {
+        libxl__colo_save_state *css = CONTAINER_OF(cds, *css, cds);
+
+        if (!css->qdisk_setuped)
+            goto out; /* not set up (or already torn down): rc stays 0 */
+
+        css->qdisk_setuped = false;
+    } else {
+        libxl__colo_restore_state *crs = CONTAINER_OF(cds, *crs, cds);
+
+        if (!crs->qdisk_setuped)
+            goto out; /* not set up (or already torn down): rc stays 0 */
+
+        crs->qdisk_setuped = false;
+    }
+
+    ret = libxl__qmp_block_stop_replication(gc, domid, primary);
+    if (ret)
+        rc = ERROR_FAIL; /* any QMP failure is reported as ERROR_FAIL */
+
+out:
+    dev->aodev.rc = rc;
+    dev->aodev.callback(egc, &dev->aodev);
+}
+
+/* ========== checkpointing APIs ========== */
+/* should be called after libxl__checkpoint_device_instance_ops.preresume */
+int colo_qdisk_preresume(libxl_ctx *ctx, domid_t domid)
+{ /* Issue a block checkpoint over QMP; returns that call's result. */
+    GC_INIT(ctx);
+    int ret;
+
+    ret = libxl__qmp_block_do_checkpoint(gc, domid);
+
+    GC_FREE;
+    return ret;
+}
+
+static void colo_qdisk_save_preresume(libxl__egc *egc,
+                                      libxl__checkpoint_device *dev)
+{ /* Primary side: start block replication unless already set up. */
+    libxl__colo_save_state *css = CONTAINER_OF(dev->cds, *css, cds);
+    int ret, rc = 0;
+
+    /* Convenience aliases */
+    const int domid = dev->cds->domid;
+
+    EGC_GC;
+
+    if (css->qdisk_setuped)
+        goto out; /* start already attempted by an earlier call */
+
+    css->qdisk_setuped = true; /* set before the QMP call, even if it fails */
+
+    ret = libxl__qmp_block_start_replication(gc, domid, true, NULL);
+    if (ret)
+        rc = ERROR_FAIL;
+
+out:
+    dev->aodev.rc = rc;
+    dev->aodev.callback(egc, &dev->aodev);
+}
+
+/* ======== primary ======== */
+static void colo_qdisk_save_setup(libxl__egc *egc,
+                                  libxl__checkpoint_device *dev)
+{
+    colo_qdisk_setup(egc, dev, true); /* true: primary side */
+}
+
+static void colo_qdisk_save_teardown(libxl__egc *egc,
+                                   libxl__checkpoint_device *dev)
+{
+    colo_qdisk_teardown(egc, dev, true); /* true: primary side */
+}
+
+const libxl__checkpoint_device_instance_ops colo_save_device_qdisk = {
+    .kind = LIBXL__DEVICE_KIND_VBD,
+    .setup = colo_qdisk_save_setup, /* only matches; does not start */
+    .teardown = colo_qdisk_save_teardown,
+    .preresume = colo_qdisk_save_preresume, /* starts block replication */
+};
+
+/* ======== secondary ======== */
+static void colo_qdisk_restore_setup(libxl__egc *egc,
+                                     libxl__checkpoint_device *dev)
+{
+    colo_qdisk_setup(egc, dev, false); /* false: secondary side */
+}
+
+static void colo_qdisk_restore_teardown(libxl__egc *egc,
+                                      libxl__checkpoint_device *dev)
+{
+    colo_qdisk_teardown(egc, dev, false); /* false: secondary side */
+}
+
+const libxl__checkpoint_device_instance_ops colo_restore_device_qdisk = {
+    .kind = LIBXL__DEVICE_KIND_VBD,
+    .setup = colo_qdisk_restore_setup, /* matches and starts replication */
+    .teardown = colo_qdisk_restore_teardown,
+}; /* no .preresume member is set for the secondary */
diff --git a/tools/libxl/libxl_colo_restore.c b/tools/libxl/libxl_colo_restore.c
index 99f06ab..96ea0b9 100644
--- a/tools/libxl/libxl_colo_restore.c
+++ b/tools/libxl/libxl_colo_restore.c
@@ -49,7 +49,10 @@ static void 
libxl__colo_restore_domain_checkpoint_callback(void *data);
 static void libxl__colo_restore_domain_should_checkpoint_callback(void *data);
 static void libxl__colo_restore_domain_suspend_callback(void *data);
 
+extern const libxl__checkpoint_device_instance_ops colo_restore_device_qdisk;
+
 static const libxl__checkpoint_device_instance_ops *colo_restore_ops[] = {
+    &colo_restore_device_qdisk,
     NULL,
 };
 
@@ -148,7 +151,11 @@ static int 
init_device_subkind(libxl__checkpoint_devices_state *cds)
     int rc;
     STATE_AO_GC(cds->ao);
 
+    rc = init_subkind_qdisk(cds);
+    if (rc)  goto out;
+
     rc = 0;
+out:
     return rc;
 }
 
@@ -156,6 +163,8 @@ static void 
cleanup_device_subkind(libxl__checkpoint_devices_state *cds)
 {
     /* cleanup device subkind-specific state in the libxl ctx */
     STATE_AO_GC(cds->ao);
+
+    cleanup_subkind_qdisk(cds);
 }
 
 
@@ -215,6 +224,7 @@ void libxl__colo_restore_setup(libxl__egc *egc,
     GCNEW(crcs);
     crs->crcs = crcs;
     crcs->crs = crs;
+    crs->qdisk_setuped = false;
 
     /* setup dsps */
     crcs->dsps.ao = ao;
@@ -519,6 +529,12 @@ static void colo_restore_preresume_cb(libxl__egc *egc,
         goto out;
     }
 
+    rc = colo_qdisk_preresume(CTX, crs->domid);
+    if (rc) {
+        LOG(ERROR, "colo_qdisk_preresume() fails");
+        goto out;
+    }
+
     colo_restore_resume_vm(egc, crcs);
 
     return;
@@ -674,8 +690,8 @@ static void colo_setup_checkpoint_devices(libxl__egc *egc,
 
     STATE_AO_GC(crs->ao);
 
-    /* TODO: disk/nic support */
-    cds->device_kind_flags = 0;
+    /* TODO: nic support */
+    cds->device_kind_flags = (1 << LIBXL__DEVICE_KIND_VBD);
     cds->callback = colo_restore_setup_cds_done;
     cds->ao = ao;
     cds->domid = crs->domid;
diff --git a/tools/libxl/libxl_colo_save.c b/tools/libxl/libxl_colo_save.c
index f0ab565..1245da7 100644
--- a/tools/libxl/libxl_colo_save.c
+++ b/tools/libxl/libxl_colo_save.c
@@ -19,7 +19,10 @@
 #include "libxl_internal.h"
 #include "libxl_colo.h"
 
+extern const libxl__checkpoint_device_instance_ops colo_save_device_qdisk;
+
 static const libxl__checkpoint_device_instance_ops *colo_ops[] = {
+    &colo_save_device_qdisk,
     NULL,
 };
 
@@ -30,7 +33,11 @@ static int 
init_device_subkind(libxl__checkpoint_devices_state *cds)
     int rc;
     STATE_AO_GC(cds->ao);
 
+    rc = init_subkind_qdisk(cds);
+    if (rc) goto out;
+
     rc = 0;
+out:
     return rc;
 }
 
@@ -38,6 +45,8 @@ static void 
cleanup_device_subkind(libxl__checkpoint_devices_state *cds)
 {
     /* cleanup device subkind-specific state in the libxl ctx */
     STATE_AO_GC(cds->ao);
+
+    cleanup_subkind_qdisk(cds);
 }
 
 /* ================= colo: setup save environment ================= */
@@ -65,9 +74,11 @@ void libxl__colo_save_setup(libxl__egc *egc, 
libxl__colo_save_state *css)
     css->send_fd = dss->fd;
     css->recv_fd = dss->recv_fd;
     css->svm_running = false;
+    css->paused = true;
+    css->qdisk_setuped = false;
 
-    /* TODO: disk/nic support */
-    cds->device_kind_flags = 0;
+    /* TODO: nic support */
+    cds->device_kind_flags = (1 << LIBXL__DEVICE_KIND_VBD);
     cds->ops = colo_ops;
     cds->callback = colo_save_setup_done;
     cds->ao = ao;
@@ -391,12 +402,33 @@ static void colo_preresume_cb(libxl__egc *egc,
         goto out;
     }
 
+    if (!css->paused) {
+        rc = colo_qdisk_preresume(CTX, dss->domid);
+        if (rc) {
+            LOG(ERROR, "colo_qdisk_preresume() fails");
+            goto out;
+        }
+    }
+
     /* Resumes the domain and the device model */
     if (libxl__domain_resume(gc, dss->domid, /* Fast Suspend */1)) {
         LOG(ERROR, "cannot resume primary vm");
         goto out;
     }
 
+    /*
+     * The guest should be paused before doing colo because there is
+     * no disk migration.
+     */
+    if (css->paused) {
+        rc = libxl_domain_unpause(CTX, dss->domid);
+        if (rc) {
+            LOG(ERROR, "cannot unpause primary vm");
+            goto out;
+        }
+        css->paused = false;
+    }
+
     /* read COLO_SVM_RESUMED */
     css->callback = colo_read_svm_resumed_done;
     css->srs.checkpoint_callback = colo_common_read_stream_done;
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index c429852..898e42c 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -1710,6 +1710,14 @@ _hidden int libxl__qmp_set_global_dirty_log(libxl__gc 
*gc, int domid, bool enabl
 _hidden int libxl__qmp_insert_cdrom(libxl__gc *gc, int domid, const 
libxl_device_disk *disk);
 /* Add a virtual CPU */
 _hidden int libxl__qmp_cpu_add(libxl__gc *gc, int domid, int index);
+/* Start block replication */
+_hidden int libxl__qmp_block_start_replication(libxl__gc *gc, int domid,
+                                               bool primary, const char *addr);
+/* Do block checkpoint */
+_hidden int libxl__qmp_block_do_checkpoint(libxl__gc *gc, int domid);
+/* Stop block replication */
+_hidden int libxl__qmp_block_stop_replication(libxl__gc *gc, int domid,
+                                              bool primary);
 /* close and free the QMP handler */
 _hidden void libxl__qmp_close(libxl__qmp_handler *qmp);
 /* remove the socket file, if the file has already been removed,
@@ -2825,6 +2833,9 @@ int init_subkind_nic(libxl__checkpoint_devices_state 
*cds);
 void cleanup_subkind_nic(libxl__checkpoint_devices_state *cds);
 int init_subkind_drbd_disk(libxl__checkpoint_devices_state *cds);
 void cleanup_subkind_drbd_disk(libxl__checkpoint_devices_state *cds);
+int init_subkind_qdisk(libxl__checkpoint_devices_state *cds);
+void cleanup_subkind_qdisk(libxl__checkpoint_devices_state *cds);
+int colo_qdisk_preresume(libxl_ctx *ctx, domid_t domid);
 
 typedef void libxl__checkpoint_callback(libxl__egc *,
                                         libxl__checkpoint_devices_state *,
@@ -3044,6 +3055,10 @@ struct libxl__colo_save_state {
     libxl__stream_read_state srs;
     void (*callback)(libxl__egc *, libxl__colo_save_state *, int);
     bool svm_running;
+    bool paused;
+
+    /* private, used by qdisk block replication */
+    bool qdisk_setuped;
 };
 
 /*----- Domain suspend (save) state structure -----*/
@@ -3441,6 +3456,9 @@ struct libxl__colo_restore_state {
     libxl__domain_create_cb *saved_cb;
     void *crcs;
     libxl__checkpoint_devices_state cds;
+
+    /* private, used by qdisk block replication */
+    bool qdisk_setuped;
 };
 
 struct libxl__domain_create_state {
diff --git a/tools/libxl/libxl_qmp.c b/tools/libxl/libxl_qmp.c
index 080cb9f..a8e7a8f 100644
--- a/tools/libxl/libxl_qmp.c
+++ b/tools/libxl/libxl_qmp.c
@@ -977,6 +977,37 @@ int libxl__qmp_cpu_add(libxl__gc *gc, int domid, int idx)
     return qmp_run_command(gc, domid, "cpu-add", args, NULL, NULL);
 }
 
+int libxl__qmp_block_start_replication(libxl__gc *gc, int domid,
+                                       bool primary, const char *addr)
+{ /* Enable block replication via QMP; addr is only sent when !primary. */
+    libxl__json_object *args = NULL;
+
+    qmp_parameters_add_bool(gc, &args, "enable", true);
+    qmp_parameters_add_bool(gc, &args, "primary", primary);
+    if (!primary)
+        qmp_parameters_add_string(gc, &args, "addr", addr);
+
+    return qmp_run_command(gc, domid, "xen-set-block-replication", args,
+                           NULL, NULL);
+}
+
+int libxl__qmp_block_do_checkpoint(libxl__gc *gc, int domid)
+{ /* Run "xen-do-block-checkpoint" with no arguments; returns its result. */
+    return qmp_run_command(gc, domid, "xen-do-block-checkpoint", NULL,
+                           NULL, NULL);
+}
+
+int libxl__qmp_block_stop_replication(libxl__gc *gc, int domid, bool primary)
+{ /* Disable block replication via QMP ("enable" is sent as false). */
+    libxl__json_object *args = NULL;
+
+    qmp_parameters_add_bool(gc, &args, "enable", false);
+    qmp_parameters_add_bool(gc, &args, "primary", primary);
+
+    return qmp_run_command(gc, domid, "xen-set-block-replication", args,
+                           NULL, NULL);
+}
+
 int libxl__qmp_initializations(libxl__gc *gc, uint32_t domid,
                                const libxl_domain_config *guest_config)
 {
-- 
1.9.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.