[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 08/15] libxl: wait for qemu to acknowledge logdirty command



The current migration code in libxl instructs qemu to start or stop
logdirty, but it does not wait for an acknowledgement from qemu before
continuing.  This might lead to memory corruption (!)

Fix this by waiting for qemu to acknowledge the command.

Unfortunately the necessary ao arrangements for waiting for this
command are unique because qemu has a special protocol for this
particular operation.

Also, this change means that the switch_qemu_logdirty callback
implementation in libxl can no longer synchronously produce its return
value, as it now needs to wait for xenstore.  So we tell the
marshalling code generator that it is a message which does not need a
reply.  This turns the callback function called by the marshaller into
one which returns void; the callback function arranges to later
explicitly sends the reply to the helper, when the xs watch triggers
and the appropriate value is read from xenstore.

Signed-off-by: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
---
 tools/libxl/libxl_dom.c            |  176 +++++++++++++++++++++++++++++++++---
 tools/libxl/libxl_internal.h       |   18 ++++-
 tools/libxl/libxl_save_callout.c   |    8 ++
 tools/libxl/libxl_save_msgs_gen.pl |    7 +-
 4 files changed, 193 insertions(+), 16 deletions(-)

diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index 3e89dd7..b13ed51 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -526,30 +526,180 @@ int libxl__toolstack_restore(uint32_t domid, const 
uint8_t *buf,
 static void domain_suspend_done(libxl__egc *egc,
                         libxl__domain_suspend_state *dss, int rc);
 
-/*----- callbacks, called by xc_domain_save -----*/
+/*----- complicated callback, called by xc_domain_save -----*/
+
+/*
+ * We implement the other end of protocol for controlling qemu-dm's
+ * logdirty.  There is no documentation for this protocol, but our
+ * counterparty's implementation is in
+ * qemu-xen-traditional.git:xenstore.c in the function
+ * xenstore_process_logdirty_event
+ */
+
+static void switch_logdirty_timeout(libxl__egc *egc, libxl__ev_time *ev,
+                                    const struct timeval *requested_abs);
+static void switch_logdirty_xswatch(libxl__egc *egc, libxl__ev_xswatch*,
+                            const char *watch_path, const char *event_path);
+static void switch_logdirty_done(libxl__egc *egc,
+                                 libxl__domain_suspend_state *dss, int ok);
 
-int libxl__domain_suspend_common_switch_qemu_logdirty
+static void logdirty_init(libxl__logdirty_switch *lds)
+{
+    lds->cmd_path = 0;
+    libxl__ev_xswatch_init(&lds->watch);
+    libxl__ev_time_init(&lds->timeout);
+}
+
+void libxl__domain_suspend_common_switch_qemu_logdirty
                                (int domid, unsigned enable, void *user)
 {
     libxl__save_helper_state *shs = user;
+    libxl__egc *egc = shs->egc;
     libxl__domain_suspend_state *dss = CONTAINER_OF(shs, *dss, shs);
+    libxl__logdirty_switch *lds = &dss->logdirty;
     STATE_AO_GC(dss->ao);
-    char *path;
-    bool rc;
+    int rc;
+    xs_transaction_t t = 0;
+    const char *got;
 
-    path = libxl__sprintf(gc,
+    if (!lds->cmd_path) {
+        lds->cmd_path = GCSPRINTF(
                    "/local/domain/0/device-model/%u/logdirty/cmd", domid);
-    if (!path)
-        return 1;
+        lds->ret_path = GCSPRINTF(
+                   "/local/domain/0/device-model/%u/logdirty/ret", domid);
+    }
+    lds->cmd = enable ? "enable" : "disable";
 
-    if (enable)
-        rc = xs_write(CTX->xsh, XBT_NULL, path, "enable", strlen("enable"));
-    else
-        rc = xs_write(CTX->xsh, XBT_NULL, path, "disable", strlen("disable"));
+    rc = libxl__ev_xswatch_register(gc, &lds->watch,
+                                switch_logdirty_xswatch, lds->ret_path);
+    if (rc) goto out;
+
+    rc = libxl__ev_time_register_rel(gc, &lds->timeout,
+                                switch_logdirty_timeout, 10*1000);
+    if (rc) goto out;
+
+    for (;;) {
+        rc = libxl__xs_transaction_start(gc, &t);
+        if (rc) goto out;
+
+        rc = libxl__xs_read_checked(gc, t, lds->cmd_path, &got);
+        if (rc) goto out;
+
+        if (got) {
+            const char *got_ret;
+            rc = libxl__xs_read_checked(gc, t, lds->ret_path, &got_ret);
+            if (rc) goto out;
 
-    return rc ? 0 : 1;
+            if (strcmp(got, got_ret)) {
+                LOG(ERROR,"controlling logdirty: qemu was already sent"
+                    " command `%s' (xenstore path `%s') but result is `%s'",
+                    got, lds->cmd_path, got_ret ? got_ret : "<none>");
+                rc = ERROR_FAIL;
+                goto out;
+            }
+            rc = libxl__xs_rm_checked(gc, t, lds->cmd_path);
+            if (rc) goto out;
+        }
+
+        rc = libxl__xs_rm_checked(gc, t, lds->ret_path);
+        if (rc) goto out;
+
+        rc = libxl__xs_write_checked(gc, t, lds->cmd_path, lds->cmd);
+        if (rc) goto out;
+
+        rc = libxl__xs_transaction_commit(gc, &t);
+        if (!rc) break;
+        if (rc<0) goto out;
+    }
+
+    /* OK, wait for some callback */
+    return;
+
+ out:
+    LOG(ERROR,"logdirty switch failed (rc=%d), aborting suspend",rc);
+    switch_logdirty_done(egc,dss,0);
+}
+
+static void switch_logdirty_timeout(libxl__egc *egc, libxl__ev_time *ev,
+                                    const struct timeval *requested_abs)
+{
+    libxl__domain_suspend_state *dss = CONTAINER_OF(ev, *dss, 
logdirty.timeout);
+    STATE_AO_GC(dss->ao);
+    LOG(ERROR,"logdirty switch: wait for device model timed out");
+    switch_logdirty_done(egc,dss,0);
 }
 
+static void switch_logdirty_xswatch(libxl__egc *egc, libxl__ev_xswatch *watch,
+                            const char *watch_path, const char *event_path)
+{
+    libxl__domain_suspend_state *dss =
+        CONTAINER_OF(watch, *dss, logdirty.watch);
+    libxl__logdirty_switch *lds = &dss->logdirty;
+    STATE_AO_GC(dss->ao);
+    const char *got;
+    xs_transaction_t t = 0;
+    int rc;
+
+    for (;;) {
+        rc = libxl__xs_transaction_start(gc, &t);
+        if (rc) goto out;
+
+        rc = libxl__xs_read_checked(gc, t, lds->ret_path, &got);
+        if (rc) goto out;
+
+        if (!got) {
+            rc = +1;
+            goto out;
+        }
+
+        if (strcmp(got, lds->cmd)) {
+            LOG(ERROR,"logdirty switch: sent command `%s' but got reply `%s'"
+                " (xenstore paths `%s' / `%s')", lds->cmd, got,
+                lds->cmd_path, lds->ret_path);
+            rc = ERROR_FAIL;
+            goto out;
+        }
+
+        rc = libxl__xs_rm_checked(gc, t, lds->cmd_path);
+        if (rc) goto out;
+
+        rc = libxl__xs_rm_checked(gc, t, lds->ret_path);
+        if (rc) goto out;
+
+        rc = libxl__xs_transaction_commit(gc, &t); 
+        if (!rc) break;
+        if (rc<0) goto out;
+    }
+
+ out:
+    /* rc < 0: error
+     * rc == 0: ok, we are done
+     * rc == +1: need to keep waiting
+     */
+    libxl__xs_transaction_abort(gc, &t);
+
+    if (!rc) {
+        switch_logdirty_done(egc,dss,1);
+    } else if (rc < 0) {
+        LOG(ERROR,"logdirty switch: response read failed (rc=%d)",rc);
+        switch_logdirty_done(egc,dss,0);
+    }
+}
+
+static void switch_logdirty_done(libxl__egc *egc,
+                                 libxl__domain_suspend_state *dss, int ok)
+{
+    STATE_AO_GC(dss->ao);
+    libxl__logdirty_switch *lds = &dss->logdirty;
+
+    libxl__ev_xswatch_deregister(gc, &lds->watch);
+    libxl__ev_time_deregister(gc, &lds->timeout);
+
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, ok);
+}
+
+/*----- callbacks, called by xc_domain_save -----*/
+
 int libxl__domain_suspend_device_model(libxl__gc *gc, uint32_t domid)
 {
     libxl_ctx *ctx = libxl__gc_owner(gc);
@@ -872,6 +1022,8 @@ void libxl__domain_suspend(libxl__egc *egc, 
libxl__domain_suspend_state *dss)
     libxl__srm_save_autogen_callbacks *const callbacks =
         &dss->shs.callbacks.save.a;
 
+    logdirty_init(&dss->logdirty);
+
     switch (type) {
     case LIBXL_DOMAIN_TYPE_HVM: {
         char *path;
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index ee7f66a..8f4f45d 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -1864,6 +1864,14 @@ typedef struct libxl__domain_suspend_state 
libxl__domain_suspend_state;
 typedef void libxl__domain_suspend_cb(libxl__egc*,
                                       libxl__domain_suspend_state*, int rc);
 
+typedef struct libxl__logdirty_switch {
+    const char *cmd;
+    const char *cmd_path;
+    const char *ret_path;
+    libxl__ev_xswatch watch;
+    libxl__ev_time timeout;
+} libxl__logdirty_switch;
+
 struct libxl__domain_suspend_state {
     /* set by caller of libxl__domain_suspend */
     libxl__ao *ao;
@@ -1882,6 +1890,7 @@ struct libxl__domain_suspend_state {
     int guest_responded;
     int interval; /* checkpoint interval (for Remus) */
     libxl__save_helper_state shs;
+    libxl__logdirty_switch logdirty;
 };
 
 
@@ -2013,8 +2022,15 @@ _hidden void libxl__xc_domain_save(libxl__egc*, 
libxl__domain_suspend_state*,
 _hidden void libxl__xc_domain_save_done(libxl__egc*, void *dss_void,
                                         int rc, int retval, int errnoval);
 
+/* Used by asynchronous callbacks: ie ones which xc regards as
+ * returning a value, but which we want to handle asynchronously.
+ * Such functions' actual callback function return void in libxl
+ * When they are ready to indicate completion, they call this. */
+void libxl__xc_domain_saverestore_async_callback_done(libxl__egc *egc,
+                           libxl__save_helper_state *shs, int return_value);
+
 _hidden int libxl__domain_suspend_common_callback(void *data);
-_hidden int libxl__domain_suspend_common_switch_qemu_logdirty
+_hidden void libxl__domain_suspend_common_switch_qemu_logdirty
                                (int domid, unsigned int enable, void *data);
 _hidden int libxl__toolstack_save(uint32_t domid, uint8_t **buf,
         uint32_t *len, void *data);
diff --git a/tools/libxl/libxl_save_callout.c b/tools/libxl/libxl_save_callout.c
index ff7dfb6..4413e97 100644
--- a/tools/libxl/libxl_save_callout.c
+++ b/tools/libxl/libxl_save_callout.c
@@ -129,6 +129,14 @@ void libxl__xc_domain_save(libxl__egc *egc, 
libxl__domain_suspend_state *dss,
 }
 
 
+void libxl__xc_domain_saverestore_async_callback_done(libxl__egc *egc,
+                           libxl__save_helper_state *shs, int return_value)
+{
+    shs->egc = egc;
+    libxl__srm_callout_sendreply(return_value, shs);
+    shs->egc = 0;
+}
+
 /*----- helper execution -----*/
 
 static void run_helper(libxl__egc *egc, libxl__save_helper_state *shs,
diff --git a/tools/libxl/libxl_save_msgs_gen.pl 
b/tools/libxl/libxl_save_msgs_gen.pl
index cd0837e..8832c46 100755
--- a/tools/libxl/libxl_save_msgs_gen.pl
+++ b/tools/libxl/libxl_save_msgs_gen.pl
@@ -14,6 +14,7 @@ our @msgs = (
     #   x  - function pointer is in struct {save,restore}_callbacks
     #         and its null-ness needs to be passed through to the helper's xc
     #   W  - needs a return value; callback is synchronous
+    #   A  - needs a return value; callback is asynchronous
     [  1, 'sr',     "log",                   [qw(uint32_t level
                                                  uint32_t errnoval
                                                  STRING context
@@ -25,7 +26,7 @@ our @msgs = (
     [  3, 'scxW',   "suspend", [] ],         
     [  4, 'scxW',   "postcopy", [] ],        
     [  5, 'scxW',   "checkpoint", [] ],      
-    [  6, 'scxW',   "switch_qemu_logdirty",  [qw(int domid
+    [  6, 'scxA',   "switch_qemu_logdirty",  [qw(int domid
                                               unsigned enable)] ],
     #                toolstack_save          done entirely `by hand'
     [  7, 'rcxW',   "toolstack_restore",     [qw(uint32_t domid
@@ -260,7 +261,7 @@ foreach my $msginfo (@msgs) {
         $f_more_sr->("        int r;\n");
     }
 
-    my $c_rtype_helper = $flags =~ m/W/ ? 'int' : 'void';
+    my $c_rtype_helper = $flags =~ m/[WA]/ ? 'int' : 'void';
     my $c_rtype_callout = $flags =~ m/W/ ? 'int' : 'void';
     my $c_decl = '(';
     my $c_callback_args = '';
@@ -348,7 +349,7 @@ END
     assert(len == allocd);
     ${transmit}(buf, len, user);
 ");
-    if ($flags =~ m/W/) {
+    if ($flags =~ m/[WA]/) {
        f_more("${encode}_$name", (<<END.($debug ? <<END : '').<<END));
     int r = ${helper}_getreply(user);
 END
-- 
1.7.2.5


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.