[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [RFC v2 8/9] HACK libxl_exec: Check QEMU status via QMP instead of xenstore



When QEMU is restricted, the qemu on the receiving side cann't write
anything to xenstore once the migration is started. So it cann't tell
libxl that it is ready to continue running the guest.

In order to find out if QEMU is ready, we can issue QMP commands and
check if it respond.

But there is no QMP socket to connect to before qemu is started. But we
can uses different facility from qemu in order to setup some kind of
callback before starting QEMU. For that, we open a file descriptor and
give it to qemu.

This patch creates a pipe, give the write-end to qemu, and wait for
something to be written to it. (We could check if it is actually the QMP
greeting message.)

QEMU is asked to setup a QMP server on this pipe, but even if it is a
one-way only, qemu will write the QMP greeting message to the pipe.
This is done with:
-add-fd, to create a fdset which is use later.
-chardev 'file,path=/dev/fdset/1,append=true', this open a char device
on the write-end of the pipe, tell qemu that the FD is write-only, and
not to run truncate on it.
-mon, just start the QMP server on this new chardev.

With that, qemu will start the QMP server on the write-only fd, which is
enough to have the QMP greeting message. At this point, the QMP socket
is ready, and I think qemu is in the main-loop and ready to start the
emulation and respond to QMP commands.

This patch calls 'query-status', any response to that without error
means that QEMU is ready. If the status is "running", QEMU would already
have written the xenstore node if it could and is doing emulation.
(Any subsequent QMP command 'cont', libxl__qmp_resume(),  is not
changing anything.  If one adds '-S' to the QEMU command line, QEMU will
have the status "prelaunch" as a response to 'query-status', then QEMU
can be asked to start emulation with 'cont' via QMP.)

This patch copies most of "xswait" and call it "qmpwait". This is
probably not the best way forward due to duplication.

Signed-off-by: Anthony PERARD <anthony.perard@xxxxxxxxxx>
---
 tools/libxl/libxl_aoutils.c  | 156 +++++++++++++++++++++++++++++++++++
 tools/libxl/libxl_dm.c       |  44 ++++++++--
 tools/libxl/libxl_exec.c     |  28 +++++--
 tools/libxl/libxl_internal.h |  30 +++++++
 4 files changed, 246 insertions(+), 12 deletions(-)

diff --git a/tools/libxl/libxl_aoutils.c b/tools/libxl/libxl_aoutils.c
index 9e493cd487..7d654996c3 100644
--- a/tools/libxl/libxl_aoutils.c
+++ b/tools/libxl/libxl_aoutils.c
@@ -626,6 +626,162 @@ void libxl__kill(libxl__gc *gc, pid_t pid, int sig, const 
char *what)
                 what, (unsigned long)pid, sig);
 }
 
+/*----- qmpwait -----*/
+
+static libxl__ev_fd_callback qmpwait_fd_read_callback;
+static libxl__ev_time_callback qmpwait_timeout_callback;
+static void qmpwait_report_error(libxl__egc*, libxl__qmpwait_state*, int rc);
+
+void libxl__qmpwait_init(libxl__qmpwait_state *qmpwa)
+{
+    libxl__ev_time_init(&qmpwa->time_ev);
+    libxl__ev_fd_init(&qmpwa->notify_efd);
+}
+
+void libxl__qmpwait_stop(libxl__gc *gc, libxl__qmpwait_state *qmpwa)
+{
+    libxl__ev_time_deregister(gc, &qmpwa->time_ev);
+    libxl__ev_fd_deregister(gc, &qmpwa->notify_efd);
+    libxl__carefd_close(qmpwa->notify_cfd);
+    qmpwa->notify_cfd = NULL;
+}
+
+bool libxl__qmpwait_inuse(const libxl__qmpwait_state *qmpwa)
+{
+    bool time_inuse = libxl__ev_time_isregistered(&qmpwa->time_ev);
+    bool watch_inuse = libxl__ev_fd_isregistered(&qmpwa->notify_efd);
+    assert(time_inuse == watch_inuse);
+    return time_inuse;
+}
+
+int libxl__qmpwait_start(libxl__gc *gc, libxl__qmpwait_state *qmpwa)
+{
+    int rc;
+
+    rc = libxl__ev_time_register_rel(qmpwa->ao, &qmpwa->time_ev,
+                                     qmpwait_timeout_callback, 
qmpwa->timeout_ms);
+    if (rc) goto err;
+
+    rc = libxl__ev_fd_register(gc, &qmpwa->notify_efd,
+                               qmpwait_fd_read_callback,
+                               libxl__carefd_fd(qmpwa->notify_cfd),
+                               POLLIN);
+    if (rc) goto err;
+
+    return 0;
+
+ err:
+    libxl__qmpwait_stop(gc, qmpwa);
+    return rc;
+}
+
+static void qmpwait_fd_read_callback(libxl__egc *egc, libxl__ev_fd *ev,
+                                     int fd, short events, short revents)
+{
+    // checkout:
+    // - datacopier_readable
+    // - watchfd_callback
+
+    libxl__qmpwait_state *qmpwa = CONTAINER_OF(ev, *qmpwa, notify_efd);
+    libxl__spawn_state *ss = CONTAINER_OF(ev, *ss, qmpwait.notify_efd);
+    STATE_AO_GC(qmpwa->ao);
+
+    // need to handle POLLERR
+    // POLLHUP and POLLIN might be both set.
+    if (revents & POLLHUP && !(revents & POLLIN)) {
+        LOG(DEBUG, "received POLLHUP on fd %d: read for %s",
+            fd, qmpwa->what);
+        libxl__ev_fd_deregister(gc, &qmpwa->notify_efd);
+        if (!qmpwa->life_time_read)
+            ss->failure_cb(egc, ss, ERROR_FAIL);
+        return;
+    }
+    if (revents & ~(POLLIN|POLLHUP)) {
+        LOG(ERROR, "unexpected poll event 0x%x on fd %d (expected POLLIN "
+            "and/or POLLHUP) reading %s",
+            revents, fd, qmpwa->what);
+        libxl__ev_fd_deregister(gc, &qmpwa->notify_efd);
+        return;
+    }
+    for (;;) {
+        int r;
+        char buffer[128];
+
+
+        r = read(fd, buffer, sizeof(buffer) - 1);
+        if (r < 0) {
+            if (errno == EINTR) continue;
+            assert(errno);
+            if (errno == EWOULDBLOCK) {
+                break;
+            }
+            LOGE(ERROR, "error reading %s", qmpwa->what);
+            /*datacopier_callback(egc, dc, ERROR_FAIL, 0, errno);*/
+            return;
+        }
+        if (r == 0) {
+            LOG(ERROR, "eof maybe on fd %d for %s",
+                fd, qmpwa->what);
+            return;
+        }
+        LOG(DEBUG, "have read %d from fd %d for %s",
+            r, fd, qmpwa->what);
+        buffer[r] = '\0';
+        LOG(DEBUG, "read: '%s'", buffer);
+        if (!qmpwa->life_time_read) {
+            qmpwa->life_time_read++;
+
+            int rc;
+            libxl__dm_spawn_state *dmss = CONTAINER_OF(ss, *dmss, spawn);
+
+            LOGD(DEBUG, dmss->guest_domid, "will query status via qmp");
+            rc = libxl__qmp_query_status(gc, dmss->guest_domid, "running");
+            if (rc) {
+                LOGD(DEBUG, dmss->guest_domid, "query status failed with: %d", 
rc);
+
+                if (rc == ERROR_NOT_READY){
+                    LOGD(DEBUG, dmss->guest_domid, "retry for prelaunch 
state");
+                    rc = libxl__qmp_query_status(gc, dmss->guest_domid, 
"prelaunch");
+                }
+            }
+            if (!rc)
+                libxl__spawn_initiate_detach(gc, ss);
+        }
+        // end of a QMP result
+        if (strstr(buffer, "\r\n")) {
+            libxl__dm_spawn_state *dmss = CONTAINER_OF(ss, *dmss, spawn);
+            LOGD(DEBUG, dmss->guest_domid, "have a full QMP line, close fd");
+            libxl__ev_fd_deregister(gc, &qmpwa->notify_efd);
+            libxl__carefd_close(qmpwa->notify_cfd);
+            qmpwa->notify_cfd = NULL;
+        }
+        break;
+    }
+}
+
+void qmpwait_timeout_callback(libxl__egc *egc, libxl__ev_time *ev,
+                             const struct timeval *requested_abs,
+                             int rc)
+{
+    EGC_GC;
+    libxl__qmpwait_state *qmpwa = CONTAINER_OF(ev, *qmpwa, time_ev);
+    LOG(DEBUG, "%s: qmpwait timeout", qmpwa->what);
+    qmpwait_report_error(egc, qmpwa, rc);
+}
+
+static void qmpwait_report_error(libxl__egc *egc, libxl__qmpwait_state *qmpwa,
+                                int rc)
+{
+    EGC_GC;
+    libxl__qmpwait_stop(gc, qmpwa);
+    /*qmpwa->callback(egc, qmpwa, rc, 0);*/
+    {
+        // reuse the xswait callback for the error handling.
+        libxl__spawn_state *ss = CONTAINER_OF(qmpwa, *ss, qmpwait);
+        qmpwa->callback(egc, &ss->xswait, rc, 0);
+    }
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c
index b662395b2e..772350abd4 100644
--- a/tools/libxl/libxl_dm.c
+++ b/tools/libxl/libxl_dm.c
@@ -926,7 +926,7 @@ static int libxl__build_device_model_args_new(libxl__gc *gc,
                                         const libxl_domain_config 
*guest_config,
                                         char ***args, char ***envs,
                                         const libxl__domain_build_state *state,
-                                        int *dm_state_fd)
+                                        int *dm_state_fd, int dm_notify_fd)
 {
     const libxl_domain_create_info *c_info = &guest_config->c_info;
     const libxl_domain_build_info *b_info = &guest_config->b_info;
@@ -973,6 +973,20 @@ static int libxl__build_device_model_args_new(libxl__gc 
*gc,
     flexarray_append(dm_args, "-mon");
     flexarray_append(dm_args, "chardev=libxenstat-cmd,mode=control");
 
+    /* Monitor used at boot time */
+    if (dm_notify_fd >= 0) {
+        flexarray_vappend(dm_args,
+                          "-add-fd", GCSPRINTF("fd=%d,set=1", dm_notify_fd),
+                          // can't be a pipe, because pipe would needs to be 
r/w
+                          // but it's write only
+                          // append is because otherwise qemu will attempt to
+                          // truncate the fd
+                          "-chardev",
+                          "file,id=notify,path=/dev/fdset/1,append=on",
+                          "-mon", "chardev=notify,mode=control",
+                          NULL);
+    }
+
     for (i = 0; i < guest_config->num_channels; i++) {
         connection = guest_config->channels[i].connection;
         devid = guest_config->channels[i].devid;
@@ -1735,7 +1749,8 @@ static int libxl__build_device_model_args(libxl__gc *gc,
                                         const libxl_domain_config 
*guest_config,
                                         char ***args, char ***envs,
                                         const libxl__domain_build_state *state,
-                                        int *dm_state_fd)
+                                        int *dm_state_fd,
+                                        int dm_notify_fd)
 /* dm_state_fd may be NULL iff caller knows we are using old stubdom
  * and therefore will be passing a filename rather than a fd. */
 {
@@ -1751,7 +1766,7 @@ static int libxl__build_device_model_args(libxl__gc *gc,
         return libxl__build_device_model_args_new(gc, dm,
                                                   guest_domid, guest_config,
                                                   args, envs,
-                                                  state, dm_state_fd);
+                                                  state, dm_state_fd, 
dm_notify_fd);
     default:
         LOGED(ERROR, guest_domid, "unknown device model version %d",
               guest_config->b_info.device_model_version);
@@ -1971,7 +1986,7 @@ void libxl__spawn_stub_dm(libxl__egc *egc, 
libxl__stub_dm_spawn_state *sdss)
 
     ret = libxl__build_device_model_args(gc, "stubdom-dm", guest_domid,
                                          guest_config, &args, NULL,
-                                         d_state, NULL);
+                                         d_state, NULL, -1);
     if (ret) {
         ret = ERROR_FAIL;
         goto out;
@@ -2257,6 +2272,8 @@ void libxl__spawn_local_dm(libxl__egc *egc, 
libxl__dm_spawn_state *dmss)
     char **pass_stuff;
     const char *dm;
     int dm_state_fd = -1;
+    int dm_notify_fd[2] = { -1, -1 };
+    libxl__carefd *cf_read_end;
 
     if (libxl_defbool_val(b_info->device_model_stubdomain)) {
         abort();
@@ -2272,9 +2289,20 @@ void libxl__spawn_local_dm(libxl__egc *egc, 
libxl__dm_spawn_state *dmss)
         rc = ERROR_FAIL;
         goto out;
     }
+    if (b_info->device_model_version == LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN) {
+        libxl__carefd_begin();
+        rc = libxl_pipe(ctx, dm_notify_fd);
+        cf_read_end = libxl__carefd_opened(ctx, dm_notify_fd[0]);
+        if (rc)
+            goto out;
+        rc = libxl_fd_set_nonblock(CTX, libxl__carefd_fd(cf_read_end), 1);
+        if (rc)
+            goto out;
+    }
     rc = libxl__build_device_model_args(gc, dm, domid, guest_config,
                                           &args, &envs, state,
-                                          &dm_state_fd);
+                                          &dm_state_fd,
+                                          dm_notify_fd[1]);
     if (rc)
         goto out;
 
@@ -2357,6 +2385,9 @@ retry_transaction:
     spawn->failure_cb = device_model_startup_failed;
     spawn->detached_cb = device_model_detached;
 
+    LOGD(DEBUG, domid, "Using QMP instead of xenstore for QEMU startup 
notification");
+    spawn->qmpwait.notify_cfd = cf_read_end;
+
     rc = libxl__spawn_spawn(egc, spawn);
     if (rc < 0)
         goto out_close;
@@ -2372,6 +2403,9 @@ out_close:
     if (logfile_w >= 0) close(logfile_w);
 out:
     if (dm_state_fd >= 0) close(dm_state_fd);
+    if (rc && cf_read_end)
+        libxl__carefd_close(cf_read_end);
+    if (dm_notify_fd[1] >= 0) close(dm_notify_fd[1]);
     if (rc)
         device_model_spawn_outcome(egc, dmss, rc);
 }
diff --git a/tools/libxl/libxl_exec.c b/tools/libxl/libxl_exec.c
index 02e6c917f0..165b6e8410 100644
--- a/tools/libxl/libxl_exec.c
+++ b/tools/libxl/libxl_exec.c
@@ -272,6 +272,7 @@ void libxl__spawn_init(libxl__spawn_state *ss)
 {
     libxl__ev_child_init(&ss->mid);
     libxl__xswait_init(&ss->xswait);
+    libxl__qmpwait_init(&ss->qmpwait);
 }
 
 int libxl__spawn_spawn(libxl__egc *egc, libxl__spawn_state *ss)
@@ -284,13 +285,24 @@ int libxl__spawn_spawn(libxl__egc *egc, 
libxl__spawn_state *ss)
     libxl__spawn_init(ss);
     ss->rc = ss->detaching = 0;
 
-    ss->xswait.ao = ao;
-    ss->xswait.what = GCSPRINTF("%s startup", ss->what);
-    ss->xswait.path = ss->xspath;
-    ss->xswait.timeout_ms = ss->timeout_ms;
-    ss->xswait.callback = spawn_watch_event;
-    rc = libxl__xswait_start(gc, &ss->xswait);
-    if (rc) goto out_err;
+    if (ss->qmpwait.notify_cfd) {
+        ss->qmpwait.ao = ao;
+        ss->qmpwait.what = GCSPRINTF("%s startup (QMP)", ss->what);
+        ss->qmpwait.timeout_ms = ss->timeout_ms;
+        ss->qmpwait.callback = spawn_watch_event;
+
+        rc = libxl__qmpwait_start(gc, &ss->qmpwait);
+        if (rc) goto out_err;
+
+    } else {
+        ss->xswait.ao = ao;
+        ss->xswait.what = GCSPRINTF("%s startup", ss->what);
+        ss->xswait.path = ss->xspath;
+        ss->xswait.timeout_ms = ss->timeout_ms;
+        ss->xswait.callback = spawn_watch_event;
+        rc = libxl__xswait_start(gc, &ss->xswait);
+        if (rc) goto out_err;
+    }
 
     pid_t middle = libxl__ev_child_fork(gc, &ss->mid, spawn_middle_death);
     if (middle ==-1) { rc = ERROR_FAIL; goto out_err; }
@@ -347,6 +359,7 @@ static void spawn_cleanup(libxl__gc *gc, libxl__spawn_state 
*ss)
 {
     assert(!libxl__ev_child_inuse(&ss->mid));
     libxl__xswait_stop(gc, &ss->xswait);
+    libxl__qmpwait_stop(gc, &ss->qmpwait);
 }
 
 static void spawn_detach(libxl__gc *gc, libxl__spawn_state *ss)
@@ -359,6 +372,7 @@ static void spawn_detach(libxl__gc *gc, libxl__spawn_state 
*ss)
     assert(libxl__ev_child_inuse(&ss->mid));
     assert(ss->detaching || ss->rc);
     libxl__xswait_stop(gc, &ss->xswait);
+    libxl__qmpwait_stop(gc, &ss->qmpwait);
 
     pid_t child = ss->mid.pid;
     r = kill(child, SIGKILL);
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 3764d26463..aa78b8e369 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -201,6 +201,7 @@ typedef struct libxl__ao libxl__ao;
 typedef struct libxl__aop_occurred libxl__aop_occurred;
 typedef struct libxl__osevent_hook_nexus libxl__osevent_hook_nexus;
 typedef struct libxl__osevent_hook_nexi libxl__osevent_hook_nexi;
+typedef struct libxl__carefd libxl__carefd;
 
 typedef struct libxl__domain_create_state libxl__domain_create_state;
 typedef void libxl__domain_create_cb(struct libxl__egc *egc,
@@ -1328,6 +1329,34 @@ bool libxl__xswait_inuse(const libxl__xswait_state *ss);
 
 int libxl__xswait_start(libxl__gc*, libxl__xswait_state*);
 
+/*-------- qmpwait: wait for QEMU to start QMP on a pipe -------*/
+
+typedef struct libxl__qmpwait_state  libxl__qmpwait_state;
+
+struct libxl__qmpwait_state {
+    /* caller must fill these in, and they must all remain valid */
+    libxl__ao *ao;
+    const char *what; /* for error msgs: noun phrase, what we're waiting for */
+    int timeout_ms; /* as for poll(2) */
+    /* read-end of a pipe where QEMU is going to write once QMP is available */
+    libxl__carefd *notify_cfd;
+    /* remaining fields are private to qmpwait */
+    libxl__ev_time time_ev;
+    libxl__ev_fd notify_efd;
+    // until better implementation
+    libxl__xswait_callback *callback;
+
+    /* Used to find out if something has been written to the pipe */
+    int life_time_read;
+};
+
+void libxl__qmpwait_init(libxl__qmpwait_state*);
+void libxl__qmpwait_stop(libxl__gc*, libxl__qmpwait_state*); /*idempotent*/
+bool libxl__qmpwait_inuse(const libxl__qmpwait_state *ss);
+
+int libxl__qmpwait_start(libxl__gc*, libxl__qmpwait_state*);
+
+
 /*
  * libxl__ev_devstate - waits a given time for a device to
  * reach a given state.  Follows the libxl_ev_* conventions.
@@ -1568,6 +1597,7 @@ struct libxl__spawn_state {
     int rc; /* might be non-0 whenever we are not Idle */
     libxl__ev_child mid; /* always in use whenever we are not Idle */
     libxl__xswait_state xswait;
+    libxl__qmpwait_state qmpwait;
 };
 
 static inline int libxl__spawn_inuse(const libxl__spawn_state *ss)
-- 
Anthony PERARD


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.