[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 02/11] libxl: react correctly to bootloader pty master POLLHUP



Receive POLLHUP on the bootloader master pty is not an error.
Hopefully it means that the bootloader has exited and therefore the
pty slave side has no process group any more.  (At least NetBSD
indicates POLLHUP on the master in this case.)

So send the bootloader SIGTERM; if it has already exited then this has
no effect (except that on some versions of NetBSD it erroneously
returns ESRCH and we print a harmless warning) and we will then
collect the bootloader's exit status and be satisfied.

However, we remember that we have done this so that if we got POLLHUP
for some other reason than that the bootloader exited we report
something resembling a useful message.

In order to implement this we need to provide a way for users of
datacopier to handle POLLHUP rather than treating it as fatal.

We rename bootloader_abort to bootloader_stop since it now no longer
only applies to error situations.

Signed-off-by: Roger Pau Monne <roger.pau@xxxxxxxxxx>
Signed-off-by: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>

-
Changes in v4:
 * Track whether we sent SIGTERM due to POLLHUP so we can report
   messages properly.

Changes in v3:
 * datacopier provides new interface for handling POLLHUP
 * Do not ignore errors on the xenconsole pty
 * Rename bootloader_abort.
---
 tools/libxl/libxl_aoutils.c    |   23 +++++++++++++++++++++++
 tools/libxl/libxl_bootloader.c |   39 +++++++++++++++++++++++++++++----------
 tools/libxl/libxl_internal.h   |    7 +++++--
 3 files changed, 57 insertions(+), 12 deletions(-)

diff --git a/tools/libxl/libxl_aoutils.c b/tools/libxl/libxl_aoutils.c
index 99972a2..4bd5484 100644
--- a/tools/libxl/libxl_aoutils.c
+++ b/tools/libxl/libxl_aoutils.c
@@ -97,11 +97,31 @@ void libxl__datacopier_prefixdata(libxl__egc *egc, 
libxl__datacopier_state *dc,
     LIBXL_TAILQ_INSERT_TAIL(&dc->bufs, buf, entry);
 }
 
+static int datacopier_pollhup_handled(libxl__egc *egc,
+                                      libxl__datacopier_state *dc,
+                                      short revents, int onwrite)
+{
+    STATE_AO_GC(dc->ao);
+
+    if (dc->callback_pollhup && (revents & POLLHUP)) {
+        LOG(DEBUG, "received POLLHUP on %s during copy of %s",
+            onwrite ? dc->writewhat : dc->readwhat,
+            dc->copywhat);
+        libxl__datacopier_kill(dc);
+        dc->callback(egc, dc, onwrite, -1);
+        return 1;
+    }
+    return 0;
+}
+
 static void datacopier_readable(libxl__egc *egc, libxl__ev_fd *ev,
                                 int fd, short events, short revents) {
     libxl__datacopier_state *dc = CONTAINER_OF(ev, *dc, toread);
     STATE_AO_GC(dc->ao);
 
+    if (datacopier_pollhup_handled(egc, dc, revents, 0))
+        return;
+
     if (revents & ~POLLIN) {
         LOG(ERROR, "unexpected poll event 0x%x (should be POLLIN)"
             " on %s during copy of %s", revents, dc->readwhat, dc->copywhat);
@@ -163,6 +183,9 @@ static void datacopier_writable(libxl__egc *egc, 
libxl__ev_fd *ev,
     libxl__datacopier_state *dc = CONTAINER_OF(ev, *dc, towrite);
     STATE_AO_GC(dc->ao);
 
+    if (datacopier_pollhup_handled(egc, dc, revents, 1))
+        return;
+
     if (revents & ~POLLOUT) {
         LOG(ERROR, "unexpected poll event 0x%x (should be POLLOUT)"
             " on %s during copy of %s", revents, dc->writewhat, dc->copywhat);
diff --git a/tools/libxl/libxl_bootloader.c b/tools/libxl/libxl_bootloader.c
index ef5a91b..bfc1b56 100644
--- a/tools/libxl/libxl_bootloader.c
+++ b/tools/libxl/libxl_bootloader.c
@@ -215,6 +215,7 @@ void libxl__bootloader_init(libxl__bootloader_state *bl)
     libxl__domaindeathcheck_init(&bl->deathcheck);
     bl->keystrokes.ao = bl->ao;  libxl__datacopier_init(&bl->keystrokes);
     bl->display.ao = bl->ao;     libxl__datacopier_init(&bl->display);
+    bl->got_pollhup = 0;
 }
 
 static void bootloader_cleanup(libxl__egc *egc, libxl__bootloader_state *bl)
@@ -275,7 +276,7 @@ static void bootloader_local_detached_cb(libxl__egc *egc,
 }
 
 /* might be called at any time, provided it's init'd */
-static void bootloader_abort(libxl__egc *egc,
+static void bootloader_stop(libxl__egc *egc,
                              libxl__bootloader_state *bl, int rc)
 {
     STATE_AO_GC(bl->ao);
@@ -285,8 +286,8 @@ static void bootloader_abort(libxl__egc *egc,
     libxl__datacopier_kill(&bl->display);
     if (libxl__ev_child_inuse(&bl->child)) {
         r = kill(bl->child.pid, SIGTERM);
-        if (r) LOGE(WARN, "after failure, failed to kill bootloader [%lu]",
-                    (unsigned long)bl->child.pid);
+        if (r) LOGE(WARN, "%sfailed to kill bootloader [%lu]",
+                    rc ? "after failure, " : "", (unsigned long)bl->child.pid);
     }
     bl->rc = rc;
 }
@@ -508,7 +509,10 @@ static void bootloader_gotptys(libxl__egc *egc, 
libxl__openpty_state *op)
     bl->keystrokes.maxsz = BOOTLOADER_BUF_OUT;
     bl->keystrokes.copywhat =
         GCSPRINTF("bootloader input for domain %"PRIu32, bl->domid);
-    bl->keystrokes.callback = bootloader_keystrokes_copyfail;
+    bl->keystrokes.callback =         bootloader_keystrokes_copyfail;
+    bl->keystrokes.callback_pollhup = bootloader_keystrokes_copyfail;
+        /* pollhup gets called with errnoval==-1 which is not otherwise
+         * possible since errnos are nonnegative, so it's unambiguous */
     rc = libxl__datacopier_start(&bl->keystrokes);
     if (rc) goto out;
 
@@ -516,7 +520,8 @@ static void bootloader_gotptys(libxl__egc *egc, 
libxl__openpty_state *op)
     bl->display.maxsz = BOOTLOADER_BUF_IN;
     bl->display.copywhat =
         GCSPRINTF("bootloader output for domain %"PRIu32, bl->domid);
-    bl->display.callback = bootloader_display_copyfail;
+    bl->display.callback =         bootloader_display_copyfail;
+    bl->display.callback_pollhup = bootloader_display_copyfail;
     rc = libxl__datacopier_start(&bl->display);
     if (rc) goto out;
 
@@ -562,30 +567,42 @@ static void bootloader_gotptys(libxl__egc *egc, 
libxl__openpty_state *op)
 
 /* perhaps one of these will be called, but perhaps not */
 static void bootloader_copyfail(libxl__egc *egc, const char *which,
-       libxl__bootloader_state *bl, int onwrite, int errnoval)
+        libxl__bootloader_state *bl, int ondisplay, int onwrite, int errnoval)
 {
     STATE_AO_GC(bl->ao);
+    int rc = ERROR_FAIL;
+
+    if (errnoval==-1) {
+        /* POLLHUP */
+        if (!!ondisplay != !!onwrite) {
+            rc = 0;
+            bl->got_pollhup = 1;
+        } else {
+            LOG(ERROR, "unexpected POLLHUP on %s", which);
+        }
+    }
     if (!onwrite && !errnoval)
         LOG(ERROR, "unexpected eof copying %s", which);
-    bootloader_abort(egc, bl, ERROR_FAIL);
+
+    bootloader_stop(egc, bl, rc);
 }
 static void bootloader_keystrokes_copyfail(libxl__egc *egc,
        libxl__datacopier_state *dc, int onwrite, int errnoval)
 {
     libxl__bootloader_state *bl = CONTAINER_OF(dc, *bl, keystrokes);
-    bootloader_copyfail(egc, "bootloader input", bl, onwrite, errnoval);
+    bootloader_copyfail(egc, "bootloader input", bl, 0, onwrite, errnoval);
 }
 static void bootloader_display_copyfail(libxl__egc *egc,
        libxl__datacopier_state *dc, int onwrite, int errnoval)
 {
     libxl__bootloader_state *bl = CONTAINER_OF(dc, *bl, display);
-    bootloader_copyfail(egc, "bootloader output", bl, onwrite, errnoval);
+    bootloader_copyfail(egc, "bootloader output", bl, 1, onwrite, errnoval);
 }
 
 static void bootloader_domaindeath(libxl__egc *egc, libxl__domaindeathcheck 
*dc)
 {
     libxl__bootloader_state *bl = CONTAINER_OF(dc, *bl, deathcheck);
-    bootloader_abort(egc, bl, ERROR_FAIL);
+    bootloader_stop(egc, bl, ERROR_FAIL);
 }
 
 static void bootloader_finished(libxl__egc *egc, libxl__ev_child *child,
@@ -599,6 +616,8 @@ static void bootloader_finished(libxl__egc *egc, 
libxl__ev_child *child,
     libxl__datacopier_kill(&bl->display);
 
     if (status) {
+        if (bl->got_pollhup && WIFSIGNALED(status) && 
WTERMSIG(status)==SIGTERM)
+            LOG(ERROR, "got POLLHUP, sent SIGTERM");
         LOG(ERROR, "bootloader failed - consult logfile %s", bl->logfile);
         libxl_report_child_exitstatus(CTX, XTL_ERROR, "bootloader",
                                       pid, status);
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 691b4f6..c57503f 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -2070,7 +2070,9 @@ typedef struct libxl__datacopier_buf 
libxl__datacopier_buf;
  *     errnoval==0 means we got eof and all data was written
  *     errnoval!=0 means we had a read error, logged
  * onwrite==-1 means some other internal failure, errnoval not valid, logged
- * in all cases copier is killed before calling this callback */
+ * If we get POLLHUP, we call callback_pollhup(..., onwrite, -1);
+ * or if callback_pollhup==0 this is an internal failure, as above.
+ * In all cases copier is killed before calling this callback */
 typedef void libxl__datacopier_callback(libxl__egc *egc,
      libxl__datacopier_state *dc, int onwrite, int errnoval);
 
@@ -2089,6 +2091,7 @@ struct libxl__datacopier_state {
     const char *copywhat, *readwhat, *writewhat; /* for error msgs */
     FILE *log; /* gets a copy of everything */
     libxl__datacopier_callback *callback;
+    libxl__datacopier_callback *callback_pollhup;
     /* remaining fields are private to datacopier */
     libxl__ev_fd toread, towrite;
     ssize_t used;
@@ -2273,7 +2276,7 @@ struct libxl__bootloader_state {
     int nargs, argsspace;
     const char **args;
     libxl__datacopier_state keystrokes, display;
-    int rc;
+    int rc, got_pollhup;
 };
 
 _hidden void libxl__bootloader_init(libxl__bootloader_state *bl);
-- 
1.7.2.5


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.