[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v2 3/9] libxl_internal: Introduce libxl__ev_lock for devices hotplug via QMP



The current lock `domain_userdata_lock' can't be used when modification
to a guest is done by sending command to QEMU, this is a slow process
and requires to call CTX_UNLOCK, which is not possible while holding
the `domain_userdata_lock'.

To resolve this issue, we create a new lock which can take over part
of the job of the json_lock.

This lock is outside CTX_LOCK in the lock hierarchy.
libxl__ev_lock_get will have CTX_UNLOCK before trying to grab the
ev_lock. The callback is used to notify when the ev_lock have been
acquired.

Signed-off-by: Anthony PERARD <anthony.perard@xxxxxxxxxx>
---

Notes:
    Should libxl__ev_unlock() kill the child?
    
    That would mean that we would need to handle the case where the process
    trying to grab the lock got impatient and decided that it was taking too
    long.
    
    v2:
    - new patch, to replace 2 patch
      implement a different lock

 tools/libxl/libxl_internal.c | 174 +++++++++++++++++++++++++++++++++++
 tools/libxl/libxl_internal.h |  76 ++++++++++++++-
 2 files changed, 246 insertions(+), 4 deletions(-)

diff --git a/tools/libxl/libxl_internal.c b/tools/libxl/libxl_internal.c
index f492dae5ff..3906a0512d 100644
--- a/tools/libxl/libxl_internal.c
+++ b/tools/libxl/libxl_internal.c
@@ -575,6 +575,180 @@ void libxl__update_domain_configuration(libxl__gc *gc,
     dst->b_info.video_memkb = src->b_info.video_memkb;
 }
 
+void libxl__ev_lock_init(libxl__ev_lock *lock)
+{
+    libxl__ev_child_init(&lock->child);
+    lock->path = NULL;
+    lock->fd = -1;
+    lock->held = false;
+}
+
+static void ev_lock_prepare_fork(libxl__egc *egc, libxl__ev_lock *lock);
+static void ev_lock_child_callback(libxl__egc *egc, libxl__ev_child *child,
+                                   pid_t pid, int status);
+
+void libxl__ev_lock_get(libxl__egc *egc, libxl__ev_lock *lock)
+{
+    STATE_AO_GC(lock->ao);
+    const char *lockfile;
+
+    lockfile = libxl__userdata_path(gc, lock->domid,
+                                    "libxl-device-changes-lock", "l");
+    if (!lockfile) goto out;
+    lock->path = libxl__strdup(NOGC, lockfile);
+
+    ev_lock_prepare_fork(egc, lock);
+    return;
+out:
+    lock->callback(egc, lock, ERROR_LOCK_FAIL);
+}
+
+static void ev_lock_prepare_fork(libxl__egc *egc, libxl__ev_lock *lock)
+{
+    STATE_AO_GC(lock->ao);
+    pid_t pid;
+    int fd;
+
+    /* Convenience aliases */
+    libxl_domid domid = lock->domid;
+    const char *lockfile = lock->path;
+
+    lock->fd = open(lockfile, O_RDWR|O_CREAT, 0666);
+    if (lock->fd < 0) {
+        LOGED(ERROR, domid, "cannot open lockfile %s", lockfile);
+        goto out;
+    }
+    fd = lock->fd;
+
+    pid = libxl__ev_child_fork(gc, &lock->child, ev_lock_child_callback);
+    if (pid < 0)
+        goto out;
+    if (!pid) {
+        /* child */
+        int exit_val = 0;
+
+        /* Lock the file in exclusive mode, wait indefinitely to
+         * acquire the lock */
+        while (flock(fd, LOCK_EX)) {
+            switch (errno) {
+            case EINTR:
+                /* Signal received, retry */
+                continue;
+            default:
+                /* All other errno: EBADF, EINVAL, ENOLCK, EWOULDBLOCK */
+                LOGED(ERROR, domid,
+                      "unexpected error while trying to lock %s, fd=%d, 
errno=%d",
+                      lockfile, fd, errno);
+                exit_val = 1;
+                break;
+            }
+        }
+        _exit(exit_val);
+    }
+
+    /* Now that the child has the fd, set cloexec in the parent to prevent
+     * more leakage than necessary */
+    libxl_fd_set_cloexec(CTX, fd, 1);
+    return;
+out:
+    libxl__ev_unlock(gc, lock);
+    lock->callback(egc, lock, ERROR_LOCK_FAIL);
+}
+
+static void ev_lock_child_callback(libxl__egc *egc, libxl__ev_child *child,
+                                   pid_t pid, int status)
+{
+    EGC_GC;
+    libxl__ev_lock *lock = CONTAINER_OF(child, *lock, child);
+    struct stat stab, fstab;
+    int rc = ERROR_LOCK_FAIL;
+
+    /* Convenience aliases */
+    int fd = lock->fd;
+    const char *lockfile = lock->path;
+    libxl_domid domid = lock->domid;
+
+    if (status) {
+        libxl_report_child_exitstatus(CTX, XTL_ERROR, "flock child",
+                                      pid, status);
+        goto out;
+    }
+
+    if (fstat(fd, &fstab)) {
+        LOGED(ERROR, domid, "cannot fstat %s, fd=%d", lockfile, fd);
+        goto out;
+    }
+    if (stat(lockfile, &stab)) {
+        if (errno != ENOENT) {
+            LOGED(ERROR, domid, "cannot stat %s", lockfile);
+            goto out;
+        }
+    } else {
+        if (stab.st_dev == fstab.st_dev && stab.st_ino == fstab.st_ino) {
+            /* We held the lock */
+            lock->held = true;
+            rc = 0;
+            goto out;
+        }
+    }
+
+    /* We didn't grab the lock, let's try again */
+    flock(lock->fd, LOCK_UN);
+    close(lock->fd);
+    lock->fd = -1;
+    ev_lock_prepare_fork(egc, lock);
+    return;
+
+out:
+    if (lock->held) {
+        /* Check the domain is still there, if not we should release the
+         * lock and clean up.  */
+        if (libxl_domain_info(CTX, NULL, domid))
+            rc = ERROR_LOCK_FAIL;
+    }
+    if (rc) {
+        LOGD(ERROR, domid, "Failed to grab qmp-lock");
+        libxl__ev_unlock(gc, lock);
+    }
+    lock->callback(egc, lock, rc);
+}
+
+void libxl__ev_unlock(libxl__gc *gc, libxl__ev_lock *lock)
+{
+    int r;
+
+    assert(!libxl__ev_child_inuse(&lock->child));
+
+    /* It's important to unlink the file before releasing the lock to avoid
+     * the following race (if unlock/close before unlink):
+     *
+     *   P1 LOCK                         P2 UNLOCK
+     *   fd1 = open(lockfile)
+     *                                   unlock(fd2)
+     *   flock(fd1)
+     *   fstat and stat check success
+     *                                   unlink(lockfile)
+     *   return lock
+     *
+     * In above case P1 thinks it has got hold of the lock but
+     * actually lock is released by P2 (lockfile unlinked).
+     */
+    if (lock->path && lock->held)
+        unlink(lock->path);
+
+    if (lock->fd >= 0) {
+        /* We need to call unlock as the fd may have leaked into other
+         * processes */
+        r = flock(lock->fd, LOCK_UN);
+        if (r)
+            LOGED(ERROR, lock->domid, "failed to unlock fd=%d, path=%s",
+                  lock->fd, lock->path);
+        close(lock->fd);
+    }
+    free(lock->path);
+    libxl__ev_lock_init(lock);
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index ca7206aaac..2c2476b55b 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -194,6 +194,7 @@ typedef struct libxl__osevent_hook_nexus 
libxl__osevent_hook_nexus;
 typedef struct libxl__osevent_hook_nexi libxl__osevent_hook_nexi;
 typedef struct libxl__json_object libxl__json_object;
 typedef struct libxl__carefd libxl__carefd;
+typedef struct libxl__ev_lock libxl__ev_lock;
 
 typedef struct libxl__domain_create_state libxl__domain_create_state;
 typedef void libxl__domain_create_cb(struct libxl__egc *egc,
@@ -2723,11 +2724,11 @@ struct libxl__multidev {
  * device information, in JSON files, so that we can use this JSON
  * file as a template to reconstruct domain configuration.
  *
- * In essense there are now two views of device state, one is xenstore,
- * the other is JSON file. We use xenstore as primary reference.
+ * In essense there are now two views of device state, one is the
+ * primary config (xenstore or QEMU), the other is JSON file.
  *
- * Here we maintain one invariant: every device in xenstore must have
- * an entry in JSON file.
+ * Here we maintain one invariant: every device in the primary config
+ * must have an entry in JSON file.
  *
  * All device hotplug routines should comply to following pattern:
  *   lock json config (json_lock)
@@ -2742,6 +2743,24 @@ struct libxl__multidev {
  *       end for loop
  *   unlock json config
  *
+ * Or in case QEMU is the primary config, this pattern can be use:
+ *   qmp_lock (libxl__ev_lock)
+ *      lock json config (json_lock)
+ *          read json config
+ *          update in-memory json config with new entry, replacing
+ *             any stale entry
+ *      unlock json config
+ *      apply new config to primary config
+ *      lock json config (json_lock)
+ *          read json config
+ *          update in-memory json config with new entry, replacing
+ *             any stale entry
+ *          write in-memory json config to disk
+ *      unlock json config
+ *   unlock qmp_lock
+ *   (CTX_LOCK can be acquired and released several time while holding the
+ *    qmp_lock)
+ *
  * Device removal routines are not touched.
  *
  * Here is the proof that we always maintain that invariant and we
@@ -4600,6 +4619,55 @@ static inline const char *libxl__qemu_qmp_path(libxl__gc 
*gc, int domid)
 {
     return GCSPRINTF("%s/qmp-libxl-%d", libxl__run_dir_path(), domid);
 }
+
+/*
+ * Lock for device hotplug, qmp_lock.
+ *
+ * libxl__ev_lock implement a lock that is outside of CTX_LOCK in the
+ * lock hierarchy. It can be used when one want to make QMP calls to QEMU,
+ * which may take a significant amount time.
+ * It is to be acquired by an ao event callback.
+ *
+ * It is to be acquired when adding/removing devices or making changes
+ * to them when this is a slow operation and json_lock isn't appropriate.
+ *
+ * Possible states of libxl__ev_lock:
+ *   Undefined
+ *    Might contain anything.
+ *  Idle
+ *    Struct contents are defined enough to pass to any
+ *    libxl__ev_lock_* function.
+ *    The struct does not contain references to any allocated private
+ *    resources so can be thrown away.
+ *  Active
+ *    Waiting to get a lock.
+ *    Needs to wait until the callback is called.
+ *  LockAcquired
+ *    libxl__ev_unlock will need to be called to release the lock and the
+ *    resources of libxl__ev_lock.
+ *
+ *  libxl__ev_lock_init: Undefined/Idle -> Idle
+ *  libxl__ev_lock_get:  Idle -> Active
+ *      May call callback synchronously.
+ *  libxl__ev_unlock:    LockAcquired/Idle -> Idle
+ *  callback:     When called: Active -> LockAcquired (on error: Idle)
+ *    The callback is only called once.
+ */
+struct libxl__ev_lock {
+    /* filled by user */
+    libxl__ao *ao;
+    libxl_domid domid;
+    void (*callback)(libxl__egc *, libxl__ev_lock *, int rc);
+    /* private to libxl__ev_lock* */
+    libxl__ev_child child;
+    char *path; /* path of the lock file itself */
+    int fd;
+    bool held;
+};
+_hidden void libxl__ev_lock_init(libxl__ev_lock *);
+_hidden void libxl__ev_lock_get(libxl__egc *, libxl__ev_lock *);
+_hidden void libxl__ev_unlock(libxl__gc *, libxl__ev_lock *);
+
 #endif
 
 /*
-- 
Anthony PERARD


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.