[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v3 3/9] libxl_internal: Introduce libxl__ev_devlock for devices hotplug via QMP



The current lock `domain_userdata_lock' can't be used when modification
to a guest is done by sending command to QEMU, this is a slow process
and requires to call CTX_UNLOCK, which is not possible while holding
the `domain_userdata_lock'.

To resolve this issue, we create a new lock which can take over part
of the job of the json_lock.

This lock is outside CTX_LOCK in the lock hierarchy.
libxl__ev_devlock_lock will have CTX_UNLOCK before trying to grab the
ev_devlock. The callback is used to notify when the ev_devlock have
been acquired.

Signed-off-by: Anthony PERARD <anthony.perard@xxxxxxxxxx>
---

Notes:
    v3:
    - fix use of LOGED where errno numeric value was printed.
    - change comment in libxl__ev_unlock to refer to the one in
      libxl__unlock_domain_userdata() instead of making a copy of it.
    - rename ev_lock to ev_devlock. (And _get to _lock.)
    
    v2:
    - new patch, to replace 2 patch
      implement a different lock

 tools/libxl/libxl_internal.c | 163 +++++++++++++++++++++++++++++++++++
 tools/libxl/libxl_internal.h |  76 +++++++++++++++-
 2 files changed, 235 insertions(+), 4 deletions(-)

diff --git a/tools/libxl/libxl_internal.c b/tools/libxl/libxl_internal.c
index f492dae5ff70..28a126ccc342 100644
--- a/tools/libxl/libxl_internal.c
+++ b/tools/libxl/libxl_internal.c
@@ -575,6 +575,169 @@ void libxl__update_domain_configuration(libxl__gc *gc,
     dst->b_info.video_memkb = src->b_info.video_memkb;
 }
 
+void libxl__ev_devlock_init(libxl__ev_devlock *lock)
+{
+    libxl__ev_child_init(&lock->child);
+    lock->path = NULL;
+    lock->fd = -1;
+    lock->held = false;
+}
+
+static void ev_lock_prepare_fork(libxl__egc *egc, libxl__ev_devlock *lock);
+static void ev_lock_child_callback(libxl__egc *egc, libxl__ev_child *child,
+                                   pid_t pid, int status);
+
+void libxl__ev_devlock_lock(libxl__egc *egc, libxl__ev_devlock *lock)
+{
+    STATE_AO_GC(lock->ao);
+    const char *lockfile;
+
+    lockfile = libxl__userdata_path(gc, lock->domid,
+                                    "libxl-device-changes-lock", "l");
+    if (!lockfile) goto out;
+    lock->path = libxl__strdup(NOGC, lockfile);
+
+    ev_lock_prepare_fork(egc, lock);
+    return;
+out:
+    lock->callback(egc, lock, ERROR_LOCK_FAIL);
+}
+
+static void ev_lock_prepare_fork(libxl__egc *egc, libxl__ev_devlock *lock)
+{
+    STATE_AO_GC(lock->ao);
+    pid_t pid;
+    int fd;
+
+    /* Convenience aliases */
+    libxl_domid domid = lock->domid;
+    const char *lockfile = lock->path;
+
+    lock->fd = open(lockfile, O_RDWR|O_CREAT, 0666);
+    if (lock->fd < 0) {
+        LOGED(ERROR, domid, "cannot open lockfile %s", lockfile);
+        goto out;
+    }
+    fd = lock->fd;
+
+    pid = libxl__ev_child_fork(gc, &lock->child, ev_lock_child_callback);
+    if (pid < 0)
+        goto out;
+    if (!pid) {
+        /* child */
+        int exit_val = 0;
+
+        /* Lock the file in exclusive mode, wait indefinitely to
+         * acquire the lock */
+        while (flock(fd, LOCK_EX)) {
+            switch (errno) {
+            case EINTR:
+                /* Signal received, retry */
+                continue;
+            default:
+                /* All other errno: EBADF, EINVAL, ENOLCK, EWOULDBLOCK */
+                LOGED(ERROR, domid,
+                      "unexpected error while trying to lock %s, fd=%d",
+                      lockfile, fd);
+                exit_val = 1;
+                break;
+            }
+        }
+        _exit(exit_val);
+    }
+
+    /* Now that the child has the fd, set cloexec in the parent to prevent
+     * more leakage than necessary */
+    libxl_fd_set_cloexec(CTX, fd, 1);
+    return;
+out:
+    libxl__ev_devlock_unlock(gc, lock);
+    lock->callback(egc, lock, ERROR_LOCK_FAIL);
+}
+
+static void ev_lock_child_callback(libxl__egc *egc, libxl__ev_child *child,
+                                   pid_t pid, int status)
+{
+    EGC_GC;
+    libxl__ev_devlock *lock = CONTAINER_OF(child, *lock, child);
+    struct stat stab, fstab;
+    int rc = ERROR_LOCK_FAIL;
+
+    /* Convenience aliases */
+    int fd = lock->fd;
+    const char *lockfile = lock->path;
+    libxl_domid domid = lock->domid;
+
+    if (status) {
+        libxl_report_child_exitstatus(CTX, XTL_ERROR, "flock child",
+                                      pid, status);
+        goto out;
+    }
+
+    if (fstat(fd, &fstab)) {
+        LOGED(ERROR, domid, "cannot fstat %s, fd=%d", lockfile, fd);
+        goto out;
+    }
+    if (stat(lockfile, &stab)) {
+        if (errno != ENOENT) {
+            LOGED(ERROR, domid, "cannot stat %s", lockfile);
+            goto out;
+        }
+    } else {
+        if (stab.st_dev == fstab.st_dev && stab.st_ino == fstab.st_ino) {
+            /* We held the lock */
+            lock->held = true;
+            rc = 0;
+            goto out;
+        }
+    }
+
+    /* We didn't grab the lock, let's try again */
+    flock(lock->fd, LOCK_UN);
+    close(lock->fd);
+    lock->fd = -1;
+    ev_lock_prepare_fork(egc, lock);
+    return;
+
+out:
+    if (lock->held) {
+        /* Check the domain is still there, if not we should release the
+         * lock and clean up.  */
+        if (libxl_domain_info(CTX, NULL, domid))
+            rc = ERROR_LOCK_FAIL;
+    }
+    if (rc) {
+        LOGD(ERROR, domid, "Failed to grab qmp-lock");
+        libxl__ev_devlock_unlock(gc, lock);
+    }
+    lock->callback(egc, lock, rc);
+}
+
+void libxl__ev_devlock_unlock(libxl__gc *gc, libxl__ev_devlock *lock)
+{
+    int r;
+
+    assert(!libxl__ev_child_inuse(&lock->child));
+
+    /* See the rationale in libxl__unlock_domain_userdata()
+     * about why we do unlink() before unlock(). */
+
+    if (lock->path && lock->held)
+        unlink(lock->path);
+
+    if (lock->fd >= 0) {
+        /* We need to call unlock as the fd may have leaked into other
+         * processes */
+        r = flock(lock->fd, LOCK_UN);
+        if (r)
+            LOGED(ERROR, lock->domid, "failed to unlock fd=%d, path=%s",
+                  lock->fd, lock->path);
+        close(lock->fd);
+    }
+    free(lock->path);
+    libxl__ev_devlock_init(lock);
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 03e086480ca3..c7bcde5edae7 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -194,6 +194,7 @@ typedef struct libxl__osevent_hook_nexus 
libxl__osevent_hook_nexus;
 typedef struct libxl__osevent_hook_nexi libxl__osevent_hook_nexi;
 typedef struct libxl__json_object libxl__json_object;
 typedef struct libxl__carefd libxl__carefd;
+typedef struct libxl__ev_devlock libxl__ev_devlock;
 
 typedef struct libxl__domain_create_state libxl__domain_create_state;
 typedef void libxl__domain_create_cb(struct libxl__egc *egc,
@@ -2724,11 +2725,11 @@ struct libxl__multidev {
  * device information, in JSON files, so that we can use this JSON
  * file as a template to reconstruct domain configuration.
  *
- * In essense there are now two views of device state, one is xenstore,
- * the other is JSON file. We use xenstore as primary reference.
+ * In essense there are now two views of device state, one is the
+ * primary config (xenstore or QEMU), the other is JSON file.
  *
- * Here we maintain one invariant: every device in xenstore must have
- * an entry in JSON file.
+ * Here we maintain one invariant: every device in the primary config
+ * must have an entry in JSON file.
  *
  * All device hotplug routines should comply to following pattern:
  *   lock json config (json_lock)
@@ -2743,6 +2744,24 @@ struct libxl__multidev {
  *       end for loop
  *   unlock json config
  *
+ * Or in case QEMU is the primary config, this pattern can be use:
+ *   qmp_lock (libxl__ev_devlock)
+ *      lock json config (json_lock)
+ *          read json config
+ *          update in-memory json config with new entry, replacing
+ *             any stale entry
+ *      unlock json config
+ *      apply new config to primary config
+ *      lock json config (json_lock)
+ *          read json config
+ *          update in-memory json config with new entry, replacing
+ *             any stale entry
+ *          write in-memory json config to disk
+ *      unlock json config
+ *   unlock qmp_lock
+ *   (CTX_LOCK can be acquired and released several time while holding the
+ *    qmp_lock)
+ *
  * Device removal routines are not touched.
  *
  * Here is the proof that we always maintain that invariant and we
@@ -4603,6 +4622,55 @@ static inline const char *libxl__qemu_qmp_path(libxl__gc 
*gc, int domid)
 {
     return GCSPRINTF("%s/qmp-libxl-%d", libxl__run_dir_path(), domid);
 }
+
+/*
+ * Lock for device hotplug, qmp_lock.
+ *
+ * libxl__ev_devlock implement a lock that is outside of CTX_LOCK in the
+ * lock hierarchy. It can be used when one want to make QMP calls to QEMU,
+ * which may take a significant amount time.
+ * It is to be acquired by an ao event callback.
+ *
+ * It is to be acquired when adding/removing devices or making changes
+ * to them when this is a slow operation and json_lock isn't appropriate.
+ *
+ * Possible states of libxl__ev_devlock:
+ *   Undefined
+ *    Might contain anything.
+ *  Idle
+ *    Struct contents are defined enough to pass to any
+ *    libxl__ev_devlock_* function.
+ *    The struct does not contain references to any allocated private
+ *    resources so can be thrown away.
+ *  Active
+ *    Waiting to get a lock.
+ *    Needs to wait until the callback is called.
+ *  LockAcquired
+ *    libxl__ev_devlock_unlock will need to be called to release the lock
+ *    and the resources of libxl__ev_devlock.
+ *
+ *  libxl__ev_devlock_init: Undefined/Idle -> Idle
+ *  libxl__ev_devlock_lock: Idle -> Active
+ *    May call callback synchronously.
+ *  libxl__ev_devlock_unlock: LockAcquired/Idle -> Idle
+ *  callback:     When called: Active -> LockAcquired (on error: Idle)
+ *    The callback is only called once.
+ */
+struct libxl__ev_devlock {
+    /* filled by user */
+    libxl__ao *ao;
+    libxl_domid domid;
+    void (*callback)(libxl__egc *, libxl__ev_devlock *, int rc);
+    /* private to libxl__ev_devlock* */
+    libxl__ev_child child;
+    char *path; /* path of the lock file itself */
+    int fd;
+    bool held;
+};
+_hidden void libxl__ev_devlock_init(libxl__ev_devlock *);
+_hidden void libxl__ev_devlock_lock(libxl__egc *, libxl__ev_devlock *);
+_hidden void libxl__ev_devlock_unlock(libxl__gc *, libxl__ev_devlock *);
+
 #endif
 
 /*
-- 
Anthony PERARD


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.