[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 06/13 V6] remus: implement the API to setup network buffering



From: Shriram Rajagopalan <rshriram@xxxxxxxxx>

The following steps are taken during setup:
 a) Call the hotplug script for each vif to set up its network buffer.

 b) Establish a dedicated remus context containing libnl-related
    state (netlink sockets, qdisc caches, etc.).

 c) Obtain handles to the plug qdiscs installed on the IFB devices
    chosen by the hotplug scripts.

Signed-off-by: Shriram Rajagopalan <rshriram@xxxxxxxxx>
Signed-off-by: Lai Jiangshan <laijs@xxxxxxxxxxxxxx>
Reviewed-by: Wen Congyang <wency@xxxxxxxxxxxxxx>
---
 docs/misc/xenstore-paths.markdown |   4 +
 tools/libxl/Makefile              |   2 +
 tools/libxl/libxl_dom.c           |   7 +-
 tools/libxl/libxl_internal.h      |  11 +
 tools/libxl/libxl_netbuffer.c     | 419 ++++++++++++++++++++++++++++++++++++++
 tools/libxl/libxl_nonetbuffer.c   |   6 +
 tools/libxl/libxl_remus.c         |  35 ++++
 7 files changed, 479 insertions(+), 5 deletions(-)
 create mode 100644 tools/libxl/libxl_remus.c

diff --git a/docs/misc/xenstore-paths.markdown 
b/docs/misc/xenstore-paths.markdown
index 70ab7f4..7a0d2c9 100644
--- a/docs/misc/xenstore-paths.markdown
+++ b/docs/misc/xenstore-paths.markdown
@@ -385,6 +385,10 @@ The guest's virtual time offset from UTC in seconds.
 
 The device model version for a domain.
 
+#### /libxl/$DOMID/remus/netbuf/$DEVID/ifb = STRING [n,INTERNAL]
+
+IFB device used by Remus to buffer network output from the associated vif.
+
 [BLKIF]: 
http://xenbits.xen.org/docs/unstable/hypercall/include,public,io,blkif.h.html
 [FBIF]: 
http://xenbits.xen.org/docs/unstable/hypercall/include,public,io,fbif.h.html
 [HVMPARAMS]: 
http://xenbits.xen.org/docs/unstable/hypercall/include,public,hvm,params.h.html
diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index 84a467c..218f55e 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -52,6 +52,8 @@ else
 LIBXL_OBJS-y += libxl_nonetbuffer.o
 endif
 
+LIBXL_OBJS-y += libxl_remus.o
+
 LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o
 LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o
 
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index 8d63f90..e3e9f6f 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -753,9 +753,6 @@ int libxl__toolstack_restore(uint32_t domid, const uint8_t 
*buf,
 
 /*==================== Domain suspend (save) ====================*/
 
-static void domain_suspend_done(libxl__egc *egc,
-                        libxl__domain_suspend_state *dss, int rc);
-
 /*----- complicated callback, called by xc_domain_save -----*/
 
 /*
@@ -1508,8 +1505,8 @@ static void save_device_model_datacopier_done(libxl__egc 
*egc,
     dss->save_dm_callback(egc, dss, our_rc);
 }
 
-static void domain_suspend_done(libxl__egc *egc,
-                        libxl__domain_suspend_state *dss, int rc)
+void domain_suspend_done(libxl__egc *egc,
+                         libxl__domain_suspend_state *dss, int rc)
 {
     STATE_AO_GC(dss->ao);
 
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 2f64382..0430307 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -2313,6 +2313,17 @@ typedef struct libxl__remus_state {
 
 _hidden int libxl__netbuffer_enabled(libxl__gc *gc);
 
+_hidden void domain_suspend_done(libxl__egc *egc,
+                                 libxl__domain_suspend_state *dss,
+                                 int rc);
+
+_hidden void libxl__remus_setup_done(libxl__egc *egc,
+                                     libxl__domain_suspend_state *dss,
+                                     int rc);
+
+_hidden void libxl__remus_netbuf_setup(libxl__egc *egc,
+                                       libxl__domain_suspend_state *dss);
+
 struct libxl__domain_suspend_state {
     /* set by caller of libxl__domain_suspend */
     libxl__ao *ao;
diff --git a/tools/libxl/libxl_netbuffer.c b/tools/libxl/libxl_netbuffer.c
index 8e23d75..0be876c 100644
--- a/tools/libxl/libxl_netbuffer.c
+++ b/tools/libxl/libxl_netbuffer.c
@@ -17,11 +17,430 @@
 
 #include "libxl_internal.h"
 
+#include <netlink/cache.h>
+#include <netlink/socket.h>
+#include <netlink/attr.h>
+#include <netlink/route/link.h>
+#include <netlink/route/route.h>
+#include <netlink/route/qdisc.h>
+#include <netlink/route/qdisc/plug.h>
+
+typedef struct libxl__remus_netbuf_state { /* Remus network buffer state */
+    struct rtnl_qdisc **netbuf_qdisc_list; /* root qdisc handle per IFB */
+    struct nl_sock *nlsock;                /* NETLINK_ROUTE socket */
+    struct nl_cache *qdisc_cache;          /* qdiscs fetched via nlsock */
+    const char **vif_list;                 /* names of the guest's vifs */
+    const char **ifb_list;                 /* IFB name for each vif_list[i] */
+    uint32_t num_netbufs;                  /* length of the lists above */
+    uint32_t unused;                       /* not referenced by code shown */
+} libxl__remus_netbuf_state;
+
 int libxl__netbuffer_enabled(libxl__gc *gc)
 {
     return 1;
 }
 
+/* Return the name of the backend network interface for @nic of @domid.
+ *
+ * If the device has a vifname recorded in the backend's xenstore
+ * directory, then use that instead of the vifX.Y format; otherwise
+ * return the default name built by libxl__device_nic_devname().
+ */
+static const char *get_vifname(libxl__gc *gc, uint32_t domid,
+                               libxl_device_nic *nic)
+{
+    const char *vifname = NULL;
+    const char *path;
+    int rc;
+
+    path = libxl__sprintf(gc, "%s/backend/vif/%d/%d/vifname",
+                          libxl__xs_get_dompath(gc, 0), domid, nic->devid);
+    rc = libxl__xs_read_checked(gc, XBT_NULL, path, &vifname);
+    /* libxl__xs_read_checked() reports a missing node as success with a
+     * NULL result, so use the default name in that case as well as on a
+     * read error. Otherwise every vif without an explicit vifname would
+     * yield NULL here.
+     */
+    if (rc < 0 || !vifname) {
+        vifname = libxl__device_nic_devname(gc, domid,
+                                            nic->devid,
+                                            nic->nictype);
+    }
+
+    return vifname;
+}
+
+/* Build the list of vif names belonging to @domid.
+ *
+ * The returned array is allocated from @gc. On return *num_vifs is:
+ *   > 0  the number of entries in the returned array
+ *     0  no vifs were found; NULL is returned
+ *   < 0  failure (a vif is served by a driver domain, or the name of a
+ *        vif could not be determined); NULL is returned
+ */
+static const char **get_guest_vif_list(libxl__gc *gc, uint32_t domid,
+                                       int *num_vifs)
+{
+    libxl_device_nic *nics = NULL;
+    int nb, i = 0;
+    const char **vif_list = NULL;
+
+    *num_vifs = 0;
+    nics = libxl_device_nic_list(CTX, domid, &nb);
+    if (!nics)
+        return NULL;
+
+    /* Ensure that none of the vifs are backed by driver domains */
+    for (i = 0; i < nb; i++) {
+        if (nics[i].backend_domid != LIBXL_TOOLSTACK_DOMID) {
+            LOG(ERROR, "vif %s has driver domain (%u) as its backend. "
+                "Network buffering is not supported with driver domains",
+                get_vifname(gc, domid, &nics[i]), nics[i].backend_domid);
+            *num_vifs = -1;
+            goto out;
+        }
+    }
+
+    GCNEW_ARRAY(vif_list, nb);
+    for (i = 0; i < nb; ++i) {
+        vif_list[i] = get_vifname(gc, domid, &nics[i]);
+        if (!vif_list[i]) {
+            /* Flag this as a failure. Leaving *num_vifs at 0 would make
+             * the caller believe the guest simply has no vifs and carry
+             * on without any network buffering.
+             */
+            LOG(ERROR, "cannot determine the name of vif %d of domain %u",
+                nics[i].devid, domid);
+            vif_list = NULL;
+            *num_vifs = -1;
+            goto out;
+        }
+    }
+    *num_vifs = nb;
+
+ out:
+    /* the names were copied into gc memory, the nic list can go */
+    for (i = 0; i < nb; i++)
+        libxl_device_nic_dispose(&nics[i]);
+    free(nics);
+    return vif_list;
+}
+
+/* Release every libnl resource held by @netbuf_state: the references on
+ * the plug qdiscs, the qdisc cache and the netlink socket.
+ *
+ * init_qdiscs() calls this from its error path, where any of these may
+ * not have been set up yet, so each one is checked before it is released
+ * (this assumes the state started out zeroed; it is allocated with GCNEW).
+ * The pointers are reset afterwards so that a repeated call is harmless.
+ */
+static void free_qdiscs(libxl__remus_netbuf_state *netbuf_state)
+{
+    uint32_t i;
+
+    /* drop the references held on the qdiscs */
+    if (netbuf_state->netbuf_qdisc_list) {
+        for (i = 0; i < netbuf_state->num_netbufs; i++) {
+            struct rtnl_qdisc *qdisc = netbuf_state->netbuf_qdisc_list[i];
+
+            /* the list is filled in order: the first NULL entry ends it */
+            if (!qdisc)
+                break;
+
+            nl_object_put((struct nl_object *)qdisc);
+            netbuf_state->netbuf_qdisc_list[i] = NULL;
+        }
+    }
+
+    /* free qdisc cache */
+    if (netbuf_state->qdisc_cache) {
+        nl_cache_clear(netbuf_state->qdisc_cache);
+        nl_cache_free(netbuf_state->qdisc_cache);
+        netbuf_state->qdisc_cache = NULL;
+    }
+
+    /* close and free nlsock */
+    if (netbuf_state->nlsock) {
+        nl_close(netbuf_state->nlsock);
+        nl_socket_free(netbuf_state->nlsock);
+        netbuf_state->nlsock = NULL;
+    }
+}
+
+/* Open a NETLINK_ROUTE socket, load the qdisc cache and look up the root
+ * qdisc of every IFB device named in ifb_list, storing the handles in
+ * netbuf_qdisc_list.
+ *
+ * Returns 0 on success. On any failure the netbuf state is handed to
+ * free_qdiscs() and ERROR_FAIL is returned.
+ */
+static int init_qdiscs(libxl__gc *gc,
+                       libxl__remus_state *remus_state)
+{
+    int idx, err, ifindex;
+    struct rtnl_link *link = NULL;
+    struct rtnl_qdisc *root = NULL;
+    const char *kind;
+
+    /* Convenience aliases */
+    libxl__remus_netbuf_state * const nbs = remus_state->netbuf_state;
+    const int count = nbs->num_netbufs;
+    const char ** const ifbs = nbs->ifb_list;
+
+    /* The IFB devices and their plug qdiscs are up by now (one per vif);
+     * grab netlink handles on those qdiscs for use during checkpointing.
+     */
+    nbs->nlsock = nl_socket_alloc();
+    if (nbs->nlsock == NULL) {
+        LOG(ERROR, "cannot allocate nl socket");
+        goto out;
+    }
+
+    err = nl_connect(nbs->nlsock, NETLINK_ROUTE);
+    if (err != 0) {
+        LOG(ERROR, "failed to open netlink socket: %s",
+            nl_geterror(err));
+        goto out;
+    }
+
+    /* fetch all qdiscs installed on the network devices */
+    err = rtnl_qdisc_alloc_cache(nbs->nlsock, &nbs->qdisc_cache);
+    if (err != 0) {
+        LOG(ERROR, "failed to allocate qdisc cache: %s",
+            nl_geterror(err));
+        goto out;
+    }
+
+    /* one qdisc handle per IFB device */
+    GCNEW_ARRAY(nbs->netbuf_qdisc_list, count);
+
+    for (idx = 0; idx < count; idx++) {
+        const char *ifb_name = ifbs[idx];
+
+        /* resolve the IFB interface by name */
+        err = rtnl_link_get_kernel(nbs->nlsock, 0, ifb_name, &link);
+        if (err != 0) {
+            LOG(ERROR, "cannot obtain handle for %s: %s", ifb_name,
+                nl_geterror(err));
+            goto out;
+        }
+
+        ifindex = rtnl_link_get_ifindex(link);
+        if (ifindex == 0) {
+            LOG(ERROR, "interface %s has no index", ifb_name);
+            goto out;
+        }
+
+        /* The netbufscript installs the plug qdisc as the root qdisc of
+         * the IFB, so a lookup by parent TC_H_ROOT in the cache is enough;
+         * the rest of the qdisc tree need not be searched.
+         *
+         * The handle is released with nl_object_put(), either right below
+         * (wrong qdisc kind) or later in free_qdiscs().
+         */
+        root = rtnl_qdisc_get_by_parent(nbs->qdisc_cache, ifindex,
+                                        TC_H_ROOT);
+        if (root == NULL) {
+            LOG(ERROR, "Cannot get qdisc handle from ifb %s", ifb_name);
+            goto out;
+        }
+
+        /* Sanity check: the root qdisc has to be a plug qdisc. */
+        kind = rtnl_tc_get_kind(TC_CAST(root));
+        if (kind == NULL || strcmp(kind, "plug") != 0) {
+            nl_object_put((struct nl_object *)root);
+            LOG(ERROR, "plug qdisc is not installed on %s", ifb_name);
+            goto out;
+        }
+
+        nbs->netbuf_qdisc_list[idx] = root;
+
+        /* done with this link; clear it so the error path skips it */
+        rtnl_link_put(link);
+        link = NULL;
+    }
+
+    return 0;
+
+ out:
+    if (link != NULL)
+        rtnl_link_put(link);
+    free_qdiscs(nbs);
+    return ERROR_FAIL;
+}
+
+/* Timeout handler armed by exec_netbuf_script(): the netbuf hotplug script
+ * has been running for too long, so send it SIGKILL.
+ *
+ * This only kills the script; it does not itself report completion of the
+ * setup.
+ */
+static void netbuf_setup_timeout_cb(libxl__egc *egc,
+                                    libxl__ev_time *ev,
+                                    const struct timeval *requested_abs)
+{
+    libxl__remus_state *remus_state = CONTAINER_OF(ev, *remus_state, timeout);
+
+    /* Convenience aliases */
+    const int devid = remus_state->dev_id;
+    libxl__remus_netbuf_state *const netbuf_state = remus_state->netbuf_state;
+    const char *const vif = netbuf_state->vif_list[devid];
+
+    STATE_AO_GC(remus_state->dss->ao);
+
+    libxl__ev_time_deregister(gc, &remus_state->timeout);
+    /* a timeout is only ever armed together with a running script */
+    assert(libxl__ev_child_inuse(&remus_state->child));
+
+    LOG(DEBUG, "killing hotplug script %s (on vif %s) because of timeout",
+        remus_state->netbufscript, vif);
+
+    if (kill(remus_state->child.pid, SIGKILL)) {
+        /* the cast matches the signed %ld conversion */
+        LOGEV(ERROR, errno, "unable to kill hotplug script %s [%ld]",
+              remus_state->netbufscript,
+              (long)remus_state->child.pid);
+    }
+}
+
+/* Launch the netbuf hotplug script for the vif selected by
+ * remus_state->dev_id and arm a timeout that kills the script if it runs
+ * for too long.
+ *
+ * The script needs the following env & args:
+ *   $vifname
+ *   $XENBUS_PATH (/libxl/<domid>/remus/netbuf/<devid>/)
+ *   $IFB (for teardown)
+ *   setup/teardown as command line arg.
+ * In return, the script writes the name of the IFB device (during setup)
+ * to be used for output buffering into XENBUS_PATH/ifb.
+ *
+ * @op:    "setup" or "teardown"
+ * @death: child callback handed to libxl__ev_child_fork()
+ *
+ * Returns 0 once the script has been forked, ERROR_FAIL otherwise. On
+ * failure no timeout is left registered.
+ */
+static int exec_netbuf_script(libxl__gc *gc, libxl__remus_state *remus_state,
+                              char *op, libxl__ev_child_callback *death)
+{
+    int arraysize = 7, nr = 0;
+    char **env = NULL, **args = NULL;
+    pid_t pid;
+
+    /* Convenience aliases */
+    libxl__ev_child *const child = &remus_state->child;
+    libxl__ev_time *const timeout = &remus_state->timeout;
+    char *const script = libxl__strdup(gc, remus_state->netbufscript);
+    const uint32_t domid = remus_state->dss->domid;
+    const int devid = remus_state->dev_id;
+    libxl__remus_netbuf_state *const netbuf_state = remus_state->netbuf_state;
+    const char *const vif = netbuf_state->vif_list[devid];
+    const char *const ifb = netbuf_state->ifb_list[devid];
+
+    /* environment: key/value pairs followed by a terminating NULL */
+    GCNEW_ARRAY(env, arraysize);
+    env[nr++] = "vifname";
+    env[nr++] = libxl__strdup(gc, vif);
+    env[nr++] = "XENBUS_PATH";
+    env[nr++] = GCSPRINTF("%s/remus/netbuf/%d",
+                          libxl__xs_libxl_path(gc, domid), devid);
+    if (!strcmp(op, "teardown")) {
+        env[nr++] = "IFB";
+        env[nr++] = libxl__strdup(gc, ifb);
+    }
+    env[nr++] = NULL;
+    assert(nr <= arraysize);
+
+    arraysize = 3; nr = 0;
+    GCNEW_ARRAY(args, arraysize);
+    args[nr++] = script;
+    args[nr++] = op;
+    args[nr++] = NULL;
+    assert(nr == arraysize);
+
+    /* Set hotplug timeout */
+    if (libxl__ev_time_register_rel(gc, timeout,
+                                    netbuf_setup_timeout_cb,
+                                    LIBXL_HOTPLUG_TIMEOUT * 1000)) {
+        LOG(ERROR, "unable to register timeout for "
+            "netbuf setup script %s on vif %s", script, vif);
+        return ERROR_FAIL;
+    }
+
+    LOG(DEBUG, "Calling netbuf script: %s %s on vif %s",
+        script, op, vif);
+
+    /* Fork and exec netbuf script */
+    pid = libxl__ev_child_fork(gc, child, death);
+    if (pid == -1) {
+        LOG(ERROR, "unable to fork netbuf script %s", script);
+        /* There is no child, so the timeout must not be left armed: its
+         * handler asserts that the child is in use.
+         */
+        libxl__ev_time_deregister(gc, timeout);
+        return ERROR_FAIL;
+    }
+
+    if (!pid) {
+        /* child: Launch netbuf script */
+        libxl__exec(gc, -1, -1, -1, args[0], args, env);
+        /* notreached */
+        abort();
+    }
+
+    return 0;
+}
+
+/* Child callback for the "setup" run of the netbuf script on one vif.
+ *
+ * Checks the script's outcome (hotplug-error node, exit status), reads
+ * the IFB name the script wrote to xenstore and then either starts the
+ * script for the next vif or, after the last vif, calls init_qdiscs().
+ * Every path that does not launch another script ends in
+ * libxl__remus_setup_done().
+ */
+static void netbuf_setup_script_cb(libxl__egc *egc,
+                                   libxl__ev_child *child,
+                                   pid_t pid, int status)
+{
+    libxl__remus_state *rs = CONTAINER_OF(child, *rs, child);
+    const char *xs_base, *err_path, *ifb_path;
+    const char *script_error = NULL;
+    int rc = ERROR_FAIL;
+
+    /* Convenience aliases */
+    const uint32_t domid = rs->dss->domid;
+    const int devid = rs->dev_id;
+    libxl__remus_netbuf_state *const nbs = rs->netbuf_state;
+    const char *const vif = nbs->vif_list[devid];
+    const char **const ifb_slot = &nbs->ifb_list[devid];
+
+    STATE_AO_GC(rs->dss->ao);
+
+    /* the script is gone, its timeout is no longer needed */
+    libxl__ev_time_deregister(gc, &rs->timeout);
+
+    xs_base = GCSPRINTF("%s/remus/netbuf/%d",
+                        libxl__xs_libxl_path(gc, domid), devid);
+
+    /* did the script leave an error message behind? */
+    err_path = GCSPRINTF("%s/hotplug-error", xs_base);
+    rc = libxl__xs_read_checked(gc, XBT_NULL, err_path, &script_error);
+    if (rc != 0)
+        goto out;
+
+    if (script_error != NULL) {
+        LOG(ERROR, "netbuf script %s setup failed for vif %s: %s",
+            rs->netbufscript, vif, script_error);
+        rc = ERROR_FAIL;
+        goto out;
+    }
+
+    if (status != 0) {
+        libxl_report_child_exitstatus(CTX, LIBXL__LOG_ERROR,
+                                      rs->netbufscript,
+                                      pid, status);
+        rc = ERROR_FAIL;
+        goto out;
+    }
+
+    /* pick up the IFB device chosen by the script */
+    ifb_path = GCSPRINTF("%s/remus/netbuf/%d/ifb",
+                         libxl__xs_libxl_path(gc, domid),
+                         devid);
+    rc = libxl__xs_read_checked(gc, XBT_NULL, ifb_path, ifb_slot);
+    if (rc != 0)
+        goto out;
+
+    if (*ifb_slot == NULL) {
+        LOG(ERROR, "Cannot get ifb dev name for domain %u dev %s",
+            domid, vif);
+        rc = ERROR_FAIL;
+        goto out;
+    }
+
+    LOG(DEBUG, "%s will buffer packets from vif %s", *ifb_slot, vif);
+
+    /* move on to the next vif, or finish once all of them are done */
+    rs->dev_id++;
+    if (rs->dev_id < nbs->num_netbufs) {
+        rc = exec_netbuf_script(gc, rs,
+                                "setup", netbuf_setup_script_cb);
+        if (rc == 0)
+            return;
+    } else {
+        rc = init_qdiscs(gc, rs);
+    }
+
+ out:
+    libxl__remus_setup_done(egc, rs->dss, rc);
+}
+
+/* Entry point for setting up network buffering for the domain in @dss.
+ *
+ * Collects the guest's vifs and starts the netbufscript for the first of
+ * them; netbuf_setup_script_cb() then walks through the remaining vifs
+ * and finally obtains the handles on the plug qdiscs installed on the IFB
+ * devices. Network output buffering is controlled via these qdiscs.
+ *
+ * If there are no vifs, or if anything fails before the first script is
+ * running, libxl__remus_setup_done() is called directly from here.
+ */
+void libxl__remus_netbuf_setup(libxl__egc *egc,
+                               libxl__domain_suspend_state *dss)
+{
+    libxl__remus_netbuf_state *nbs = NULL;
+    int vif_count = 0;
+    int rc = ERROR_FAIL;
+
+    /* Convenience aliases */
+    libxl__remus_state *const remus_state = dss->remus_state;
+    const uint32_t domid = dss->domid;
+
+    STATE_AO_GC(dss->ao);
+
+    GCNEW(nbs);
+    nbs->vif_list = get_guest_vif_list(gc, domid, &vif_count);
+
+    if (vif_count > 0) {
+        GCNEW_ARRAY(nbs->ifb_list, vif_count);
+        nbs->num_netbufs = vif_count;
+        remus_state->netbuf_state = nbs;
+        remus_state->dev_id = 0;
+
+        /* on success the script callback takes over from here */
+        if (!exec_netbuf_script(gc, remus_state, "setup",
+                                netbuf_setup_script_cb))
+            return;
+    } else if (vif_count == 0) {
+        /* nothing to buffer: not an error */
+        rc = 0;
+    }
+
+    libxl__remus_setup_done(egc, dss, rc);
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/tools/libxl/libxl_nonetbuffer.c b/tools/libxl/libxl_nonetbuffer.c
index 6aa4bf1..acfa534 100644
--- a/tools/libxl/libxl_nonetbuffer.c
+++ b/tools/libxl/libxl_nonetbuffer.c
@@ -22,6 +22,12 @@ int libxl__netbuffer_enabled(libxl__gc *gc)
     return 0;
 }
 
+/* Remus network buffer related stubs.
+ *
+ * In this file libxl__netbuffer_enabled() returns 0.
+ * NOTE(review): this stub does nothing; in particular it never calls
+ * libxl__remus_setup_done(). Presumably callers check
+ * libxl__netbuffer_enabled() before calling it - TODO confirm.
+ */
+void libxl__remus_netbuf_setup(libxl__egc *egc,
+                               libxl__domain_suspend_state *dss)
+{
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/tools/libxl/libxl_remus.c b/tools/libxl/libxl_remus.c
new file mode 100644
index 0000000..b3342b3
--- /dev/null
+++ b/tools/libxl/libxl_remus.c
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2014
+ * Author Shriram Rajagopalan <rshriram@xxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#include "libxl_osdeps.h" /* must come before any other headers */
+
+#include "libxl_internal.h"
+
+/*----- remus setup/teardown code -----*/
+
+/* Completion hook for the Remus setup phase.
+ *
+ * rc == 0: continue with libxl__domain_suspend().
+ * rc != 0: log the failure and finish through domain_suspend_done(),
+ *          passing rc on.
+ */
+void libxl__remus_setup_done(libxl__egc *egc,
+                             libxl__domain_suspend_state *dss,
+                             int rc)
+{
+    STATE_AO_GC(dss->ao);
+
+    if (rc != 0) {
+        LOG(ERROR, "Remus: failed to setup network buffering"
+            " for guest with domid %u", dss->domid);
+        domain_suspend_done(egc, dss, rc);
+        return;
+    }
+
+    libxl__domain_suspend(egc, dss);
+}
-- 
1.8.4.2


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.