[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 1 of 4 RFC] blktap3/sring: connect to/disconnect from the shared ring



This patch introduces the functions that allow tapdisk to connect to/disconnect
from the shared ring. They are message handlers executed when the tapback
daemon sends a TAPDISK_MESSAGE_XENBLKIF_CONNECT/DISCONNECT message to the
tapdisk, as a result of running the Xenbus protocol. The connection to the ring
is effectively established by mapping the grant references and binding to the
guest domain's event channel port (both the grant references and the port are
supplied by the tapback daemon). After the connection is established, a
callback is registered to be executed when a notification for the ring arrives.

Signed-off-by: Thanos Makatos <thanos.makatos@xxxxxxxxxx>

diff -r 1712e6f8bb34 -r 93727c736ff0 tools/blktap3/drivers/sring/blkif.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/blktap3/drivers/sring/blkif.h       Tue Mar 12 12:54:23 2013 +0000
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2012      Citrix Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
+ * USA.
+ */
+
+#ifndef __SRING_BLKIF_H__
+#define __SRING_BLKIF_H__
+
+#include <xen/io/blkif.h>
+
+/* Not a real protocol.  These placeholder types are only fed to
+ * DEFINE_RING_TYPES to generate ring structs which contain the elements
+ * common to all protocols.  This gives a compiler-checkable way to use
+ * those elements without switch(protocol) in a number of places.  */
+struct blkif_common_request {
+    char dummy;
+};
+struct blkif_common_response {
+    char dummy;
+};
+
+/* i386 protocol version */
+#pragma pack(push, 4)
+/* i386 request; pack(4) keeps its 64-bit fields on 4-byte boundaries. */
+typedef struct blkif_x86_32_request {
+    uint8_t operation;          /* BLKIF_OP_???                         */
+    uint8_t nr_segments;        /* number of segments                   */
+    blkif_vdev_t handle;        /* only for read/write requests         */
+    uint64_t id;                /* private guest value, echoed in resp  */
+    blkif_sector_t sector_number;   /* start sector idx on disk (r/w only)  */
+    struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+} blkif_x86_32_request_t;
+/* i386 response; pack(4) keeps id on a 4-byte boundary. */
+typedef struct blkif_x86_32_response {
+    uint64_t id;                /* copied from request */
+    uint8_t operation;          /* copied from request */
+    int16_t status;             /* BLKIF_RSP_???       */
+} blkif_x86_32_response_t;
+#pragma pack(pop)
+
+/* x86_64 protocol version */
+/* Request of the x86_64 protocol; id is explicitly 8-byte aligned. */
+typedef struct blkif_x86_64_request {
+    uint8_t operation;          /* BLKIF_OP_???                         */
+    uint8_t nr_segments;        /* number of segments                   */
+    blkif_vdev_t handle;        /* only for read/write requests         */
+    uint64_t __attribute__ ((__aligned__(8))) id;   /* echoed in resp   */
+    blkif_sector_t sector_number;   /* start sector idx on disk (r/w only)  */
+    struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+} blkif_x86_64_request_t;
+/* Response of the x86_64 protocol; id is explicitly 8-byte aligned. */
+typedef struct blkif_x86_64_response {
+    uint64_t __attribute__ ((__aligned__(8))) id;   /* copied from request */
+    uint8_t operation;          /* copied from request */
+    int16_t status;             /* BLKIF_RSP_???       */
+} blkif_x86_64_response_t;
+
+DEFINE_RING_TYPES(blkif_common, struct blkif_common_request,
+        struct blkif_common_response);
+DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request,
+        struct blkif_x86_32_response);
+DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request,
+        struct blkif_x86_64_response);
+
+/* One storage area for the back ring, whichever protocol is in use. */
+typedef union blkif_back_rings {
+    blkif_back_ring_t native;
+    blkif_common_back_ring_t common;
+    blkif_x86_32_back_ring_t x86_32;
+    blkif_x86_64_back_ring_t x86_64;
+} blkif_back_rings_t;
+
+#endif /* __SRING_BLKIF_H__ */
diff -r 1712e6f8bb34 -r 93727c736ff0 tools/blktap3/drivers/sring/td-blkif.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/blktap3/drivers/sring/td-blkif.c    Tue Mar 12 12:54:23 2013 +0000
@@ -0,0 +1,224 @@
+/*
+ * Copyright (C) 2012      Citrix Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
+ * USA.
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <syslog.h>
+#include <sys/mman.h>
+
+#include "blktap3.h"
+#include "tapdisk.h"
+
+#include "td-blkif.h"
+#include "td-ctx.h"
+#include "td-req.h"
+
+struct td_xenblkif *
+tapdisk_xenblkif_find(const domid_t domid, const int devid)
+{
+    struct td_xenio_ctx *cur;
+    struct td_xenblkif *match = NULL;
+
+    /* Probe one context at a time and stop at the first that has a hit. */
+    tapdisk_xenio_for_each_ctx(cur) {
+        tapdisk_xenio_ctx_find_blkif(cur, match,
+                                     match->domid == domid &&
+                                     match->devid == devid);
+        if (match != NULL)
+            break;
+    }
+    return match;
+}
+
+/* Tears down a (possibly half-constructed) block interface and frees it. */
+void
+tapdisk_xenblkif_destroy(struct td_xenblkif * blkif)
+{
+    assert(blkif);
+    tapdisk_xenblkif_reqs_free(blkif);
+    if (blkif->ctx) {
+        /* port is unsigned, so ">= 0" cannot tell bound from unbound: skip
+         * 0 (calloc'd, never bound) and -1 (stored after a failed bind). */
+        if (blkif->port != 0 && blkif->port != (evtchn_port_t)-1)
+            xc_evtchn_unbind(blkif->ctx->xce_handle, blkif->port);
+        if (blkif->rings.common.sring)
+            xc_gnttab_munmap(blkif->ctx->xcg_handle, blkif->rings.common.sring,
+                    blkif->ring_n_pages);
+        /* Never linked (tqe_prev still NULL)? Then TAILQ_REMOVE would crash. */
+        if (blkif->entry.tqe_prev)
+            TAILQ_REMOVE(&blkif->ctx->blkifs, blkif, entry);
+        tapdisk_xenio_ctx_put(blkif->ctx);
+    }
+    free(blkif);
+}
+
+int
+tapdisk_xenblkif_disconnect(const domid_t domid, const int devid)
+{
+    struct td_xenblkif *const target = tapdisk_xenblkif_find(domid, devid);
+
+    /* Nothing is connected for this (domid, devid) pair. */
+    if (target == NULL)
+        return ESRCH;
+
+    /* Refuse to tear down unless every request is back on the free stack. */
+    if (target->ring_size != target->n_reqs_free)
+        return EBUSY;
+
+    tapdisk_xenblkif_destroy(target);
+    return 0;
+}
+
+/* Maps the ring, binds the event channel and registers the interface. */
+int
+tapdisk_xenblkif_connect(domid_t domid, int devid, const grant_ref_t * grefs,
+        int order, evtchn_port_t port, int proto, const char *pool,
+        td_vbd_t * vbd)
+{
+    struct td_xenblkif *td_blkif = NULL;
+    struct td_xenio_ctx *td_ctx;
+    void *sring = NULL;         /* non-NULL once the grant refs are mapped */
+    int local_port = -1;        /* >= 0 once the event channel is bound */
+    int err, i;
+    size_t sz;
+
+    /* Already connected? */
+    if (tapdisk_xenblkif_find(domid, devid)) {
+        /* TODO log error */
+        return EEXIST;
+    }
+
+    err = tapdisk_xenio_ctx_get(pool, &td_ctx);
+    if (err) {
+        /* TODO log error */
+        return err;
+    }
+
+    td_blkif = calloc(1, sizeof(*td_blkif));
+    if (!td_blkif) {
+        /* TODO log error */
+        err = errno;
+        goto fail;
+    }
+
+    td_blkif->domid = domid;
+    td_blkif->devid = devid;
+    td_blkif->vbd = vbd;
+    td_blkif->ctx = td_ctx;
+    td_blkif->proto = proto;
+
+    /* Create the shared ring. Any valid order is smaller than the array
+     * size, so bigger ones are rejected before 1 << order is evaluated. */
+    if (order < 0 || order >= (int)ARRAY_SIZE(td_blkif->ring_ref)
+            || (1 << order) > (int)ARRAY_SIZE(td_blkif->ring_ref)) {
+        syslog(LOG_ERR, "invalid ring order %d, max %lu pages\n", order,
+                (unsigned long)ARRAY_SIZE(td_blkif->ring_ref));
+        err = EINVAL;
+        goto fail;
+    }
+    td_blkif->ring_n_pages = 1 << order;
+
+    /*
+     * TODO Why don't we just keep a copy of the array's address? There should
+     * be a reason for copying the addresses of the pages, figure out why.
+     * TODO Why do we even store it in the td_blkif in the first place?
+     */
+    for (i = 0; i < td_blkif->ring_n_pages; i++)
+        td_blkif->ring_ref[i] = grefs[i];
+
+    /* Map the grant references holding the request descriptors. */
+    sring = xc_gnttab_map_domain_grant_refs(td_ctx->xcg_handle,
+            td_blkif->ring_n_pages, td_blkif->domid, td_blkif->ring_ref,
+            PROT_READ | PROT_WRITE);
+    if (!sring) {
+        err = errno;
+        syslog(LOG_ERR, "failed to map domain's %d grant references: %s\n",
+                domid, strerror(err));
+        goto fail;
+    }
+
+    /* Size of the ring, in bytes. */
+    sz = XC_PAGE_SIZE << order;
+
+    /*
+     * Initialize the mapped address into the shared ring.
+     * TODO Check for protocol support in the beginning of this function.
+     */
+    switch (td_blkif->proto) {
+        case BLKIF_PROTOCOL_NATIVE:
+            {
+                blkif_sring_t *__sring = sring;
+                BACK_RING_INIT(&td_blkif->rings.native, __sring, sz);
+                break;
+            }
+        case BLKIF_PROTOCOL_X86_32:
+            {
+                blkif_x86_32_sring_t *__sring = sring;
+                BACK_RING_INIT(&td_blkif->rings.x86_32, __sring, sz);
+                break;
+            }
+        case BLKIF_PROTOCOL_X86_64:
+            {
+                blkif_x86_64_sring_t *__sring = sring;
+                BACK_RING_INIT(&td_blkif->rings.x86_64, __sring, sz);
+                break;
+            }
+        default:
+            syslog(LOG_ERR, "unsupported protocol 0x%x\n", td_blkif->proto);
+            err = EPROTONOSUPPORT;
+            goto fail;
+    }
+
+    /* Bind to the remote port. FIXME elaborate */
+    local_port = xc_evtchn_bind_interdomain(td_ctx->xce_handle,
+            td_blkif->domid, port);
+    if (local_port < 0) {
+        err = errno;
+        syslog(LOG_ERR, "failed to bind to event channel port %u of domain "
+                "%d: %s\n", port, td_blkif->domid, strerror(err));
+        goto fail;
+    }
+    td_blkif->port = local_port;
+
+    err = tapdisk_xenblkif_reqs_init(td_blkif);
+    if (err) {
+        /* TODO log error */
+        goto fail;
+    }
+
+    TAILQ_INSERT_TAIL(&td_ctx->blkifs, td_blkif, entry);
+    return 0;
+
+fail:
+    /* td_blkif is not on td_ctx->blkifs at this point, so unwind by hand
+     * and release only what was actually acquired above. */
+    if (td_blkif) {
+        tapdisk_xenblkif_reqs_free(td_blkif);
+        if (local_port >= 0)
+            xc_evtchn_unbind(td_ctx->xce_handle, local_port);
+        if (sring)
+            xc_gnttab_munmap(td_ctx->xcg_handle, sring,
+                    td_blkif->ring_n_pages);
+        free(td_blkif);
+    }
+    tapdisk_xenio_ctx_put(td_ctx);
+    return err;
+}
+
diff -r 1712e6f8bb34 -r 93727c736ff0 tools/blktap3/drivers/sring/td-blkif.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/blktap3/drivers/sring/td-blkif.h    Tue Mar 12 12:54:23 2013 +0000
@@ -0,0 +1,164 @@
+/*
+ * Copyright (C) 2012      Citrix Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
+ * USA.
+ */
+
+#ifndef __TD_BLKIF_H__
+#define __TD_BLKIF_H__
+
+#include <inttypes.h> /* TODO required by xen/event_channel.h */
+#include <xen/event_channel.h>
+
+#include "blkif.h"
+#include "td-req.h"
+#include "td-stats.h"
+
+struct td_xenio_ctx;
+
+/** State of one Xen block interface, identified by (domid, devid). */
+struct td_xenblkif {
+    /**
+     * The domain ID where the front-end is running.
+     */
+    int domid;
+
+    /**
+     * The device ID of the VBD.
+     */
+    int devid;
+
+    /**
+     * Pointer to the context this block interface belongs to.
+     */
+    struct td_xenio_ctx *ctx;
+
+    /**
+     * Linkage into the context's list of block interfaces (ctx->blkifs).
+     */
+    TAILQ_ENTRY(td_xenblkif) entry;
+
+    /**
+     * The local port corresponding to the remote port of the domain where the
+     * front-end is running. We use this to tell for which VBD a pending event
+     * is, and for notifying the front-end for responses we have produced and
+     * placed in the shared ring.
+     */
+    evtchn_port_t port;
+
+    /**
+     * Protocol of the ring: BLKIF_PROTOCOL_NATIVE, _X86_32 or _X86_64.
+     * Need to keep around? Replace with function pointer?
+     */
+    int proto;
+
+    blkif_back_rings_t rings;   /* the back ring, one view per protocol */
+
+    /**
+     * Grant references of the ring pages, copied in when connecting.
+     * TODO Why 8 specifically?
+     * TODO Do we really need to keep it around?
+     */
+    grant_ref_t ring_ref[8];
+
+    /**
+     * Number of pages in the ring that holds the request descriptors.
+     */
+    int ring_n_pages;
+
+    /*
+     * Size of the ring, expressed in requests.
+     * TODO Do we really need to keep this around?
+     */
+    int ring_size;
+
+    /**
+     * Intermediate requests. The array is managed as a stack, with n_reqs_free
+     * pointing to the top of the stack, at the next available intermediate
+     * request.
+     */
+    struct td_xenblkif_req *reqs;
+
+    /**
+     * Stack pointer to the aforementioned stack.
+     */
+    int n_reqs_free;
+
+    blkif_request_t **reqs_free;    /* presumably the free stack; TODO confirm */
+
+    /*
+     * Pointer to the actual VBD.
+     */
+    td_vbd_t *vbd;
+
+    /*
+     * Statistics of this block interface (type declared in td-stats.h).
+     */
+    struct td_xenblkif_stats stats;
+};
+/* Iterates _ctx over every context in the _td_xenio_ctxs list. */
+#define tapdisk_xenio_for_each_ctx(_ctx) \
+       TAILQ_FOREACH(_ctx, &_td_xenio_ctxs, entry)
+
+/**
+ * Connects the tapdisk to the shared ring.
+ *
+ * @param domid the ID of the guest domain
+ * @param devid the device ID
+ * @param grefs the grant references of the ring pages, 1 << order of them
+ * @param order base-2 logarithm of the number of grant references
+ * @param port event channel port of the guest domain to use for ring
+ * notifications
+ * @param proto one of BLKIF_PROTOCOL_{NATIVE,X86_32,X86_64}
+ * @param pool name of the context
+ * @param vbd the VBD
+ * @returns 0 on success, a positive errno value otherwise
+ */
+int
+tapdisk_xenblkif_connect(domid_t domid, int devid, const grant_ref_t * grefs,
+        int order, evtchn_port_t port, int proto, const char *pool,
+        td_vbd_t * vbd);
+
+/**
+ * Disconnects the tapdisk from the shared ring.
+ *
+ * @param domid the domain ID of the guest domain
+ * @param devid the device ID of the VBD
+ * @returns 0 on success, ESRCH if not connected, EBUSY if requests are in use
+ */
+int
+tapdisk_xenblkif_disconnect(const domid_t domid, const int devid);
+
+/**
+ * Destroys a XEN block interface, releasing what it holds and freeing it.
+ *
+ * @param blkif the block interface to destroy, must not be NULL
+ */
+void
+tapdisk_xenblkif_destroy(struct td_xenblkif * blkif);
+
+/**
+ * Searches all block interfaces in all contexts for a block interface
+ * having the specified domain and device ID.
+ *
+ * @param domid the domain ID of the guest domain
+ * @param devid the device ID of the VBD
+ * @returns a pointer to the first matching block interface, else NULL
+ */
+struct td_xenblkif *
+tapdisk_xenblkif_find(const domid_t domid, const int devid);
+
+#endif /* __TD_BLKIF_H__ */

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.