[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 3 of 4] blktap2: Separate tapdisk raw I/O into different backends



# HG changeset patch
# User Daniel Stodden <dns@xxxxxxxxxxxx>
# Date 1264750223 28800
# Node ID 2b8c81d25096575472974f1e81829b73f0fe7d69
# Parent  9746b010e2bf495a1c93c37ab98f9d1fdde559df
blktap2: Separate tapdisk raw I/O into different backends.

Hide tapdisk support for different raw I/O interfaces behind a new
struct tio. Libaio continues to dominate the interface, requiring
every backend to dispatch iocb/io_event structs.

Backends:
 - lio:  Kernel AIO via libaio.
 - rwio: Canonical read/write() mode.

Misc:
 - Fixes a bug in tapdisk-vbd which locks up the sync io mode.
 - Adds a PERROR macro to blktaplib.h, which the new queue code needs.
 - Removes dead code in qcow2raw to make it link again.

Signed-off-by: Daniel Stodden <daniel.stodden@xxxxxxxxxx>
Signed-off-by: Jake Wires <jake.wires@xxxxxxxxxx>

diff -r 9746b010e2bf -r 2b8c81d25096 tools/blktap2/drivers/tapdisk-queue.c
--- a/tools/blktap2/drivers/tapdisk-queue.c     Fri Nov 13 20:26:14 2009 -0800
+++ b/tools/blktap2/drivers/tapdisk-queue.c     Thu Jan 28 23:30:23 2010 -0800
@@ -141,7 +141,7 @@
         * use a private linked list to keep track
         * of the tiocbs we're cancelling. 
         */
-       tiocb  = (struct tiocb *)queue->iocbs[0]->data;
+       tiocb  = queue->iocbs[0]->data;
        queued = queue->queued;
        queue->queued = 0;
 
@@ -165,8 +165,40 @@
        return cancel_tiocbs(queue, err);
 }
 
+/*
+ * rwio
+ */
+
+struct rwio {
+       struct io_event *aio_events;
+};
+
+static void
+tapdisk_rwio_destroy(struct tqueue *queue)
+{
+       struct rwio *rwio = queue->tio_data;
+
+       if (rwio->aio_events) {
+               free(rwio->aio_events);
+               rwio->aio_events = NULL;
+       }
+}
+
+static int
+tapdisk_rwio_setup(struct tqueue *queue, int size)
+{
+       struct rwio *rwio = queue->tio_data;
+       int err;
+
+       rwio->aio_events = calloc(size, sizeof(struct io_event));
+       if (!rwio->aio_events)
+               return -errno;
+
+       return 0;
+}
+
 static inline ssize_t
-iocb_rw(struct iocb *iocb)
+tapdisk_rwio_rw(const struct iocb *iocb)
 {
        int fd        = iocb->aio_fildes;
        char *buf     = iocb->u.c.buf;
@@ -177,7 +209,7 @@
 
        if (lseek(fd, off, SEEK_SET) == (off_t)-1)
                return -errno;
-       
+
        if (atomicio(func, fd, buf, size) != size)
                return -errno;
 
@@ -185,8 +217,9 @@
 }
 
 static int
-io_synchronous_rw(struct tqueue *queue)
+tapdisk_rwio_submit(struct tqueue *queue)
 {
+       struct rwio *rwio = queue->tio_data;
        int i, merged, split;
        struct iocb *iocb;
        struct tiocb *tiocb;
@@ -201,18 +234,18 @@
        queue->queued = 0;
 
        for (i = 0; i < merged; i++) {
-               ep      = queue->aio_events + i;
+               ep      = rwio->aio_events + i;
                iocb    = queue->iocbs[i];
                ep->obj = iocb;
-               ep->res = iocb_rw(iocb);
+               ep->res = tapdisk_rwio_rw(iocb);
        }
 
-       split = io_split(&queue->opioctx, queue->aio_events, merged);
-       tapdisk_filter_events(queue->filter, queue->aio_events, split);
+       split = io_split(&queue->opioctx, rwio->aio_events, merged);
+       tapdisk_filter_events(queue->filter, rwio->aio_events, split);
 
-       for (i = split, ep = queue->aio_events; i-- > 0; ep++) {
+       for (i = split, ep = rwio->aio_events; i-- > 0; ep++) {
                iocb  = ep->obj;
-               tiocb = (struct tiocb *)iocb->data;
+               tiocb = iocb->data;
                complete_tiocb(queue, tiocb, ep->res);
        }
 
@@ -221,65 +254,258 @@
        return split;
 }
 
-static void tapdisk_tiocb_event(event_id_t id, char mode, void *private);
+static const struct tio td_tio_rwio = {
+       .name        = "rwio",
+       .data_size   = sizeof(struct rwio),  /* tio_data is read as struct rwio by submit */
+       .tio_setup   = tapdisk_rwio_setup,   /* allocates rwio->aio_events */
+       .tio_destroy = tapdisk_rwio_destroy, /* frees rwio->aio_events */
+       .tio_submit  = tapdisk_rwio_submit
+};
+
+/*
+ * libaio
+ */
+
+struct lio {
+       io_context_t     aio_ctx;
+       struct io_event *aio_events;
+
+       int              poll_fd;
+       int              event_id;
+};
+
+static void
+tapdisk_lio_destroy(struct tqueue *queue)
+{
+       struct lio *lio = queue->tio_data;
+
+       if (!lio)
+               return;
+
+       if (lio->event_id >= 0) {
+               tapdisk_server_unregister_event(lio->event_id);
+               lio->event_id = -1;
+       }
+
+       if (lio->aio_ctx) {
+               io_destroy(lio->aio_ctx);
+               lio->aio_ctx = NULL;
+       }
+
+       if (lio->aio_events) {
+               free(lio->aio_events);
+               lio->aio_events = NULL;
+       }
+}
+
+static void
+tapdisk_lio_event(event_id_t id, char mode, void *private)
+{
+       struct tqueue *queue = private;
+       struct lio *lio;
+       int i, ret, split;
+       struct iocb *iocb;
+       struct tiocb *tiocb;
+       struct io_event *ep;
+
+       lio   = queue->tio_data;
+       ret   = io_getevents(lio->aio_ctx, 0,
+                            queue->size, lio->aio_events, NULL);
+       split = io_split(&queue->opioctx, lio->aio_events, ret);
+       tapdisk_filter_events(queue->filter, lio->aio_events, split);
+
+       DBG("events: %d, tiocbs: %d\n", ret, split);
+
+       queue->iocbs_pending  -= ret;
+       queue->tiocbs_pending -= split;
+
+       for (i = split, ep = lio->aio_events; i-- > 0; ep++) {
+               iocb  = ep->obj;
+               tiocb = iocb->data;
+               complete_tiocb(queue, tiocb, ep->res);
+       }
+
+       queue_deferred_tiocbs(queue);
+}
+
+static int
+tapdisk_lio_setup(struct tqueue *queue, int qlen)
+{
+       struct lio *lio = queue->tio_data;
+       size_t sz;
+       int err;
+
+       lio->event_id = -1;
+       lio->aio_ctx  = REQUEST_ASYNC_FD;
+
+       lio->poll_fd = io_setup(qlen, &lio->aio_ctx);
+       err = lio->poll_fd;
+       if (err < 0) {
+               lio->aio_ctx = NULL;
+
+               if (err == -EAGAIN)
+                       goto fail_rsv;
+
+               goto fail_fd;
+       }
+
+       lio->event_id =
+               tapdisk_server_register_event(SCHEDULER_POLL_READ_FD,
+                                             lio->poll_fd, 0,
+                                             tapdisk_lio_event,
+                                             queue);
+       err = lio->event_id;
+       if (err < 0)
+               goto fail;
+
+       lio->aio_events = calloc(qlen, sizeof(struct io_event));
+       if (!lio->aio_events) {
+               err = -errno;
+               goto fail;
+       }
+
+       return 0;
+
+fail:
+       tapdisk_lio_destroy(queue);
+       return err;
+
+fail_rsv:
+       DPRINTF("Couldn't setup AIO context. If you are trying to "
+               "concurrently use a large number of blktap-based disks, you may "
+               "need to increase the system-wide aio request limit. "
+               "(e.g. 'echo 1048576 > /proc/sys/fs/aio-max-nr')\n");
+       goto fail;
+
+fail_fd:
+       DPRINTF("Couldn't get fd for AIO poll support. This is probably "
+               "because your kernel does not have  the aio-poll patch "
+               "applied.\n");
+       goto fail;
+}
+
+static int
+tapdisk_lio_submit(struct tqueue *queue)
+{
+       struct lio *lio = queue->tio_data;
+       int merged, submitted, err = 0;
+
+       if (!queue->queued)
+               return 0;
+
+       tapdisk_filter_iocbs(queue->filter, queue->iocbs, queue->queued);
+       merged    = io_merge(&queue->opioctx, queue->iocbs, queue->queued);
+       submitted = io_submit(lio->aio_ctx, merged, queue->iocbs);
+
+       DBG("queued: %d, merged: %d, submitted: %d\n",
+           queue->queued, merged, submitted);
+
+       if (submitted < 0) {
+               err = submitted;
+               submitted = 0;
+       } else if (submitted < merged)
+               err = -EIO;
+
+       queue->iocbs_pending  += submitted;
+       queue->tiocbs_pending += queue->queued;
+       queue->queued          = 0;
+
+       if (err)
+               queue->tiocbs_pending -= 
+                       fail_tiocbs(queue, submitted, merged, err);
+
+       return submitted;
+}
+
+static const struct tio td_tio_lio = {
+       .name        = "lio",
+       .data_size   = sizeof(struct lio),
+       .tio_setup   = tapdisk_lio_setup,
+       .tio_destroy = tapdisk_lio_destroy,
+       .tio_submit  = tapdisk_lio_submit,
+};
+
+static void
+tapdisk_queue_free_io(struct tqueue *queue)
+{
+       if (queue->tio) {
+               if (queue->tio->tio_destroy)
+                       queue->tio->tio_destroy(queue);
+               queue->tio = NULL;
+       }
+
+       if (queue->tio_data) {
+               free(queue->tio_data);
+               queue->tio_data = NULL;
+       }
+}
+
+static int
+tapdisk_queue_init_io(struct tqueue *queue, int drv)
+{
+       const struct tio *tio;
+       int err;
+
+       switch (drv) {
+       case TIO_DRV_LIO:
+               tio = &td_tio_lio;
+               break;
+       case TIO_DRV_RWIO:
+               tio = &td_tio_rwio;
+               break;
+       default: /* unknown driver id */
+               err = -EINVAL;
+               goto fail;
+       }
+
+       queue->tio_data = calloc(1, tio->data_size);
+       if (!queue->tio_data) {
+               err = -errno; /* latch before PERROR: its syslog() call may clobber errno */
+               PERROR("malloc(%zu)", tio->data_size);
+               goto fail;
+       }
+
+       queue->tio = tio; /* set before setup so the fail path runs tio_destroy */
+
+       if (tio->tio_setup) {
+               err = tio->tio_setup(queue, queue->size);
+               if (err)
+                       goto fail;
+       }
+
+       DPRINTF("I/O queue driver: %s\n", tio->name);
+
+       return 0;
+
+fail:
+       tapdisk_queue_free_io(queue);
+       return err;
+}
 
 int
 tapdisk_init_queue(struct tqueue *queue, int size,
-                  int sync, struct tfilter *filter)
+                  int drv, struct tfilter *filter)
 {
        int i, err;
 
        memset(queue, 0, sizeof(struct tqueue));
 
        queue->size   = size;
-       queue->sync   = sync;
        queue->filter = filter;
 
-       queue->event   = -1;
-       queue->aio_ctx = NULL;
-
        if (!size)
                return 0;
 
-       if (!sync) {
-               queue->aio_ctx = REQUEST_ASYNC_FD;
-               queue->poll_fd = io_setup(size, &queue->aio_ctx);
-               err = queue->poll_fd;
-               if (err < 0) {
-                       if (err == -EAGAIN)
-                               DPRINTF("Couldn't setup AIO context.  If you "
-                                       "are trying to concurrently use a "
-                                       "large number of blktap-based disks, "
-                                       "you may need to increase the "
-                                       "system-wide aio request limit. "
-                                       "(e.g. 'echo 1048576 > /proc/sys/fs/"
-                                       "aio-max-nr')\n");
-                       else
-                               DPRINTF("Couldn't get fd for AIO poll "
-                                       "support.  This is probably because "
-                                       "your kernel does not have the "
-                                       "aio-poll patch applied.\n");
-                       queue->aio_ctx = NULL;
-                       goto fail;
-               }
+       err = tapdisk_queue_init_io(queue, drv);
+       if (err)
+               goto fail;
 
-               queue->event =
-                       tapdisk_server_register_event(SCHEDULER_POLL_READ_FD,
-                                                     queue->poll_fd, 0,
-                                                     tapdisk_tiocb_event,
-                                                     queue);
-               err = queue->event;
-               if (err < 0)
-                       goto fail;
-
+       queue->iocbs = calloc(size, sizeof(struct iocb *));
+       if (!queue->iocbs) {
+               err = -errno;
+               goto fail;
        }
 
-       err               = -ENOMEM;
-       queue->iocbs      = calloc(size, sizeof(struct iocb *));
-       queue->aio_events = calloc(size, sizeof(struct io_event));
-       if (!queue->iocbs || !queue->aio_events)
-               goto fail;
-
        err = opio_init(&queue->opioctx, size);
        if (err)
                goto fail;
@@ -294,22 +520,11 @@
 void
 tapdisk_free_queue(struct tqueue *queue)
 {
-       if (queue->event >= 0) {
-               tapdisk_server_unregister_event(queue->event);
-               queue->event = -1;
-       }
-
-       if (queue->aio_ctx) {
-               io_destroy(queue->aio_ctx);
-               queue->aio_ctx = NULL;
-       }
+       tapdisk_queue_free_io(queue);
 
        free(queue->iocbs);
        queue->iocbs = NULL;
 
-       free(queue->aio_events);
-       queue->aio_events = NULL;
-
        opio_free(&queue->opioctx);
 }
 
@@ -319,9 +534,9 @@
        struct tiocb *tiocb = queue->deferred.head;
 
        WARN("TAPDISK QUEUE:\n");
-       WARN("size: %d, sync: %d, queued: %d, iocbs_pending: %d, "
+       WARN("size: %d, tio: %s, queued: %d, iocbs_pending: %d, "
             "tiocbs_pending: %d, tiocbs_deferred: %d, deferrals: %"PRIx64"\n",
-            queue->size, queue->sync, queue->queued, queue->iocbs_pending,
+            queue->size, queue->tio->name, queue->queued, queue->iocbs_pending,
             queue->tiocbs_pending, queue->tiocbs_deferred, queue->deferrals);
 
        if (tiocb) {
@@ -362,42 +577,14 @@
                defer_tiocb(queue, tiocb);
 }
 
+
 /*
  * fail_tiocbs may queue more tiocbs
  */
 int
 tapdisk_submit_tiocbs(struct tqueue *queue)
 {
-       int merged, submitted, err = 0;
-
-       if (!queue->queued)
-               return 0;
-
-       if (queue->sync)
-               return io_synchronous_rw(queue);
-
-       tapdisk_filter_iocbs(queue->filter, queue->iocbs, queue->queued);
-       merged    = io_merge(&queue->opioctx, queue->iocbs, queue->queued);
-       submitted = io_submit(queue->aio_ctx, merged, queue->iocbs);
-
-       DBG("queued: %d, merged: %d, submitted: %d\n",
-           queue->queued, merged, submitted);
-
-       if (submitted < 0) {
-               err = submitted;
-               submitted = 0;
-       } else if (submitted < merged)
-               err = -EIO;
-
-       queue->iocbs_pending  += submitted;
-       queue->tiocbs_pending += queue->queued;
-       queue->queued          = 0;
-
-       if (err)
-               queue->tiocbs_pending -= 
-                       fail_tiocbs(queue, submitted, merged, err);
-
-       return submitted;
+       return queue->tio->tio_submit(queue);
 }
 
 int
@@ -412,40 +599,6 @@
        return submitted;
 }
 
-static void
-tapdisk_complete_tiocbs(struct tqueue *queue)
-{
-       int i, ret, split;
-       struct iocb *iocb;
-       struct tiocb *tiocb;
-       struct io_event *ep;
-
-       ret   = io_getevents(queue->aio_ctx, 0,
-                            queue->size, queue->aio_events, NULL);
-       split = io_split(&queue->opioctx, queue->aio_events, ret);
-       tapdisk_filter_events(queue->filter, queue->aio_events, split);
-
-       DBG("events: %d, tiocbs: %d\n", ret, split);
-
-       queue->iocbs_pending  -= ret;
-       queue->tiocbs_pending -= split;
-
-       for (i = split, ep = queue->aio_events; i-- > 0; ep++) {
-               iocb  = ep->obj;
-               tiocb = (struct tiocb *)iocb->data;
-               complete_tiocb(queue, tiocb, ep->res);
-       }
-
-       queue_deferred_tiocbs(queue);
-}
-
-static void
-tapdisk_tiocb_event(event_id_t id, char mode, void *private)
-{
-       struct tqueue *queue = private;
-       tapdisk_complete_tiocbs(queue);
-}
-
 /*
  * cancel_tiocbs may queue more tiocbs
  */
diff -r 9746b010e2bf -r 2b8c81d25096 tools/blktap2/drivers/tapdisk-queue.h
--- a/tools/blktap2/drivers/tapdisk-queue.h     Fri Nov 13 20:26:14 2009 -0800
+++ b/tools/blktap2/drivers/tapdisk-queue.h     Thu Jan 28 23:30:23 2010 -0800
@@ -55,16 +55,14 @@
 
 struct tqueue {
        int                   size;
-       int                   sync;
 
-       int                   poll_fd;
-       event_id_t            event;
-       io_context_t          aio_ctx;
+       const struct tio     *tio;
+       void                 *tio_data;
+
        struct opioctx        opioctx;
 
        int                   queued;
        struct iocb         **iocbs;
-       struct io_event      *aio_events;
 
        /* number of iocbs pending in the aio layer */
        int                   iocbs_pending;
@@ -86,6 +84,20 @@
        uint64_t              deferrals;
 };
 
+struct tio {
+       const char           *name;
+       size_t                data_size;
+
+       int  (*tio_setup)    (struct tqueue *queue, int qlen);
+       void (*tio_destroy)  (struct tqueue *queue);
+       int  (*tio_submit)   (struct tqueue *queue);
+};
+
+enum {
+       TIO_DRV_LIO     = 1,
+       TIO_DRV_RWIO    = 2,
+};
+
 /*
  * Interface for request producer (i.e., tapdisk)
  * NB: the following functions may cause additional tiocbs to be queued:
@@ -99,7 +111,7 @@
 #define tapdisk_queue_empty(q) ((q)->queued == 0)
 #define tapdisk_queue_full(q)  \
        (((q)->tiocbs_pending + (q)->queued) >= (q)->size)
-int tapdisk_init_queue(struct tqueue *, int size, int sync, struct tfilter *);
+int tapdisk_init_queue(struct tqueue *, int size, int drv, struct tfilter *);
 void tapdisk_free_queue(struct tqueue *);
 void tapdisk_debug_queue(struct tqueue *);
 void tapdisk_queue_tiocb(struct tqueue *, struct tiocb *);
diff -r 9746b010e2bf -r 2b8c81d25096 tools/blktap2/drivers/tapdisk-server.c
--- a/tools/blktap2/drivers/tapdisk-server.c    Fri Nov 13 20:26:14 2009 -0800
+++ b/tools/blktap2/drivers/tapdisk-server.c    Thu Jan 28 23:30:23 2010 -0800
@@ -236,7 +236,8 @@
 static int
 tapdisk_server_init_aio(void)
 {
-       return tapdisk_init_queue(&server.aio_queue, TAPDISK_TIOCBS, 0, NULL);
+       return tapdisk_init_queue(&server.aio_queue, TAPDISK_TIOCBS,
+                                 TIO_DRV_LIO, NULL);
 }
 
 static void
diff -r 9746b010e2bf -r 2b8c81d25096 tools/blktap2/drivers/tapdisk-vbd.c
--- a/tools/blktap2/drivers/tapdisk-vbd.c       Fri Nov 13 20:26:14 2009 -0800
+++ b/tools/blktap2/drivers/tapdisk-vbd.c       Thu Jan 28 23:30:23 2010 -0800
@@ -1260,6 +1260,8 @@
        int n;
        td_ring_t *ring;
 
+       tapdisk_vbd_check_state(vbd);
+
        ring = &vbd->ring;
        if (!ring->sring)
                return 0;
diff -r 9746b010e2bf -r 2b8c81d25096 tools/blktap2/include/blktaplib.h
--- a/tools/blktap2/include/blktaplib.h Fri Nov 13 20:26:14 2009 -0800
+++ b/tools/blktap2/include/blktaplib.h Thu Jan 28 23:30:23 2010 -0800
@@ -43,6 +43,7 @@
 #endif
 
 #define EPRINTF(_f, _a...) syslog(LOG_ERR, "tap-err:%s: " _f, __func__, ##_a)
+#define PERROR(_f, _a...)  EPRINTF(_f ": %s", ##_a, strerror(errno))
 
 #define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, XC_PAGE_SIZE)
 
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.