WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] [xen-unstable] stubdom: add asynchronous disk flush support

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] stubdom: add asynchronous disk flush support
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Tue, 08 Apr 2008 02:01:44 -0700
Delivery-date: Tue, 08 Apr 2008 02:05:53 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1207430425 -3600
# Node ID 6bf674bd386de8c3e4acd259c566aaf7206d6f11
# Parent  e0f9bee70cbfec0a14a3416beaadba9768f18eb6
stubdom: add asynchronous disk flush support

Signed-off-by: Samuel Thibault <samuel.thibault@xxxxxxxxxxxxx>
---
 extras/mini-os/blkfront.c         |  188 +++++++++++++++++++-------------------
 extras/mini-os/include/blkfront.h |   12 ++
 extras/mini-os/kernel.c           |   41 +++-----
 tools/ioemu/block-vbd.c           |   48 +++++++--
 4 files changed, 163 insertions(+), 126 deletions(-)

diff -r e0f9bee70cbf -r 6bf674bd386d extras/mini-os/blkfront.c
--- a/extras/mini-os/blkfront.c Sat Apr 05 22:19:02 2008 +0100
+++ b/extras/mini-os/blkfront.c Sat Apr 05 22:20:25 2008 +0100
@@ -48,11 +48,7 @@ struct blkfront_dev {
 
     char *nodename;
     char *backend;
-    unsigned sector_size;
-    unsigned sectors;
-    int mode;
-    int barrier;
-    int flush;
+    struct blkfront_info info;
 
 #ifdef HAVE_LIBC
     int fd;
@@ -70,7 +66,7 @@ void blkfront_handler(evtchn_port_t port
     wake_up(&blkfront_queue);
 }
 
-struct blkfront_dev *init_blkfront(char *nodename, uint64_t *sectors, unsigned *sector_size, int *mode, int *info)
+struct blkfront_dev *init_blkfront(char *nodename, struct blkfront_info *info)
 {
     xenbus_transaction_t xbt;
     char* err;
@@ -163,9 +159,9 @@ done:
             return NULL;
         }
         if (*c == 'w')
-            *mode = dev->mode = O_RDWR;
+            dev->info.mode = O_RDWR;
         else
-            *mode = dev->mode = O_RDONLY;
+            dev->info.mode = O_RDONLY;
         free(c);
 
         snprintf(path, sizeof(path), "%s/state", dev->backend);
@@ -177,24 +173,26 @@ done:
         xenbus_unwatch_path(XBT_NIL, path);
 
         snprintf(path, sizeof(path), "%s/info", dev->backend);
-        *info = xenbus_read_integer(path);
+        dev->info.info = xenbus_read_integer(path);
 
         snprintf(path, sizeof(path), "%s/sectors", dev->backend);
         // FIXME: read_integer returns an int, so disk size limited to 1TB for now
-        *sectors = dev->sectors = xenbus_read_integer(path);
+        dev->info.sectors = xenbus_read_integer(path);
 
         snprintf(path, sizeof(path), "%s/sector-size", dev->backend);
-        *sector_size = dev->sector_size = xenbus_read_integer(path);
+        dev->info.sector_size = xenbus_read_integer(path);
 
         snprintf(path, sizeof(path), "%s/feature-barrier", dev->backend);
-        dev->barrier = xenbus_read_integer(path);
+        dev->info.barrier = xenbus_read_integer(path);
 
         snprintf(path, sizeof(path), "%s/feature-flush-cache", dev->backend);
-        dev->flush = xenbus_read_integer(path);
+        dev->info.flush = xenbus_read_integer(path);
+
+        *info = dev->info;
     }
     unmask_evtchn(dev->evtchn);
 
-    printk("%u sectors of %u bytes\n", dev->sectors, dev->sector_size);
+    printk("%u sectors of %u bytes\n", dev->info.sectors, dev->info.sector_size);
     printk("**************************\n");
 
     return dev;
@@ -258,11 +256,11 @@ void blkfront_aio(struct blkfront_aiocb 
     uintptr_t start, end;
 
     // Can't io at non-sector-aligned location
-    ASSERT(!(aiocbp->aio_offset & (dev->sector_size-1)));
+    ASSERT(!(aiocbp->aio_offset & (dev->info.sector_size-1)));
     // Can't io non-sector-sized amounts
-    ASSERT(!(aiocbp->aio_nbytes & (dev->sector_size-1)));
+    ASSERT(!(aiocbp->aio_nbytes & (dev->info.sector_size-1)));
     // Can't io non-sector-aligned buffer
-    ASSERT(!((uintptr_t) aiocbp->aio_buf & (dev->sector_size-1)));
+    ASSERT(!((uintptr_t) aiocbp->aio_buf & (dev->info.sector_size-1)));
 
     start = (uintptr_t)aiocbp->aio_buf & PAGE_MASK;
     end = ((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes + PAGE_SIZE - 1) & PAGE_MASK;
@@ -280,7 +278,7 @@ void blkfront_aio(struct blkfront_aiocb 
     req->nr_segments = n;
     req->handle = dev->handle;
     req->id = (uintptr_t) aiocbp;
-    req->sector_number = aiocbp->aio_offset / dev->sector_size;
+    req->sector_number = aiocbp->aio_offset / dev->info.sector_size;
 
     for (j = 0; j < n; j++) {
        uintptr_t data = start + j * PAGE_SIZE;
@@ -292,10 +290,10 @@ void blkfront_aio(struct blkfront_aiocb 
        aiocbp->gref[j] = req->seg[j].gref =
             gnttab_grant_access(dev->dom, virtual_to_mfn(data), write);
        req->seg[j].first_sect = 0;
-       req->seg[j].last_sect = PAGE_SIZE / dev->sector_size - 1;
-    }
-    req->seg[0].first_sect = ((uintptr_t)aiocbp->aio_buf & ~PAGE_MASK) / dev->sector_size;
-    req->seg[n-1].last_sect = (((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes - 1) & ~PAGE_MASK) / dev->sector_size;
+       req->seg[j].last_sect = PAGE_SIZE / dev->info.sector_size - 1;
+    }
+    req->seg[0].first_sect = ((uintptr_t)aiocbp->aio_buf & ~PAGE_MASK) / dev->info.sector_size;
+    req->seg[n-1].last_sect = (((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes - 1) & ~PAGE_MASK) / dev->info.sector_size;
 
     dev->ring.req_prod_pvt = i + 1;
 
@@ -315,67 +313,7 @@ void blkfront_aio_read(struct blkfront_a
     blkfront_aio(aiocbp, 0);
 }
 
-int blkfront_aio_poll(struct blkfront_dev *dev)
-{
-    RING_IDX rp, cons;
-    struct blkif_response *rsp;
-    int more;
-
-moretodo:
-#ifdef HAVE_LIBC
-    files[dev->fd].read = 0;
-    mb(); /* Make sure to let the handler set read to 1 before we start looking at the ring */
-#endif
-
-    rp = dev->ring.sring->rsp_prod;
-    rmb(); /* Ensure we see queued responses up to 'rp'. */
-    cons = dev->ring.rsp_cons;
-
-    int nr_consumed = 0;
-    while ((cons != rp))
-    {
-       rsp = RING_GET_RESPONSE(&dev->ring, cons);
-       nr_consumed++;
-
-        if (rsp->status != BLKIF_RSP_OKAY)
-            printk("block error %d for op %d\n", rsp->status, rsp->operation);
-
-        switch (rsp->operation) {
-        case BLKIF_OP_READ:
-        case BLKIF_OP_WRITE:
-        {
-            struct blkfront_aiocb *aiocbp = (void*) (uintptr_t) rsp->id;
-            int status = rsp->status;
-            int j;
-
-            for (j = 0; j < aiocbp->n; j++)
-                gnttab_end_access(aiocbp->gref[j]);
-
-            dev->ring.rsp_cons = ++cons;
-            /* Nota: callback frees aiocbp itself */
-            aiocbp->aio_cb(aiocbp, status ? -EIO : 0);
-            if (dev->ring.rsp_cons != cons)
-                /* We reentered, we must not continue here */
-                goto out;
-            break;
-        }
-        default:
-            printk("unrecognized block operation %d response\n", rsp->operation);
-        case BLKIF_OP_WRITE_BARRIER:
-        case BLKIF_OP_FLUSH_DISKCACHE:
-            dev->ring.rsp_cons = ++cons;
-            break;
-        }
-    }
-
-out:
-    RING_FINAL_CHECK_FOR_RESPONSES(&dev->ring, more);
-    if (more) goto moretodo;
-
-    return nr_consumed;
-}
-
-static void blkfront_push_operation(struct blkfront_dev *dev, uint8_t op)
+static void blkfront_push_operation(struct blkfront_dev *dev, uint8_t op, uint64_t id)
 {
     int i;
     struct blkif_request *req;
@@ -387,8 +325,7 @@ static void blkfront_push_operation(stru
     req->operation = op;
     req->nr_segments = 0;
     req->handle = dev->handle;
-    /* Not used */
-    req->id = 0;
+    req->id = id;
     /* Not needed anyway, but the backend will check it */
     req->sector_number = 0;
     dev->ring.req_prod_pvt = i + 1;
@@ -397,16 +334,22 @@ static void blkfront_push_operation(stru
     if (notify) notify_remote_via_evtchn(dev->evtchn);
 }
 
+void blkfront_aio_push_operation(struct blkfront_aiocb *aiocbp, uint8_t op)
+{
+    struct blkfront_dev *dev = aiocbp->aio_dev;
+    blkfront_push_operation(dev, op, (uintptr_t) aiocbp);
+}
+
 void blkfront_sync(struct blkfront_dev *dev)
 {
     unsigned long flags;
 
-    if (dev->mode == O_RDWR) {
-        if (dev->barrier == 1)
-            blkfront_push_operation(dev, BLKIF_OP_WRITE_BARRIER);
-
-        if (dev->flush == 1)
-            blkfront_push_operation(dev, BLKIF_OP_FLUSH_DISKCACHE);
+    if (dev->info.mode == O_RDWR) {
+        if (dev->info.barrier == 1)
+            blkfront_push_operation(dev, BLKIF_OP_WRITE_BARRIER, 0);
+
+        if (dev->info.flush == 1)
+            blkfront_push_operation(dev, BLKIF_OP_FLUSH_DISKCACHE, 0);
     }
 
     /* Note: This won't finish if another thread enqueues requests.  */
@@ -426,6 +369,69 @@ void blkfront_sync(struct blkfront_dev *
     local_irq_restore(flags);
 }
 
+int blkfront_aio_poll(struct blkfront_dev *dev)
+{
+    RING_IDX rp, cons;
+    struct blkif_response *rsp;
+    int more;
+
+moretodo:
+#ifdef HAVE_LIBC
+    files[dev->fd].read = 0;
+    mb(); /* Make sure to let the handler set read to 1 before we start looking at the ring */
+#endif
+
+    rp = dev->ring.sring->rsp_prod;
+    rmb(); /* Ensure we see queued responses up to 'rp'. */
+    cons = dev->ring.rsp_cons;
+
+    int nr_consumed = 0;
+    while ((cons != rp))
+    {
+       rsp = RING_GET_RESPONSE(&dev->ring, cons);
+       nr_consumed++;
+
+        struct blkfront_aiocb *aiocbp = (void*) (uintptr_t) rsp->id;
+        int status = rsp->status;
+
+        if (status != BLKIF_RSP_OKAY)
+            printk("block error %d for op %d\n", status, rsp->operation);
+
+        switch (rsp->operation) {
+        case BLKIF_OP_READ:
+        case BLKIF_OP_WRITE:
+        {
+            int j;
+
+            for (j = 0; j < aiocbp->n; j++)
+                gnttab_end_access(aiocbp->gref[j]);
+
+            break;
+        }
+
+        case BLKIF_OP_WRITE_BARRIER:
+        case BLKIF_OP_FLUSH_DISKCACHE:
+            break;
+
+        default:
+            printk("unrecognized block operation %d response\n", rsp->operation);
+        }
+
+        dev->ring.rsp_cons = ++cons;
+        /* Nota: callback frees aiocbp itself */
+        if (aiocbp && aiocbp->aio_cb)
+            aiocbp->aio_cb(aiocbp, status ? -EIO : 0);
+        if (dev->ring.rsp_cons != cons)
+            /* We reentered, we must not continue here */
+            break;
+    }
+
+    RING_FINAL_CHECK_FOR_RESPONSES(&dev->ring, more);
+    if (more) goto moretodo;
+
+    return nr_consumed;
+}
+
 #ifdef HAVE_LIBC
 int blkfront_open(struct blkfront_dev *dev)
 {
diff -r e0f9bee70cbf -r 6bf674bd386d extras/mini-os/include/blkfront.h
--- a/extras/mini-os/include/blkfront.h Sat Apr 05 22:19:02 2008 +0100
+++ b/extras/mini-os/include/blkfront.h Sat Apr 05 22:20:25 2008 +0100
@@ -15,13 +15,23 @@ struct blkfront_aiocb
 
     void (*aio_cb)(struct blkfront_aiocb *aiocb, int ret);
 };
-struct blkfront_dev *init_blkfront(char *nodename, uint64_t *sectors, unsigned *sector_size, int *mode, int *info);
+struct blkfront_info
+{
+    uint64_t sectors;
+    unsigned sector_size;
+    int mode;
+    int info;
+    int barrier;
+    int flush;
+};
+struct blkfront_dev *init_blkfront(char *nodename, struct blkfront_info *info);
 #ifdef HAVE_LIBC
 int blkfront_open(struct blkfront_dev *dev);
 #endif
 void blkfront_aio(struct blkfront_aiocb *aiocbp, int write);
 void blkfront_aio_read(struct blkfront_aiocb *aiocbp);
 void blkfront_aio_write(struct blkfront_aiocb *aiocbp);
+void blkfront_aio_push_operation(struct blkfront_aiocb *aiocbp, uint8_t op);
 int blkfront_aio_poll(struct blkfront_dev *dev);
 void blkfront_sync(struct blkfront_dev *dev);
 void shutdown_blkfront(struct blkfront_dev *dev);
diff -r e0f9bee70cbf -r 6bf674bd386d extras/mini-os/kernel.c
--- a/extras/mini-os/kernel.c   Sat Apr 05 22:19:02 2008 +0100
+++ b/extras/mini-os/kernel.c   Sat Apr 05 22:20:25 2008 +0100
@@ -91,9 +91,7 @@ static void netfront_thread(void *p)
 }
 
 static struct blkfront_dev *blk_dev;
-static uint64_t blk_sectors;
-static unsigned blk_sector_size;
-static int blk_mode;
+static struct blkfront_info blk_info;
 static uint64_t blk_size_read;
 static uint64_t blk_size_write;
 
@@ -111,9 +109,9 @@ static struct blk_req *blk_alloc_req(uin
 {
     struct blk_req *req = xmalloc(struct blk_req);
     req->aiocb.aio_dev = blk_dev;
-    req->aiocb.aio_buf = _xmalloc(blk_sector_size, blk_sector_size);
-    req->aiocb.aio_nbytes = blk_sector_size;
-    req->aiocb.aio_offset = sector * blk_sector_size;
+    req->aiocb.aio_buf = _xmalloc(blk_info.sector_size, blk_info.sector_size);
+    req->aiocb.aio_nbytes = blk_info.sector_size;
+    req->aiocb.aio_offset = sector * blk_info.sector_size;
     req->aiocb.data = req;
     req->next = NULL;
     return req;
@@ -125,7 +123,7 @@ static void blk_read_completed(struct bl
     if (ret)
         printk("got error code %d when reading at offset %ld\n", ret, aiocb->aio_offset);
     else
-        blk_size_read += blk_sector_size;
+        blk_size_read += blk_info.sector_size;
     free(aiocb->aio_buf);
     free(req);
 }
@@ -154,10 +152,10 @@ static void blk_write_read_completed(str
         free(req);
         return;
     }
-    blk_size_read += blk_sector_size;
+    blk_size_read += blk_info.sector_size;
     buf = (int*) aiocb->aio_buf;
     rand_value = req->rand_value;
-    for (i = 0; i < blk_sector_size / sizeof(int); i++) {
+    for (i = 0; i < blk_info.sector_size / sizeof(int); i++) {
         if (buf[i] != rand_value) {
             printk("bogus data at offset %ld\n", aiocb->aio_offset + i);
             break;
@@ -177,7 +175,7 @@ static void blk_write_completed(struct b
         free(req);
         return;
     }
-    blk_size_write += blk_sector_size;
+    blk_size_write += blk_info.sector_size;
     /* Push write check */
     req->next = blk_to_read;
     blk_to_read = req;
@@ -195,7 +193,7 @@ static void blk_write_sector(uint64_t se
     req->rand_value = rand_value = rand();
 
     buf = (int*) req->aiocb.aio_buf;
-    for (i = 0; i < blk_sector_size / sizeof(int); i++) {
+    for (i = 0; i < blk_info.sector_size / sizeof(int); i++) {
         buf[i] = rand_value;
         rand_value *= RAND_MIX;
     }
@@ -207,35 +205,34 @@ static void blkfront_thread(void *p)
 static void blkfront_thread(void *p)
 {
     time_t lasttime = 0;
-    int blk_info;
-
-    blk_dev = init_blkfront(NULL, &blk_sectors, &blk_sector_size, &blk_mode, &blk_info);
+
+    blk_dev = init_blkfront(NULL, &blk_info);
     if (!blk_dev)
         return;
 
-    if (blk_info & VDISK_CDROM)
+    if (blk_info.info & VDISK_CDROM)
         printk("Block device is a CDROM\n");
-    if (blk_info & VDISK_REMOVABLE)
+    if (blk_info.info & VDISK_REMOVABLE)
         printk("Block device is removable\n");
-    if (blk_info & VDISK_READONLY)
+    if (blk_info.info & VDISK_READONLY)
         printk("Block device is read-only\n");
 
 #ifdef BLKTEST_WRITE
-    if (blk_mode == O_RDWR) {
+    if (blk_info.mode == O_RDWR) {
         blk_write_sector(0);
-        blk_write_sector(blk_sectors-1);
+        blk_write_sector(blk_info.sectors-1);
     } else
 #endif
     {
         blk_read_sector(0);
-        blk_read_sector(blk_sectors-1);
+        blk_read_sector(blk_info.sectors-1);
     }
 
     while (1) {
-        uint64_t sector = rand() % blk_sectors;
+        uint64_t sector = rand() % blk_info.sectors;
         struct timeval tv;
 #ifdef BLKTEST_WRITE
-        if (blk_mode == O_RDWR)
+        if (blk_info.mode == O_RDWR)
             blk_write_sector(sector);
         else
 #endif
diff -r e0f9bee70cbf -r 6bf674bd386d tools/ioemu/block-vbd.c
--- a/tools/ioemu/block-vbd.c   Sat Apr 05 22:19:02 2008 +0100
+++ b/tools/ioemu/block-vbd.c   Sat Apr 05 22:20:25 2008 +0100
@@ -49,11 +49,7 @@ typedef struct BDRVVbdState {
 typedef struct BDRVVbdState {
     struct blkfront_dev *dev;
     int fd;
-    int type;
-    int mode;
-    int info;
-    uint64_t sectors;
-    unsigned sector_size;
+    struct blkfront_info info;
     QEMU_LIST_ENTRY(BDRVVbdState) list;
 } BDRVVbdState;
 
@@ -81,13 +77,13 @@ static int vbd_open(BlockDriverState *bs
     //handy to test posix access
     //return -EIO;
 
-    s->dev = init_blkfront((char *) filename, &s->sectors, &s->sector_size, &s->mode, &s->info);
+    s->dev = init_blkfront((char *) filename, &s->info);
 
     if (!s->dev)
        return -EIO;
 
-    if (SECTOR_SIZE % s->sector_size) {
-       printf("sector size is %d, we only support sector sizes that divide %d\n", s->sector_size, SECTOR_SIZE);
+    if (SECTOR_SIZE % s->info.sector_size) {
+       printf("sector size is %d, we only support sector sizes that divide %d\n", s->info.sector_size, SECTOR_SIZE);
        return -EIO;
     }
 
@@ -267,6 +263,32 @@ static void vbd_aio_cancel(BlockDriverAI
     // Try to cancel. If can't, wait for it, drop the callback and call qemu_aio_release(acb)
 }
 
+static void vbd_nop_cb(void *opaque, int ret)
+{
+}
+
+static BlockDriverAIOCB *vbd_aio_flush(BlockDriverState *bs,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BDRVVbdState *s = bs->opaque;
+    VbdAIOCB *acb = NULL;
+
+    if (s->info.barrier == 1) {
+        acb = vbd_aio_setup(bs, 0, NULL, 0,
+                s->info.flush == 1 ? vbd_nop_cb : cb, opaque);
+        if (!acb)
+            return NULL;
+        blkfront_aio_push_operation(&acb->aiocb, BLKIF_OP_WRITE_BARRIER);
+    }
+    if (s->info.flush == 1) {
+        acb = vbd_aio_setup(bs, 0, NULL, 0, cb, opaque);
+        if (!acb)
+            return NULL;
+        blkfront_aio_push_operation(&acb->aiocb, BLKIF_OP_FLUSH_DISKCACHE);
+    }
+    return &acb->common;
+}
+
 static void vbd_close(BlockDriverState *bs)
 {
     BDRVVbdState *s = bs->opaque;
@@ -282,13 +304,14 @@ static int64_t  vbd_getlength(BlockDrive
 static int64_t  vbd_getlength(BlockDriverState *bs)
 {
     BDRVVbdState *s = bs->opaque;
-    return s->sectors * s->sector_size;
-}
-
-static void vbd_flush(BlockDriverState *bs)
+    return s->info.sectors * s->info.sector_size;
+}
+
+static int vbd_flush(BlockDriverState *bs)
 {
     BDRVVbdState *s = bs->opaque;
     blkfront_sync(s->dev);
+    return 0;
 }
 
 /***********************************************/
@@ -333,6 +356,7 @@ BlockDriver bdrv_vbd = {
     .bdrv_aio_read = vbd_aio_read,
     .bdrv_aio_write = vbd_aio_write,
     .bdrv_aio_cancel = vbd_aio_cancel,
+    .bdrv_aio_flush = vbd_aio_flush,
     .aiocb_size = sizeof(VbdAIOCB),
     .bdrv_read = vbd_read,
     .bdrv_write = vbd_write,

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

[Prev in Thread] Current Thread [Next in Thread]
  • [Xen-changelog] [xen-unstable] stubdom: add asynchronous disk flush support, Xen patchbot-unstable <=