stubdom: add asynchronous disk flush support
Signed-off-by: Samuel Thibault <samuel.thibault@xxxxxxxxxxxxx>
diff -r 4558664bea4a extras/mini-os/blkfront.c
--- a/extras/mini-os/blkfront.c Fri Apr 04 16:07:44 2008 +0100
+++ b/extras/mini-os/blkfront.c Sat Apr 05 13:25:43 2008 +0100
@@ -48,11 +48,7 @@ struct blkfront_dev {
char *nodename;
char *backend;
- unsigned sector_size;
- unsigned sectors;
- int mode;
- int barrier;
- int flush;
+ struct blkfront_info info;
#ifdef HAVE_LIBC
int fd;
@@ -70,7 +66,7 @@ void blkfront_handler(evtchn_port_t port
wake_up(&blkfront_queue);
}
-struct blkfront_dev *init_blkfront(char *nodename, uint64_t *sectors, unsigned *sector_size, int *mode, int *info)
+struct blkfront_dev *init_blkfront(char *nodename, struct blkfront_info *info)
{
xenbus_transaction_t xbt;
char* err;
@@ -163,9 +159,9 @@ done:
return NULL;
}
if (*c == 'w')
- *mode = dev->mode = O_RDWR;
+ dev->info.mode = O_RDWR;
else
- *mode = dev->mode = O_RDONLY;
+ dev->info.mode = O_RDONLY;
free(c);
snprintf(path, sizeof(path), "%s/state", dev->backend);
@@ -177,24 +173,26 @@ done:
xenbus_unwatch_path(XBT_NIL, path);
snprintf(path, sizeof(path), "%s/info", dev->backend);
- *info = xenbus_read_integer(path);
+ dev->info.info = xenbus_read_integer(path);
snprintf(path, sizeof(path), "%s/sectors", dev->backend);
// FIXME: read_integer returns an int, so disk size limited to 1TB for now
- *sectors = dev->sectors = xenbus_read_integer(path);
+ dev->info.sectors = xenbus_read_integer(path);
snprintf(path, sizeof(path), "%s/sector-size", dev->backend);
- *sector_size = dev->sector_size = xenbus_read_integer(path);
+ dev->info.sector_size = xenbus_read_integer(path);
snprintf(path, sizeof(path), "%s/feature-barrier", dev->backend);
- dev->barrier = xenbus_read_integer(path);
+ dev->info.barrier = xenbus_read_integer(path);
snprintf(path, sizeof(path), "%s/feature-flush-cache", dev->backend);
- dev->flush = xenbus_read_integer(path);
+ dev->info.flush = xenbus_read_integer(path);
+
+ *info = dev->info;
}
unmask_evtchn(dev->evtchn);
- printk("%u sectors of %u bytes\n", dev->sectors, dev->sector_size);
+ printk("%u sectors of %u bytes\n", dev->info.sectors, dev->info.sector_size);
printk("**************************\n");
return dev;
@@ -258,11 +256,11 @@ void blkfront_aio(struct blkfront_aiocb
uintptr_t start, end;
// Can't io at non-sector-aligned location
- ASSERT(!(aiocbp->aio_offset & (dev->sector_size-1)));
+ ASSERT(!(aiocbp->aio_offset & (dev->info.sector_size-1)));
// Can't io non-sector-sized amounts
- ASSERT(!(aiocbp->aio_nbytes & (dev->sector_size-1)));
+ ASSERT(!(aiocbp->aio_nbytes & (dev->info.sector_size-1)));
// Can't io non-sector-aligned buffer
- ASSERT(!((uintptr_t) aiocbp->aio_buf & (dev->sector_size-1)));
+ ASSERT(!((uintptr_t) aiocbp->aio_buf & (dev->info.sector_size-1)));
start = (uintptr_t)aiocbp->aio_buf & PAGE_MASK;
end = ((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes + PAGE_SIZE - 1) & PAGE_MASK;
@@ -280,7 +278,7 @@ void blkfront_aio(struct blkfront_aiocb
req->nr_segments = n;
req->handle = dev->handle;
req->id = (uintptr_t) aiocbp;
- req->sector_number = aiocbp->aio_offset / dev->sector_size;
+ req->sector_number = aiocbp->aio_offset / dev->info.sector_size;
for (j = 0; j < n; j++) {
uintptr_t data = start + j * PAGE_SIZE;
@@ -292,10 +290,10 @@ void blkfront_aio(struct blkfront_aiocb
aiocbp->gref[j] = req->seg[j].gref =
gnttab_grant_access(dev->dom, virtual_to_mfn(data), write);
req->seg[j].first_sect = 0;
- req->seg[j].last_sect = PAGE_SIZE / dev->sector_size - 1;
+ req->seg[j].last_sect = PAGE_SIZE / dev->info.sector_size - 1;
}
- req->seg[0].first_sect = ((uintptr_t)aiocbp->aio_buf & ~PAGE_MASK) / dev->sector_size;
- req->seg[n-1].last_sect = (((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes - 1) & ~PAGE_MASK) / dev->sector_size;
+ req->seg[0].first_sect = ((uintptr_t)aiocbp->aio_buf & ~PAGE_MASK) / dev->info.sector_size;
+ req->seg[n-1].last_sect = (((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes - 1) & ~PAGE_MASK) / dev->info.sector_size;
dev->ring.req_prod_pvt = i + 1;
@@ -313,6 +311,62 @@ void blkfront_aio_read(struct blkfront_a
void blkfront_aio_read(struct blkfront_aiocb *aiocbp)
{
blkfront_aio(aiocbp, 0);
+}
+
+static void blkfront_push_operation(struct blkfront_dev *dev, uint8_t op, uint64_t id)
+{
+ int i;
+ struct blkif_request *req;
+ int notify;
+
+ blkfront_wait_slot(dev);
+ i = dev->ring.req_prod_pvt;
+ req = RING_GET_REQUEST(&dev->ring, i);
+ req->operation = op;
+ req->nr_segments = 0;
+ req->handle = dev->handle;
+ req->id = id;
+ /* Not needed anyway, but the backend will check it */
+ req->sector_number = 0;
+ dev->ring.req_prod_pvt = i + 1;
+ wmb();
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify);
+ if (notify) notify_remote_via_evtchn(dev->evtchn);
+}
+
+void blkfront_aio_push_operation(struct blkfront_aiocb *aiocbp, uint8_t op)
+{
+ struct blkfront_dev *dev = aiocbp->aio_dev;
+ blkfront_push_operation(dev, op, (uintptr_t) aiocbp);
+}
+
+void blkfront_sync(struct blkfront_dev *dev)
+{
+ unsigned long flags;
+
+ if (dev->info.mode == O_RDWR) {
+ if (dev->info.barrier == 1)
+ blkfront_push_operation(dev, BLKIF_OP_WRITE_BARRIER, 0);
+
+ if (dev->info.flush == 1)
+ blkfront_push_operation(dev, BLKIF_OP_FLUSH_DISKCACHE, 0);
+ }
+
+ /* Note: This won't finish if another thread enqueues requests. */
+ local_irq_save(flags);
+ DEFINE_WAIT(w);
+ while (1) {
+ blkfront_aio_poll(dev);
+ if (RING_FREE_REQUESTS(&dev->ring) == RING_SIZE(&dev->ring))
+ break;
+
+ add_waiter(w, blkfront_queue);
+ local_irq_restore(flags);
+ schedule();
+ local_irq_save(flags);
+ }
+ remove_waiter(w);
+ local_irq_restore(flags);
}
int blkfront_aio_poll(struct blkfront_dev *dev)
@@ -337,93 +391,45 @@ moretodo:
rsp = RING_GET_RESPONSE(&dev->ring, cons);
nr_consumed++;
- if (rsp->status != BLKIF_RSP_OKAY)
- printk("block error %d for op %d\n", rsp->status, rsp->operation);
+ struct blkfront_aiocb *aiocbp = (void*) (uintptr_t) rsp->id;
+ int status = rsp->status;
+
+ if (status != BLKIF_RSP_OKAY)
+ printk("block error %d for op %d\n", status, rsp->operation);
switch (rsp->operation) {
case BLKIF_OP_READ:
case BLKIF_OP_WRITE:
{
- struct blkfront_aiocb *aiocbp = (void*) (uintptr_t) rsp->id;
- int status = rsp->status;
int j;
for (j = 0; j < aiocbp->n; j++)
gnttab_end_access(aiocbp->gref[j]);
- dev->ring.rsp_cons = ++cons;
- /* Nota: callback frees aiocbp itself */
- aiocbp->aio_cb(aiocbp, status ? -EIO : 0);
- if (dev->ring.rsp_cons != cons)
- /* We reentered, we must not continue here */
- goto out;
break;
}
+
+ case BLKIF_OP_WRITE_BARRIER:
+ case BLKIF_OP_FLUSH_DISKCACHE:
+ break;
+
default:
printk("unrecognized block operation %d response\n", rsp->operation);
- case BLKIF_OP_WRITE_BARRIER:
- case BLKIF_OP_FLUSH_DISKCACHE:
- dev->ring.rsp_cons = ++cons;
+ }
+
+ dev->ring.rsp_cons = ++cons;
+ /* Nota: callback frees aiocbp itself */
+ if (aiocbp && aiocbp->aio_cb)
+ aiocbp->aio_cb(aiocbp, status ? -EIO : 0);
+ if (dev->ring.rsp_cons != cons)
+ /* We reentered, we must not continue here */
break;
- }
}
-out:
RING_FINAL_CHECK_FOR_RESPONSES(&dev->ring, more);
if (more) goto moretodo;
return nr_consumed;
-}
-
-static void blkfront_push_operation(struct blkfront_dev *dev, uint8_t op)
-{
- int i;
- struct blkif_request *req;
- int notify;
-
- blkfront_wait_slot(dev);
- i = dev->ring.req_prod_pvt;
- req = RING_GET_REQUEST(&dev->ring, i);
- req->operation = op;
- req->nr_segments = 0;
- req->handle = dev->handle;
- /* Not used */
- req->id = 0;
- /* Not needed anyway, but the backend will check it */
- req->sector_number = 0;
- dev->ring.req_prod_pvt = i + 1;
- wmb();
- RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify);
- if (notify) notify_remote_via_evtchn(dev->evtchn);
-}
-
-void blkfront_sync(struct blkfront_dev *dev)
-{
- unsigned long flags;
-
- if (dev->mode == O_RDWR) {
- if (dev->barrier == 1)
- blkfront_push_operation(dev, BLKIF_OP_WRITE_BARRIER);
-
- if (dev->flush == 1)
- blkfront_push_operation(dev, BLKIF_OP_FLUSH_DISKCACHE);
- }
-
- /* Note: This won't finish if another thread enqueues requests. */
- local_irq_save(flags);
- DEFINE_WAIT(w);
- while (1) {
- blkfront_aio_poll(dev);
- if (RING_FREE_REQUESTS(&dev->ring) == RING_SIZE(&dev->ring))
- break;
-
- add_waiter(w, blkfront_queue);
- local_irq_restore(flags);
- schedule();
- local_irq_save(flags);
- }
- remove_waiter(w);
- local_irq_restore(flags);
}
#ifdef HAVE_LIBC
diff -r 4558664bea4a extras/mini-os/include/blkfront.h
--- a/extras/mini-os/include/blkfront.h Fri Apr 04 16:07:44 2008 +0100
+++ b/extras/mini-os/include/blkfront.h Sat Apr 05 13:25:43 2008 +0100
@@ -15,13 +15,23 @@ struct blkfront_aiocb
void (*aio_cb)(struct blkfront_aiocb *aiocb, int ret);
};
-struct blkfront_dev *init_blkfront(char *nodename, uint64_t *sectors, unsigned *sector_size, int *mode, int *info);
+struct blkfront_info
+{
+ uint64_t sectors;
+ unsigned sector_size;
+ int mode;
+ int info;
+ int barrier;
+ int flush;
+};
+struct blkfront_dev *init_blkfront(char *nodename, struct blkfront_info *info);
#ifdef HAVE_LIBC
int blkfront_open(struct blkfront_dev *dev);
#endif
void blkfront_aio(struct blkfront_aiocb *aiocbp, int write);
void blkfront_aio_read(struct blkfront_aiocb *aiocbp);
void blkfront_aio_write(struct blkfront_aiocb *aiocbp);
+void blkfront_aio_push_operation(struct blkfront_aiocb *aiocbp, uint8_t op);
int blkfront_aio_poll(struct blkfront_dev *dev);
void blkfront_sync(struct blkfront_dev *dev);
void shutdown_blkfront(struct blkfront_dev *dev);
diff -r 4558664bea4a extras/mini-os/kernel.c
--- a/extras/mini-os/kernel.c Fri Apr 04 16:07:44 2008 +0100
+++ b/extras/mini-os/kernel.c Sat Apr 05 13:25:43 2008 +0100
@@ -91,9 +91,7 @@ static void netfront_thread(void *p)
}
static struct blkfront_dev *blk_dev;
-static uint64_t blk_sectors;
-static unsigned blk_sector_size;
-static int blk_mode;
+static struct blkfront_info blk_info;
static uint64_t blk_size_read;
static uint64_t blk_size_write;
@@ -111,9 +109,9 @@ static struct blk_req *blk_alloc_req(uin
{
struct blk_req *req = xmalloc(struct blk_req);
req->aiocb.aio_dev = blk_dev;
- req->aiocb.aio_buf = _xmalloc(blk_sector_size, blk_sector_size);
- req->aiocb.aio_nbytes = blk_sector_size;
- req->aiocb.aio_offset = sector * blk_sector_size;
+ req->aiocb.aio_buf = _xmalloc(blk_info.sector_size, blk_info.sector_size);
+ req->aiocb.aio_nbytes = blk_info.sector_size;
+ req->aiocb.aio_offset = sector * blk_info.sector_size;
req->aiocb.data = req;
req->next = NULL;
return req;
@@ -125,7 +123,7 @@ static void blk_read_completed(struct bl
if (ret)
printk("got error code %d when reading at offset %ld\n", ret, aiocb->aio_offset);
else
- blk_size_read += blk_sector_size;
+ blk_size_read += blk_info.sector_size;
free(aiocb->aio_buf);
free(req);
}
@@ -154,10 +152,10 @@ static void blk_write_read_completed(str
free(req);
return;
}
- blk_size_read += blk_sector_size;
+ blk_size_read += blk_info.sector_size;
buf = (int*) aiocb->aio_buf;
rand_value = req->rand_value;
- for (i = 0; i < blk_sector_size / sizeof(int); i++) {
+ for (i = 0; i < blk_info.sector_size / sizeof(int); i++) {
if (buf[i] != rand_value) {
printk("bogus data at offset %ld\n", aiocb->aio_offset + i);
break;
@@ -177,7 +175,7 @@ static void blk_write_completed(struct b
free(req);
return;
}
- blk_size_write += blk_sector_size;
+ blk_size_write += blk_info.sector_size;
/* Push write check */
req->next = blk_to_read;
blk_to_read = req;
@@ -195,7 +193,7 @@ static void blk_write_sector(uint64_t se
req->rand_value = rand_value = rand();
buf = (int*) req->aiocb.aio_buf;
- for (i = 0; i < blk_sector_size / sizeof(int); i++) {
+ for (i = 0; i < blk_info.sector_size / sizeof(int); i++) {
buf[i] = rand_value;
rand_value *= RAND_MIX;
}
@@ -207,35 +205,34 @@ static void blkfront_thread(void *p)
static void blkfront_thread(void *p)
{
time_t lasttime = 0;
- int blk_info;
- blk_dev = init_blkfront(NULL, &blk_sectors, &blk_sector_size, &blk_mode, &blk_info);
+ blk_dev = init_blkfront(NULL, &blk_info);
if (!blk_dev)
return;
- if (blk_info & VDISK_CDROM)
+ if (blk_info.info & VDISK_CDROM)
printk("Block device is a CDROM\n");
- if (blk_info & VDISK_REMOVABLE)
+ if (blk_info.info & VDISK_REMOVABLE)
printk("Block device is removable\n");
- if (blk_info & VDISK_READONLY)
+ if (blk_info.info & VDISK_READONLY)
printk("Block device is read-only\n");
#ifdef BLKTEST_WRITE
- if (blk_mode == O_RDWR) {
+ if (blk_info.mode == O_RDWR) {
blk_write_sector(0);
- blk_write_sector(blk_sectors-1);
+ blk_write_sector(blk_info.sectors-1);
} else
#endif
{
blk_read_sector(0);
- blk_read_sector(blk_sectors-1);
+ blk_read_sector(blk_info.sectors-1);
}
while (1) {
- uint64_t sector = rand() % blk_sectors;
+ uint64_t sector = rand() % blk_info.sectors;
struct timeval tv;
#ifdef BLKTEST_WRITE
- if (blk_mode == O_RDWR)
+ if (blk_info.mode == O_RDWR)
blk_write_sector(sector);
else
#endif
diff -r 4558664bea4a tools/ioemu/block-vbd.c
--- a/tools/ioemu/block-vbd.c Fri Apr 04 16:07:44 2008 +0100
+++ b/tools/ioemu/block-vbd.c Sat Apr 05 13:25:43 2008 +0100
@@ -49,11 +49,7 @@ typedef struct BDRVVbdState {
typedef struct BDRVVbdState {
struct blkfront_dev *dev;
int fd;
- int type;
- int mode;
- int info;
- uint64_t sectors;
- unsigned sector_size;
+ struct blkfront_info info;
QEMU_LIST_ENTRY(BDRVVbdState) list;
} BDRVVbdState;
@@ -81,13 +77,13 @@ static int vbd_open(BlockDriverState *bs
//handy to test posix access
//return -EIO;
- s->dev = init_blkfront((char *) filename, &s->sectors, &s->sector_size, &s->mode, &s->info);
+ s->dev = init_blkfront((char *) filename, &s->info);
if (!s->dev)
return -EIO;
- if (SECTOR_SIZE % s->sector_size) {
- printf("sector size is %d, we only support sector sizes that divide %d\n", s->sector_size, SECTOR_SIZE);
+ if (SECTOR_SIZE % s->info.sector_size) {
+ printf("sector size is %d, we only support sector sizes that divide %d\n", s->info.sector_size, SECTOR_SIZE);
return -EIO;
}
@@ -267,6 +263,32 @@ static void vbd_aio_cancel(BlockDriverAI
// Try to cancel. If can't, wait for it, drop the callback and call qemu_aio_release(acb)
}
+static void vbd_nop_cb(void *opaque, int ret)
+{
+}
+
+static BlockDriverAIOCB *vbd_aio_flush(BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ BDRVVbdState *s = bs->opaque;
+ VbdAIOCB *acb = NULL;
+
+ if (s->info.barrier == 1) {
+ acb = vbd_aio_setup(bs, 0, NULL, 0,
+ s->info.flush == 1 ? vbd_nop_cb : cb, opaque);
+ if (!acb)
+ return NULL;
+ blkfront_aio_push_operation(&acb->aiocb, BLKIF_OP_WRITE_BARRIER);
+ }
+ if (s->info.flush == 1) {
+ acb = vbd_aio_setup(bs, 0, NULL, 0, cb, opaque);
+ if (!acb)
+ return NULL;
+ blkfront_aio_push_operation(&acb->aiocb, BLKIF_OP_FLUSH_DISKCACHE);
+ }
+ return &acb->common;
+}
+
static void vbd_close(BlockDriverState *bs)
{
BDRVVbdState *s = bs->opaque;
@@ -282,13 +304,14 @@ static int64_t vbd_getlength(BlockDrive
static int64_t vbd_getlength(BlockDriverState *bs)
{
BDRVVbdState *s = bs->opaque;
- return s->sectors * s->sector_size;
+ return s->info.sectors * s->info.sector_size;
}
-static void vbd_flush(BlockDriverState *bs)
+static int vbd_flush(BlockDriverState *bs)
{
BDRVVbdState *s = bs->opaque;
blkfront_sync(s->dev);
+ return 0;
}
/***********************************************/
@@ -333,6 +356,7 @@ BlockDriver bdrv_vbd = {
.bdrv_aio_read = vbd_aio_read,
.bdrv_aio_write = vbd_aio_write,
.bdrv_aio_cancel = vbd_aio_cancel,
+ .bdrv_aio_flush = vbd_aio_flush,
.aiocb_size = sizeof(VbdAIOCB),
.bdrv_read = vbd_read,
.bdrv_write = vbd_write,
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel