diff -r 3c4c3d48a835 tools/blktap/drivers/block-aio.c --- a/tools/blktap/drivers/block-aio.c Thu Aug 26 11:16:56 2010 +0100 +++ b/tools/blktap/drivers/block-aio.c Wed Nov 03 22:43:16 2010 +0800 @@ -44,7 +44,9 @@ #include "tapdisk.h" #include "tapaio.h" #include "blk.h" - +#include +#include + #define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ) /* *BSD has no O_LARGEFILE */ @@ -245,6 +247,70 @@ return -EINVAL; } +static inline void kick_responses(struct td_state *s) /* Push ring responses and issue BLKTAP_IOCTL_KICK_FE, but only when rsp_prod_pvt differs from the shared rsp_prod. */ +{ + tapdev_info_t *info = s->ring_info; + + if (info->fe_ring.rsp_prod_pvt != info->fe_ring.sring->rsp_prod) + { + RING_PUSH_RESPONSES(&info->fe_ring); + ioctl(info->fd, BLKTAP_IOCTL_KICK_FE); + } +} + +static void io_done(struct disk_driver *dd, int sid) /* Kicks responses when sid > MAX_IOFD (td_do_callbacks is then skipped), otherwise when td_do_callbacks(dd, sid) returns a positive value. */ +{ + struct tap_disk *drv = dd->drv; + + if (sid > MAX_IOFD || drv->td_do_callbacks(dd, sid) > 0) + kick_responses(dd->td_state); + + return; +} +/* tdaio_flush waits for all previously submitted aio requests to complete, then calls fsync() and returns its result. +* If the file system implemented .aio_fsync, aio_fsync could be used directly, but +* currently no file system implements .aio_fsync. 
+*/ +int tdaio_flush(struct disk_driver *dd) +{ + struct tdaio_state *prv = (struct tdaio_state *)dd->private; + int fd = prv->fd; + int ret; + fd_set readfds; + int maxfds; + + /* submit previously queued aio requests; if td_submit leaves dd->early positive, kick responses and reset it */ + dd->early += dd->drv->td_submit(dd); + if (dd->early > 0) { + io_done(dd, MAX_IOFD + 1); + dd->early = 0; + } + + if ( prv->aio.iocb_free_count < MAX_AIO_REQS ){ + /* wait until the outstanding aio requests have completed, i.e. iocb_free_count is back at MAX_AIO_REQS */ + while(1){ + FD_ZERO(&readfds); + FD_SET(dd->io_fd[READ],&readfds); + maxfds = dd->io_fd[READ]; + + /* block, without a timeout, until io_fd[READ] becomes readable */ + ret = select(maxfds + 1, &readfds, (fd_set *) 0, + (fd_set *) 0, NULL); /* NOTE(review): only ret > 0 is acted on; a select() error is not reported and the loop simply retries. */ + if ( ret > 0 ){ + if(dd->io_fd[READ] && FD_ISSET(dd->io_fd[READ],&readfds)) + io_done(dd,READ); + } + + if(prv->aio.iocb_free_count >= MAX_AIO_REQS) + break; + } + } + + /* flush data from cache to disk */ + ret = fsync(fd); + return ret; +} + struct tap_disk tapdisk_aio = { .disk_type = "tapdisk_aio", .private_data_size = sizeof(struct tdaio_state), @@ -255,5 +321,6 @@ .td_close = tdaio_close, .td_do_callbacks = tdaio_do_callbacks, .td_get_parent_id = tdaio_get_parent_id, - .td_validate_parent = tdaio_validate_parent + .td_validate_parent = tdaio_validate_parent, + .td_flush = tdaio_flush }; diff -r 3c4c3d48a835 tools/blktap/drivers/block-qcow.c --- a/tools/blktap/drivers/block-qcow.c Thu Aug 26 11:16:56 2010 +0100 +++ b/tools/blktap/drivers/block-qcow.c Wed Nov 03 22:43:16 2010 +0800 @@ -1420,6 +1420,70 @@ return 0; } +static inline void kick_responses(struct td_state *s) /* Push ring responses and issue BLKTAP_IOCTL_KICK_FE, but only when rsp_prod_pvt differs from the shared rsp_prod. */ +{ + tapdev_info_t *info = s->ring_info; + + if (info->fe_ring.rsp_prod_pvt != info->fe_ring.sring->rsp_prod) + { + RING_PUSH_RESPONSES(&info->fe_ring); + ioctl(info->fd, BLKTAP_IOCTL_KICK_FE); + } +} + +static void io_done(struct disk_driver *dd, int sid) /* Kicks responses when sid > MAX_IOFD (td_do_callbacks is then skipped), otherwise when td_do_callbacks(dd, sid) returns a positive value. */ +{ + struct tap_disk *drv = dd->drv; + + if (sid > MAX_IOFD || drv->td_do_callbacks(dd, sid) > 0) + kick_responses(dd->td_state); + + return; +} +/* Like tdaio_flush, tdqcow_flush waits for all previously submitted aio requests 
to complete, +* then calls fsync() and returns its result. If the file system implemented .aio_fsync, it could be used instead, but +* currently no file system implements .aio_fsync. +*/ +int tdqcow_flush(struct disk_driver *dd) +{ + struct tdqcow_state *prv = (struct tdqcow_state *)dd->private; + int fd = prv->fd; + int ret; + fd_set readfds; + int maxfds; + + /* submit previously queued aio requests; if td_submit leaves dd->early positive, kick responses and reset it */ + dd->early += dd->drv->td_submit(dd); + if (dd->early > 0) { + io_done(dd, MAX_IOFD + 1); + dd->early = 0; + } + + if ( prv->aio.iocb_free_count < prv->aio.max_aio_reqs ){ + /* wait until the outstanding aio requests have completed, i.e. iocb_free_count is back at max_aio_reqs */ + while(1){ + FD_ZERO(&readfds); + FD_SET(dd->io_fd[READ],&readfds); + maxfds = dd->io_fd[READ]; + + /* block, without a timeout, until io_fd[READ] becomes readable */ + ret = select(maxfds + 1, &readfds, (fd_set *) 0, + (fd_set *) 0, NULL); /* NOTE(review): only ret > 0 is acted on; a select() error is not reported and the loop simply retries. */ + if ( ret > 0 ){ + if(dd->io_fd[READ] && FD_ISSET(dd->io_fd[READ],&readfds)) + io_done(dd,READ); + } + + if(prv->aio.iocb_free_count >= prv->aio.max_aio_reqs) + break; + } + } + + /* flush data from cache to disk */ + ret = fsync(fd); + return ret; +} + struct tap_disk tapdisk_qcow = { .disk_type = "tapdisk_qcow", .private_data_size = sizeof(struct tdqcow_state), @@ -1430,5 +1494,6 @@ .td_close = tdqcow_close, .td_do_callbacks = tdqcow_do_callbacks, .td_get_parent_id = tdqcow_get_parent_id, - .td_validate_parent = tdqcow_validate_parent + .td_validate_parent = tdqcow_validate_parent, + .td_flush = tdqcow_flush }; diff -r 3c4c3d48a835 tools/blktap/drivers/block-qcow2.c --- a/tools/blktap/drivers/block-qcow2.c Thu Aug 26 11:16:56 2010 +0100 +++ b/tools/blktap/drivers/block-qcow2.c Wed Nov 03 22:43:16 2010 +0800 @@ -31,6 +31,7 @@ #include #include #include +#include #include "tapdisk.h" #include "tapaio.h" @@ -2077,7 +2078,80 @@ return ret; } +#ifdef USE_AIO +static inline void kick_responses(struct td_state *s) /* Push ring responses and issue BLKTAP_IOCTL_KICK_FE, but only when rsp_prod_pvt differs from the shared rsp_prod. */ +{ + tapdev_info_t *info = s->ring_info; + if (info->fe_ring.rsp_prod_pvt != info->fe_ring.sring->rsp_prod) + { + RING_PUSH_RESPONSES(&info->fe_ring); + ioctl(info->fd, 
BLKTAP_IOCTL_KICK_FE); + } +} + +static void io_done(struct disk_driver *dd, int sid) /* Kicks responses when sid > MAX_IOFD (td_do_callbacks is then skipped), otherwise when td_do_callbacks(dd, sid) returns a positive value. */ +{ + struct tap_disk *drv = dd->drv; + + if (sid > MAX_IOFD || drv->td_do_callbacks(dd, sid) > 0) + kick_responses(dd->td_state); + + return; +} +/* Like tdaio_flush, qcow_aio_flush waits for all previously submitted aio requests to complete, +* then calls fsync() and returns its result. If the file system implemented .aio_fsync, it could be used instead, but +* currently no file system implements .aio_fsync. +*/ +int qcow_aio_flush(struct disk_driver *dd) +{ + BDRVQcowState *prv = dd->private; + int fd = prv->fd; + int ret; + fd_set readfds; + int maxfds; + + /* submit previously queued aio requests; if td_submit leaves dd->early positive, kick responses and reset it */ + dd->early += dd->drv->td_submit(dd); + if (dd->early > 0) { + io_done(dd, MAX_IOFD + 1); + dd->early = 0; + } + + if ( prv->async.iocb_free_count < prv->async.max_aio_reqs ){ + /* wait until the outstanding aio requests have completed, i.e. iocb_free_count is back at max_aio_reqs */ + while(1){ + FD_ZERO(&readfds); + FD_SET(dd->io_fd[READ],&readfds); + maxfds = dd->io_fd[READ]; + + /* block, without a timeout, until io_fd[READ] becomes readable */ + ret = select(maxfds + 1, &readfds, (fd_set *) 0, + (fd_set *) 0, NULL); /* NOTE(review): only ret > 0 is acted on; a select() error is not reported and the loop simply retries. */ + if ( ret > 0 ){ + if(dd->io_fd[READ] && FD_ISSET(dd->io_fd[READ],&readfds)) + io_done(dd,READ); + } + + if(prv->async.iocb_free_count >= prv->async.max_aio_reqs) + break; + } + } + + /* flush data from cache to disk */ + ret = fsync(fd); + return ret; +} +#endif +int qcow_flush(struct disk_driver *dd) /* Plain flush: calls fsync() on the driver fd and returns its result; installed in tapdisk_qcow2 when USE_AIO is not defined. */ +{ + BDRVQcowState *prv = dd->private; + int fd = prv->fd; + int ret; + + ret = fsync(fd); + return ret; +} struct tap_disk tapdisk_qcow2 = { "qcow2", @@ -2094,5 +2168,10 @@ qcow_close, qcow_do_callbacks, qcow_get_parent_id, - qcow_validate_parent + qcow_validate_parent, +#ifdef USE_AIO + qcow_aio_flush +#else + qcow_flush +#endif }; diff -r 3c4c3d48a835 tools/blktap/drivers/block-ram.c --- a/tools/blktap/drivers/block-ram.c Thu Aug 26 11:16:56 2010 +0100 +++ b/tools/blktap/drivers/block-ram.c Wed Nov 03 22:43:16 2010 +0800 @@ -281,6 +281,16 @@ return -EINVAL; } +int tdram_flush(struct disk_driver *dd) /* Flush callback for tapdisk_ram: calls fsync() on prv->fd and returns its result. */ +{ + struct 
tdram_state *prv = (struct tdram_state *)dd->private; + int fd = prv->fd; + int ret; + + ret = fsync(fd); + return ret; +} + struct tap_disk tapdisk_ram = { .disk_type = "tapdisk_ram", .private_data_size = sizeof(struct tdram_state), @@ -291,5 +301,6 @@ .td_close = tdram_close, .td_do_callbacks = tdram_do_callbacks, .td_get_parent_id = tdram_get_parent_id, - .td_validate_parent = tdram_validate_parent + .td_validate_parent = tdram_validate_parent, + .td_flush = tdram_flush }; diff -r 3c4c3d48a835 tools/blktap/drivers/block-sync.c --- a/tools/blktap/drivers/block-sync.c Thu Aug 26 11:16:56 2010 +0100 +++ b/tools/blktap/drivers/block-sync.c Wed Nov 03 22:43:16 2010 +0800 @@ -228,6 +228,16 @@ return -EINVAL; } +int tdsync_flush(struct disk_driver *dd) /* Flush callback for tapdisk_sync: calls fsync() on prv->fd and returns its result. */ +{ + struct tdsync_state *prv = (struct tdsync_state *)dd->private; + int fd = prv->fd; + int ret; + + ret = fsync(fd); + return ret; +} + struct tap_disk tapdisk_sync = { .disk_type = "tapdisk_sync", .private_data_size = sizeof(struct tdsync_state), @@ -238,5 +248,6 @@ .td_close = tdsync_close, .td_do_callbacks = tdsync_do_callbacks, .td_get_parent_id = tdsync_get_parent_id, - .td_validate_parent = tdsync_validate_parent + .td_validate_parent = tdsync_validate_parent, + .td_flush = tdsync_flush }; diff -r 3c4c3d48a835 tools/blktap/drivers/block-vmdk.c --- a/tools/blktap/drivers/block-vmdk.c Thu Aug 26 11:16:56 2010 +0100 +++ b/tools/blktap/drivers/block-vmdk.c Wed Nov 03 22:43:16 2010 +0800 @@ -414,6 +414,16 @@ return -EINVAL; } +int tdvmdk_flush(struct disk_driver *dd) /* Flush callback for tapdisk_vmdk: calls fsync() on prv->fd and returns its result. */ +{ + struct tdvmdk_state *prv = (struct tdvmdk_state *)dd->private; + int fd = prv->fd; + int ret; + + ret = fsync(fd); + return ret; +} + struct tap_disk tapdisk_vmdk = { .disk_type = "tapdisk_vmdk", .private_data_size = sizeof(struct tdvmdk_state), @@ -424,5 +434,6 @@ .td_close = tdvmdk_close, .td_do_callbacks = tdvmdk_do_callbacks, .td_get_parent_id = tdvmdk_get_parent_id, - .td_validate_parent = tdvmdk_validate_parent + .td_validate_parent = 
tdvmdk_validate_parent, + .td_flush = tdvmdk_flush }; diff -r 3c4c3d48a835 tools/blktap/drivers/tapdisk.c --- a/tools/blktap/drivers/tapdisk.c Thu Aug 26 11:16:56 2010 +0100 +++ b/tools/blktap/drivers/tapdisk.c Wed Nov 03 22:43:16 2010 +0800 @@ -652,6 +652,12 @@ if (req == NULL) continue; + if (req->operation == BLKIF_OP_WRITE_BARRIER){ /* barrier: call td_flush before the request is handled further; a failure is reported through DPRINTF */ + ret = drv->td_flush(dd); + if (ret!=0) + DPRINTF("Do td_flush failed.\n"); + } + idx = req->id; if (info->busy.req) { @@ -671,7 +677,7 @@ } if ((dd->flags & TD_RDONLY) && - (req->operation == BLKIF_OP_WRITE)) { + (req->operation != BLKIF_OP_READ)) { /* with TD_RDONLY set, every non-read operation (write barriers included) gets BLKIF_RSP_ERROR */ blkif->pending_list[idx].status = BLKIF_RSP_ERROR; goto send_response; } @@ -692,7 +698,7 @@ DPRINTF("Sector request failed:\n"); DPRINTF("%s request, idx [%d,%d] size [%llu], " "sector [%llu,%llu]\n", - (req->operation == BLKIF_OP_WRITE ? + (req->operation != BLKIF_OP_READ ? /* any non-read operation is logged as WRITE */ "WRITE" : "READ"), idx,i, (long long unsigned) @@ -707,6 +713,7 @@ switch (req->operation) { + case BLKIF_OP_WRITE_BARRIER: /* no body of its own: falls through to the BLKIF_OP_WRITE case */ case BLKIF_OP_WRITE: ret = drv->td_queue_write(dd, sector_nr, nsects, page, @@ -747,6 +754,13 @@ if (blkif->pending_list[idx].secs_pending == 0) dd->early += send_responses(dd, 0, 0, 0, idx, (void *)(long)0); + + if (req->operation == BLKIF_OP_WRITE_BARRIER){ /* barrier: second td_flush call, presumably after the writes for this request were queued (TODO confirm against the full file) */ + ret = drv->td_flush(dd); + if (ret!=0) + DPRINTF("Do td_flush failed.\n"); + } + } out: diff -r 3c4c3d48a835 tools/blktap/drivers/tapdisk.h --- a/tools/blktap/drivers/tapdisk.h Thu Aug 26 11:16:56 2010 +0100 +++ b/tools/blktap/drivers/tapdisk.h Wed Nov 03 22:43:16 2010 +0800 @@ -137,6 +137,7 @@ int (*td_get_parent_id) (struct disk_driver *dd, struct disk_id *id); int (*td_validate_parent)(struct disk_driver *dd, struct disk_driver *p, td_flag_t flags); + int (*td_flush) (struct disk_driver *dd); /* flush callback; tapdisk.c invokes it for BLKIF_OP_WRITE_BARRIER requests */ }; typedef struct disk_info {