# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1202724537 0
# Node ID 7128fe32720e088f61424480ef3c465edb3a8a37
# Parent bda2d96f9e2817b2d5ae3c95feeba262eb8b5d14
block: backport Jens Axboe's commit from
Tue, 16 Oct 2007 09:03:56 +0000 (11:03 +0200),
bf2de6f5a4faf0197268f18d08969b003b87b6e8:
"Initial support for data-less (or empty) barrier support".
blkback: permit and implement empty barrier.
Signed-off-by: Samuel Thibault <samuel.thibault@xxxxxxxxxxxxx>
---
block/elevator.c | 10 +++++++
block/ll_rw_blk.c | 54 ++++++++++++++++++++++++++----------------
drivers/xen/blkback/blkback.c | 14 ++++++++++
fs/bio.c | 3 +-
include/linux/bio.h | 19 +++++++++++++-
include/linux/blkdev.h | 2 +
mm/highmem.c | 6 ++++
7 files changed, 84 insertions(+), 24 deletions(-)
diff -r bda2d96f9e28 -r 7128fe32720e block/elevator.c
--- a/block/elevator.c Mon Feb 11 10:05:29 2008 +0000
+++ b/block/elevator.c Mon Feb 11 10:08:57 2008 +0000
@@ -493,6 +493,16 @@ struct request *elv_next_request(request
int ret;
while ((rq = __elv_next_request(q)) != NULL) {
+ /*
+ * Kill the empty barrier place holder, the driver must
+ * not ever see it.
+ */
+ if (blk_empty_barrier(rq)) {
+ blkdev_dequeue_request(rq);
+ end_that_request_chunk(rq, 1, 0);
+ end_that_request_last(rq, 1);
+ continue;
+ }
if (!(rq->flags & REQ_STARTED)) {
elevator_t *e = q->elevator;
diff -r bda2d96f9e28 -r 7128fe32720e block/ll_rw_blk.c
--- a/block/ll_rw_blk.c Mon Feb 11 10:05:29 2008 +0000
+++ b/block/ll_rw_blk.c Mon Feb 11 10:08:57 2008 +0000
@@ -483,9 +483,12 @@ static inline struct request *start_orde
* Queue ordered sequence. As we stack them at the head, we
* need to queue in reverse order. Note that we rely on that
* no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
- * request gets inbetween ordered sequence.
- */
- if (q->ordered & QUEUE_ORDERED_POSTFLUSH)
+ * request gets inbetween ordered sequence. If this request is
+ * an empty barrier, we don't need to do a postflush ever since
+ * there will be no data written between the pre and post flush.
+ * Hence a single flush will suffice.
+ */
+ if ((q->ordered & QUEUE_ORDERED_POSTFLUSH) && !blk_empty_barrier(rq))
queue_flush(q, QUEUE_ORDERED_POSTFLUSH);
else
q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;
@@ -2967,7 +2970,7 @@ static inline void blk_partition_remap(s
{
struct block_device *bdev = bio->bi_bdev;
- if (bdev != bdev->bd_contains) {
+ if (bio_sectors(bio) && bdev != bdev->bd_contains) {
struct hd_struct *p = bdev->bd_part;
const int rw = bio_data_dir(bio);
@@ -3028,7 +3031,7 @@ void generic_make_request(struct bio *bi
might_sleep();
/* Test device or partition size, when known. */
maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
- if (maxsector) {
+ if (maxsector && nr_sectors) {
sector_t sector = bio->bi_sector;
if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
@@ -3094,7 +3097,7 @@ end_io:
old_dev = bio->bi_bdev->bd_dev;
maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
- if (maxsector) {
+ if (maxsector && nr_sectors) {
sector_t sector = bio->bi_sector;
if (maxsector < nr_sectors || maxsector - nr_sectors <
sector) {
@@ -3128,21 +3131,25 @@ void submit_bio(int rw, struct bio *bio)
{
int count = bio_sectors(bio);
- BIO_BUG_ON(!bio->bi_size);
- BIO_BUG_ON(!bio->bi_io_vec);
bio->bi_rw |= rw;
- if (rw & WRITE)
- count_vm_events(PGPGOUT, count);
- else
- count_vm_events(PGPGIN, count);
-
- if (unlikely(block_dump)) {
- char b[BDEVNAME_SIZE];
- printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
- current->comm, current->pid,
- (rw & WRITE) ? "WRITE" : "READ",
- (unsigned long long)bio->bi_sector,
- bdevname(bio->bi_bdev,b));
+
+ if (!bio_empty_barrier(bio)) {
+ BIO_BUG_ON(!bio->bi_size);
+ BIO_BUG_ON(!bio->bi_io_vec);
+
+ if (rw & WRITE)
+ count_vm_events(PGPGOUT, count);
+ else
+ count_vm_events(PGPGIN, count);
+
+ if (unlikely(block_dump)) {
+ char b[BDEVNAME_SIZE];
+ printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
+ current->comm, current->pid,
+ (rw & WRITE) ? "WRITE" : "READ",
+ (unsigned long long)bio->bi_sector,
+ bdevname(bio->bi_bdev,b));
+ }
}
generic_make_request(bio);
@@ -3259,6 +3266,13 @@ static int __end_that_request_first(stru
total_bytes = bio_nbytes = 0;
while ((bio = req->bio) != NULL) {
int nbytes;
+
+ /* For an empty barrier request, the low level driver must
+ * store a potential error location in ->sector. We pass
+ * that back up in ->bi_sector
+ */
+ if (blk_empty_barrier(req))
+ bio->bi_sector = req->sector;
if (nr_bytes >= bio->bi_size) {
req->bio = bio->bi_next;
diff -r bda2d96f9e28 -r 7128fe32720e drivers/xen/blkback/blkback.c
--- a/drivers/xen/blkback/blkback.c Mon Feb 11 10:05:29 2008 +0000
+++ b/drivers/xen/blkback/blkback.c Mon Feb 11 10:08:57 2008 +0000
@@ -407,7 +407,7 @@ static void dispatch_rw_block_io(blkif_t
/* Check that number of segments is sane. */
nseg = req->nr_segments;
- if (unlikely(nseg == 0) ||
+ if (unlikely(nseg == 0 && operation != WRITE_BARRIER) ||
unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
DPRINTK("Bad number of segments in request (%d)\n", nseg);
goto fail_response;
@@ -498,6 +498,18 @@ static void dispatch_rw_block_io(blkif_t
}
preq.sector_number += seg[i].nsec;
+ }
+
+ if (!bio) {
+ BUG_ON(operation != WRITE_BARRIER);
+ bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, 0);
+ if (unlikely(bio == NULL))
+ goto fail_put_bio;
+
+ bio->bi_bdev = preq.bdev;
+ bio->bi_private = pending_req;
+ bio->bi_end_io = end_block_io_op;
+ bio->bi_sector = -1;
}
plug_queue(blkif, bio);
diff -r bda2d96f9e28 -r 7128fe32720e fs/bio.c
--- a/fs/bio.c Mon Feb 11 10:05:29 2008 +0000
+++ b/fs/bio.c Mon Feb 11 10:08:57 2008 +0000
@@ -112,7 +112,8 @@ void bio_free(struct bio *bio, struct bi
BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS);
- mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]);
+ if (bio->bi_io_vec)
+ mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]);
mempool_free(bio, bio_set->bio_pool);
}
diff -r bda2d96f9e28 -r 7128fe32720e include/linux/bio.h
--- a/include/linux/bio.h Mon Feb 11 10:05:29 2008 +0000
+++ b/include/linux/bio.h Mon Feb 11 10:08:57 2008 +0000
@@ -172,12 +172,27 @@ struct bio {
#define bio_offset(bio) bio_iovec((bio))->bv_offset
#define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx)
#define bio_sectors(bio) ((bio)->bi_size >> 9)
-#define bio_cur_sectors(bio) (bio_iovec(bio)->bv_len >> 9)
-#define bio_data(bio) (page_address(bio_page((bio))) + bio_offset((bio)))
#define bio_barrier(bio) ((bio)->bi_rw & (1 << BIO_RW_BARRIER))
#define bio_sync(bio) ((bio)->bi_rw & (1 << BIO_RW_SYNC))
#define bio_failfast(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST))
#define bio_rw_ahead(bio) ((bio)->bi_rw & (1 << BIO_RW_AHEAD))
+#define bio_empty_barrier(bio) (bio_barrier(bio) && !(bio)->bi_size)
+
+static inline unsigned int bio_cur_sectors(struct bio *bio)
+{
+ if (bio->bi_vcnt)
+ return bio_iovec(bio)->bv_len >> 9;
+
+ return 0;
+}
+
+static inline void *bio_data(struct bio *bio)
+{
+ if (bio->bi_vcnt)
+ return page_address(bio_page(bio)) + bio_offset(bio);
+
+ return NULL;
+}
/*
* will die
diff -r bda2d96f9e28 -r 7128fe32720e include/linux/blkdev.h
--- a/include/linux/blkdev.h Mon Feb 11 10:05:29 2008 +0000
+++ b/include/linux/blkdev.h Mon Feb 11 10:08:57 2008 +0000
@@ -505,6 +505,8 @@ enum {
#define blk_sorted_rq(rq) ((rq)->flags & REQ_SORTED)
#define blk_barrier_rq(rq) ((rq)->flags & REQ_HARDBARRIER)
#define blk_fua_rq(rq) ((rq)->flags & REQ_FUA)
+
+#define blk_empty_barrier(rq) (blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors)
#define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist)
diff -r bda2d96f9e28 -r 7128fe32720e mm/highmem.c
--- a/mm/highmem.c Mon Feb 11 10:05:29 2008 +0000
+++ b/mm/highmem.c Mon Feb 11 10:08:57 2008 +0000
@@ -466,6 +466,12 @@ void blk_queue_bounce(request_queue_t *q
void blk_queue_bounce(request_queue_t *q, struct bio **bio_orig)
{
mempool_t *pool;
+
+ /*
+ * Data-less bio, nothing to bounce
+ */
+ if (bio_empty_barrier(*bio_orig))
+ return;
/*
* for non-isa bounce case, just check if the bounce pfn is equal
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog