Hi,
This patch adds support for barriers to blk{back,front} drivers.
protocol changes:
* There is a new operation (BLKIF_OP_WRITE_BARRIER)
to pass on barrier requests.
* There is a new state (BLKIF_RSP_EOPNOTSUPP) to indicate
unsupported operations (barrier writes may fail depending
on the underlying block device).
* A new xenstore node named "feature-barrier" indicates the
backend is able to handle barrier writes. The value can
be 1 (all is fine) or 0 (underlying block device doesn't
support barriers).
blkback changes: Add "feature-barrier" node to indicate barrier
support, pass incoming barrier requests to the block layer using
submit_bio(WRITE_BARRIER, bio). Some error handling fixes to
properly pass through barrier write failures, so the frontend
can then turn off barriers.
blkfront changes: Check whether the backend sets "feature-barrier";
if it is present, switch to QUEUE_ORDERED_DRAIN mode. Send off barrier
requests to the backend using the new BLKIF_OP_WRITE_BARRIER
operation. Also some error handling for the EOPNOTSUPP case.
cheers,
Gerd
--
Gerd Hoffmann <kraxel@xxxxxxx>
Add support for barriers to blk{back,front} drivers.
protocol changes:
* There is a new operation (BLKIF_OP_WRITE_BARRIER)
to pass on barrier requests.
* There is a new state (BLKIF_RSP_EOPNOTSUPP) to indicate
unsupported operations (barrier writes may fail depending
on the underlying block device).
* A new xenstore node named "feature-barrier" indicates the
backend is able to handle barrier writes. The value can
be 1 (all is fine) or 0 (underlying block device doesn't
support barriers).
blkback changes: Add "feature-barrier" node to indicate barrier
support, pass incoming barrier requests to the block layer using
submit_bio(WRITE_BARRIER, bio). Some error handling fixes to
properly pass through barrier write failures, so the frontend
can then turn off barriers.
blkfront changes: Check whether the backend sets "feature-barrier";
if it is present, switch to QUEUE_ORDERED_DRAIN mode. Send off barrier
requests to the backend using the new BLKIF_OP_WRITE_BARRIER
operation. Also some error handling for the EOPNOTSUPP case.
Signed-off-by: Gerd Hoffmann <kraxel@xxxxxxx>
---
linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c | 41 ++++++++++++++-----
linux-2.6-xen-sparse/drivers/xen/blkback/common.h | 3 +
linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c | 2
linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c | 31 ++++++++++++++
linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c | 30 +++++++++++--
linux-2.6-xen-sparse/drivers/xen/blkfront/block.h | 2
linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c | 20 ++++++++-
xen/include/public/io/blkif.h | 10 ++--
8 files changed, 117 insertions(+), 22 deletions(-)
Index:
build-32-unstable-11407/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
===================================================================
---
build-32-unstable-11407.orig/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
+++ build-32-unstable-11407/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
@@ -191,9 +191,9 @@ static void fast_flush_area(pending_req_
static void print_stats(blkif_t *blkif)
{
- printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d\n",
+ printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d | br %4d\n",
current->comm, blkif->st_oo_req,
- blkif->st_rd_req, blkif->st_wr_req);
+ blkif->st_rd_req, blkif->st_wr_req, blkif->st_br_req);
blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
blkif->st_rd_req = 0;
blkif->st_wr_req = 0;
@@ -243,12 +243,17 @@ int blkif_schedule(void *arg)
* COMPLETION CALLBACK -- Called as bh->b_end_io()
*/
-static void __end_block_io_op(pending_req_t *pending_req, int uptodate)
+static void __end_block_io_op(pending_req_t *pending_req, int error)
{
/* An error fails the entire request. */
- if (!uptodate) {
- DPRINTK("Buffer not up-to-date at end of operation\n");
+ if (error) {
+ DPRINTK("Buffer not up-to-date at end of operation,
error=%d\n", error);
pending_req->status = BLKIF_RSP_ERROR;
+ if (pending_req->operation == BLKIF_OP_WRITE_BARRIER && error
== -EOPNOTSUPP) {
+ pending_req->status = BLKIF_RSP_EOPNOTSUPP;
+ blkback_barrier(pending_req->blkif->be, 0);
+ printk("blkback: write barrier op failed, not
supported\n");
+ }
}
if (atomic_dec_and_test(&pending_req->pendcnt)) {
@@ -264,7 +269,7 @@ static int end_block_io_op(struct bio *b
{
if (bio->bi_size != 0)
return 1;
- __end_block_io_op(bio->bi_private, !error);
+ __end_block_io_op(bio->bi_private, error);
bio_put(bio);
return error;
}
@@ -321,6 +326,9 @@ static int do_block_io_op(blkif_t *blkif
blkif->st_rd_req++;
dispatch_rw_block_io(blkif, req, pending_req);
break;
+ case BLKIF_OP_WRITE_BARRIER:
+ blkif->st_br_req++;
+ /* fall through */
case BLKIF_OP_WRITE:
blkif->st_wr_req++;
dispatch_rw_block_io(blkif, req, pending_req);
@@ -342,7 +350,6 @@ static void dispatch_rw_block_io(blkif_t
pending_req_t *pending_req)
{
extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
- int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
struct phys_req preq;
struct {
@@ -351,6 +358,22 @@ static void dispatch_rw_block_io(blkif_t
unsigned int nseg;
struct bio *bio = NULL, *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
int ret, i, nbio = 0;
+ int operation;
+
+ switch (req->operation) {
+ case BLKIF_OP_READ:
+ operation = READ;
+ break;
+ case BLKIF_OP_WRITE:
+ operation = WRITE;
+ break;
+ case BLKIF_OP_WRITE_BARRIER:
+ operation = WRITE_BARRIER;
+ break;
+ default:
+ operation = 0; /* make gcc happy */
+ BUG();
+ }
/* Check that number of segments is sane. */
nseg = req->nr_segments;
@@ -366,7 +389,7 @@ static void dispatch_rw_block_io(blkif_t
pending_req->blkif = blkif;
pending_req->id = req->id;
- pending_req->operation = operation;
+ pending_req->operation = req->operation;
pending_req->status = BLKIF_RSP_OKAY;
pending_req->nr_pages = nseg;
@@ -382,7 +405,7 @@ static void dispatch_rw_block_io(blkif_t
preq.nr_sects += seg[i].nsec;
flags = GNTMAP_host_map;
- if ( operation == WRITE )
+ if ( operation != READ )
flags |= GNTMAP_readonly;
gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
req->seg[i].gref, blkif->domid);
Index: build-32-unstable-11407/linux-2.6-xen-sparse/drivers/xen/blkback/common.h
===================================================================
---
build-32-unstable-11407.orig/linux-2.6-xen-sparse/drivers/xen/blkback/common.h
+++ build-32-unstable-11407/linux-2.6-xen-sparse/drivers/xen/blkback/common.h
@@ -87,6 +87,7 @@ typedef struct blkif_st {
int st_rd_req;
int st_wr_req;
int st_oo_req;
+ int st_br_req;
wait_queue_head_t waiting_to_free;
@@ -131,4 +132,6 @@ void blkif_xenbus_init(void);
irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
int blkif_schedule(void *arg);
+int blkback_barrier(struct backend_info *be, int state);
+
#endif /* __BLKIF__BACKEND__COMMON_H__ */
Index: build-32-unstable-11407/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c
===================================================================
---
build-32-unstable-11407.orig/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c
+++ build-32-unstable-11407/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c
@@ -91,11 +91,13 @@ static void update_blkif_status(blkif_t
VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req);
VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req);
VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req);
+VBD_SHOW(br_req, "%d\n", be->blkif->st_br_req);
static struct attribute *vbdstat_attrs[] = {
&dev_attr_oo_req.attr,
&dev_attr_rd_req.attr,
&dev_attr_wr_req.attr,
+ &dev_attr_br_req.attr,
NULL
};
@@ -165,6 +167,31 @@ static int blkback_remove(struct xenbus_
return 0;
}
+int blkback_barrier(struct backend_info *be, int state)
+{
+ struct xenbus_device *dev = be->dev;
+ struct xenbus_transaction xbt;
+ int err;
+
+ do {
+ err = xenbus_transaction_start(&xbt);
+ if (err) {
+ xenbus_dev_fatal(dev, err, "starting transaction");
+ return -1;
+ }
+
+ err = xenbus_printf(xbt, dev->nodename, "feature-barrier",
+ "%d", state);
+ if (err) {
+ xenbus_transaction_end(xbt, 1);
+ xenbus_dev_fatal(dev, err, "writing feature-barrier");
+ return -1;
+ }
+
+ err = xenbus_transaction_end(xbt, 0);
+ } while (err == -EAGAIN);
+ return 0;
+}
/**
* Entry point to this code when a new device is created. Allocate the basic
@@ -201,6 +228,10 @@ static int blkback_probe(struct xenbus_d
if (err)
goto fail;
+ err = blkback_barrier(be, 1);
+ if (err)
+ goto fail;
+
err = xenbus_switch_state(dev, XenbusStateInitWait);
if (err)
goto fail;
Index:
build-32-unstable-11407/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
===================================================================
---
build-32-unstable-11407.orig/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
+++ build-32-unstable-11407/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
@@ -316,6 +316,12 @@ static void connect(struct blkfront_info
return;
}
+ err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+ "feature-barrier", "%lu", &info->feature_barrier,
+ NULL);
+ if (err)
+ info->feature_barrier = 0;
+
err = xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);
if (err) {
xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
@@ -542,11 +548,14 @@ static int blkif_queue_request(struct re
info->shadow[id].request = (unsigned long)req;
ring_req->id = id;
- ring_req->operation = rq_data_dir(req) ?
- BLKIF_OP_WRITE : BLKIF_OP_READ;
ring_req->sector_number = (blkif_sector_t)req->sector;
ring_req->handle = info->handle;
+ ring_req->operation = rq_data_dir(req) ?
+ BLKIF_OP_WRITE : BLKIF_OP_READ;
+ if (blk_barrier_rq(req))
+ ring_req->operation = BLKIF_OP_WRITE_BARRIER;
+
ring_req->nr_segments = 0;
rq_for_each_bio (bio, req) {
bio_for_each_segment (bvec, bio, idx) {
@@ -643,6 +652,7 @@ static irqreturn_t blkif_int(int irq, vo
RING_IDX i, rp;
unsigned long flags;
struct blkfront_info *info = (struct blkfront_info *)dev_id;
+ int uptodate;
spin_lock_irqsave(&blkif_io_lock, flags);
@@ -667,19 +677,27 @@ static irqreturn_t blkif_int(int irq, vo
ADD_ID_TO_FREELIST(info, id);
+ uptodate = (bret->status == BLKIF_RSP_OKAY);
switch (bret->operation) {
+ case BLKIF_OP_WRITE_BARRIER:
+ if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
+ printk("blkfront: %s: write barrier op
failed\n",
+ info->gd->disk_name);
+ uptodate = -EOPNOTSUPP;
+ info->feature_barrier = 0;
+ xlvbd_barrier(info);
+ }
+ /* fall through */
case BLKIF_OP_READ:
case BLKIF_OP_WRITE:
if (unlikely(bret->status != BLKIF_RSP_OKAY))
DPRINTK("Bad return from blkdev data "
"request: %x\n", bret->status);
- ret = end_that_request_first(
- req, (bret->status == BLKIF_RSP_OKAY),
+ ret = end_that_request_first(req, uptodate,
req->hard_nr_sectors);
BUG_ON(ret);
- end_that_request_last(
- req, (bret->status == BLKIF_RSP_OKAY));
+ end_that_request_last(req, uptodate);
break;
default:
BUG();
Index: build-32-unstable-11407/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h
===================================================================
---
build-32-unstable-11407.orig/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h
+++ build-32-unstable-11407/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h
@@ -126,6 +126,7 @@ struct blkfront_info
struct gnttab_free_callback callback;
struct blk_shadow shadow[BLK_RING_SIZE];
unsigned long shadow_free;
+ int feature_barrier;
/**
* The number of people holding this device open. We won't allow a
@@ -152,5 +153,6 @@ extern void do_blkif_request (request_qu
int xlvbd_add(blkif_sector_t capacity, int device,
u16 vdisk_info, u16 sector_size, struct blkfront_info *info);
void xlvbd_del(struct blkfront_info *info);
+int xlvbd_barrier(struct blkfront_info *info);
#endif /* __XEN_DRIVERS_BLOCK_H__ */
Index: build-32-unstable-11407/xen/include/public/io/blkif.h
===================================================================
--- build-32-unstable-11407.orig/xen/include/public/io/blkif.h
+++ build-32-unstable-11407/xen/include/public/io/blkif.h
@@ -29,8 +29,9 @@
#endif
#define blkif_sector_t uint64_t
-#define BLKIF_OP_READ 0
-#define BLKIF_OP_WRITE 1
+#define BLKIF_OP_READ 0
+#define BLKIF_OP_WRITE 1
+#define BLKIF_OP_WRITE_BARRIER 2
/*
* Maximum scatter/gather segments per request.
@@ -61,8 +62,9 @@ struct blkif_response {
};
typedef struct blkif_response blkif_response_t;
-#define BLKIF_RSP_ERROR -1 /* non-specific 'error' */
-#define BLKIF_RSP_OKAY 0 /* non-specific 'okay' */
+#define BLKIF_RSP_EOPNOTSUPP -2 /* operation not supported (can happen on
barrier writes) */
+#define BLKIF_RSP_ERROR -1 /* non-specific 'error' */
+#define BLKIF_RSP_OKAY 0 /* non-specific 'okay' */
/*
* Generate blkif ring structures and types.
Index: build-32-unstable-11407/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c
===================================================================
--- build-32-unstable-11407.orig/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c
+++ build-32-unstable-11407/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c
@@ -257,6 +257,10 @@ xlvbd_alloc_gendisk(int minor, blkif_sec
}
info->rq = gd->queue;
+ info->gd = gd;
+
+ if (info->feature_barrier)
+ xlvbd_barrier(info);
if (vdisk_info & VDISK_READONLY)
set_disk_ro(gd, 1);
@@ -267,8 +271,6 @@ xlvbd_alloc_gendisk(int minor, blkif_sec
if (vdisk_info & VDISK_CDROM)
gd->flags |= GENHD_FL_CD;
- info->gd = gd;
-
return 0;
out:
@@ -316,3 +318,17 @@ xlvbd_del(struct blkfront_info *info)
blk_cleanup_queue(info->rq);
info->rq = NULL;
}
+
+int
+xlvbd_barrier(struct blkfront_info *info)
+{
+ int err;
+
+ err = blk_queue_ordered(info->rq,
+ info->feature_barrier ? QUEUE_ORDERED_DRAIN :
QUEUE_ORDERED_NONE, NULL);
+ if (err)
+ return err;
+ printk("blkfront: %s: barriers %s\n",
+ info->gd->disk_name, info->feature_barrier ? "enabled" :
"disabled");
+ return 0;
+}
Index: build-32-unstable-11407/linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c
===================================================================
--- build-32-unstable-11407.orig/linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c
+++ build-32-unstable-11407/linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c
@@ -104,7 +104,7 @@ int vbd_translate(struct phys_req *req,
struct vbd *vbd = &blkif->vbd;
int rc = -EACCES;
- if ((operation == WRITE) && vbd->readonly)
+ if ((operation != READ) && vbd->readonly)
goto out;
if (unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd)))
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|