Xen project Mailing List

Re: [Xen-devel] blkback disk I/O limit patch

From: Vasiliy Tolstov <v.tolstov@xxxxxxxxx>

Date: Fri, 1 Feb 2013 10:53:46 +0400

Cc: "xen-devel@xxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxx>

Delivery-date: Fri, 01 Feb 2013 06:54:50 +0000

List-id: Xen developer discussion <xen-devel.lists.xen.org>

Sorry, diff -NruabBEp xen_blkback_limit.orig/blkback.c xen_blkback_limit.new//blkback.c --- xen_blkback_limit.orig/blkback.c 2012-12-04 13:03:58.000000000 +0400 +++ xen_blkback_limit.new//blkback.c 2013-01-28 08:11:30.000000000 +0400 @@ -211,10 +211,18 @@ static void print_stats(blkif_t *blkif) blkif->st_pk_req = 0; } +static void refill_iops(blkif_t *blkif) +{ + blkif->reqtime = jiffies + msecs_to_jiffies(1000); + blkif->reqcount = 0; +} + int blkif_schedule(void *arg) { blkif_t *blkif = arg; struct vbd *vbd = &blkif->vbd; + int ret = 0; + struct timeval cur_time; blkif_get(blkif); @@ -237,10 +245,22 @@ int blkif_schedule(void *arg) blkif->waiting_reqs = 0; smp_mb(); /* clear flag *before* checking for work */ - if (do_block_io_op(blkif)) + ret = do_block_io_op(blkif); + if (ret) blkif->waiting_reqs = 1; unplug_queue(blkif); + if (blkif->reqrate) { + if (2 == ret && (blkif->reqtime > jiffies)) { + jiffies_to_timeval(jiffies, &cur_time); + + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(blkif->reqtime - jiffies); + } + if (time_after(jiffies, blkif->reqtime)) + refill_iops(blkif); + } + if (log_stats && time_after(jiffies, blkif->st_print)) print_stats(blkif); } @@ -394,10 +414,19 @@ static int _do_block_io_op(blkif_t *blki rp = blk_rings->common.sring->req_prod; rmb(); /* Ensure we see queued requests up to 'rp'. */ + if (blkif->reqrate && (blkif->reqcount >= blkif->reqrate)) { + return (rc != rp) ? 2 : 0; + } + while (rc != rp) { if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc)) break; + if (blkif->reqrate) { + if (blkif->reqcount >= blkif->reqrate) + return 2; + } + if (kthread_should_stop()) return 1; @@ -434,8 +463,8 @@ static int _do_block_io_op(blkif_t *blki /* Apply all sanity checks to /private copy/ of request. */ barrier(); - dispatch_rw_block_io(blkif, &req, pending_req); + blkif->reqcount++; break; case BLKIF_OP_DISCARD: blk_rings->common.req_cons = rc; @@ -452,7 +481,7 @@ static int _do_block_io_op(blkif_t *blki break; default: /* A good sign something is wrong: sleep for a while to - * avoid excessive CPU consumption by a bad guest. */ + * avoid excessive CPU consumption by a bad guest.*/ msleep(1); blk_rings->common.req_cons = rc; barrier(); @@ -501,6 +530,7 @@ static void dispatch_rw_block_io(blkif_t uint32_t flags; int ret, i; int operation; + struct timeval cur_time; switch (req->operation) { case BLKIF_OP_READ: @@ -658,6 +688,7 @@ static void dispatch_rw_block_io(blkif_t else blkif->st_wr_sect += preq.nr_sects; + jiffies_to_timeval(jiffies, &cur_time); return; fail_flush: diff -NruabBEp xen_blkback_limit.orig/common.h xen_blkback_limit.new//common.h --- xen_blkback_limit.orig/common.h 2012-12-04 13:03:58.000000000 +0400 +++ xen_blkback_limit.new//common.h 2013-01-28 08:09:35.000000000 +0400 @@ -82,6 +82,11 @@ typedef struct blkif_st { unsigned int waiting_reqs; struct request_queue *plug; + /* qos information */ + unsigned long reqtime; + int reqcount; + int reqrate; + /* statistics */ unsigned long st_print; int st_rd_req; @@ -106,6 +111,8 @@ struct backend_info unsigned major; unsigned minor; char *mode; + /* qos information */ + struct xenbus_watch reqrate_watch; }; blkif_t *blkif_alloc(domid_t domid); diff -NruabBEp xen_blkback_limit.orig/xenbus.c xen_blkback_limit.new//xenbus.c --- xen_blkback_limit.orig/xenbus.c 2012-12-04 13:03:58.000000000 +0400 +++ xen_blkback_limit.new//xenbus.c 2013-01-28 08:22:26.000000000 +0400 @@ -120,6 +120,79 @@ static void update_blkif_status(blkif_t } \ static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) +static ssize_t +show_reqrate(struct device *_dev, struct device_attribute *attr, char *buf) +{ + ssize_t ret = -ENODEV; + struct xenbus_device *dev; + struct backend_info *be; + + if (!get_device(_dev)) + return ret; + + dev = to_xenbus_device(_dev); + be = dev_get_drvdata(&dev->dev); + + if (be != NULL) + ret = sprintf(buf, "%d\n", be->blkif->reqrate); + + put_device(_dev); + + return ret; +} + +static ssize_t +store_reqrate(struct device *_dev, struct device_attribute *attr, + const char *buf, size_t size) +{ + int value; + struct xenbus_device *dev; + struct backend_info *be; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (!get_device(_dev)) + return -ENODEV; + + if (sscanf(buf, "%d", &value) != 1) + return -EINVAL; + + dev = to_xenbus_device(_dev); + be = dev_get_drvdata(&dev->dev); + + if (be != NULL) + be->blkif->reqrate = value; + + put_device(_dev); + + return size; +} +static DEVICE_ATTR(reqrate, S_IRUGO | S_IWUSR, show_reqrate, + store_reqrate); + +static ssize_t +show_reqcount(struct device *_dev, struct device_attribute *attr, char *buf) +{ + ssize_t ret = -ENODEV; + struct xenbus_device *dev; + struct backend_info *be; + + if (!get_device(_dev)) + return ret; + + dev = to_xenbus_device(_dev); + be = dev_get_drvdata(&dev->dev); + + if (be != NULL) + ret = sprintf(buf, "%d\n", be->blkif->reqcount); + + put_device(_dev); + + return ret; +} +static DEVICE_ATTR(reqcount, S_IRUGO | S_IWUSR, show_reqcount, NULL); + VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req); VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req); VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req); @@ -146,6 +219,17 @@ static const struct attribute_group vbds .attrs = vbdstat_attrs, }; +static struct attribute *vbdreq_attrs[] = { + &dev_attr_reqrate.attr, + &dev_attr_reqcount.attr, + NULL +}; + +static const struct attribute_group vbdreq_group = { + .name = "qos", + .attrs = vbdreq_attrs, +}; + VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor); VBD_SHOW(mode, "%s\n", be->mode); @@ -165,8 +249,13 @@ int xenvbd_sysfs_addif(struct xenbus_dev if (error) goto fail3; + error = sysfs_create_group(&dev->dev.kobj, &vbdreq_group); + if (error) + goto fail4; + return 0; +fail4: sysfs_remove_group(&dev->dev.kobj, &vbdreq_group); fail3: sysfs_remove_group(&dev->dev.kobj, &vbdstat_group); fail2: device_remove_file(&dev->dev, &dev_attr_mode); fail1: device_remove_file(&dev->dev, &dev_attr_physical_device); @@ -175,6 +264,7 @@ fail1: device_remove_file(&dev->dev, &de void xenvbd_sysfs_delif(struct xenbus_device *dev) { + sysfs_remove_group(&dev->dev.kobj, &vbdreq_group); sysfs_remove_group(&dev->dev.kobj, &vbdstat_group); device_remove_file(&dev->dev, &dev_attr_mode); device_remove_file(&dev->dev, &dev_attr_physical_device); @@ -201,6 +291,12 @@ static int blkback_remove(struct xenbus_ be->cdrom_watch.node = NULL; } + if (be->reqrate_watch.node) { + unregister_xenbus_watch(&be->reqrate_watch); + kfree(be->reqrate_watch.node); + be->reqrate_watch.node = NULL; + } + if (be->blkif) { blkif_disconnect(be->blkif); vbd_free(&be->blkif->vbd); @@ -338,6 +434,7 @@ static void backend_changed(struct xenbu struct xenbus_device *dev = be->dev; int cdrom = 0; char *device_type; + char name[TASK_COMM_LEN]; DPRINTK(""); @@ -376,6 +473,21 @@ static void backend_changed(struct xenbu kfree(device_type); } + /* gather information about QoS policy for this device. */ + err = blkback_name(be->blkif, name); + if (err) { + xenbus_dev_error(be->dev, err, "get blkback dev name"); + return; + } + + err = xenbus_gather(XBT_NIL, dev->otherend, + "reqrate", "%d", &be->blkif->reqrate, + NULL); + if (err) + DPRINTK("%s xenbus_gather(reqrate) error", name); + + be->blkif->reqtime = jiffies; + if (be->major == 0 && be->minor == 0) { /* Front end dir is a number, which is used as the handle. */ @@ -482,6 +594,30 @@ static void frontend_changed(struct xenb /* ** Connection ** */ +static void reqrate_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + struct backend_info *be = container_of(watch, struct backend_info, + reqrate_watch); + int err; + char name[TASK_COMM_LEN]; + + err = blkback_name(be->blkif, name); + if (err) { + xenbus_dev_error(be->dev, err, "get blkback dev name"); + return; + } + + err = xenbus_gather(XBT_NIL, be->dev->otherend, + "reqrate", "%d", + &be->blkif->reqrate, NULL); + if (err) { + DPRINTK("%s xenbus_gather(reqrate) error", name); + } else { + if (be->blkif->reqrate <= 0) + be->blkif->reqrate = 0; + } +} /** * Write the physical details regarding the block device to the store, and @@ -542,6 +678,21 @@ again: xenbus_dev_fatal(dev, err, "%s: switching to Connected state", dev->nodename); + if (be->reqrate_watch.node) { + unregister_xenbus_watch(&be->reqrate_watch); + kfree(be->reqrate_watch.node); + be->reqrate_watch.node = NULL; + } + + err = xenbus_watch_path2(dev, dev->otherend, "reqrate", + &be->reqrate_watch, + reqrate_changed); + if (err) { + xenbus_dev_fatal(dev, err, "%s: watching reqrate", + dev->nodename); + goto abort; + } + return; abort: xenbus_transaction_end(xbt, 1); 2013/1/31 Wei Liu <wei.liu2@xxxxxxxxxx>: > On Thu, 2013-01-31 at 05:14 +0000, Vasiliy Tolstov wrote: >> Sorry forget to send patch >> https://bitbucket.org/go2clouds/patches/raw/master/xen_blkback_limit/3.6.9-1.patch >> Patch for kernel 3.6.9, but if that needed i can rebase it to current >> git Linus tree. > > Can you inline your patch in your email so that developer can comment on > it. > > > Wei. > -- Vasiliy Tolstov, Clodo.ru e-mail: v.tolstov@xxxxxxxxx jabber: vase@xxxxxxxxx _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.