Xen project Mailing List

Re: [Xen-devel] blkback disk I/O limit patch

To: Vasiliy Tolstov <v.tolstov@xxxxxxxxx>

From: Konrad Rzeszutek Wilk <konrad@xxxxxxxxxx>

Date: Fri, 1 Feb 2013 09:42:56 -0500

Cc: Wei Liu <wei.liu2@xxxxxxxxxx>, "xen-devel@xxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxx>

Delivery-date: Fri, 01 Feb 2013 14:43:25 +0000

List-id: Xen developer discussion <xen-devel.lists.xen.org>

On Fri, Feb 01, 2013 at 10:53:46AM +0400, Vasiliy Tolstov wrote: > Sorry, Ugh, you didn't inline it - you just copied and pasted it. Also you are missing an SoB and a description of what this patch does and why is it better than existing device mapper I/O limiting work? > > diff -NruabBEp xen_blkback_limit.orig/blkback.c > xen_blkback_limit.new//blkback.c > --- xen_blkback_limit.orig/blkback.c 2012-12-04 13:03:58.000000000 +0400 > +++ xen_blkback_limit.new//blkback.c 2013-01-28 08:11:30.000000000 +0400 > @@ -211,10 +211,18 @@ static void print_stats(blkif_t *blkif) > blkif->st_pk_req = 0; > } > > +static void refill_iops(blkif_t *blkif) > +{ > + blkif->reqtime = jiffies + msecs_to_jiffies(1000); > + blkif->reqcount = 0; > +} > + > int blkif_schedule(void *arg) > { > blkif_t *blkif = arg; > struct vbd *vbd = &blkif->vbd; > + int ret = 0; > + struct timeval cur_time; > > blkif_get(blkif); > > @@ -237,10 +245,22 @@ int blkif_schedule(void *arg) > blkif->waiting_reqs = 0; > smp_mb(); /* clear flag *before* checking for work */ > > - if (do_block_io_op(blkif)) > + ret = do_block_io_op(blkif); > + if (ret) > blkif->waiting_reqs = 1; > unplug_queue(blkif); > > + if (blkif->reqrate) { > + if (2 == ret && (blkif->reqtime > jiffies)) { > + jiffies_to_timeval(jiffies, &cur_time); > + > + set_current_state(TASK_INTERRUPTIBLE); > + schedule_timeout(blkif->reqtime - jiffies); > + } > + if (time_after(jiffies, blkif->reqtime)) > + refill_iops(blkif); > + } > + > if (log_stats && time_after(jiffies, blkif->st_print)) > print_stats(blkif); > } > @@ -394,10 +414,19 @@ static int _do_block_io_op(blkif_t *blki > rp = blk_rings->common.sring->req_prod; > rmb(); /* Ensure we see queued requests up to 'rp'. */ > > + if (blkif->reqrate && (blkif->reqcount >= blkif->reqrate)) { > + return (rc != rp) ? 2 : 0; > + } > + > while (rc != rp) { > if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc)) > break; > > + if (blkif->reqrate) { > + if (blkif->reqcount >= blkif->reqrate) > + return 2; > + } > + > if (kthread_should_stop()) > return 1; > > @@ -434,8 +463,8 @@ static int _do_block_io_op(blkif_t *blki > > /* Apply all sanity checks to /private copy/ of request. */ > barrier(); > - > dispatch_rw_block_io(blkif, &req, pending_req); > + blkif->reqcount++; > break; > case BLKIF_OP_DISCARD: > blk_rings->common.req_cons = rc; > @@ -452,7 +481,7 @@ static int _do_block_io_op(blkif_t *blki > break; > default: > /* A good sign something is wrong: sleep for a while to > - * avoid excessive CPU consumption by a bad guest. */ > + * avoid excessive CPU consumption by a bad guest.*/ > msleep(1); > blk_rings->common.req_cons = rc; > barrier(); > @@ -501,6 +530,7 @@ static void dispatch_rw_block_io(blkif_t > uint32_t flags; > int ret, i; > int operation; > + struct timeval cur_time; > > switch (req->operation) { > case BLKIF_OP_READ: > @@ -658,6 +688,7 @@ static void dispatch_rw_block_io(blkif_t > else > blkif->st_wr_sect += preq.nr_sects; > > + jiffies_to_timeval(jiffies, &cur_time); > return; > > fail_flush: > diff -NruabBEp xen_blkback_limit.orig/common.h xen_blkback_limit.new//common.h > --- xen_blkback_limit.orig/common.h 2012-12-04 13:03:58.000000000 +0400 > +++ xen_blkback_limit.new//common.h 2013-01-28 08:09:35.000000000 +0400 > @@ -82,6 +82,11 @@ typedef struct blkif_st { > unsigned int waiting_reqs; > struct request_queue *plug; > > + /* qos information */ > + unsigned long reqtime; > + int reqcount; > + int reqrate; > + > /* statistics */ > unsigned long st_print; > int st_rd_req; > @@ -106,6 +111,8 @@ struct backend_info > unsigned major; > unsigned minor; > char *mode; > + /* qos information */ > + struct xenbus_watch reqrate_watch; > }; > > blkif_t *blkif_alloc(domid_t domid); > diff -NruabBEp xen_blkback_limit.orig/xenbus.c xen_blkback_limit.new//xenbus.c > --- xen_blkback_limit.orig/xenbus.c 2012-12-04 13:03:58.000000000 +0400 > +++ xen_blkback_limit.new//xenbus.c 2013-01-28 08:22:26.000000000 +0400 > @@ -120,6 +120,79 @@ static void update_blkif_status(blkif_t > } \ > static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) > > +static ssize_t > +show_reqrate(struct device *_dev, struct device_attribute *attr, char *buf) > +{ > + ssize_t ret = -ENODEV; > + struct xenbus_device *dev; > + struct backend_info *be; > + > + if (!get_device(_dev)) > + return ret; > + > + dev = to_xenbus_device(_dev); > + be = dev_get_drvdata(&dev->dev); > + > + if (be != NULL) > + ret = sprintf(buf, "%d\n", be->blkif->reqrate); > + > + put_device(_dev); > + > + return ret; > +} > + > +static ssize_t > +store_reqrate(struct device *_dev, struct device_attribute *attr, > + const char *buf, size_t size) > +{ > + int value; > + struct xenbus_device *dev; > + struct backend_info *be; > + > + if (!capable(CAP_SYS_ADMIN)) > + return -EPERM; > + > + if (!get_device(_dev)) > + return -ENODEV; > + > + if (sscanf(buf, "%d", &value) != 1) > + return -EINVAL; > + > + dev = to_xenbus_device(_dev); > + be = dev_get_drvdata(&dev->dev); > + > + if (be != NULL) > + be->blkif->reqrate = value; > + > + put_device(_dev); > + > + return size; > +} > +static DEVICE_ATTR(reqrate, S_IRUGO | S_IWUSR, show_reqrate, > + store_reqrate); > + > +static ssize_t > +show_reqcount(struct device *_dev, struct device_attribute *attr, char *buf) > +{ > + ssize_t ret = -ENODEV; > + struct xenbus_device *dev; > + struct backend_info *be; > + > + if (!get_device(_dev)) > + return ret; > + > + dev = to_xenbus_device(_dev); > + be = dev_get_drvdata(&dev->dev); > + > + if (be != NULL) > + ret = sprintf(buf, "%d\n", be->blkif->reqcount); > + > + put_device(_dev); > + > + return ret; > +} > +static DEVICE_ATTR(reqcount, S_IRUGO | S_IWUSR, show_reqcount, NULL); > + > VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req); > VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req); > VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req); > @@ -146,6 +219,17 @@ static const struct attribute_group vbds > .attrs = vbdstat_attrs, > }; > > +static struct attribute *vbdreq_attrs[] = { > + &dev_attr_reqrate.attr, > + &dev_attr_reqcount.attr, > + NULL > +}; > + > +static const struct attribute_group vbdreq_group = { > + .name = "qos", > + .attrs = vbdreq_attrs, > +}; > + > VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor); > VBD_SHOW(mode, "%s\n", be->mode); > > @@ -165,8 +249,13 @@ int xenvbd_sysfs_addif(struct xenbus_dev > if (error) > goto fail3; > > + error = sysfs_create_group(&dev->dev.kobj, &vbdreq_group); > + if (error) > + goto fail4; > + > return 0; > > +fail4: sysfs_remove_group(&dev->dev.kobj, &vbdreq_group); > fail3: sysfs_remove_group(&dev->dev.kobj, &vbdstat_group); > fail2: device_remove_file(&dev->dev, &dev_attr_mode); > fail1: device_remove_file(&dev->dev, &dev_attr_physical_device); > @@ -175,6 +264,7 @@ fail1: device_remove_file(&dev->dev, &de > > void xenvbd_sysfs_delif(struct xenbus_device *dev) > { > + sysfs_remove_group(&dev->dev.kobj, &vbdreq_group); > sysfs_remove_group(&dev->dev.kobj, &vbdstat_group); > device_remove_file(&dev->dev, &dev_attr_mode); > device_remove_file(&dev->dev, &dev_attr_physical_device); > @@ -201,6 +291,12 @@ static int blkback_remove(struct xenbus_ > be->cdrom_watch.node = NULL; > } > > + if (be->reqrate_watch.node) { > + unregister_xenbus_watch(&be->reqrate_watch); > + kfree(be->reqrate_watch.node); > + be->reqrate_watch.node = NULL; > + } > + > if (be->blkif) { > blkif_disconnect(be->blkif); > vbd_free(&be->blkif->vbd); > @@ -338,6 +434,7 @@ static void backend_changed(struct xenbu > struct xenbus_device *dev = be->dev; > int cdrom = 0; > char *device_type; > + char name[TASK_COMM_LEN]; > > DPRINTK(""); > > @@ -376,6 +473,21 @@ static void backend_changed(struct xenbu > kfree(device_type); > } > > + /* gather information about QoS policy for this device. */ > + err = blkback_name(be->blkif, name); > + if (err) { > + xenbus_dev_error(be->dev, err, "get blkback dev name"); > + return; > + } > + > + err = xenbus_gather(XBT_NIL, dev->otherend, > + "reqrate", "%d", &be->blkif->reqrate, > + NULL); > + if (err) > + DPRINTK("%s xenbus_gather(reqrate) error", name); > + > + be->blkif->reqtime = jiffies; > + > if (be->major == 0 && be->minor == 0) { > /* Front end dir is a number, which is used as the handle. */ > > @@ -482,6 +594,30 @@ static void frontend_changed(struct xenb > > /* ** Connection ** */ > > +static void reqrate_changed(struct xenbus_watch *watch, > + const char **vec, unsigned int len) > +{ > + struct backend_info *be = container_of(watch, struct backend_info, > + reqrate_watch); > + int err; > + char name[TASK_COMM_LEN]; > + > + err = blkback_name(be->blkif, name); > + if (err) { > + xenbus_dev_error(be->dev, err, "get blkback dev name"); > + return; > + } > + > + err = xenbus_gather(XBT_NIL, be->dev->otherend, > + "reqrate", "%d", > + &be->blkif->reqrate, NULL); > + if (err) { > + DPRINTK("%s xenbus_gather(reqrate) error", name); > + } else { > + if (be->blkif->reqrate <= 0) > + be->blkif->reqrate = 0; > + } > +} > > /** > * Write the physical details regarding the block device to the store, and > @@ -542,6 +678,21 @@ again: > xenbus_dev_fatal(dev, err, "%s: switching to Connected state", > dev->nodename); > > + if (be->reqrate_watch.node) { > + unregister_xenbus_watch(&be->reqrate_watch); > + kfree(be->reqrate_watch.node); > + be->reqrate_watch.node = NULL; > + } > + > + err = xenbus_watch_path2(dev, dev->otherend, "reqrate", > + &be->reqrate_watch, > + reqrate_changed); > + if (err) { > + xenbus_dev_fatal(dev, err, "%s: watching reqrate", > + dev->nodename); > + goto abort; > + } > + > return; > abort: > xenbus_transaction_end(xbt, 1); > > 2013/1/31 Wei Liu <wei.liu2@xxxxxxxxxx>: > > On Thu, 2013-01-31 at 05:14 +0000, Vasiliy Tolstov wrote: > >> Sorry forget to send patch > >> https://bitbucket.org/go2clouds/patches/raw/master/xen_blkback_limit/3.6.9-1.patch > >> Patch for kernel 3.6.9, but if that needed i can rebase it to current > >> git Linus tree. > > > > Can you inline your patch in your email so that developer can comment on > > it. > > > > > > Wei. > > > > > > -- > Vasiliy Tolstov, > Clodo.ru > e-mail: v.tolstov@xxxxxxxxx > jabber: vase@xxxxxxxxx > > _______________________________________________ > Xen-devel mailing list > Xen-devel@xxxxxxxxxxxxx > http://lists.xen.org/xen-devel > _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.