On Tue, Jun 21, 2011 at 04:29:35PM +0800, Andrew Xu wrote:
> Hi all,
>
> I add a blkback QoS patch.
What tree is this against? There is a xen-blkback in 3.0-rc4; can you rebase
it against that, please?
What is the patch solving? Why can't it be done with dm-ioband?
> With this patch you can configure (dynamically or statically) a different
> I/O speed for each VM disk.
>
> ----------------------------------------------------------------------------
>
> diff -urNp blkback/blkback.c blkback-qos/blkback.c
> --- blkback/blkback.c 2011-06-22 07:54:19.000000000 +0800
> +++ blkback-qos/blkback.c 2011-06-22 07:53:18.000000000 +0800
> @@ -44,6 +44,11 @@
> #include <asm/hypervisor.h>
> #include "common.h"
>
> +#undef DPRINTK
> +#define DPRINTK(fmt, args...) \
> + printk("blkback/blkback (%s:%d) " fmt ".\n", \
> + __FUNCTION__, __LINE__, ##args)
> +
> /*
> * These are rather arbitrary. They are fairly large because adjacent
> requests
> * pulled from a communication ring are quite likely to end up being part of
> @@ -110,7 +115,8 @@ static inline unsigned long vaddr(pendin
> static int do_block_io_op(blkif_t *blkif);
> static int dispatch_rw_block_io(blkif_t *blkif,
> blkif_request_t *req,
> - pending_req_t *pending_req);
> + pending_req_t *pending_req,
> + int *done_nr_sects);
> static void make_response(blkif_t *blkif, u64 id,
> unsigned short op, int st);
>
> @@ -206,10 +212,20 @@ static void print_stats(blkif_t *blkif)
> blkif->st_pk_req = 0;
> }
>
> +static void refill_reqcount(blkif_t *blkif)
> +{
> + blkif->reqtime = jiffies + msecs_to_jiffies(1000);
> + blkif->reqcount = blkif->reqrate;
> + if (blkif->reqcount < blkif->reqmin)
> + blkif->reqcount = blkif->reqmin;
> +}
> +
> int blkif_schedule(void *arg)
> {
> blkif_t *blkif = arg;
> struct vbd *vbd = &blkif->vbd;
> + int ret = 0;
> + struct timeval cur_time;
>
> blkif_get(blkif);
>
> @@ -232,12 +248,34 @@ int blkif_schedule(void *arg)
> blkif->waiting_reqs = 0;
> smp_mb(); /* clear flag *before* checking for work */
>
> - if (do_block_io_op(blkif))
> + ret = do_block_io_op(blkif);
> + if (ret)
> blkif->waiting_reqs = 1;
> unplug_queue(blkif);
>
> + if(blkif->reqmin){
> + if(2 == ret && (blkif->reqtime > jiffies)){
> + jiffies_to_timeval(jiffies, &cur_time);
> + if(log_stats && (cur_time.tv_sec % 10 ==1 ))
> + printk(KERN_DEBUG "%s: going to sleep
> %d millsecs(rate=%d)\n",
> + current->comm,
> +
> jiffies_to_msecs(blkif->reqtime - jiffies),
> + blkif->reqrate);
> +
> + set_current_state(TASK_INTERRUPTIBLE);
> + schedule_timeout(blkif->reqtime - jiffies);
> +
> + if(log_stats && (cur_time.tv_sec % 10 ==1 ))
> + printk(KERN_DEBUG "%s: sleep
> end(rate=%d)\n",
> +
> current->comm,blkif->reqrate);
> + }
> + if (time_after(jiffies, blkif->reqtime))
> + refill_reqcount(blkif);
> + }
> +
> if (log_stats && time_after(jiffies, blkif->st_print))
> print_stats(blkif);
> +
> }
>
> if (log_stats)
> @@ -306,7 +344,6 @@ irqreturn_t blkif_be_int(int irq, void *
> /******************************************************************
> * DOWNWARD CALLS -- These interface with the block-device layer proper.
> */
> -
> static int do_block_io_op(blkif_t *blkif)
> {
> blkif_back_rings_t *blk_rings = &blkif->blk_rings;
> @@ -314,15 +351,27 @@ static int do_block_io_op(blkif_t *blkif
> pending_req_t *pending_req;
> RING_IDX rc, rp;
> int more_to_do = 0, ret;
> + static int last_done_nr_sects = 0;
>
> rc = blk_rings->common.req_cons;
> rp = blk_rings->common.sring->req_prod;
> rmb(); /* Ensure we see queued requests up to 'rp'. */
> +
> + if (blkif->reqmin && blkif->reqcount <= 0)
> + return (rc != rp) ? 2 : 0;
>
> while ((rc != rp) || (blkif->is_suspended_req)) {
>
> if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
> break;
> +
> + if(blkif->reqmin){
> + blkif->reqcount -= last_done_nr_sects;
> + if (blkif->reqcount <= 0) {
> + more_to_do = 2;
> + break;
> + }
> + }
>
> if (kthread_should_stop()) {
> more_to_do = 1;
> @@ -367,14 +416,14 @@ handle_request:
> switch (req.operation) {
> case BLKIF_OP_READ:
> blkif->st_rd_req++;
> - ret = dispatch_rw_block_io(blkif, &req, pending_req);
> + ret = dispatch_rw_block_io(blkif, &req,
> pending_req,&last_done_nr_sects);
> break;
> case BLKIF_OP_WRITE_BARRIER:
> blkif->st_br_req++;
> /* fall through */
> case BLKIF_OP_WRITE:
> blkif->st_wr_req++;
> - ret = dispatch_rw_block_io(blkif, &req, pending_req);
> + ret = dispatch_rw_block_io(blkif, &req,
> pending_req,&last_done_nr_sects);
> break;
> case BLKIF_OP_PACKET:
> DPRINTK("error: block operation BLKIF_OP_PACKET not
> implemented\n");
> @@ -412,9 +461,29 @@ handle_request:
> return more_to_do;
> }
>
> +static char* operation2str(int operation)
> +{
> + char* ret_str = NULL;
> + switch (operation) {
> + case BLKIF_OP_READ:
> + ret_str = "READ";
> + break;
> + case BLKIF_OP_WRITE:
> + ret_str = "WRITE";
> + break;
> + case BLKIF_OP_WRITE_BARRIER:
> + ret_str = "WRITE_BARRIER";
> + break;
> + default:
> + ret_str = "0";
> + }
> + return ret_str;
> +}
> +
> static int dispatch_rw_block_io(blkif_t *blkif,
> blkif_request_t *req,
> - pending_req_t *pending_req)
> + pending_req_t *pending_req,
> + int *done_nr_sects)
> {
> extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
> struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
> @@ -426,6 +495,9 @@ static int dispatch_rw_block_io(blkif_t
> struct bio *bio = NULL;
> int ret, i;
> int operation;
> + struct timeval cur_time;
> +
> + *done_nr_sects = 0;
>
> switch (req->operation) {
> case BLKIF_OP_READ:
> @@ -582,6 +654,12 @@ static int dispatch_rw_block_io(blkif_t
> else if (operation == WRITE || operation == WRITE_BARRIER)
> blkif->st_wr_sect += preq.nr_sects;
>
> + *done_nr_sects = preq.nr_sects;
> + jiffies_to_timeval(jiffies, &cur_time);
> + if ((log_stats == 2) && (cur_time.tv_sec % 10 ==1 ))
> + printk(KERN_DEBUG " operation=%s sects=%d\n",
> + operation2str(req->operation),preq.nr_sects);
> +
> return 0;
>
> fail_flush:
> @@ -695,6 +773,8 @@ static int __init blkif_init(void)
>
> blkif_xenbus_init();
>
> + DPRINTK("blkif_inited\n");
> +
> return 0;
>
> out_of_memory:
> diff -urNp blkback/cdrom.c blkback-qos/cdrom.c
> --- blkback/cdrom.c 2010-05-20 18:07:00.000000000 +0800
> +++ blkback-qos/cdrom.c 2011-06-22 07:34:50.000000000 +0800
> @@ -35,9 +35,9 @@
> #include "common.h"
>
> #undef DPRINTK
> -#define DPRINTK(_f, _a...) \
> - printk("(%s() file=%s, line=%d) " _f "\n", \
> - __PRETTY_FUNCTION__, __FILE__ , __LINE__ , ##_a )
> +#define DPRINTK(fmt, args...) \
> + printk("blkback/cdrom (%s:%d) " fmt ".\n", \
> + __FUNCTION__, __LINE__, ##args)
>
>
> #define MEDIA_PRESENT "media-present"
> diff -urNp blkback/common.h blkback-qos/common.h
> --- blkback/common.h 2010-05-20 18:07:00.000000000 +0800
> +++ blkback-qos/common.h 2011-06-22 07:34:50.000000000 +0800
> @@ -100,8 +100,17 @@ typedef struct blkif_st {
>
> grant_handle_t shmem_handle;
> grant_ref_t shmem_ref;
> +
> + /* qos information */
> + unsigned long reqtime;
> + int reqcount;
> + int reqmin;
> + int reqrate;
> +
> } blkif_t;
>
> +#define VBD_QOS_MIN_RATE_LIMIT 2*1024 /*
> 1MBs */
> +
> struct backend_info
> {
> struct xenbus_device *dev;
> @@ -111,6 +120,8 @@ struct backend_info
> unsigned major;
> unsigned minor;
> char *mode;
> + struct xenbus_watch rate_watch;
> + int have_rate_watch;
> };
>
> blkif_t *blkif_alloc(domid_t domid);
> diff -urNp blkback/vbd.c blkback-qos/vbd.c
> --- blkback/vbd.c 2010-05-20 18:07:00.000000000 +0800
> +++ blkback-qos/vbd.c 2011-06-22 07:34:50.000000000 +0800
> @@ -35,6 +35,11 @@
> #define vbd_sz(_v) ((_v)->bdev->bd_part ? \
> (_v)->bdev->bd_part->nr_sects : get_capacity((_v)->bdev->bd_disk))
>
> +#undef DPRINTK
> +#define DPRINTK(fmt, args...) \
> + printk("blkback/vbd (%s:%d) " fmt ".\n", \
> + __FUNCTION__, __LINE__, ##args)
> +
> unsigned long long vbd_size(struct vbd *vbd)
> {
> return vbd_sz(vbd);
> @@ -87,7 +92,7 @@ int vbd_create(blkif_t *blkif, blkif_vde
> if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
> vbd->type |= VDISK_REMOVABLE;
>
> - DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
> + DPRINTK("Successful creation of handle=%04x (dom=%u)",
> handle, blkif->domid);
> return 0;
> }
> diff -urNp blkback/xenbus.c blkback-qos/xenbus.c
> --- blkback/xenbus.c 2010-05-20 18:07:00.000000000 +0800
> +++ blkback-qos/xenbus.c 2011-06-22 07:34:50.000000000 +0800
> @@ -25,13 +25,14 @@
>
> #undef DPRINTK
> #define DPRINTK(fmt, args...) \
> - pr_debug("blkback/xenbus (%s:%d) " fmt ".\n", \
> + printk("blkback/xenbus (%s:%d) " fmt ".\n", \
> __FUNCTION__, __LINE__, ##args)
>
> static void connect(struct backend_info *);
> static int connect_ring(struct backend_info *);
> static void backend_changed(struct xenbus_watch *, const char **,
> unsigned int);
> +static void unregister_rate_watch(struct backend_info *be);
>
> static int blkback_name(blkif_t *blkif, char *buf)
> {
> @@ -59,8 +60,10 @@ static void update_blkif_status(blkif_t
> char name[TASK_COMM_LEN];
>
> /* Not ready to connect? */
> - if (!blkif->irq || !blkif->vbd.bdev)
> + if (!blkif->irq || !blkif->vbd.bdev){
> + DPRINTK("Not ready to connect");
> return;
> + }
>
> /* Already connected? */
> if (blkif->be->dev->state == XenbusStateConnected)
> @@ -193,6 +196,8 @@ static int blkback_remove(struct xenbus_
> be->cdrom_watch.node = NULL;
> }
>
> + unregister_rate_watch(be);
> +
> if (be->blkif) {
> blkif_disconnect(be->blkif);
> vbd_free(&be->blkif->vbd);
> @@ -251,6 +256,10 @@ static int blkback_probe(struct xenbus_d
>
> err = xenbus_watch_path2(dev, dev->nodename, "physical-device",
> &be->backend_watch, backend_changed);
> +
> + DPRINTK("blkback_probe called");
> + DPRINTK("dev->nodename=%s/physical-device",dev->nodename);
> +
> if (err)
> goto fail;
>
> @@ -266,7 +275,6 @@ fail:
> return err;
> }
>
> -
> /**
> * Callback received when the hotplug scripts have placed the physical-device
> * node. Read it and the mode node, and create a vbd. If the frontend is
> @@ -283,8 +291,9 @@ static void backend_changed(struct xenbu
> struct xenbus_device *dev = be->dev;
> int cdrom = 0;
> char *device_type;
> + char name[TASK_COMM_LEN];
>
> - DPRINTK("");
> + DPRINTK("backend_changed called");
>
> err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x",
> &major, &minor);
> @@ -322,6 +331,34 @@ static void backend_changed(struct xenbu
> kfree(device_type);
> }
>
> + /* gather information about QoS policy for this device. */
> + err = blkback_name(be->blkif, name);
> + if (err) {
> + xenbus_dev_error(be->dev, err, "get blkback dev name");
> + return;
> + }
> +
> + err = xenbus_gather(XBT_NIL, dev->otherend,
> + "tokens-rate", "%d", &be->blkif->reqrate,
> + NULL);
> + if(err){
> + DPRINTK("%s xenbus_gather(tokens-min,tokens-rate) error",name);
> + }else{
> + if(be->blkif->reqrate <= 0){
> + be->blkif->reqmin = 0 ;
> + DPRINTK("%s tokens-rate == 0,no limit",name);
> + }else{
> + DPRINTK("%s
> xenbus_gather(tokens-rate=%d)",name,be->blkif->reqrate);
> + be->blkif->reqrate *= 2;
> + be->blkif->reqmin = VBD_QOS_MIN_RATE_LIMIT;
> + if(be->blkif->reqmin > be->blkif->reqrate){
> + be->blkif->reqrate = be->blkif->reqmin;
> + DPRINTK("%s reset default
> value(tokens-rate=%d)",name,be->blkif->reqrate);
> + }
> + }
> + }
> + be->blkif->reqtime = jiffies;
> +
> if (be->major == 0 && be->minor == 0) {
> /* Front end dir is a number, which is used as the handle. */
>
> @@ -414,6 +451,49 @@ static void frontend_changed(struct xenb
>
> /* ** Connection ** */
>
> +static void unregister_rate_watch(struct backend_info *be)
> +{
> + if (be->have_rate_watch) {
> + unregister_xenbus_watch(&be->rate_watch);
> + kfree(be->rate_watch.node);
> + }
> + be->have_rate_watch = 0;
> +}
> +
> +static void rate_changed(struct xenbus_watch *watch,
> + const char **vec, unsigned int len)
> +{
> +
> + struct backend_info *be=container_of(watch,struct backend_info,
> rate_watch);
> + int err;
> + char name[TASK_COMM_LEN];
> +
> + err = blkback_name(be->blkif, name);
> + if (err) {
> + xenbus_dev_error(be->dev, err, "get blkback dev name");
> + return;
> + }
> +
> + err = xenbus_gather(XBT_NIL,be->dev->otherend,
> + "tokens-rate", "%d",
> + &be->blkif->reqrate,NULL);
> + if(err){
> + DPRINTK("%s xenbus_gather(tokens-rate) error",name);
> + }else{
> + if(be->blkif->reqrate <= 0){
> + be->blkif->reqmin = 0;
> + DPRINTK("%s tokens-rate == 0,no limit",name);
> + }else{
> + DPRINTK("%s
> xenbus_gather(tokens-rate=%d)",name,be->blkif->reqrate);
> + be->blkif->reqrate *= 2;
> + be->blkif->reqmin = VBD_QOS_MIN_RATE_LIMIT;
> + if(be->blkif->reqmin > be->blkif->reqrate){
> + be->blkif->reqrate = be->blkif->reqmin;
> + DPRINTK("%s reset default
> value(tokens-rate=%d)",name,be->blkif->reqrate);
> + }
> + }
> + }
> +}
>
> /**
> * Write the physical details regarding the block device to the store, and
> @@ -439,6 +519,14 @@ again:
> if (err)
> goto abort;
>
> + /*add by andrew for centos pv*/
> + err = xenbus_printf(xbt, dev->nodename,"feature-flush-cache", "1");
> + if (err){
> + xenbus_dev_fatal(dev, err, "writing %s/feature-flush-cache",
> + dev->nodename);
> + goto abort;
> + }
> +
> err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
> vbd_size(&be->blkif->vbd));
> if (err) {
> @@ -469,11 +557,22 @@ again:
> if (err)
> xenbus_dev_fatal(dev, err, "ending transaction");
>
> + DPRINTK("xenbus_switch_to XenbusStateConnected");
> +
> err = xenbus_switch_state(dev, XenbusStateConnected);
> if (err)
> xenbus_dev_fatal(dev, err, "switching to Connected state",
> dev->nodename);
>
> + unregister_rate_watch(be);
> + err=xenbus_watch_path2(dev, dev->otherend, "tokens-rate",
> +
> &be->rate_watch,rate_changed);
> + if (!err)
> + be->have_rate_watch = 1;
> + else
> + xenbus_dev_fatal(dev, err, "watching tokens-rate",
> + dev->nodename);
> +
> return;
> abort:
> xenbus_transaction_end(xbt, 1);
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxxxxxxxx
> http://lists.xensource.com/xen-devel
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|