[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] VM disk I/O limit patch



Hi all,

I add a blkback QoS patch.
You can config(dynamic/static) different I/O speed for different VM disk
by this patch.

----------------------------------------------------------------------------

diff -urNp blkback/blkback.c blkback-qos/blkback.c
--- blkback/blkback.c   2011-06-22 07:54:19.000000000 +0800
+++ blkback-qos/blkback.c       2011-06-22 07:53:18.000000000 +0800
@@ -44,6 +44,11 @@
 #include <asm/hypervisor.h>
 #include "common.h"
 
+#undef DPRINTK
+#define DPRINTK(fmt, args...)                          \
+       printk("blkback/blkback (%s:%d) " fmt ".\n",    \
+                __FUNCTION__, __LINE__, ##args)
+
 /*
  * These are rather arbitrary. They are fairly large because adjacent requests
  * pulled from a communication ring are quite likely to end up being part of
@@ -110,7 +115,8 @@ static inline unsigned long vaddr(pendin
 static int do_block_io_op(blkif_t *blkif);
 static int dispatch_rw_block_io(blkif_t *blkif,
                                 blkif_request_t *req,
-                                pending_req_t *pending_req);
+                                pending_req_t *pending_req,
+                                int *done_nr_sects);
 static void make_response(blkif_t *blkif, u64 id,
                          unsigned short op, int st);
 
@@ -206,10 +212,20 @@ static void print_stats(blkif_t *blkif)
        blkif->st_pk_req = 0;
 }
 
+static void refill_reqcount(blkif_t *blkif)
+{
+       blkif->reqtime = jiffies + msecs_to_jiffies(1000);
+       blkif->reqcount = blkif->reqrate;
+       if (blkif->reqcount < blkif->reqmin)
+               blkif->reqcount = blkif->reqmin;
+}
+
 int blkif_schedule(void *arg)
 {
        blkif_t *blkif = arg;
        struct vbd *vbd = &blkif->vbd;
+       int     ret = 0;
+       struct timeval cur_time;
 
        blkif_get(blkif);
 
@@ -232,12 +248,34 @@ int blkif_schedule(void *arg)
                blkif->waiting_reqs = 0;
                smp_mb(); /* clear flag *before* checking for work */
 
-               if (do_block_io_op(blkif))
+               ret = do_block_io_op(blkif);
+               if (ret)
                        blkif->waiting_reqs = 1;
                unplug_queue(blkif);
 
+               if(blkif->reqmin){
+                       if(2 == ret && (blkif->reqtime > jiffies)){
+                               jiffies_to_timeval(jiffies, &cur_time);
+                               if(log_stats && (cur_time.tv_sec % 10 ==1 ))
+                                       printk(KERN_DEBUG "%s: going to sleep 
%d millsecs(rate=%d)\n",
+                                                       current->comm,
+                                                       
jiffies_to_msecs(blkif->reqtime - jiffies),
+                                                       blkif->reqrate);
+                               
+                               set_current_state(TASK_INTERRUPTIBLE);
+                               schedule_timeout(blkif->reqtime - jiffies);
+                               
+                               if(log_stats && (cur_time.tv_sec % 10 ==1 ))
+                                       printk(KERN_DEBUG "%s: sleep 
end(rate=%d)\n",
+                                                       
current->comm,blkif->reqrate);
+                       }
+                       if (time_after(jiffies, blkif->reqtime))
+                               refill_reqcount(blkif);
+               }
+
                if (log_stats && time_after(jiffies, blkif->st_print))
                        print_stats(blkif);
+               
        }
 
        if (log_stats)
@@ -306,7 +344,6 @@ irqreturn_t blkif_be_int(int irq, void *
 /******************************************************************
  * DOWNWARD CALLS -- These interface with the block-device layer proper.
  */
-
 static int do_block_io_op(blkif_t *blkif)
 {
        blkif_back_rings_t *blk_rings = &blkif->blk_rings;
@@ -314,15 +351,27 @@ static int do_block_io_op(blkif_t *blkif
        pending_req_t *pending_req;
        RING_IDX rc, rp;
        int more_to_do = 0, ret;
+       static int last_done_nr_sects = 0;      
 
        rc = blk_rings->common.req_cons;
        rp = blk_rings->common.sring->req_prod;
        rmb(); /* Ensure we see queued requests up to 'rp'. */
+       
+       if (blkif->reqmin && blkif->reqcount <= 0)
+               return (rc != rp) ? 2 : 0;
 
        while ((rc != rp) || (blkif->is_suspended_req)) {
 
                if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
                        break;
+               
+               if(blkif->reqmin){
+                       blkif->reqcount -= last_done_nr_sects;
+                       if (blkif->reqcount <= 0) {
+                               more_to_do = 2;
+                               break;
+                       }
+               }
 
                if (kthread_should_stop()) {
                        more_to_do = 1;
@@ -367,14 +416,14 @@ handle_request:
                switch (req.operation) {
                case BLKIF_OP_READ:
                        blkif->st_rd_req++;
-                       ret = dispatch_rw_block_io(blkif, &req, pending_req); 
+                       ret = dispatch_rw_block_io(blkif, &req, 
pending_req,&last_done_nr_sects); 
                        break;
                case BLKIF_OP_WRITE_BARRIER:
                        blkif->st_br_req++;
                        /* fall through */
                case BLKIF_OP_WRITE:
                        blkif->st_wr_req++;
-                       ret = dispatch_rw_block_io(blkif, &req, pending_req);
+                       ret = dispatch_rw_block_io(blkif, &req, 
pending_req,&last_done_nr_sects);
                        break;
                case BLKIF_OP_PACKET:
                        DPRINTK("error: block operation BLKIF_OP_PACKET not 
implemented\n");
@@ -412,9 +461,29 @@ handle_request:
        return more_to_do;
 }
 
+static char* operation2str(int operation)
+{
+       char* ret_str = NULL;
+       switch (operation) {
+       case BLKIF_OP_READ:
+               ret_str = "READ";
+               break;
+       case BLKIF_OP_WRITE:
+               ret_str = "WRITE";
+               break;
+       case BLKIF_OP_WRITE_BARRIER:
+               ret_str = "WRITE_BARRIER";
+               break;
+       default:
+               ret_str = "0";
+       }
+       return ret_str;
+}
+
 static int dispatch_rw_block_io(blkif_t *blkif,
                                 blkif_request_t *req,
-                                pending_req_t *pending_req)
+                                pending_req_t *pending_req,
+                                int *done_nr_sects)
 {
        extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
        struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
@@ -426,6 +495,9 @@ static int dispatch_rw_block_io(blkif_t
        struct bio *bio = NULL;
        int ret, i;
        int operation;
+       struct timeval cur_time;
+
+       *done_nr_sects = 0;
 
        switch (req->operation) {
        case BLKIF_OP_READ:
@@ -582,6 +654,12 @@ static int dispatch_rw_block_io(blkif_t
        else if (operation == WRITE || operation == WRITE_BARRIER)
                blkif->st_wr_sect += preq.nr_sects;
 
+       *done_nr_sects = preq.nr_sects;
+       jiffies_to_timeval(jiffies, &cur_time);
+       if ((log_stats == 2) && (cur_time.tv_sec % 10 ==1 ))
+               printk(KERN_DEBUG "  operation=%s sects=%d\n",
+                       operation2str(req->operation),preq.nr_sects);
+
        return 0;
 
  fail_flush:
@@ -695,6 +773,8 @@ static int __init blkif_init(void)
 
        blkif_xenbus_init();
 
+       DPRINTK("blkif_inited\n");
+
        return 0;
 
  out_of_memory:
diff -urNp blkback/cdrom.c blkback-qos/cdrom.c
--- blkback/cdrom.c     2010-05-20 18:07:00.000000000 +0800
+++ blkback-qos/cdrom.c 2011-06-22 07:34:50.000000000 +0800
@@ -35,9 +35,9 @@
 #include "common.h"
 
 #undef DPRINTK
-#define DPRINTK(_f, _a...)                     \
-       printk("(%s() file=%s, line=%d) " _f "\n",      \
-                __PRETTY_FUNCTION__, __FILE__ , __LINE__ , ##_a )
+#define DPRINTK(fmt, args...)                          \
+       printk("blkback/cdrom (%s:%d) " fmt ".\n",      \
+                __FUNCTION__, __LINE__, ##args)
 
 
 #define MEDIA_PRESENT "media-present"
diff -urNp blkback/common.h blkback-qos/common.h
--- blkback/common.h    2010-05-20 18:07:00.000000000 +0800
+++ blkback-qos/common.h        2011-06-22 07:34:50.000000000 +0800
@@ -100,8 +100,17 @@ typedef struct blkif_st {
 
        grant_handle_t shmem_handle;
        grant_ref_t    shmem_ref;
+
+       /* qos information */
+       unsigned long   reqtime;
+       int    reqcount;
+       int    reqmin;
+       int    reqrate; 
+
 } blkif_t;
 
+#define VBD_QOS_MIN_RATE_LIMIT                 2*1024          /*      1MBs    
*/
+
 struct backend_info
 {
        struct xenbus_device *dev;
@@ -111,6 +120,8 @@ struct backend_info
        unsigned major;
        unsigned minor;
        char *mode;
+       struct xenbus_watch rate_watch;
+       int have_rate_watch; 
 };
 
 blkif_t *blkif_alloc(domid_t domid);
diff -urNp blkback/vbd.c blkback-qos/vbd.c
--- blkback/vbd.c       2010-05-20 18:07:00.000000000 +0800
+++ blkback-qos/vbd.c   2011-06-22 07:34:50.000000000 +0800
@@ -35,6 +35,11 @@
 #define vbd_sz(_v)   ((_v)->bdev->bd_part ?                            \
        (_v)->bdev->bd_part->nr_sects : get_capacity((_v)->bdev->bd_disk))
 
+#undef DPRINTK
+#define DPRINTK(fmt, args...)                          \
+       printk("blkback/vbd (%s:%d) " fmt ".\n",        \
+                __FUNCTION__, __LINE__, ##args)
+
 unsigned long long vbd_size(struct vbd *vbd)
 {
        return vbd_sz(vbd);
@@ -87,7 +92,7 @@ int vbd_create(blkif_t *blkif, blkif_vde
        if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
                vbd->type |= VDISK_REMOVABLE;
 
-       DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
+       DPRINTK("Successful creation of handle=%04x (dom=%u)",
                handle, blkif->domid);
        return 0;
 }
diff -urNp blkback/xenbus.c blkback-qos/xenbus.c
--- blkback/xenbus.c    2010-05-20 18:07:00.000000000 +0800
+++ blkback-qos/xenbus.c        2011-06-22 07:34:50.000000000 +0800
@@ -25,13 +25,14 @@
 
 #undef DPRINTK
 #define DPRINTK(fmt, args...)                          \
-       pr_debug("blkback/xenbus (%s:%d) " fmt ".\n",   \
+       printk("blkback/xenbus (%s:%d) " fmt ".\n",     \
                 __FUNCTION__, __LINE__, ##args)
 
 static void connect(struct backend_info *);
 static int connect_ring(struct backend_info *);
 static void backend_changed(struct xenbus_watch *, const char **,
                            unsigned int);
+static void unregister_rate_watch(struct backend_info *be);
 
 static int blkback_name(blkif_t *blkif, char *buf)
 {
@@ -59,8 +60,10 @@ static void update_blkif_status(blkif_t
        char name[TASK_COMM_LEN];
 
        /* Not ready to connect? */
-       if (!blkif->irq || !blkif->vbd.bdev)
+       if (!blkif->irq || !blkif->vbd.bdev){
+               DPRINTK("Not ready to connect");
                return;
+       }
 
        /* Already connected? */
        if (blkif->be->dev->state == XenbusStateConnected)
@@ -193,6 +196,8 @@ static int blkback_remove(struct xenbus_
                be->cdrom_watch.node = NULL;
        }
 
+       unregister_rate_watch(be);
+
        if (be->blkif) {
                blkif_disconnect(be->blkif);
                vbd_free(&be->blkif->vbd);
@@ -251,6 +256,10 @@ static int blkback_probe(struct xenbus_d
 
        err = xenbus_watch_path2(dev, dev->nodename, "physical-device",
                                 &be->backend_watch, backend_changed);
+
+       DPRINTK("blkback_probe called");
+       DPRINTK("dev->nodename=%s/physical-device",dev->nodename);
+       
        if (err)
                goto fail;
 
@@ -266,7 +275,6 @@ fail:
        return err;
 }
 
-
 /**
  * Callback received when the hotplug scripts have placed the physical-device
  * node.  Read it and the mode node, and create a vbd.  If the frontend is
@@ -283,8 +291,9 @@ static void backend_changed(struct xenbu
        struct xenbus_device *dev = be->dev;
        int cdrom = 0;
        char *device_type;
+       char name[TASK_COMM_LEN];
 
-       DPRINTK("");
+       DPRINTK("backend_changed called");
 
        err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x",
                           &major, &minor);
@@ -322,6 +331,34 @@ static void backend_changed(struct xenbu
                kfree(device_type);
        }
 
+       /* gather information about QoS policy for this device. */
+       err = blkback_name(be->blkif, name);
+       if (err) {
+               xenbus_dev_error(be->dev, err, "get blkback dev name");
+               return;
+       }
+       
+       err = xenbus_gather(XBT_NIL, dev->otherend,
+                               "tokens-rate", "%d", &be->blkif->reqrate, 
+                               NULL);
+       if(err){
+               DPRINTK("%s xenbus_gather(tokens-min,tokens-rate) error",name);
+       }else{
+               if(be->blkif->reqrate <= 0){
+                       be->blkif->reqmin = 0 ;
+                       DPRINTK("%s tokens-rate == 0,no limit",name);   
+               }else{
+                       DPRINTK("%s 
xenbus_gather(tokens-rate=%d)",name,be->blkif->reqrate);
+                       be->blkif->reqrate *= 2;
+                       be->blkif->reqmin = VBD_QOS_MIN_RATE_LIMIT;
+                       if(be->blkif->reqmin > be->blkif->reqrate){
+                               be->blkif->reqrate = be->blkif->reqmin;
+                               DPRINTK("%s reset default 
value(tokens-rate=%d)",name,be->blkif->reqrate);
+                       }
+               }
+       }
+       be->blkif->reqtime = jiffies;
+
        if (be->major == 0 && be->minor == 0) {
                /* Front end dir is a number, which is used as the handle. */
 
@@ -414,6 +451,49 @@ static void frontend_changed(struct xenb
 
 /* ** Connection ** */
 
+static void unregister_rate_watch(struct backend_info *be)
+{
+       if (be->have_rate_watch) {
+               unregister_xenbus_watch(&be->rate_watch);
+               kfree(be->rate_watch.node);
+       }
+       be->have_rate_watch = 0;
+}
+
+static void rate_changed(struct xenbus_watch *watch,
+                       const char **vec, unsigned int len)
+{
+
+       struct backend_info *be=container_of(watch,struct backend_info, 
rate_watch);
+       int err;
+       char name[TASK_COMM_LEN];
+
+       err = blkback_name(be->blkif, name);
+       if (err) {
+               xenbus_dev_error(be->dev, err, "get blkback dev name");
+               return;
+       }
+
+       err = xenbus_gather(XBT_NIL,be->dev->otherend, 
+                                       "tokens-rate",  "%d", 
+                                       &be->blkif->reqrate,NULL);
+       if(err){
+               DPRINTK("%s xenbus_gather(tokens-rate) error",name);
+       }else{
+               if(be->blkif->reqrate <= 0){
+                       be->blkif->reqmin = 0;
+                       DPRINTK("%s tokens-rate == 0,no limit",name);   
+               }else{
+                       DPRINTK("%s 
xenbus_gather(tokens-rate=%d)",name,be->blkif->reqrate);
+                       be->blkif->reqrate *= 2;
+                       be->blkif->reqmin = VBD_QOS_MIN_RATE_LIMIT;
+                       if(be->blkif->reqmin > be->blkif->reqrate){
+                               be->blkif->reqrate = be->blkif->reqmin;
+                               DPRINTK("%s reset default 
value(tokens-rate=%d)",name,be->blkif->reqrate);
+                       }
+               }
+       }
+}
 
 /**
  * Write the physical details regarding the block device to the store, and
@@ -439,6 +519,14 @@ again:
        if (err)
                goto abort;
 
+       /*add by andrew for centos pv*/
+       err = xenbus_printf(xbt, dev->nodename,"feature-flush-cache", "1");
+       if (err){
+               xenbus_dev_fatal(dev, err, "writing %s/feature-flush-cache",
+                       dev->nodename);
+               goto abort;
+       }
+
        err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
                            vbd_size(&be->blkif->vbd));
        if (err) {
@@ -469,11 +557,22 @@ again:
        if (err)
                xenbus_dev_fatal(dev, err, "ending transaction");
 
+       DPRINTK("xenbus_switch_to XenbusStateConnected");
+
        err = xenbus_switch_state(dev, XenbusStateConnected);
        if (err)
                xenbus_dev_fatal(dev, err, "switching to Connected state",
                                 dev->nodename);
 
+       unregister_rate_watch(be);
+       err=xenbus_watch_path2(dev, dev->otherend, "tokens-rate",
+                                                               
&be->rate_watch,rate_changed);
+       if (!err)
+               be->have_rate_watch = 1;
+       else
+               xenbus_dev_fatal(dev, err, "watching tokens-rate",
+                                dev->nodename);
+
        return;
  abort:
        xenbus_transaction_end(xbt, 1);

Attachment: blkback-qos-20110621.diff
Description: Binary data

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.