WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-users

[Xen-devel] VM disk I/O limit patch

To: xen-devel@xxxxxxxxxxxxxxxxxxx, xen-users@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-devel] VM disk I/O limit patch
From: Andrew Xu <xu.an@xxxxxxxxxx>
Date: Tue, 21 Jun 2011 16:29:35 +0800
Cc:
Delivery-date: Tue, 21 Jun 2011 01:30:17 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
Hi all,

I add a blkback QoS patch.
You can config(dynamic/static) different I/O speed for different VM disk
by this patch.

----------------------------------------------------------------------------

diff -urNp blkback/blkback.c blkback-qos/blkback.c
--- blkback/blkback.c   2011-06-22 07:54:19.000000000 +0800
+++ blkback-qos/blkback.c       2011-06-22 07:53:18.000000000 +0800
@@ -44,6 +44,11 @@
 #include <asm/hypervisor.h>
 #include "common.h"
 
+#undef DPRINTK
+#define DPRINTK(fmt, args...)                          \
+       printk("blkback/blkback (%s:%d) " fmt ".\n",    \
+                __FUNCTION__, __LINE__, ##args)
+
 /*
  * These are rather arbitrary. They are fairly large because adjacent requests
  * pulled from a communication ring are quite likely to end up being part of
@@ -110,7 +115,8 @@ static inline unsigned long vaddr(pendin
 static int do_block_io_op(blkif_t *blkif);
 static int dispatch_rw_block_io(blkif_t *blkif,
                                 blkif_request_t *req,
-                                pending_req_t *pending_req);
+                                pending_req_t *pending_req,
+                                int *done_nr_sects);
 static void make_response(blkif_t *blkif, u64 id,
                          unsigned short op, int st);
 
@@ -206,10 +212,20 @@ static void print_stats(blkif_t *blkif)
        blkif->st_pk_req = 0;
 }
 
+static void refill_reqcount(blkif_t *blkif)
+{
+       blkif->reqtime = jiffies + msecs_to_jiffies(1000);
+       blkif->reqcount = blkif->reqrate;
+       if (blkif->reqcount < blkif->reqmin)
+               blkif->reqcount = blkif->reqmin;
+}
+
 int blkif_schedule(void *arg)
 {
        blkif_t *blkif = arg;
        struct vbd *vbd = &blkif->vbd;
+       int     ret = 0;
+       struct timeval cur_time;
 
        blkif_get(blkif);
 
@@ -232,12 +248,34 @@ int blkif_schedule(void *arg)
                blkif->waiting_reqs = 0;
                smp_mb(); /* clear flag *before* checking for work */
 
-               if (do_block_io_op(blkif))
+               ret = do_block_io_op(blkif);
+               if (ret)
                        blkif->waiting_reqs = 1;
                unplug_queue(blkif);
 
+               if(blkif->reqmin){
+                       if(2 == ret && (blkif->reqtime > jiffies)){
+                               jiffies_to_timeval(jiffies, &cur_time);
+                               if(log_stats && (cur_time.tv_sec % 10 ==1 ))
+                                       printk(KERN_DEBUG "%s: going to sleep 
%d millsecs(rate=%d)\n",
+                                                       current->comm,
+                                                       
jiffies_to_msecs(blkif->reqtime - jiffies),
+                                                       blkif->reqrate);
+                               
+                               set_current_state(TASK_INTERRUPTIBLE);
+                               schedule_timeout(blkif->reqtime - jiffies);
+                               
+                               if(log_stats && (cur_time.tv_sec % 10 ==1 ))
+                                       printk(KERN_DEBUG "%s: sleep 
end(rate=%d)\n",
+                                                       
current->comm,blkif->reqrate);
+                       }
+                       if (time_after(jiffies, blkif->reqtime))
+                               refill_reqcount(blkif);
+               }
+
                if (log_stats && time_after(jiffies, blkif->st_print))
                        print_stats(blkif);
+               
        }
 
        if (log_stats)
@@ -306,7 +344,6 @@ irqreturn_t blkif_be_int(int irq, void *
 /******************************************************************
  * DOWNWARD CALLS -- These interface with the block-device layer proper.
  */
-
 static int do_block_io_op(blkif_t *blkif)
 {
        blkif_back_rings_t *blk_rings = &blkif->blk_rings;
@@ -314,15 +351,27 @@ static int do_block_io_op(blkif_t *blkif
        pending_req_t *pending_req;
        RING_IDX rc, rp;
        int more_to_do = 0, ret;
+       static int last_done_nr_sects = 0;      
 
        rc = blk_rings->common.req_cons;
        rp = blk_rings->common.sring->req_prod;
        rmb(); /* Ensure we see queued requests up to 'rp'. */
+       
+       if (blkif->reqmin && blkif->reqcount <= 0)
+               return (rc != rp) ? 2 : 0;
 
        while ((rc != rp) || (blkif->is_suspended_req)) {
 
                if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
                        break;
+               
+               if(blkif->reqmin){
+                       blkif->reqcount -= last_done_nr_sects;
+                       if (blkif->reqcount <= 0) {
+                               more_to_do = 2;
+                               break;
+                       }
+               }
 
                if (kthread_should_stop()) {
                        more_to_do = 1;
@@ -367,14 +416,14 @@ handle_request:
                switch (req.operation) {
                case BLKIF_OP_READ:
                        blkif->st_rd_req++;
-                       ret = dispatch_rw_block_io(blkif, &req, pending_req); 
+                       ret = dispatch_rw_block_io(blkif, &req, 
pending_req,&last_done_nr_sects); 
                        break;
                case BLKIF_OP_WRITE_BARRIER:
                        blkif->st_br_req++;
                        /* fall through */
                case BLKIF_OP_WRITE:
                        blkif->st_wr_req++;
-                       ret = dispatch_rw_block_io(blkif, &req, pending_req);
+                       ret = dispatch_rw_block_io(blkif, &req, 
pending_req,&last_done_nr_sects);
                        break;
                case BLKIF_OP_PACKET:
                        DPRINTK("error: block operation BLKIF_OP_PACKET not 
implemented\n");
@@ -412,9 +461,29 @@ handle_request:
        return more_to_do;
 }
 
+static char* operation2str(int operation)
+{
+       char* ret_str = NULL;
+       switch (operation) {
+       case BLKIF_OP_READ:
+               ret_str = "READ";
+               break;
+       case BLKIF_OP_WRITE:
+               ret_str = "WRITE";
+               break;
+       case BLKIF_OP_WRITE_BARRIER:
+               ret_str = "WRITE_BARRIER";
+               break;
+       default:
+               ret_str = "0";
+       }
+       return ret_str;
+}
+
 static int dispatch_rw_block_io(blkif_t *blkif,
                                 blkif_request_t *req,
-                                pending_req_t *pending_req)
+                                pending_req_t *pending_req,
+                                int *done_nr_sects)
 {
        extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
        struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
@@ -426,6 +495,9 @@ static int dispatch_rw_block_io(blkif_t
        struct bio *bio = NULL;
        int ret, i;
        int operation;
+       struct timeval cur_time;
+
+       *done_nr_sects = 0;
 
        switch (req->operation) {
        case BLKIF_OP_READ:
@@ -582,6 +654,12 @@ static int dispatch_rw_block_io(blkif_t
        else if (operation == WRITE || operation == WRITE_BARRIER)
                blkif->st_wr_sect += preq.nr_sects;
 
+       *done_nr_sects = preq.nr_sects;
+       jiffies_to_timeval(jiffies, &cur_time);
+       if ((log_stats == 2) && (cur_time.tv_sec % 10 ==1 ))
+               printk(KERN_DEBUG "  operation=%s sects=%d\n",
+                       operation2str(req->operation),preq.nr_sects);
+
        return 0;
 
  fail_flush:
@@ -695,6 +773,8 @@ static int __init blkif_init(void)
 
        blkif_xenbus_init();
 
+       DPRINTK("blkif_inited\n");
+
        return 0;
 
  out_of_memory:
diff -urNp blkback/cdrom.c blkback-qos/cdrom.c
--- blkback/cdrom.c     2010-05-20 18:07:00.000000000 +0800
+++ blkback-qos/cdrom.c 2011-06-22 07:34:50.000000000 +0800
@@ -35,9 +35,9 @@
 #include "common.h"
 
 #undef DPRINTK
-#define DPRINTK(_f, _a...)                     \
-       printk("(%s() file=%s, line=%d) " _f "\n",      \
-                __PRETTY_FUNCTION__, __FILE__ , __LINE__ , ##_a )
+#define DPRINTK(fmt, args...)                          \
+       printk("blkback/cdrom (%s:%d) " fmt ".\n",      \
+                __FUNCTION__, __LINE__, ##args)
 
 
 #define MEDIA_PRESENT "media-present"
diff -urNp blkback/common.h blkback-qos/common.h
--- blkback/common.h    2010-05-20 18:07:00.000000000 +0800
+++ blkback-qos/common.h        2011-06-22 07:34:50.000000000 +0800
@@ -100,8 +100,17 @@ typedef struct blkif_st {
 
        grant_handle_t shmem_handle;
        grant_ref_t    shmem_ref;
+
+       /* qos information */
+       unsigned long   reqtime;
+       int    reqcount;
+       int    reqmin;
+       int    reqrate; 
+
 } blkif_t;
 
+#define VBD_QOS_MIN_RATE_LIMIT                 2*1024          /*      1MBs    
*/
+
 struct backend_info
 {
        struct xenbus_device *dev;
@@ -111,6 +120,8 @@ struct backend_info
        unsigned major;
        unsigned minor;
        char *mode;
+       struct xenbus_watch rate_watch;
+       int have_rate_watch; 
 };
 
 blkif_t *blkif_alloc(domid_t domid);
diff -urNp blkback/vbd.c blkback-qos/vbd.c
--- blkback/vbd.c       2010-05-20 18:07:00.000000000 +0800
+++ blkback-qos/vbd.c   2011-06-22 07:34:50.000000000 +0800
@@ -35,6 +35,11 @@
 #define vbd_sz(_v)   ((_v)->bdev->bd_part ?                            \
        (_v)->bdev->bd_part->nr_sects : get_capacity((_v)->bdev->bd_disk))
 
+#undef DPRINTK
+#define DPRINTK(fmt, args...)                          \
+       printk("blkback/vbd (%s:%d) " fmt ".\n",        \
+                __FUNCTION__, __LINE__, ##args)
+
 unsigned long long vbd_size(struct vbd *vbd)
 {
        return vbd_sz(vbd);
@@ -87,7 +92,7 @@ int vbd_create(blkif_t *blkif, blkif_vde
        if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
                vbd->type |= VDISK_REMOVABLE;
 
-       DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
+       DPRINTK("Successful creation of handle=%04x (dom=%u)",
                handle, blkif->domid);
        return 0;
 }
diff -urNp blkback/xenbus.c blkback-qos/xenbus.c
--- blkback/xenbus.c    2010-05-20 18:07:00.000000000 +0800
+++ blkback-qos/xenbus.c        2011-06-22 07:34:50.000000000 +0800
@@ -25,13 +25,14 @@
 
 #undef DPRINTK
 #define DPRINTK(fmt, args...)                          \
-       pr_debug("blkback/xenbus (%s:%d) " fmt ".\n",   \
+       printk("blkback/xenbus (%s:%d) " fmt ".\n",     \
                 __FUNCTION__, __LINE__, ##args)
 
 static void connect(struct backend_info *);
 static int connect_ring(struct backend_info *);
 static void backend_changed(struct xenbus_watch *, const char **,
                            unsigned int);
+static void unregister_rate_watch(struct backend_info *be);
 
 static int blkback_name(blkif_t *blkif, char *buf)
 {
@@ -59,8 +60,10 @@ static void update_blkif_status(blkif_t
        char name[TASK_COMM_LEN];
 
        /* Not ready to connect? */
-       if (!blkif->irq || !blkif->vbd.bdev)
+       if (!blkif->irq || !blkif->vbd.bdev){
+               DPRINTK("Not ready to connect");
                return;
+       }
 
        /* Already connected? */
        if (blkif->be->dev->state == XenbusStateConnected)
@@ -193,6 +196,8 @@ static int blkback_remove(struct xenbus_
                be->cdrom_watch.node = NULL;
        }
 
+       unregister_rate_watch(be);
+
        if (be->blkif) {
                blkif_disconnect(be->blkif);
                vbd_free(&be->blkif->vbd);
@@ -251,6 +256,10 @@ static int blkback_probe(struct xenbus_d
 
        err = xenbus_watch_path2(dev, dev->nodename, "physical-device",
                                 &be->backend_watch, backend_changed);
+
+       DPRINTK("blkback_probe called");
+       DPRINTK("dev->nodename=%s/physical-device",dev->nodename);
+       
        if (err)
                goto fail;
 
@@ -266,7 +275,6 @@ fail:
        return err;
 }
 
-
 /**
  * Callback received when the hotplug scripts have placed the physical-device
  * node.  Read it and the mode node, and create a vbd.  If the frontend is
@@ -283,8 +291,9 @@ static void backend_changed(struct xenbu
        struct xenbus_device *dev = be->dev;
        int cdrom = 0;
        char *device_type;
+       char name[TASK_COMM_LEN];
 
-       DPRINTK("");
+       DPRINTK("backend_changed called");
 
        err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x",
                           &major, &minor);
@@ -322,6 +331,34 @@ static void backend_changed(struct xenbu
                kfree(device_type);
        }
 
+       /* gather information about QoS policy for this device. */
+       err = blkback_name(be->blkif, name);
+       if (err) {
+               xenbus_dev_error(be->dev, err, "get blkback dev name");
+               return;
+       }
+       
+       err = xenbus_gather(XBT_NIL, dev->otherend,
+                               "tokens-rate", "%d", &be->blkif->reqrate, 
+                               NULL);
+       if(err){
+               DPRINTK("%s xenbus_gather(tokens-min,tokens-rate) error",name);
+       }else{
+               if(be->blkif->reqrate <= 0){
+                       be->blkif->reqmin = 0 ;
+                       DPRINTK("%s tokens-rate == 0,no limit",name);   
+               }else{
+                       DPRINTK("%s 
xenbus_gather(tokens-rate=%d)",name,be->blkif->reqrate);
+                       be->blkif->reqrate *= 2;
+                       be->blkif->reqmin = VBD_QOS_MIN_RATE_LIMIT;
+                       if(be->blkif->reqmin > be->blkif->reqrate){
+                               be->blkif->reqrate = be->blkif->reqmin;
+                               DPRINTK("%s reset default 
value(tokens-rate=%d)",name,be->blkif->reqrate);
+                       }
+               }
+       }
+       be->blkif->reqtime = jiffies;
+
        if (be->major == 0 && be->minor == 0) {
                /* Front end dir is a number, which is used as the handle. */
 
@@ -414,6 +451,49 @@ static void frontend_changed(struct xenb
 
 /* ** Connection ** */
 
+static void unregister_rate_watch(struct backend_info *be)
+{
+       if (be->have_rate_watch) {
+               unregister_xenbus_watch(&be->rate_watch);
+               kfree(be->rate_watch.node);
+       }
+       be->have_rate_watch = 0;
+}
+
+static void rate_changed(struct xenbus_watch *watch,
+                       const char **vec, unsigned int len)
+{
+
+       struct backend_info *be=container_of(watch,struct backend_info, 
rate_watch);
+       int err;
+       char name[TASK_COMM_LEN];
+
+       err = blkback_name(be->blkif, name);
+       if (err) {
+               xenbus_dev_error(be->dev, err, "get blkback dev name");
+               return;
+       }
+
+       err = xenbus_gather(XBT_NIL,be->dev->otherend, 
+                                       "tokens-rate",  "%d", 
+                                       &be->blkif->reqrate,NULL);
+       if(err){
+               DPRINTK("%s xenbus_gather(tokens-rate) error",name);
+       }else{
+               if(be->blkif->reqrate <= 0){
+                       be->blkif->reqmin = 0;
+                       DPRINTK("%s tokens-rate == 0,no limit",name);   
+               }else{
+                       DPRINTK("%s 
xenbus_gather(tokens-rate=%d)",name,be->blkif->reqrate);
+                       be->blkif->reqrate *= 2;
+                       be->blkif->reqmin = VBD_QOS_MIN_RATE_LIMIT;
+                       if(be->blkif->reqmin > be->blkif->reqrate){
+                               be->blkif->reqrate = be->blkif->reqmin;
+                               DPRINTK("%s reset default 
value(tokens-rate=%d)",name,be->blkif->reqrate);
+                       }
+               }
+       }
+}
 
 /**
  * Write the physical details regarding the block device to the store, and
@@ -439,6 +519,14 @@ again:
        if (err)
                goto abort;
 
+       /*add by andrew for centos pv*/
+       err = xenbus_printf(xbt, dev->nodename,"feature-flush-cache", "1");
+       if (err){
+               xenbus_dev_fatal(dev, err, "writing %s/feature-flush-cache",
+                       dev->nodename);
+               goto abort;
+       }
+
        err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
                            vbd_size(&be->blkif->vbd));
        if (err) {
@@ -469,11 +557,22 @@ again:
        if (err)
                xenbus_dev_fatal(dev, err, "ending transaction");
 
+       DPRINTK("xenbus_switch_to XenbusStateConnected");
+
        err = xenbus_switch_state(dev, XenbusStateConnected);
        if (err)
                xenbus_dev_fatal(dev, err, "switching to Connected state",
                                 dev->nodename);
 
+       unregister_rate_watch(be);
+       err=xenbus_watch_path2(dev, dev->otherend, "tokens-rate",
+                                                               
&be->rate_watch,rate_changed);
+       if (!err)
+               be->have_rate_watch = 1;
+       else
+               xenbus_dev_fatal(dev, err, "watching tokens-rate",
+                                dev->nodename);
+
        return;
  abort:
        xenbus_transaction_end(xbt, 1);

Attachment: blkback-qos-20110621.diff
Description: Binary data

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel