Hi all,

This patch adds QoS support to blkback: it lets you configure a different
I/O rate for each VM disk, either statically or dynamically at runtime
(see the sketch below).
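
Before the diff, a rough sketch of the mechanism for reviewers. This is
illustrative only and not part of the patch; the struct and function names
below are mine, while the patch keeps the equivalent state in blkif_t as
reqrate/reqmin/reqcount/reqtime:

/* Per-vbd token bucket, counted in 512-byte sectors per second. */
struct token_bucket {
	long rate;                  /* sectors granted per one-second interval */
	long min;                   /* floor, about 1 MB/s worth of sectors */
	long tokens;                /* sectors still allowed in this interval */
	unsigned long next_refill;  /* jiffies-style deadline for the next refill */
};

/* Reset the budget once per second (cf. refill_reqcount() in the patch). */
static void bucket_refill(struct token_bucket *b, unsigned long now,
			  unsigned long hz)
{
	b->next_refill = now + hz;
	b->tokens = (b->rate > b->min) ? b->rate : b->min;
}

/* Charge one completed request; when this returns 0 the backend thread
 * should sleep until next_refill (cf. blkif_schedule() below). */
static int bucket_consume(struct token_bucket *b, long nr_sects)
{
	b->tokens -= nr_sects;
	return b->tokens > 0;
}

The per-disk rate comes from the frontend's "tokens-rate" xenstore node
(the value appears to be in KiB/s, since the patch doubles it to get
sectors per second) and is clamped to a floor of VBD_QOS_MIN_RATE_LIMIT
(2048 sectors, i.e. 1 MB/s). connect() registers a watch on that node, so
the limit can also be changed at runtime, e.g. with xenstore-write.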
----------------------------------------------------------------------------
diff -urNp blkback/blkback.c blkback-qos/blkback.c
--- blkback/blkback.c 2011-06-22 07:54:19.000000000 +0800
+++ blkback-qos/blkback.c 2011-06-22 07:53:18.000000000 +0800
@@ -44,6 +44,11 @@
#include <asm/hypervisor.h>
#include "common.h"
+#undef DPRINTK
+#define DPRINTK(fmt, args...) \
+ printk("blkback/blkback (%s:%d) " fmt ".\n", \
+ __FUNCTION__, __LINE__, ##args)
+
/*
* These are rather arbitrary. They are fairly large because adjacent requests
* pulled from a communication ring are quite likely to end up being part of
@@ -110,7 +115,8 @@ static inline unsigned long vaddr(pendin
static int do_block_io_op(blkif_t *blkif);
static int dispatch_rw_block_io(blkif_t *blkif,
blkif_request_t *req,
- pending_req_t *pending_req);
+ pending_req_t *pending_req,
+ int *done_nr_sects);
static void make_response(blkif_t *blkif, u64 id,
unsigned short op, int st);
@@ -206,10 +212,20 @@ static void print_stats(blkif_t *blkif)
blkif->st_pk_req = 0;
}
+static void refill_reqcount(blkif_t *blkif)
+{
+ blkif->reqtime = jiffies + msecs_to_jiffies(1000);
+ blkif->reqcount = blkif->reqrate;
+ if (blkif->reqcount < blkif->reqmin)
+ blkif->reqcount = blkif->reqmin;
+}
+
int blkif_schedule(void *arg)
{
blkif_t *blkif = arg;
struct vbd *vbd = &blkif->vbd;
+ int ret = 0;
+ struct timeval cur_time;
blkif_get(blkif);
@@ -232,12 +248,34 @@ int blkif_schedule(void *arg)
blkif->waiting_reqs = 0;
smp_mb(); /* clear flag *before* checking for work */
- if (do_block_io_op(blkif))
+ ret = do_block_io_op(blkif);
+ if (ret)
blkif->waiting_reqs = 1;
unplug_queue(blkif);
+ if (blkif->reqmin) {
+ if (2 == ret && (blkif->reqtime > jiffies)) {
+ jiffies_to_timeval(jiffies, &cur_time);
+ if (log_stats && (cur_time.tv_sec % 10 == 1))
+ printk(KERN_DEBUG "%s: going to sleep %d millisecs (rate=%d)\n",
+ current->comm, jiffies_to_msecs(blkif->reqtime - jiffies),
+ blkif->reqrate);
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(blkif->reqtime - jiffies);
+
+ if (log_stats && (cur_time.tv_sec % 10 == 1))
+ printk(KERN_DEBUG "%s: sleep end (rate=%d)\n",
+ current->comm, blkif->reqrate);
+ }
+ if (time_after(jiffies, blkif->reqtime))
+ refill_reqcount(blkif);
+ }
+
if (log_stats && time_after(jiffies, blkif->st_print))
print_stats(blkif);
+
}
if (log_stats)
@@ -306,7 +344,6 @@ irqreturn_t blkif_be_int(int irq, void *
/******************************************************************
* DOWNWARD CALLS -- These interface with the block-device layer proper.
*/
-
static int do_block_io_op(blkif_t *blkif)
{
blkif_back_rings_t *blk_rings = &blkif->blk_rings;
@@ -314,15 +351,27 @@ static int do_block_io_op(blkif_t *blkif
pending_req_t *pending_req;
RING_IDX rc, rp;
int more_to_do = 0, ret;
+ static int last_done_nr_sects = 0;
rc = blk_rings->common.req_cons;
rp = blk_rings->common.sring->req_prod;
rmb(); /* Ensure we see queued requests up to 'rp'. */
+
+ if (blkif->reqmin && blkif->reqcount <= 0)
+ return (rc != rp) ? 2 : 0;
while ((rc != rp) || (blkif->is_suspended_req)) {
if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
break;
+
+ if(blkif->reqmin){
+ blkif->reqcount -= last_done_nr_sects;
+ if (blkif->reqcount <= 0) {
+ more_to_do = 2;
+ break;
+ }
+ }
if (kthread_should_stop()) {
more_to_do = 1;
@@ -367,14 +416,14 @@ handle_request:
switch (req.operation) {
case BLKIF_OP_READ:
blkif->st_rd_req++;
- ret = dispatch_rw_block_io(blkif, &req, pending_req);
+ ret = dispatch_rw_block_io(blkif, &req, pending_req, &last_done_nr_sects);
break;
case BLKIF_OP_WRITE_BARRIER:
blkif->st_br_req++;
/* fall through */
case BLKIF_OP_WRITE:
blkif->st_wr_req++;
- ret = dispatch_rw_block_io(blkif, &req, pending_req);
+ ret = dispatch_rw_block_io(blkif, &req, pending_req, &last_done_nr_sects);
break;
case BLKIF_OP_PACKET:
DPRINTK("error: block operation BLKIF_OP_PACKET not
implemented\n");
@@ -412,9 +461,29 @@ handle_request:
return more_to_do;
}
+static char* operation2str(int operation)
+{
+ char* ret_str = NULL;
+ switch (operation) {
+ case BLKIF_OP_READ:
+ ret_str = "READ";
+ break;
+ case BLKIF_OP_WRITE:
+ ret_str = "WRITE";
+ break;
+ case BLKIF_OP_WRITE_BARRIER:
+ ret_str = "WRITE_BARRIER";
+ break;
+ default:
+ ret_str = "0";
+ }
+ return ret_str;
+}
+
static int dispatch_rw_block_io(blkif_t *blkif,
blkif_request_t *req,
- pending_req_t *pending_req)
+ pending_req_t *pending_req,
+ int *done_nr_sects)
{
extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
@@ -426,6 +495,9 @@ static int dispatch_rw_block_io(blkif_t
struct bio *bio = NULL;
int ret, i;
int operation;
+ struct timeval cur_time;
+
+ *done_nr_sects = 0;
switch (req->operation) {
case BLKIF_OP_READ:
@@ -582,6 +654,12 @@ static int dispatch_rw_block_io(blkif_t
else if (operation == WRITE || operation == WRITE_BARRIER)
blkif->st_wr_sect += preq.nr_sects;
+ *done_nr_sects = preq.nr_sects;
+ jiffies_to_timeval(jiffies, &cur_time);
+ if ((log_stats == 2) && (cur_time.tv_sec % 10 == 1))
+ printk(KERN_DEBUG " operation=%s sects=%d\n",
+ operation2str(req->operation), preq.nr_sects);
+
return 0;
fail_flush:
@@ -695,6 +773,8 @@ static int __init blkif_init(void)
blkif_xenbus_init();
+ DPRINTK("blkif_inited\n");
+
return 0;
out_of_memory:
diff -urNp blkback/cdrom.c blkback-qos/cdrom.c
--- blkback/cdrom.c 2010-05-20 18:07:00.000000000 +0800
+++ blkback-qos/cdrom.c 2011-06-22 07:34:50.000000000 +0800
@@ -35,9 +35,9 @@
#include "common.h"
#undef DPRINTK
-#define DPRINTK(_f, _a...) \
- printk("(%s() file=%s, line=%d) " _f "\n", \
- __PRETTY_FUNCTION__, __FILE__ , __LINE__ , ##_a )
+#define DPRINTK(fmt, args...) \
+ printk("blkback/cdrom (%s:%d) " fmt ".\n", \
+ __FUNCTION__, __LINE__, ##args)
#define MEDIA_PRESENT "media-present"
diff -urNp blkback/common.h blkback-qos/common.h
--- blkback/common.h 2010-05-20 18:07:00.000000000 +0800
+++ blkback-qos/common.h 2011-06-22 07:34:50.000000000 +0800
@@ -100,8 +100,17 @@ typedef struct blkif_st {
grant_handle_t shmem_handle;
grant_ref_t shmem_ref;
+
+ /* qos information */
+ unsigned long reqtime;
+ int reqcount;
+ int reqmin;
+ int reqrate;
+
} blkif_t;
+#define VBD_QOS_MIN_RATE_LIMIT (2*1024) /* 2048 sectors/s = 1 MB/s */
+
struct backend_info
{
struct xenbus_device *dev;
@@ -111,6 +120,8 @@ struct backend_info
unsigned major;
unsigned minor;
char *mode;
+ struct xenbus_watch rate_watch;
+ int have_rate_watch;
};
blkif_t *blkif_alloc(domid_t domid);
diff -urNp blkback/vbd.c blkback-qos/vbd.c
--- blkback/vbd.c 2010-05-20 18:07:00.000000000 +0800
+++ blkback-qos/vbd.c 2011-06-22 07:34:50.000000000 +0800
@@ -35,6 +35,11 @@
#define vbd_sz(_v) ((_v)->bdev->bd_part ? \
(_v)->bdev->bd_part->nr_sects : get_capacity((_v)->bdev->bd_disk))
+#undef DPRINTK
+#define DPRINTK(fmt, args...) \
+ printk("blkback/vbd (%s:%d) " fmt ".\n", \
+ __FUNCTION__, __LINE__, ##args)
+
unsigned long long vbd_size(struct vbd *vbd)
{
return vbd_sz(vbd);
@@ -87,7 +92,7 @@ int vbd_create(blkif_t *blkif, blkif_vde
if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
vbd->type |= VDISK_REMOVABLE;
- DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
+ DPRINTK("Successful creation of handle=%04x (dom=%u)",
handle, blkif->domid);
return 0;
}
diff -urNp blkback/xenbus.c blkback-qos/xenbus.c
--- blkback/xenbus.c 2010-05-20 18:07:00.000000000 +0800
+++ blkback-qos/xenbus.c 2011-06-22 07:34:50.000000000 +0800
@@ -25,13 +25,14 @@
#undef DPRINTK
#define DPRINTK(fmt, args...) \
- pr_debug("blkback/xenbus (%s:%d) " fmt ".\n", \
+ printk("blkback/xenbus (%s:%d) " fmt ".\n", \
__FUNCTION__, __LINE__, ##args)
static void connect(struct backend_info *);
static int connect_ring(struct backend_info *);
static void backend_changed(struct xenbus_watch *, const char **,
unsigned int);
+static void unregister_rate_watch(struct backend_info *be);
static int blkback_name(blkif_t *blkif, char *buf)
{
@@ -59,8 +60,10 @@ static void update_blkif_status(blkif_t
char name[TASK_COMM_LEN];
/* Not ready to connect? */
- if (!blkif->irq || !blkif->vbd.bdev)
+ if (!blkif->irq || !blkif->vbd.bdev){
+ DPRINTK("Not ready to connect");
return;
+ }
/* Already connected? */
if (blkif->be->dev->state == XenbusStateConnected)
@@ -193,6 +196,8 @@ static int blkback_remove(struct xenbus_
be->cdrom_watch.node = NULL;
}
+ unregister_rate_watch(be);
+
if (be->blkif) {
blkif_disconnect(be->blkif);
vbd_free(&be->blkif->vbd);
@@ -251,6 +256,10 @@ static int blkback_probe(struct xenbus_d
err = xenbus_watch_path2(dev, dev->nodename, "physical-device",
&be->backend_watch, backend_changed);
+
+ DPRINTK("blkback_probe called");
+ DPRINTK("dev->nodename=%s/physical-device",dev->nodename);
+
if (err)
goto fail;
@@ -266,7 +275,6 @@ fail:
return err;
}
-
/**
* Callback received when the hotplug scripts have placed the physical-device
* node. Read it and the mode node, and create a vbd. If the frontend is
@@ -283,8 +291,9 @@ static void backend_changed(struct xenbu
struct xenbus_device *dev = be->dev;
int cdrom = 0;
char *device_type;
+ char name[TASK_COMM_LEN];
- DPRINTK("");
+ DPRINTK("backend_changed called");
err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x",
&major, &minor);
@@ -322,6 +331,34 @@ static void backend_changed(struct xenbu
kfree(device_type);
}
+ /* gather information about QoS policy for this device. */
+ err = blkback_name(be->blkif, name);
+ if (err) {
+ xenbus_dev_error(be->dev, err, "get blkback dev name");
+ return;
+ }
+
+ err = xenbus_gather(XBT_NIL, dev->otherend,
+ "tokens-rate", "%d", &be->blkif->reqrate,
+ NULL);
+ if (err) {
+ DPRINTK("%s xenbus_gather(tokens-rate) error", name);
+ } else {
+ if (be->blkif->reqrate <= 0) {
+ be->blkif->reqmin = 0;
+ DPRINTK("%s tokens-rate <= 0, no limit", name);
+ } else {
+ DPRINTK("%s xenbus_gather(tokens-rate=%d)", name, be->blkif->reqrate);
+ be->blkif->reqrate *= 2;
+ be->blkif->reqmin = VBD_QOS_MIN_RATE_LIMIT;
+ if (be->blkif->reqmin > be->blkif->reqrate) {
+ be->blkif->reqrate = be->blkif->reqmin;
+ DPRINTK("%s reset to default value (tokens-rate=%d)", name, be->blkif->reqrate);
+ }
+ }
+ }
+ be->blkif->reqtime = jiffies;
+
if (be->major == 0 && be->minor == 0) {
/* Front end dir is a number, which is used as the handle. */
@@ -414,6 +451,49 @@ static void frontend_changed(struct xenb
/* ** Connection ** */
+static void unregister_rate_watch(struct backend_info *be)
+{
+ if (be->have_rate_watch) {
+ unregister_xenbus_watch(&be->rate_watch);
+ kfree(be->rate_watch.node);
+ }
+ be->have_rate_watch = 0;
+}
+
+static void rate_changed(struct xenbus_watch *watch,
+ const char **vec, unsigned int len)
+{
+
+ struct backend_info *be = container_of(watch, struct backend_info, rate_watch);
+ int err;
+ char name[TASK_COMM_LEN];
+
+ err = blkback_name(be->blkif, name);
+ if (err) {
+ xenbus_dev_error(be->dev, err, "get blkback dev name");
+ return;
+ }
+
+ err = xenbus_gather(XBT_NIL, be->dev->otherend,
+ "tokens-rate", "%d",
+ &be->blkif->reqrate, NULL);
+ if (err) {
+ DPRINTK("%s xenbus_gather(tokens-rate) error", name);
+ } else {
+ if (be->blkif->reqrate <= 0) {
+ be->blkif->reqmin = 0;
+ DPRINTK("%s tokens-rate <= 0, no limit", name);
+ } else {
+ DPRINTK("%s xenbus_gather(tokens-rate=%d)", name, be->blkif->reqrate);
+ be->blkif->reqrate *= 2;
+ be->blkif->reqmin = VBD_QOS_MIN_RATE_LIMIT;
+ if (be->blkif->reqmin > be->blkif->reqrate) {
+ be->blkif->reqrate = be->blkif->reqmin;
+ DPRINTK("%s reset to default value (tokens-rate=%d)", name, be->blkif->reqrate);
+ }
+ }
+ }
+}
/**
* Write the physical details regarding the block device to the store, and
@@ -439,6 +519,14 @@ again:
if (err)
goto abort;
+ /* added by Andrew for CentOS PV guests */
+ err = xenbus_printf(xbt, dev->nodename, "feature-flush-cache", "1");
+ if (err) {
+ xenbus_dev_fatal(dev, err, "writing %s/feature-flush-cache",
+ dev->nodename);
+ goto abort;
+ }
+
err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
vbd_size(&be->blkif->vbd));
if (err) {
@@ -469,11 +557,22 @@ again:
if (err)
xenbus_dev_fatal(dev, err, "ending transaction");
+ DPRINTK("xenbus_switch_to XenbusStateConnected");
+
err = xenbus_switch_state(dev, XenbusStateConnected);
if (err)
xenbus_dev_fatal(dev, err, "switching to Connected state",
dev->nodename);
+ unregister_rate_watch(be);
+ err = xenbus_watch_path2(dev, dev->otherend, "tokens-rate",
+ &be->rate_watch, rate_changed);
+ if (!err)
+ be->have_rate_watch = 1;
+ else
+ xenbus_dev_fatal(dev, err, "watching tokens-rate",
+ dev->nodename);
+
return;
abort:
xenbus_transaction_end(xbt, 1);