Re: [Xen-devel] [PATCH 3/3] xen-blkfront: dynamic configuration of per-vbd resources
On Fri, Jul 15, 2016 at 05:31:49PM +0800, Bob Liu wrote:
> The current VBD layer reserves buffer space for each attached device based on
> three statically configured settings which are read at boot time.
>  * max_indirect_segs: Maximum number of indirect segments.
>  * max_ring_page_order: Maximum order of pages to be used for the shared ring.
>  * max_queues: Maximum number of queues (rings) to be used.
>
> But the storage backend, workload, and guest memory result in very different
> tuning requirements. It's impossible to predict application characteristics
> centrally, so it's best to allow the settings to be dynamically adjusted
> based on the workload inside the guest.
>
> Usage:
> Show current values:
> cat /sys/devices/vbd-xxx/max_indirect_segs
> cat /sys/devices/vbd-xxx/max_ring_page_order
> cat /sys/devices/vbd-xxx/max_queues
>
> Write new values:
> echo <new value> > /sys/devices/vbd-xxx/max_indirect_segs
> echo <new value> > /sys/devices/vbd-xxx/max_ring_page_order
> echo <new value> > /sys/devices/vbd-xxx/max_queues
>
> Signed-off-by: Bob Liu <bob.liu@xxxxxxxxxx>
> --
> v2: Add device lock and address other comments from Konrad.
> ---
>  drivers/block/xen-blkfront.c | 285 ++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 283 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
> index 10f46a8..9a5ed22 100644
> --- a/drivers/block/xen-blkfront.c
> +++ b/drivers/block/xen-blkfront.c
> @@ -46,6 +46,7 @@
>  #include <linux/scatterlist.h>
>  #include <linux/bitmap.h>
>  #include <linux/list.h>
> +#include <linux/delay.h>
>
>  #include <xen/xen.h>
>  #include <xen/xenbus.h>
> @@ -212,6 +213,11 @@ struct blkfront_info
>  	/* Save uncomplete reqs and bios for migration. */
>  	struct list_head requests;
>  	struct bio_list bio_list;
> +	/* For dynamic configuration. */
> +	unsigned int reconfiguring:1;
> +	int new_max_indirect_segments;
> +	int new_max_ring_page_order;
> +	int new_max_queues;
>  };
>
>  static unsigned int nr_minors;
> @@ -1350,6 +1356,31 @@ static void blkif_free(struct blkfront_info *info, int suspend)
>  	for (i = 0; i < info->nr_rings; i++)
>  		blkif_free_ring(&info->rinfo[i]);
>
> +	/* Remove old xenstore nodes. */
> +	if (info->nr_ring_pages > 1)
> +		xenbus_rm(XBT_NIL, info->xbdev->nodename, "ring-page-order");
> +
> +	if (info->nr_rings == 1) {
> +		if (info->nr_ring_pages == 1) {
> +			xenbus_rm(XBT_NIL, info->xbdev->nodename, "ring-ref");
> +		} else {
> +			for (i = 0; i < info->nr_ring_pages; i++) {
> +				char ring_ref_name[RINGREF_NAME_LEN];
> +
> +				snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
> +				xenbus_rm(XBT_NIL, info->xbdev->nodename, ring_ref_name);
> +			}
> +		}
> +	} else {
> +		xenbus_rm(XBT_NIL, info->xbdev->nodename, "multi-queue-num-queues");
> +
> +		for (i = 0; i < info->nr_rings; i++) {
> +			char queuename[QUEUE_NAME_LEN];
> +
> +			snprintf(queuename, QUEUE_NAME_LEN, "queue-%u", i);
> +			xenbus_rm(XBT_NIL, info->xbdev->nodename, queuename);
> +		}
> +	}
>  	kfree(info->rinfo);
>  	info->rinfo = NULL;
>  	info->nr_rings = 0;
> @@ -1772,6 +1803,10 @@ static int talk_to_blkback(struct xenbus_device *dev,
>  		info->nr_ring_pages = 1;
>  	else {
>  		ring_page_order = min(xen_blkif_max_ring_order, max_page_order);
> +		if (info->new_max_ring_page_order) {

Instead of calling this "new_max_ring_page_order", could you just call it
max_ring_page_order, initialize it to xen_blkif_max_ring_order by default and
use it everywhere instead of xen_blkif_max_ring_order?
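Something like the following is what I have in mind (a rough, untested
sketch; seeding the field in blkfront_probe() is an assumption on my part):

	/* In blkfront_probe(): default the per-device limit to the module
	 * parameter, so it only changes when sysfs updates it. */
	info->max_ring_page_order = xen_blkif_max_ring_order;

	/* talk_to_blkback() then needs neither the global nor a new_*
	 * special case: */
	ring_page_order = min(info->max_ring_page_order, max_page_order);
	info->nr_ring_pages = 1 << ring_page_order;

The sysfs store would then just validate the new value and update
info->max_ring_page_order before triggering the reconnection.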
> +			BUG_ON(info->new_max_ring_page_order > max_page_order);
> +			ring_page_order = info->new_max_ring_page_order;
> +		}
>  		info->nr_ring_pages = 1 << ring_page_order;
>  	}
>
> @@ -1895,6 +1930,10 @@ static int negotiate_mq(struct blkfront_info *info)
>  		backend_max_queues = 1;
>
>  	info->nr_rings = min(backend_max_queues, xen_blkif_max_queues);
> +	if (info->new_max_queues) {

Same here IMHO; this is going to make the code flow slightly easier to
understand.

> +		BUG_ON(info->new_max_queues > backend_max_queues);
> +		info->nr_rings = info->new_max_queues;
> +	}
>  	/* We need at least one ring. */
>  	if (!info->nr_rings)
>  		info->nr_rings = 1;
> @@ -2352,11 +2391,227 @@ static void blkfront_gather_backend_features(struct blkfront_info *info)
>  			    NULL);
>  	if (err)
>  		info->max_indirect_segments = 0;
> -	else
> +	else {
>  		info->max_indirect_segments = min(indirect_segments,
>  						  xen_blkif_max_segments);
> +		if (info->new_max_indirect_segments) {
> +			BUG_ON(info->new_max_indirect_segments > indirect_segments);
> +			info->max_indirect_segments = info->new_max_indirect_segments;
> +		}
> +	}
> +}
> +
> +static ssize_t max_ring_page_order_show(struct device *dev,
> +					struct device_attribute *attr, char *page)
> +{
> +	struct blkfront_info *info = dev_get_drvdata(dev);
> +
> +	return sprintf(page, "%u\n", get_order(info->nr_ring_pages * XEN_PAGE_SIZE));
> +}
> +
> +static ssize_t max_indirect_segs_show(struct device *dev,
> +				      struct device_attribute *attr, char *page)
> +{
> +	struct blkfront_info *info = dev_get_drvdata(dev);
> +
> +	return sprintf(page, "%u\n", info->max_indirect_segments);
> +}
> +
> +static ssize_t max_queues_show(struct device *dev,
> +			       struct device_attribute *attr, char *page)
> +{
> +	struct blkfront_info *info = dev_get_drvdata(dev);
> +
> +	return sprintf(page, "%u\n", info->nr_rings);
> +}
> +
> +static ssize_t dynamic_reconfig_device(struct blkfront_info *info, ssize_t count)
> +{
> +	unsigned int i;
> +	int err = -EBUSY;
> +
> +	/*
> +	 * Make sure no migration in parallel, device lock is actually a
> +	 * mutex.
> +	 */
> +	if (!device_trylock(&info->xbdev->dev)) {
> +		pr_err("Fail to acquire dev:%s lock, may be in migration.\n",
> +		       dev_name(&info->xbdev->dev));
> +		return err;
> +	}
> +
> +	/*
> +	 * Prevent new requests and guarantee no uncompleted reqs.
> +	 */
> +	blk_mq_freeze_queue(info->rq);
> +	if (part_in_flight(&info->gd->part0))
> +		goto out;
> +
> +	/*
> +	 * Front				Backend
> +	 * Switch to XenbusStateClosed
> +	 *					frontend_changed():
> +	 *					 case XenbusStateClosed:
> +	 *						xen_blkif_disconnect()
> +	 *						Switch to XenbusStateClosed
> +	 * blkfront_resume():
> +	 *					frontend_changed():
> +	 *						reconnect
> +	 * Wait until XenbusStateConnected
> +	 */
> +	info->reconfiguring = true;
> +	xenbus_switch_state(info->xbdev, XenbusStateClosed);
> +
> +	/* Poll every 100ms, 1 minute timeout. */
> +	for (i = 0; i < 600; i++) {
> +		/*
> +		 * Wait backend enter XenbusStateClosed, blkback_changed()
> +		 * will clear reconfiguring.
> +		 */
> +		if (!info->reconfiguring)
> +			goto resume;
> +		schedule_timeout_interruptible(msecs_to_jiffies(100));
> +	}

Instead of having this wait, could you just set info->reconfiguring = 1, set
the frontend state to XenbusStateClosed and mimic exactly what a resume from
suspension does? blkback_changed would have to set the frontend state to
InitWait when it detects that the backend has switched to Closed, and call
blkfront_resume.
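For illustration, blkback_changed() would gain something along these lines
(untested sketch; how this folds into the existing switch statement is left
open):

	/* In blkback_changed(), on the backend entering Closed: */
	case XenbusStateClosed:
		if (info->reconfiguring) {
			/*
			 * The backend has torn down the old rings; reconnect
			 * through the same path a resume from suspension takes.
			 */
			info->reconfiguring = false;
			xenbus_switch_state(dev, XenbusStateInitWait);
			if (blkfront_resume(dev))
				pr_err("blkfront: %s: reconfiguration resume failed\n",
				       dev->nodename);
			break;
		}
		...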
> +	goto out;
> +
> +resume:
> +	if (blkfront_resume(info->xbdev))
> +		goto out;
> +
> +	/* Poll every 100ms, 1 minute timeout. */
> +	for (i = 0; i < 600; i++) {
> +		/* Wait blkfront enter StateConnected which is done by blkif_recover(). */
> +		if (info->xbdev->state == XenbusStateConnected) {
> +			err = count;
> +			goto out;
> +		}
> +		schedule_timeout_interruptible(msecs_to_jiffies(100));
> +	}

Same here; IMHO all of this should be much more similar to a resume, and you
shouldn't need all these wait loops.
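With blkback_changed() driving the reconnection as sketched above, the store
path could then collapse to something like this (again just a sketch; where
exactly the queue is unfrozen, and whether the caller should still wait for
XenbusStateConnected before returning, are open questions):

	static ssize_t dynamic_reconfig_device(struct blkfront_info *info,
					       ssize_t count)
	{
		if (!device_trylock(&info->xbdev->dev))
			return -EBUSY;

		/* Quiesce the device; bail out if requests are in flight. */
		blk_mq_freeze_queue(info->rq);
		if (part_in_flight(&info->gd->part0)) {
			blk_mq_unfreeze_queue(info->rq);
			device_unlock(&info->xbdev->dev);
			return -EBUSY;
		}

		/*
		 * Kick off the reconnection and let blkback_changed() drive
		 * the rest asynchronously, exactly as a resume from
		 * suspension does; no polling loops needed.
		 */
		info->reconfiguring = true;
		xenbus_switch_state(info->xbdev, XenbusStateClosed);

		blk_mq_unfreeze_queue(info->rq);
		device_unlock(&info->xbdev->dev);
		return count;
	}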
Roger.