- Replaces the request free list with a (mempooled) slab.
- Replaces request buckets with a page mempool. No buckets needed,
  because we're doing full s/g at page granularity anyway, so we can
  gfp() independent pages everywhere. Allocations are 1-11 page-sized
  segments per request. (A minimal sketch of the mempool mechanism
  follows below, after the '---' marker.)
- Adds support for multiple page pools.
- Adds pools to sysfs, linked as a 'pools' kset to blktap-control.
- Makes the per-tap pool selectable: attribute 'pool' on the tap device.
- Makes pools online-resizable: attributes 'free'/'size' on the pool
  kobj. (A userspace usage sketch follows below, after the diffstat.)
Signed-off-by: Daniel Stodden <daniel.stodden@xxxxxxxxxx>
---
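Note (not part of the commit message): for reference, a minimal sketch
of the mechanism request.c now builds on -- a mempool whose elements are
single pages, with custom alloc/free callbacks and an online-resizable
reserve. Illustrative only; the names (example_pool, page_pool_alloc,
example_pool_setup, example_pool_resize) are not from the patch, and the
patch additionally marks its pages Reserved before handing them out.

	#include <linux/kernel.h>
	#include <linux/mempool.h>
	#include <linux/gfp.h>
	#include <linux/mm.h>

	/* element allocator: hand out one page for sleeping callers;
	 * atomic callers fall back to the mempool's reserve */
	static void *page_pool_alloc(gfp_t gfp_mask, void *pool_data)
	{
		if (!(gfp_mask & __GFP_WAIT))
			return NULL;
		return alloc_page(gfp_mask);
	}

	static void page_pool_free(void *element, void *pool_data)
	{
		__free_page(element);
	}

	static mempool_t *example_pool;

	static int example_pool_setup(int nr_pages)
	{
		example_pool = mempool_create(nr_pages, page_pool_alloc,
					      page_pool_free, NULL);
		return example_pool ? 0 : -ENOMEM;
	}

	/* online resize, as the pool's 'size' attribute store does;
	 * mempool asserts min_nr >= 1 */
	static int example_pool_resize(int nr_pages)
	{
		return mempool_resize(example_pool, max(nr_pages, 1),
				      GFP_KERNEL);
	}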
drivers/xen/blktap/blktap.h | 35 ++-
drivers/xen/blktap/control.c | 80 ++++++--
drivers/xen/blktap/device.c | 2 +-
drivers/xen/blktap/request.c | 509 +++++++++++++++++++++++++-----------------
drivers/xen/blktap/ring.c | 10 +-
drivers/xen/blktap/sysfs.c | 36 +++
6 files changed, 433 insertions(+), 239 deletions(-)
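A rough userspace sketch of the new sysfs knobs. The paths are
assumptions (control node under /sys/class/misc/blktap-control, tap
devices under /sys/class/blktap2/blktapN); the attribute names match
the patch, everything else is illustrative.

	#include <stdio.h>

	static int sysfs_write(const char *path, const char *val)
	{
		FILE *f = fopen(path, "w");
		if (!f)
			return -1;
		fputs(val, f);
		return fclose(f);
	}

	int main(void)
	{
		/* grow the shared 'default' pool; the value is a page
		 * count, clamped to [one segment set, POOL_MAX_PAGES] */
		sysfs_write("/sys/class/misc/blktap-control/pools/default/size",
			    "1408");

		/* pools are created on demand: naming a new pool here
		 * instantiates it, and new taps will reference it */
		sysfs_write("/sys/class/misc/blktap-control/default_pool",
			    "bootdisks");

		/* switch an individual tap; only allowed while the tap
		 * has no block device attached (-EBUSY otherwise) */
		sysfs_write("/sys/class/blktap2/blktap0/pool", "bootdisks");

		return 0;
	}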
diff --git a/drivers/xen/blktap/blktap.h b/drivers/xen/blktap/blktap.h
index a29b509..ad79c15 100644
--- a/drivers/xen/blktap/blktap.h
+++ b/drivers/xen/blktap/blktap.h
@@ -121,17 +121,19 @@ struct blktap_statistics {
};
struct blktap_request {
+ struct blktap *tap;
struct request *rq;
uint16_t usr_idx;
uint8_t status;
atomic_t pendcnt;
- uint8_t nr_pages;
unsigned short operation;
struct timeval time;
struct grant_handle_pair handles[BLKIF_MAX_SEGMENTS_PER_REQUEST];
- struct list_head free_list;
+
+ struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ int nr_pages;
};
struct blktap {
@@ -140,6 +142,7 @@ struct blktap {
struct blktap_ring ring;
struct blktap_device device;
+ struct blktap_page_pool *pool;
int pending_cnt;
struct blktap_request *pending_requests[MAX_PENDING_REQS];
@@ -152,6 +155,13 @@ struct blktap {
struct blktap_statistics stats;
};
+struct blktap_page_pool {
+ struct mempool_s *bufs;
+ spinlock_t lock;
+ struct kobject kobj;
+ wait_queue_head_t wait;
+};
+
extern struct mutex blktap_lock;
extern struct blktap **blktaps;
extern int blktap_max_minor;
@@ -165,7 +175,6 @@ size_t blktap_ring_debug(struct blktap *, char *, size_t);
int blktap_ring_create(struct blktap *);
int blktap_ring_destroy(struct blktap *);
void blktap_ring_kick_user(struct blktap *);
-void blktap_ring_kick_all(void);
int blktap_sysfs_init(void);
void blktap_sysfs_exit(void);
@@ -181,19 +190,23 @@ void blktap_device_destroy_sync(struct blktap *);
int blktap_device_run_queue(struct blktap *);
void blktap_device_end_request(struct blktap *, struct blktap_request *, int);
-int blktap_request_pool_init(void);
-void blktap_request_pool_free(void);
-int blktap_request_pool_grow(void);
-int blktap_request_pool_shrink(void);
-struct blktap_request *blktap_request_allocate(struct blktap *);
+int blktap_page_pool_init(struct kobject *);
+void blktap_page_pool_exit(void);
+struct blktap_page_pool *blktap_page_pool_get(const char *);
+
+size_t blktap_request_debug(struct blktap *, char *, size_t);
+struct blktap_request *blktap_request_alloc(struct blktap *);
+int blktap_request_get_pages(struct blktap *, struct blktap_request *, int);
void blktap_request_free(struct blktap *, struct blktap_request *);
-struct page *request_to_page(struct blktap_request *, int);
+void blktap_request_bounce(struct blktap *, struct blktap_request *, int, int);
static inline unsigned long
request_to_kaddr(struct blktap_request *req, int seg)
{
- unsigned long pfn = page_to_pfn(request_to_page(req, seg));
- return (unsigned long)pfn_to_kaddr(pfn);
+ return (unsigned long)page_address(req->pages[seg]);
}
+#define request_to_page(_request, _seg) ((_request)->pages[_seg])
+
+
#endif
diff --git a/drivers/xen/blktap/control.c b/drivers/xen/blktap/control.c
index ef54fa1..8652e07 100644
--- a/drivers/xen/blktap/control.c
+++ b/drivers/xen/blktap/control.c
@@ -1,7 +1,7 @@
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/miscdevice.h>
-
+#include <linux/device.h>
#include <asm/uaccess.h>
#include "blktap.h"
@@ -10,6 +10,7 @@ DEFINE_MUTEX(blktap_lock);
struct blktap **blktaps;
int blktap_max_minor;
+static struct blktap_page_pool *default_pool;
static struct blktap *
blktap_control_get_minor(void)
@@ -83,6 +84,9 @@ blktap_control_create_tap(void)
if (!tap)
return NULL;
+ kobject_get(&default_pool->kobj);
+ tap->pool = default_pool;
+
err = blktap_ring_create(tap);
if (err)
goto fail_tap;
@@ -110,6 +114,8 @@ blktap_control_destroy_tap(struct blktap *tap)
if (err)
return err;
+ kobject_put(&tap->pool->kobj);
+
blktap_sysfs_destroy(tap);
blktap_control_put_minor(tap);
@@ -166,12 +172,43 @@ static struct file_operations
blktap_control_file_operations = {
.ioctl = blktap_control_ioctl,
};
-static struct miscdevice blktap_misc = {
+static struct miscdevice blktap_control = {
.minor = MISC_DYNAMIC_MINOR,
.name = "blktap-control",
.fops = &blktap_control_file_operations,
};
+static struct device *control_device;
+
+static ssize_t
+blktap_control_show_default_pool(struct device *device,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%s", kobject_name(&default_pool->kobj));
+}
+
+static ssize_t
+blktap_control_store_default_pool(struct device *device,
+ struct device_attribute *attr,
+ const char *buf, size_t size)
+{
+ struct blktap_page_pool *pool, *tmp = default_pool;
+
+ pool = blktap_page_pool_get(buf);
+ if (IS_ERR(pool))
+ return PTR_ERR(pool);
+
+ default_pool = pool;
+ kobject_put(&tmp->kobj);
+
+ return size;
+}
+
+static DEVICE_ATTR(default_pool, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH,
+ blktap_control_show_default_pool,
+ blktap_control_store_default_pool);
+
size_t
blktap_control_debug(struct blktap *tap, char *buf, size_t size)
{
@@ -190,12 +227,11 @@ blktap_control_init(void)
{
int err;
- err = misc_register(&blktap_misc);
- if (err) {
- blktap_misc.minor = MISC_DYNAMIC_MINOR;
- BTERR("misc_register failed for control device");
+ err = misc_register(&blktap_control);
+ if (err)
return err;
- }
+
+ control_device = blktap_control.this_device;
blktap_max_minor = min(64, MAX_BLKTAP_DEVICE);
blktaps = kzalloc(blktap_max_minor * sizeof(blktaps[0]), GFP_KERNEL);
@@ -204,20 +240,39 @@ blktap_control_init(void)
return -ENOMEM;
}
+ err = blktap_page_pool_init(&control_device->kobj);
+ if (err)
+ return err;
+
+ default_pool = blktap_page_pool_get("default");
+ if (!default_pool)
+ return -ENOMEM;
+
+ err = device_create_file(control_device, &dev_attr_default_pool);
+ if (err)
+ return err;
+
return 0;
}
static void
blktap_control_exit(void)
{
+ if (default_pool) {
+ kobject_put(&default_pool->kobj);
+ default_pool = NULL;
+ }
+
+ blktap_page_pool_exit();
+
if (blktaps) {
kfree(blktaps);
blktaps = NULL;
}
- if (blktap_misc.minor != MISC_DYNAMIC_MINOR) {
- misc_deregister(&blktap_misc);
- blktap_misc.minor = MISC_DYNAMIC_MINOR;
+ if (control_device) {
+ misc_deregister(&blktap_control);
+ control_device = NULL;
}
}
@@ -228,7 +283,6 @@ blktap_exit(void)
blktap_ring_exit();
blktap_sysfs_exit();
blktap_device_exit();
- blktap_request_pool_free();
}
static int __init
@@ -239,10 +293,6 @@ blktap_init(void)
if (!xen_pv_domain())
return -ENODEV;
- err = blktap_request_pool_init();
- if (err)
- return err;
-
err = blktap_device_init();
if (err)
goto fail;
diff --git a/drivers/xen/blktap/device.c b/drivers/xen/blktap/device.c
index 3acb8fa..ed95548 100644
--- a/drivers/xen/blktap/device.c
+++ b/drivers/xen/blktap/device.c
@@ -605,7 +605,7 @@ blktap_device_run_queue(struct blktap *tap)
break;
}
- request = blktap_request_allocate(tap);
+ request = blktap_request_alloc(tap);
if (!request) {
tap->stats.st_oo_req++;
goto wait;
diff --git a/drivers/xen/blktap/request.c b/drivers/xen/blktap/request.c
index eee7100..ca12442 100644
--- a/drivers/xen/blktap/request.c
+++ b/drivers/xen/blktap/request.c
@@ -1,297 +1,400 @@
+#include <linux/mempool.h>
#include <linux/spinlock.h>
-#include <xen/balloon.h>
+#include <linux/mutex.h>
#include <linux/sched.h>
+#include <linux/device.h>
+#include <xen/balloon.h>
#include "blktap.h"
-#define MAX_BUCKETS 8
-#define BUCKET_SIZE MAX_PENDING_REQS
+/* max pages per shared pool. just to prevent accidental dos. */
+#define POOL_MAX_PAGES (256*BLKIF_MAX_SEGMENTS_PER_REQUEST)
-#define BLKTAP_POOL_CLOSING 1
+/* default page pool size. when considering to shrink a shared pool,
+ * note that paused tapdisks may grab a whole lot of pages for a long
+ * time. */
+#define POOL_DEFAULT_PAGES (2 * MMAP_PAGES)
-struct blktap_request_bucket;
+/* max number of pages allocatable per request. */
+#define POOL_MAX_REQUEST_PAGES BLKIF_MAX_SEGMENTS_PER_REQUEST
-struct blktap_request_handle {
- int slot;
- uint8_t inuse;
- struct blktap_request request;
- struct blktap_request_bucket *bucket;
-};
+/* min request structs per pool. These grow dynamically. */
+#define POOL_MIN_REQS BLK_RING_SIZE
-struct blktap_request_bucket {
- atomic_t reqs_in_use;
- struct blktap_request_handle handles[BUCKET_SIZE];
- struct page **foreign_pages;
-};
+static struct kset *pool_set;
-struct blktap_request_pool {
- spinlock_t lock;
- uint8_t status;
- struct list_head free_list;
- atomic_t reqs_in_use;
- wait_queue_head_t wait_queue;
- struct blktap_request_bucket *buckets[MAX_BUCKETS];
-};
+#define kobj_to_pool(_kobj) \
+ container_of(_kobj, struct blktap_page_pool, kobj)
-static struct blktap_request_pool pool;
-
-static inline struct blktap_request_handle *
-blktap_request_to_handle(struct blktap_request *req)
-{
- return container_of(req, struct blktap_request_handle, request);
-}
+static struct kmem_cache *request_cache;
+static mempool_t *request_pool;
static void
-blktap_request_pool_init_request(struct blktap_request *request)
+__page_pool_wake(struct blktap_page_pool *pool)
{
- int i;
-
- request->usr_idx = -1;
- request->nr_pages = 0;
- request->status = BLKTAP_REQUEST_FREE;
- INIT_LIST_HEAD(&request->free_list);
- for (i = 0; i < ARRAY_SIZE(request->handles); i++) {
- request->handles[i].user = INVALID_GRANT_HANDLE;
- request->handles[i].kernel = INVALID_GRANT_HANDLE;
- }
+ mempool_t *mem = pool->bufs;
+
+ /*
+ NB. slightly wasteful to always wait for a full segment
+ set. but this ensures the next disk makes
+ progress. presently, the repeated request struct
+ alloc/release cycles would otherwise keep everyone spinning.
+ */
+
+ if (mem->curr_nr >= POOL_MAX_REQUEST_PAGES)
+ wake_up(&pool->wait);
}
-static int
-blktap_request_pool_allocate_bucket(void)
+int
+blktap_request_get_pages(struct blktap *tap,
+ struct blktap_request *request, int nr_pages)
{
- int i, idx;
- unsigned long flags;
- struct blktap_request *request;
- struct blktap_request_handle *handle;
- struct blktap_request_bucket *bucket;
+ struct blktap_page_pool *pool = tap->pool;
+ mempool_t *mem = pool->bufs;
+ struct page *page;
- bucket = kzalloc(sizeof(struct blktap_request_bucket), GFP_KERNEL);
- if (!bucket)
- goto fail;
+ BUG_ON(request->nr_pages != 0);
+ BUG_ON(nr_pages > POOL_MAX_REQUEST_PAGES);
- bucket->foreign_pages = alloc_empty_pages_and_pagevec(MMAP_PAGES);
- if (!bucket->foreign_pages)
- goto fail;
+ if (mem->curr_nr < nr_pages)
+ return -ENOMEM;
- spin_lock_irqsave(&pool.lock, flags);
+ /* NB. avoid thundering herds of tapdisks colliding. */
+ spin_lock(&pool->lock);
- idx = -1;
- for (i = 0; i < MAX_BUCKETS; i++) {
- if (!pool.buckets[i]) {
- idx = i;
- pool.buckets[idx] = bucket;
- break;
- }
+ if (mem->curr_nr < nr_pages) {
+ spin_unlock(&pool->lock);
+ return -ENOMEM;
}
- if (idx == -1) {
- spin_unlock_irqrestore(&pool.lock, flags);
- goto fail;
+ while (request->nr_pages < nr_pages) {
+ page = mempool_alloc(mem, GFP_NOWAIT);
+ BUG_ON(!page);
+ request->pages[request->nr_pages++] = page;
}
- for (i = 0; i < BUCKET_SIZE; i++) {
- handle = bucket->handles + i;
- request = &handle->request;
+ spin_unlock(&pool->lock);
- handle->slot = i;
- handle->inuse = 0;
- handle->bucket = bucket;
+ return 0;
+}
+
+static void
+blktap_request_put_pages(struct blktap *tap,
+ struct blktap_request *request)
+{
+ struct blktap_page_pool *pool = tap->pool;
+ struct page *page;
- blktap_request_pool_init_request(request);
- list_add_tail(&request->free_list, &pool.free_list);
+ while (request->nr_pages) {
+ page = request->pages[--request->nr_pages];
+ mempool_free(page, pool->bufs);
}
+}
- spin_unlock_irqrestore(&pool.lock, flags);
+size_t
+blktap_request_debug(struct blktap *tap, char *buf, size_t size)
+{
+ struct blktap_page_pool *pool = tap->pool;
+ mempool_t *mem = pool->bufs;
+ char *s = buf, *end = buf + size;
- return 0;
+ s += snprintf(buf, end - s,
+ "pool:%s pages:%d free:%d\n",
+ kobject_name(&pool->kobj),
+ mem->min_nr, mem->curr_nr);
-fail:
- if (bucket && bucket->foreign_pages)
- free_empty_pages_and_pagevec(bucket->foreign_pages, MMAP_PAGES);
- kfree(bucket);
- return -ENOMEM;
+ return s - buf;
}
-static void
-blktap_request_pool_free_bucket(struct blktap_request_bucket *bucket)
+struct blktap_request*
+blktap_request_alloc(struct blktap *tap)
{
- if (!bucket)
- return;
+ struct blktap_request *request;
- BTDBG("freeing bucket %p\n", bucket);
+ request = mempool_alloc(request_pool, GFP_NOWAIT);
+ if (request)
+ request->tap = tap;
- free_empty_pages_and_pagevec(bucket->foreign_pages, MMAP_PAGES);
- kfree(bucket);
+ return request;
}
-struct page *
-request_to_page(struct blktap_request *req, int seg)
+void
+blktap_request_free(struct blktap *tap,
+ struct blktap_request *request)
{
- struct blktap_request_handle *handle = blktap_request_to_handle(req);
- int idx = handle->slot * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
- return handle->bucket->foreign_pages[idx];
+ blktap_request_put_pages(tap, request);
+
+ mempool_free(request, request_pool);
+
+ __page_pool_wake(tap->pool);
}
-int
-blktap_request_pool_shrink(void)
+static void
+blktap_request_ctor(void *obj)
+{
+ struct blktap_request *request = obj;
+
+ memset(request, 0, sizeof(*request));
+ sg_init_table(request->sg_table, ARRAY_SIZE(request->sg_table));
+}
+
+static int
+blktap_page_pool_resize(struct blktap_page_pool *pool, int target)
{
- int i, err;
- unsigned long flags;
- struct blktap_request_bucket *bucket;
+ mempool_t *bufs = pool->bufs;
+ int err;
+
+ /* NB. mempool asserts min_nr >= 1 */
+ target = max(1, target);
+
+ err = mempool_resize(bufs, target, GFP_KERNEL);
+ if (err)
+ return err;
- err = -EAGAIN;
+ __page_pool_wake(pool);
- spin_lock_irqsave(&pool.lock, flags);
+ return 0;
+}
- /* always keep at least one bucket */
- for (i = 1; i < MAX_BUCKETS; i++) {
- bucket = pool.buckets[i];
- if (!bucket)
- continue;
+struct pool_attribute {
+ struct attribute attr;
- if (atomic_read(&bucket->reqs_in_use))
- continue;
+ ssize_t (*show)(struct blktap_page_pool *pool,
+ char *buf);
- blktap_request_pool_free_bucket(bucket);
- pool.buckets[i] = NULL;
- err = 0;
- break;
- }
+ ssize_t (*store)(struct blktap_page_pool *pool,
+ const char *buf, size_t count);
+};
- spin_unlock_irqrestore(&pool.lock, flags);
+#define kattr_to_pool_attr(_kattr) \
+ container_of(_kattr, struct pool_attribute, attr)
- return err;
+static ssize_t
+blktap_page_pool_show_size(struct blktap_page_pool *pool,
+ char *buf)
+{
+ mempool_t *mem = pool->bufs;
+ return sprintf(buf, "%d", mem->min_nr);
}
-int
-blktap_request_pool_grow(void)
+static ssize_t
+blktap_page_pool_store_size(struct blktap_page_pool *pool,
+ const char *buf, size_t size)
{
- return blktap_request_pool_allocate_bucket();
+ int target;
+
+ /*
+ * NB. target fixup to avoid undesired results. less than a
+ * full segment set can wedge the disk. much more than a
+ * couple times the physical queue depth is rarely useful.
+ */
+
+ target = simple_strtoul(buf, NULL, 0);
+ target = max(POOL_MAX_REQUEST_PAGES, target);
+ target = min(target, POOL_MAX_PAGES);
+
+ return blktap_page_pool_resize(pool, target) ? : size;
}
-struct blktap_request *
-blktap_request_allocate(struct blktap *tap)
+static struct pool_attribute blktap_page_pool_attr_size =
+ __ATTR(size, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH,
+ blktap_page_pool_show_size,
+ blktap_page_pool_store_size);
+
+static ssize_t
+blktap_page_pool_show_free(struct blktap_page_pool *pool,
+ char *buf)
{
- int i;
- uint16_t usr_idx;
- unsigned long flags;
- struct blktap_request *request;
+ mempool_t *mem = pool->bufs;
+ return sprintf(buf, "%d", mem->curr_nr);
+}
- usr_idx = -1;
- request = NULL;
+static struct pool_attribute blktap_page_pool_attr_free =
+ __ATTR(free, S_IRUSR|S_IRGRP|S_IROTH,
+ blktap_page_pool_show_free,
+ NULL);
- spin_lock_irqsave(&pool.lock, flags);
+static struct attribute *blktap_page_pool_attrs[] = {
+ &blktap_page_pool_attr_size.attr,
+ &blktap_page_pool_attr_free.attr,
+ NULL,
+};
- if (pool.status == BLKTAP_POOL_CLOSING)
- goto out;
+static inline struct kobject*
+__blktap_kset_find_obj(struct kset *kset, const char *name)
+{
+ struct kobject *k;
+ struct kobject *ret = NULL;
- for (i = 0; i < ARRAY_SIZE(tap->pending_requests); i++)
- if (!tap->pending_requests[i]) {
- usr_idx = i;
+ spin_lock(&kset->list_lock);
+ list_for_each_entry(k, &kset->list, entry) {
+ if (kobject_name(k) && !strcmp(kobject_name(k), name)) {
+ ret = kobject_get(k);
break;
}
-
- if (usr_idx == (uint16_t)-1)
- goto out;
-
- if (!list_empty(&pool.free_list)) {
- request = list_entry(pool.free_list.next,
- struct blktap_request, free_list);
- list_del(&request->free_list);
}
+ spin_unlock(&kset->list_lock);
+ return ret;
+}
- if (request) {
- struct blktap_request_handle *handle;
+static ssize_t
+blktap_page_pool_show_attr(struct kobject *kobj, struct attribute *kattr,
+ char *buf)
+{
+ struct blktap_page_pool *pool = kobj_to_pool(kobj);
+ struct pool_attribute *attr = kattr_to_pool_attr(kattr);
- atomic_inc(&pool.reqs_in_use);
+ if (attr->show)
+ return attr->show(pool, buf);
- handle = blktap_request_to_handle(request);
- atomic_inc(&handle->bucket->reqs_in_use);
- handle->inuse = 1;
+ return -EIO;
+}
- request->usr_idx = usr_idx;
+static ssize_t
+blktap_page_pool_store_attr(struct kobject *kobj, struct attribute *kattr,
+ const char *buf, size_t size)
+{
+ struct blktap_page_pool *pool = kobj_to_pool(kobj);
+ struct pool_attribute *attr = kattr_to_pool_attr(kattr);
- tap->pending_requests[usr_idx] = request;
- tap->pending_cnt++;
- }
+ if (attr->store)
+ return attr->store(pool, buf, size);
-out:
- spin_unlock_irqrestore(&pool.lock, flags);
- return request;
+ return -EIO;
}
-void
-blktap_request_free(struct blktap *tap, struct blktap_request *request)
+static struct sysfs_ops blktap_page_pool_sysfs_ops = {
+ .show = blktap_page_pool_show_attr,
+ .store = blktap_page_pool_store_attr,
+};
+
+static void
+blktap_page_pool_release(struct kobject *kobj)
{
- int free;
- unsigned long flags;
- struct blktap_request_handle *handle;
+ struct blktap_page_pool *pool = kobj_to_pool(kobj);
+ mempool_destroy(pool->bufs);
+ kfree(pool);
+}
- BUG_ON(request->usr_idx >= ARRAY_SIZE(tap->pending_requests));
- handle = blktap_request_to_handle(request);
+struct kobj_type blktap_page_pool_ktype = {
+ .release = blktap_page_pool_release,
+ .sysfs_ops = &blktap_page_pool_sysfs_ops,
+ .default_attrs = blktap_page_pool_attrs,
+};
+
+static void*
+__mempool_page_alloc(gfp_t gfp_mask, void *pool_data)
+{
+ struct page *page;
- spin_lock_irqsave(&pool.lock, flags);
+ if (!(gfp_mask & __GFP_WAIT))
+ return NULL;
- handle->inuse = 0;
- tap->pending_requests[request->usr_idx] = NULL;
- blktap_request_pool_init_request(request);
- list_add(&request->free_list, &pool.free_list);
- atomic_dec(&handle->bucket->reqs_in_use);
- free = atomic_dec_and_test(&pool.reqs_in_use);
- tap->pending_cnt--;
+ page = alloc_page(gfp_mask);
+ if (page)
+ SetPageReserved(page);
- spin_unlock_irqrestore(&pool.lock, flags);
+ return page;
+}
- if (free)
- wake_up(&pool.wait_queue);
+static void
+__mempool_page_free(void *element, void *pool_data)
+{
+ struct page *page = element;
- blktap_ring_kick_all();
+ ClearPageReserved(page);
+ put_page(page);
}
-void
-blktap_request_pool_free(void)
+static struct kobject*
+blktap_page_pool_create(const char *name, int nr_pages)
{
- int i;
- unsigned long flags;
+ struct blktap_page_pool *pool;
+ int err;
- spin_lock_irqsave(&pool.lock, flags);
+ pool = kzalloc(sizeof(*pool), GFP_KERNEL);
+ if (!pool)
+ goto fail;
- pool.status = BLKTAP_POOL_CLOSING;
- while (atomic_read(&pool.reqs_in_use)) {
- spin_unlock_irqrestore(&pool.lock, flags);
- wait_event(pool.wait_queue, !atomic_read(&pool.reqs_in_use));
- spin_lock_irqsave(&pool.lock, flags);
- }
+ spin_lock_init(&pool->lock);
+ init_waitqueue_head(&pool->wait);
- for (i = 0; i < MAX_BUCKETS; i++) {
- blktap_request_pool_free_bucket(pool.buckets[i]);
- pool.buckets[i] = NULL;
- }
+ pool->bufs = mempool_create(nr_pages,
+ __mempool_page_alloc, __mempool_page_free,
+ pool);
+ if (!pool->bufs)
+ goto fail_pool;
+
+ kobject_init(&pool->kobj, &blktap_page_pool_ktype);
+ pool->kobj.kset = pool_set;
+ err = kobject_add(&pool->kobj, &pool_set->kobj, "%s", name);
+ if (err)
+ goto fail_bufs;
+
+ return &pool->kobj;
- spin_unlock_irqrestore(&pool.lock, flags);
+ kobject_del(&pool->kobj);
+fail_bufs:
+ mempool_destroy(pool->bufs);
+fail_pool:
+ kfree(pool);
+fail:
+ return NULL;
}
-int __init
-blktap_request_pool_init(void)
+struct blktap_page_pool*
+blktap_page_pool_get(const char *name)
{
- int i, err;
+ struct kobject *kobj;
+
+ kobj = __blktap_kset_find_obj(pool_set, name);
+ if (!kobj)
+ kobj = blktap_page_pool_create(name,
+ POOL_DEFAULT_PAGES);
+ if (!kobj)
+ return ERR_PTR(-ENOMEM);
- memset(&pool, 0, sizeof(pool));
+ return kobj_to_pool(kobj);
+}
+
+int __init
+blktap_page_pool_init(struct kobject *parent)
+{
+ request_cache =
+ kmem_cache_create("blktap-request",
+ sizeof(struct blktap_request), 0,
+ 0, blktap_request_ctor);
+ if (!request_cache)
+ return -ENOMEM;
+
+ request_pool =
+ mempool_create_slab_pool(POOL_MIN_REQS, request_cache);
+ if (!request_pool)
+ return -ENOMEM;
+
+ pool_set = kset_create_and_add("pools", NULL, parent);
+ if (!pool_set)
+ return -ENOMEM;
- spin_lock_init(&pool.lock);
- INIT_LIST_HEAD(&pool.free_list);
- atomic_set(&pool.reqs_in_use, 0);
- init_waitqueue_head(&pool.wait_queue);
+ return 0;
+}
- for (i = 0; i < 2; i++) {
- err = blktap_request_pool_allocate_bucket();
- if (err)
- goto fail;
+void
+blktap_page_pool_exit(void)
+{
+ if (pool_set) {
+ BUG_ON(!list_empty(&pool_set->list));
+ kset_unregister(pool_set);
+ pool_set = NULL;
}
- return 0;
+ if (request_pool) {
+ mempool_destroy(request_pool);
+ request_pool = NULL;
+ }
-fail:
- blktap_request_pool_free();
- return err;
+ if (request_cache) {
+ kmem_cache_destroy(request_cache);
+ request_cache = NULL;
+ }
}
diff --git a/drivers/xen/blktap/ring.c b/drivers/xen/blktap/ring.c
index 057e97f..a72a1b3 100644
--- a/drivers/xen/blktap/ring.c
+++ b/drivers/xen/blktap/ring.c
@@ -17,8 +17,6 @@
int blktap_ring_major;
static struct cdev blktap_ring_cdev;
-static DECLARE_WAIT_QUEUE_HEAD(blktap_poll_wait);
-
static inline struct blktap *
vma_to_blktap(struct vm_area_struct *vma)
{
@@ -409,7 +407,7 @@ static unsigned int blktap_ring_poll(struct file *filp, poll_table *wait)
struct blktap_ring *ring = &tap->ring;
int work = 0;
- poll_wait(filp, &blktap_poll_wait, wait);
+ poll_wait(filp, &tap->pool->wait, wait);
poll_wait(filp, &ring->poll_wait, wait);
down_read(¤t->mm->mmap_sem);
@@ -440,12 +438,6 @@ blktap_ring_kick_user(struct blktap *tap)
wake_up(&tap->ring.poll_wait);
}
-void
-blktap_ring_kick_all(void)
-{
- wake_up(&blktap_poll_wait);
-}
-
int
blktap_ring_destroy(struct blktap *tap)
{
diff --git a/drivers/xen/blktap/sysfs.c b/drivers/xen/blktap/sysfs.c
index e573549..7bbfea8 100644
--- a/drivers/xen/blktap/sysfs.c
+++ b/drivers/xen/blktap/sysfs.c
@@ -104,6 +104,8 @@ blktap_sysfs_debug_device(struct device *dev, struct
device_attribute *attr, cha
s += blktap_control_debug(tap, s, end - s);
+ s += blktap_request_debug(tap, s, end - s);
+
s += blktap_device_debug(tap, s, end - s);
s += blktap_ring_debug(tap, s, end - s);
@@ -129,6 +131,38 @@ blktap_sysfs_show_task(struct device *dev, struct device_attribute *attr, char *
}
static DEVICE_ATTR(task, S_IRUGO, blktap_sysfs_show_task, NULL);
+static ssize_t
+blktap_sysfs_show_pool(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct blktap *tap = dev_get_drvdata(dev);
+ return sprintf(buf, "%s", kobject_name(&tap->pool->kobj));
+}
+
+static ssize_t
+blktap_sysfs_store_pool(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t size)
+{
+ struct blktap *tap = dev_get_drvdata(dev);
+ struct blktap_page_pool *pool, *tmp = tap->pool;
+
+ if (tap->device.gd)
+ return -EBUSY;
+
+ pool = blktap_page_pool_get(buf);
+ if (IS_ERR(pool))
+ return PTR_ERR(pool);
+
+ tap->pool = pool;
+ kobject_put(&tmp->kobj);
+
+ return size;
+}
+DEVICE_ATTR(pool, S_IRUSR|S_IWUSR,
+ blktap_sysfs_show_pool, blktap_sysfs_store_pool);
+
int
blktap_sysfs_create(struct blktap *tap)
{
@@ -151,6 +185,8 @@ blktap_sysfs_create(struct blktap *tap)
if (!err)
err = device_create_file(dev, &dev_attr_task);
if (!err)
+ err = device_create_file(dev, &dev_attr_pool);
+ if (!err)
ring->dev = dev;
else
device_unregister(dev);
--
1.7.0.4
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel