From: Daniel Stodden <dns@xxxxxxxxxxxx>
Signed-off-by: Daniel Stodden <daniel.stodden@xxxxxxxxxx>
---
drivers/block/Kconfig | 9 +
drivers/block/Makefile | 1 +
drivers/block/blktap/Makefile | 3 +
drivers/block/blktap/blktap.h | 161 +++++++++++
drivers/block/blktap/control.c | 315 +++++++++++++++++++++
drivers/block/blktap/device.c | 551 +++++++++++++++++++++++++++++++++++++
drivers/block/blktap/request.c | 418 ++++++++++++++++++++++++++++
drivers/block/blktap/ring.c | 595 ++++++++++++++++++++++++++++++++++++++++
drivers/block/blktap/sysfs.c | 288 +++++++++++++++++++
drivers/xen/Kconfig | 11 -
drivers/xen/Makefile | 1 -
drivers/xen/blktap/Makefile | 3 -
drivers/xen/blktap/blktap.h | 161 -----------
drivers/xen/blktap/control.c | 315 ---------------------
drivers/xen/blktap/device.c | 551 -------------------------------------
drivers/xen/blktap/request.c | 418 ----------------------------
drivers/xen/blktap/ring.c | 595 ----------------------------------------
drivers/xen/blktap/sysfs.c | 288 -------------------
18 files changed, 2341 insertions(+), 2343 deletions(-)
create mode 100644 drivers/block/blktap/Makefile
create mode 100644 drivers/block/blktap/blktap.h
create mode 100644 drivers/block/blktap/control.c
create mode 100644 drivers/block/blktap/device.c
create mode 100644 drivers/block/blktap/request.c
create mode 100644 drivers/block/blktap/ring.c
create mode 100644 drivers/block/blktap/sysfs.c
delete mode 100644 drivers/xen/blktap/Makefile
delete mode 100644 drivers/xen/blktap/blktap.h
delete mode 100644 drivers/xen/blktap/control.c
delete mode 100644 drivers/xen/blktap/device.c
delete mode 100644 drivers/xen/blktap/request.c
delete mode 100644 drivers/xen/blktap/ring.c
delete mode 100644 drivers/xen/blktap/sysfs.c
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index bea8ae7..c4a55a3 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -471,4 +471,13 @@ config BLK_DEV_HD
If unsure, say N.
+config BLK_DEV_TAP
+ tristate "Blktap userspace devices"
+ help
+ The block tap driver allows block device requests to be
+ redirected to processes, through a device interface.
+ Doing so allows user-space development of high-performance
+ block storage backends, where disk images may be implemented
+ as files, in memory, or on other hosts across the network.
+
endif # BLK_DEV
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 7755a5e..8389917 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -28,6 +28,7 @@ obj-$(CONFIG_BLK_DEV_UMEM) += umem.o
obj-$(CONFIG_BLK_DEV_NBD) += nbd.o
obj-$(CONFIG_BLK_DEV_CRYPTOLOOP) += cryptoloop.o
obj-$(CONFIG_VIRTIO_BLK) += virtio_blk.o
+obj-$(CONFIG_BLK_DEV_TAP) += blktap/
obj-$(CONFIG_VIODASD) += viodasd.o
obj-$(CONFIG_BLK_DEV_SX8) += sx8.o
diff --git a/drivers/block/blktap/Makefile b/drivers/block/blktap/Makefile
new file mode 100644
index 0000000..923a7c5
--- /dev/null
+++ b/drivers/block/blktap/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_BLK_DEV_TAP) := blktap.o
+
+blktap-objs := control.o ring.o device.o request.o sysfs.o
diff --git a/drivers/block/blktap/blktap.h b/drivers/block/blktap/blktap.h
new file mode 100644
index 0000000..1318cad
--- /dev/null
+++ b/drivers/block/blktap/blktap.h
@@ -0,0 +1,161 @@
+#ifndef _BLKTAP_H_
+#define _BLKTAP_H_
+
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <linux/init.h>
+#include <linux/scatterlist.h>
+#include <linux/blktap.h>
+
+extern int blktap_debug_level;
+extern int blktap_ring_major;
+extern int blktap_device_major;
+
+#define BTPRINTK(level, tag, force, _f, _a...) \
+ do { \
+ if (blktap_debug_level > level && \
+ (force || printk_ratelimit())) \
+ printk(tag "%s: " _f, __func__, ##_a); \
+ } while (0)
+
+#define BTDBG(_f, _a...) BTPRINTK(8, KERN_DEBUG, 1, _f, ##_a)
+#define BTINFO(_f, _a...) BTPRINTK(0, KERN_INFO, 0, _f, ##_a)
+#define BTWARN(_f, _a...) BTPRINTK(0, KERN_WARNING, 0, _f, ##_a)
+#define BTERR(_f, _a...) BTPRINTK(0, KERN_ERR, 0, _f, ##_a)
+
+#define MAX_BLKTAP_DEVICE 1024
+
+#define BLKTAP_DEVICE 4
+#define BLKTAP_DEVICE_CLOSED 5
+#define BLKTAP_SHUTDOWN_REQUESTED 8
+
+#define BLKTAP_REQUEST_FREE 0
+#define BLKTAP_REQUEST_PENDING 1
+
+struct blktap_device {
+ spinlock_t lock;
+ struct gendisk *gd;
+};
+
+struct blktap_request;
+
+struct blktap_ring {
+ struct task_struct *task;
+
+ struct vm_area_struct *vma;
+ blktap_front_ring_t ring;
+ unsigned long ring_vstart;
+ unsigned long user_vstart;
+
+ int n_pending;
+ struct blktap_request *pending[BLKTAP_RING_SIZE];
+
+ wait_queue_head_t poll_wait;
+
+ dev_t devno;
+ struct device *dev;
+};
+
+struct blktap_statistics {
+ unsigned long st_print;
+ int st_rd_req;
+ int st_wr_req;
+ int st_oo_req;
+ int st_rd_sect;
+ int st_wr_sect;
+ s64 st_rd_cnt;
+ s64 st_rd_sum_usecs;
+ s64 st_rd_max_usecs;
+ s64 st_wr_cnt;
+ s64 st_wr_sum_usecs;
+ s64 st_wr_max_usecs;
+};
+
+struct blktap_request {
+ struct blktap *tap;
+ struct request *rq;
+ int usr_idx;
+
+ int operation;
+ struct timeval time;
+
+ struct scatterlist sg_table[BLKTAP_SEGMENT_MAX];
+ struct page *pages[BLKTAP_SEGMENT_MAX];
+ int nr_pages;
+};
+
+#define blktap_for_each_sg(_sg, _req, _i) \
+ for (_sg = (_req)->sg_table, _i = 0; \
+ _i < (_req)->nr_pages; \
+ (_sg)++, (_i)++)
+
+struct blktap {
+ int minor;
+ unsigned long dev_inuse;
+
+ struct blktap_ring ring;
+ struct blktap_device device;
+ struct blktap_page_pool *pool;
+
+ wait_queue_head_t remove_wait;
+ struct work_struct remove_work;
+ char name[BLKTAP_NAME_MAX];
+
+ struct blktap_statistics stats;
+};
+
+struct blktap_page_pool {
+ struct mempool_s *bufs;
+ spinlock_t lock;
+ struct kobject kobj;
+ wait_queue_head_t wait;
+};
+
+extern struct mutex blktap_lock;
+extern struct blktap **blktaps;
+extern int blktap_max_minor;
+
+int blktap_control_destroy_tap(struct blktap *);
+size_t blktap_control_debug(struct blktap *, char *, size_t);
+
+int blktap_ring_init(void);
+void blktap_ring_exit(void);
+size_t blktap_ring_debug(struct blktap *, char *, size_t);
+int blktap_ring_create(struct blktap *);
+int blktap_ring_destroy(struct blktap *);
+struct blktap_request *blktap_ring_make_request(struct blktap *);
+void blktap_ring_free_request(struct blktap *,struct blktap_request *);
+void blktap_ring_submit_request(struct blktap *, struct blktap_request *);
+int blktap_ring_map_request_segment(struct blktap *, struct blktap_request *,
int);
+int blktap_ring_map_request(struct blktap *, struct blktap_request *);
+void blktap_ring_unmap_request(struct blktap *, struct blktap_request *);
+void blktap_ring_set_message(struct blktap *, int);
+void blktap_ring_kick_user(struct blktap *);
+
+int blktap_sysfs_init(void);
+void blktap_sysfs_exit(void);
+int blktap_sysfs_create(struct blktap *);
+void blktap_sysfs_destroy(struct blktap *);
+
+int blktap_device_init(void);
+void blktap_device_exit(void);
+size_t blktap_device_debug(struct blktap *, char *, size_t);
+int blktap_device_create(struct blktap *, struct blktap_device_info *);
+int blktap_device_destroy(struct blktap *);
+void blktap_device_destroy_sync(struct blktap *);
+void blktap_device_run_queue(struct blktap *);
+void blktap_device_end_request(struct blktap *, struct blktap_request *, int);
+
+int blktap_page_pool_init(struct kobject *);
+void blktap_page_pool_exit(void);
+struct blktap_page_pool *blktap_page_pool_get(const char *);
+
+size_t blktap_request_debug(struct blktap *, char *, size_t);
+struct blktap_request *blktap_request_alloc(struct blktap *);
+int blktap_request_get_pages(struct blktap *, struct blktap_request *, int);
+void blktap_request_free(struct blktap *, struct blktap_request *);
+void blktap_request_bounce(struct blktap *, struct blktap_request *, int, int);
+
+
+#endif
diff --git a/drivers/block/blktap/control.c b/drivers/block/blktap/control.c
new file mode 100644
index 0000000..57b1a10
--- /dev/null
+++ b/drivers/block/blktap/control.c
@@ -0,0 +1,315 @@
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/miscdevice.h>
+#include <linux/device.h>
+#include <asm/uaccess.h>
+
+#include "blktap.h"
+
+DEFINE_MUTEX(blktap_lock);
+
+struct blktap **blktaps;
+int blktap_max_minor;
+static struct blktap_page_pool *default_pool;
+
+static struct blktap *
+blktap_control_get_minor(void)
+{
+ int minor;
+ struct blktap *tap;
+
+ tap = kzalloc(sizeof(*tap), GFP_KERNEL);
+ if (unlikely(!tap))
+ return NULL;
+
+ mutex_lock(&blktap_lock);
+
+ for (minor = 0; minor < blktap_max_minor; minor++)
+ if (!blktaps[minor])
+ break;
+
+ if (minor == MAX_BLKTAP_DEVICE)
+ goto fail;
+
+ if (minor == blktap_max_minor) {
+ void *p;
+ int n;
+
+ n = min(2 * blktap_max_minor, MAX_BLKTAP_DEVICE);
+ p = krealloc(blktaps, n * sizeof(blktaps[0]), GFP_KERNEL);
+ if (!p)
+ goto fail;
+
+ blktaps = p;
+ minor = blktap_max_minor;
+ blktap_max_minor = n;
+
+ memset(&blktaps[minor], 0, (n - minor) * sizeof(blktaps[0]));
+ }
+
+ tap->minor = minor;
+ blktaps[minor] = tap;
+
+ __module_get(THIS_MODULE);
+out:
+ mutex_unlock(&blktap_lock);
+ return tap;
+
+fail:
+ mutex_unlock(&blktap_lock);
+ kfree(tap);
+ tap = NULL;
+ goto out;
+}
+
+static void
+blktap_control_put_minor(struct blktap* tap)
+{
+ blktaps[tap->minor] = NULL;
+ kfree(tap);
+
+ module_put(THIS_MODULE);
+}
+
+static struct blktap*
+blktap_control_create_tap(void)
+{
+ struct blktap *tap;
+ int err;
+
+ tap = blktap_control_get_minor();
+ if (!tap)
+ return NULL;
+
+ kobject_get(&default_pool->kobj);
+ tap->pool = default_pool;
+
+ err = blktap_ring_create(tap);
+ if (err)
+ goto fail_tap;
+
+ err = blktap_sysfs_create(tap);
+ if (err)
+ goto fail_ring;
+
+ return tap;
+
+fail_ring:
+ blktap_ring_destroy(tap);
+fail_tap:
+ blktap_control_put_minor(tap);
+
+ return NULL;
+}
+
+int
+blktap_control_destroy_tap(struct blktap *tap)
+{
+ int err;
+
+ err = blktap_ring_destroy(tap);
+ if (err)
+ return err;
+
+ kobject_put(&tap->pool->kobj);
+
+ blktap_sysfs_destroy(tap);
+
+ blktap_control_put_minor(tap);
+
+ return 0;
+}
+
+static int
+blktap_control_ioctl(struct inode *inode, struct file *filp,
+		     unsigned int cmd, unsigned long arg)
+{
+	struct blktap *tap;
+
+	switch (cmd) {
+	case BLKTAP_IOCTL_ALLOC_TAP: {
+		struct blktap_info info;
+		void __user *ptr = (void __user*)arg;
+
+		tap = blktap_control_create_tap();
+		if (!tap)
+			return -ENOMEM;
+
+		info.ring_major = blktap_ring_major;
+		info.bdev_major = blktap_device_major;
+		info.ring_minor = tap->minor;
+
+		if (copy_to_user(ptr, &info, sizeof(info))) {
+			blktap_control_destroy_tap(tap);
+			return -EFAULT;
+		}
+
+		return 0;
+	}
+
+	case BLKTAP_IOCTL_FREE_TAP: {
+		int minor = arg;
+
+		/* NB. blktaps[] only has blktap_max_minor slots; a
+		 * huge arg also wraps negative through the int cast. */
+		if (minor < 0 || minor >= blktap_max_minor)
+			return -EINVAL;
+
+		tap = blktaps[minor];
+		if (!tap)
+			return -ENODEV;
+
+		return blktap_control_destroy_tap(tap);
+	}
+	}
+
+	return -ENOIOCTLCMD;
+}
+
+static struct file_operations blktap_control_file_operations = {
+ .owner = THIS_MODULE,
+ .ioctl = blktap_control_ioctl,
+};
+
+static struct miscdevice blktap_control = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "blktap-control",
+ .fops = &blktap_control_file_operations,
+};
+
+static struct device *control_device;
+
+static ssize_t
+blktap_control_show_default_pool(struct device *device,
+				 struct device_attribute *attr,
+				 char *buf)
+{
+	return sprintf(buf, "%s\n", kobject_name(&default_pool->kobj));
+}
+
+static ssize_t
+blktap_control_store_default_pool(struct device *device,
+ struct device_attribute *attr,
+ const char *buf, size_t size)
+{
+ struct blktap_page_pool *pool, *tmp = default_pool;
+
+ pool = blktap_page_pool_get(buf);
+ if (IS_ERR(pool))
+ return PTR_ERR(pool);
+
+ default_pool = pool;
+ kobject_put(&tmp->kobj);
+
+ return size;
+}
+
+static DEVICE_ATTR(default_pool, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH,
+ blktap_control_show_default_pool,
+ blktap_control_store_default_pool);
+
+size_t
+blktap_control_debug(struct blktap *tap, char *buf, size_t size)
+{
+ char *s = buf, *end = buf + size;
+
+ s += snprintf(s, end - s,
+ "tap %u:%u name:'%s' flags:%#08lx\n",
+ MAJOR(tap->ring.devno), MINOR(tap->ring.devno),
+ tap->name, tap->dev_inuse);
+
+ return s - buf;
+}
+
+static int __init
+blktap_control_init(void)
+{
+ int err;
+
+ err = misc_register(&blktap_control);
+ if (err)
+ return err;
+
+ control_device = blktap_control.this_device;
+
+ blktap_max_minor = min(64, MAX_BLKTAP_DEVICE);
+ blktaps = kzalloc(blktap_max_minor * sizeof(blktaps[0]), GFP_KERNEL);
+ if (!blktaps) {
+ BTERR("failed to allocate blktap minor map");
+ return -ENOMEM;
+ }
+
+ err = blktap_page_pool_init(&control_device->kobj);
+ if (err)
+ return err;
+
+ default_pool = blktap_page_pool_get("default");
+ if (!default_pool)
+ return -ENOMEM;
+
+ err = device_create_file(control_device, &dev_attr_default_pool);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static void
+blktap_control_exit(void)
+{
+ if (default_pool) {
+ kobject_put(&default_pool->kobj);
+ default_pool = NULL;
+ }
+
+ blktap_page_pool_exit();
+
+ if (blktaps) {
+ kfree(blktaps);
+ blktaps = NULL;
+ }
+
+ if (control_device) {
+ misc_deregister(&blktap_control);
+ control_device = NULL;
+ }
+}
+
+static void
+blktap_exit(void)
+{
+ blktap_control_exit();
+ blktap_ring_exit();
+ blktap_sysfs_exit();
+ blktap_device_exit();
+}
+
+static int __init
+blktap_init(void)
+{
+ int err;
+
+ err = blktap_device_init();
+ if (err)
+ goto fail;
+
+ err = blktap_ring_init();
+ if (err)
+ goto fail;
+
+ err = blktap_sysfs_init();
+ if (err)
+ goto fail;
+
+ err = blktap_control_init();
+ if (err)
+ goto fail;
+
+ return 0;
+
+fail:
+ blktap_exit();
+ return err;
+}
+
+module_init(blktap_init);
+module_exit(blktap_exit);
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/block/blktap/device.c b/drivers/block/blktap/device.c
new file mode 100644
index 0000000..9a09457
--- /dev/null
+++ b/drivers/block/blktap/device.c
@@ -0,0 +1,551 @@
+#include <linux/fs.h>
+#include <linux/blkdev.h>
+#include <linux/cdrom.h>
+#include <linux/hdreg.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_ioctl.h>
+
+#include "blktap.h"
+
+int blktap_device_major;
+
+#define dev_to_blktap(_dev) container_of(_dev, struct blktap, device)
+
+static int
+blktap_device_open(struct block_device *bdev, fmode_t mode)
+{
+ struct gendisk *disk = bdev->bd_disk;
+ struct blktap_device *tapdev = disk->private_data;
+
+ if (!tapdev)
+ return -ENXIO;
+
+ /* NB. we might have bounced a bd trylock by tapdisk. when
+ * failing for reasons not !tapdev, make sure to kick tapdisk
+ * out of destroy wait state again. */
+
+ return 0;
+}
+
+static int
+blktap_device_release(struct gendisk *disk, fmode_t mode)
+{
+	struct blktap_device *tapdev = disk->private_data;
+	struct block_device *bdev = bdget_disk(disk, 0);
+	struct blktap *tap = dev_to_blktap(tapdev);
+
+	/* NB. check bd_openers while we still hold the bdev ref. */
+	if (!bdev->bd_openers) {
+		set_bit(BLKTAP_DEVICE_CLOSED, &tap->dev_inuse);
+		blktap_ring_kick_user(tap);
+	}
+
+	bdput(bdev);
+	return 0;
+}
+
+static int
+blktap_device_getgeo(struct block_device *bd, struct hd_geometry *hg)
+{
+ /* We don't have real geometry info, but let's at least return
+ values consistent with the size of the device */
+ sector_t nsect = get_capacity(bd->bd_disk);
+ sector_t cylinders = nsect;
+
+ hg->heads = 0xff;
+ hg->sectors = 0x3f;
+ sector_div(cylinders, hg->heads * hg->sectors);
+ hg->cylinders = cylinders;
+ if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect)
+ hg->cylinders = 0xffff;
+ return 0;
+}
+
+static int
+blktap_device_ioctl(struct block_device *bd, fmode_t mode,
+ unsigned command, unsigned long argument)
+{
+ int i;
+
+ switch (command) {
+ case CDROMMULTISESSION:
+ BTDBG("FIXME: support multisession CDs later\n");
+ for (i = 0; i < sizeof(struct cdrom_multisession); i++)
+ if (put_user(0, (char __user *)(argument + i)))
+ return -EFAULT;
+ return 0;
+
+ case SCSI_IOCTL_GET_IDLUN:
+ if (!access_ok(VERIFY_WRITE, argument,
+ sizeof(struct scsi_idlun)))
+ return -EFAULT;
+
+ /* return 0 for now. */
+ __put_user(0, &((struct scsi_idlun __user *)argument)->dev_id);
+ __put_user(0,
+ &((struct scsi_idlun __user
*)argument)->host_unique_id);
+ return 0;
+
+ default:
+ /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
+ command);*/
+ return -EINVAL; /* same return as native Linux */
+ }
+
+ return 0;
+}
+
+static struct block_device_operations blktap_device_file_operations = {
+ .owner = THIS_MODULE,
+ .open = blktap_device_open,
+ .release = blktap_device_release,
+ .ioctl = blktap_device_ioctl,
+ .getgeo = blktap_device_getgeo
+};
+
+/* NB. __blktap holding the queue lock; blktap where unlocked */
+
+static inline struct request*
+__blktap_next_queued_rq(struct request_queue *q)
+{
+ return blk_peek_request(q);
+}
+
+static inline void
+__blktap_dequeue_rq(struct request *rq)
+{
+ blk_start_request(rq);
+}
+
+/* NB. err == 0 indicates success, failures < 0 */
+
+static inline void
+__blktap_end_queued_rq(struct request *rq, int err)
+{
+ blk_start_request(rq);
+ __blk_end_request(rq, err, blk_rq_bytes(rq));
+}
+
+static inline void
+__blktap_end_rq(struct request *rq, int err)
+{
+ __blk_end_request(rq, err, blk_rq_bytes(rq));
+}
+
+static inline void
+blktap_end_rq(struct request *rq, int err)
+{
+ spin_lock_irq(rq->q->queue_lock);
+ __blktap_end_rq(rq, err);
+ spin_unlock_irq(rq->q->queue_lock);
+}
+
+void
+blktap_device_end_request(struct blktap *tap,
+ struct blktap_request *request,
+ int error)
+{
+ struct blktap_device *tapdev = &tap->device;
+ struct request *rq = request->rq;
+
+ blktap_ring_unmap_request(tap, request);
+
+ blktap_ring_free_request(tap, request);
+
+ dev_dbg(disk_to_dev(tapdev->gd),
+ "end_request: op=%d error=%d bytes=%d\n",
+ rq_data_dir(rq), error, blk_rq_bytes(rq));
+
+ blktap_end_rq(rq, error);
+}
+
+int
+blktap_device_make_request(struct blktap *tap, struct request *rq)
+{
+ struct blktap_device *tapdev = &tap->device;
+ struct blktap_request *request;
+ int write, nsegs;
+ int err;
+
+ request = blktap_ring_make_request(tap);
+ if (IS_ERR(request)) {
+ err = PTR_ERR(request);
+ request = NULL;
+
+ if (err == -ENOSPC || err == -ENOMEM)
+ goto stop;
+
+ goto fail;
+ }
+
+ write = rq_data_dir(rq) == WRITE;
+ nsegs = blk_rq_map_sg(rq->q, rq, request->sg_table);
+
+ dev_dbg(disk_to_dev(tapdev->gd),
+ "make_request: op=%c bytes=%d nsegs=%d\n",
+ write ? 'w' : 'r', blk_rq_bytes(rq), nsegs);
+
+ request->rq = rq;
+ request->operation = write ? BLKTAP_OP_WRITE : BLKTAP_OP_READ;
+
+ err = blktap_request_get_pages(tap, request, nsegs);
+ if (err)
+ goto stop;
+
+ err = blktap_ring_map_request(tap, request);
+ if (err)
+ goto fail;
+
+ blktap_ring_submit_request(tap, request);
+
+ return 0;
+
+stop:
+ tap->stats.st_oo_req++;
+ err = -EBUSY;
+
+_out:
+ if (request)
+ blktap_ring_free_request(tap, request);
+
+ return err;
+fail:
+ if (printk_ratelimit())
+ dev_warn(disk_to_dev(tapdev->gd),
+ "make request: %d, failing\n", err);
+ goto _out;
+}
+
+/*
+ * called from tapdisk context
+ */
+void
+blktap_device_run_queue(struct blktap *tap)
+{
+ struct blktap_device *tapdev = &tap->device;
+ struct request_queue *q;
+ struct request *rq;
+ int err;
+
+ if (!tapdev->gd)
+ return;
+
+ q = tapdev->gd->queue;
+
+ spin_lock_irq(&tapdev->lock);
+ queue_flag_clear(QUEUE_FLAG_STOPPED, q);
+
+ do {
+ rq = __blktap_next_queued_rq(q);
+ if (!rq)
+ break;
+
+ if (!blk_fs_request(rq)) {
+ __blktap_end_queued_rq(rq, -EOPNOTSUPP);
+ continue;
+ }
+
+ spin_unlock_irq(&tapdev->lock);
+
+ err = blktap_device_make_request(tap, rq);
+
+ spin_lock_irq(&tapdev->lock);
+
+ if (err == -EBUSY) {
+ blk_stop_queue(q);
+ break;
+ }
+
+ __blktap_dequeue_rq(rq);
+
+ if (unlikely(err))
+ __blktap_end_rq(rq, err);
+ } while (1);
+
+ spin_unlock_irq(&tapdev->lock);
+}
+
+static void
+blktap_device_do_request(struct request_queue *rq)
+{
+ struct blktap_device *tapdev = rq->queuedata;
+ struct blktap *tap = dev_to_blktap(tapdev);
+
+ blktap_ring_kick_user(tap);
+}
+
+static void
+blktap_device_configure(struct blktap *tap,
+ struct blktap_device_info *info)
+{
+ struct blktap_device *tapdev = &tap->device;
+ struct gendisk *gd = tapdev->gd;
+ struct request_queue *rq = gd->queue;
+
+ set_capacity(gd, info->capacity);
+ set_disk_ro(gd, !!(info->flags & BLKTAP_DEVICE_FLAG_RO));
+
+ /* Hard sector size and max sectors impersonate the equiv. hardware. */
+ blk_queue_logical_block_size(rq, info->sector_size);
+ blk_queue_max_sectors(rq, 512);
+
+ /* Each segment in a request is up to an aligned page in size. */
+ blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
+ blk_queue_max_segment_size(rq, PAGE_SIZE);
+
+ /* Ensure a merged request will fit in a single I/O ring slot. */
+ blk_queue_max_phys_segments(rq, BLKTAP_SEGMENT_MAX);
+ blk_queue_max_hw_segments(rq, BLKTAP_SEGMENT_MAX);
+
+ /* Make sure buffer addresses are sector-aligned. */
+ blk_queue_dma_alignment(rq, 511);
+
+ /* We are reordering, but cacheless. */
+ blk_queue_ordered(rq, QUEUE_ORDERED_DRAIN, NULL);
+}
+
+static int
+blktap_device_validate_info(struct blktap *tap,
+ struct blktap_device_info *info)
+{
+ struct device *dev = tap->ring.dev;
+ int sector_order;
+
+ sector_order = ffs(info->sector_size) - 1;
+ if (sector_order < 9 ||
+ sector_order > 12 ||
+ info->sector_size != 1U<<sector_order)
+ goto fail;
+
+ if (!info->capacity ||
+ (info->capacity > ULLONG_MAX >> sector_order))
+ goto fail;
+
+ return 0;
+
+fail:
+ dev_err(dev, "capacity: %llu, sector-size: %u\n",
+ info->capacity, info->sector_size);
+ return -EINVAL;
+}
+
+int
+blktap_device_destroy(struct blktap *tap)
+{
+ struct blktap_device *tapdev = &tap->device;
+ struct block_device *bdev;
+ struct gendisk *gd;
+ int err;
+
+ gd = tapdev->gd;
+ if (!gd)
+ return 0;
+
+ bdev = bdget_disk(gd, 0);
+
+ err = !mutex_trylock(&bdev->bd_mutex);
+ if (err) {
+ /* NB. avoid a deadlock. the last opener syncs the
+ * bdev holding bd_mutex. */
+ err = -EBUSY;
+ goto out_nolock;
+ }
+
+ if (bdev->bd_openers) {
+ err = -EBUSY;
+ goto out;
+ }
+
+ del_gendisk(gd);
+ gd->private_data = NULL;
+
+ blk_cleanup_queue(gd->queue);
+
+ put_disk(gd);
+ tapdev->gd = NULL;
+
+ clear_bit(BLKTAP_DEVICE, &tap->dev_inuse);
+ err = 0;
+out:
+ mutex_unlock(&bdev->bd_mutex);
+out_nolock:
+ bdput(bdev);
+
+ return err;
+}
+
+static void
+blktap_device_fail_queue(struct blktap *tap)
+{
+ struct blktap_device *tapdev = &tap->device;
+ struct request_queue *q = tapdev->gd->queue;
+
+ spin_lock_irq(&tapdev->lock);
+ queue_flag_clear(QUEUE_FLAG_STOPPED, q);
+
+ do {
+ struct request *rq = __blktap_next_queued_rq(q);
+ if (!rq)
+ break;
+
+ __blktap_end_queued_rq(rq, -EIO);
+ } while (1);
+
+ spin_unlock_irq(&tapdev->lock);
+}
+
+static int
+blktap_device_try_destroy(struct blktap *tap)
+{
+ int err;
+
+ err = blktap_device_destroy(tap);
+ if (err)
+ blktap_device_fail_queue(tap);
+
+ return err;
+}
+
+void
+blktap_device_destroy_sync(struct blktap *tap)
+{
+ wait_event(tap->ring.poll_wait,
+ !blktap_device_try_destroy(tap));
+}
+
+int
+blktap_device_create(struct blktap *tap, struct blktap_device_info *info)
+{
+ int minor, err;
+ struct gendisk *gd;
+ struct request_queue *rq;
+ struct blktap_device *tapdev;
+
+ gd = NULL;
+ rq = NULL;
+ tapdev = &tap->device;
+ minor = tap->minor;
+
+ if (test_bit(BLKTAP_DEVICE, &tap->dev_inuse))
+ return -EEXIST;
+
+ if (blktap_device_validate_info(tap, info))
+ return -EINVAL;
+
+ gd = alloc_disk(1);
+ if (!gd) {
+ err = -ENOMEM;
+ goto fail;
+ }
+
+ if (minor < 26) {
+ sprintf(gd->disk_name, "td%c", 'a' + minor % 26);
+ } else if (minor < (26 + 1) * 26) {
+ sprintf(gd->disk_name, "td%c%c",
+ 'a' + minor / 26 - 1,'a' + minor % 26);
+ } else {
+ const unsigned int m1 = (minor / 26 - 1) / 26 - 1;
+ const unsigned int m2 = (minor / 26 - 1) % 26;
+ const unsigned int m3 = minor % 26;
+ sprintf(gd->disk_name, "td%c%c%c",
+ 'a' + m1, 'a' + m2, 'a' + m3);
+ }
+
+ gd->major = blktap_device_major;
+ gd->first_minor = minor;
+ gd->fops = &blktap_device_file_operations;
+ gd->private_data = tapdev;
+
+ spin_lock_init(&tapdev->lock);
+ rq = blk_init_queue(blktap_device_do_request, &tapdev->lock);
+ if (!rq) {
+ err = -ENOMEM;
+ goto fail;
+ }
+ elevator_init(rq, "noop");
+
+ gd->queue = rq;
+ rq->queuedata = tapdev;
+ tapdev->gd = gd;
+
+ blktap_device_configure(tap, info);
+ add_disk(gd);
+
+ set_bit(BLKTAP_DEVICE, &tap->dev_inuse);
+
+ dev_info(disk_to_dev(gd), "sector-size: %u/%u capacity: %llu\n",
+ queue_logical_block_size(rq),
+ queue_physical_block_size(rq),
+ (unsigned long long)get_capacity(gd));
+
+ return 0;
+
+fail:
+ if (gd)
+ del_gendisk(gd);
+ if (rq)
+ blk_cleanup_queue(rq);
+
+ return err;
+}
+
+size_t
+blktap_device_debug(struct blktap *tap, char *buf, size_t size)
+{
+ struct gendisk *disk = tap->device.gd;
+ struct request_queue *q;
+ struct block_device *bdev;
+ char *s = buf, *end = buf + size;
+
+ if (!disk)
+ return 0;
+
+ q = disk->queue;
+
+ s += snprintf(s, end - s,
+ "disk capacity:%llu sector size:%u\n",
+ (unsigned long long)get_capacity(disk),
+ queue_logical_block_size(q));
+
+ s += snprintf(s, end - s,
+ "queue flags:%#lx plugged:%d stopped:%d empty:%d\n",
+ q->queue_flags,
+ blk_queue_plugged(q), blk_queue_stopped(q),
+ elv_queue_empty(q));
+
+ bdev = bdget_disk(disk, 0);
+ if (bdev) {
+ s += snprintf(s, end - s,
+ "bdev openers:%d closed:%d\n",
+ bdev->bd_openers,
+ test_bit(BLKTAP_DEVICE_CLOSED, &tap->dev_inuse));
+ bdput(bdev);
+ }
+
+ return s - buf;
+}
+
+int __init
+blktap_device_init(void)
+{
+	int major;
+
+	/* Dynamically allocate a major for this device */
+	major = register_blkdev(0, "tapdev");
+	if (major < 0) {
+		BTERR("Couldn't register blktap device\n");
+		return major;
+	}
+
+	blktap_device_major = major;
+	BTINFO("blktap device major %d\n", major);
+
+	return 0;
+}
+
+void
+blktap_device_exit(void)
+{
+ if (blktap_device_major)
+ unregister_blkdev(blktap_device_major, "tapdev");
+}
diff --git a/drivers/block/blktap/request.c b/drivers/block/blktap/request.c
new file mode 100644
index 0000000..8cfd6c9
--- /dev/null
+++ b/drivers/block/blktap/request.c
@@ -0,0 +1,418 @@
+#include <linux/mempool.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/device.h>
+
+#include "blktap.h"
+
+/* max pages per shared pool. just to prevent accidental dos. */
+#define POOL_MAX_PAGES (256*BLKTAP_SEGMENT_MAX)
+
+/* default page pool size. when considering to shrink a shared pool,
+ * note that paused tapdisks may grab a whole lot of pages for a long
+ * time. */
+#define POOL_DEFAULT_PAGES (2 * BLKTAP_RING_SIZE * BLKTAP_SEGMENT_MAX)
+
+/* max number of pages allocatable per request. */
+#define POOL_MAX_REQUEST_PAGES BLKTAP_SEGMENT_MAX
+
+/* min request structs per pool. These grow dynamically. */
+#define POOL_MIN_REQS BLKTAP_RING_SIZE
+
+static struct kset *pool_set;
+
+#define kobj_to_pool(_kobj) \
+ container_of(_kobj, struct blktap_page_pool, kobj)
+
+static struct kmem_cache *request_cache;
+static mempool_t *request_pool;
+
+static void
+__page_pool_wake(struct blktap_page_pool *pool)
+{
+ mempool_t *mem = pool->bufs;
+
+ /*
+ NB. slightly wasteful to always wait for a full segment
+ set. but this ensures the next disk makes
+ progress. presently, the repeated request struct
+ alloc/release cycles would otherwise keep everyone spinning.
+ */
+
+ if (mem->curr_nr >= POOL_MAX_REQUEST_PAGES)
+ wake_up(&pool->wait);
+}
+
+int
+blktap_request_get_pages(struct blktap *tap,
+ struct blktap_request *request, int nr_pages)
+{
+ struct blktap_page_pool *pool = tap->pool;
+ mempool_t *mem = pool->bufs;
+ struct page *page;
+
+ BUG_ON(request->nr_pages != 0);
+ BUG_ON(nr_pages > POOL_MAX_REQUEST_PAGES);
+
+ if (mem->curr_nr < nr_pages)
+ return -ENOMEM;
+
+ /* NB. avoid thundering herds of tapdisks colliding. */
+ spin_lock(&pool->lock);
+
+ if (mem->curr_nr < nr_pages) {
+ spin_unlock(&pool->lock);
+ return -ENOMEM;
+ }
+
+ while (request->nr_pages < nr_pages) {
+ page = mempool_alloc(mem, GFP_NOWAIT);
+ BUG_ON(!page);
+ request->pages[request->nr_pages++] = page;
+ }
+
+ spin_unlock(&pool->lock);
+
+ return 0;
+}
+
+static void
+blktap_request_put_pages(struct blktap *tap,
+ struct blktap_request *request)
+{
+ struct blktap_page_pool *pool = tap->pool;
+ struct page *page;
+
+ while (request->nr_pages) {
+ page = request->pages[--request->nr_pages];
+ mempool_free(page, pool->bufs);
+ }
+}
+
+size_t
+blktap_request_debug(struct blktap *tap, char *buf, size_t size)
+{
+	struct blktap_page_pool *pool = tap->pool;
+	mempool_t *mem = pool->bufs;
+	char *s = buf, *end = buf + size;
+
+	s += snprintf(s, end - s,
+		      "pool:%s pages:%d free:%d\n",
+		      kobject_name(&pool->kobj),
+		      mem->min_nr, mem->curr_nr);
+
+	return s - buf;
+}
+
+struct blktap_request*
+blktap_request_alloc(struct blktap *tap)
+{
+ struct blktap_request *request;
+
+ request = mempool_alloc(request_pool, GFP_NOWAIT);
+ if (request)
+ request->tap = tap;
+
+ return request;
+}
+
+void
+blktap_request_free(struct blktap *tap,
+ struct blktap_request *request)
+{
+ blktap_request_put_pages(tap, request);
+
+ mempool_free(request, request_pool);
+
+ __page_pool_wake(tap->pool);
+}
+
+void
+blktap_request_bounce(struct blktap *tap,
+ struct blktap_request *request,
+ int seg, int write)
+{
+ struct scatterlist *sg = &request->sg_table[seg];
+ void *s, *p;
+
+ BUG_ON(seg >= request->nr_pages);
+
+ s = sg_virt(sg);
+ p = page_address(request->pages[seg]) + sg->offset;
+
+ if (write)
+ memcpy(p, s, sg->length);
+ else
+ memcpy(s, p, sg->length);
+}
+
+static void
+blktap_request_ctor(void *obj)
+{
+ struct blktap_request *request = obj;
+
+ memset(request, 0, sizeof(*request));
+ sg_init_table(request->sg_table, ARRAY_SIZE(request->sg_table));
+}
+
+static int
+blktap_page_pool_resize(struct blktap_page_pool *pool, int target)
+{
+ mempool_t *bufs = pool->bufs;
+ int err;
+
+ /* NB. mempool asserts min_nr >= 1 */
+ target = max(1, target);
+
+ err = mempool_resize(bufs, target, GFP_KERNEL);
+ if (err)
+ return err;
+
+ __page_pool_wake(pool);
+
+ return 0;
+}
+
+struct pool_attribute {
+ struct attribute attr;
+
+ ssize_t (*show)(struct blktap_page_pool *pool,
+ char *buf);
+
+ ssize_t (*store)(struct blktap_page_pool *pool,
+ const char *buf, size_t count);
+};
+
+#define kattr_to_pool_attr(_kattr) \
+ container_of(_kattr, struct pool_attribute, attr)
+
+static ssize_t
+blktap_page_pool_show_size(struct blktap_page_pool *pool,
+ char *buf)
+{
+ mempool_t *mem = pool->bufs;
+ return sprintf(buf, "%d", mem->min_nr);
+}
+
+static ssize_t
+blktap_page_pool_store_size(struct blktap_page_pool *pool,
+ const char *buf, size_t size)
+{
+ int target;
+
+ /*
+ * NB. target fixup to avoid undesired results. less than a
+ * full segment set can wedge the disk. much more than a
+ * couple times the physical queue depth is rarely useful.
+ */
+
+ target = simple_strtoul(buf, NULL, 0);
+ target = max(POOL_MAX_REQUEST_PAGES, target);
+ target = min(target, POOL_MAX_PAGES);
+
+ return blktap_page_pool_resize(pool, target) ? : size;
+}
+
+static struct pool_attribute blktap_page_pool_attr_size =
+ __ATTR(size, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH,
+ blktap_page_pool_show_size,
+ blktap_page_pool_store_size);
+
+static ssize_t
+blktap_page_pool_show_free(struct blktap_page_pool *pool,
+ char *buf)
+{
+ mempool_t *mem = pool->bufs;
+ return sprintf(buf, "%d", mem->curr_nr);
+}
+
+static struct pool_attribute blktap_page_pool_attr_free =
+ __ATTR(free, S_IRUSR|S_IRGRP|S_IROTH,
+ blktap_page_pool_show_free,
+ NULL);
+
+static struct attribute *blktap_page_pool_attrs[] = {
+ &blktap_page_pool_attr_size.attr,
+ &blktap_page_pool_attr_free.attr,
+ NULL,
+};
+
+static inline struct kobject*
+__blktap_kset_find_obj(struct kset *kset, const char *name)
+{
+ struct kobject *k;
+ struct kobject *ret = NULL;
+
+ spin_lock(&kset->list_lock);
+ list_for_each_entry(k, &kset->list, entry) {
+ if (kobject_name(k) && !strcmp(kobject_name(k), name)) {
+ ret = kobject_get(k);
+ break;
+ }
+ }
+ spin_unlock(&kset->list_lock);
+ return ret;
+}
+
+static ssize_t
+blktap_page_pool_show_attr(struct kobject *kobj, struct attribute *kattr,
+ char *buf)
+{
+ struct blktap_page_pool *pool = kobj_to_pool(kobj);
+ struct pool_attribute *attr = kattr_to_pool_attr(kattr);
+
+ if (attr->show)
+ return attr->show(pool, buf);
+
+ return -EIO;
+}
+
+static ssize_t
+blktap_page_pool_store_attr(struct kobject *kobj, struct attribute *kattr,
+			    const char *buf, size_t size)
+{
+	struct blktap_page_pool *pool = kobj_to_pool(kobj);
+	struct pool_attribute *attr = kattr_to_pool_attr(kattr);
+
+	if (attr->store)
+		return attr->store(pool, buf, size);
+
+	return -EIO;
+}
+
+static struct sysfs_ops blktap_page_pool_sysfs_ops = {
+ .show = blktap_page_pool_show_attr,
+ .store = blktap_page_pool_store_attr,
+};
+
+static void
+blktap_page_pool_release(struct kobject *kobj)
+{
+ struct blktap_page_pool *pool = kobj_to_pool(kobj);
+ mempool_destroy(pool->bufs);
+ kfree(pool);
+}
+
+struct kobj_type blktap_page_pool_ktype = {
+ .release = blktap_page_pool_release,
+ .sysfs_ops = &blktap_page_pool_sysfs_ops,
+ .default_attrs = blktap_page_pool_attrs,
+};
+
+static void*
+__mempool_page_alloc(gfp_t gfp_mask, void *pool_data)
+{
+ struct page *page;
+
+ if (!(gfp_mask & __GFP_WAIT))
+ return NULL;
+
+ page = alloc_page(gfp_mask);
+ if (page)
+ SetPageReserved(page);
+
+ return page;
+}
+
+static void
+__mempool_page_free(void *element, void *pool_data)
+{
+ struct page *page = element;
+
+ ClearPageReserved(page);
+ put_page(page);
+}
+
+static struct kobject*
+blktap_page_pool_create(const char *name, int nr_pages)
+{
+	struct blktap_page_pool *pool;
+	int err;
+
+	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
+	if (!pool)
+		goto fail;
+
+	spin_lock_init(&pool->lock);
+	init_waitqueue_head(&pool->wait);
+
+	pool->bufs = mempool_create(nr_pages,
+				    __mempool_page_alloc, __mempool_page_free,
+				    pool);
+	if (!pool->bufs)
+		goto fail_pool;
+
+	kobject_init(&pool->kobj, &blktap_page_pool_ktype);
+	pool->kobj.kset = pool_set;
+	err = kobject_add(&pool->kobj, &pool_set->kobj, "%s", name);
+	if (err)
+		goto fail_bufs;
+
+	return &pool->kobj;
+
+fail_bufs:
+	/* release() destroys pool->bufs and frees pool */
+	kobject_put(&pool->kobj);
+	return NULL;
+fail_pool:
+	kfree(pool);
+fail:
+	return NULL;
+}
+
+struct blktap_page_pool*
+blktap_page_pool_get(const char *name)
+{
+ struct kobject *kobj;
+
+ kobj = __blktap_kset_find_obj(pool_set, name);
+ if (!kobj)
+ kobj = blktap_page_pool_create(name,
+ POOL_DEFAULT_PAGES);
+ if (!kobj)
+ return ERR_PTR(-ENOMEM);
+
+ return kobj_to_pool(kobj);
+}
+
+int __init
+blktap_page_pool_init(struct kobject *parent)
+{
+ request_cache =
+ kmem_cache_create("blktap-request",
+ sizeof(struct blktap_request), 0,
+ 0, blktap_request_ctor);
+ if (!request_cache)
+ return -ENOMEM;
+
+ request_pool =
+ mempool_create_slab_pool(POOL_MIN_REQS, request_cache);
+ if (!request_pool)
+ return -ENOMEM;
+
+ pool_set = kset_create_and_add("pools", NULL, parent);
+ if (!pool_set)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void
+blktap_page_pool_exit(void)
+{
+ if (pool_set) {
+ BUG_ON(!list_empty(&pool_set->list));
+ kset_unregister(pool_set);
+ pool_set = NULL;
+ }
+
+ if (request_pool) {
+ mempool_destroy(request_pool);
+ request_pool = NULL;
+ }
+
+ if (request_cache) {
+ kmem_cache_destroy(request_cache);
+ request_cache = NULL;
+ }
+}
diff --git a/drivers/block/blktap/ring.c b/drivers/block/blktap/ring.c
new file mode 100644
index 0000000..635f1fd
--- /dev/null
+++ b/drivers/block/blktap/ring.c
@@ -0,0 +1,595 @@
+
+#include <linux/device.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/poll.h>
+#include <linux/blkdev.h>
+
+#include "blktap.h"
+
+int blktap_ring_major;
+static struct cdev blktap_ring_cdev;
+
+ /*
+ * BLKTAP - immediately before the mmap area,
+ * we have a bunch of pages reserved for shared memory rings.
+ */
+#define RING_PAGES 1
+
+/* Size of struct blktap_device_info up to and including member @_memb;
+ * used to accept shorter, older userspace layouts.  Parenthesized so
+ * the expansion is safe in any expression context. */
+#define BLKTAP_INFO_SIZE_AT(_memb) \
+ (offsetof(struct blktap_device_info, _memb) + \
+ sizeof(((struct blktap_device_info*)0)->_memb))
+
+/*
+ * Complete one pending block request from a userspace ring response.
+ * The response id indexes ring->pending[]; id, slot, and operation are
+ * validated before the request is ended.  A malformed response is
+ * logged and, if it does map to a pending request, fails it with the
+ * validation error; otherwise it is only logged.
+ */
+static void
+blktap_ring_read_response(struct blktap *tap,
+ const blktap_ring_rsp_t *rsp)
+{
+ struct blktap_ring *ring = &tap->ring;
+ struct blktap_request *request;
+ int usr_idx, err;
+
+ request = NULL;
+
+ usr_idx = rsp->id;
+ if (usr_idx < 0 || usr_idx >= BLKTAP_RING_SIZE) {
+ err = -ERANGE;
+ goto invalid;
+ }
+
+ request = ring->pending[usr_idx];
+
+ if (!request) {
+ err = -ESRCH;
+ goto invalid;
+ }
+
+ if (rsp->operation != request->operation) {
+ err = -EINVAL;
+ goto invalid;
+ }
+
+ dev_dbg(ring->dev,
+ "request %d [%p] response: %d\n",
+ request->usr_idx, request, rsp->status);
+
+ err = rsp->status == BLKTAP_RSP_OKAY ? 0 : -EIO;
+end_request:
+ blktap_device_end_request(tap, request, err);
+ return;
+
+invalid:
+ /* request may be NULL here (-ERANGE/-ESRCH); don't dereference it */
+ dev_warn(ring->dev,
+ "invalid response, idx:%d status:%d op:%d/%d: err %d\n",
+ usr_idx, rsp->status,
+ rsp->operation,
+ request ? request->operation : -1,
+ err);
+ if (request)
+ goto end_request;
+}
+
+/*
+ * Drain all outstanding responses from the shared ring.  Runs in the
+ * tapdisk task's context; mmap_sem is taken so ring->vma cannot be
+ * torn down while responses are consumed.
+ */
+static void
+blktap_read_ring(struct blktap *tap)
+{
+ struct blktap_ring *ring = &tap->ring;
+ blktap_ring_rsp_t rsp;
+ RING_IDX rc, rp;
+
+ down_read(&current->mm->mmap_sem);
+ if (!ring->vma) {
+ up_read(&current->mm->mmap_sem);
+ return;
+ }
+
+ /* for each outstanding message on the ring */
+ rp = ring->ring.sring->rsp_prod;
+ rmb(); /* read rsp_prod before the response entries it covers */
+
+ for (rc = ring->ring.rsp_cons; rc != rp; rc++) {
+ memcpy(&rsp, RING_GET_RESPONSE(&ring->ring, rc), sizeof(rsp));
+ blktap_ring_read_response(tap, &rsp);
+ }
+
+ ring->ring.rsp_cons = rc;
+
+ up_read(&current->mm->mmap_sem);
+}
+
+#define MMAP_VADDR(_start, _req, _seg) \
+ ((_start) + \
+ ((_req) * BLKTAP_SEGMENT_MAX * BLKTAP_PAGE_SIZE) + \
+ ((_seg) * BLKTAP_PAGE_SIZE))
+
+/* Faults in the ring vma are never legal: every page is inserted
+ * explicitly via vm_insert_page(), so any miss means a bad access. */
+static int blktap_ring_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ return VM_FAULT_SIGBUS;
+}
+
+/* Fail every request still pending on the ring with -EIO; used when
+ * the userspace side goes away before completing them. */
+static void
+blktap_ring_fail_pending(struct blktap *tap)
+{
+ struct blktap_ring *ring = &tap->ring;
+ struct blktap_request *request;
+ int usr_idx;
+
+ for (usr_idx = 0; usr_idx < BLKTAP_RING_SIZE; usr_idx++) {
+ request = ring->pending[usr_idx];
+ if (!request)
+ continue;
+
+ blktap_device_end_request(tap, request, -EIO);
+ }
+}
+
+/*
+ * vma close: the tapdisk mapping is going away.  Fail everything still
+ * in flight, release the shared ring page, and finish a pending
+ * shutdown if one was requested while the ring was mapped.
+ */
+static void
+blktap_ring_vm_close(struct vm_area_struct *vma)
+{
+ struct blktap *tap = vma->vm_private_data;
+ struct blktap_ring *ring = &tap->ring;
+ struct page *page = virt_to_page(ring->ring.sring);
+
+ blktap_ring_fail_pending(tap);
+
+ /* unmap and free the single shared-ring page */
+ zap_page_range(vma, vma->vm_start, PAGE_SIZE, NULL);
+ ClearPageReserved(page);
+ __free_page(page);
+
+ ring->vma = NULL;
+
+ if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
+ blktap_control_destroy_tap(tap);
+}
+
+static struct vm_operations_struct blktap_ring_vm_operations = {
+ .close = blktap_ring_vm_close,
+ .fault = blktap_ring_fault,
+};
+
+/* Map one request segment page into the tapdisk vma at the slot
+ * computed from the request's usr_idx and segment number. */
+int
+blktap_ring_map_segment(struct blktap *tap,
+ struct blktap_request *request,
+ int seg)
+{
+ struct blktap_ring *ring = &tap->ring;
+ unsigned long uaddr;
+
+ uaddr = MMAP_VADDR(ring->user_vstart, request->usr_idx, seg);
+ return vm_insert_page(ring->vma, uaddr, request->pages[seg]);
+}
+
+/*
+ * Map all pages of a request into the tapdisk vma.  Write payloads are
+ * bounced into the request pages first so userspace sees the data.
+ * On failure all partially mapped segments are unmapped again.
+ */
+int
+blktap_ring_map_request(struct blktap *tap,
+ struct blktap_request *request)
+{
+ int seg, err = 0;
+ int write;
+
+ write = request->operation == BLKTAP_OP_WRITE;
+
+ for (seg = 0; seg < request->nr_pages; seg++) {
+ if (write)
+ blktap_request_bounce(tap, request, seg, write);
+
+ err = blktap_ring_map_segment(tap, request, seg);
+ if (err)
+ break;
+ }
+
+ if (err)
+ blktap_ring_unmap_request(tap, request);
+
+ return err;
+}
+
+/*
+ * Remove a request's pages from the tapdisk vma.  Read payloads are
+ * bounced back to the original bio pages first, since userspace wrote
+ * the data into the mapped request pages.
+ */
+void
+blktap_ring_unmap_request(struct blktap *tap,
+ struct blktap_request *request)
+{
+ struct blktap_ring *ring = &tap->ring;
+ unsigned long uaddr;
+ unsigned size;
+ int seg, read;
+
+ uaddr = MMAP_VADDR(ring->user_vstart, request->usr_idx, 0);
+ size = request->nr_pages << PAGE_SHIFT;
+ read = request->operation == BLKTAP_OP_READ;
+
+ if (read)
+ for (seg = 0; seg < request->nr_pages; seg++)
+ blktap_request_bounce(tap, request, seg, !read);
+
+ zap_page_range(ring->vma, uaddr, size, NULL);
+}
+
+/* Release a request's ring slot and return it to the request pool. */
+void
+blktap_ring_free_request(struct blktap *tap,
+ struct blktap_request *request)
+{
+ struct blktap_ring *ring = &tap->ring;
+
+ ring->pending[request->usr_idx] = NULL;
+ ring->n_pending--;
+
+ blktap_request_free(tap, request);
+}
+
+/*
+ * Allocate a request and claim a free pending-table slot for it.
+ * Returns -ENOSPC when the ring is full, -ENOMEM when allocation
+ * fails.  Since the pending table has one slot per ring entry, a
+ * non-full ring guarantees a free slot -- hence the BUG_ON.
+ */
+struct blktap_request*
+blktap_ring_make_request(struct blktap *tap)
+{
+ struct blktap_ring *ring = &tap->ring;
+ struct blktap_request *request;
+ int usr_idx;
+
+ if (RING_FULL(&ring->ring))
+ return ERR_PTR(-ENOSPC);
+
+ request = blktap_request_alloc(tap);
+ if (!request)
+ return ERR_PTR(-ENOMEM);
+
+ for (usr_idx = 0; usr_idx < BLKTAP_RING_SIZE; usr_idx++)
+ if (!ring->pending[usr_idx])
+ break;
+
+ BUG_ON(usr_idx >= BLKTAP_RING_SIZE);
+
+ request->tap = tap;
+ request->usr_idx = usr_idx;
+
+ ring->pending[usr_idx] = request;
+ ring->n_pending++;
+
+ return request;
+}
+
+/*
+ * Translate a pending request into a ring entry for userspace and
+ * advance the private producer index (pushed later, in poll).  Also
+ * timestamps the request and updates the per-tap I/O statistics.
+ */
+void
+blktap_ring_submit_request(struct blktap *tap,
+ struct blktap_request *request)
+{
+ struct blktap_ring *ring = &tap->ring;
+ blktap_ring_req_t *breq;
+ struct scatterlist *sg;
+ int i, nsecs = 0;
+
+ dev_dbg(ring->dev,
+ "request %d [%p] submit\n", request->usr_idx, request);
+
+ breq = RING_GET_REQUEST(&ring->ring, ring->ring.req_prod_pvt);
+
+ breq->id = request->usr_idx;
+ breq->sector_number = blk_rq_pos(request->rq);
+ breq->__pad = 0;
+ breq->operation = request->operation;
+ breq->nr_segments = request->nr_pages;
+
+ /* describe each segment in 512-byte sector units */
+ blktap_for_each_sg(sg, request, i) {
+ struct blktap_segment *seg = &breq->seg[i];
+ int first, count;
+
+ count = sg->length >> 9;
+ first = sg->offset >> 9;
+
+ seg->first_sect = first;
+ seg->last_sect = first + count - 1;
+
+ nsecs += count;
+ }
+
+ ring->ring.req_prod_pvt++;
+
+ do_gettimeofday(&request->time);
+
+
+ if (request->operation == BLKTAP_OP_WRITE) {
+ tap->stats.st_wr_sect += nsecs;
+ tap->stats.st_wr_req++;
+ }
+
+ if (request->operation == BLKTAP_OP_READ) {
+ tap->stats.st_rd_sect += nsecs;
+ tap->stats.st_rd_req++;
+ }
+}
+
+/*
+ * Open the per-tap ring chardev.  Exactly one task (tapdisk) may hold
+ * the ring; a tap that is shutting down cannot be (re)opened.
+ * NOTE(review): the task check and assignment are not serialized
+ * against concurrent opens here -- confirm an outer lock or the
+ * chardev usage model makes this safe.
+ */
+static int
+blktap_ring_open(struct inode *inode, struct file *filp)
+{
+ struct blktap *tap = NULL;
+ int minor;
+
+ minor = iminor(inode);
+
+ if (minor < blktap_max_minor)
+ tap = blktaps[minor];
+
+ if (!tap)
+ return -ENXIO;
+
+ if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
+ return -ENXIO;
+
+ if (tap->ring.task)
+ return -EBUSY;
+
+ filp->private_data = tap;
+ tap->ring.task = current;
+
+ return 0;
+}
+
+/*
+ * Final release of the ring chardev: tear down the block device
+ * synchronously, drop ring ownership, and complete a deferred tap
+ * destruction if shutdown was requested meanwhile.
+ */
+static int
+blktap_ring_release(struct inode *inode, struct file *filp)
+{
+ struct blktap *tap = filp->private_data;
+
+ blktap_device_destroy_sync(tap);
+
+ tap->ring.task = NULL;
+
+ if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
+ blktap_control_destroy_tap(tap);
+
+ return 0;
+}
+
+/*
+ * Map the shared ring for tapdisk.  The first page of the vma is the
+ * ring itself (allocated here and marked reserved); request pages are
+ * inserted later above user_vstart.  Only one mapping may exist.
+ * NOTE(review): the vma length is not validated against RING_PAGES
+ * plus the request area -- confirm userspace always maps the full
+ * expected size.
+ */
+static int
+blktap_ring_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ struct blktap *tap = filp->private_data;
+ struct blktap_ring *ring = &tap->ring;
+ blktap_sring_t *sring;
+ struct page *page = NULL;
+ int err;
+
+ if (ring->vma)
+ return -EBUSY;
+
+ page = alloc_page(GFP_KERNEL|__GFP_ZERO);
+ if (!page)
+ return -ENOMEM;
+
+ SetPageReserved(page);
+
+ err = vm_insert_page(vma, vma->vm_start, page);
+ if (err)
+ goto fail;
+
+ sring = page_address(page);
+ SHARED_RING_INIT(sring);
+ FRONT_RING_INIT(&ring->ring, sring, PAGE_SIZE);
+
+ ring->ring_vstart = vma->vm_start;
+ ring->user_vstart = ring->ring_vstart + PAGE_SIZE;
+
+ vma->vm_private_data = tap;
+
+ /* never COW the ring, and keep it out of core dumps / swap */
+ vma->vm_flags |= VM_DONTCOPY;
+ vma->vm_flags |= VM_RESERVED;
+
+ vma->vm_ops = &blktap_ring_vm_operations;
+
+ ring->vma = vma;
+ return 0;
+
+fail:
+ if (page) {
+ zap_page_range(vma, vma->vm_start, PAGE_SIZE, NULL);
+ ClearPageReserved(page);
+ __free_page(page);
+ }
+
+ return err;
+}
+
+/*
+ * Ring chardev ioctls, restricted to the process owning the mapping:
+ *   RESPOND               - consume responses from the shared ring
+ *   CREATE_DEVICE_COMPAT  - create the block device, blktap1-style args
+ *   CREATE_DEVICE         - create the block device; unsupported flags
+ *                           are masked and reported back to the caller
+ *   REMOVE_DEVICE         - tear the block device down
+ */
+static int
+blktap_ring_ioctl(struct inode *inode, struct file *filp,
+ unsigned int cmd, unsigned long arg)
+{
+ struct blktap *tap = filp->private_data;
+ struct blktap_ring *ring = &tap->ring;
+ void __user *ptr = (void __user *)arg;
+ int err;
+
+ BTDBG("%d: cmd: %u, arg: %lu\n", tap->minor, cmd, arg);
+
+ if (!ring->vma || ring->vma->vm_mm != current->mm)
+ return -EACCES;
+
+ switch(cmd) {
+ case BLKTAP_IOCTL_RESPOND:
+
+ blktap_read_ring(tap);
+ return 0;
+
+ case BLKTAP_IOCTL_CREATE_DEVICE_COMPAT: {
+ struct blktap_device_info info;
+ struct blktap2_params params;
+
+ if (copy_from_user(&params, ptr, sizeof(params)))
+ return -EFAULT;
+
+ info.capacity = params.capacity;
+ info.sector_size = params.sector_size;
+ info.flags = 0;
+
+ err = blktap_device_create(tap, &info);
+ if (err)
+ return err;
+
+ if (params.name[0]) {
+ strncpy(tap->name, params.name, sizeof(params.name));
+ tap->name[sizeof(tap->name)-1] = 0;
+ }
+
+ return 0;
+ }
+
+ case BLKTAP_IOCTL_CREATE_DEVICE: {
+ struct blktap_device_info __user *ptr = (void __user *)arg;
+ struct blktap_device_info info;
+ unsigned long mask;
+ size_t base_sz, sz;
+
+ mask = BLKTAP_DEVICE_FLAG_RO;
+
+ memset(&info, 0, sizeof(info));
+ sz = base_sz = BLKTAP_INFO_SIZE_AT(flags);
+
+ if (copy_from_user(&info, ptr, sz))
+ return -EFAULT;
+
+ /* NOTE(review): sz never grows beyond base_sz here, so this
+ * re-copy is currently dead; presumably a hook for optional
+ * trailing members gated on info.flags -- confirm intent. */
+ if (sz > base_sz)
+ if (copy_from_user(&info, ptr, sz))
+ return -EFAULT;
+
+ /* report the flags actually honored back to userspace */
+ if (put_user(info.flags & mask, &ptr->flags))
+ return -EFAULT;
+
+ return blktap_device_create(tap, &info);
+ }
+
+ case BLKTAP_IOCTL_REMOVE_DEVICE:
+
+ return blktap_device_destroy(tap);
+ }
+
+ return -ENOIOCTLCMD;
+}
+
+/*
+ * poll() doubles as the request dispatch path: with the vma mapped and
+ * a disk present, drain the block-layer queue onto the ring, push any
+ * new requests, and report readable if there is work, a pending ring
+ * message, or a device-closed event for userspace to observe.
+ */
+static unsigned int blktap_ring_poll(struct file *filp, poll_table *wait)
+{
+ struct blktap *tap = filp->private_data;
+ struct blktap_ring *ring = &tap->ring;
+ int work;
+
+ poll_wait(filp, &tap->pool->wait, wait);
+ poll_wait(filp, &ring->poll_wait, wait);
+
+ down_read(&current->mm->mmap_sem);
+ if (ring->vma && tap->device.gd)
+ blktap_device_run_queue(tap);
+ up_read(&current->mm->mmap_sem);
+
+ work = ring->ring.req_prod_pvt - ring->ring.sring->req_prod;
+ RING_PUSH_REQUESTS(&ring->ring);
+
+ if (work ||
+ ring->ring.sring->private.tapif_user.msg ||
+ test_and_clear_bit(BLKTAP_DEVICE_CLOSED, &tap->dev_inuse))
+ return POLLIN | POLLRDNORM;
+
+ return 0;
+}
+
+/* per-tap ring chardev entry points */
+static struct file_operations blktap_ring_file_operations = {
+ .owner = THIS_MODULE,
+ .open = blktap_ring_open,
+ .release = blktap_ring_release,
+ .ioctl = blktap_ring_ioctl,
+ .mmap = blktap_ring_mmap,
+ .poll = blktap_ring_poll,
+};
+
+/* Wake the tapdisk poller so it notices new ring work or events. */
+void
+blktap_ring_kick_user(struct blktap *tap)
+{
+ wake_up(&tap->ring.poll_wait);
+}
+
+/* Refuse destruction while the ring is still owned or mapped;
+ * nothing to free here -- the chardev is shared across taps. */
+int
+blktap_ring_destroy(struct blktap *tap)
+{
+ struct blktap_ring *ring = &tap->ring;
+
+ if (ring->task || ring->vma)
+ return -EBUSY;
+
+ return 0;
+}
+
+/* Initialize per-tap ring state and assign its chardev number. */
+int
+blktap_ring_create(struct blktap *tap)
+{
+ struct blktap_ring *ring = &tap->ring;
+
+ init_waitqueue_head(&ring->poll_wait);
+ ring->devno = MKDEV(blktap_ring_major, tap->minor);
+
+ return 0;
+}
+
+/*
+ * Dump the pending-request table into @buf for the sysfs debug node.
+ * Returns the number of bytes written (snprintf-clamped to @size).
+ */
+size_t
+blktap_ring_debug(struct blktap *tap, char *buf, size_t size)
+{
+ struct blktap_ring *ring = &tap->ring;
+ char *s = buf, *end = buf + size;
+ int usr_idx;
+
+ s += snprintf(s, end - s,
+ "begin pending:%d\n", ring->n_pending);
+
+ for (usr_idx = 0; usr_idx < BLKTAP_RING_SIZE; usr_idx++) {
+ struct blktap_request *request;
+ struct timeval *time;
+ int write;
+
+ request = ring->pending[usr_idx];
+ if (!request)
+ continue;
+
+ write = request->operation == BLKTAP_OP_WRITE;
+ time = &request->time;
+
+ /* tv_usec is microseconds: 6 digits, not 9 */
+ s += snprintf(s, end - s,
+ "%02d: usr_idx:%02d "
+ "op:%c nr_pages:%02d time:%lu.%06lu\n",
+ usr_idx, request->usr_idx,
+ write ? 'W' : 'R', request->nr_pages,
+ time->tv_sec, time->tv_usec);
+ }
+
+ s += snprintf(s, end - s, "end pending\n");
+
+ return s - buf;
+}
+
+
+/*
+ * Register the dynamic "blktap2" chardev region (one minor per tap)
+ * and record the allocated major for device-node creation.
+ */
+int __init
+blktap_ring_init(void)
+{
+ dev_t dev = 0;
+ int err;
+
+ cdev_init(&blktap_ring_cdev, &blktap_ring_file_operations);
+ blktap_ring_cdev.owner = THIS_MODULE;
+
+ err = alloc_chrdev_region(&dev, 0, MAX_BLKTAP_DEVICE, "blktap2");
+ if (err < 0) {
+ BTERR("error registering ring devices: %d\n", err);
+ return err;
+ }
+
+ err = cdev_add(&blktap_ring_cdev, dev, MAX_BLKTAP_DEVICE);
+ if (err) {
+ BTERR("error adding ring device: %d\n", err);
+ unregister_chrdev_region(dev, MAX_BLKTAP_DEVICE);
+ return err;
+ }
+
+ blktap_ring_major = MAJOR(dev);
+ BTINFO("blktap ring major: %d\n", blktap_ring_major);
+
+ return 0;
+}
+
+/* Undo blktap_ring_init(); a zero major means init never succeeded. */
+void
+blktap_ring_exit(void)
+{
+ if (!blktap_ring_major)
+ return;
+
+ cdev_del(&blktap_ring_cdev);
+ unregister_chrdev_region(MKDEV(blktap_ring_major, 0),
+ MAX_BLKTAP_DEVICE);
+
+ blktap_ring_major = 0;
+}
diff --git a/drivers/block/blktap/sysfs.c b/drivers/block/blktap/sysfs.c
new file mode 100644
index 0000000..182de9a
--- /dev/null
+++ b/drivers/block/blktap/sysfs.c
@@ -0,0 +1,288 @@
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/genhd.h>
+#include <linux/blkdev.h>
+
+#include "blktap.h"
+
+int blktap_debug_level = 1;
+
+static struct class *class;
+
+/*
+ * sysfs 'name' store: label the tap.  Rejects names that would not
+ * fit and input containing embedded NULs.  (Signature rejoined: the
+ * original patch line was wrapped by the mailer.)
+ * NOTE(review): a trailing newline from `echo` ends up in tap->name --
+ * confirm whether consumers expect it stripped.
+ */
+static ssize_t
+blktap_sysfs_set_name(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t size)
+{
+ struct blktap *tap;
+
+ tap = dev_get_drvdata(dev);
+ if (!tap)
+ return 0;
+
+ if (size >= BLKTAP_NAME_MAX)
+ return -ENAMETOOLONG;
+
+ if (strnlen(buf, size) != size)
+ return -EINVAL;
+
+ strcpy(tap->name, buf);
+
+ return size;
+}
+
+/*
+ * sysfs 'name' show: the assigned name, or the minor number if none.
+ * (Signature rejoined: the original patch line was wrapped by the
+ * mailer.)
+ */
+static ssize_t
+blktap_sysfs_get_name(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct blktap *tap;
+ ssize_t size;
+
+ tap = dev_get_drvdata(dev);
+ if (!tap)
+ return 0;
+
+ if (tap->name[0])
+ size = sprintf(buf, "%s\n", tap->name);
+ else
+ size = sprintf(buf, "%d\n", tap->minor);
+
+ return size;
+}
+static DEVICE_ATTR(name, S_IRUGO|S_IWUSR,
+ blktap_sysfs_get_name, blktap_sysfs_set_name);
+
+/* Deferred tap destruction, scheduled when no ring vma exists to
+ * deliver a close message through. */
+static void
+blktap_sysfs_remove_work(struct work_struct *work)
+{
+ struct blktap *tap
+ = container_of(work, struct blktap, remove_work);
+ blktap_control_destroy_tap(tap);
+}
+
+/*
+ * sysfs 'remove' store: request tap shutdown and wait for it.  With a
+ * live ring, ask tapdisk to close via a ring message; otherwise defer
+ * destruction to a workqueue.  Waits until drvdata is cleared by
+ * blktap_sysfs_destroy(), i.e. teardown completed.
+ */
+static ssize_t
+blktap_sysfs_remove_device(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t size)
+{
+ struct blktap *tap;
+ int err;
+
+ tap = dev_get_drvdata(dev);
+ if (!tap)
+ return size;
+
+ /* already requested by someone else: just wait for completion */
+ if (test_and_set_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
+ goto wait;
+
+ if (tap->ring.vma) {
+ blktap_sring_t *sring = tap->ring.ring.sring;
+ sring->private.tapif_user.msg = BLKTAP_RING_MESSAGE_CLOSE;
+ blktap_ring_kick_user(tap);
+ } else {
+ INIT_WORK(&tap->remove_work, blktap_sysfs_remove_work);
+ schedule_work(&tap->remove_work);
+ }
+wait:
+ err = wait_event_interruptible(tap->remove_wait,
+ !dev_get_drvdata(dev));
+ if (err)
+ return err;
+
+ return size;
+}
+static DEVICE_ATTR(remove, S_IWUSR, NULL, blktap_sysfs_remove_device);
+
+/*
+ * sysfs 'debug' show: concatenate the per-subsystem debug dumps into
+ * the PAGE_SIZE sysfs buffer.  (Signature rejoined: the original
+ * patch line was wrapped by the mailer.)
+ */
+static ssize_t
+blktap_sysfs_debug_device(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct blktap *tap;
+ char *s = buf, *end = buf + PAGE_SIZE;
+
+ tap = dev_get_drvdata(dev);
+ if (!tap)
+ return 0;
+
+ s += blktap_control_debug(tap, s, end - s);
+
+ s += blktap_request_debug(tap, s, end - s);
+
+ s += blktap_device_debug(tap, s, end - s);
+
+ s += blktap_ring_debug(tap, s, end - s);
+
+ return s - buf;
+}
+static DEVICE_ATTR(debug, S_IRUGO, blktap_sysfs_debug_device, NULL);
+
+/*
+ * sysfs 'task' show: pid of the tapdisk process holding the ring, or
+ * empty if none.  (Signature rejoined: the original patch line was
+ * wrapped by the mailer.)
+ */
+static ssize_t
+blktap_sysfs_show_task(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct blktap *tap;
+ ssize_t rv = 0;
+
+ tap = dev_get_drvdata(dev);
+ if (!tap)
+ return 0;
+
+ if (tap->ring.task)
+ rv = sprintf(buf, "%d\n", tap->ring.task->pid);
+
+ return rv;
+}
+static DEVICE_ATTR(task, S_IRUGO, blktap_sysfs_show_task, NULL);
+
+/*
+ * sysfs 'pool' show: name of the page pool backing this tap.  Guard
+ * against cleared drvdata during removal, like every other handler
+ * in this file.
+ */
+static ssize_t
+blktap_sysfs_show_pool(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct blktap *tap = dev_get_drvdata(dev);
+
+ if (!tap)
+ return 0;
+
+ return sprintf(buf, "%s", kobject_name(&tap->pool->kobj));
+}
+
+/*
+ * sysfs 'pool' store: switch the tap to another (possibly new) page
+ * pool.  Only legal while no block device exists; drops the reference
+ * on the previous pool.
+ */
+static ssize_t
+blktap_sysfs_store_pool(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t size)
+{
+ struct blktap *tap = dev_get_drvdata(dev);
+ struct blktap_page_pool *pool, *tmp;
+
+ if (!tap)
+ return -ENODEV;
+
+ if (tap->device.gd)
+ return -EBUSY;
+
+ pool = blktap_page_pool_get(buf);
+ if (IS_ERR(pool))
+ return PTR_ERR(pool);
+
+ tmp = tap->pool;
+ tap->pool = pool;
+ kobject_put(&tmp->kobj);
+
+ return size;
+}
+static DEVICE_ATTR(pool, S_IRUSR|S_IWUSR,
+ blktap_sysfs_show_pool, blktap_sysfs_store_pool);
+
+/*
+ * Create the per-tap sysfs device node and its attribute files.  On
+ * any failure the device is unregistered again and the error returned.
+ */
+int
+blktap_sysfs_create(struct blktap *tap)
+{
+ struct blktap_ring *ring = &tap->ring;
+ struct device *dev;
+ int err = 0;
+
+ init_waitqueue_head(&tap->remove_wait);
+
+ dev = device_create(class, NULL, ring->devno,
+ tap, "blktap%d", tap->minor);
+ if (IS_ERR(dev))
+ err = PTR_ERR(dev);
+ if (!err)
+ err = device_create_file(dev, &dev_attr_name);
+ if (!err)
+ err = device_create_file(dev, &dev_attr_remove);
+ if (!err)
+ err = device_create_file(dev, &dev_attr_debug);
+ if (!err)
+ err = device_create_file(dev, &dev_attr_task);
+ if (!err)
+ err = device_create_file(dev, &dev_attr_pool);
+ if (!err)
+ ring->dev = dev;
+ else
+ device_unregister(dev);
+
+ return err;
+}
+
+/*
+ * Remove the per-tap sysfs node.  Clearing drvdata first makes the
+ * attribute handlers bail out and releases waiters in
+ * blktap_sysfs_remove_device().
+ */
+void
+blktap_sysfs_destroy(struct blktap *tap)
+{
+ struct blktap_ring *ring = &tap->ring;
+ struct device *dev;
+
+ dev = ring->dev;
+
+ if (!dev)
+ return;
+
+ dev_set_drvdata(dev, NULL);
+ wake_up(&tap->remove_wait);
+
+ device_unregister(dev);
+ ring->dev = NULL;
+}
+
+/* class attribute 'verbosity' show: current debug level. */
+static ssize_t
+blktap_sysfs_show_verbosity(struct class *class, char *buf)
+{
+ return sprintf(buf, "%d\n", blktap_debug_level);
+}
+
+/* class attribute 'verbosity' store: set the debug level. */
+static ssize_t
+blktap_sysfs_set_verbosity(struct class *class, const char *buf, size_t size)
+{
+ int level;
+
+ if (sscanf(buf, "%d", &level) == 1) {
+ blktap_debug_level = level;
+ return size;
+ }
+
+ return -EINVAL;
+}
+static CLASS_ATTR(verbosity, S_IRUGO|S_IWUSR,
+ blktap_sysfs_show_verbosity, blktap_sysfs_set_verbosity);
+
+/*
+ * Class attribute 'devices' show: one "minor name" line per tap with a
+ * block device.  Output is bounded by the PAGE_SIZE sysfs buffer via
+ * scnprintf; with many taps the listing is truncated rather than
+ * overflowing the buffer.
+ */
+static ssize_t
+blktap_sysfs_show_devices(struct class *class, char *buf)
+{
+ int i, ret;
+ struct blktap *tap;
+
+ mutex_lock(&blktap_lock);
+
+ ret = 0;
+ for (i = 0; i < blktap_max_minor; i++) {
+ tap = blktaps[i];
+ if (!tap)
+ continue;
+
+ if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse))
+ continue;
+
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+ "%d %s\n", tap->minor, tap->name);
+ }
+
+ mutex_unlock(&blktap_lock);
+
+ return ret;
+}
+static CLASS_ATTR(devices, S_IRUGO, blktap_sysfs_show_devices, NULL);
+
+/* Destroy the blktap2 class; no-op if init never completed. */
+void
+blktap_sysfs_exit(void)
+{
+ if (class)
+ class_destroy(class);
+}
+
+/*
+ * Create the "blktap2" class with its 'verbosity' and 'devices'
+ * attributes; only publish it in the file-scope 'class' pointer once
+ * everything succeeded, otherwise destroy the partial class.
+ */
+int __init
+blktap_sysfs_init(void)
+{
+ struct class *cls;
+ int err = 0;
+
+ cls = class_create(THIS_MODULE, "blktap2");
+ if (IS_ERR(cls))
+ err = PTR_ERR(cls);
+ if (!err)
+ err = class_create_file(cls, &class_attr_verbosity);
+ if (!err)
+ err = class_create_file(cls, &class_attr_devices);
+ if (!err)
+ class = cls;
+ else
+ class_destroy(cls);
+
+ return err;
+}
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index c34e71c..b951b83 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -45,17 +45,6 @@ config XEN_BLKDEV_BACKEND
interface.
-config XEN_BLKDEV_TAP
- tristate "Block-device tap backend driver"
- depends on XEN_BACKEND && BLOCK
- help
- The block tap driver is an alternative to the block back driver
- and allows VM block requests to be redirected to userspace through
- a device interface. The tap allows user-space development of
- high-performance block backends, where disk images may be implemented
- as files, in memory, or on other hosts across the network. This
- driver can safely coexist with the existing blockback driver.
-
config XEN_BLKBACK_PAGEMAP
tristate
depends on XEN_BLKDEV_BACKEND != n && XEN_BLKDEV_TAP != n
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index aa4d6e2..44f835e 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -6,6 +6,5 @@ obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
obj-$(CONFIG_XEN_BALLOON) += balloon.o
obj-$(CONFIG_XEN_DEV_EVTCHN) += evtchn.o
obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/
-obj-$(CONFIG_XEN_BLKDEV_TAP) += blktap/
obj-$(CONFIG_XENFS) += xenfs/
obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o
\ No newline at end of file
diff --git a/drivers/xen/blktap/Makefile b/drivers/xen/blktap/Makefile
deleted file mode 100644
index 822b4e4..0000000
--- a/drivers/xen/blktap/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
-obj-$(CONFIG_XEN_BLKDEV_TAP) := blktap.o
-
-blktap-objs := control.o ring.o device.o request.o sysfs.o
diff --git a/drivers/xen/blktap/blktap.h b/drivers/xen/blktap/blktap.h
deleted file mode 100644
index 1318cad..0000000
--- a/drivers/xen/blktap/blktap.h
+++ /dev/null
@@ -1,161 +0,0 @@
-#ifndef _BLKTAP_H_
-#define _BLKTAP_H_
-
-#include <linux/mm.h>
-#include <linux/fs.h>
-#include <linux/cdev.h>
-#include <linux/init.h>
-#include <linux/scatterlist.h>
-#include <linux/blktap.h>
-
-extern int blktap_debug_level;
-extern int blktap_ring_major;
-extern int blktap_device_major;
-
-#define BTPRINTK(level, tag, force, _f, _a...) \
- do { \
- if (blktap_debug_level > level && \
- (force || printk_ratelimit())) \
- printk(tag "%s: " _f, __func__, ##_a); \
- } while (0)
-
-#define BTDBG(_f, _a...) BTPRINTK(8, KERN_DEBUG, 1, _f, ##_a)
-#define BTINFO(_f, _a...) BTPRINTK(0, KERN_INFO, 0, _f, ##_a)
-#define BTWARN(_f, _a...) BTPRINTK(0, KERN_WARNING, 0, _f, ##_a)
-#define BTERR(_f, _a...) BTPRINTK(0, KERN_ERR, 0, _f, ##_a)
-
-#define MAX_BLKTAP_DEVICE 1024
-
-#define BLKTAP_DEVICE 4
-#define BLKTAP_DEVICE_CLOSED 5
-#define BLKTAP_SHUTDOWN_REQUESTED 8
-
-#define BLKTAP_REQUEST_FREE 0
-#define BLKTAP_REQUEST_PENDING 1
-
-struct blktap_device {
- spinlock_t lock;
- struct gendisk *gd;
-};
-
-struct blktap_request;
-
-struct blktap_ring {
- struct task_struct *task;
-
- struct vm_area_struct *vma;
- blktap_front_ring_t ring;
- unsigned long ring_vstart;
- unsigned long user_vstart;
-
- int n_pending;
- struct blktap_request *pending[BLKTAP_RING_SIZE];
-
- wait_queue_head_t poll_wait;
-
- dev_t devno;
- struct device *dev;
-};
-
-struct blktap_statistics {
- unsigned long st_print;
- int st_rd_req;
- int st_wr_req;
- int st_oo_req;
- int st_rd_sect;
- int st_wr_sect;
- s64 st_rd_cnt;
- s64 st_rd_sum_usecs;
- s64 st_rd_max_usecs;
- s64 st_wr_cnt;
- s64 st_wr_sum_usecs;
- s64 st_wr_max_usecs;
-};
-
-struct blktap_request {
- struct blktap *tap;
- struct request *rq;
- int usr_idx;
-
- int operation;
- struct timeval time;
-
- struct scatterlist sg_table[BLKTAP_SEGMENT_MAX];
- struct page *pages[BLKTAP_SEGMENT_MAX];
- int nr_pages;
-};
-
-#define blktap_for_each_sg(_sg, _req, _i) \
- for (_sg = (_req)->sg_table, _i = 0; \
- _i < (_req)->nr_pages; \
- (_sg)++, (_i)++)
-
-struct blktap {
- int minor;
- unsigned long dev_inuse;
-
- struct blktap_ring ring;
- struct blktap_device device;
- struct blktap_page_pool *pool;
-
- wait_queue_head_t remove_wait;
- struct work_struct remove_work;
- char name[BLKTAP_NAME_MAX];
-
- struct blktap_statistics stats;
-};
-
-struct blktap_page_pool {
- struct mempool_s *bufs;
- spinlock_t lock;
- struct kobject kobj;
- wait_queue_head_t wait;
-};
-
-extern struct mutex blktap_lock;
-extern struct blktap **blktaps;
-extern int blktap_max_minor;
-
-int blktap_control_destroy_tap(struct blktap *);
-size_t blktap_control_debug(struct blktap *, char *, size_t);
-
-int blktap_ring_init(void);
-void blktap_ring_exit(void);
-size_t blktap_ring_debug(struct blktap *, char *, size_t);
-int blktap_ring_create(struct blktap *);
-int blktap_ring_destroy(struct blktap *);
-struct blktap_request *blktap_ring_make_request(struct blktap *);
-void blktap_ring_free_request(struct blktap *,struct blktap_request *);
-void blktap_ring_submit_request(struct blktap *, struct blktap_request *);
-int blktap_ring_map_request_segment(struct blktap *, struct blktap_request *,
int);
-int blktap_ring_map_request(struct blktap *, struct blktap_request *);
-void blktap_ring_unmap_request(struct blktap *, struct blktap_request *);
-void blktap_ring_set_message(struct blktap *, int);
-void blktap_ring_kick_user(struct blktap *);
-
-int blktap_sysfs_init(void);
-void blktap_sysfs_exit(void);
-int blktap_sysfs_create(struct blktap *);
-void blktap_sysfs_destroy(struct blktap *);
-
-int blktap_device_init(void);
-void blktap_device_exit(void);
-size_t blktap_device_debug(struct blktap *, char *, size_t);
-int blktap_device_create(struct blktap *, struct blktap_device_info *);
-int blktap_device_destroy(struct blktap *);
-void blktap_device_destroy_sync(struct blktap *);
-void blktap_device_run_queue(struct blktap *);
-void blktap_device_end_request(struct blktap *, struct blktap_request *, int);
-
-int blktap_page_pool_init(struct kobject *);
-void blktap_page_pool_exit(void);
-struct blktap_page_pool *blktap_page_pool_get(const char *);
-
-size_t blktap_request_debug(struct blktap *, char *, size_t);
-struct blktap_request *blktap_request_alloc(struct blktap *);
-int blktap_request_get_pages(struct blktap *, struct blktap_request *, int);
-void blktap_request_free(struct blktap *, struct blktap_request *);
-void blktap_request_bounce(struct blktap *, struct blktap_request *, int, int);
-
-
-#endif
diff --git a/drivers/xen/blktap/control.c b/drivers/xen/blktap/control.c
deleted file mode 100644
index 57b1a10..0000000
--- a/drivers/xen/blktap/control.c
+++ /dev/null
@@ -1,315 +0,0 @@
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/miscdevice.h>
-#include <linux/device.h>
-#include <asm/uaccess.h>
-
-#include "blktap.h"
-
-DEFINE_MUTEX(blktap_lock);
-
-struct blktap **blktaps;
-int blktap_max_minor;
-static struct blktap_page_pool *default_pool;
-
-static struct blktap *
-blktap_control_get_minor(void)
-{
- int minor;
- struct blktap *tap;
-
- tap = kzalloc(sizeof(*tap), GFP_KERNEL);
- if (unlikely(!tap))
- return NULL;
-
- mutex_lock(&blktap_lock);
-
- for (minor = 0; minor < blktap_max_minor; minor++)
- if (!blktaps[minor])
- break;
-
- if (minor == MAX_BLKTAP_DEVICE)
- goto fail;
-
- if (minor == blktap_max_minor) {
- void *p;
- int n;
-
- n = min(2 * blktap_max_minor, MAX_BLKTAP_DEVICE);
- p = krealloc(blktaps, n * sizeof(blktaps[0]), GFP_KERNEL);
- if (!p)
- goto fail;
-
- blktaps = p;
- minor = blktap_max_minor;
- blktap_max_minor = n;
-
- memset(&blktaps[minor], 0, (n - minor) * sizeof(blktaps[0]));
- }
-
- tap->minor = minor;
- blktaps[minor] = tap;
-
- __module_get(THIS_MODULE);
-out:
- mutex_unlock(&blktap_lock);
- return tap;
-
-fail:
- mutex_unlock(&blktap_lock);
- kfree(tap);
- tap = NULL;
- goto out;
-}
-
-static void
-blktap_control_put_minor(struct blktap* tap)
-{
- blktaps[tap->minor] = NULL;
- kfree(tap);
-
- module_put(THIS_MODULE);
-}
-
-static struct blktap*
-blktap_control_create_tap(void)
-{
- struct blktap *tap;
- int err;
-
- tap = blktap_control_get_minor();
- if (!tap)
- return NULL;
-
- kobject_get(&default_pool->kobj);
- tap->pool = default_pool;
-
- err = blktap_ring_create(tap);
- if (err)
- goto fail_tap;
-
- err = blktap_sysfs_create(tap);
- if (err)
- goto fail_ring;
-
- return tap;
-
-fail_ring:
- blktap_ring_destroy(tap);
-fail_tap:
- blktap_control_put_minor(tap);
-
- return NULL;
-}
-
-int
-blktap_control_destroy_tap(struct blktap *tap)
-{
- int err;
-
- err = blktap_ring_destroy(tap);
- if (err)
- return err;
-
- kobject_put(&tap->pool->kobj);
-
- blktap_sysfs_destroy(tap);
-
- blktap_control_put_minor(tap);
-
- return 0;
-}
-
-static int
-blktap_control_ioctl(struct inode *inode, struct file *filp,
- unsigned int cmd, unsigned long arg)
-{
- struct blktap *tap;
-
- switch (cmd) {
- case BLKTAP_IOCTL_ALLOC_TAP: {
- struct blktap_info info;
- void __user *ptr = (void __user*)arg;
-
- tap = blktap_control_create_tap();
- if (!tap)
- return -ENOMEM;
-
- info.ring_major = blktap_ring_major;
- info.bdev_major = blktap_device_major;
- info.ring_minor = tap->minor;
-
- if (copy_to_user(ptr, &info, sizeof(info))) {
- blktap_control_destroy_tap(tap);
- return -EFAULT;
- }
-
- return 0;
- }
-
- case BLKTAP_IOCTL_FREE_TAP: {
- int minor = arg;
-
- if (minor > MAX_BLKTAP_DEVICE)
- return -EINVAL;
-
- tap = blktaps[minor];
- if (!tap)
- return -ENODEV;
-
- return blktap_control_destroy_tap(tap);
- }
- }
-
- return -ENOIOCTLCMD;
-}
-
-static struct file_operations blktap_control_file_operations = {
- .owner = THIS_MODULE,
- .ioctl = blktap_control_ioctl,
-};
-
-static struct miscdevice blktap_control = {
- .minor = MISC_DYNAMIC_MINOR,
- .name = "blktap-control",
- .fops = &blktap_control_file_operations,
-};
-
-static struct device *control_device;
-
-static ssize_t
-blktap_control_show_default_pool(struct device *device,
- struct device_attribute *attr,
- char *buf)
-{
- return sprintf(buf, "%s", kobject_name(&default_pool->kobj));
-}
-
-static ssize_t
-blktap_control_store_default_pool(struct device *device,
- struct device_attribute *attr,
- const char *buf, size_t size)
-{
- struct blktap_page_pool *pool, *tmp = default_pool;
-
- pool = blktap_page_pool_get(buf);
- if (IS_ERR(pool))
- return PTR_ERR(pool);
-
- default_pool = pool;
- kobject_put(&tmp->kobj);
-
- return size;
-}
-
-static DEVICE_ATTR(default_pool, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH,
- blktap_control_show_default_pool,
- blktap_control_store_default_pool);
-
-size_t
-blktap_control_debug(struct blktap *tap, char *buf, size_t size)
-{
- char *s = buf, *end = buf + size;
-
- s += snprintf(s, end - s,
- "tap %u:%u name:'%s' flags:%#08lx\n",
- MAJOR(tap->ring.devno), MINOR(tap->ring.devno),
- tap->name, tap->dev_inuse);
-
- return s - buf;
-}
-
-static int __init
-blktap_control_init(void)
-{
- int err;
-
- err = misc_register(&blktap_control);
- if (err)
- return err;
-
- control_device = blktap_control.this_device;
-
- blktap_max_minor = min(64, MAX_BLKTAP_DEVICE);
- blktaps = kzalloc(blktap_max_minor * sizeof(blktaps[0]), GFP_KERNEL);
- if (!blktaps) {
- BTERR("failed to allocate blktap minor map");
- return -ENOMEM;
- }
-
- err = blktap_page_pool_init(&control_device->kobj);
- if (err)
- return err;
-
- default_pool = blktap_page_pool_get("default");
- if (!default_pool)
- return -ENOMEM;
-
- err = device_create_file(control_device, &dev_attr_default_pool);
- if (err)
- return err;
-
- return 0;
-}
-
-static void
-blktap_control_exit(void)
-{
- if (default_pool) {
- kobject_put(&default_pool->kobj);
- default_pool = NULL;
- }
-
- blktap_page_pool_exit();
-
- if (blktaps) {
- kfree(blktaps);
- blktaps = NULL;
- }
-
- if (control_device) {
- misc_deregister(&blktap_control);
- control_device = NULL;
- }
-}
-
-static void
-blktap_exit(void)
-{
- blktap_control_exit();
- blktap_ring_exit();
- blktap_sysfs_exit();
- blktap_device_exit();
-}
-
-static int __init
-blktap_init(void)
-{
- int err;
-
- err = blktap_device_init();
- if (err)
- goto fail;
-
- err = blktap_ring_init();
- if (err)
- goto fail;
-
- err = blktap_sysfs_init();
- if (err)
- goto fail;
-
- err = blktap_control_init();
- if (err)
- goto fail;
-
- return 0;
-
-fail:
- blktap_exit();
- return err;
-}
-
-module_init(blktap_init);
-module_exit(blktap_exit);
-MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/xen/blktap/device.c b/drivers/xen/blktap/device.c
deleted file mode 100644
index 9a09457..0000000
--- a/drivers/xen/blktap/device.c
+++ /dev/null
@@ -1,551 +0,0 @@
-#include <linux/fs.h>
-#include <linux/blkdev.h>
-#include <linux/cdrom.h>
-#include <linux/hdreg.h>
-#include <scsi/scsi.h>
-#include <scsi/scsi_ioctl.h>
-
-#include "blktap.h"
-
-int blktap_device_major;
-
-#define dev_to_blktap(_dev) container_of(_dev, struct blktap, device)
-
-static int
-blktap_device_open(struct block_device *bdev, fmode_t mode)
-{
- struct gendisk *disk = bdev->bd_disk;
- struct blktap_device *tapdev = disk->private_data;
-
- if (!tapdev)
- return -ENXIO;
-
- /* NB. we might have bounced a bd trylock by tapdisk. when
- * failing for reasons not !tapdev, make sure to kick tapdisk
- * out of destroy wait state again. */
-
- return 0;
-}
-
-static int
-blktap_device_release(struct gendisk *disk, fmode_t mode)
-{
- struct blktap_device *tapdev = disk->private_data;
- struct block_device *bdev = bdget_disk(disk, 0);
- struct blktap *tap = dev_to_blktap(tapdev);
-
- bdput(bdev);
-
- if (!bdev->bd_openers) {
- set_bit(BLKTAP_DEVICE_CLOSED, &tap->dev_inuse);
- blktap_ring_kick_user(tap);
- }
-
- return 0;
-}
-
-static int
-blktap_device_getgeo(struct block_device *bd, struct hd_geometry *hg)
-{
- /* We don't have real geometry info, but let's at least return
- values consistent with the size of the device */
- sector_t nsect = get_capacity(bd->bd_disk);
- sector_t cylinders = nsect;
-
- hg->heads = 0xff;
- hg->sectors = 0x3f;
- sector_div(cylinders, hg->heads * hg->sectors);
- hg->cylinders = cylinders;
- if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect)
- hg->cylinders = 0xffff;
- return 0;
-}
-
-static int
-blktap_device_ioctl(struct block_device *bd, fmode_t mode,
- unsigned command, unsigned long argument)
-{
- int i;
-
- switch (command) {
- case CDROMMULTISESSION:
- BTDBG("FIXME: support multisession CDs later\n");
- for (i = 0; i < sizeof(struct cdrom_multisession); i++)
- if (put_user(0, (char __user *)(argument + i)))
- return -EFAULT;
- return 0;
-
- case SCSI_IOCTL_GET_IDLUN:
- if (!access_ok(VERIFY_WRITE, argument,
- sizeof(struct scsi_idlun)))
- return -EFAULT;
-
- /* return 0 for now. */
- __put_user(0, &((struct scsi_idlun __user *)argument)->dev_id);
- __put_user(0,
- &((struct scsi_idlun __user
*)argument)->host_unique_id);
- return 0;
-
- default:
- /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
- command);*/
- return -EINVAL; /* same return as native Linux */
- }
-
- return 0;
-}
-
-static struct block_device_operations blktap_device_file_operations = {
- .owner = THIS_MODULE,
- .open = blktap_device_open,
- .release = blktap_device_release,
- .ioctl = blktap_device_ioctl,
- .getgeo = blktap_device_getgeo
-};
-
-/* NB. __blktap holding the queue lock; blktap where unlocked */
-
-static inline struct request*
-__blktap_next_queued_rq(struct request_queue *q)
-{
- return blk_peek_request(q);
-}
-
-static inline void
-__blktap_dequeue_rq(struct request *rq)
-{
- blk_start_request(rq);
-}
-
-/* NB. err == 0 indicates success, failures < 0 */
-
-static inline void
-__blktap_end_queued_rq(struct request *rq, int err)
-{
- blk_start_request(rq);
- __blk_end_request(rq, err, blk_rq_bytes(rq));
-}
-
-static inline void
-__blktap_end_rq(struct request *rq, int err)
-{
- __blk_end_request(rq, err, blk_rq_bytes(rq));
-}
-
-static inline void
-blktap_end_rq(struct request *rq, int err)
-{
- spin_lock_irq(rq->q->queue_lock);
- __blktap_end_rq(rq, err);
- spin_unlock_irq(rq->q->queue_lock);
-}
-
-void
-blktap_device_end_request(struct blktap *tap,
- struct blktap_request *request,
- int error)
-{
- struct blktap_device *tapdev = &tap->device;
- struct request *rq = request->rq;
-
- blktap_ring_unmap_request(tap, request);
-
- blktap_ring_free_request(tap, request);
-
- dev_dbg(disk_to_dev(tapdev->gd),
- "end_request: op=%d error=%d bytes=%d\n",
- rq_data_dir(rq), error, blk_rq_bytes(rq));
-
- blktap_end_rq(rq, error);
-}
-
-int
-blktap_device_make_request(struct blktap *tap, struct request *rq)
-{
- struct blktap_device *tapdev = &tap->device;
- struct blktap_request *request;
- int write, nsegs;
- int err;
-
- request = blktap_ring_make_request(tap);
- if (IS_ERR(request)) {
- err = PTR_ERR(request);
- request = NULL;
-
- if (err == -ENOSPC || err == -ENOMEM)
- goto stop;
-
- goto fail;
- }
-
- write = rq_data_dir(rq) == WRITE;
- nsegs = blk_rq_map_sg(rq->q, rq, request->sg_table);
-
- dev_dbg(disk_to_dev(tapdev->gd),
- "make_request: op=%c bytes=%d nsegs=%d\n",
- write ? 'w' : 'r', blk_rq_bytes(rq), nsegs);
-
- request->rq = rq;
- request->operation = write ? BLKTAP_OP_WRITE : BLKTAP_OP_READ;
-
- err = blktap_request_get_pages(tap, request, nsegs);
- if (err)
- goto stop;
-
- err = blktap_ring_map_request(tap, request);
- if (err)
- goto fail;
-
- blktap_ring_submit_request(tap, request);
-
- return 0;
-
-stop:
- tap->stats.st_oo_req++;
- err = -EBUSY;
-
-_out:
- if (request)
- blktap_ring_free_request(tap, request);
-
- return err;
-fail:
- if (printk_ratelimit())
- dev_warn(disk_to_dev(tapdev->gd),
- "make request: %d, failing\n", err);
- goto _out;
-}
-
-/*
- * called from tapdisk context
- */
-void
-blktap_device_run_queue(struct blktap *tap)
-{
- struct blktap_device *tapdev = &tap->device;
- struct request_queue *q;
- struct request *rq;
- int err;
-
- if (!tapdev->gd)
- return;
-
- q = tapdev->gd->queue;
-
- spin_lock_irq(&tapdev->lock);
- queue_flag_clear(QUEUE_FLAG_STOPPED, q);
-
- do {
- rq = __blktap_next_queued_rq(q);
- if (!rq)
- break;
-
- if (!blk_fs_request(rq)) {
- __blktap_end_queued_rq(rq, -EOPNOTSUPP);
- continue;
- }
-
- spin_unlock_irq(&tapdev->lock);
-
- err = blktap_device_make_request(tap, rq);
-
- spin_lock_irq(&tapdev->lock);
-
- if (err == -EBUSY) {
- blk_stop_queue(q);
- break;
- }
-
- __blktap_dequeue_rq(rq);
-
- if (unlikely(err))
- __blktap_end_rq(rq, err);
- } while (1);
-
- spin_unlock_irq(&tapdev->lock);
-}
-
-static void
-blktap_device_do_request(struct request_queue *rq)
-{
- struct blktap_device *tapdev = rq->queuedata;
- struct blktap *tap = dev_to_blktap(tapdev);
-
- blktap_ring_kick_user(tap);
-}
-
-static void
-blktap_device_configure(struct blktap *tap,
- struct blktap_device_info *info)
-{
- struct blktap_device *tapdev = &tap->device;
- struct gendisk *gd = tapdev->gd;
- struct request_queue *rq = gd->queue;
-
- set_capacity(gd, info->capacity);
- set_disk_ro(gd, !!(info->flags & BLKTAP_DEVICE_FLAG_RO));
-
- /* Hard sector size and max sectors impersonate the equiv. hardware. */
- blk_queue_logical_block_size(rq, info->sector_size);
- blk_queue_max_sectors(rq, 512);
-
- /* Each segment in a request is up to an aligned page in size. */
- blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
- blk_queue_max_segment_size(rq, PAGE_SIZE);
-
- /* Ensure a merged request will fit in a single I/O ring slot. */
- blk_queue_max_phys_segments(rq, BLKTAP_SEGMENT_MAX);
- blk_queue_max_hw_segments(rq, BLKTAP_SEGMENT_MAX);
-
- /* Make sure buffer addresses are sector-aligned. */
- blk_queue_dma_alignment(rq, 511);
-
- /* We are reordering, but cacheless. */
- blk_queue_ordered(rq, QUEUE_ORDERED_DRAIN, NULL);
-}
-
-static int
-blktap_device_validate_info(struct blktap *tap,
- struct blktap_device_info *info)
-{
- struct device *dev = tap->ring.dev;
- int sector_order;
-
- sector_order = ffs(info->sector_size) - 1;
- if (sector_order < 9 ||
- sector_order > 12 ||
- info->sector_size != 1U<<sector_order)
- goto fail;
-
- if (!info->capacity ||
- (info->capacity > ULLONG_MAX >> sector_order))
- goto fail;
-
- return 0;
-
-fail:
- dev_err(dev, "capacity: %llu, sector-size: %u\n",
- info->capacity, info->sector_size);
- return -EINVAL;
-}
-
-int
-blktap_device_destroy(struct blktap *tap)
-{
- struct blktap_device *tapdev = &tap->device;
- struct block_device *bdev;
- struct gendisk *gd;
- int err;
-
- gd = tapdev->gd;
- if (!gd)
- return 0;
-
- bdev = bdget_disk(gd, 0);
-
- err = !mutex_trylock(&bdev->bd_mutex);
- if (err) {
- /* NB. avoid a deadlock. the last opener syncs the
- * bdev holding bd_mutex. */
- err = -EBUSY;
- goto out_nolock;
- }
-
- if (bdev->bd_openers) {
- err = -EBUSY;
- goto out;
- }
-
- del_gendisk(gd);
- gd->private_data = NULL;
-
- blk_cleanup_queue(gd->queue);
-
- put_disk(gd);
- tapdev->gd = NULL;
-
- clear_bit(BLKTAP_DEVICE, &tap->dev_inuse);
- err = 0;
-out:
- mutex_unlock(&bdev->bd_mutex);
-out_nolock:
- bdput(bdev);
-
- return err;
-}
-
-static void
-blktap_device_fail_queue(struct blktap *tap)
-{
- struct blktap_device *tapdev = &tap->device;
- struct request_queue *q = tapdev->gd->queue;
-
- spin_lock_irq(&tapdev->lock);
- queue_flag_clear(QUEUE_FLAG_STOPPED, q);
-
- do {
- struct request *rq = __blktap_next_queued_rq(q);
- if (!rq)
- break;
-
- __blktap_end_queued_rq(rq, -EIO);
- } while (1);
-
- spin_unlock_irq(&tapdev->lock);
-}
-
-static int
-blktap_device_try_destroy(struct blktap *tap)
-{
- int err;
-
- err = blktap_device_destroy(tap);
- if (err)
- blktap_device_fail_queue(tap);
-
- return err;
-}
-
-void
-blktap_device_destroy_sync(struct blktap *tap)
-{
- wait_event(tap->ring.poll_wait,
- !blktap_device_try_destroy(tap));
-}
-
-int
-blktap_device_create(struct blktap *tap, struct blktap_device_info *info)
-{
- int minor, err;
- struct gendisk *gd;
- struct request_queue *rq;
- struct blktap_device *tapdev;
-
- gd = NULL;
- rq = NULL;
- tapdev = &tap->device;
- minor = tap->minor;
-
- if (test_bit(BLKTAP_DEVICE, &tap->dev_inuse))
- return -EEXIST;
-
- if (blktap_device_validate_info(tap, info))
- return -EINVAL;
-
- gd = alloc_disk(1);
- if (!gd) {
- err = -ENOMEM;
- goto fail;
- }
-
- if (minor < 26) {
- sprintf(gd->disk_name, "td%c", 'a' + minor % 26);
- } else if (minor < (26 + 1) * 26) {
- sprintf(gd->disk_name, "td%c%c",
- 'a' + minor / 26 - 1,'a' + minor % 26);
- } else {
- const unsigned int m1 = (minor / 26 - 1) / 26 - 1;
- const unsigned int m2 = (minor / 26 - 1) % 26;
- const unsigned int m3 = minor % 26;
- sprintf(gd->disk_name, "td%c%c%c",
- 'a' + m1, 'a' + m2, 'a' + m3);
- }
-
- gd->major = blktap_device_major;
- gd->first_minor = minor;
- gd->fops = &blktap_device_file_operations;
- gd->private_data = tapdev;
-
- spin_lock_init(&tapdev->lock);
- rq = blk_init_queue(blktap_device_do_request, &tapdev->lock);
- if (!rq) {
- err = -ENOMEM;
- goto fail;
- }
- elevator_init(rq, "noop");
-
- gd->queue = rq;
- rq->queuedata = tapdev;
- tapdev->gd = gd;
-
- blktap_device_configure(tap, info);
- add_disk(gd);
-
- set_bit(BLKTAP_DEVICE, &tap->dev_inuse);
-
- dev_info(disk_to_dev(gd), "sector-size: %u/%u capacity: %llu\n",
- queue_logical_block_size(rq),
- queue_physical_block_size(rq),
- (unsigned long long)get_capacity(gd));
-
- return 0;
-
-fail:
- if (gd)
- del_gendisk(gd);
- if (rq)
- blk_cleanup_queue(rq);
-
- return err;
-}
-
-size_t
-blktap_device_debug(struct blktap *tap, char *buf, size_t size)
-{
- struct gendisk *disk = tap->device.gd;
- struct request_queue *q;
- struct block_device *bdev;
- char *s = buf, *end = buf + size;
-
- if (!disk)
- return 0;
-
- q = disk->queue;
-
- s += snprintf(s, end - s,
- "disk capacity:%llu sector size:%u\n",
- (unsigned long long)get_capacity(disk),
- queue_logical_block_size(q));
-
- s += snprintf(s, end - s,
- "queue flags:%#lx plugged:%d stopped:%d empty:%d\n",
- q->queue_flags,
- blk_queue_plugged(q), blk_queue_stopped(q),
- elv_queue_empty(q));
-
- bdev = bdget_disk(disk, 0);
- if (bdev) {
- s += snprintf(s, end - s,
- "bdev openers:%d closed:%d\n",
- bdev->bd_openers,
- test_bit(BLKTAP_DEVICE_CLOSED, &tap->dev_inuse));
- bdput(bdev);
- }
-
- return s - buf;
-}
-
-int __init
-blktap_device_init()
-{
- int major;
-
- /* Dynamically allocate a major for this device */
- major = register_blkdev(0, "tapdev");
- if (major < 0) {
- BTERR("Couldn't register blktap device\n");
- return -ENOMEM;
- }
-
- blktap_device_major = major;
- BTINFO("blktap device major %d\n", major);
-
- return 0;
-}
-
-void
-blktap_device_exit(void)
-{
- if (blktap_device_major)
- unregister_blkdev(blktap_device_major, "tapdev");
-}
diff --git a/drivers/xen/blktap/request.c b/drivers/xen/blktap/request.c
deleted file mode 100644
index 8cfd6c9..0000000
--- a/drivers/xen/blktap/request.c
+++ /dev/null
@@ -1,418 +0,0 @@
-#include <linux/mempool.h>
-#include <linux/spinlock.h>
-#include <linux/mutex.h>
-#include <linux/sched.h>
-#include <linux/device.h>
-
-#include "blktap.h"
-
-/* max pages per shared pool. just to prevent accidental dos. */
-#define POOL_MAX_PAGES (256*BLKTAP_SEGMENT_MAX)
-
-/* default page pool size. when considering to shrink a shared pool,
- * note that paused tapdisks may grab a whole lot of pages for a long
- * time. */
-#define POOL_DEFAULT_PAGES (2 * BLKTAP_RING_SIZE * BLKTAP_SEGMENT_MAX)
-
-/* max number of pages allocatable per request. */
-#define POOL_MAX_REQUEST_PAGES BLKTAP_SEGMENT_MAX
-
-/* min request structs per pool. These grow dynamically. */
-#define POOL_MIN_REQS BLKTAP_RING_SIZE
-
-static struct kset *pool_set;
-
-#define kobj_to_pool(_kobj) \
- container_of(_kobj, struct blktap_page_pool, kobj)
-
-static struct kmem_cache *request_cache;
-static mempool_t *request_pool;
-
-static void
-__page_pool_wake(struct blktap_page_pool *pool)
-{
- mempool_t *mem = pool->bufs;
-
- /*
- NB. slightly wasteful to always wait for a full segment
- set. but this ensures the next disk makes
- progress. presently, the repeated request struct
- alloc/release cycles would otherwise keep everyone spinning.
- */
-
- if (mem->curr_nr >= POOL_MAX_REQUEST_PAGES)
- wake_up(&pool->wait);
-}
-
-int
-blktap_request_get_pages(struct blktap *tap,
- struct blktap_request *request, int nr_pages)
-{
- struct blktap_page_pool *pool = tap->pool;
- mempool_t *mem = pool->bufs;
- struct page *page;
-
- BUG_ON(request->nr_pages != 0);
- BUG_ON(nr_pages > POOL_MAX_REQUEST_PAGES);
-
- if (mem->curr_nr < nr_pages)
- return -ENOMEM;
-
- /* NB. avoid thundering herds of tapdisks colliding. */
- spin_lock(&pool->lock);
-
- if (mem->curr_nr < nr_pages) {
- spin_unlock(&pool->lock);
- return -ENOMEM;
- }
-
- while (request->nr_pages < nr_pages) {
- page = mempool_alloc(mem, GFP_NOWAIT);
- BUG_ON(!page);
- request->pages[request->nr_pages++] = page;
- }
-
- spin_unlock(&pool->lock);
-
- return 0;
-}
-
-static void
-blktap_request_put_pages(struct blktap *tap,
- struct blktap_request *request)
-{
- struct blktap_page_pool *pool = tap->pool;
- struct page *page;
-
- while (request->nr_pages) {
- page = request->pages[--request->nr_pages];
- mempool_free(page, pool->bufs);
- }
-}
-
-size_t
-blktap_request_debug(struct blktap *tap, char *buf, size_t size)
-{
- struct blktap_page_pool *pool = tap->pool;
- mempool_t *mem = pool->bufs;
- char *s = buf, *end = buf + size;
-
- s += snprintf(buf, end - s,
- "pool:%s pages:%d free:%d\n",
- kobject_name(&pool->kobj),
- mem->min_nr, mem->curr_nr);
-
- return s - buf;
-}
-
-struct blktap_request*
-blktap_request_alloc(struct blktap *tap)
-{
- struct blktap_request *request;
-
- request = mempool_alloc(request_pool, GFP_NOWAIT);
- if (request)
- request->tap = tap;
-
- return request;
-}
-
-void
-blktap_request_free(struct blktap *tap,
- struct blktap_request *request)
-{
- blktap_request_put_pages(tap, request);
-
- mempool_free(request, request_pool);
-
- __page_pool_wake(tap->pool);
-}
-
-void
-blktap_request_bounce(struct blktap *tap,
- struct blktap_request *request,
- int seg, int write)
-{
- struct scatterlist *sg = &request->sg_table[seg];
- void *s, *p;
-
- BUG_ON(seg >= request->nr_pages);
-
- s = sg_virt(sg);
- p = page_address(request->pages[seg]) + sg->offset;
-
- if (write)
- memcpy(p, s, sg->length);
- else
- memcpy(s, p, sg->length);
-}
-
-static void
-blktap_request_ctor(void *obj)
-{
- struct blktap_request *request = obj;
-
- memset(request, 0, sizeof(*request));
- sg_init_table(request->sg_table, ARRAY_SIZE(request->sg_table));
-}
-
-static int
-blktap_page_pool_resize(struct blktap_page_pool *pool, int target)
-{
- mempool_t *bufs = pool->bufs;
- int err;
-
- /* NB. mempool asserts min_nr >= 1 */
- target = max(1, target);
-
- err = mempool_resize(bufs, target, GFP_KERNEL);
- if (err)
- return err;
-
- __page_pool_wake(pool);
-
- return 0;
-}
-
-struct pool_attribute {
- struct attribute attr;
-
- ssize_t (*show)(struct blktap_page_pool *pool,
- char *buf);
-
- ssize_t (*store)(struct blktap_page_pool *pool,
- const char *buf, size_t count);
-};
-
-#define kattr_to_pool_attr(_kattr) \
- container_of(_kattr, struct pool_attribute, attr)
-
-static ssize_t
-blktap_page_pool_show_size(struct blktap_page_pool *pool,
- char *buf)
-{
- mempool_t *mem = pool->bufs;
- return sprintf(buf, "%d", mem->min_nr);
-}
-
-static ssize_t
-blktap_page_pool_store_size(struct blktap_page_pool *pool,
- const char *buf, size_t size)
-{
- int target;
-
- /*
- * NB. target fixup to avoid undesired results. less than a
- * full segment set can wedge the disk. much more than a
- * couple times the physical queue depth is rarely useful.
- */
-
- target = simple_strtoul(buf, NULL, 0);
- target = max(POOL_MAX_REQUEST_PAGES, target);
- target = min(target, POOL_MAX_PAGES);
-
- return blktap_page_pool_resize(pool, target) ? : size;
-}
-
-static struct pool_attribute blktap_page_pool_attr_size =
- __ATTR(size, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH,
- blktap_page_pool_show_size,
- blktap_page_pool_store_size);
-
-static ssize_t
-blktap_page_pool_show_free(struct blktap_page_pool *pool,
- char *buf)
-{
- mempool_t *mem = pool->bufs;
- return sprintf(buf, "%d", mem->curr_nr);
-}
-
-static struct pool_attribute blktap_page_pool_attr_free =
- __ATTR(free, S_IRUSR|S_IRGRP|S_IROTH,
- blktap_page_pool_show_free,
- NULL);
-
-static struct attribute *blktap_page_pool_attrs[] = {
- &blktap_page_pool_attr_size.attr,
- &blktap_page_pool_attr_free.attr,
- NULL,
-};
-
-static inline struct kobject*
-__blktap_kset_find_obj(struct kset *kset, const char *name)
-{
- struct kobject *k;
- struct kobject *ret = NULL;
-
- spin_lock(&kset->list_lock);
- list_for_each_entry(k, &kset->list, entry) {
- if (kobject_name(k) && !strcmp(kobject_name(k), name)) {
- ret = kobject_get(k);
- break;
- }
- }
- spin_unlock(&kset->list_lock);
- return ret;
-}
-
-static ssize_t
-blktap_page_pool_show_attr(struct kobject *kobj, struct attribute *kattr,
- char *buf)
-{
- struct blktap_page_pool *pool = kobj_to_pool(kobj);
- struct pool_attribute *attr = kattr_to_pool_attr(kattr);
-
- if (attr->show)
- return attr->show(pool, buf);
-
- return -EIO;
-}
-
-static ssize_t
-blktap_page_pool_store_attr(struct kobject *kobj, struct attribute *kattr,
- const char *buf, size_t size)
-{
- struct blktap_page_pool *pool = kobj_to_pool(kobj);
- struct pool_attribute *attr = kattr_to_pool_attr(kattr);
-
- if (attr->show)
- return attr->store(pool, buf, size);
-
- return -EIO;
-}
-
-static struct sysfs_ops blktap_page_pool_sysfs_ops = {
- .show = blktap_page_pool_show_attr,
- .store = blktap_page_pool_store_attr,
-};
-
-static void
-blktap_page_pool_release(struct kobject *kobj)
-{
- struct blktap_page_pool *pool = kobj_to_pool(kobj);
- mempool_destroy(pool->bufs);
- kfree(pool);
-}
-
-struct kobj_type blktap_page_pool_ktype = {
- .release = blktap_page_pool_release,
- .sysfs_ops = &blktap_page_pool_sysfs_ops,
- .default_attrs = blktap_page_pool_attrs,
-};
-
-static void*
-__mempool_page_alloc(gfp_t gfp_mask, void *pool_data)
-{
- struct page *page;
-
- if (!(gfp_mask & __GFP_WAIT))
- return NULL;
-
- page = alloc_page(gfp_mask);
- if (page)
- SetPageReserved(page);
-
- return page;
-}
-
-static void
-__mempool_page_free(void *element, void *pool_data)
-{
- struct page *page = element;
-
- ClearPageReserved(page);
- put_page(page);
-}
-
-static struct kobject*
-blktap_page_pool_create(const char *name, int nr_pages)
-{
- struct blktap_page_pool *pool;
- int err;
-
- pool = kzalloc(sizeof(*pool), GFP_KERNEL);
- if (!pool)
- goto fail;
-
- spin_lock_init(&pool->lock);
- init_waitqueue_head(&pool->wait);
-
- pool->bufs = mempool_create(nr_pages,
- __mempool_page_alloc, __mempool_page_free,
- pool);
- if (!pool->bufs)
- goto fail_pool;
-
- kobject_init(&pool->kobj, &blktap_page_pool_ktype);
- pool->kobj.kset = pool_set;
- err = kobject_add(&pool->kobj, &pool_set->kobj, "%s", name);
- if (err)
- goto fail_bufs;
-
- return &pool->kobj;
-
- kobject_del(&pool->kobj);
-fail_bufs:
- mempool_destroy(pool->bufs);
-fail_pool:
- kfree(pool);
-fail:
- return NULL;
-}
-
-struct blktap_page_pool*
-blktap_page_pool_get(const char *name)
-{
- struct kobject *kobj;
-
- kobj = __blktap_kset_find_obj(pool_set, name);
- if (!kobj)
- kobj = blktap_page_pool_create(name,
- POOL_DEFAULT_PAGES);
- if (!kobj)
- return ERR_PTR(-ENOMEM);
-
- return kobj_to_pool(kobj);
-}
-
-int __init
-blktap_page_pool_init(struct kobject *parent)
-{
- request_cache =
- kmem_cache_create("blktap-request",
- sizeof(struct blktap_request), 0,
- 0, blktap_request_ctor);
- if (!request_cache)
- return -ENOMEM;
-
- request_pool =
- mempool_create_slab_pool(POOL_MIN_REQS, request_cache);
- if (!request_pool)
- return -ENOMEM;
-
- pool_set = kset_create_and_add("pools", NULL, parent);
- if (!pool_set)
- return -ENOMEM;
-
- return 0;
-}
-
-void
-blktap_page_pool_exit(void)
-{
- if (pool_set) {
- BUG_ON(!list_empty(&pool_set->list));
- kset_unregister(pool_set);
- pool_set = NULL;
- }
-
- if (request_pool) {
- mempool_destroy(request_pool);
- request_pool = NULL;
- }
-
- if (request_cache) {
- kmem_cache_destroy(request_cache);
- request_cache = NULL;
- }
-}
diff --git a/drivers/xen/blktap/ring.c b/drivers/xen/blktap/ring.c
deleted file mode 100644
index 635f1fd..0000000
--- a/drivers/xen/blktap/ring.c
+++ /dev/null
@@ -1,595 +0,0 @@
-
-#include <linux/device.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/poll.h>
-#include <linux/blkdev.h>
-
-#include "blktap.h"
-
-int blktap_ring_major;
-static struct cdev blktap_ring_cdev;
-
- /*
- * BLKTAP - immediately before the mmap area,
- * we have a bunch of pages reserved for shared memory rings.
- */
-#define RING_PAGES 1
-
-#define BLKTAP_INFO_SIZE_AT(_memb) \
- offsetof(struct blktap_device_info, _memb) + \
- sizeof(((struct blktap_device_info*)0)->_memb)
-
-static void
-blktap_ring_read_response(struct blktap *tap,
- const blktap_ring_rsp_t *rsp)
-{
- struct blktap_ring *ring = &tap->ring;
- struct blktap_request *request;
- int usr_idx, err;
-
- request = NULL;
-
- usr_idx = rsp->id;
- if (usr_idx < 0 || usr_idx >= BLKTAP_RING_SIZE) {
- err = -ERANGE;
- goto invalid;
- }
-
- request = ring->pending[usr_idx];
-
- if (!request) {
- err = -ESRCH;
- goto invalid;
- }
-
- if (rsp->operation != request->operation) {
- err = -EINVAL;
- goto invalid;
- }
-
- dev_dbg(ring->dev,
- "request %d [%p] response: %d\n",
- request->usr_idx, request, rsp->status);
-
- err = rsp->status == BLKTAP_RSP_OKAY ? 0 : -EIO;
-end_request:
- blktap_device_end_request(tap, request, err);
- return;
-
-invalid:
- dev_warn(ring->dev,
- "invalid response, idx:%d status:%d op:%d/%d: err %d\n",
- usr_idx, rsp->status,
- rsp->operation, request->operation,
- err);
- if (request)
- goto end_request;
-}
-
-static void
-blktap_read_ring(struct blktap *tap)
-{
- struct blktap_ring *ring = &tap->ring;
- blktap_ring_rsp_t rsp;
- RING_IDX rc, rp;
-
- down_read(¤t->mm->mmap_sem);
- if (!ring->vma) {
- up_read(¤t->mm->mmap_sem);
- return;
- }
-
- /* for each outstanding message on the ring */
- rp = ring->ring.sring->rsp_prod;
- rmb();
-
- for (rc = ring->ring.rsp_cons; rc != rp; rc++) {
- memcpy(&rsp, RING_GET_RESPONSE(&ring->ring, rc), sizeof(rsp));
- blktap_ring_read_response(tap, &rsp);
- }
-
- ring->ring.rsp_cons = rc;
-
- up_read(¤t->mm->mmap_sem);
-}
-
-#define MMAP_VADDR(_start, _req, _seg) \
- ((_start) + \
- ((_req) * BLKTAP_SEGMENT_MAX * BLKTAP_PAGE_SIZE) + \
- ((_seg) * BLKTAP_PAGE_SIZE))
-
-static int blktap_ring_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
- return VM_FAULT_SIGBUS;
-}
-
-static void
-blktap_ring_fail_pending(struct blktap *tap)
-{
- struct blktap_ring *ring = &tap->ring;
- struct blktap_request *request;
- int usr_idx;
-
- for (usr_idx = 0; usr_idx < BLKTAP_RING_SIZE; usr_idx++) {
- request = ring->pending[usr_idx];
- if (!request)
- continue;
-
- blktap_device_end_request(tap, request, -EIO);
- }
-}
-
-static void
-blktap_ring_vm_close(struct vm_area_struct *vma)
-{
- struct blktap *tap = vma->vm_private_data;
- struct blktap_ring *ring = &tap->ring;
- struct page *page = virt_to_page(ring->ring.sring);
-
- blktap_ring_fail_pending(tap);
-
- zap_page_range(vma, vma->vm_start, PAGE_SIZE, NULL);
- ClearPageReserved(page);
- __free_page(page);
-
- ring->vma = NULL;
-
- if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
- blktap_control_destroy_tap(tap);
-}
-
-static struct vm_operations_struct blktap_ring_vm_operations = {
- .close = blktap_ring_vm_close,
- .fault = blktap_ring_fault,
-};
-
-int
-blktap_ring_map_segment(struct blktap *tap,
- struct blktap_request *request,
- int seg)
-{
- struct blktap_ring *ring = &tap->ring;
- unsigned long uaddr;
-
- uaddr = MMAP_VADDR(ring->user_vstart, request->usr_idx, seg);
- return vm_insert_page(ring->vma, uaddr, request->pages[seg]);
-}
-
-int
-blktap_ring_map_request(struct blktap *tap,
- struct blktap_request *request)
-{
- int seg, err = 0;
- int write;
-
- write = request->operation == BLKTAP_OP_WRITE;
-
- for (seg = 0; seg < request->nr_pages; seg++) {
- if (write)
- blktap_request_bounce(tap, request, seg, write);
-
- err = blktap_ring_map_segment(tap, request, seg);
- if (err)
- break;
- }
-
- if (err)
- blktap_ring_unmap_request(tap, request);
-
- return err;
-}
-
-void
-blktap_ring_unmap_request(struct blktap *tap,
- struct blktap_request *request)
-{
- struct blktap_ring *ring = &tap->ring;
- unsigned long uaddr;
- unsigned size;
- int seg, read;
-
- uaddr = MMAP_VADDR(ring->user_vstart, request->usr_idx, 0);
- size = request->nr_pages << PAGE_SHIFT;
- read = request->operation == BLKTAP_OP_READ;
-
- if (read)
- for (seg = 0; seg < request->nr_pages; seg++)
- blktap_request_bounce(tap, request, seg, !read);
-
- zap_page_range(ring->vma, uaddr, size, NULL);
-}
-
-void
-blktap_ring_free_request(struct blktap *tap,
- struct blktap_request *request)
-{
- struct blktap_ring *ring = &tap->ring;
-
- ring->pending[request->usr_idx] = NULL;
- ring->n_pending--;
-
- blktap_request_free(tap, request);
-}
-
-struct blktap_request*
-blktap_ring_make_request(struct blktap *tap)
-{
- struct blktap_ring *ring = &tap->ring;
- struct blktap_request *request;
- int usr_idx;
-
- if (RING_FULL(&ring->ring))
- return ERR_PTR(-ENOSPC);
-
- request = blktap_request_alloc(tap);
- if (!request)
- return ERR_PTR(-ENOMEM);
-
- for (usr_idx = 0; usr_idx < BLKTAP_RING_SIZE; usr_idx++)
- if (!ring->pending[usr_idx])
- break;
-
- BUG_ON(usr_idx >= BLKTAP_RING_SIZE);
-
- request->tap = tap;
- request->usr_idx = usr_idx;
-
- ring->pending[usr_idx] = request;
- ring->n_pending++;
-
- return request;
-}
-
-void
-blktap_ring_submit_request(struct blktap *tap,
- struct blktap_request *request)
-{
- struct blktap_ring *ring = &tap->ring;
- blktap_ring_req_t *breq;
- struct scatterlist *sg;
- int i, nsecs = 0;
-
- dev_dbg(ring->dev,
- "request %d [%p] submit\n", request->usr_idx, request);
-
- breq = RING_GET_REQUEST(&ring->ring, ring->ring.req_prod_pvt);
-
- breq->id = request->usr_idx;
- breq->sector_number = blk_rq_pos(request->rq);
- breq->__pad = 0;
- breq->operation = request->operation;
- breq->nr_segments = request->nr_pages;
-
- blktap_for_each_sg(sg, request, i) {
- struct blktap_segment *seg = &breq->seg[i];
- int first, count;
-
- count = sg->length >> 9;
- first = sg->offset >> 9;
-
- seg->first_sect = first;
- seg->last_sect = first + count - 1;
-
- nsecs += count;
- }
-
- ring->ring.req_prod_pvt++;
-
- do_gettimeofday(&request->time);
-
-
- if (request->operation == BLKTAP_OP_WRITE) {
- tap->stats.st_wr_sect += nsecs;
- tap->stats.st_wr_req++;
- }
-
- if (request->operation == BLKTAP_OP_READ) {
- tap->stats.st_rd_sect += nsecs;
- tap->stats.st_rd_req++;
- }
-}
-
-static int
-blktap_ring_open(struct inode *inode, struct file *filp)
-{
- struct blktap *tap = NULL;
- int minor;
-
- minor = iminor(inode);
-
- if (minor < blktap_max_minor)
- tap = blktaps[minor];
-
- if (!tap)
- return -ENXIO;
-
- if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
- return -ENXIO;
-
- if (tap->ring.task)
- return -EBUSY;
-
- filp->private_data = tap;
- tap->ring.task = current;
-
- return 0;
-}
-
-static int
-blktap_ring_release(struct inode *inode, struct file *filp)
-{
- struct blktap *tap = filp->private_data;
-
- blktap_device_destroy_sync(tap);
-
- tap->ring.task = NULL;
-
- if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
- blktap_control_destroy_tap(tap);
-
- return 0;
-}
-
-static int
-blktap_ring_mmap(struct file *filp, struct vm_area_struct *vma)
-{
- struct blktap *tap = filp->private_data;
- struct blktap_ring *ring = &tap->ring;
- blktap_sring_t *sring;
- struct page *page = NULL;
- int err;
-
- if (ring->vma)
- return -EBUSY;
-
- page = alloc_page(GFP_KERNEL|__GFP_ZERO);
- if (!page)
- return -ENOMEM;
-
- SetPageReserved(page);
-
- err = vm_insert_page(vma, vma->vm_start, page);
- if (err)
- goto fail;
-
- sring = page_address(page);
- SHARED_RING_INIT(sring);
- FRONT_RING_INIT(&ring->ring, sring, PAGE_SIZE);
-
- ring->ring_vstart = vma->vm_start;
- ring->user_vstart = ring->ring_vstart + PAGE_SIZE;
-
- vma->vm_private_data = tap;
-
- vma->vm_flags |= VM_DONTCOPY;
- vma->vm_flags |= VM_RESERVED;
-
- vma->vm_ops = &blktap_ring_vm_operations;
-
- ring->vma = vma;
- return 0;
-
-fail:
- if (page) {
- zap_page_range(vma, vma->vm_start, PAGE_SIZE, NULL);
- ClearPageReserved(page);
- __free_page(page);
- }
-
- return err;
-}
-
-static int
-blktap_ring_ioctl(struct inode *inode, struct file *filp,
- unsigned int cmd, unsigned long arg)
-{
- struct blktap *tap = filp->private_data;
- struct blktap_ring *ring = &tap->ring;
- void __user *ptr = (void *)arg;
- int err;
-
- BTDBG("%d: cmd: %u, arg: %lu\n", tap->minor, cmd, arg);
-
- if (!ring->vma || ring->vma->vm_mm != current->mm)
- return -EACCES;
-
- switch(cmd) {
- case BLKTAP_IOCTL_RESPOND:
-
- blktap_read_ring(tap);
- return 0;
-
- case BLKTAP_IOCTL_CREATE_DEVICE_COMPAT: {
- struct blktap_device_info info;
- struct blktap2_params params;
-
- if (copy_from_user(¶ms, ptr, sizeof(params)))
- return -EFAULT;
-
- info.capacity = params.capacity;
- info.sector_size = params.sector_size;
- info.flags = 0;
-
- err = blktap_device_create(tap, &info);
- if (err)
- return err;
-
- if (params.name[0]) {
- strncpy(tap->name, params.name, sizeof(params.name));
- tap->name[sizeof(tap->name)-1] = 0;
- }
-
- return 0;
- }
-
- case BLKTAP_IOCTL_CREATE_DEVICE: {
- struct blktap_device_info __user *ptr = (void *)arg;
- struct blktap_device_info info;
- unsigned long mask;
- size_t base_sz, sz;
-
- mask = BLKTAP_DEVICE_FLAG_RO;
-
- memset(&info, 0, sizeof(info));
- sz = base_sz = BLKTAP_INFO_SIZE_AT(flags);
-
- if (copy_from_user(&info, ptr, sz))
- return -EFAULT;
-
- if (sz > base_sz)
- if (copy_from_user(&info, ptr, sz))
- return -EFAULT;
-
- if (put_user(info.flags & mask, &ptr->flags))
- return -EFAULT;
-
- return blktap_device_create(tap, &info);
- }
-
- case BLKTAP_IOCTL_REMOVE_DEVICE:
-
- return blktap_device_destroy(tap);
- }
-
- return -ENOIOCTLCMD;
-}
-
-static unsigned int blktap_ring_poll(struct file *filp, poll_table *wait)
-{
- struct blktap *tap = filp->private_data;
- struct blktap_ring *ring = &tap->ring;
- int work;
-
- poll_wait(filp, &tap->pool->wait, wait);
- poll_wait(filp, &ring->poll_wait, wait);
-
- down_read(¤t->mm->mmap_sem);
- if (ring->vma && tap->device.gd)
- blktap_device_run_queue(tap);
- up_read(¤t->mm->mmap_sem);
-
- work = ring->ring.req_prod_pvt - ring->ring.sring->req_prod;
- RING_PUSH_REQUESTS(&ring->ring);
-
- if (work ||
- ring->ring.sring->private.tapif_user.msg ||
- test_and_clear_bit(BLKTAP_DEVICE_CLOSED, &tap->dev_inuse))
- return POLLIN | POLLRDNORM;
-
- return 0;
-}
-
-static struct file_operations blktap_ring_file_operations = {
- .owner = THIS_MODULE,
- .open = blktap_ring_open,
- .release = blktap_ring_release,
- .ioctl = blktap_ring_ioctl,
- .mmap = blktap_ring_mmap,
- .poll = blktap_ring_poll,
-};
-
-void
-blktap_ring_kick_user(struct blktap *tap)
-{
- wake_up(&tap->ring.poll_wait);
-}
-
-int
-blktap_ring_destroy(struct blktap *tap)
-{
- struct blktap_ring *ring = &tap->ring;
-
- if (ring->task || ring->vma)
- return -EBUSY;
-
- return 0;
-}
-
-int
-blktap_ring_create(struct blktap *tap)
-{
- struct blktap_ring *ring = &tap->ring;
-
- init_waitqueue_head(&ring->poll_wait);
- ring->devno = MKDEV(blktap_ring_major, tap->minor);
-
- return 0;
-}
-
-size_t
-blktap_ring_debug(struct blktap *tap, char *buf, size_t size)
-{
- struct blktap_ring *ring = &tap->ring;
- char *s = buf, *end = buf + size;
- int usr_idx;
-
- s += snprintf(s, end - s,
- "begin pending:%d\n", ring->n_pending);
-
- for (usr_idx = 0; usr_idx < BLKTAP_RING_SIZE; usr_idx++) {
- struct blktap_request *request;
- struct timeval *time;
- int write;
-
- request = ring->pending[usr_idx];
- if (!request)
- continue;
-
- write = request->operation == BLKTAP_OP_WRITE;
- time = &request->time;
-
- s += snprintf(s, end - s,
- "%02d: usr_idx:%02d "
- "op:%c nr_pages:%02d time:%lu.%09lu\n",
- usr_idx, request->usr_idx,
- write ? 'W' : 'R', request->nr_pages,
- time->tv_sec, time->tv_usec);
- }
-
- s += snprintf(s, end - s, "end pending\n");
-
- return s - buf;
-}
-
-
-int __init
-blktap_ring_init(void)
-{
- dev_t dev = 0;
- int err;
-
- cdev_init(&blktap_ring_cdev, &blktap_ring_file_operations);
- blktap_ring_cdev.owner = THIS_MODULE;
-
- err = alloc_chrdev_region(&dev, 0, MAX_BLKTAP_DEVICE, "blktap2");
- if (err < 0) {
- BTERR("error registering ring devices: %d\n", err);
- return err;
- }
-
- err = cdev_add(&blktap_ring_cdev, dev, MAX_BLKTAP_DEVICE);
- if (err) {
- BTERR("error adding ring device: %d\n", err);
- unregister_chrdev_region(dev, MAX_BLKTAP_DEVICE);
- return err;
- }
-
- blktap_ring_major = MAJOR(dev);
- BTINFO("blktap ring major: %d\n", blktap_ring_major);
-
- return 0;
-}
-
-void
-blktap_ring_exit(void)
-{
- if (!blktap_ring_major)
- return;
-
- cdev_del(&blktap_ring_cdev);
- unregister_chrdev_region(MKDEV(blktap_ring_major, 0),
- MAX_BLKTAP_DEVICE);
-
- blktap_ring_major = 0;
-}
diff --git a/drivers/xen/blktap/sysfs.c b/drivers/xen/blktap/sysfs.c
deleted file mode 100644
index 182de9a..0000000
--- a/drivers/xen/blktap/sysfs.c
+++ /dev/null
@@ -1,288 +0,0 @@
-#include <linux/types.h>
-#include <linux/device.h>
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/genhd.h>
-#include <linux/blkdev.h>
-
-#include "blktap.h"
-
-int blktap_debug_level = 1;
-
-static struct class *class;
-
-static ssize_t
-blktap_sysfs_set_name(struct device *dev, struct device_attribute *attr, const char *buf, size_t size)
-{
- struct blktap *tap;
-
- tap = dev_get_drvdata(dev);
- if (!tap)
- return 0;
-
- if (size >= BLKTAP_NAME_MAX)
- return -ENAMETOOLONG;
-
- if (strnlen(buf, size) != size)
- return -EINVAL;
-
- strcpy(tap->name, buf);
-
- return size;
-}
-
-static ssize_t
-blktap_sysfs_get_name(struct device *dev, struct device_attribute *attr, char *buf)
-{
- struct blktap *tap;
- ssize_t size;
-
- tap = dev_get_drvdata(dev);
- if (!tap)
- return 0;
-
- if (tap->name[0])
- size = sprintf(buf, "%s\n", tap->name);
- else
- size = sprintf(buf, "%d\n", tap->minor);
-
- return size;
-}
-static DEVICE_ATTR(name, S_IRUGO|S_IWUSR,
- blktap_sysfs_get_name, blktap_sysfs_set_name);
-
-static void
-blktap_sysfs_remove_work(struct work_struct *work)
-{
- struct blktap *tap
- = container_of(work, struct blktap, remove_work);
- blktap_control_destroy_tap(tap);
-}
-
-static ssize_t
-blktap_sysfs_remove_device(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t size)
-{
- struct blktap *tap;
- int err;
-
- tap = dev_get_drvdata(dev);
- if (!tap)
- return size;
-
- if (test_and_set_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
- goto wait;
-
- if (tap->ring.vma) {
- blktap_sring_t *sring = tap->ring.ring.sring;
- sring->private.tapif_user.msg = BLKTAP_RING_MESSAGE_CLOSE;
- blktap_ring_kick_user(tap);
- } else {
- INIT_WORK(&tap->remove_work, blktap_sysfs_remove_work);
- schedule_work(&tap->remove_work);
- }
-wait:
- err = wait_event_interruptible(tap->remove_wait,
- !dev_get_drvdata(dev));
- if (err)
- return err;
-
- return size;
-}
-static DEVICE_ATTR(remove, S_IWUSR, NULL, blktap_sysfs_remove_device);
-
-static ssize_t
-blktap_sysfs_debug_device(struct device *dev, struct device_attribute *attr, char *buf)
-{
- struct blktap *tap;
- char *s = buf, *end = buf + PAGE_SIZE;
-
- tap = dev_get_drvdata(dev);
- if (!tap)
- return 0;
-
- s += blktap_control_debug(tap, s, end - s);
-
- s += blktap_request_debug(tap, s, end - s);
-
- s += blktap_device_debug(tap, s, end - s);
-
- s += blktap_ring_debug(tap, s, end - s);
-
- return s - buf;
-}
-static DEVICE_ATTR(debug, S_IRUGO, blktap_sysfs_debug_device, NULL);
-
-static ssize_t
-blktap_sysfs_show_task(struct device *dev, struct device_attribute *attr, char *buf)
-{
- struct blktap *tap;
- ssize_t rv = 0;
-
- tap = dev_get_drvdata(dev);
- if (!tap)
- return 0;
-
- if (tap->ring.task)
- rv = sprintf(buf, "%d\n", tap->ring.task->pid);
-
- return rv;
-}
-static DEVICE_ATTR(task, S_IRUGO, blktap_sysfs_show_task, NULL);
-
-static ssize_t
-blktap_sysfs_show_pool(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct blktap *tap = dev_get_drvdata(dev);
- return sprintf(buf, "%s", kobject_name(&tap->pool->kobj));
-}
-
-static ssize_t
-blktap_sysfs_store_pool(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t size)
-{
- struct blktap *tap = dev_get_drvdata(dev);
- struct blktap_page_pool *pool, *tmp = tap->pool;
-
- if (tap->device.gd)
- return -EBUSY;
-
- pool = blktap_page_pool_get(buf);
- if (IS_ERR(pool))
- return PTR_ERR(pool);
-
- tap->pool = pool;
- kobject_put(&tmp->kobj);
-
- return size;
-}
-DEVICE_ATTR(pool, S_IRUSR|S_IWUSR,
- blktap_sysfs_show_pool, blktap_sysfs_store_pool);
-
-int
-blktap_sysfs_create(struct blktap *tap)
-{
- struct blktap_ring *ring = &tap->ring;
- struct device *dev;
- int err = 0;
-
- init_waitqueue_head(&tap->remove_wait);
-
- dev = device_create(class, NULL, ring->devno,
- tap, "blktap%d", tap->minor);
- if (IS_ERR(dev))
- err = PTR_ERR(dev);
- if (!err)
- err = device_create_file(dev, &dev_attr_name);
- if (!err)
- err = device_create_file(dev, &dev_attr_remove);
- if (!err)
- err = device_create_file(dev, &dev_attr_debug);
- if (!err)
- err = device_create_file(dev, &dev_attr_task);
- if (!err)
- err = device_create_file(dev, &dev_attr_pool);
- if (!err)
- ring->dev = dev;
- else
- device_unregister(dev);
-
- return err;
-}
-
-void
-blktap_sysfs_destroy(struct blktap *tap)
-{
- struct blktap_ring *ring = &tap->ring;
- struct device *dev;
-
- dev = ring->dev;
-
- if (!dev)
- return;
-
- dev_set_drvdata(dev, NULL);
- wake_up(&tap->remove_wait);
-
- device_unregister(dev);
- ring->dev = NULL;
-}
-
-static ssize_t
-blktap_sysfs_show_verbosity(struct class *class, char *buf)
-{
- return sprintf(buf, "%d\n", blktap_debug_level);
-}
-
-static ssize_t
-blktap_sysfs_set_verbosity(struct class *class, const char *buf, size_t size)
-{
- int level;
-
- if (sscanf(buf, "%d", &level) == 1) {
- blktap_debug_level = level;
- return size;
- }
-
- return -EINVAL;
-}
-static CLASS_ATTR(verbosity, S_IRUGO|S_IWUSR,
- blktap_sysfs_show_verbosity, blktap_sysfs_set_verbosity);
-
-static ssize_t
-blktap_sysfs_show_devices(struct class *class, char *buf)
-{
- int i, ret;
- struct blktap *tap;
-
- mutex_lock(&blktap_lock);
-
- ret = 0;
- for (i = 0; i < blktap_max_minor; i++) {
- tap = blktaps[i];
- if (!tap)
- continue;
-
- if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse))
- continue;
-
- ret += sprintf(buf + ret, "%d %s\n", tap->minor, tap->name);
- }
-
- mutex_unlock(&blktap_lock);
-
- return ret;
-}
-static CLASS_ATTR(devices, S_IRUGO, blktap_sysfs_show_devices, NULL);
-
-void
-blktap_sysfs_exit(void)
-{
- if (class)
- class_destroy(class);
-}
-
-int __init
-blktap_sysfs_init(void)
-{
- struct class *cls;
- int err = 0;
-
- cls = class_create(THIS_MODULE, "blktap2");
- if (IS_ERR(cls))
- err = PTR_ERR(cls);
- if (!err)
- err = class_create_file(cls, &class_attr_verbosity);
- if (!err)
- err = class_create_file(cls, &class_attr_devices);
- if (!err)
- class = cls;
- else
- class_destroy(cls);
-
- return err;
-}
--
1.7.0.4
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|