WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH 2/5] blktap: Make VMAs non-foreign and bounce buffered.

To: Xen <xen-devel@xxxxxxxxxxxxxxxxxxx>
Subject: [Xen-devel] [PATCH 2/5] blktap: Make VMAs non-foreign and bounce buffered.
From: Daniel Stodden <daniel.stodden@xxxxxxxxxx>
Date: Fri, 12 Nov 2010 15:31:44 -0800
Cc: Jeremy Fitzhardinge <jeremy@xxxxxxxx>, Daniel Stodden <daniel.stodden@xxxxxxxxxx>
Delivery-date: Fri, 12 Nov 2010 15:39:17 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
In-reply-to: <1289604707-13378-1-git-send-email-daniel.stodden@xxxxxxxxxx>
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
References: <1289604707-13378-1-git-send-email-daniel.stodden@xxxxxxxxxx>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
Drop zero-copy I/O. Removes all grantmap mechanism from blktap,
bouncing I/O from/to a pool of dom0 memory and request
SGs. Essentially renders blktap without any residual dependencies on
Xen whatsoever.

Signed-off-by: Daniel Stodden <daniel.stodden@xxxxxxxxxx>
---
 drivers/xen/blktap/blktap.h  |   43 ++--
 drivers/xen/blktap/control.c |    8 +-
 drivers/xen/blktap/device.c  |  564 ++++++------------------------------------
 drivers/xen/blktap/request.c |   20 ++-
 drivers/xen/blktap/ring.c    |  319 +++++++++++++------------
 5 files changed, 285 insertions(+), 669 deletions(-)

diff --git a/drivers/xen/blktap/blktap.h b/drivers/xen/blktap/blktap.h
index ad79c15..fe63fc9 100644
--- a/drivers/xen/blktap/blktap.h
+++ b/drivers/xen/blktap/blktap.h
@@ -7,7 +7,6 @@
 #include <linux/init.h>
 #include <linux/scatterlist.h>
 #include <xen/blkif.h>
-#include <xen/grant_table.h>
 
 extern int blktap_debug_level;
 extern int blktap_ring_major;
@@ -27,7 +26,6 @@ extern int blktap_device_major;
 
 #define MAX_BLKTAP_DEVICE            1024
 
-#define BLKTAP_CONTROL               1
 #define BLKTAP_DEVICE                4
 #define BLKTAP_DEVICE_CLOSED         5
 #define BLKTAP_SHUTDOWN_REQUESTED    8
@@ -94,11 +92,13 @@ struct blktap_ring {
        struct task_struct            *task;
 
        struct vm_area_struct         *vma;
-       struct blkif_front_ring             ring;
-       struct vm_foreign_map          foreign_map;
+       struct blkif_front_ring        ring;
        unsigned long                  ring_vstart;
        unsigned long                  user_vstart;
 
+       int                            n_pending;
+       struct blktap_request         *pending[MAX_PENDING_REQS];
+
        wait_queue_head_t              poll_wait;
 
        dev_t                          devno;
@@ -123,19 +123,21 @@ struct blktap_statistics {
 struct blktap_request {
        struct blktap                 *tap;
        struct request                *rq;
-       uint16_t                       usr_idx;
-
-       uint8_t                        status;
-       atomic_t                       pendcnt;
-       unsigned short                 operation;
+       int                            usr_idx;
 
+       int                            operation;
        struct timeval                 time;
-       struct grant_handle_pair       handles[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 
+       struct scatterlist             sg_table[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        struct page                   *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        int                            nr_pages;
 };
 
+#define blktap_for_each_sg(_sg, _req, _i)      \
+       for (_sg = (_req)->sg_table, _i = 0;    \
+            _i < (_req)->nr_pages;             \
+            (_sg)++, (_i)++)
+
 struct blktap {
        int                            minor;
        unsigned long                  dev_inuse;
@@ -144,10 +146,6 @@ struct blktap {
        struct blktap_device           device;
        struct blktap_page_pool       *pool;
 
-       int                            pending_cnt;
-       struct blktap_request         *pending_requests[MAX_PENDING_REQS];
-       struct scatterlist             sg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-
        wait_queue_head_t              remove_wait;
        struct work_struct             remove_work;
        char                           name[BLKTAP2_MAX_MESSAGE_LEN];
@@ -174,6 +172,13 @@ void blktap_ring_exit(void);
 size_t blktap_ring_debug(struct blktap *, char *, size_t);
 int blktap_ring_create(struct blktap *);
 int blktap_ring_destroy(struct blktap *);
+struct blktap_request *blktap_ring_make_request(struct blktap *);
+void blktap_ring_free_request(struct blktap *,struct blktap_request *);
+void blktap_ring_submit_request(struct blktap *, struct blktap_request *);
+int blktap_ring_map_request_segment(struct blktap *, struct blktap_request *, int);
+int blktap_ring_map_request(struct blktap *, struct blktap_request *);
+void blktap_ring_unmap_request(struct blktap *, struct blktap_request *);
+void blktap_ring_set_message(struct blktap *, int);
 void blktap_ring_kick_user(struct blktap *);
 
 int blktap_sysfs_init(void);
@@ -187,7 +192,7 @@ size_t blktap_device_debug(struct blktap *, char *, size_t);
 int blktap_device_create(struct blktap *, struct blktap_params *);
 int blktap_device_destroy(struct blktap *);
 void blktap_device_destroy_sync(struct blktap *);
-int blktap_device_run_queue(struct blktap *);
+void blktap_device_run_queue(struct blktap *);
 void blktap_device_end_request(struct blktap *, struct blktap_request *, int);
 
 int blktap_page_pool_init(struct kobject *);
@@ -200,13 +205,5 @@ int blktap_request_get_pages(struct blktap *, struct blktap_request *, int);
 void blktap_request_free(struct blktap *, struct blktap_request *);
 void blktap_request_bounce(struct blktap *, struct blktap_request *, int, int);
 
-static inline unsigned long
-request_to_kaddr(struct blktap_request *req, int seg)
-{
-       return (unsigned long)page_address(req->pages[seg]);
-}
-
-#define request_to_page(_request, _seg) ((_request)->pages[_seg])
-
 
 #endif
diff --git a/drivers/xen/blktap/control.c b/drivers/xen/blktap/control.c
index 8652e07..f339bba 100644
--- a/drivers/xen/blktap/control.c
+++ b/drivers/xen/blktap/control.c
@@ -18,13 +18,10 @@ blktap_control_get_minor(void)
        int minor;
        struct blktap *tap;
 
-       tap = kmalloc(sizeof(*tap), GFP_KERNEL);
+       tap = kzalloc(sizeof(*tap), GFP_KERNEL);
        if (unlikely(!tap))
                return NULL;
 
-       memset(tap, 0, sizeof(*tap));
-       sg_init_table(tap->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
-
        mutex_lock(&blktap_lock);
 
        for (minor = 0; minor < blktap_max_minor; minor++)
@@ -290,9 +287,6 @@ blktap_init(void)
 {
        int err;
 
-       if (!xen_pv_domain())
-               return -ENODEV;
-
        err = blktap_device_init();
        if (err)
                goto fail;
diff --git a/drivers/xen/blktap/device.c b/drivers/xen/blktap/device.c
index ed95548..02e1fc8 100644
--- a/drivers/xen/blktap/device.c
+++ b/drivers/xen/blktap/device.c
@@ -2,27 +2,11 @@
 #include <linux/blkdev.h>
 #include <linux/cdrom.h>
 #include <linux/hdreg.h>
-#include <linux/module.h>
-#include <asm/tlbflush.h>
-
 #include <scsi/scsi.h>
 #include <scsi/scsi_ioctl.h>
 
-#include <xen/xenbus.h>
-#include <xen/interface/io/blkif.h>
-
-#include <asm/xen/page.h>
-#include <asm/xen/hypercall.h>
-
 #include "blktap.h"
 
-#include "../blkback/blkback-pagemap.h"
-
-struct blktap_grant_table {
-       int cnt;
-       struct gnttab_map_grant_ref grants[BLKIF_MAX_SEGMENTS_PER_REQUEST * 2];
-};
-
 int blktap_device_major;
 
 #define dev_to_blktap(_dev) container_of(_dev, struct blktap, device)
@@ -119,526 +103,136 @@ static struct block_device_operations blktap_device_file_operations = {
        .getgeo    = blktap_device_getgeo
 };
 
-static int
-blktap_map_uaddr_fn(pte_t *ptep, struct page *pmd_page,
-                   unsigned long addr, void *data)
-{
-       pte_t *pte = (pte_t *)data;
-
-       BTDBG("ptep %p -> %012llx\n", ptep, pte_val(*pte));
-       set_pte(ptep, *pte);
-       return 0;
-}
-
-static int
-blktap_map_uaddr(struct mm_struct *mm, unsigned long address, pte_t pte)
-{
-       return apply_to_page_range(mm, address,
-                                  PAGE_SIZE, blktap_map_uaddr_fn, &pte);
-}
-
-static int
-blktap_umap_uaddr_fn(pte_t *ptep, struct page *pmd_page,
-                    unsigned long addr, void *data)
-{
-       struct mm_struct *mm = (struct mm_struct *)data;
-
-       BTDBG("ptep %p\n", ptep);
-       pte_clear(mm, addr, ptep);
-       return 0;
-}
-
-static int
-blktap_umap_uaddr(struct mm_struct *mm, unsigned long address)
-{
-       return apply_to_page_range(mm, address,
-                                  PAGE_SIZE, blktap_umap_uaddr_fn, mm);
-}
-
-static inline void
-flush_tlb_kernel_page(unsigned long kvaddr)
-{
-       flush_tlb_kernel_range(kvaddr, kvaddr + PAGE_SIZE);
-}
-
-static void
-blktap_device_end_dequeued_request(struct blktap_device *dev,
-                                  struct request *req, int error)
-{
-       unsigned long flags;
-       int ret;
-
-       //spin_lock_irq(&dev->lock);
-       spin_lock_irqsave(dev->gd->queue->queue_lock, flags);
-       ret = __blk_end_request(req, error, blk_rq_bytes(req));
-       spin_unlock_irqrestore(dev->gd->queue->queue_lock, flags);
-       //spin_unlock_irq(&dev->lock);
-
-       BUG_ON(ret);
-}
-
-static void
-blktap_device_fast_flush(struct blktap *tap, struct blktap_request *request)
-{
-       uint64_t ptep;
-       int ret, usr_idx;
-       unsigned int i, cnt;
-       struct page **map, *page;
-       struct blktap_ring *ring;
-       struct grant_handle_pair *khandle;
-       unsigned long kvaddr, uvaddr, offset;
-       struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST * 2];
-
-       cnt     = 0;
-       ring    = &tap->ring;
-       usr_idx = request->usr_idx;
-       map     = ring->foreign_map.map;
-
-       if (!ring->vma)
-               return;
-
-       if (xen_feature(XENFEAT_auto_translated_physmap))
-               zap_page_range(ring->vma, 
-                              MMAP_VADDR(ring->user_vstart, usr_idx, 0),
-                              request->nr_pages << PAGE_SHIFT, NULL);
-
-       for (i = 0; i < request->nr_pages; i++) {
-               kvaddr = request_to_kaddr(request, i);
-               uvaddr = MMAP_VADDR(ring->user_vstart, usr_idx, i);
-
-               khandle = request->handles + i;
-
-               if (khandle->kernel != INVALID_GRANT_HANDLE) {
-                       gnttab_set_unmap_op(&unmap[cnt], kvaddr,
-                                           GNTMAP_host_map, khandle->kernel);
-                       cnt++;
-                       set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
-                                           INVALID_P2M_ENTRY);
-               }
-
-               if (khandle->user != INVALID_GRANT_HANDLE) {
-                       BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
-                       if (create_lookup_pte_addr(ring->vma->vm_mm,
-                                                  uvaddr, &ptep) != 0) {
-                               BTERR("Couldn't get a pte addr!\n");
-                               return;
-                       }
-
-                       gnttab_set_unmap_op(&unmap[cnt], ptep,
-                                           GNTMAP_host_map
-                                           | GNTMAP_application_map
-                                           | GNTMAP_contains_pte,
-                                           khandle->user);
-                       cnt++;
-               }
-
-               offset = (uvaddr - ring->vma->vm_start) >> PAGE_SHIFT;
-
-               BTDBG("offset: 0x%08lx, page: %p, request: %p, usr_idx: %d, "
-                     "seg: %d, kvaddr: 0x%08lx, khandle: %u, uvaddr: "
-                     "0x%08lx, handle: %u\n", offset, map[offset], request,
-                     usr_idx, i, kvaddr, khandle->kernel, uvaddr,
-                     khandle->user);
-
-               page = map[offset];
-               if (page && blkback_pagemap_contains_page(page))
-                       set_page_private(page, 0);
-
-               map[offset] = NULL;
-
-               khandle->kernel = INVALID_GRANT_HANDLE;
-               khandle->user   = INVALID_GRANT_HANDLE;
-       }
-
-       if (cnt) {
-               ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
-                                               unmap, cnt);
-               BUG_ON(ret);
-       }
-
-       if (!xen_feature(XENFEAT_auto_translated_physmap))
-               zap_page_range(ring->vma, 
-                              MMAP_VADDR(ring->user_vstart, usr_idx, 0), 
-                              request->nr_pages << PAGE_SHIFT, NULL);
-}
-
-static void
-blktap_unmap(struct blktap *tap, struct blktap_request *request)
-{
-       int i, usr_idx;
-       unsigned long kvaddr;
-
-       usr_idx = request->usr_idx;
-
-       for (i = 0; i < request->nr_pages; i++) {
-               kvaddr = request_to_kaddr(request, i);
-               BTDBG("request: %p, seg: %d, kvaddr: 0x%08lx, khandle: %u, "
-                     "uvaddr: 0x%08lx, uhandle: %u\n", request, i,
-                     kvaddr, request->handles[i].kernel,
-                     MMAP_VADDR(tap->ring.user_vstart, usr_idx, i),
-                     request->handles[i].user);
-
-               if (request->handles[i].kernel == INVALID_GRANT_HANDLE) {
-                       blktap_umap_uaddr(current->mm, kvaddr);
-                       flush_tlb_kernel_page(kvaddr);
-                       set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
-                                           INVALID_P2M_ENTRY);
-               }
-       }
-
-       blktap_device_fast_flush(tap, request);
-}
-
 void
 blktap_device_end_request(struct blktap *tap,
                          struct blktap_request *request,
                          int error)
 {
        struct blktap_device *tapdev = &tap->device;
+       struct request *rq = request->rq;
+
+       blktap_ring_unmap_request(tap, request);
+
+       blktap_ring_free_request(tap, request);
 
-       blktap_unmap(tap, request);
+       dev_dbg(&tapdev->gd->dev,
+               "end_request: op=%d error=%d bytes=%d\n",
+               rq_data_dir(rq), error, blk_rq_bytes(rq));
 
        spin_lock_irq(&tapdev->lock);
        end_request(request->rq, !error);
        spin_unlock_irq(&tapdev->lock);
-
-       blktap_request_free(tap, request);
 }
 
-static int
-blktap_prep_foreign(struct blktap *tap,
-                   struct blktap_request *request,
-                   struct blkif_request *blkif_req,
-                   unsigned int seg, struct page *page,
-                   struct blktap_grant_table *table)
-{
-       uint64_t ptep;
-       uint32_t flags;
-#ifdef BLKTAP_CHAINED_BLKTAP
-       struct page *tap_page;
-#endif
-       struct blktap_ring *ring;
-       struct blkback_pagemap map;
-       unsigned long uvaddr, kvaddr;
-
-       ring = &tap->ring;
-       map  = blkback_pagemap_read(page);
-       blkif_req->seg[seg].gref = map.gref;
-
-       uvaddr = MMAP_VADDR(ring->user_vstart, request->usr_idx, seg);
-       kvaddr = request_to_kaddr(request, seg);
-       flags  = GNTMAP_host_map |
-               (request->operation == BLKIF_OP_WRITE ? GNTMAP_readonly : 0);
-
-       gnttab_set_map_op(&table->grants[table->cnt],
-                         kvaddr, flags, map.gref, map.domid);
-       table->cnt++;
-
-
-#ifdef BLKTAP_CHAINED_BLKTAP
-       /* enable chained tap devices */
-       tap_page = request_to_page(request, seg);
-       set_page_private(tap_page, page_private(page));
-       SetPageBlkback(tap_page);
-#endif
-
-       if (xen_feature(XENFEAT_auto_translated_physmap))
-               return 0;
-
-       if (create_lookup_pte_addr(ring->vma->vm_mm, uvaddr, &ptep)) {
-               BTERR("couldn't get a pte addr!\n");
-               return -1;
-       }
-
-       flags |= GNTMAP_application_map | GNTMAP_contains_pte;
-       gnttab_set_map_op(&table->grants[table->cnt],
-                         ptep, flags, map.gref, map.domid);
-       table->cnt++;
-
-       return 0;
-}
-
-static int
-blktap_map_foreign(struct blktap *tap,
-                  struct blktap_request *request,
-                  struct blkif_request *blkif_req,
-                  struct blktap_grant_table *table)
+int
+blktap_device_make_request(struct blktap *tap, struct request *rq)
 {
-       struct page *page;
-       int i, grant, err, usr_idx;
-       struct blktap_ring *ring;
-       unsigned long uvaddr, foreign_mfn;
-
-       if (!table->cnt)
-               return 0;
+       struct blktap_device *tapdev = &tap->device;
+       struct blktap_request *request;
+       int write, nsegs;
+       int err;
 
-       err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
-                                       table->grants, table->cnt);
-       BUG_ON(err);
+       request = blktap_ring_make_request(tap);
+       if (IS_ERR(request)) {
+               err = PTR_ERR(request);
+               request = NULL;
 
-       grant   = 0;
-       usr_idx = request->usr_idx;
-       ring    = &tap->ring;
+               if (err == -ENOSPC || err == -ENOMEM)
+                       goto stop;
 
-       for (i = 0; i < request->nr_pages; i++) {
-               if (!blkif_req->seg[i].gref)
-                       continue;
+               goto fail;
+       }
 
-               uvaddr = MMAP_VADDR(ring->user_vstart, usr_idx, i);
+       write = rq_data_dir(rq) == WRITE;
+       nsegs = blk_rq_map_sg(rq->q, rq, request->sg_table);
 
-               if (unlikely(table->grants[grant].status)) {
-                       BTERR("invalid kernel buffer: could not remap it\n");
-                       err |= 1;
-                       table->grants[grant].handle = INVALID_GRANT_HANDLE;
-               }
+       dev_dbg(&tapdev->gd->dev,
+               "make_request: op=%c bytes=%d nsegs=%d\n",
+               write ? 'w' : 'r', blk_rq_bytes(rq), nsegs);
 
-               request->handles[i].kernel = table->grants[grant].handle;
-               foreign_mfn = table->grants[grant].dev_bus_addr >> PAGE_SHIFT;
-               grant++;
+       request->rq = rq;
+       request->operation = write ? BLKIF_OP_WRITE : BLKIF_OP_READ;
 
-               if (xen_feature(XENFEAT_auto_translated_physmap))
-                       goto done;
-
-               if (unlikely(table->grants[grant].status)) {
-                       BTERR("invalid user buffer: could not remap it\n");
-                       err |= 1;
-                       table->grants[grant].handle = INVALID_GRANT_HANDLE;
-               }
+       err = blktap_request_get_pages(tap, request, nsegs);
+       if (err)
+               goto stop;
 
-               request->handles[i].user = table->grants[grant].handle;
-               grant++;
+       err = blktap_ring_map_request(tap, request);
+       if (err)
+               goto fail;
 
-       done:
-               if (err)
-                       continue;
+       blktap_ring_submit_request(tap, request);
 
-               page = request_to_page(request, i);
+       return 0;
 
-               if (!xen_feature(XENFEAT_auto_translated_physmap))
-                       set_phys_to_machine(page_to_pfn(page),
-                                           FOREIGN_FRAME(foreign_mfn));
-               else if (vm_insert_page(ring->vma, uvaddr, page))
-                       err |= 1;
+stop:
+       tap->stats.st_oo_req++;
+       err = -EBUSY;
 
-               BTDBG("pending_req: %p, seg: %d, page: %p, "
-                     "kvaddr: 0x%p, khandle: %u, uvaddr: 0x%08lx, "
-                     "uhandle: %u\n", request, i, page,
-                     pfn_to_kaddr(page_to_pfn(page)),
-                     request->handles[i].kernel,
-                     uvaddr, request->handles[i].user);
-       }
+_out:
+       if (request)
+               blktap_ring_free_request(tap, request);
 
        return err;
-}
-
-static void
-blktap_map(struct blktap *tap,
-          struct blktap_request *request,
-          unsigned int seg, struct page *page)
-{
-       pte_t pte;
-       int usr_idx;
-       struct blktap_ring *ring;
-       unsigned long uvaddr, kvaddr;
-
-       ring    = &tap->ring;
-       usr_idx = request->usr_idx;
-       uvaddr  = MMAP_VADDR(ring->user_vstart, usr_idx, seg);
-       kvaddr  = request_to_kaddr(request, seg);
-
-       pte = mk_pte(page, ring->vma->vm_page_prot);
-       blktap_map_uaddr(ring->vma->vm_mm, uvaddr, pte_mkwrite(pte));
-       flush_tlb_page(ring->vma, uvaddr);
-       blktap_map_uaddr(ring->vma->vm_mm, kvaddr, mk_pte(page, PAGE_KERNEL));
-       flush_tlb_kernel_page(kvaddr);
-
-       set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT, pte_mfn(pte));
-       request->handles[seg].kernel = INVALID_GRANT_HANDLE;
-       request->handles[seg].user   = INVALID_GRANT_HANDLE;
-
-       BTDBG("pending_req: %p, seg: %d, page: %p, kvaddr: 0x%08lx, "
-             "uvaddr: 0x%08lx\n", request, seg, page, kvaddr,
-             uvaddr);
-}
-
-static int
-blktap_device_process_request(struct blktap *tap,
-                             struct blktap_request *request,
-                             struct request *req)
-{
-       struct page *page;
-       int i, usr_idx, err;
-       struct blktap_ring *ring;
-       struct scatterlist *sg;
-       struct blktap_grant_table table;
-       unsigned int fsect, lsect, nr_sects;
-       unsigned long offset, uvaddr;
-       struct blkif_request blkif_req, *target;
-
-       err = -1;
-       memset(&table, 0, sizeof(table));
-
-       ring    = &tap->ring;
-       usr_idx = request->usr_idx;
-       blkif_req.id = usr_idx;
-       blkif_req.sector_number = (blkif_sector_t)req->sector;
-       blkif_req.handle = 0;
-       blkif_req.operation = rq_data_dir(req) ?
-               BLKIF_OP_WRITE : BLKIF_OP_READ;
-
-       request->rq        = req;
-       request->operation = blkif_req.operation;
-       request->status    = BLKTAP_REQUEST_PENDING;
-       do_gettimeofday(&request->time);
-
-       nr_sects = 0;
-       request->nr_pages = 0;
-       blkif_req.nr_segments = blk_rq_map_sg(req->q, req, tap->sg);
-       BUG_ON(blkif_req.nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
-       for (i = 0; i < blkif_req.nr_segments; ++i) {
-                       sg = tap->sg + i;
-                       fsect = sg->offset >> 9;
-                       lsect = fsect + (sg->length >> 9) - 1;
-                       nr_sects += sg->length >> 9;
-
-                       blkif_req.seg[i] =
-                               (struct blkif_request_segment) {
-                               .gref       = 0,
-                               .first_sect = fsect,
-                               .last_sect  = lsect };
-
-                       if (blkback_pagemap_contains_page(sg_page(sg))) {
-                               /* foreign page -- use xen */
-                               if (blktap_prep_foreign(tap,
-                                                       request,
-                                                       &blkif_req,
-                                                       i,
-                                                       sg_page(sg),
-                                                       &table))
-                                       goto out;
-                       } else {
-                               /* do it the old fashioned way */
-                               blktap_map(tap,
-                                          request,
-                                          i,
-                                          sg_page(sg));
-                       }
-
-                       uvaddr = MMAP_VADDR(ring->user_vstart, usr_idx, i);
-                       offset = (uvaddr - ring->vma->vm_start) >> PAGE_SHIFT;
-                       page   = request_to_page(request, i);
-                       ring->foreign_map.map[offset] = page;
-                       SetPageReserved(page);
-
-                       BTDBG("mapped uaddr %08lx to page %p pfn 0x%lx\n",
-                             uvaddr, page, page_to_pfn(page));
-                       BTDBG("offset: 0x%08lx, pending_req: %p, seg: %d, "
-                             "page: %p, kvaddr: %p, uvaddr: 0x%08lx\n",
-                             offset, request, i,
-                             page, pfn_to_kaddr(page_to_pfn(page)), uvaddr);
-
-                       request->nr_pages++;
-       }
-
-       if (blktap_map_foreign(tap, request, &blkif_req, &table))
-               goto out;
-
-       /* Finally, write the request message to the user ring. */
-       target = RING_GET_REQUEST(&ring->ring, ring->ring.req_prod_pvt);
-       memcpy(target, &blkif_req, sizeof(blkif_req));
-       target->id = request->usr_idx;
-       wmb(); /* blktap_poll() reads req_prod_pvt asynchronously */
-       ring->ring.req_prod_pvt++;
-
-       if (rq_data_dir(req)) {
-               tap->stats.st_wr_sect += nr_sects;
-               tap->stats.st_wr_req++;
-       } else {
-               tap->stats.st_rd_sect += nr_sects;
-               tap->stats.st_rd_req++;
-       }
-
-       err = 0;
-
-out:
-       if (err)
-               blktap_device_fast_flush(tap, request);
-       return err;
+fail:
+       if (printk_ratelimit())
+               dev_warn(&tapdev->gd->dev,
+                        "make request: %d, failing\n", err);
+       goto _out;
 }
 
 /*
  * called from tapdisk context
  */
-int
+void
 blktap_device_run_queue(struct blktap *tap)
 {
-       int err, rv;
-       struct request_queue *rq;
-       struct request *req;
-       struct blktap_ring *ring;
-       struct blktap_device *dev;
-       struct blktap_request *request;
-
-       ring   = &tap->ring;
-       dev    = &tap->device;
-       rq     = dev->gd->queue;
+       struct blktap_device *tapdev = &tap->device;
+       struct request_queue *q;
+       struct request *rq;
+       int err;
 
-       BTDBG("running queue for %d\n", tap->minor);
-       spin_lock_irq(&dev->lock);
-       queue_flag_clear(QUEUE_FLAG_STOPPED, rq);
+       if (!tapdev->gd)
+               return;
 
-       while ((req = elv_next_request(rq)) != NULL) {
-               if (!blk_fs_request(req)) {
-                       end_request(req, 0);
-                       continue;
-               }
+       q = tapdev->gd->queue;
 
-               if (blk_empty_barrier_rq(req)) {
-                       end_request(req, 1);
-                       continue;
-               }
+       spin_lock_irq(&tapdev->lock);
+       queue_flag_clear(QUEUE_FLAG_STOPPED, q);
 
-               if (RING_FULL(&ring->ring)) {
-               wait:
-                       /* Avoid pointless unplugs. */
-                       blk_stop_queue(rq);
+       do {
+               rq = elv_next_request(q);
+               if (!rq)
                        break;
+
+               if (!blk_fs_request(rq)) {
+                       end_queued_request(rq, 0);
+                       continue;
                }
 
-               request = blktap_request_alloc(tap);
-               if (!request) {
-                       tap->stats.st_oo_req++;
-                       goto wait;
+               if (blk_empty_barrier(rq)) {
+                       end_queued_request(rq, 1);
+                       continue;
                }
 
-               BTDBG("req %p: dev %d cmd %p, sec 0x%llx, (0x%x/0x%lx) "
-                     "buffer:%p [%s], pending: %p\n", req, tap->minor,
-                     req->cmd, (unsigned long long)req->sector,
-                     req->current_nr_sectors,
-                     req->nr_sectors, req->buffer,
-                     rq_data_dir(req) ? "write" : "read", request);
+               spin_unlock_irq(&tapdev->lock);
 
-               blkdev_dequeue_request(req);
+               err = blktap_device_make_request(tap, rq);
 
-               spin_unlock_irq(&dev->lock);
+               spin_lock_irq(&tapdev->lock);
 
-               err = blktap_device_process_request(tap, request, req);
-               if (err) {
-                       blktap_device_end_dequeued_request(dev, req, -EIO);
-                       blktap_request_free(tap, request);
+               if (err == -EBUSY) {
+                       blk_stop_queue(q);
+                       break;
                }
 
-               spin_lock_irq(&dev->lock);
-       }
-
-       spin_unlock_irq(&dev->lock);
-
-       rv = ring->ring.req_prod_pvt -
-               ring->ring.sring->req_prod;
+               blkdev_dequeue_request(req);
 
-       RING_PUSH_REQUESTS(&ring->ring);
+               if (unlikely(err))
+                       end_request(rq, 0);
+       } while (1);
 
-       return rv;
+       spin_unlock_irq(&tapdev->lock);
 }
 
 static void
diff --git a/drivers/xen/blktap/request.c b/drivers/xen/blktap/request.c
index ca12442..9bef48c 100644
--- a/drivers/xen/blktap/request.c
+++ b/drivers/xen/blktap/request.c
@@ -3,7 +3,6 @@
 #include <linux/mutex.h>
 #include <linux/sched.h>
 #include <linux/device.h>
-#include <xen/balloon.h>
 
 #include "blktap.h"
 
@@ -129,6 +128,25 @@ blktap_request_free(struct blktap *tap,
        __page_pool_wake(tap->pool);
 }
 
+void
+blktap_request_bounce(struct blktap *tap,
+                     struct blktap_request *request,
+                     int seg, int write)
+{
+       struct scatterlist *sg = &request->sg_table[seg];
+       void *s, *p;
+
+       BUG_ON(seg >= request->nr_pages);
+
+       s = sg_virt(sg);
+       p = page_address(request->pages[seg]) + sg->offset;
+
+       if (write)
+               memcpy(p, s, sg->length);
+       else
+               memcpy(s, p, sg->length);
+}
+
 static void
 blktap_request_ctor(void *obj)
 {
diff --git a/drivers/xen/blktap/ring.c b/drivers/xen/blktap/ring.c
index a72a1b3..38896e7 100644
--- a/drivers/xen/blktap/ring.c
+++ b/drivers/xen/blktap/ring.c
@@ -1,30 +1,15 @@
+
 #include <linux/device.h>
 #include <linux/signal.h>
 #include <linux/sched.h>
 #include <linux/poll.h>
-
-#include <asm/xen/page.h>
-#include <asm/xen/hypercall.h>
+#include <linux/blkdev.h>
 
 #include "blktap.h"
 
-#ifdef CONFIG_XEN_BLKDEV_BACKEND
-#include "../blkback/blkback-pagemap.h"
-#else
-#define blkback_pagemap_contains_page(page) 0
-#endif
-
 int blktap_ring_major;
 static struct cdev blktap_ring_cdev;
 
-static inline struct blktap *
-vma_to_blktap(struct vm_area_struct *vma)
-{
-       struct vm_foreign_map *m = vma->vm_private_data;
-       struct blktap_ring *r = container_of(m, struct blktap_ring, foreign_map);
-       return container_of(r, struct blktap, ring);
-}
-
  /* 
   * BLKTAP - immediately before the mmap area,
   * we have a bunch of pages reserved for shared memory rings.
@@ -47,7 +32,7 @@ blktap_ring_read_response(struct blktap *tap,
                goto invalid;
        }
 
-       request = tap->pending_requests[usr_idx];
+       request = ring->pending[usr_idx];
 
        if (!request) {
                err = -ESRCH;
@@ -110,90 +95,15 @@ static int blktap_ring_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
        return VM_FAULT_SIGBUS;
 }
 
-static pte_t
-blktap_ring_clear_pte(struct vm_area_struct *vma,
-                     unsigned long uvaddr,
-                     pte_t *ptep, int is_fullmm)
-{
-       pte_t copy;
-       struct blktap *tap;
-       unsigned long kvaddr;
-       struct page **map, *page;
-       struct blktap_ring *ring;
-       struct blktap_request *request;
-       struct grant_handle_pair *khandle;
-       struct gnttab_unmap_grant_ref unmap[2];
-       int offset, seg, usr_idx, count = 0;
-
-       tap  = vma_to_blktap(vma);
-       ring = &tap->ring;
-       map  = ring->foreign_map.map;
-       BUG_ON(!map);   /* TODO Should this be changed to if statement? */
-
-       /*
-        * Zap entry if the address is before the start of the grant
-        * mapped region.
-        */
-       if (uvaddr < ring->user_vstart)
-               return ptep_get_and_clear_full(vma->vm_mm, uvaddr,
-                                              ptep, is_fullmm);
-
-       offset  = (int)((uvaddr - ring->user_vstart) >> PAGE_SHIFT);
-       usr_idx = offset / BLKIF_MAX_SEGMENTS_PER_REQUEST;
-       seg     = offset % BLKIF_MAX_SEGMENTS_PER_REQUEST;
-
-       offset  = (int)((uvaddr - vma->vm_start) >> PAGE_SHIFT);
-       page    = map[offset];
-       if (page && blkback_pagemap_contains_page(page))
-               set_page_private(page, 0);
-       map[offset] = NULL;
-
-       request = tap->pending_requests[usr_idx];
-       kvaddr  = request_to_kaddr(request, seg);
-       khandle = request->handles + seg;
-
-       if (khandle->kernel != INVALID_GRANT_HANDLE) {
-               gnttab_set_unmap_op(&unmap[count], kvaddr, 
-                                   GNTMAP_host_map, khandle->kernel);
-               count++;
-
-               set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT, 
-                                   INVALID_P2M_ENTRY);
-       }
-
-       if (khandle->user != INVALID_GRANT_HANDLE) {
-               BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
-
-               copy = *ptep;
-               gnttab_set_unmap_op(&unmap[count], virt_to_machine(ptep).maddr,
-                                   GNTMAP_host_map
-                                   | GNTMAP_application_map
-                                   | GNTMAP_contains_pte,
-                                   khandle->user);
-               count++;
-       } else
-               copy = ptep_get_and_clear_full(vma->vm_mm, uvaddr, ptep,
-                                              is_fullmm);
-
-       if (count)
-               if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
-                                             unmap, count))
-                       BUG();
-
-       khandle->kernel = INVALID_GRANT_HANDLE;
-       khandle->user   = INVALID_GRANT_HANDLE;
-
-       return copy;
-}
-
 static void
 blktap_ring_fail_pending(struct blktap *tap)
 {
+       struct blktap_ring *ring = &tap->ring;
        struct blktap_request *request;
        int usr_idx;
 
        for (usr_idx = 0; usr_idx < MAX_PENDING_REQS; usr_idx++) {
-               request = tap->pending_requests[usr_idx];
+               request = ring->pending[usr_idx];
                if (!request)
                        continue;
 
@@ -204,15 +114,12 @@ blktap_ring_fail_pending(struct blktap *tap)
 static void
 blktap_ring_vm_close(struct vm_area_struct *vma)
 {
-       struct blktap *tap = vma_to_blktap(vma);
+       struct blktap *tap = vma->vm_private_data;
        struct blktap_ring *ring = &tap->ring;
        struct page *page = virt_to_page(ring->ring.sring);
 
        blktap_ring_fail_pending(tap);
 
-       kfree(ring->foreign_map.map);
-       ring->foreign_map.map = NULL;
-
        zap_page_range(vma, vma->vm_start, PAGE_SIZE, NULL);
        ClearPageReserved(page);
        __free_page(page);
@@ -226,9 +133,154 @@ blktap_ring_vm_close(struct vm_area_struct *vma)
 static struct vm_operations_struct blktap_ring_vm_operations = {
        .close    = blktap_ring_vm_close,
        .fault    = blktap_ring_fault,
-       .zap_pte  = blktap_ring_clear_pte,
 };
 
+int
+blktap_ring_map_segment(struct blktap *tap,
+                       struct blktap_request *request,
+                       int seg)
+{
+       struct blktap_ring *ring = &tap->ring;
+       unsigned long uaddr;
+
+       uaddr = MMAP_VADDR(ring->user_vstart, request->usr_idx, seg);
+       return vm_insert_page(ring->vma, uaddr, request->pages[seg]);
+}
+
+int
+blktap_ring_map_request(struct blktap *tap,
+                       struct blktap_request *request)
+{
+       int seg, err = 0;
+       int write;
+
+       write = request->operation == BLKIF_OP_WRITE;
+
+       for (seg = 0; seg < request->nr_pages; seg++) {
+               if (write)
+                       blktap_request_bounce(tap, request, seg, write);
+
+               err = blktap_ring_map_segment(tap, request, seg);
+               if (err)
+                       break;
+       }
+
+       if (err)
+               blktap_ring_unmap_request(tap, request);
+
+       return err;
+}
+
+void
+blktap_ring_unmap_request(struct blktap *tap,
+                         struct blktap_request *request)
+{
+       struct blktap_ring *ring = &tap->ring;
+       unsigned long uaddr;
+       unsigned size;
+       int seg, read;
+
+       uaddr = MMAP_VADDR(ring->user_vstart, request->usr_idx, 0);
+       size  = request->nr_pages << PAGE_SHIFT;
+       read  = request->operation == BLKIF_OP_READ;
+
+       if (read)
+               for (seg = 0; seg < request->nr_pages; seg++)
+                       blktap_request_bounce(tap, request, seg, !read);
+
+       zap_page_range(ring->vma, uaddr, size, NULL);
+}
+
+void
+blktap_ring_free_request(struct blktap *tap,
+                        struct blktap_request *request)
+{
+       struct blktap_ring *ring = &tap->ring;
+
+       ring->pending[request->usr_idx] = NULL;
+       ring->n_pending--;
+
+       blktap_request_free(tap, request);
+}
+
+struct blktap_request*
+blktap_ring_make_request(struct blktap *tap)
+{
+       struct blktap_ring *ring = &tap->ring;
+       struct blktap_request *request;
+       int usr_idx;
+
+       if (RING_FULL(&ring->ring))
+               return ERR_PTR(-ENOSPC);
+
+       request = blktap_request_alloc(tap);
+       if (!request)
+               return ERR_PTR(-ENOMEM);
+
+       for (usr_idx = 0; usr_idx < BLK_RING_SIZE; usr_idx++)
+               if (!ring->pending[usr_idx])
+                       break;
+
+       BUG_ON(usr_idx >= BLK_RING_SIZE);
+
+       request->tap     = tap;
+       request->usr_idx = usr_idx;
+
+       ring->pending[usr_idx] = request;
+       ring->n_pending++;
+
+       return request;
+}
+
+void
+blktap_ring_submit_request(struct blktap *tap,
+                          struct blktap_request *request)
+{
+       struct blktap_ring *ring = &tap->ring;
+       struct blkif_request *breq;
+       struct scatterlist *sg;
+       int i, nsecs = 0;
+
+       dev_dbg(ring->dev,
+               "request %d [%p] submit\n", request->usr_idx, request);
+
+       breq = RING_GET_REQUEST(&ring->ring, ring->ring.req_prod_pvt);
+
+       breq->id            = request->usr_idx;
+       breq->sector_number = request->rq->sector;
+       breq->handle        = 0;
+       breq->operation     = request->operation;
+       breq->nr_segments   = request->nr_pages;
+
+       blktap_for_each_sg(sg, request, i) {
+               struct blkif_request_segment *seg = &breq->seg[i];
+               int first, count;
+
+               count = sg->length >> 9;
+               first = sg->offset >> 9;
+
+               seg->first_sect = first;
+               seg->last_sect  = first + count - 1;
+
+               nsecs += count;
+       }
+
+       ring->ring.req_prod_pvt++;
+
+       do_gettimeofday(&request->time);
+
+
+       if (request->operation == BLKIF_OP_WRITE) {
+               tap->stats.st_wr_sect += nsecs;
+               tap->stats.st_wr_req++;
+       }
+
+       if (request->operation == BLKIF_OP_READ) {
+               tap->stats.st_rd_sect += nsecs;
+               tap->stats.st_rd_req++;
+       }
+}
+
 static int
 blktap_ring_open(struct inode *inode, struct file *filp)
 {
@@ -270,51 +322,21 @@ blktap_ring_release(struct inode *inode, struct file 
*filp)
        return 0;
 }
 
-/* Note on mmap:
- * We need to map pages to user space in a way that will allow the block
- * subsystem set up direct IO to them.  This couldn't be done before, because
- * there isn't really a sane way to translate a user virtual address down to a 
- * physical address when the page belongs to another domain.
- *
- * My first approach was to map the page in to kernel memory, add an entry
- * for it in the physical frame list (using alloc_lomem_region as in blkback)
- * and then attempt to map that page up to user space.  This is disallowed
- * by xen though, which realizes that we don't really own the machine frame
- * underlying the physical page.
- *
- * The new approach is to provide explicit support for this in xen linux.
- * The VMA now has a flag, VM_FOREIGN, to indicate that it contains pages
- * mapped from other vms.  vma->vm_private_data is set up as a mapping 
- * from pages to actual page structs.  There is a new clause in get_user_pages
- * that does the right thing for this sort of mapping.
- */
 static int
 blktap_ring_mmap(struct file *filp, struct vm_area_struct *vma)
 {
        struct blktap *tap = filp->private_data;
        struct blktap_ring *ring = &tap->ring;
        struct blkif_sring *sring;
-       struct page *page;
-       int size, err;
-       struct page **map;
-
-       map   = NULL;
-       sring = NULL;
+       struct page *page = NULL;
+       int err;
 
        if (ring->vma)
                return -EBUSY;
 
-       size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
-       if (size != (MMAP_PAGES + RING_PAGES)) {
-               BTERR("you _must_ map exactly %lu pages!\n",
-                     MMAP_PAGES + RING_PAGES);
-               return -EAGAIN;
-       }
-
-       /* allocate the shared ring */
        page = alloc_page(GFP_KERNEL|__GFP_ZERO);
        if (!page)
-               goto fail;
+               return -ENOMEM;
 
        SetPageReserved(page);
 
@@ -329,22 +351,12 @@ blktap_ring_mmap(struct file *filp, struct vm_area_struct 
*vma)
        ring->ring_vstart = vma->vm_start;
        ring->user_vstart = ring->ring_vstart + PAGE_SIZE;
 
-       /* allocate the foreign map */
-       map = kzalloc(size * sizeof(struct page *), GFP_KERNEL);
-       if (!map)
-               goto fail;
+       vma->vm_private_data = tap;
 
-       /* Mark this VM as containing foreign pages, and set up mappings. */
-       ring->foreign_map.map = map;
-       vma->vm_private_data = &ring->foreign_map;
-       vma->vm_flags |= VM_FOREIGN;
        vma->vm_flags |= VM_DONTCOPY;
        vma->vm_flags |= VM_RESERVED;
-       vma->vm_ops = &blktap_ring_vm_operations;
 
-#ifdef CONFIG_X86
-       vma->vm_mm->context.has_foreign_mappings = 1;
-#endif
+       vma->vm_ops = &blktap_ring_vm_operations;
 
        ring->vma = vma;
        return 0;
@@ -356,10 +368,7 @@ fail:
                __free_page(page);
        }
 
-       if (map)
-               kfree(map);
-
-       return -ENOMEM;
+       return err;
 }
 
 static int
@@ -405,16 +414,19 @@ static unsigned int blktap_ring_poll(struct file *filp, 
poll_table *wait)
 {
        struct blktap *tap = filp->private_data;
        struct blktap_ring *ring = &tap->ring;
-       int work = 0;
+       int work;
 
        poll_wait(filp, &tap->pool->wait, wait);
        poll_wait(filp, &ring->poll_wait, wait);
 
        down_read(&current->mm->mmap_sem);
        if (ring->vma && tap->device.gd)
-               work = blktap_device_run_queue(tap);
+               blktap_device_run_queue(tap);
        up_read(&current->mm->mmap_sem);
 
+       work = ring->ring.req_prod_pvt - ring->ring.sring->req_prod;
+       RING_PUSH_REQUESTS(&ring->ring);
+
        if (work ||
            ring->ring.sring->private.tapif_user.msg ||
            test_and_clear_bit(BLKTAP_DEVICE_CLOSED, &tap->dev_inuse))
@@ -463,18 +475,19 @@ blktap_ring_create(struct blktap *tap)
 size_t
 blktap_ring_debug(struct blktap *tap, char *buf, size_t size)
 {
+       struct blktap_ring *ring = &tap->ring;
        char *s = buf, *end = buf + size;
        int usr_idx;
 
        s += snprintf(s, end - s,
-                     "begin pending:%d\n", tap->pending_cnt);
+                     "begin pending:%d\n", ring->n_pending);
 
        for (usr_idx = 0; usr_idx < MAX_PENDING_REQS; usr_idx++) {
                struct blktap_request *request;
                struct timeval *time;
                int write;
 
-               request = tap->pending_requests[usr_idx];
+               request = ring->pending[usr_idx];
                if (!request)
                        continue;
 
-- 
1.7.0.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel