The main cleanup item is to convert the index map from a plain "unsigned long" holding two merged together integers into a proper structure (at once halving its size on 64-bit). Additionally, add a previously missing range check of res.id in blktap_read_ufe_ring() and remove blkif_reqs (which could have been set via module load option to a value other than the default, which code elsewhere isn't capable of handling). Signed-off-by: Jan Beulich --- sle10sp4-2011-02-01.orig/drivers/xen/blktap/blktap.c +++ sle10sp4-2011-02-01/drivers/xen/blktap/blktap.c @@ -79,7 +79,6 @@ (_start + \ ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) + \ ((_seg) * PAGE_SIZE)) -static int blkif_reqs = MAX_PENDING_REQS; static int mmap_pages = MMAP_PAGES; #define RING_PAGES 1 /* BLKTAP - immediately before the mmap area, we @@ -113,7 +112,9 @@ typedef struct tap_blkif { pid_t pid; /*tapdisk process id */ enum { RUNNING, CLEANSHUTDOWN } status; /*Detect a clean userspace shutdown */ - unsigned long *idx_map; /*Record the user ring id to kern + struct idx_map { + u16 mem, req; + } *idx_map; /*Record the user ring id to kern [req id, idx] tuple */ blkif_t *blkif; /*Associate blkif with tapdev */ struct domid_translate_ext trans; /*Translation from domid to bus. */ @@ -123,7 +124,6 @@ typedef struct tap_blkif { static struct tap_blkif *tapfds[MAX_TAP_DEV]; static int blktap_next_minor; -module_param(blkif_reqs, int, 0); /* Run-time switchable: /sys/module/blktap/parameters/ */ static unsigned int log_stats = 0; static unsigned int debug_lvl = 0; @@ -154,12 +154,6 @@ static DEFINE_SPINLOCK(pending_free_lock static DECLARE_WAIT_QUEUE_HEAD (pending_free_wq); static int alloc_pending_reqs; -typedef unsigned int PEND_RING_IDX; - -static inline int MASK_PEND_IDX(int i) { - return (i & (MAX_PENDING_REQS-1)); -} - static inline unsigned int RTN_PEND_IDX(pending_req_t *req, int idx) { return (req - pending_reqs[idx]); } @@ -249,40 +243,26 @@ static inline int BLKTAP_MODE_VALID(unsi * ring ID. */ -static inline unsigned long MAKE_ID(domid_t fe_dom, PEND_RING_IDX idx) -{ - return ((fe_dom << 16) | MASK_PEND_IDX(idx)); -} - -extern inline PEND_RING_IDX ID_TO_IDX(unsigned long id) -{ - return (PEND_RING_IDX)(id & 0x0000ffff); -} - -extern inline int ID_TO_MIDX(unsigned long id) -{ - return (int)(id >> 16); -} - -#define INVALID_REQ 0xdead0000 +#define INVALID_MIDX 0xdead /*TODO: Convert to a free list*/ -static inline int GET_NEXT_REQ(unsigned long *idx_map) +static inline unsigned int GET_NEXT_REQ(const struct idx_map *idx_map) { - int i; + unsigned int i; + for (i = 0; i < MAX_PENDING_REQS; i++) - if (idx_map[i] == INVALID_REQ) - return i; + if (idx_map[i].mem == INVALID_MIDX) + break; - return INVALID_REQ; + return i; } -static inline int OFFSET_TO_USR_IDX(int offset) +static inline unsigned int OFFSET_TO_USR_IDX(unsigned long offset) { return offset / BLKIF_MAX_SEGMENTS_PER_REQUEST; } -static inline int OFFSET_TO_SEG(int offset) +static inline unsigned int OFFSET_TO_SEG(unsigned long offset) { return offset % BLKIF_MAX_SEGMENTS_PER_REQUEST; } @@ -319,13 +299,11 @@ static pte_t blktap_clear_pte(struct vm_ { pte_t copy; tap_blkif_t *info = NULL; - int offset, seg, usr_idx, pending_idx, mmap_idx; - unsigned long uvstart = 0; - unsigned long kvaddr; + unsigned int seg, usr_idx, pending_idx, mmap_idx, count = 0; + unsigned long offset, uvstart = 0; struct page *pg; struct grant_handle_pair *khandle; struct gnttab_unmap_grant_ref unmap[2]; - int count = 0; /* * If the address is before the start of the grant mapped region or @@ -343,14 +321,13 @@ static pte_t blktap_clear_pte(struct vm_ BUG_ON(!info); BUG_ON(!info->idx_map); - offset = (int) ((uvaddr - uvstart) >> PAGE_SHIFT); + offset = (uvaddr - uvstart) >> PAGE_SHIFT; usr_idx = OFFSET_TO_USR_IDX(offset); seg = OFFSET_TO_SEG(offset); - pending_idx = MASK_PEND_IDX(ID_TO_IDX(info->idx_map[usr_idx])); - mmap_idx = ID_TO_MIDX(info->idx_map[usr_idx]); + pending_idx = info->idx_map[usr_idx].req; + mmap_idx = info->idx_map[usr_idx].mem; - kvaddr = idx_to_kaddr(mmap_idx, pending_idx, seg); pg = idx_to_page(mmap_idx, pending_idx, seg); ClearPageReserved(pg); info->foreign_map.map[offset + RING_PAGES] = NULL; @@ -358,12 +335,14 @@ static pte_t blktap_clear_pte(struct vm_ khandle = &pending_handle(mmap_idx, pending_idx, seg); if (khandle->kernel != INVALID_GRANT_HANDLE) { - gnttab_set_unmap_op(&unmap[count], kvaddr, + unsigned long pfn = page_to_pfn(pg); + + gnttab_set_unmap_op(&unmap[count], + (unsigned long)pfn_to_kaddr(pfn), GNTMAP_host_map, khandle->kernel); count++; - set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT, - INVALID_P2M_ENTRY); + set_phys_to_machine(pfn, INVALID_P2M_ENTRY); } if (khandle->user != INVALID_GRANT_HANDLE) { @@ -624,7 +603,7 @@ static int blktap_open(struct inode *ino filp->private_data = info; info->mm = NULL; - info->idx_map = kmalloc(sizeof(unsigned long) * MAX_PENDING_REQS, + info->idx_map = kmalloc(sizeof(*info->idx_map) * MAX_PENDING_REQS, GFP_KERNEL); if (info->idx_map == NULL) @@ -632,8 +611,10 @@ static int blktap_open(struct inode *ino if (idx > 0) { init_waitqueue_head(&info->wait); - for (i = 0; i < MAX_PENDING_REQS; i++) - info->idx_map[i] = INVALID_REQ; + for (i = 0; i < MAX_PENDING_REQS; i++) { + info->idx_map[i].mem = INVALID_MIDX; + info->idx_map[i].req = ~0; + } } DPRINTK("Tap open: device /dev/xen/blktap%d\n",idx); @@ -890,11 +871,9 @@ static int blktap_ioctl(struct inode *in return blktap_major; case BLKTAP_QUERY_ALLOC_REQS: - { - WPRINTK("BLKTAP_QUERY_ALLOC_REQS ioctl: %d/%d\n", - alloc_pending_reqs, blkif_reqs); - return (alloc_pending_reqs/blkif_reqs) * 100; - } + WPRINTK("BLKTAP_QUERY_ALLOC_REQS ioctl: %d/%lu\n", + alloc_pending_reqs, MAX_PENDING_REQS); + return (alloc_pending_reqs/MAX_PENDING_REQS) * 100; } return -ENOIOCTLCMD; } @@ -949,14 +928,14 @@ static int req_increase(void) return -EINVAL; pending_reqs[mmap_alloc] = kzalloc(sizeof(pending_req_t) - * blkif_reqs, GFP_KERNEL); + * MAX_PENDING_REQS, GFP_KERNEL); foreign_pages[mmap_alloc] = alloc_empty_pages_and_pagevec(mmap_pages); if (!pending_reqs[mmap_alloc] || !foreign_pages[mmap_alloc]) goto out_of_memory; - DPRINTK("%s: reqs=%d, pages=%d\n", - __FUNCTION__, blkif_reqs, mmap_pages); + DPRINTK("%s: reqs=%lu, pages=%d\n", + __FUNCTION__, MAX_PENDING_REQS, mmap_pages); for (i = 0; i < MAX_PENDING_REQS; i++) { list_add_tail(&pending_reqs[mmap_alloc][i].free_list, @@ -1056,14 +1035,14 @@ static void blktap_zap_page_range(struct } } -static void fast_flush_area(pending_req_t *req, int k_idx, int u_idx, - int tapidx) +static void fast_flush_area(pending_req_t *req, unsigned int k_idx, + unsigned int u_idx, int tapidx) { struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2]; - unsigned int i, invcount = 0, locked = 0; + unsigned int i, mmap_idx, invcount = 0, locked = 0; struct grant_handle_pair *khandle; uint64_t ptep; - int ret, mmap_idx; + int ret; unsigned long uvaddr; tap_blkif_t *info; struct mm_struct *mm; @@ -1218,7 +1197,7 @@ static int blktap_read_ufe_ring(tap_blki RING_IDX i, j, rp; blkif_response_t *resp; blkif_t *blkif=NULL; - int pending_idx, usr_idx, mmap_idx; + unsigned int pending_idx, usr_idx, mmap_idx; pending_req_t *pending_req; if (!info) @@ -1240,18 +1219,23 @@ static int blktap_read_ufe_ring(tap_blki ++info->ufe_ring.rsp_cons; /*retrieve [usr_idx] to [mmap_idx,pending_idx] mapping*/ - usr_idx = (int)res.id; - pending_idx = MASK_PEND_IDX(ID_TO_IDX(info->idx_map[usr_idx])); - mmap_idx = ID_TO_MIDX(info->idx_map[usr_idx]); - - if ( (mmap_idx >= mmap_alloc) || - (ID_TO_IDX(info->idx_map[usr_idx]) >= MAX_PENDING_REQS) ) - WPRINTK("Incorrect req map" - "[%d], internal map [%d,%d (%d)]\n", - usr_idx, mmap_idx, - ID_TO_IDX(info->idx_map[usr_idx]), - MASK_PEND_IDX( - ID_TO_IDX(info->idx_map[usr_idx]))); + if (res.id >= MAX_PENDING_REQS) { + WPRINTK("incorrect req map [%llx]\n", + (unsigned long long)res.id); + continue; + } + + usr_idx = (unsigned int)res.id; + pending_idx = info->idx_map[usr_idx].req; + mmap_idx = info->idx_map[usr_idx].mem; + + if (mmap_idx >= mmap_alloc || + pending_idx >= MAX_PENDING_REQS) { + WPRINTK("incorrect req map [%d]," + " internal map [%d,%d]\n", + usr_idx, mmap_idx, pending_idx); + continue; + } pending_req = &pending_reqs[mmap_idx][pending_idx]; blkif = pending_req->blkif; @@ -1270,7 +1254,7 @@ static int blktap_read_ufe_ring(tap_blki info->foreign_map.map[offset] = NULL; } fast_flush_area(pending_req, pending_idx, usr_idx, info->minor); - info->idx_map[usr_idx] = INVALID_REQ; + info->idx_map[usr_idx].mem = INVALID_MIDX; make_response(blkif, pending_req->id, res.operation, res.status); blkif_put(pending_req->blkif); @@ -1429,9 +1413,9 @@ static void dispatch_rw_block_io(blkif_t int ret, i, nr_sects = 0; tap_blkif_t *info; blkif_request_t *target; - int pending_idx = RTN_PEND_IDX(pending_req,pending_req->mem_idx); - int usr_idx; - uint16_t mmap_idx = pending_req->mem_idx; + unsigned int mmap_idx = pending_req->mem_idx; + unsigned int pending_idx = RTN_PEND_IDX(pending_req, mmap_idx); + unsigned int usr_idx; struct mm_struct *mm; struct vm_area_struct *vma = NULL; @@ -1459,8 +1443,8 @@ static void dispatch_rw_block_io(blkif_t /* Check we have space on user ring - should never fail. */ usr_idx = GET_NEXT_REQ(info->idx_map); - if (usr_idx == INVALID_REQ) { - BUG(); + if (usr_idx >= MAX_PENDING_REQS) { + WARN_ON(1); goto fail_response; } @@ -1643,7 +1627,8 @@ static void dispatch_rw_block_io(blkif_t up_write(&mm->mmap_sem); /*record [mmap_idx,pending_idx] to [usr_idx] mapping*/ - info->idx_map[usr_idx] = MAKE_ID(mmap_idx, pending_idx); + info->idx_map[usr_idx].mem = mmap_idx; + info->idx_map[usr_idx].req = pending_idx; blkif_get(blkif); /* Finally, write the request message to the user ring. */