[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH] linux-2.6.18/blktap: fix locking (again)



>>> On 23.12.11 at 12:33, "Jan Beulich" <JBeulich@xxxxxxxx> wrote:
> c/s 1090 wasn't really helping much - I made that change under the
> wrong impression that the zap_pte() hook would be called with the
> mmap_sem held. Since that is not the case, none of the uses of
> mmap_sem here actually need to be write ones (they only look up vma
> information), so convert them all to read acquires/releases.
> 
> A new spinlock needs to be introduced, protecting idx_map. This has to
> nest inside mmap_sem, which requires some code movement.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

> --- a/drivers/xen/blktap/blktap.c
> +++ b/drivers/xen/blktap/blktap.c
> @@ -113,6 +113,7 @@ typedef struct tap_blkif {
>       pid_t pid;                    /*tapdisk process id                   */
>       enum { RUNNING, CLEANSHUTDOWN } status; /*Detect a clean userspace 
>                                                 shutdown                   */
> +     spinlock_t map_lock;          /*protects idx_map                     */
>       struct idx_map {
>               u16 mem, req;
>       } *idx_map;                   /*Record the user ring id to kern
> @@ -295,7 +296,7 @@ static pte_t blktap_clear_pte(struct vm_
>       pte_t copy;
>       tap_blkif_t *info = NULL;
>       unsigned int seg, usr_idx, pending_idx, mmap_idx, count = 0;
> -     unsigned long offset, uvstart = 0;
> +     unsigned long offset;
>       struct page *pg;
>       struct grant_handle_pair *khandle;
>       struct gnttab_unmap_grant_ref unmap[2];
> @@ -304,25 +305,28 @@ static pte_t blktap_clear_pte(struct vm_
>        * If the address is before the start of the grant mapped region or
>        * if vm_file is NULL (meaning mmap failed and we have nothing to do)
>        */
> -     if (vma->vm_file != NULL) {
> +     if (vma->vm_file != NULL)
>               info = vma->vm_file->private_data;
> -             uvstart = info->rings_vstart + (RING_PAGES << PAGE_SHIFT);
> -     }
> -     if (vma->vm_file == NULL || uvaddr < uvstart)
> +     if (info == NULL || uvaddr < info->user_vstart)
>               return ptep_get_and_clear_full(vma->vm_mm, uvaddr, 
>                                              ptep, is_fullmm);
>  
> -     /* TODO Should these be changed to if statements? */
> -     BUG_ON(!info);
> -     BUG_ON(!info->idx_map);
> -
> -     offset = (uvaddr - uvstart) >> PAGE_SHIFT;
> +     offset = (uvaddr - info->user_vstart) >> PAGE_SHIFT;
>       usr_idx = OFFSET_TO_USR_IDX(offset);
>       seg = OFFSET_TO_SEG(offset);
>  
> +     spin_lock(&info->map_lock);
> +
>       pending_idx = info->idx_map[usr_idx].req;
>       mmap_idx = info->idx_map[usr_idx].mem;
>  
> +     /* fast_flush_area() may already have cleared this entry */
> +     if (mmap_idx == INVALID_MIDX) {
> +             spin_unlock(&info->map_lock);
> +             return ptep_get_and_clear_full(vma->vm_mm, uvaddr, ptep,
> +                                            is_fullmm);
> +     }
> +
>       pg = idx_to_page(mmap_idx, pending_idx, seg);
>       ClearPageReserved(pg);
>       info->foreign_map.map[offset + RING_PAGES] = NULL;
> @@ -365,6 +369,8 @@ static pte_t blktap_clear_pte(struct vm_
>                       BUG();
>       }
>  
> +     spin_unlock(&info->map_lock);
> +
>       return copy;
>  }
>  
> @@ -489,6 +495,7 @@ found:
>               }
>  
>               info->minor = minor;
> +             spin_lock_init(&info->map_lock);
>               /*
>                * Make sure that we have a minor before others can
>                * see us.
> @@ -1029,25 +1036,30 @@ static void fast_flush_area(pending_req_
>                            unsigned int u_idx, tap_blkif_t *info)
>  {
>       struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
> -     unsigned int i, mmap_idx, invcount = 0, locked = 0;
> +     unsigned int i, mmap_idx, invcount = 0;
>       struct grant_handle_pair *khandle;
>       uint64_t ptep;
>       int ret;
>       unsigned long uvaddr;
>       struct mm_struct *mm = info->mm;
>  
> +     if (mm != NULL)
> +             down_read(&mm->mmap_sem);
> +
>       if (mm != NULL && xen_feature(XENFEAT_auto_translated_physmap)) {
> -             down_write(&mm->mmap_sem);
> + slow:
>               blktap_zap_page_range(mm,
>                                     MMAP_VADDR(info->user_vstart, u_idx, 0),
>                                     req->nr_pages);
>               info->idx_map[u_idx].mem = INVALID_MIDX;
> -             up_write(&mm->mmap_sem);
> +             up_read(&mm->mmap_sem);
>               return;
>       }
>  
>       mmap_idx = req->mem_idx;
>  
> +     spin_lock(&info->map_lock);
> +
>       for (i = 0; i < req->nr_pages; i++) {
>               uvaddr = MMAP_VADDR(info->user_vstart, u_idx, i);
>  
> @@ -1066,15 +1078,13 @@ static void fast_flush_area(pending_req_
>  
>               if (mm != NULL && khandle->user != INVALID_GRANT_HANDLE) {
>                       BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
> -                     if (!locked++)
> -                             down_write(&mm->mmap_sem);
>                       if (create_lookup_pte_addr(
>                               mm,
>                               MMAP_VADDR(info->user_vstart, u_idx, i),
>                               &ptep) !=0) {
> -                             up_write(&mm->mmap_sem);
> +                             spin_unlock(&info->map_lock);
>                               WPRINTK("Couldn't get a pte addr!\n");
> -                             return;
> +                             goto slow;
>                       }
>  
>                       gnttab_set_unmap_op(&unmap[invcount], ptep,
> @@ -1091,19 +1101,11 @@ static void fast_flush_area(pending_req_
>               GNTTABOP_unmap_grant_ref, unmap, invcount);
>       BUG_ON(ret);
>       
> -     if (mm != NULL && !xen_feature(XENFEAT_auto_translated_physmap)) {
> -             if (!locked++)
> -                     down_write(&mm->mmap_sem);
> -             blktap_zap_page_range(mm, 
> -                                   MMAP_VADDR(info->user_vstart, u_idx, 0), 
> -                                   req->nr_pages);
> -     }
> -
> -     if (!locked && mm != NULL)
> -             down_write(&mm->mmap_sem);
>       info->idx_map[u_idx].mem = INVALID_MIDX;
> +
> +     spin_unlock(&info->map_lock);
>       if (mm != NULL)
> -             up_write(&mm->mmap_sem);
> +             up_read(&mm->mmap_sem);
>  }
>  
>  /******************************************************************
> @@ -1406,7 +1408,9 @@ static void dispatch_rw_block_io(blkif_t
>               goto fail_response;
>  
>       /* Check we have space on user ring - should never fail. */
> +     spin_lock(&info->map_lock);
>       usr_idx = GET_NEXT_REQ(info->idx_map);
> +     spin_unlock(&info->map_lock);
>       if (usr_idx >= MAX_PENDING_REQS) {
>               WARN_ON(1);
>               goto fail_response;
> @@ -1445,7 +1449,7 @@ static void dispatch_rw_block_io(blkif_t
>       op = 0;
>       mm = info->mm;
>       if (!xen_feature(XENFEAT_auto_translated_physmap))
> -             down_write(&mm->mmap_sem);
> +             down_read(&mm->mmap_sem);
>       for (i = 0; i < nseg; i++) {
>               unsigned long uvaddr;
>               unsigned long kvaddr;
> @@ -1462,9 +1466,9 @@ static void dispatch_rw_block_io(blkif_t
>                       /* Now map it to user. */
>                       ret = create_lookup_pte_addr(mm, uvaddr, &ptep);
>                       if (ret) {
> -                             up_write(&mm->mmap_sem);
> +                             up_read(&mm->mmap_sem);
>                               WPRINTK("Couldn't get a pte addr!\n");
> -                             goto fail_flush;
> +                             goto fail_response;
>                       }
>  
>                       gnttab_set_map_op(&map[op], ptep,
> @@ -1478,12 +1482,15 @@ static void dispatch_rw_block_io(blkif_t
>                            req->seg[i].first_sect + 1);
>       }
>  
> +     if (xen_feature(XENFEAT_auto_translated_physmap))
> +             down_read(&mm->mmap_sem);
> +
> +     spin_lock(&info->map_lock);
> +
>       ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, op);
>       BUG_ON(ret);
>  
>       if (!xen_feature(XENFEAT_auto_translated_physmap)) {
> -             up_write(&mm->mmap_sem);
> -
>               for (i = 0; i < (nseg*2); i+=2) {
>                       unsigned long uvaddr;
>                       unsigned long offset;
> @@ -1548,18 +1555,18 @@ static void dispatch_rw_block_io(blkif_t
>               }
>       }
>  
> +     /*record [mmap_idx,pending_idx] to [usr_idx] mapping*/
> +     info->idx_map[usr_idx].mem = mmap_idx;
> +     info->idx_map[usr_idx].req = pending_idx;
> +
> +     spin_unlock(&info->map_lock);
> +
>       if (ret)
>               goto fail_flush;
>  
> -     if (xen_feature(XENFEAT_auto_translated_physmap))
> -             down_write(&mm->mmap_sem);
> -     /* Mark mapped pages as reserved: */
> -     for (i = 0; i < req->nr_segments; i++) {
> -             struct page *pg;
> -
> -             pg = idx_to_page(mmap_idx, pending_idx, i);
> -             SetPageReserved(pg);
> -             if (xen_feature(XENFEAT_auto_translated_physmap)) {
> +     if (xen_feature(XENFEAT_auto_translated_physmap)) {
> +             for (i = 0; i < nseg; i++) {
> +                     struct page *pg = idx_to_page(mmap_idx, pending_idx, i);
>                       unsigned long uvaddr = MMAP_VADDR(info->user_vstart,
>                                                         usr_idx, i);
>                       if (vma && uvaddr >= vma->vm_end) {
> @@ -1577,18 +1584,12 @@ static void dispatch_rw_block_io(blkif_t
>                                       continue;
>                       }
>                       ret = vm_insert_page(vma, uvaddr, pg);
> -                     if (ret) {
> -                             up_write(&mm->mmap_sem);
> +                     if (ret)
>                               goto fail_flush;
> -                     }
>               }
>       }
> -     if (xen_feature(XENFEAT_auto_translated_physmap))
> -             up_write(&mm->mmap_sem);
>       
> -     /*record [mmap_idx,pending_idx] to [usr_idx] mapping*/
> -     info->idx_map[usr_idx].mem = mmap_idx;
> -     info->idx_map[usr_idx].req = pending_idx;
> +     up_read(&mm->mmap_sem);
>  
>       blkif_get(blkif);
>       /* Finally, write the request message to the user ring. */
> @@ -1611,6 +1612,7 @@ static void dispatch_rw_block_io(blkif_t
>       return;
>  
>   fail_flush:
> +     up_read(&mm->mmap_sem);
>       WPRINTK("Reached Fail_flush\n");
>       fast_flush_area(pending_req, pending_idx, usr_idx, info);
>   fail_response:



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.