Re: [Xen-devel] [PATCH 12/14] xen-blkback: safely unmap grants in case they are still in use



On 19/01/15 at 16.51, David Vrabel wrote:
> From: Jenny Herbert <jennifer.herbert@xxxxxxxxxx>
> 
> Use gnttab_unmap_refs_async() to wait until the mapped pages are no
> longer in use before unmapping them.
> 
> This allows blkback to use network storage which may retain refs to
> pages in queued skbs after the block I/O has completed.
> 
> Signed-off-by: Jenny Herbert <jennifer.herbert@xxxxxxxxxx>
> Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>

AFAICT it looks good, just a couple of comments below.

> ---
>  drivers/block/xen-blkback/blkback.c |  175 +++++++++++++++++++++++++----------
>  drivers/block/xen-blkback/common.h  |    3 +
>  2 files changed, 127 insertions(+), 51 deletions(-)
> 
> diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
> index 908e630..39a7aba 100644
> --- a/drivers/block/xen-blkback/blkback.c
> +++ b/drivers/block/xen-blkback/blkback.c
> @@ -47,6 +47,7 @@
>  #include <asm/xen/hypervisor.h>
>  #include <asm/xen/hypercall.h>
>  #include <xen/balloon.h>
> +#include <xen/grant_table.h>
>  #include "common.h"
>  
>  /*
> @@ -262,6 +263,17 @@ static void put_persistent_gnt(struct xen_blkif *blkif,
>       atomic_dec(&blkif->persistent_gnt_in_use);
>  }
>  
> +static void free_persistent_gnts_unmap_callback(int result,
> +                                             struct gntab_unmap_queue_data *data)
> +{
> +     struct completion* c = data->data;

This should be "struct completion *c".
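
I.e.:

	struct completion *c = data->data;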

> +
> +     /* BUG_ON used to reproduce existing behaviour,
> +        but is this the best way to deal with this? */
> +     BUG_ON(result);
> +     complete(c);
> +}
> +
>  static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
>                                   unsigned int num)
>  {
> @@ -269,8 +281,17 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
>       struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
>       struct persistent_gnt *persistent_gnt;
>       struct rb_node *n;
> -     int ret = 0;
>       int segs_to_unmap = 0;
> +     struct gntab_unmap_queue_data unmap_data;
> +     struct completion unmap_completion;
> +
> +     init_completion(&unmap_completion);
> +
> +     unmap_data.data = &unmap_completion;
> +     unmap_data.done = &free_persistent_gnts_unmap_callback;
> +     unmap_data.pages = pages;
> +     unmap_data.unmap_ops = unmap;
> +     unmap_data.kunmap_ops = NULL;
>  
>       foreach_grant_safe(persistent_gnt, n, root, node) {
>               BUG_ON(persistent_gnt->handle ==
> @@ -285,9 +306,11 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
>  
>               if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST ||
>                       !rb_next(&persistent_gnt->node)) {
> -                     ret = gnttab_unmap_refs(unmap, NULL, pages,
> -                             segs_to_unmap);
> -                     BUG_ON(ret);
> +
> +                     unmap_data.count = segs_to_unmap;
> +                     gnttab_unmap_refs_async(&unmap_data);
> +                     wait_for_completion(&unmap_completion);
> +
>                       put_free_pages(blkif, pages, segs_to_unmap);
>                       segs_to_unmap = 0;
>               }
> @@ -653,18 +676,14 @@ void xen_blkbk_free_caches(struct xen_blkif *blkif)
>       shrink_free_pagepool(blkif, 0 /* All */);
>  }
>  
> -/*
> - * Unmap the grant references, and also remove the M2P over-rides
> - * used in the 'pending_req'.
> - */
> -static void xen_blkbk_unmap(struct xen_blkif *blkif,
> -                            struct grant_page *pages[],
> -                            int num)
> +static unsigned int xen_blkbk_unmap_prepare(
> +     struct xen_blkif *blkif,
> +     struct grant_page **pages,
> +     unsigned int num,
> +     struct gnttab_unmap_grant_ref *unmap_ops,
> +     struct page **unmap_pages)
>  {
> -     struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
> -     struct page *unmap_pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
>       unsigned int i, invcount = 0;
> -     int ret;
>  
>       for (i = 0; i < num; i++) {
>               if (pages[i]->persistent_gnt != NULL) {
> @@ -674,21 +693,99 @@ static void xen_blkbk_unmap(struct xen_blkif *blkif,
>               if (pages[i]->handle == BLKBACK_INVALID_HANDLE)
>                       continue;
>               unmap_pages[invcount] = pages[i]->page;
> -             gnttab_set_unmap_op(&unmap[invcount], vaddr(pages[i]->page),
> +             gnttab_set_unmap_op(&unmap_ops[invcount], vaddr(pages[invcount]->page),
>                                   GNTMAP_host_map, pages[i]->handle);
>               pages[i]->handle = BLKBACK_INVALID_HANDLE;
> -             if (++invcount == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
> -                     ret = gnttab_unmap_refs(unmap, NULL, unmap_pages,
> -                                             invcount);
> +             invcount++;
> +       }
> +
> +       return invcount;
> +}
> +
> +static void xen_blkbk_unmap_done(struct xen_blkif *blkif,
> +                              struct page *unmap_pages[],
> +                              unsigned int num)
> +{
> +     put_free_pages(blkif, unmap_pages, num);
> +}

I'm not sure this wrapper is really needed; IMHO I would just call
put_free_pages directly.

> +
> +static void xen_blkbk_unmap_and_respond_callback(int result, struct gntab_unmap_queue_data *data)
> +{
> +     struct pending_req* pending_req = (struct pending_req*) (data->data);
> +     struct xen_blkif *blkif = pending_req->blkif;
> +
> +     /* BUG_ON used to reproduce existing behaviour,
> +        but is this the best way to deal with this? */
> +     BUG_ON(result);
> +
> +     xen_blkbk_unmap_done(blkif, data->pages, data->count);

So I would change this to a direct call to put_free_pages.
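
I.e. just call it directly on the queue data (untested):

	put_free_pages(blkif, data->pages, data->count);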

> +     make_response(blkif, pending_req->id,
> +                   pending_req->operation, pending_req->status);
> +     free_req(blkif, pending_req);
> +     /*
> +      * Make sure the request is freed before releasing blkif,
> +      * or there could be a race between free_req and the
> +      * cleanup done in xen_blkif_free during shutdown.
> +      *
> +      * NB: The fact that we might try to wake up pending_free_wq
> +      * before drain_complete (in case there's a drain going on)
> +      * it's not a problem with our current implementation
> +      * because we can assure there's no thread waiting on
> +      * pending_free_wq if there's a drain going on, but it has
> +      * to be taken into account if the current model is changed.
> +      */
> +     if (atomic_dec_and_test(&blkif->inflight) && atomic_read(&blkif->drain)) {
> +             complete(&blkif->drain_complete);
> +     }
> +     xen_blkif_put(blkif);
> +}
> +
> +static void xen_blkbk_unmap_and_respond(struct pending_req *req)
> +{
> +     struct gntab_unmap_queue_data* work = &req->gnttab_unmap_data;
> +     struct xen_blkif *blkif = req->blkif;
> +     struct grant_page **pages = req->segments;
> +     unsigned int invcount;
> +
> +     invcount = xen_blkbk_unmap_prepare(blkif, pages, req->nr_pages,
> +                                        req->unmap, req->unmap_pages);
> +
> +     work->data = req;
> +     work->done = xen_blkbk_unmap_and_respond_callback;
> +     work->unmap_ops = req->unmap;
> +     work->kunmap_ops = NULL;
> +     work->pages = req->unmap_pages;
> +     work->count = invcount;
> +
> +     gnttab_unmap_refs_async(&req->gnttab_unmap_data);
> +}
> +
> +
> +/*
> + * Unmap the grant references, and also remove the M2P over-rides
> + * used in the 'pending_req'.
> + */
> +static void xen_blkbk_unmap(struct xen_blkif *blkif,
> +                            struct grant_page *pages[],
> +                            int num)
> +{
> +     struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
> +     struct page *unmap_pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
> +     unsigned int invcount = 0;
> +     int ret;
> +
> +     while (num) {
> +             unsigned int batch = min(num, BLKIF_MAX_SEGMENTS_PER_REQUEST);
> +             
> +             invcount = xen_blkbk_unmap_prepare(blkif, pages, batch,
> +                                                unmap, unmap_pages);

I would add:

BUG_ON(invcount != batch);

> +             if (invcount) {
> +                     ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount);
>                       BUG_ON(ret);
> -                     put_free_pages(blkif, unmap_pages, invcount);
> -                     invcount = 0;
> +                     xen_blkbk_unmap_done(blkif, unmap_pages, invcount);
>               }
> -     }
> -     if (invcount) {
> -             ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount);
> -             BUG_ON(ret);
> -             put_free_pages(blkif, unmap_pages, invcount);
> +             pages += batch;
> +             num -= batch;

This loop is sort of buggy: it should work, but it is sub-optimal.

The pages array can contain both persistent grants and normal grants
(the ones that we should unmap). So blindly advancing the pages pointer
by batch means that we could be iterating over grants that have already
been freed. It is not really an issue, because we set handle to
BLKBACK_INVALID_HANDLE, but it's a waste.
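
If we wanted to keep each hypercall batch full, something along these
lines could work (completely untested sketch; xen_blkbk_unmap_prepare_full
is a made-up helper that would fill the unmap array, skip persistent
grants, and also report how many entries of pages it consumed):

static void xen_blkbk_unmap(struct xen_blkif *blkif,
                            struct grant_page *pages[],
                            int num)
{
        struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        struct page *unmap_pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        unsigned int invcount, consumed;
        int ret;

        while (num) {
                /* Fill unmap/unmap_pages until the arrays are full or all
                 * 'num' pages have been processed, and report how many
                 * input pages were consumed. */
                invcount = xen_blkbk_unmap_prepare_full(blkif, pages, num,
                                                        unmap, unmap_pages,
                                                        &consumed);
                if (invcount) {
                        ret = gnttab_unmap_refs(unmap, NULL, unmap_pages,
                                                invcount);
                        BUG_ON(ret);
                        put_free_pages(blkif, unmap_pages, invcount);
                }
                pages += consumed;
                num -= consumed;
        }
}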

>       }
>  }
>  
> @@ -982,32 +1079,8 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
>        * the grant references associated with 'request' and provide
>        * the proper response on the ring.
>        */
> -     if (atomic_dec_and_test(&pending_req->pendcnt)) {
> -             struct xen_blkif *blkif = pending_req->blkif;
> -
> -             xen_blkbk_unmap(blkif,
> -                             pending_req->segments,
> -                             pending_req->nr_pages);
> -             make_response(blkif, pending_req->id,
> -                           pending_req->operation, pending_req->status);
> -             free_req(blkif, pending_req);
> -             /*
> -              * Make sure the request is freed before releasing blkif,
> -              * or there could be a race between free_req and the
> -              * cleanup done in xen_blkif_free during shutdown.
> -              *
> -              * NB: The fact that we might try to wake up pending_free_wq
> -              * before drain_complete (in case there's a drain going on)
> -              * it's not a problem with our current implementation
> -              * because we can assure there's no thread waiting on
> -              * pending_free_wq if there's a drain going on, but it has
> -              * to be taken into account if the current model is changed.
> -              */
> -             if (atomic_dec_and_test(&blkif->inflight) && atomic_read(&blkif->drain)) {
> -                     complete(&blkif->drain_complete);
> -             }
> -             xen_blkif_put(blkif);
> -     }
> +     if (atomic_dec_and_test(&pending_req->pendcnt))
> +             xen_blkbk_unmap_and_respond(pending_req);
>  }
>  
>  /*
> diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
> index f65b807..428a34d 100644
> --- a/drivers/block/xen-blkback/common.h
> +++ b/drivers/block/xen-blkback/common.h
> @@ -350,6 +350,9 @@ struct pending_req {
>       struct grant_page       *indirect_pages[MAX_INDIRECT_PAGES];
>       struct seg_buf          seg[MAX_INDIRECT_SEGMENTS];
>       struct bio              *biolist[MAX_INDIRECT_SEGMENTS];
> +     struct gnttab_unmap_grant_ref unmap[MAX_INDIRECT_SEGMENTS];
> +     struct page *unmap_pages[MAX_INDIRECT_SEGMENTS];

Missing alignment here.

We are increasing the size of pending_req one more time, which I would
prefer to avoid, but at least it allows us to perform the unmaps with a
single hypercall.
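
I.e. something like this, to keep the new fields aligned with the rest
of the struct:

	struct gnttab_unmap_grant_ref   unmap[MAX_INDIRECT_SEGMENTS];
	struct page                     *unmap_pages[MAX_INDIRECT_SEGMENTS];
	struct gntab_unmap_queue_data   gnttab_unmap_data;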

> +     struct gntab_unmap_queue_data gnttab_unmap_data;
>  };
>  
>  
> 


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 

