[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [Qemu-devel] [PATCH RFC 3/3] xen_disk: add persistent grant support to xen_disk backend



On Mon, Dec 31, 2012 at 12:16 PM, Roger Pau Monne <roger.pau@xxxxxxxxxx> wrote:
> This protocol extension reuses the same set of grant pages for all
> transactions between the front/back drivers, avoiding expensive tlb
> flushes, grant table lock contention and switches between userspace
> and kernel space. The full description of the protocol can be found in
> the public blkif.h header.
>
> Speed improvement with 15 guests performing I/O is ~450%.
>
> Signed-off-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
> Cc: xen-devel@xxxxxxxxxxxxx
> Cc: Stefano Stabellini <Stefano.Stabellini@xxxxxxxxxxxxx>
> Cc: Anthony PERARD <anthony.perard@xxxxxxxxxx>
> ---
> Performance comparison with the previous implementation can be seen in
> the following graph:
>
> http://xenbits.xen.org/people/royger/persistent_read_qemu.png
> ---
>  hw/xen_disk.c |  155 ++++++++++++++++++++++++++++++++++++++++++++++++++------
>  1 files changed, 138 insertions(+), 17 deletions(-)
>
> diff --git a/hw/xen_disk.c b/hw/xen_disk.c
> index 1eb485a..bafeceb 100644
> --- a/hw/xen_disk.c
> +++ b/hw/xen_disk.c
> @@ -52,6 +52,11 @@ static int max_requests = 32;
>  #define BLOCK_SIZE  512
>  #define IOCB_COUNT  (BLKIF_MAX_SEGMENTS_PER_REQUEST + 2)
>
> +struct persistent_gnt {

This should be PersistentGrant and don't forget to add a typedef.

> +    void *page;
> +    struct XenBlkDev *blkdev;
> +};
> +
>  struct ioreq {
>      blkif_request_t     req;
>      int16_t             status;
> @@ -69,6 +74,7 @@ struct ioreq {
>      int                 prot;
>      void                *page[BLKIF_MAX_SEGMENTS_PER_REQUEST];
>      void                *pages;
> +    int                 num_unmap;
>
>      /* aio status */
>      int                 aio_inflight;
> @@ -105,6 +111,12 @@ struct XenBlkDev {
>      int                 requests_inflight;
>      int                 requests_finished;
>
> +    /* Persistent grants extension */
> +    gboolean            feature_persistent;
> +    GTree               *persistent_gnts;
> +    unsigned int        persistent_gnt_c;
> +    unsigned int        max_grants;
> +
>      /* qemu block driver */
>      DriveInfo           *dinfo;
>      BlockDriverState    *bs;
> @@ -138,6 +150,29 @@ static void ioreq_reset(struct ioreq *ioreq)
>      qemu_iovec_reset(&ioreq->v);
>  }
>
> +static gint int_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
> +{
> +    uint ua = GPOINTER_TO_UINT(a);
> +    uint ub = GPOINTER_TO_UINT(b);
> +    return (ua > ub) - (ua < ub);
> +}
> +
> +static void destroy_grant(gpointer pgnt)
> +{
> +    struct persistent_gnt *grant = pgnt;
> +    XenGnttab gnt = grant->blkdev->xendev.gnttabdev;
> +
> +    if (xc_gnttab_munmap(gnt, grant->page, 1) != 0) {
> +        xen_be_printf(&grant->blkdev->xendev, 0,
> +                      "xc_gnttab_munmap failed: %s\n",
> +                      strerror(errno));
> +    }
> +    grant->blkdev->persistent_gnt_c--;
> +    xen_be_printf(&grant->blkdev->xendev, 3,
> +                  "unmapped grant %p\n", grant->page);
> +    g_free(grant);
> +}
> +
>  static struct ioreq *ioreq_start(struct XenBlkDev *blkdev)
>  {
>      struct ioreq *ioreq = NULL;
> @@ -266,21 +301,21 @@ static void ioreq_unmap(struct ioreq *ioreq)
>      XenGnttab gnt = ioreq->blkdev->xendev.gnttabdev;
>      int i;
>
> -    if (ioreq->v.niov == 0 || ioreq->mapped == 0) {
> +    if (ioreq->num_unmap == 0 || ioreq->mapped == 0) {
>          return;
>      }
>      if (batch_maps) {
>          if (!ioreq->pages) {
>              return;
>          }
> -        if (xc_gnttab_munmap(gnt, ioreq->pages, ioreq->v.niov) != 0) {
> +        if (xc_gnttab_munmap(gnt, ioreq->pages, ioreq->num_unmap) != 0) {
>              xen_be_printf(&ioreq->blkdev->xendev, 0, "xc_gnttab_munmap 
> failed: %s\n",
>                            strerror(errno));
>          }
> -        ioreq->blkdev->cnt_map -= ioreq->v.niov;
> +        ioreq->blkdev->cnt_map -= ioreq->num_unmap;
>          ioreq->pages = NULL;
>      } else {
> -        for (i = 0; i < ioreq->v.niov; i++) {
> +        for (i = 0; i < ioreq->num_unmap; i++) {
>              if (!ioreq->page[i]) {
>                  continue;
>              }
> @@ -298,41 +333,107 @@ static void ioreq_unmap(struct ioreq *ioreq)
>  static int ioreq_map(struct ioreq *ioreq)
>  {
>      XenGnttab gnt = ioreq->blkdev->xendev.gnttabdev;
> -    int i;
> +    uint32_t domids[BLKIF_MAX_SEGMENTS_PER_REQUEST];
> +    uint32_t refs[BLKIF_MAX_SEGMENTS_PER_REQUEST];
> +    void *page[BLKIF_MAX_SEGMENTS_PER_REQUEST];
> +    int i, j, new_maps = 0;
> +    struct persistent_gnt *grant;
>
>      if (ioreq->v.niov == 0 || ioreq->mapped == 1) {
>          return 0;
>      }
> -    if (batch_maps) {
> +    if (ioreq->blkdev->feature_persistent) {
> +        for (i = 0; i < ioreq->v.niov; i++) {
> +            grant = g_tree_lookup(ioreq->blkdev->persistent_gnts,
> +                                    GUINT_TO_POINTER(ioreq->refs[i]));
> +
> +            if (grant != NULL) {
> +                page[i] = grant->page;
> +                xen_be_printf(&ioreq->blkdev->xendev, 3,
> +                              "using persistent-grant %" PRIu32 "\n",
> +                              ioreq->refs[i]);
> +            } else {
> +                    /* Add the grant to the list of grants that
> +                     * should be mapped
> +                     */
> +                    domids[new_maps] = ioreq->domids[i];
> +                    refs[new_maps] = ioreq->refs[i];
> +                    page[i] = NULL;
> +                    new_maps++;
> +            }
> +        }
> +        /* Set the protection to RW, since grants may be reused later
> +         * with a different protection than the one needed for this request
> +         */
> +        ioreq->prot = PROT_WRITE | PROT_READ;
> +    } else {
> +        /* All grants in the request should be mapped */
> +        memcpy(refs, ioreq->refs, sizeof(refs));
> +        memcpy(domids, ioreq->domids, sizeof(domids));
> +        memset(page, 0, sizeof(page));
> +        new_maps = ioreq->v.niov;
> +    }
> +
> +    if (batch_maps && new_maps) {
>          ioreq->pages = xc_gnttab_map_grant_refs
> -            (gnt, ioreq->v.niov, ioreq->domids, ioreq->refs, ioreq->prot);
> +            (gnt, new_maps, domids, refs, ioreq->prot);
>          if (ioreq->pages == NULL) {
>              xen_be_printf(&ioreq->blkdev->xendev, 0,
>                            "can't map %d grant refs (%s, %d maps)\n",
> -                          ioreq->v.niov, strerror(errno), 
> ioreq->blkdev->cnt_map);
> +                          new_maps, strerror(errno), ioreq->blkdev->cnt_map);
>              return -1;
>          }
> -        for (i = 0; i < ioreq->v.niov; i++) {
> -            ioreq->v.iov[i].iov_base = ioreq->pages + i * XC_PAGE_SIZE +
> -                (uintptr_t)ioreq->v.iov[i].iov_base;
> +        for (i = 0, j = 0; i < ioreq->v.niov; i++) {
> +            if (page[i] == NULL)
> +                page[i] = ioreq->pages + (j++) * XC_PAGE_SIZE;
>          }
> -        ioreq->blkdev->cnt_map += ioreq->v.niov;
> -    } else  {
> -        for (i = 0; i < ioreq->v.niov; i++) {
> +        ioreq->blkdev->cnt_map += new_maps;
> +    } else if (new_maps)  {
> +        for (i = 0; i < new_maps; i++) {
>              ioreq->page[i] = xc_gnttab_map_grant_ref
> -                (gnt, ioreq->domids[i], ioreq->refs[i], ioreq->prot);
> +                (gnt, domids[i], refs[i], ioreq->prot);
>              if (ioreq->page[i] == NULL) {
>                  xen_be_printf(&ioreq->blkdev->xendev, 0,
>                                "can't map grant ref %d (%s, %d maps)\n",
> -                              ioreq->refs[i], strerror(errno), 
> ioreq->blkdev->cnt_map);
> +                              refs[i], strerror(errno), 
> ioreq->blkdev->cnt_map);
>                  ioreq_unmap(ioreq);
>                  return -1;
>              }
> -            ioreq->v.iov[i].iov_base = ioreq->page[i] + 
> (uintptr_t)ioreq->v.iov[i].iov_base;
>              ioreq->blkdev->cnt_map++;
>          }
> +        for (i = 0, j = 0; i < ioreq->v.niov; i++) {
> +            if (page[i] == NULL)
> +                page[i] = ioreq->page[j++];
> +        }
> +    }
> +    if (ioreq->blkdev->feature_persistent) {
> +        while((ioreq->blkdev->persistent_gnt_c < ioreq->blkdev->max_grants) 
> &&
> +              new_maps) {
> +            /* Go through the list of newly mapped grants and add as many
> +             * as possible to the list of persistently mapped grants
> +             */
> +            grant = g_malloc0(sizeof(*grant));
> +            new_maps--;
> +            if (batch_maps)
> +                grant->page = ioreq->pages + (new_maps) * XC_PAGE_SIZE;
> +            else
> +                grant->page = ioreq->page[new_maps];
> +            grant->blkdev = ioreq->blkdev;
> +            xen_be_printf(&ioreq->blkdev->xendev, 3,
> +                          "adding grant %" PRIu32 " page: %p\n",
> +                          refs[new_maps], grant->page);
> +            g_tree_insert(ioreq->blkdev->persistent_gnts,
> +                          GUINT_TO_POINTER(refs[new_maps]),
> +                          grant);
> +            ioreq->blkdev->persistent_gnt_c++;
> +        }
> +    }
> +    for (i = 0; i < ioreq->v.niov; i++) {
> +        ioreq->v.iov[i].iov_base = page[i] +
> +                                   (uintptr_t)ioreq->v.iov[i].iov_base;
>      }
>      ioreq->mapped = 1;
> +    ioreq->num_unmap = new_maps;
>      return 0;
>  }
>
> @@ -690,6 +791,7 @@ static int blk_init(struct XenDevice *xendev)
>
>      /* fill info */
>      xenstore_write_be_int(&blkdev->xendev, "feature-barrier", 1);
> +    xenstore_write_be_int(&blkdev->xendev, "feature-persistent", 1);
>      xenstore_write_be_int(&blkdev->xendev, "info",            info);
>      xenstore_write_be_int(&blkdev->xendev, "sector-size",     
> blkdev->file_blk);
>      xenstore_write_be_int(&blkdev->xendev, "sectors",
> @@ -713,6 +815,7 @@ out_error:
>  static int blk_connect(struct XenDevice *xendev)
>  {
>      struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, 
> xendev);
> +    int pers;
>
>      if (xenstore_read_fe_int(&blkdev->xendev, "ring-ref", &blkdev->ring_ref) 
> == -1) {
>          return -1;
> @@ -721,6 +824,11 @@ static int blk_connect(struct XenDevice *xendev)
>                               &blkdev->xendev.remote_port) == -1) {
>          return -1;
>      }
> +    if (xenstore_read_fe_int(&blkdev->xendev, "feature-persistent", &pers)) {
> +        blkdev->feature_persistent = FALSE;
> +    } else {
> +        blkdev->feature_persistent = !!pers;
> +    }
>
>      blkdev->protocol = BLKIF_PROTOCOL_NATIVE;
>      if (blkdev->xendev.protocol) {
> @@ -764,6 +872,15 @@ static int blk_connect(struct XenDevice *xendev)
>      }
>      }
>
> +    if (blkdev->feature_persistent) {
> +        /* Init persistent grants */
> +        blkdev->max_grants = max_requests * BLKIF_MAX_SEGMENTS_PER_REQUEST;
> +        blkdev->persistent_gnts = g_tree_new_full((GCompareDataFunc)int_cmp,
> +                                             NULL, NULL,
> +                                             (GDestroyNotify)destroy_grant);
> +        blkdev->persistent_gnt_c = 0;
> +    }
> +
>      xen_be_bind_evtchn(&blkdev->xendev);
>
>      xen_be_printf(&blkdev->xendev, 1, "ok: proto %s, ring-ref %d, "
> @@ -804,6 +921,10 @@ static int blk_free(struct XenDevice *xendev)
>          blk_disconnect(xendev);
>      }
>
> +    /* Free persistent grants */
> +    if (blkdev->feature_persistent)
> +        g_tree_destroy(blkdev->persistent_gnts);
> +
>      while (!QLIST_EMPTY(&blkdev->freelist)) {
>          ioreq = QLIST_FIRST(&blkdev->freelist);
>          QLIST_REMOVE(ioreq, list);
> --
> 1.7.7.5 (Apple Git-26)
>
>

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.