WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] Re: [PATCH V4 2/3] xen-blkfront: teach blkfront driver to ha

On Tue, Sep 13, 2011 at 04:33:27PM +0800, Li Dongyang wrote:
> Hi, Konrad
> On Thu, Sep 1, 2011 at 11:25 PM, Konrad Rzeszutek Wilk
> <konrad.wilk@xxxxxxxxxx> wrote:
> > On Thu, Sep 01, 2011 at 06:39:09PM +0800, Li Dongyang wrote:
> >> The blkfront driver now will read discard related nodes from xenstore,
> >> and set up the request queue, then we can forward the
> >> discard requests to backend driver.
> >
> >
> > A better description is as follows:
> >
> > xen-blkfront: Handle discard requests.
> >
> >    If the backend advertises 'feature-discard', then interrogate
> >    the backend for alignment, granularity, and max discard block size.
> >    Setup the request queue with the appropriate values and send the
> >    discard operation as required.
> >
> >
> >>
> >> Signed-off-by: Li Dongyang <lidongyang@xxxxxxxxxx>
> >> ---
> >>  drivers/block/xen-blkfront.c |  111 
> >> +++++++++++++++++++++++++++++++++---------
> >>  1 files changed, 88 insertions(+), 23 deletions(-)
> >>
> >> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
> >> index 9ea8c25..86e2c63 100644
> >> --- a/drivers/block/xen-blkfront.c
> >> +++ b/drivers/block/xen-blkfront.c
> >> @@ -98,6 +98,9 @@ struct blkfront_info
> >>       unsigned long shadow_free;
> >>       unsigned int feature_flush;
> >>       unsigned int flush_op;
> >> +     unsigned int feature_discard;
> >> +     unsigned int discard_granularity;
> >> +     unsigned int discard_alignment;
> >>       int is_ready;
> >>  };
> >>
> >> @@ -302,29 +305,36 @@ static int blkif_queue_request(struct request *req)
> >>               ring_req->operation = info->flush_op;
> >>       }
> >>
> >> -     ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
> >> -     BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
> >> +     if (unlikely(req->cmd_flags & REQ_DISCARD)) {
> >> +             /* id, sector_number and handle are set above. */
> >> +             ring_req->operation = BLKIF_OP_DISCARD;
> >> +             ring_req->nr_segments = 0;
> >> +             ring_req->u.discard.nr_sectors = blk_rq_sectors(req);
> >> +     } else {
> >> +             ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
> >> +             BUG_ON(ring_req->nr_segments > 
> >> BLKIF_MAX_SEGMENTS_PER_REQUEST);
> >>
> >> -     for_each_sg(info->sg, sg, ring_req->nr_segments, i) {
> >> -             buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
> >> -             fsect = sg->offset >> 9;
> >> -             lsect = fsect + (sg->length >> 9) - 1;
> >> -             /* install a grant reference. */
> >> -             ref = gnttab_claim_grant_reference(&gref_head);
> >> -             BUG_ON(ref == -ENOSPC);
> >> +             for_each_sg(info->sg, sg, ring_req->nr_segments, i) {
> >> +                     buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
> >> +                     fsect = sg->offset >> 9;
> >> +                     lsect = fsect + (sg->length >> 9) - 1;
> >> +                     /* install a grant reference. */
> >> +                     ref = gnttab_claim_grant_reference(&gref_head);
> >> +                     BUG_ON(ref == -ENOSPC);
> >>
> >> -             gnttab_grant_foreign_access_ref(
> >> -                             ref,
> >> -                             info->xbdev->otherend_id,
> >> -                             buffer_mfn,
> >> -                             rq_data_dir(req) );
> >> -
> >> -             info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
> >> -             ring_req->u.rw.seg[i] =
> >> -                             (struct blkif_request_segment) {
> >> -                                     .gref       = ref,
> >> -                                     .first_sect = fsect,
> >> -                                     .last_sect  = lsect };
> >> +                     gnttab_grant_foreign_access_ref(
> >> +                                     ref,
> >> +                                     info->xbdev->otherend_id,
> >> +                                     buffer_mfn,
> >> +                                     rq_data_dir(req));
> >> +
> >> +                     info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
> >> +                     ring_req->u.rw.seg[i] =
> >> +                                     (struct blkif_request_segment) {
> >> +                                             .gref       = ref,
> >> +                                             .first_sect = fsect,
> >> +                                             .last_sect  = lsect };
> >> +             }
> >>       }
> >>
> >>       info->ring.req_prod_pvt++;
> >> @@ -399,6 +409,7 @@ wait:
> >>  static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
> >>  {
> >>       struct request_queue *rq;
> >> +     struct blkfront_info *info = gd->private_data;
> >>
> >>       rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
> >>       if (rq == NULL)
> >> @@ -406,6 +417,13 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, 
> >> u16 sector_size)
> >>
> >>       queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
> >>
> >> +     if (info->feature_discard) {
> >> +             queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq);
> >> +             blk_queue_max_discard_sectors(rq, get_capacity(gd));
> >
> > This is not correct. I took a look at the SCSI support for this
> > ('sd_config_discard') and if you look there carefully you will see that
> > the value can be 64KB for example.
> >
> > You need to provide a mechanism similar to 'discard-*' to fetch that data
> > from the backend.
> >
> >> +             rq->limits.discard_granularity = info->discard_granularity;
> >> +             rq->limits.discard_alignment = info->discard_alignment;
> >> +     }
> >> +
> >>       /* Hard sector size and max sectors impersonate the equiv. hardware. 
> >> */
> >>       blk_queue_logical_block_size(rq, sector_size);
> >>       blk_queue_max_hw_sectors(rq, 512);
> >> @@ -722,6 +740,19 @@ static irqreturn_t blkif_interrupt(int irq, void 
> >> *dev_id)
> >>
> >>               error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
> >>               switch (bret->operation) {
> >> +             case BLKIF_OP_DISCARD:
> >> +                     if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
> >> +                             struct request_queue *rq = info->rq;
> >> +                             printk(KERN_WARNING "blkfront: %s: discard 
> >> op failed\n",
> >> +                                        info->gd->disk_name);
> >> +                             error = -EOPNOTSUPP;
> >> +                             info->feature_discard = 0;
> >> +                             spin_lock(rq->queue_lock);
> we should not take the spin_lock here,
> as the rq->queue_lock is actually blkif_io_lock, we pass the
> blkif_io_lock to blk_init_queue
> when we init the queue,
> and blkif_io_lock has already acquired above in blkif_interrupt(), so
> we will deadlock here.
> 
> I'm so sorry if this is already in your tree, I've tested with
> BLKIF_RSP_ERROR but forgot the EOPNOTSUPP case,
> I feel like I'm a stupid ass.
> could you get rid of the spin_lock and spin_unlock below? Thanks

Just send me a patch with the change and I will apply it to the tree.

Thanks for testing it even further!

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel