|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH v4 04/10] xen/blkfront: split per device io_lock
On Mon, Nov 02, 2015 at 12:21:40PM +0800, Bob Liu wrote:
> The per device io_lock became a coarser grained lock after multi-queues/rings
> was introduced, this patch introduced a fine-grained ring_lock for each ring.
s/was introduced/was introduced (see commit titled XYZ)/
s/introdued/introduces/
>
> The old io_lock was renamed to dev_lock and only protect the ->grants list
s/was/is/
s/protect/protects/
> which is shared by all rings.
>
> Signed-off-by: Bob Liu <bob.liu@xxxxxxxxxx>
> ---
> drivers/block/xen-blkfront.c | 57
> ++++++++++++++++++++++++++------------------
> 1 file changed, 34 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
> index eab78e7..8cc5995 100644
> --- a/drivers/block/xen-blkfront.c
> +++ b/drivers/block/xen-blkfront.c
> @@ -121,6 +121,7 @@ MODULE_PARM_DESC(max_ring_page_order, "Maximum order of
> pages to be used for the
> */
> struct blkfront_ring_info {
> struct blkif_front_ring ring;
Can you add a comment explaining the lock semantic? As in under what conditions
should it be taken? Like you have it below.
> + spinlock_t ring_lock;
> unsigned int ring_ref[XENBUS_MAX_RING_PAGES];
> unsigned int evtchn, irq;
> struct work_struct work;
> @@ -138,7 +139,8 @@ struct blkfront_ring_info {
> */
> struct blkfront_info
> {
> - spinlock_t io_lock;
> + /* Lock to proect info->grants list shared by multi rings */
s/proect/protect/
Missing full stop.
> + spinlock_t dev_lock;
Shouldn't it be right next to what it is protecting?
That is right below (or above): 'struct list_head grants;'?
> struct mutex mutex;
> struct xenbus_device *xbdev;
> struct gendisk *gd;
> @@ -224,6 +226,7 @@ static int fill_grant_buffer(struct blkfront_ring_info
> *rinfo, int num)
> struct grant *gnt_list_entry, *n;
> int i = 0;
>
> + spin_lock_irq(&info->dev_lock);
Why there? Why not where you add it to the list?
> while(i < num) {
> gnt_list_entry = kzalloc(sizeof(struct grant), GFP_NOIO);
> if (!gnt_list_entry)
> @@ -242,6 +245,7 @@ static int fill_grant_buffer(struct blkfront_ring_info
> *rinfo, int num)
> list_add(&gnt_list_entry->node, &info->grants);
Right here that is?
You are holding the lock for the duration of 'kzalloc' and 'alloc_page'.
And more interestingly, GFP_NOIO translates to __GFP_WAIT which means
it can call 'schedule'. - And you have taken an spinlock. That should
have thrown lots of warnings?
> i++;
> }
> + spin_unlock_irq(&info->dev_lock);
>
> return 0;
>
> @@ -254,6 +258,7 @@ out_of_memory:
> kfree(gnt_list_entry);
> i--;
> }
> + spin_unlock_irq(&info->dev_lock);
Just do it around the 'list_del' operation. You are using an
'safe'
> BUG_ON(i != 0);
> return -ENOMEM;
> }
> @@ -265,6 +270,7 @@ static struct grant *get_grant(grant_ref_t *gref_head,
> struct grant *gnt_list_entry;
> unsigned long buffer_gfn;
>
> + spin_lock(&info->dev_lock);
> BUG_ON(list_empty(&info->grants));
> gnt_list_entry = list_first_entry(&info->grants, struct grant,
> node);
> @@ -272,8 +278,10 @@ static struct grant *get_grant(grant_ref_t *gref_head,
>
> if (gnt_list_entry->gref != GRANT_INVALID_REF) {
> info->persistent_gnts_c--;
> + spin_unlock(&info->dev_lock);
> return gnt_list_entry;
> }
> + spin_unlock(&info->dev_lock);
Just have one spin_unlock. Put it right before the 'if
(gnt_list_entry->gref)..'.
>
> /* Assign a gref to this page */
> gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head);
> @@ -639,7 +647,7 @@ static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
> struct blkfront_ring_info *rinfo = (struct blkfront_ring_info
> *)hctx->driver_data;
>
> blk_mq_start_request(qd->rq);
> - spin_lock_irq(&info->io_lock);
> + spin_lock_irq(&rinfo->ring_lock);
> if (RING_FULL(&rinfo->ring))
> goto out_busy;
>
> @@ -650,15 +658,15 @@ static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
> goto out_busy;
>
> flush_requests(rinfo);
> - spin_unlock_irq(&info->io_lock);
> + spin_unlock_irq(&rinfo->ring_lock);
> return BLK_MQ_RQ_QUEUE_OK;
>
> out_err:
> - spin_unlock_irq(&info->io_lock);
> + spin_unlock_irq(&rinfo->ring_lock);
> return BLK_MQ_RQ_QUEUE_ERROR;
>
> out_busy:
> - spin_unlock_irq(&info->io_lock);
> + spin_unlock_irq(&rinfo->ring_lock);
> blk_mq_stop_hw_queue(hctx);
> return BLK_MQ_RQ_QUEUE_BUSY;
> }
> @@ -959,21 +967,22 @@ static void xlvbd_release_gendisk(struct blkfront_info
> *info)
> info->gd = NULL;
> }
>
> -/* Must be called with io_lock holded */
> static void kick_pending_request_queues(struct blkfront_ring_info *rinfo)
> {
> + unsigned long flags;
> +
> + spin_lock_irqsave(&rinfo->ring_lock, flags);
> if (!RING_FULL(&rinfo->ring))
> blk_mq_start_stopped_hw_queues(rinfo->dev_info->rq, true);
> + spin_unlock_irqrestore(&rinfo->ring_lock, flags);
> }
>
> static void blkif_restart_queue(struct work_struct *work)
> {
> struct blkfront_ring_info *rinfo = container_of(work, struct
> blkfront_ring_info, work);
>
> - spin_lock_irq(&rinfo->dev_info->io_lock);
> if (rinfo->dev_info->connected == BLKIF_STATE_CONNECTED)
> kick_pending_request_queues(rinfo);
> - spin_unlock_irq(&rinfo->dev_info->io_lock);
> }
>
> static void blkif_free_ring(struct blkfront_ring_info *rinfo)
> @@ -1065,7 +1074,7 @@ static void blkif_free(struct blkfront_info *info, int
> suspend)
> int i;
>
> /* Prevent new requests being issued until we fix things up. */
> - spin_lock_irq(&info->io_lock);
> + spin_lock_irq(&info->dev_lock);
I would move that lock to be right before:
if (!list_empty(&info->grants)).
> info->connected = suspend ?
> BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
> /* No more blkif_request(). */
> @@ -1091,7 +1100,7 @@ static void blkif_free(struct blkfront_info *info, int
> suspend)
>
> for (i = 0; i < info->nr_rings; i++)
> blkif_free_ring(&info->rinfo[i]);
> - spin_unlock_irq(&info->io_lock);
> + spin_unlock_irq(&info->dev_lock);
Please move them before this loop. The blkif_free_ring is not touching
the 'grants' list.
> }
>
> static void blkif_completion(struct blk_shadow *s, struct blkfront_ring_info
> *rinfo,
> @@ -1103,6 +1112,7 @@ static void blkif_completion(struct blk_shadow *s,
> struct blkfront_ring_info *ri
> void *shared_data;
> int nseg;
> struct blkfront_info *info = rinfo->dev_info;
> + unsigned long flags;
>
> nseg = s->req.operation == BLKIF_OP_INDIRECT ?
> s->req.u.indirect.nr_segments : s->req.u.rw.nr_segments;
> @@ -1120,6 +1130,7 @@ static void blkif_completion(struct blk_shadow *s,
> struct blkfront_ring_info *ri
> kunmap_atomic(shared_data);
> }
> }
> +
???
Stray?
> /* Add the persistent grant into the list of free grants */
> for (i = 0; i < nseg; i++) {
> if (gnttab_query_foreign_access(s->grants_used[i]->gref)) {
> @@ -1132,8 +1143,10 @@ static void blkif_completion(struct blk_shadow *s,
> struct blkfront_ring_info *ri
> if (!info->feature_persistent)
> pr_alert_ratelimited("backed has not unmapped
> grant: %u\n",
> s->grants_used[i]->gref);
> + spin_lock_irqsave(&info->dev_lock, flags);
> list_add(&s->grants_used[i]->node, &info->grants);
> info->persistent_gnts_c++;
> + spin_unlock_irqrestore(&info->dev_lock, flags);
Are we also protecting the persistent_gnts_c value? If so that should
be documented in the 'struct blkfront_info'
> } else {
> /*
> * If the grant is not mapped by the backend we end the
> @@ -1143,7 +1156,9 @@ static void blkif_completion(struct blk_shadow *s,
> struct blkfront_ring_info *ri
> */
> gnttab_end_foreign_access(s->grants_used[i]->gref, 0,
> 0UL);
> s->grants_used[i]->gref = GRANT_INVALID_REF;
> + spin_lock_irqsave(&info->dev_lock, flags);
> list_add_tail(&s->grants_used[i]->node, &info->grants);
> + spin_unlock_irqrestore(&info->dev_lock, flags);
> }
> }
> if (s->req.operation == BLKIF_OP_INDIRECT) {
> @@ -1152,8 +1167,10 @@ static void blkif_completion(struct blk_shadow *s,
> struct blkfront_ring_info *ri
> if (!info->feature_persistent)
> pr_alert_ratelimited("backed has not
> unmapped grant: %u\n",
>
> s->indirect_grants[i]->gref);
> + spin_lock_irqsave(&info->dev_lock, flags);
> list_add(&s->indirect_grants[i]->node,
> &info->grants);
> info->persistent_gnts_c++;
> + spin_unlock_irqrestore(&info->dev_lock, flags);
> } else {
> struct page *indirect_page;
>
> @@ -1162,12 +1179,14 @@ static void blkif_completion(struct blk_shadow *s,
> struct blkfront_ring_info *ri
> * Add the used indirect page back to the list
> of
> * available pages for indirect grefs.
> */
> + spin_lock_irqsave(&info->dev_lock, flags);
> if (!info->feature_persistent) {
> indirect_page =
> pfn_to_page(s->indirect_grants[i]->pfn);
> list_add(&indirect_page->lru,
> &rinfo->indirect_pages);
> }
> s->indirect_grants[i]->gref = GRANT_INVALID_REF;
> list_add_tail(&s->indirect_grants[i]->node,
> &info->grants);
> + spin_unlock_irqrestore(&info->dev_lock, flags);
> }
> }
> }
> @@ -1183,13 +1202,10 @@ static irqreturn_t blkif_interrupt(int irq, void
> *dev_id)
> struct blkfront_info *info = rinfo->dev_info;
> int error;
>
> - spin_lock_irqsave(&info->io_lock, flags);
> -
> - if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
> - spin_unlock_irqrestore(&info->io_lock, flags);
> + if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
> return IRQ_HANDLED;
> - }
>
> + spin_lock_irqsave(&rinfo->ring_lock, flags);
> again:
> rp = rinfo->ring.sring->rsp_prod;
> rmb(); /* Ensure we see queued responses up to 'rp'. */
> @@ -1280,10 +1296,9 @@ static irqreturn_t blkif_interrupt(int irq, void
> *dev_id)
> } else
> rinfo->ring.sring->rsp_event = i + 1;
>
> + spin_unlock_irqrestore(&rinfo->ring_lock, flags);
> kick_pending_request_queues(rinfo);
We drop the lock and then take it again in 'kick_pending_request_queues'
Would it be better if we have an argument 'locked' which would
bypass the taking of the lock once more?
>
> - spin_unlock_irqrestore(&info->io_lock, flags);
> -
> return IRQ_HANDLED;
> }
>
> @@ -1529,10 +1544,11 @@ static int blkfront_probe(struct xenbus_device *dev,
> INIT_LIST_HEAD(&rinfo->indirect_pages);
> rinfo->dev_info = info;
> INIT_WORK(&rinfo->work, blkif_restart_queue);
> + spin_lock_init(&rinfo->ring_lock);
> }
>
> mutex_init(&info->mutex);
> - spin_lock_init(&info->io_lock);
> + spin_lock_init(&info->dev_lock);
> info->xbdev = dev;
> info->vdevice = vdevice;
> INIT_LIST_HEAD(&info->grants);
> @@ -1629,8 +1645,6 @@ static int blkif_recover(struct blkfront_info *info)
> }
> xenbus_switch_state(info->xbdev, XenbusStateConnected);
>
> - spin_lock_irq(&info->io_lock);
> -
> /* Now safe for us to use the shared ring */
> info->connected = BLKIF_STATE_CONNECTED;
>
> @@ -1648,7 +1662,6 @@ static int blkif_recover(struct blkfront_info *info)
> BUG_ON(req->nr_phys_segments > segs);
> blk_mq_requeue_request(req);
> }
> - spin_unlock_irq(&info->io_lock);
> blk_mq_kick_requeue_list(info->rq);
>
> while ((bio = bio_list_pop(&bio_list)) != NULL) {
> @@ -1994,11 +2007,9 @@ static void blkfront_connect(struct blkfront_info
> *info)
> xenbus_switch_state(info->xbdev, XenbusStateConnected);
>
> /* Kick pending requests. */
> - spin_lock_irq(&info->io_lock);
> info->connected = BLKIF_STATE_CONNECTED;
> for (i = 0; i < info->nr_rings; i++)
> kick_pending_request_queues(&info->rinfo[i]);
> - spin_unlock_irq(&info->io_lock);
>
> add_disk(info->gd);
>
> --
> 1.8.3.1
>
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |