[Xen-devel] [RFC] Support of non-indirect grant backend on 64KB guest
Hi,

Firstly, this patch is not ready at all and is mostly here to collect comments about the way to do it. It's not clean, so no need to complain about the coding style.

The qdisk backend in QEMU does not support indirect grants, which means that a single request can only carry 11 * 4KB = 44KB. When using 64KB pages, a Linux block request (struct request) may contain up to 64KB of data, because the block segment size must be at least the size of a Linux page. So when indirect grants are not supported by the backend, we are not able to fit all the data in a single request and therefore need to create a second request to carry the rest of the data.
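To make the arithmetic concrete, here is a small standalone sketch (illustrative only, not part of the patch; the constant names mirror the Xen/Linux ones, but the program itself is just for exposition) showing why a second ring request is needed and where it has to start:

#include <stdbool.h>
#include <stdio.h>

#define XEN_PAGE_SIZE                   4096   /* grants always cover 4KB */
#define PAGE_SIZE                       65536  /* 64KB guest page */
#define BLKIF_MAX_SEGMENTS_PER_REQUEST  11     /* non-indirect ring request limit */
#define SECTOR_SIZE                     512

int main(void)
{
    /* A block request covering one 64KB guest page needs 16 grants. */
    unsigned int num_grant = PAGE_SIZE / XEN_PAGE_SIZE;

    /* Without indirect descriptors a ring request holds at most 11 grants
     * (44KB), so the remaining grants must go in a second request. */
    bool require_extra_req = num_grant > BLKIF_MAX_SEGMENTS_PER_REQUEST;

    unsigned int first_segs  = BLKIF_MAX_SEGMENTS_PER_REQUEST;             /* 11 */
    unsigned int second_segs = num_grant - BLKIF_MAX_SEGMENTS_PER_REQUEST; /* 5 */

    /* The second request continues right after the first one, i.e. its
     * sector_number is offset by 11 * 4096 / 512 = 88 sectors. */
    unsigned int sector_offset =
        (BLKIF_MAX_SEGMENTS_PER_REQUEST * XEN_PAGE_SIZE) / SECTOR_SIZE;

    printf("grants: %u, extra request: %s, split %u/%u, offset %u sectors\n",
           num_grant, require_extra_req ? "yes" : "no",
           first_segs, second_segs, sector_offset);
    return 0;
}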
I wrote a patch last week which makes a 64KB guest boot with qdisk, although I'm not sure this is the right way to do it. I would appreciate it if one of the block maintainers could give me some insight about it.

The patch can be found below.

Regards,

commit 62922ae04af371bcb6e4467eb2e470d83dac2a81
Author: Julien Grall <julien.grall@xxxxxxxxxx>
Date:   Thu Aug 13 13:13:35 2015 +0100

    blkfront: Start to handle non-indirect grant

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 68ca4e5..76247ab 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -79,6 +79,13 @@ struct blk_shadow {
 	struct blk_grant **indirect_grants;
 	struct scatterlist *sg;
 	unsigned int num_sg;
+	enum
+	{
+		REQ_WAITING,
+		REQ_DONE,
+		REQ_FAIL
+	} status;
+	unsigned long associated_id;
 };
 
 struct split_bio {
@@ -467,6 +474,7 @@ static unsigned long blkif_ring_get_request(struct blkfront_info *info,
 
 	id = get_id_from_freelist(info);
 	info->shadow[id].request = req;
+	info->shadow[id].status = REQ_WAITING;
 
 	(*ring_req)->u.rw.id = id;
 
@@ -508,6 +516,9 @@ struct setup_rw_req {
 	bool need_copy;
 	unsigned int bvec_off;
 	char *bvec_data;
+
+	bool require_extra_req;
+	struct blkif_request *ring_req2;
 };
 
 static void blkif_setup_rw_req_grant(unsigned long gfn, unsigned int offset,
@@ -517,12 +528,20 @@ static void blkif_setup_rw_req_grant(unsigned long gfn, unsigned int offset,
 	int n, ref;
 	struct blk_grant *gnt_list_entry;
 	unsigned int fsect, lsect;
+	struct blkif_request *ring_req;
 	/* Convenient aliases */
 	unsigned int grant_idx = setup->grant_idx;
-	struct blkif_request *ring_req = setup->ring_req;
 	struct blkfront_info *info = setup->info;
 	struct blk_shadow *shadow = &info->shadow[setup->id];
 
+	if (likely(!setup->require_extra_req ||
+		   grant_idx < BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
+		ring_req = setup->ring_req;
+	} else {
+		grant_idx -= BLKIF_MAX_SEGMENTS_PER_REQUEST;
+		ring_req = setup->ring_req2;
+	}
+
 	if ((ring_req->operation == BLKIF_OP_INDIRECT) &&
 	    (grant_idx % GRANTS_PER_INDIRECT_FRAME == 0)) {
 		if (setup->segments)
@@ -537,7 +556,7 @@ static void blkif_setup_rw_req_grant(unsigned long gfn, unsigned int offset,
 
 	gnt_list_entry = get_grant(&setup->gref_head, gfn, info);
 	ref = gnt_list_entry->gref;
-	shadow->grants_used[grant_idx] = gnt_list_entry;
+	shadow->grants_used[setup->grant_idx] = gnt_list_entry;
 
 	if (setup->need_copy) {
 		void *shared_data;
@@ -579,11 +598,31 @@ static void blkif_setup_rw_req_grant(unsigned long gfn, unsigned int offset,
 	(setup->grant_idx)++;
 }
 
+static void blkif_setup_extra_req(struct blkif_request *first,
+				  struct blkif_request *second)
+{
+	uint16_t nr_segments = first->u.rw.nr_segments;
+
+	/* The second request is only present when the first request uses
+	 * all its segments. It is always a continuation of the first one.
+	 */
+	first->u.rw.nr_segments = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+
+	second->u.rw.nr_segments = nr_segments - BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	second->u.rw.sector_number = first->u.rw.sector_number +
+		(BLKIF_MAX_SEGMENTS_PER_REQUEST * XEN_PAGE_SIZE) / 512;
+
+	second->u.rw.handle = first->u.rw.handle;
+	second->operation = first->operation;
+}
+
 static int blkif_queue_rw_req(struct request *req)
 {
 	struct blkfront_info *info = req->rq_disk->private_data;
-	struct blkif_request *ring_req;
-	unsigned long id;
+	struct blkif_request *ring_req, *ring_req2 = NULL;
+	unsigned long id, id2 = ~0;
+	bool require_extra_req = false;
 	int i;
 	struct setup_rw_req setup = {
 		.grant_idx = 0,
@@ -628,19 +667,28 @@ static int blkif_queue_rw_req(struct request *req)
 	/* Fill out a communications ring structure. */
 	id = blkif_ring_get_request(info, req, &ring_req);
 
-	BUG_ON(info->max_indirect_segments == 0 &&
-	       GREFS(req->nr_phys_segments) > BLKIF_MAX_SEGMENTS_PER_REQUEST);
-	BUG_ON(info->max_indirect_segments &&
-	       GREFS(req->nr_phys_segments) > info->max_indirect_segments);
-
 	num_sg = blk_rq_map_sg(req->q, req, info->shadow[id].sg);
 	num_grant = 0;
 	/* Calculate the number of grant used */
 	for_each_sg(info->shadow[id].sg, sg, num_sg, i)
 	       num_grant += gnttab_count_grant(sg->offset, sg->length);
 
+	require_extra_req = info->max_indirect_segments == 0 &&
+		num_grant > BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	BUG_ON((XEN_PAGE_SIZE == PAGE_SIZE) && require_extra_req);
+
+	if (unlikely(require_extra_req))
+	{
+		id2 = blkif_ring_get_request(info, req, &ring_req2);
+		info->shadow[id2].num_sg = 0;
+		info->shadow[id2].associated_id = id;
+	}
+
+	info->shadow[id].associated_id = id2;
+	info->shadow[id].num_sg = num_sg;
 
-	if (num_grant > BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+	if (num_grant > BLKIF_MAX_SEGMENTS_PER_REQUEST &&
+	    likely(!require_extra_req)) {
 		/*
 		 * The indirect operation can only be a BLKIF_OP_READ or
 		 * BLKIF_OP_WRITE
@@ -680,10 +728,17 @@ static int blkif_queue_rw_req(struct request *req)
 			}
 		}
 		ring_req->u.rw.nr_segments = num_grant;
+		if (unlikely(require_extra_req))
+			blkif_setup_extra_req(ring_req, ring_req2);
 	}
 
 	setup.ring_req = ring_req;
 	setup.id = id;
+
+	setup.require_extra_req = require_extra_req;
+	if (unlikely(require_extra_req))
+		setup.ring_req2 = ring_req2;
+
 	for_each_sg(info->shadow[id].sg, sg, num_sg, i) {
 		BUG_ON(sg->offset + sg->length > PAGE_SIZE);
 
@@ -706,6 +761,8 @@ static int blkif_queue_rw_req(struct request *req)
 
 	/* Keep a private copy so we can reissue requests when recovering. */
 	info->shadow[id].req = *ring_req;
+	if (unlikely(require_extra_req))
+		info->shadow[id2].req = *ring_req2;
 
 	if (new_persistent_gnts)
 		gnttab_free_grant_references(setup.gref_head);
@@ -797,7 +854,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
 	memset(&info->tag_set, 0, sizeof(info->tag_set));
 	info->tag_set.ops = &blkfront_mq_ops;
 	info->tag_set.nr_hw_queues = 1;
-	info->tag_set.queue_depth = BLK_RING_SIZE(info);
+	info->tag_set.queue_depth = BLK_RING_SIZE(info) / 2;
 	info->tag_set.numa_node = NUMA_NO_NODE;
 	info->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
 	info->tag_set.cmd_size = 0;
@@ -822,6 +879,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
 		queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, rq);
 	}
 
+
 	/* Hard sector size and max sectors impersonate the equiv. hardware. */
 	blk_queue_logical_block_size(rq, sector_size);
 	blk_queue_physical_block_size(rq, physical_sector_size);
@@ -1229,7 +1287,21 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
 	};
 
 	num_grant = s->req.operation == BLKIF_OP_INDIRECT ?
-		s->req.u.indirect.nr_segments : s->req.u.rw.nr_segments;
+		    s->req.u.indirect.nr_segments : s->req.u.rw.nr_segments;
+
+	if (unlikely(s->associated_id != ~0)) {
+		struct blk_shadow *s2 = &info->shadow[s->associated_id];
+		BUG_ON(s->req.operation == BLKIF_OP_INDIRECT);
+
+		num_grant += s2->req.u.rw.nr_segments;
+
+		/* Only the first request can have sg != 0 */
+		if (s2->num_sg != 0) {
+			data.s = s2;
+			s = s2;
+		}
+	}
+
 	num_sg = s->num_sg;
 
 	if (bret->operation == BLKIF_OP_READ && info->feature_persistent) {
@@ -1248,6 +1320,7 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
 			kunmap_atomic(data.bvec_data);
 		}
 	}
+
 	/* Add the persistent grant into the list of free grants */
 	for (i = 0; i < num_grant; i++) {
 		if (gnttab_query_foreign_access(s->grants_used[i]->gref)) {
@@ -1337,9 +1410,22 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 		}
 
 		req = info->shadow[id].request;
-		if (bret->operation != BLKIF_OP_DISCARD)
+		if (bret->operation != BLKIF_OP_DISCARD) {
+			unsigned long id2 = info->shadow[id].associated_id;
+
+			if (unlikely(id2 != ~0)) {
+				info->shadow[id].status = (bret->status == BLKIF_RSP_OKAY) ? REQ_DONE : REQ_FAIL;
+
+				if (info->shadow[id2].status == REQ_WAITING)
+					continue;
+			}
+
 			blkif_completion(&info->shadow[id], info, bret);
+
+			if (unlikely(id2 != ~0))
+				BUG_ON(add_id_to_freelist(info, id2));
+		}
+
 		if (add_id_to_freelist(info, id)) {
 			WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n",
 			     info->gd->disk_name, op_name(bret->operation), id);
@@ -1874,7 +1960,13 @@ static int blkfront_setup_indirect(struct blkfront_info *info)
 					     xen_blkif_max_segments);
 		grants = info->max_indirect_segments;
 	}
+
 	psegs = grants / GRANTS_PER_PSEG;
+	if (!psegs)
+	{
+		psegs = 1;
+		grants = GRANTS_PER_PSEG;
+	}
 
 	err = fill_grant_buffer(info,
 				(grants + INDIRECT_GREFS(grants)) * BLK_RING_SIZE(info));
-- 
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel