Re: [Xen-devel] [PATCH] vscsiif: allow larger segments-per-request values
On Tue, Nov 27, 2012 at 11:37:31AM +0000, Jan Beulich wrote:
> At least certain tape devices require fixed size blocks to be operated
> upon, i.e. breaking up of I/O requests is not permitted. Consequently
> we need an interface extension that (leaving aside implementation
> limitations) doesn't impose a limit on the number of segments that can
> be associated with an individual request.
>
> This, in turn, excludes the blkif extension FreeBSD folks implemented,
> as that still imposes an upper limit (the actual I/O request still
> specifies the full number of segments - as an 8-bit quantity -, and
> subsequent ring slots get used to carry the excess segment
> descriptors).
>
> The alternative therefore is to allow the frontend to pre-set segment
> descriptors _before_ actually issuing the I/O request. I/O will then
> be done by the backend for the accumulated set of segments.
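For illustration, a command whose segment count exceeds a single slot's table would then occupy consecutive ring slots roughly like this (slot numbers and counts are made up):

    slot k  : act = SG_PRESET, rqid = R, nr_segments = 31, seg[0..30]
    slot k+1: act = SG_PRESET, rqid = R, nr_segments = 31, seg[0..30]
    slot k+2: act = CDB,       rqid = R, cmnd[], nr_segments = 7, seg[0..6]

The backend latches the preset segments and only performs I/O once the CDB slot for the same rqid arrives.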
How do you deal with migration to older backends?
>
> To properly associate segment preset operations with the main request,
> the rqid-s between them should match (originally I had hoped to use
> this to avoid producing individual responses for the pre-set
> operations, but that turned out to violate the underlying shared ring
> implementation).
Right. If we could separate those two, that would solve it.
So: separate 'request' and 'response' rings.
>
> Negotiation of the maximum number of segments a particular backend
> implementation supports happens through a new "segs-per-req" xenstore
> node.
No 'feature-segs-per-req'?
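For concreteness, with the patch as posted the backend advertises the value in its own xenstore directory, along these lines (path shown for a typical dom0 backend, value illustrative):

    /local/domain/0/backend/vscsi/<domid>/<vdev>/segs-per-req = "256"

and the frontend (per the scsifront xenbus.c change further down) only raises its sg_tablesize to min(segs-per-req, its own max_segs module parameter).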
>
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
> ---
> As I have no plans to backport this to the 2.6.18 tree, I'm attaching
> for reference the full kernel side patch we're intending to use.
>
> --- a/xen/include/public/io/vscsiif.h
> +++ b/xen/include/public/io/vscsiif.h
> @@ -34,6 +34,7 @@
> #define VSCSIIF_ACT_SCSI_CDB 1 /* SCSI CDB command */
> #define VSCSIIF_ACT_SCSI_ABORT 2 /* SCSI Device(Lun) Abort*/
> #define VSCSIIF_ACT_SCSI_RESET 3 /* SCSI Device(Lun) Reset*/
> +#define VSCSIIF_ACT_SCSI_SG_PRESET 4 /* Preset SG elements */
>
> /*
> * Maximum scatter/gather segments per request.
> @@ -50,6 +51,12 @@
> #define VSCSIIF_MAX_COMMAND_SIZE 16
> #define VSCSIIF_SENSE_BUFFERSIZE 96
>
> +struct scsiif_request_segment {
> + grant_ref_t gref;
> + uint16_t offset;
> + uint16_t length;
> +};
> +typedef struct scsiif_request_segment vscsiif_segment_t;
>
> struct vscsiif_request {
> uint16_t rqid; /* private guest value, echoed in resp */
> @@ -66,18 +73,26 @@ struct vscsiif_request {
> DMA_NONE(3) requests */
> uint8_t nr_segments; /* Number of pieces of scatter-gather */
>
> - struct scsiif_request_segment {
> - grant_ref_t gref;
> - uint16_t offset;
> - uint16_t length;
> - } seg[VSCSIIF_SG_TABLESIZE];
> + vscsiif_segment_t seg[VSCSIIF_SG_TABLESIZE];
> uint32_t reserved[3];
> };
> typedef struct vscsiif_request vscsiif_request_t;
>
> +#define VSCSIIF_SG_LIST_SIZE ((sizeof(vscsiif_request_t) - 4) \
> + / sizeof(vscsiif_segment_t))
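Plugging in the usual values from this header (grant_ref_t a uint32_t, hence 8-byte segments, and VSCSIIF_SG_TABLESIZE at 26, giving sizeof(vscsiif_request_t) == 252 absent padding surprises), this works out to (252 - 4) / 8 = 31 segments per preset slot — the 4 bytes being struct vscsiif_sg_list's rqid/act/nr_segments header below, so an SG_PRESET slot fills a ring slot exactly (4 + 31 * 8 == 252).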
> +
> +struct vscsiif_sg_list {
> + /* First two fields must match struct vscsiif_request! */
> + uint16_t rqid; /* private guest value, must match main req */
> + uint8_t act; /* VSCSIIF_ACT_SCSI_SG_PRESET */
> + uint8_t nr_segments; /* Number of pieces of scatter-gather */
> + vscsiif_segment_t seg[VSCSIIF_SG_LIST_SIZE];
> +};
> +typedef struct vscsiif_sg_list vscsiif_sg_list_t;
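The "first two fields" constraint is what lets the backend read rqid and act from a freshly consumed slot before knowing which of the two layouts it holds; only then is a cast safe. A minimal sketch of that dispatch (not code from the patch):

    vscsiif_request_t *ring_req = RING_GET_REQUEST(&ring, rc);

    if (ring_req->act == VSCSIIF_ACT_SCSI_SG_PRESET) {
        /* Safe: rqid and act sit at the same offsets in both structs. */
        const vscsiif_sg_list_t *sgl = (const void *)ring_req;
        /* ... latch sgl->nr_segments entries of sgl->seg[] ... */
    }

Note that nr_segments does not alias (it sits at offset 3 in the sg_list but much later in the full request), which is presumably why only the first two fields are called out in the comment.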
> +
> struct vscsiif_response {
> uint16_t rqid;
> - uint8_t padding;
> + uint8_t act; /* valid only when backend supports SG_PRESET */
> uint8_t sense_len;
> uint8_t sense_buffer[VSCSIIF_SENSE_BUFFERSIZE];
> int32_t rslt;
>
>
>
> vscsiif: allow larger segments-per-request values
>
> At least certain tape devices require fixed size blocks to be operated
> upon, i.e. breaking up of I/O requests is not permitted. Consequently
> we need an interface extension that (leaving aside implementation
> limitations) doesn't impose a limit on the number of segments that can
> be associated with an individual request.
>
> This, in turn, excludes the blkif extension FreeBSD folks implemented,
> as that still imposes an upper limit (the actual I/O request still
> specifies the full number of segments - as an 8-bit quantity -, and
> subsequent ring slots get used to carry the excess segment
> descriptors).
>
> The alternative therefore is to allow the frontend to pre-set segment
> descriptors _before_ actually issuing the I/O request. I/O will then
> be done by the backend for the accumulated set of segments.
>
> To properly associate segment preset operations with the main request,
> the rqid-s between them should match (originally I had hoped to use
> this to avoid producing individual responses for the pre-set
> operations, but that turned out to violate the underlying shared ring
> implementation).
>
> Negotiation of the maximum number of segments a particular backend
> implementation supports happens through a new "segs-per-req" xenstore
> node.
>
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
> ---
> As I have no plans to backport this to the 2.6.18 tree, I'm attaching
> for reference the full kernel side patch we're intending to use.
>
> --- a/xen/include/public/io/vscsiif.h
> +++ b/xen/include/public/io/vscsiif.h
> @@ -34,6 +34,7 @@
> #define VSCSIIF_ACT_SCSI_CDB 1 /* SCSI CDB command */
> #define VSCSIIF_ACT_SCSI_ABORT 2 /* SCSI Device(Lun) Abort*/
> #define VSCSIIF_ACT_SCSI_RESET 3 /* SCSI Device(Lun) Reset*/
> +#define VSCSIIF_ACT_SCSI_SG_PRESET 4 /* Preset SG elements */
>
> /*
> * Maximum scatter/gather segments per request.
> @@ -50,6 +51,12 @@
> #define VSCSIIF_MAX_COMMAND_SIZE 16
> #define VSCSIIF_SENSE_BUFFERSIZE 96
>
> +struct scsiif_request_segment {
> + grant_ref_t gref;
> + uint16_t offset;
> + uint16_t length;
> +};
> +typedef struct scsiif_request_segment vscsiif_segment_t;
>
> struct vscsiif_request {
> uint16_t rqid; /* private guest value, echoed in resp */
> @@ -66,18 +73,26 @@ struct vscsiif_request {
> DMA_NONE(3) requests */
> uint8_t nr_segments; /* Number of pieces of scatter-gather */
>
> - struct scsiif_request_segment {
> - grant_ref_t gref;
> - uint16_t offset;
> - uint16_t length;
> - } seg[VSCSIIF_SG_TABLESIZE];
> + vscsiif_segment_t seg[VSCSIIF_SG_TABLESIZE];
> uint32_t reserved[3];
> };
> typedef struct vscsiif_request vscsiif_request_t;
>
> +#define VSCSIIF_SG_LIST_SIZE ((sizeof(vscsiif_request_t) - 4) \
> + / sizeof(vscsiif_segment_t))
> +
> +struct vscsiif_sg_list {
> + /* First two fields must match struct vscsiif_request! */
> + uint16_t rqid; /* private guest value, must match main req */
> + uint8_t act; /* VSCSIIF_ACT_SCSI_SG_PRESET */
> + uint8_t nr_segments; /* Number of pieces of scatter-gather */
> + vscsiif_segment_t seg[VSCSIIF_SG_LIST_SIZE];
> +};
> +typedef struct vscsiif_sg_list vscsiif_sg_list_t;
> +
> struct vscsiif_response {
> uint16_t rqid;
> - uint8_t padding;
> + uint8_t act; /* valid only when backend supports SG_PRESET */
> uint8_t sense_len;
> uint8_t sense_buffer[VSCSIIF_SENSE_BUFFERSIZE];
> int32_t rslt;
> --- sle11sp3.orig/drivers/xen/scsiback/common.h 2012-06-06 13:53:26.000000000 +0200
> +++ sle11sp3/drivers/xen/scsiback/common.h 2012-11-22 14:55:58.000000000 +0100
> @@ -94,10 +94,15 @@ struct vscsibk_info {
> unsigned int waiting_reqs;
> struct page **mmap_pages;
>
> + struct pending_req *preq;
> +
> + union {
> + struct gnttab_map_grant_ref *gmap;
> + struct gnttab_unmap_grant_ref *gunmap;
> + };
> };
>
> -typedef struct {
> - unsigned char act;
> +typedef struct pending_req {
> struct vscsibk_info *info;
> struct scsi_device *sdev;
>
> @@ -114,7 +119,8 @@ typedef struct {
>
> uint32_t request_bufflen;
> struct scatterlist *sgl;
> - grant_ref_t gref[VSCSIIF_SG_TABLESIZE];
> + grant_ref_t *gref;
> + vscsiif_segment_t *segs;
>
> int32_t rslt;
> uint32_t resid;
> @@ -123,7 +129,7 @@ typedef struct {
> struct list_head free_list;
> } pending_req_t;
>
> -
> +extern unsigned int vscsiif_segs;
>
> #define scsiback_get(_b) (atomic_inc(&(_b)->nr_unreplied_reqs))
> #define scsiback_put(_b) \
> @@ -163,7 +169,7 @@ void scsiback_release_translation_entry(
>
> void scsiback_cmd_exec(pending_req_t *pending_req);
> void scsiback_do_resp_with_sense(char *sense_buffer, int32_t result,
> - uint32_t resid, pending_req_t *pending_req);
> + uint32_t resid, pending_req_t *, uint8_t act);
> void scsiback_fast_flush_area(pending_req_t *req);
>
> void scsiback_rsp_emulation(pending_req_t *pending_req);
> --- sle11sp3.orig/drivers/xen/scsiback/emulate.c 2012-01-11 12:14:54.000000000 +0100
> +++ sle11sp3/drivers/xen/scsiback/emulate.c 2012-11-22 14:29:27.000000000 +0100
> @@ -352,7 +352,9 @@ void scsiback_req_emulation_or_cmdexec(p
> else {
> scsiback_fast_flush_area(pending_req);
> scsiback_do_resp_with_sense(pending_req->sense_buffer,
> - pending_req->rslt, pending_req->resid, pending_req);
> + pending_req->rslt,
> + pending_req->resid, pending_req,
> + VSCSIIF_ACT_SCSI_CDB);
> }
> }
>
> --- sle11sp3.orig/drivers/xen/scsiback/interface.c 2011-10-10 11:58:37.000000000 +0200
> +++ sle11sp3/drivers/xen/scsiback/interface.c 2012-11-13 13:21:10.000000000 +0100
> @@ -51,6 +51,13 @@ struct vscsibk_info *vscsibk_info_alloc(
> if (!info)
> return ERR_PTR(-ENOMEM);
>
> + info->gmap = kcalloc(max(sizeof(*info->gmap), sizeof(*info->gunmap)),
> + vscsiif_segs, GFP_KERNEL);
> + if (!info->gmap) {
> + kfree(info);
> + return ERR_PTR(-ENOMEM);
> + }
> +
> info->domid = domid;
> spin_lock_init(&info->ring_lock);
> atomic_set(&info->nr_unreplied_reqs, 0);
> @@ -120,6 +127,7 @@ void scsiback_disconnect(struct vscsibk_
>
> void scsiback_free(struct vscsibk_info *info)
> {
> + kfree(info->gmap);
> kmem_cache_free(scsiback_cachep, info);
> }
>
> --- sle11sp3.orig/drivers/xen/scsiback/scsiback.c 2012-11-22 15:36:11.000000000 +0100
> +++ sle11sp3/drivers/xen/scsiback/scsiback.c 2012-11-22 15:36:16.000000000 +0100
> @@ -56,6 +56,10 @@ int vscsiif_reqs = VSCSIIF_BACK_MAX_PEND
> module_param_named(reqs, vscsiif_reqs, int, 0);
> MODULE_PARM_DESC(reqs, "Number of scsiback requests to allocate");
>
> +unsigned int vscsiif_segs = VSCSIIF_SG_TABLESIZE;
> +module_param_named(segs, vscsiif_segs, uint, 0);
> +MODULE_PARM_DESC(segs, "Number of segments to allow per request");
> +
> static unsigned int log_print_stat = 0;
> module_param(log_print_stat, int, 0644);
>
> @@ -67,7 +71,7 @@ static grant_handle_t *pending_grant_han
>
> static int vaddr_pagenr(pending_req_t *req, int seg)
> {
> - return (req - pending_reqs) * VSCSIIF_SG_TABLESIZE + seg;
> + return (req - pending_reqs) * vscsiif_segs + seg;
> }
>
> static unsigned long vaddr(pending_req_t *req, int seg)
> @@ -82,7 +86,7 @@ static unsigned long vaddr(pending_req_t
>
> void scsiback_fast_flush_area(pending_req_t *req)
> {
> - struct gnttab_unmap_grant_ref unmap[VSCSIIF_SG_TABLESIZE];
> + struct gnttab_unmap_grant_ref *unmap = req->info->gunmap;
> unsigned int i, invcount = 0;
> grant_handle_t handle;
> int err;
> @@ -117,6 +121,7 @@ static pending_req_t * alloc_req(struct
> if (!list_empty(&pending_free)) {
> req = list_entry(pending_free.next, pending_req_t, free_list);
> list_del(&req->free_list);
> + req->nr_segments = 0;
> }
> spin_unlock_irqrestore(&pending_free_lock, flags);
> return req;
> @@ -144,7 +149,8 @@ static void scsiback_notify_work(struct
> }
>
> void scsiback_do_resp_with_sense(char *sense_buffer, int32_t result,
> - uint32_t resid, pending_req_t *pending_req)
> + uint32_t resid, pending_req_t *pending_req,
> + uint8_t act)
> {
> vscsiif_response_t *ring_res;
> struct vscsibk_info *info = pending_req->info;
> @@ -159,6 +165,7 @@ void scsiback_do_resp_with_sense(char *s
> ring_res = RING_GET_RESPONSE(&info->ring, info->ring.rsp_prod_pvt);
> info->ring.rsp_prod_pvt++;
>
> + ring_res->act = act;
> ring_res->rslt = result;
> ring_res->rqid = pending_req->rqid;
>
> @@ -186,7 +193,8 @@ void scsiback_do_resp_with_sense(char *s
> if (notify)
> notify_remote_via_irq(info->irq);
>
> - free_req(pending_req);
> + if (act != VSCSIIF_ACT_SCSI_SG_PRESET)
> + free_req(pending_req);
> }
>
> static void scsiback_print_status(char *sense_buffer, int errors,
> @@ -225,25 +233,25 @@ static void scsiback_cmd_done(struct req
> scsiback_rsp_emulation(pending_req);
>
> scsiback_fast_flush_area(pending_req);
> - scsiback_do_resp_with_sense(sense_buffer, errors, resid, pending_req);
> + scsiback_do_resp_with_sense(sense_buffer, errors, resid, pending_req,
> + VSCSIIF_ACT_SCSI_CDB);
> scsiback_put(pending_req->info);
>
> __blk_put_request(req->q, req);
> }
>
>
> -static int scsiback_gnttab_data_map(vscsiif_request_t *ring_req,
> - pending_req_t *pending_req)
> +static int scsiback_gnttab_data_map(const vscsiif_segment_t *segs,
> + unsigned int nr_segs,
> + pending_req_t *pending_req)
> {
> u32 flags;
> - int write;
> - int i, err = 0;
> - unsigned int data_len = 0;
> - struct gnttab_map_grant_ref map[VSCSIIF_SG_TABLESIZE];
> + int write, err = 0;
> + unsigned int i, j, data_len = 0;
> struct vscsibk_info *info = pending_req->info;
> -
> + struct gnttab_map_grant_ref *map = info->gmap;
> int data_dir = (int)pending_req->sc_data_direction;
> - unsigned int nr_segments = (unsigned int)pending_req->nr_segments;
> + unsigned int nr_segments = pending_req->nr_segments + nr_segs;
>
> write = (data_dir == DMA_TO_DEVICE);
>
> @@ -264,14 +272,20 @@ static int scsiback_gnttab_data_map(vscs
> if (write)
> flags |= GNTMAP_readonly;
>
> - for (i = 0; i < nr_segments; i++)
> + for (i = 0; i < pending_req->nr_segments; i++)
> gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
> - ring_req->seg[i].gref,
> + pending_req->segs[i].gref,
> + info->domid);
> + for (j = 0; i < nr_segments; i++, j++)
> + gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
> + segs[j].gref,
> info->domid);
>
> +
> err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map,
> nr_segments);
> BUG_ON(err);
>
> + j = 0;
> for_each_sg (pending_req->sgl, sg, nr_segments, i) {
> struct page *pg;
>
> @@ -294,8 +308,15 @@ static int scsiback_gnttab_data_map(vscs
> set_phys_to_machine(page_to_pfn(pg),
> FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT));
>
> - sg_set_page(sg, pg, ring_req->seg[i].length,
> - ring_req->seg[i].offset);
> + if (i < pending_req->nr_segments)
> + sg_set_page(sg, pg,
> + pending_req->segs[i].length,
> + pending_req->segs[i].offset);
> + else {
> + sg_set_page(sg, pg, segs[j].length,
> + segs[j].offset);
> + ++j;
> + }
> data_len += sg->length;
>
> barrier();
> @@ -306,6 +327,8 @@ static int scsiback_gnttab_data_map(vscs
>
> }
>
> + pending_req->nr_segments = nr_segments;
> +
> if (err)
> goto fail_flush;
> }
> @@ -471,7 +494,8 @@ static void scsiback_device_reset_exec(p
> scsiback_get(info);
> err = scsi_reset_provider(sdev, SCSI_TRY_RESET_DEVICE);
>
> - scsiback_do_resp_with_sense(NULL, err, 0, pending_req);
> + scsiback_do_resp_with_sense(NULL, err, 0, pending_req,
> + VSCSIIF_ACT_SCSI_RESET);
> scsiback_put(info);
>
> return;
> @@ -489,13 +513,11 @@ static int prepare_pending_reqs(struct v
> {
> struct scsi_device *sdev;
> struct ids_tuple vir;
> + unsigned int nr_segs;
> int err = -EINVAL;
>
> DPRINTK("%s\n",__FUNCTION__);
>
> - pending_req->rqid = ring_req->rqid;
> - pending_req->act = ring_req->act;
> -
> pending_req->info = info;
>
> pending_req->v_chn = vir.chn = ring_req->channel;
> @@ -525,11 +547,10 @@ static int prepare_pending_reqs(struct v
> goto invalid_value;
> }
>
> - pending_req->nr_segments = ring_req->nr_segments;
> + nr_segs = ring_req->nr_segments;
> barrier();
> - if (pending_req->nr_segments > VSCSIIF_SG_TABLESIZE) {
> - DPRINTK("scsiback: invalid parameter nr_seg = %d\n",
> - pending_req->nr_segments);
> + if (pending_req->nr_segments + nr_segs > vscsiif_segs) {
> + DPRINTK("scsiback: invalid nr_segs = %u\n", nr_segs);
> err = -EINVAL;
> goto invalid_value;
> }
> @@ -546,7 +567,7 @@ static int prepare_pending_reqs(struct v
>
> pending_req->timeout_per_command = ring_req->timeout_per_command;
>
> - if(scsiback_gnttab_data_map(ring_req, pending_req)) {
> + if (scsiback_gnttab_data_map(ring_req->seg, nr_segs, pending_req)) {
> DPRINTK("scsiback: invalid buffer\n");
> err = -EINVAL;
> goto invalid_value;
> @@ -558,6 +579,20 @@ invalid_value:
> return err;
> }
>
> +static void latch_segments(pending_req_t *pending_req,
> + const struct vscsiif_sg_list *sgl)
> +{
> + unsigned int nr_segs = sgl->nr_segments;
> +
> + barrier();
> + if (pending_req->nr_segments + nr_segs <= vscsiif_segs) {
> + memcpy(pending_req->segs + pending_req->nr_segments,
> + sgl->seg, nr_segs * sizeof(*sgl->seg));
> + pending_req->nr_segments += nr_segs;
> + }
> + else
> + DPRINTK("scsiback: invalid nr_segs = %u\n", nr_segs);
> +}
>
> static int _scsiback_do_cmd_fn(struct vscsibk_info *info)
> {
> @@ -575,9 +610,11 @@ static int _scsiback_do_cmd_fn(struct vs
> rmb();
>
> while ((rc != rp)) {
> + int act, rqid;
> +
> if (RING_REQUEST_CONS_OVERFLOW(ring, rc))
> break;
> - pending_req = alloc_req(info);
> + pending_req = info->preq ?: alloc_req(info);
> if (NULL == pending_req) {
> more_to_do = 1;
> break;
> @@ -586,32 +623,55 @@ static int _scsiback_do_cmd_fn(struct vs
> ring_req = RING_GET_REQUEST(ring, rc);
> ring->req_cons = ++rc;
>
> + act = ring_req->act;
> + rqid = ring_req->rqid;
> + barrier();
> + if (!pending_req->nr_segments)
> + pending_req->rqid = rqid;
> + else if (pending_req->rqid != rqid)
> + DPRINTK("scsiback: invalid rqid %04x, expected %04x\n",
> + rqid, pending_req->rqid);
> +
> + info->preq = NULL;
> + if (pending_req->rqid != rqid) {
> + scsiback_do_resp_with_sense(NULL, DRIVER_INVALID << 24,
> + 0, pending_req, act);
> + continue;
> + }
> +
> + if (act == VSCSIIF_ACT_SCSI_SG_PRESET) {
> + latch_segments(pending_req, (void *)ring_req);
> + info->preq = pending_req;
> + scsiback_do_resp_with_sense(NULL, 0, 0,
> + pending_req, act);
> + continue;
> + }
> +
> err = prepare_pending_reqs(info, ring_req,
> pending_req);
> if (err == -EINVAL) {
> scsiback_do_resp_with_sense(NULL, (DRIVER_ERROR << 24),
> - 0, pending_req);
> + 0, pending_req, act);
> continue;
> } else if (err == -ENODEV) {
> scsiback_do_resp_with_sense(NULL, (DID_NO_CONNECT << 16),
> - 0, pending_req);
> + 0, pending_req, act);
> continue;
> }
>
> - if (pending_req->act == VSCSIIF_ACT_SCSI_CDB) {
> -
> + if (act == VSCSIIF_ACT_SCSI_CDB) {
> /* The Host mode is through as for Emulation. */
> if (info->feature == VSCSI_TYPE_HOST)
> scsiback_cmd_exec(pending_req);
> else
> scsiback_req_emulation_or_cmdexec(pending_req);
>
> - } else if (pending_req->act == VSCSIIF_ACT_SCSI_RESET) {
> + } else if (act == VSCSIIF_ACT_SCSI_RESET) {
> scsiback_device_reset_exec(pending_req);
> } else {
> pr_err("scsiback: invalid parameter for request\n");
> scsiback_do_resp_with_sense(NULL, (DRIVER_ERROR << 24),
> - 0, pending_req);
> + 0, pending_req, act);
> continue;
> }
> }
> @@ -673,17 +733,32 @@ static int __init scsiback_init(void)
> if (!is_running_on_xen())
> return -ENODEV;
>
> - mmap_pages = vscsiif_reqs * VSCSIIF_SG_TABLESIZE;
> + if (vscsiif_segs < VSCSIIF_SG_TABLESIZE)
> + vscsiif_segs = VSCSIIF_SG_TABLESIZE;
> + if (vscsiif_segs != (uint8_t)vscsiif_segs)
> + return -EINVAL;
> + mmap_pages = vscsiif_reqs * vscsiif_segs;
>
> pending_reqs = kzalloc(sizeof(pending_reqs[0]) *
> vscsiif_reqs, GFP_KERNEL);
> + if (!pending_reqs)
> + return -ENOMEM;
> pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) *
> mmap_pages, GFP_KERNEL);
> pending_pages = alloc_empty_pages_and_pagevec(mmap_pages);
>
> - if (!pending_reqs || !pending_grant_handles || !pending_pages)
> + if (!pending_grant_handles || !pending_pages)
> goto out_of_memory;
>
> + for (i = 0; i < vscsiif_reqs; ++i) {
> + pending_reqs[i].gref = kcalloc(sizeof(*pending_reqs->gref),
> + vscsiif_segs, GFP_KERNEL);
> + pending_reqs[i].segs = kcalloc(sizeof(*pending_reqs->segs),
> + vscsiif_segs, GFP_KERNEL);
> + if (!pending_reqs[i].gref || !pending_reqs[i].segs)
> + goto out_of_memory;
> + }
> +
> for (i = 0; i < mmap_pages; i++)
> pending_grant_handles[i] = SCSIBACK_INVALID_HANDLE;
>
> @@ -705,6 +780,10 @@ static int __init scsiback_init(void)
> out_interface:
> scsiback_interface_exit();
> out_of_memory:
> + for (i = 0; i < vscsiif_reqs; ++i) {
> + kfree(pending_reqs[i].gref);
> + kfree(pending_reqs[i].segs);
> + }
> kfree(pending_reqs);
> kfree(pending_grant_handles);
> free_empty_pages_and_pagevec(pending_pages, mmap_pages);
> @@ -715,12 +794,17 @@ out_of_memory:
> #if 0
> static void __exit scsiback_exit(void)
> {
> + unsigned int i;
> +
> scsiback_xenbus_unregister();
> scsiback_interface_exit();
> + for (i = 0; i < vscsiif_reqs; ++i) {
> + kfree(pending_reqs[i].gref);
> + kfree(pending_reqs[i].segs);
> + }
> kfree(pending_reqs);
> kfree(pending_grant_handles);
> - free_empty_pages_and_pagevec(pending_pages, (vscsiif_reqs * VSCSIIF_SG_TABLESIZE));
> -
> + free_empty_pages_and_pagevec(pending_pages, vscsiif_reqs * vscsiif_segs);
> }
> #endif
>
> --- sle11sp3.orig/drivers/xen/scsiback/xenbus.c 2011-06-30 17:04:59.000000000 +0200
> +++ sle11sp3/drivers/xen/scsiback/xenbus.c 2012-11-13 14:36:16.000000000 +0100
> @@ -339,6 +339,13 @@ static int scsiback_probe(struct xenbus_
> if (val)
> be->info->feature = VSCSI_TYPE_HOST;
>
> + if (vscsiif_segs > VSCSIIF_SG_TABLESIZE) {
> + err = xenbus_printf(XBT_NIL, dev->nodename, "segs-per-req",
> + "%u", vscsiif_segs);
> + if (err)
> + xenbus_dev_error(dev, err, "writing segs-per-req");
> + }
> +
> err = xenbus_switch_state(dev, XenbusStateInitWait);
> if (err)
> goto fail;
> --- sle11sp3.orig/drivers/xen/scsifront/common.h 2011-01-31 17:29:16.000000000 +0100
> +++ sle11sp3/drivers/xen/scsifront/common.h 2012-11-22 13:45:50.000000000 +0100
> @@ -95,7 +95,7 @@ struct vscsifrnt_shadow {
>
> /* requested struct scsi_cmnd is stored from kernel */
> unsigned long req_scsi_cmnd;
> - int gref[VSCSIIF_SG_TABLESIZE];
> + int gref[SG_ALL];
> };
>
> struct vscsifrnt_info {
> @@ -110,7 +110,6 @@ struct vscsifrnt_info {
>
> grant_ref_t ring_ref;
> struct vscsiif_front_ring ring;
> - struct vscsiif_response ring_res;
>
> struct vscsifrnt_shadow shadow[VSCSIIF_MAX_REQS];
> uint32_t shadow_free;
> @@ -119,6 +118,12 @@ struct vscsifrnt_info {
> wait_queue_head_t wq;
> unsigned int waiting_resp;
>
> + struct {
> + struct scsi_cmnd *sc;
> + unsigned int rqid;
> + unsigned int done;
> + vscsiif_segment_t segs[];
> + } active;
> };
>
> #define DPRINTK(_f, _a...) \
> --- sle11sp3.orig/drivers/xen/scsifront/scsifront.c 2011-06-28 18:57:14.000000000 +0200
> +++ sle11sp3/drivers/xen/scsifront/scsifront.c 2012-11-22 16:37:35.000000000 +0100
> @@ -106,6 +106,66 @@ irqreturn_t scsifront_intr(int irq, void
> return IRQ_HANDLED;
> }
>
> +static bool push_cmd_to_ring(struct vscsifrnt_info *info,
> + vscsiif_request_t *ring_req)
> +{
> + unsigned int left, rqid = info->active.rqid;
> + struct scsi_cmnd *sc;
> +
> + for (; ; ring_req = NULL) {
> + struct vscsiif_sg_list *sgl;
> +
> + if (!ring_req) {
> + struct vscsiif_front_ring *ring = &info->ring;
> +
> + ring_req = RING_GET_REQUEST(ring, ring->req_prod_pvt);
> + ring->req_prod_pvt++;
> + ring_req->rqid = rqid;
> + }
> +
> + left = info->shadow[rqid].nr_segments - info->active.done;
> + if (left <= VSCSIIF_SG_TABLESIZE)
> + break;
> +
> + sgl = (void *)ring_req;
> + sgl->act = VSCSIIF_ACT_SCSI_SG_PRESET;
> +
> + if (left > VSCSIIF_SG_LIST_SIZE)
> + left = VSCSIIF_SG_LIST_SIZE;
> + memcpy(sgl->seg, info->active.segs + info->active.done,
> + left * sizeof(*sgl->seg));
> +
> + sgl->nr_segments = left;
> + info->active.done += left;
> +
> + if (RING_FULL(&info->ring))
> + return false;
> + }
> +
> + sc = info->active.sc;
> +
> + ring_req->act = VSCSIIF_ACT_SCSI_CDB;
> + ring_req->id = sc->device->id;
> + ring_req->lun = sc->device->lun;
> + ring_req->channel = sc->device->channel;
> + ring_req->cmd_len = sc->cmd_len;
> +
> + if ( sc->cmd_len )
> + memcpy(ring_req->cmnd, sc->cmnd, sc->cmd_len);
> + else
> + memset(ring_req->cmnd, 0, VSCSIIF_MAX_COMMAND_SIZE);
> +
> + ring_req->sc_data_direction = sc->sc_data_direction;
> + ring_req->timeout_per_command = sc->request->timeout / HZ;
> + ring_req->nr_segments = left;
> +
> + memcpy(ring_req->seg, info->active.segs + info->active.done,
> + left * sizeof(*ring_req->seg));
> +
> + info->active.sc = NULL;
> +
> + return !RING_FULL(&info->ring);
> +}
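To put numbers on the loop above: with VSCSIIF_SG_TABLESIZE at 26 and VSCSIIF_SG_LIST_SIZE at 31, a hypothetical 100-segment command gets pushed as three SG_PRESET slots carrying 31 segments each (done = 93), after which left == 7 <= 26 and the loop exits, so the remaining 7 segments travel in the final ACT_SCSI_CDB slot — four ring slots in total.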
>
> static void scsifront_gnttab_done(struct vscsifrnt_shadow *s, uint32_t id)
> {
> @@ -194,6 +254,16 @@ int scsifront_cmd_done(struct vscsifrnt_
>
> ring_res = RING_GET_RESPONSE(&info->ring, i);
>
> + if (info->host->sg_tablesize > VSCSIIF_SG_TABLESIZE) {
> + u8 act = ring_res->act;
> +
> + if (act == VSCSIIF_ACT_SCSI_SG_PRESET)
> + continue;
> + if (act != info->shadow[ring_res->rqid].act)
> + DPRINTK("Bogus backend response (%02x vs
> %02x)\n",
> + act, info->shadow[ring_res->rqid].act);
> + }
> +
> if (info->shadow[ring_res->rqid].act == VSCSIIF_ACT_SCSI_CDB)
> scsifront_cdb_cmd_done(info, ring_res);
> else
> @@ -208,8 +278,16 @@ int scsifront_cmd_done(struct vscsifrnt_
> info->ring.sring->rsp_event = i + 1;
> }
>
> - spin_unlock_irqrestore(&info->io_lock, flags);
> + spin_unlock(&info->io_lock);
> +
> + spin_lock(info->host->host_lock);
> +
> + if (info->active.sc && !RING_FULL(&info->ring)) {
> + push_cmd_to_ring(info, NULL);
> + scsifront_do_request(info);
> + }
>
> + spin_unlock_irqrestore(info->host->host_lock, flags);
>
> /* Yield point for this unbounded loop. */
> cond_resched();
> @@ -242,7 +320,8 @@ int scsifront_schedule(void *data)
>
>
> static int map_data_for_request(struct vscsifrnt_info *info,
> - struct scsi_cmnd *sc, vscsiif_request_t *ring_req, uint32_t id)
> + struct scsi_cmnd *sc,
> + struct vscsifrnt_shadow *shadow)
> {
> grant_ref_t gref_head;
> struct page *page;
> @@ -254,7 +333,7 @@ static int map_data_for_request(struct v
> if (sc->sc_data_direction == DMA_NONE)
> return 0;
>
> - err = gnttab_alloc_grant_references(VSCSIIF_SG_TABLESIZE, &gref_head);
> + err = gnttab_alloc_grant_references(info->host->sg_tablesize, &gref_head);
> if (err) {
> pr_err("scsifront: gnttab_alloc_grant_references() error\n");
> return -ENOMEM;
> @@ -266,7 +345,7 @@ static int map_data_for_request(struct v
> unsigned int data_len = scsi_bufflen(sc);
>
> nr_pages = (data_len + sgl->offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
> - if (nr_pages > VSCSIIF_SG_TABLESIZE) {
> + if (nr_pages > info->host->sg_tablesize) {
> pr_err("scsifront: Unable to map request_buffer for
> command!\n");
> ref_cnt = (-E2BIG);
> goto big_to_sg;
> @@ -294,10 +373,10 @@ static int map_data_for_request(struct v
> gnttab_grant_foreign_access_ref(ref,
> info->dev->otherend_id,
> buffer_pfn, write);
>
> - info->shadow[id].gref[ref_cnt] = ref;
> - ring_req->seg[ref_cnt].gref = ref;
> - ring_req->seg[ref_cnt].offset = (uint16_t)off;
> - ring_req->seg[ref_cnt].length = (uint16_t)bytes;
> + shadow->gref[ref_cnt] = ref;
> + info->active.segs[ref_cnt].gref = ref;
> + info->active.segs[ref_cnt].offset = off;
> + info->active.segs[ref_cnt].length = bytes;
>
> buffer_pfn++;
> len -= bytes;
> @@ -336,34 +415,27 @@ static int scsifront_queuecommand(struct
> return SCSI_MLQUEUE_HOST_BUSY;
> }
>
> + if (info->active.sc && !push_cmd_to_ring(info, NULL)) {
> + scsifront_do_request(info);
> + spin_unlock_irqrestore(shost->host_lock, flags);
> + return SCSI_MLQUEUE_HOST_BUSY;
> + }
> +
> sc->result = 0;
>
> ring_req = scsifront_pre_request(info);
> rqid = ring_req->rqid;
> - ring_req->act = VSCSIIF_ACT_SCSI_CDB;
> -
> - ring_req->id = sc->device->id;
> - ring_req->lun = sc->device->lun;
> - ring_req->channel = sc->device->channel;
> - ring_req->cmd_len = sc->cmd_len;
>
> BUG_ON(sc->cmd_len > VSCSIIF_MAX_COMMAND_SIZE);
>
> - if ( sc->cmd_len )
> - memcpy(ring_req->cmnd, sc->cmnd, sc->cmd_len);
> - else
> - memset(ring_req->cmnd, 0, VSCSIIF_MAX_COMMAND_SIZE);
> -
> - ring_req->sc_data_direction = (uint8_t)sc->sc_data_direction;
> - ring_req->timeout_per_command = (sc->request->timeout / HZ);
> -
> info->shadow[rqid].req_scsi_cmnd = (unsigned long)sc;
> info->shadow[rqid].sc_data_direction = sc->sc_data_direction;
> - info->shadow[rqid].act = ring_req->act;
> + info->shadow[rqid].act = VSCSIIF_ACT_SCSI_CDB;
>
> - ref_cnt = map_data_for_request(info, sc, ring_req, rqid);
> + ref_cnt = map_data_for_request(info, sc, &info->shadow[rqid]);
> if (ref_cnt < 0) {
> add_id_to_freelist(info, rqid);
> + scsifront_do_request(info);
> spin_unlock_irqrestore(shost->host_lock, flags);
> if (ref_cnt == (-ENOMEM))
> return SCSI_MLQUEUE_HOST_BUSY;
> @@ -372,9 +444,13 @@ static int scsifront_queuecommand(struct
> return 0;
> }
>
> - ring_req->nr_segments = (uint8_t)ref_cnt;
> info->shadow[rqid].nr_segments = ref_cnt;
>
> + info->active.sc = sc;
> + info->active.rqid = rqid;
> + info->active.done = 0;
> + push_cmd_to_ring(info, ring_req);
> +
> scsifront_do_request(info);
> spin_unlock_irqrestore(shost->host_lock, flags);
>
> --- sle11sp3.orig/drivers/xen/scsifront/xenbus.c 2012-10-02 14:32:45.000000000 +0200
> +++ sle11sp3/drivers/xen/scsifront/xenbus.c 2012-11-21 13:35:47.000000000 +0100
> @@ -43,6 +43,10 @@
> #define DEFAULT_TASK_COMM_LEN TASK_COMM_LEN
> #endif
>
> +static unsigned int max_nr_segs = VSCSIIF_SG_TABLESIZE;
> +module_param_named(max_segs, max_nr_segs, uint, 0);
> +MODULE_PARM_DESC(max_segs, "Maximum number of segments per request");
> +
> extern struct scsi_host_template scsifront_sht;
>
> static void scsifront_free(struct vscsifrnt_info *info)
> @@ -181,7 +185,9 @@ static int scsifront_probe(struct xenbus
> int i, err = -ENOMEM;
> char name[DEFAULT_TASK_COMM_LEN];
>
> - host = scsi_host_alloc(&scsifront_sht, sizeof(*info));
> + host = scsi_host_alloc(&scsifront_sht,
> + offsetof(struct vscsifrnt_info,
> + active.segs[max_nr_segs]));
> if (!host) {
> xenbus_dev_fatal(dev, err, "fail to allocate scsi host");
> return err;
> @@ -223,7 +229,7 @@ static int scsifront_probe(struct xenbus
> host->max_id = VSCSIIF_MAX_TARGET;
> host->max_channel = 0;
> host->max_lun = VSCSIIF_MAX_LUN;
> - host->max_sectors = (VSCSIIF_SG_TABLESIZE - 1) * PAGE_SIZE / 512;
> + host->max_sectors = (host->sg_tablesize - 1) * PAGE_SIZE / 512;
> host->max_cmd_len = VSCSIIF_MAX_COMMAND_SIZE;
>
> err = scsi_add_host(host, &dev->dev);
> @@ -278,6 +284,23 @@ static int scsifront_disconnect(struct v
> return 0;
> }
>
> +static void scsifront_read_backend_params(struct xenbus_device *dev,
> + struct vscsifrnt_info *info)
> +{
> + unsigned int nr_segs;
> + int ret;
> + struct Scsi_Host *host = info->host;
> +
> + ret = xenbus_scanf(XBT_NIL, dev->otherend, "segs-per-req", "%u",
> + &nr_segs);
> + if (ret == 1 && nr_segs > host->sg_tablesize) {
> + host->sg_tablesize = min(nr_segs, max_nr_segs);
> + dev_info(&dev->dev, "using up to %d SG entries\n",
> + host->sg_tablesize);
> + host->max_sectors = (host->sg_tablesize - 1) * PAGE_SIZE / 512;
> + }
> +}
> +
> #define VSCSIFRONT_OP_ADD_LUN 1
> #define VSCSIFRONT_OP_DEL_LUN 2
>
> @@ -368,6 +391,7 @@ static void scsifront_backend_changed(st
> break;
>
> case XenbusStateConnected:
> + scsifront_read_backend_params(dev, info);
> if (xenbus_read_driver_state(dev->nodename) ==
> XenbusStateInitialised) {
> scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_ADD_LUN);
> @@ -413,8 +437,13 @@ static DEFINE_XENBUS_DRIVER(scsifront, ,
> .otherend_changed = scsifront_backend_changed,
> );
>
> -int scsifront_xenbus_init(void)
> +int __init scsifront_xenbus_init(void)
> {
> + if (max_nr_segs > SG_ALL)
> + max_nr_segs = SG_ALL;
> + if (max_nr_segs < VSCSIIF_SG_TABLESIZE)
> + max_nr_segs = VSCSIIF_SG_TABLESIZE;
> +
> return xenbus_register_frontend(&scsifront_driver);
> }
>
> --- sle11sp3.orig/include/xen/interface/io/vscsiif.h 2008-07-21 11:00:33.000000000 +0200
> +++ sle11sp3/include/xen/interface/io/vscsiif.h 2012-11-22 14:32:31.000000000 +0100
> @@ -34,6 +34,7 @@
> #define VSCSIIF_ACT_SCSI_CDB 1 /* SCSI CDB command */
> #define VSCSIIF_ACT_SCSI_ABORT 2 /* SCSI Device(Lun) Abort*/
> #define VSCSIIF_ACT_SCSI_RESET 3 /* SCSI Device(Lun) Reset*/
> +#define VSCSIIF_ACT_SCSI_SG_PRESET 4 /* Preset SG elements */
>
>
> #define VSCSIIF_BACK_MAX_PENDING_REQS 128
> @@ -53,6 +54,12 @@
> #define VSCSIIF_MAX_COMMAND_SIZE 16
> #define VSCSIIF_SENSE_BUFFERSIZE 96
>
> +struct scsiif_request_segment {
> + grant_ref_t gref;
> + uint16_t offset;
> + uint16_t length;
> +};
> +typedef struct scsiif_request_segment vscsiif_segment_t;
>
> struct vscsiif_request {
> uint16_t rqid; /* private guest value, echoed in resp */
> @@ -69,18 +76,26 @@ struct vscsiif_request {
> DMA_NONE(3) requests */
> uint8_t nr_segments; /* Number of pieces of scatter-gather */
>
> - struct scsiif_request_segment {
> - grant_ref_t gref;
> - uint16_t offset;
> - uint16_t length;
> - } seg[VSCSIIF_SG_TABLESIZE];
> + vscsiif_segment_t seg[VSCSIIF_SG_TABLESIZE];
> uint32_t reserved[3];
> };
> typedef struct vscsiif_request vscsiif_request_t;
>
> +#define VSCSIIF_SG_LIST_SIZE ((sizeof(vscsiif_request_t) - 4) \
> + / sizeof(vscsiif_segment_t))
> +
> +struct vscsiif_sg_list {
> + /* First two fields must match struct vscsiif_request! */
> + uint16_t rqid; /* private guest value, must match main req */
> + uint8_t act; /* VSCSIIF_ACT_SCSI_SG_PRESET */
> + uint8_t nr_segments; /* Number of pieces of scatter-gather */
> + vscsiif_segment_t seg[VSCSIIF_SG_LIST_SIZE];
> +};
> +typedef struct vscsiif_sg_list vscsiif_sg_list_t;
> +
> struct vscsiif_response {
> uint16_t rqid;
> - uint8_t padding;
> + uint8_t act; /* valid only when backend supports SG_PRESET */
> uint8_t sense_len;
> uint8_t sense_buffer[VSCSIIF_SENSE_BUFFERSIZE];
> int32_t rslt;