Re: [Xen-devel] [PATCH] Use timeout on xenstore read_reply to av

To:	Ian Campbell <Ian.Campbell@xxxxxxxxxx>
Subject:	Re: [Xen-devel] [PATCH] Use timeout on xenstore read_reply to avoid task hunging
From:	Frank Pan <frankpzh@xxxxxxxxx>
Date:	Thu, 3 Mar 2011 11:31:33 +0800
Cc:	"xen-devel@xxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxx>, Jeremy Fitzhardinge <Jeremy.Fitzhardinge@xxxxxxxxxx>
Delivery-date:	Wed, 02 Mar 2011 19:32:56 -0800
Dkim-signature:	v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=gamma; h=domainkey-signature:mime-version:in-reply-to:references:from:date :message-id:subject:to:cc:content-type:content-transfer-encoding; bh=6jKSr+KC95BfqB4xv3Gyfg+gBgwvGx9EiTfVeeEy/b8=; b=KrKW5GQpALRb6jeRqnXOFQfh1tT849oAM1Ej8/+Qe+8K6+knjxIYeHp5R6uwIDjmC+ ds53io6k70VEbZVMB896p+0qFhhdld8IJ8yplLuLbCB7c5ZguvhXBa2hPgAKCLTd1SGW RCs/PnhLv+cklbxCTr0CPrVtNCp7fSnHwP7rc=
Domainkey-signature:	a=rsa-sha1; c=nofws; d=gmail.com; s=gamma; h=mime-version:in-reply-to:references:from:date:message-id:subject:to :cc:content-type:content-transfer-encoding; b=xiMnOUWSU4DJQZBMvGE9jS2PxGuMCcHg1oPSvaoisEAjpTdgXoSxmGBh26ej6YLAGa Wh3+6uf8EGEQxeNm7N1AoylbOobYTTCbjUMpuERKED8ZB4ByrrwZE11lt4wEF5F7YOBu fHBepRQuTZwJP839bTRpXhaKjFdGGAGH2Y5pM=
Envelope-to:	www-data@xxxxxxxxxxxxxxxxxxx
In-reply-to:	<AANLkTim_L9Gkk7HYmSU9-ouEy+7Jo5uOZWbwx98T6eh+@xxxxxxxxxxxxxx>
List-help:	<mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id:	Xen developer discussion <xen-devel.lists.xensource.com>
List-post:	<mailto:xen-devel@lists.xensource.com>
List-subscribe:	<http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe:	<http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
References:	<AANLkTimsaSKxCxRqBkpgVs30_jgdHrgw2ibFsU_gKCo6@xxxxxxxxxxxxxx> <1299062062.17907.1442.camel@xxxxxxxxxxxxxxxxxxxxxx> <AANLkTim_L9Gkk7HYmSU9-ouEy+7Jo5uOZWbwx98T6eh+@xxxxxxxxxxxxxx>
Sender:	xen-devel-bounces@xxxxxxxxxxxxxxxxxxx

Also post patch as content. Sorry for gmail's line wrapping.

>From 379c83fa345444b696b5ab96f55ef55a18a9f309 Mon Sep 17 00:00:00 2001
From: Frank Pan <frankpzh@xxxxxxxxx>
Date: Wed, 2 Mar 2011 17:52:52 +0800
Subject: [PATCH] Use timeout on xenstore read_reply to avoid task hunging.

---
 linux-2.6-xen/drivers/xen/xenbus/xenbus_xs.c |   57 ++++++++++++++++++-------
 1 files changed, 41 insertions(+), 16 deletions(-)

diff --git a/linux-2.6-xen/drivers/xen/xenbus/xenbus_xs.c
b/linux-2.6-xen/drivers/xen/xenbus/xenbus_xs.c
index 5534690..4e66b30 100644
--- a/linux-2.6-xen/drivers/xen/xenbus/xenbus_xs.c
+++ b/linux-2.6-xen/drivers/xen/xenbus/xenbus_xs.c
@@ -73,6 +73,9 @@ struct xs_handle {
        spinlock_t reply_lock;
        wait_queue_head_t reply_waitq;

+       /* Sequence number of last request */
+       uint32_t reply_seq;
+
        /*
         * Mutex ordering: transaction_mutex -> watch_mutex -> request_mutex.
         * response_mutex is never taken simultaneously with the other three.
@@ -136,26 +139,45 @@ static int get_error(const char *errorstring)
        return xsd_errors[i].errnum;
 }

-static void *read_reply(enum xsd_sockmsg_type *type, unsigned int *len)
+#define XENSTORE_TIMEOUT (5*HZ)
+
+static void *read_reply(enum xsd_sockmsg_type *type, unsigned int
*len, uint32_t seq)
 {
-       struct xs_stored_msg *msg;
+       unsigned long remain_time = XENSTORE_TIMEOUT;
+       struct xs_stored_msg *msg = 0;
        char *body;

-       spin_lock(&xs_state.reply_lock);
-
-       while (list_empty(&xs_state.reply_list)) {
-               spin_unlock(&xs_state.reply_lock);
+       while (remain_time && !msg) {
                /* XXX FIXME: Avoid synchronous wait for response here. */
-               wait_event(xs_state.reply_waitq,
-                          !list_empty(&xs_state.reply_list));
+               remain_time = wait_event_timeout(xs_state.reply_waitq,
+                                                
!list_empty(&xs_state.reply_list),
+                                                remain_time);
+
                spin_lock(&xs_state.reply_lock);
-       }

-       msg = list_entry(xs_state.reply_list.next,
-                        struct xs_stored_msg, list);
-       list_del(&msg->list);
+               while (!list_empty(&xs_state.reply_list)) {
+                       msg = list_entry(xs_state.reply_list.next,
+                                        struct xs_stored_msg, list);
+                       list_del(&msg->list);
+
+                       /* Check sequence number */
+                       if (msg->hdr.req_id == seq)
+                               break;

-       spin_unlock(&xs_state.reply_lock);
+                       if (!IS_ERR(msg->u.reply.body))
+                               kfree(msg->u.reply.body);
+                       kfree(msg);
+                       msg = 0;
+               }
+
+               spin_unlock(&xs_state.reply_lock);
+       }
+
+       if (!msg) {
+               *type = XS_ERROR;
+               *len = 0;
+               return ERR_PTR(-EIO);
+       }

        *type = msg->hdr.type;
        if (len)
@@ -202,13 +224,14 @@ void *xenbus_dev_request_and_reply(struct
xsd_sockmsg *msg)
                transaction_start();

        mutex_lock(&xs_state.request_mutex);
+       msg->req_id = xs_state.reply_seq++;

        err = xb_write(msg, sizeof(*msg) + msg->len);
        if (err) {
                msg->type = XS_ERROR;
                ret = ERR_PTR(err);
        } else
-               ret = read_reply(&msg->type, &msg->len);
+               ret = read_reply(&msg->type, &msg->len, msg->req_id);

        mutex_unlock(&xs_state.request_mutex);

@@ -234,13 +257,13 @@ static void *xs_talkv(struct xenbus_transaction t,
        int err;

        msg.tx_id = t.id;
-       msg.req_id = 0;
        msg.type = type;
        msg.len = 0;
        for (i = 0; i < num_vecs; i++)
                msg.len += iovec[i].iov_len;

        mutex_lock(&xs_state.request_mutex);
+       msg.req_id = xs_state.reply_seq++;

        err = xb_write(&msg, sizeof(msg));
        if (err) {
@@ -256,7 +279,7 @@ static void *xs_talkv(struct xenbus_transaction t,
                }
        }

-       ret = read_reply(&msg.type, len);
+       ret = read_reply(&msg.type, len, msg.req_id);

        mutex_unlock(&xs_state.request_mutex);

@@ -872,6 +895,8 @@ int xs_init(void)
        int err;
        struct task_struct *task;

+       xs_state.reply_seq = 0;
+
        INIT_LIST_HEAD(&xs_state.reply_list);
        spin_lock_init(&xs_state.reply_lock);
        init_waitqueue_head(&xs_state.reply_waitq);
-- 
1.7.1


On Wed, Mar 2, 2011 at 7:35 PM, Frank Pan <frankpzh@xxxxxxxxx> wrote:
> Oh sorry.
> Patch attached.
>
> On Wed, Mar 2, 2011 at 6:34 PM, Ian Campbell <Ian.Campbell@xxxxxxxxxx> wrote:
>> It all sounds very plausible to me but you've forgotten the patch ;-)
>>
>> Why wait_event_timeout and not wait_event_interruptible to allow users
>> to interrupt? In particular I'm concerned about the arbitrarily chosen
>> 5s timeout not being sufficient on a busy system.
>
> FP: I wait_event_interruptible is a choice. But it needs user
> operation such as kill command. User-level tool(xenstore-ls for
> example) can also set SIGALRM or something else, but it sounds not so
> good.
>
> The timeout parameter is something discussible. 5s may not be a good
> one, but I believe xenstored on a healthy system should be response in
> 5s. What do you think?
>
>> Once specific pitfall which I remember was that userspace clients are at
>> liberty to make use of the req_id themselves (and some do). Fixing this
>> might involve shadowing the user provided req_id with a kernel generated
>> ID on the ring and unshadowing in the responses...
>
> FP: Yes, that's what I supposed to do. But I cannot find any
> dereference on the req_id section of the reply msg. If it exist
> somewhere, shadowing is surely needed.
>
> --
> Frank Pan
>
> Computer Science and Technology
> Tsinghua University
>



-- 
潘震皓, Frank Pan

Computer Science and Technology
Tsinghua University

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

WARNING - OLD ARCHIVES

xen-devel

Re: [Xen-devel] [PATCH] Use timeout on xenstore read_reply to avoid task