[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH 10/10] tools/xenstored: Delay new transaction while Live-Update is pending


  • To: Julien Grall <julien@xxxxxxx>
  • From: Luca Fancellu <luca.fancellu@xxxxxxx>
  • Date: Mon, 21 Jun 2021 10:30:41 +0100
  • Arc-authentication-results: i=1; mx.microsoft.com 1; spf=pass smtp.mailfrom=arm.com; dmarc=pass action=none header.from=arm.com; dkim=pass header.d=arm.com; arc=none
  • Arc-message-signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector9901; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-SenderADCheck; bh=8ASjjV7ehB6D0BbhjjaMU4QkbWhPk+IeC69pZ9yy5qI=; b=V6EpkMTKzR8MniK4yjMa5vFsfWKxElGeJGkJFOBDfOUv6WKEbSSwlmI3rCsZXhwSaM91gAfkLI8UfrpkkrnJH0lzuRE84/9gcbyHIWSETOuYmXjdksoJgC2LUbu0FBRxAS/yAjhB+nLtSm8U17PsVlMb0qPeJh3n8AYEjH63rSFkwXWLZXTLo69BHMy9mPXJjd4tWV96mvG4qvNl+DV03ViB7F2Qqfdah8genJzXbWnhs1GKmrLU1NWgWnlNrKP8ZSBWlf1Y4d0VwiZoTPEMC+7WrBt2Tl+AZdbv5OaMo+Mb7VB96zQvMs1k2iGdLG95P2VEaOvKaf5gGRT2q8qkSg==
  • Arc-seal: i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none; b=esS0ioUkbx7dNMuJHTEtHolhLDUuUOt9v3MZyajYPY9v7/QTshbVp1yBOgfrfF5T2/i5iPmdyF6TSlo8vVniiTBjGx2/ZozPqvkyGewbKUTS+jQbQjtSJQlSN1ixPfGJkZXWYAmrO7OCmYAnxH5lL3QkFjYT7MROT9PauYP2s13xHOAZEzn1d30fVpOVkd/wUooHTn8paifzbwDU4oRS33mT5/iGlLgIDTWVB/RGQogOg5JbMO205iEANl0mOdNrt8qKiEIPGosrpJHreBJSCVFM1ESs+jXU93FxmRZ8UNwun/QnifqfYQq+xW6H19oFal8+Mc2v2oiHTDbUvjixIw==
  • Authentication-results-original: xen.org; dkim=none (message not signed) header.d=none;xen.org; dmarc=none action=none header.from=arm.com;
  • Cc: xen-devel@xxxxxxxxxxxxxxxxxxxx, raphning@xxxxxxxxxxxx, doebel@xxxxxxxxx, Julien Grall <jgrall@xxxxxxxxxx>, Ian Jackson <iwj@xxxxxxxxxxxxxx>, Wei Liu <wl@xxxxxxx>, Juergen Gross <jgross@xxxxxxxx>
  • Delivery-date: Mon, 21 Jun 2021 09:31:35 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>
  • Nodisclaimer: true
  • Original-authentication-results: xen.org; dkim=none (message not signed) header.d=none;xen.org; dmarc=none action=none header.from=arm.com;


> On 16 Jun 2021, at 15:43, Julien Grall <julien@xxxxxxx> wrote:
> 
> From: Julien Grall <jgrall@xxxxxxxxxx>
> 
> At the moment, Live-Update will, by default, not proceed if there are
> in-flight transactions. It is possible force it by passing -F but this
> will break any connection with in-flight transactions.
> 
> There are PV drivers out that may never terminate some transaction. On
> host running such guest, we would need to use -F. Unfortunately, this
> also risks to break well-behaving guests (and even dom0) because
> Live-Update will happen as soon as the timeout is hit.
> 
> Ideally, we would want to preserve transactions but this requires
> some work and a lot of testing to be able to use it in production.
> 
> As a stop gap, we want to limit the damage of -F. This patch will delay
> any transactions that are started after Live-Update has been requested.
> 
> If the request cannot be delayed, the connection will be stalled to
> avoid loosing requests.
> 
> If the connection has already a pending transaction before Live-Update,
> then new transaction will not be delayed. This is to avoid the connection
> to stall.
> 
> With this stop gap in place, domains with long running transactions will
> still break when using -F, but other domains which starts a transaction
> in the middle of Live-Update will continue to work.
> 
> Signed-off-by: Julien Grall <jgrall@xxxxxxxxxx>

Reviewed-by: Luca Fancellu <luca.fancellu@xxxxxxx>

> ---
> tools/xenstore/xenstored_control.c | 10 ++++++
> tools/xenstore/xenstored_control.h |  2 ++
> tools/xenstore/xenstored_core.c    | 49 +++++++++++++++++++++++++++++-
> tools/xenstore/xenstored_core.h    |  3 ++
> 4 files changed, 63 insertions(+), 1 deletion(-)
> 
> diff --git a/tools/xenstore/xenstored_control.c 
> b/tools/xenstore/xenstored_control.c
> index 1c24d4869eab..a045f102a420 100644
> --- a/tools/xenstore/xenstored_control.c
> +++ b/tools/xenstore/xenstored_control.c
> @@ -131,6 +131,11 @@ unsigned int lu_write_response(FILE *fp)
>       return sizeof(msg) + msg.len;
> }
> 
> +bool lu_is_pending(void)
> +{
> +     return lu_status != NULL;
> +}
> +
> #else
> struct connection *lu_get_connection(void)
> {
> @@ -142,6 +147,11 @@ unsigned int lu_write_response(FILE *fp)
>       /* Unsupported */
>       return 0;
> }
> +
> +bool lu_is_pending(void)
> +{
> +     return false;
> +}
> #endif
> 
> struct cmd_s {
> diff --git a/tools/xenstore/xenstored_control.h 
> b/tools/xenstore/xenstored_control.h
> index 27d7f19e4b7f..98b6fbcea2b1 100644
> --- a/tools/xenstore/xenstored_control.h
> +++ b/tools/xenstore/xenstored_control.h
> @@ -23,3 +23,5 @@ struct connection *lu_get_connection(void);
> 
> /* Write the "OK" response for the live-update command */
> unsigned int lu_write_response(FILE *fp);
> +
> +bool lu_is_pending(void);
> diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
> index 9eca58682b51..10b53af76ac5 100644
> --- a/tools/xenstore/xenstored_core.c
> +++ b/tools/xenstore/xenstored_core.c
> @@ -338,7 +338,20 @@ static int destroy_conn(void *_conn)
> 
> static bool conn_can_read(struct connection *conn)
> {
> -     return conn->funcs->can_read(conn) && !conn->is_ignored;
> +     if (!conn->funcs->can_read(conn))
> +             return false;
> +
> +     if (conn->is_ignored)
> +             return false;
> +
> +     /*
> +      * For stalled connection, we want to process the pending
> +      * command as soon as live-update has aborted.
> +      */
> +     if (conn->is_stalled)
> +             return !lu_is_pending();
> +
> +     return true;
> }
> 
> static bool conn_can_write(struct connection *conn)
> @@ -417,6 +430,12 @@ static void initialize_fds(int *p_sock_pollfd_idx, int 
> *ptimeout)
>                       if (!list_empty(&conn->out_list))
>                               events |= POLLOUT;
>                       conn->pollfd_idx = set_fd(conn->fd, events);
> +                     /*
> +                      * For stalled connection, we want to process the
> +                      * pending command as soon as live-update has aborted.
> +                      */
> +                     if (conn->is_stalled && !lu_is_pending())
> +                             *ptimeout = 0;
>               }
>       }
> }
> @@ -1524,6 +1543,9 @@ static bool process_delayed_message(struct 
> delayed_request *req)
>       struct connection *conn = req->data;
>       struct buffered_data *saved_in = conn->in;
> 
> +     if (lu_is_pending())
> +             return false;
> +
>       /*
>        * Part of process_message() expects conn->in to contains the
>        * processed response. So save the current conn->in and restore it
> @@ -1543,6 +1565,30 @@ static void consider_message(struct connection *conn)
>                       sockmsg_string(conn->in->hdr.msg.type),
>                       conn->in->hdr.msg.len, conn);
> 
> +     conn->is_stalled = false;
> +     /*
> +      * Currently, Live-Update is not supported if there is active
> +      * transactions. In order to reduce the number of retry, delay
> +      * any new request to start a transaction if Live-Update is pending
> +      * and there are no transactions in-flight.
> +      *
> +      * If we can't delay the request, then mark the connection as
> +      * stalled. This will ignore new requests until Live-Update happened
> +      * or it was aborted.
> +      */
> +     if (lu_is_pending() && conn->transaction_started == 0 &&
> +         conn->in->hdr.msg.type == XS_TRANSACTION_START) {
> +             trace("Delaying transaction start for connection %p req_id 
> %u\n",
> +                   conn, conn->in->hdr.msg.req_id);
> +
> +             if (delay_request(conn, conn->in, process_delayed_message,
> +                               conn, false) != 0) {
> +                     trace("Stalling connection %p\n", conn);
> +                     conn->is_stalled = true;
> +             }
> +             return;
> +     }
> +
>       process_message(conn, conn->in);
> 
>       assert(conn->in == NULL);
> @@ -1629,6 +1675,7 @@ struct connection *new_connection(const struct 
> interface_funcs *funcs)
>       new->pollfd_idx = -1;
>       new->funcs = funcs;
>       new->is_ignored = false;
> +     new->is_stalled = false;
>       new->transaction_started = 0;
>       INIT_LIST_HEAD(&new->out_list);
>       INIT_LIST_HEAD(&new->watches);
> diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h
> index dac517156993..258f6ff38279 100644
> --- a/tools/xenstore/xenstored_core.h
> +++ b/tools/xenstore/xenstored_core.h
> @@ -110,6 +110,9 @@ struct connection
>       /* Is this connection ignored? */
>       bool is_ignored;
> 
> +     /* Is the connection stalled? */
> +     bool is_stalled;
> +
>       /* Buffered incoming data. */
>       struct buffered_data *in;
> 
> -- 
> 2.17.1
> 
> 




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.