WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH] Require that xenstored writes to a domain complete in a single chunk

To: xen-devel@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-devel] [PATCH] Require that xenstored writes to a domain complete in a single chunk
From: David Edmondson <dme@xxxxxxx>
Date: Mon, 26 Feb 2007 16:24:23 +0000
Cancel-lock: sha1:m217nwzsu46bAYTPrG/IG32ZhNw=
Delivery-date: Mon, 26 Feb 2007 08:21:54 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Organization: Sun Microsystems Ltd.
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
If xenstored is part-way through writing a header+payload into the
buffer shared with a guest domain when the guest domain decides to
suspend, the buffer is corrupted, as xenstored doesn't know that it
has a partial write to complete when the domain revives.  The domain
is expecting proper completion of the partial header+payload and is
disappointed.

The attached patch modifies xenstored such that it checks for
sufficient space for header+payload before making any changes to the
shared buffer.

It is against 3.0.4-1, but the code in unstable looks the same.

Require that writes to a domain complete in a single chunk (i.e.,
both header and payload are written at the same time) to avoid leaving
the ring in an inconsistent state across suspend/resume.

Signed-off-by: David Edmondson <dme@xxxxxxx>

diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
--- a/tools/xenstore/xenstored_core.c
+++ b/tools/xenstore/xenstored_core.c
@@ -264,10 +264,17 @@ static bool write_messages(struct connec
 {
        int ret;
        struct buffered_data *out;
+       unsigned int space_required = 0;
+       bool r = false;
 
        out = list_top(&conn->out_list, struct buffered_data, list);
        if (out == NULL)
                return true;
+
+       if (out->inhdr)
+               space_required += sizeof (out->hdr);
+       space_required += out->hdr.msg.len;
+       space_required -= out->used;
 
        if (out->inhdr) {
                if (verbose)
@@ -277,36 +284,59 @@ static bool write_messages(struct connec
                                out->buffer, conn);
                ret = conn->write(conn, out->hdr.raw + out->used,
                                  sizeof(out->hdr) - out->used);
-               if (ret < 0)
-                       return false;
-
+               if (ret < 0) {
+                       r = false;
+                       goto done;
+               }
+
+               space_required -= ret;
                out->used += ret;
-               if (out->used < sizeof(out->hdr))
-                       return true;
+               if (out->used < sizeof(out->hdr)) {
+                       r = true;
+                       goto done;
+               }
 
                out->inhdr = false;
                out->used = 0;
 
                /* Second write might block if non-zero. */
-               if (out->hdr.msg.len && !conn->domain)
-                       return true;
+               if ((out->hdr.msg.len != 0) &&
+                   (conn->domain == NULL)) {
+                       r = true;
+                       goto done;
+               }
        }
 
        ret = conn->write(conn, out->buffer + out->used,
                          out->hdr.msg.len - out->used);
-       if (ret < 0)
-               return false;
-
+       if (ret < 0) {
+               r = false;
+               goto done;
+       }
+
+       space_required -= ret;
        out->used += ret;
-       if (out->used != out->hdr.msg.len)
-               return true;
-
+       if (out->used != out->hdr.msg.len) {
+               r = true;
+               goto done;
+       }
+
+       r = true;
        trace_io(conn, "OUT", out);
 
        list_del(&out->list);
        talloc_free(out);
 
-       return true;
+done:
+       if ((conn->domain != NULL) && (space_required != 0))
+               /*
+                * Not all of the data was consumed.  This can leave
+                * the shared ring pointers in an inconsistent state
+                * across suspend/resume.
+                */
+               eprintf("partial write to shared page of domain");
+
+       return r;
 }
 
 static int destroy_conn(void *_conn)
@@ -1997,7 +2027,7 @@ int main(int argc, char *argv[])
                                goto more;
                        }
 
-                       if (domain_can_write(i) && !list_empty(&i->out_list)) {
+                       if (domain_can_write(i)) {
                                handle_output(i);
                                goto more;
                        }
diff --git a/tools/xenstore/xenstored_domain.c 
b/tools/xenstore/xenstored_domain.c
--- a/tools/xenstore/xenstored_domain.c
+++ b/tools/xenstore/xenstored_domain.c
@@ -109,23 +109,29 @@ static int writechn(struct connection *c
        void *dest;
        struct xenstore_domain_interface *intf = conn->domain->interface;
        XENSTORE_RING_IDX cons, prod;
-
-       /* Must read indexes once, and before anything else, and verified. */
-       cons = intf->rsp_cons;
-       prod = intf->rsp_prod;
-       mb();
-       if (!check_indexes(cons, prod)) {
-               errno = EIO;
-               return -1;
-       }
-
-       dest = get_output_chunk(cons, prod, intf->rsp, &avail);
-       if (avail < len)
-               len = avail;
-
-       memcpy(dest, data, len);
-       mb();
-       intf->rsp_prod += len;
+       unsigned int remain = len;
+
+       while (remain > 0) {
+               /* Must read indexes once, and before anything else,
+                * and verified. */
+               cons = intf->rsp_cons;
+               prod = intf->rsp_prod;
+               mb();
+               if (!check_indexes(cons, prod)) {
+                       errno = EIO;
+                       return -1;
+               }
+
+               dest = get_output_chunk(cons, prod, intf->rsp, &avail);
+               if (avail > remain)
+                       avail = remain;
+               memcpy(dest, data, avail);
+               data += avail;
+               remain -= avail;
+
+               mb();
+               intf->rsp_prod += avail;
+       }
 
        xc_evtchn_notify(xce_handle, conn->domain->port);
 
@@ -236,7 +242,30 @@ bool domain_can_write(struct connection 
 bool domain_can_write(struct connection *conn)
 {
        struct xenstore_domain_interface *intf = conn->domain->interface;
-       return ((intf->rsp_prod - intf->rsp_cons) != XENSTORE_RING_SIZE);
+       struct buffered_data *out;
+       unsigned int space_required = 0, space_available;
+
+       out = list_top(&conn->out_list, struct buffered_data, list);
+       if (out == NULL)
+               return false;
+
+       if (out->inhdr)
+               space_required += sizeof (out->hdr);
+       space_required += out->hdr.msg.len;
+       space_required -= out->used;
+
+       if (space_required > XENSTORE_RING_SIZE) {
+               eprintf("attempt to write %d bytes "
+                       "to domain %d will never succeed",
+                       space_required, conn->domain->domid);
+               talloc_free(conn);
+               return false;
+       }
+
+       space_available = XENSTORE_RING_SIZE -
+               (intf->rsp_prod - intf->rsp_cons);
+
+       return (space_available >= space_required);
 }
 
 static char *talloc_domain_path(void *context, unsigned int domid)
dme.
-- 
David Edmondson, Sun Microsystems, http://www.dme.org
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel