[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] another xend race: qemu-dm versus device hotplug



We spawn the device model without waiting for devices, but that's
obviously bogus: qemu-dm may go ahead and use these devices even if
they're not ready.

I think this broke when the "fix" to not wait for devices with the
domain list lock held was committed.

The "obvious" solution is to just waitForDevices in image.py, but that
just introduces the long wait with the lock again.

Instead I introduced a new xenstore entry for synchronizing with
qemu-dm. Untested xen-unstable patch below. Obviously this isn't
suitable to merge at this late point, but I thought I'd bring this up
now.

regards
john

diff --git a/tools/ioemu/xenstore.c b/tools/ioemu/xenstore.c
--- a/tools/ioemu/xenstore.c
+++ b/tools/ioemu/xenstore.c
@@ -23,8 +23,13 @@ static char *media_filename[MAX_DISKS + 
 static char *media_filename[MAX_DISKS + MAX_SCSI_DISKS];
 static QEMUTimer *insert_timer = NULL;
 
-#define UWAIT_MAX (30*1000000) /* thirty seconds */
-#define UWAIT     (100000)     /* 1/10th second  */
+/*
+ * Wait for xend's timeout time (100 seconds), plus a little slop, so if
+ * we timeout, then xend will give up first (since qemu-dm has no
+ * sensible error reporting at all).
+ */
+#define DEVICE_CREATE_TIMEOUT (110*1000000)
+#define DEVICE_CREATE_INC     (100000)
 
 static int pasprintf(char **buf, const char *fmt, ...)
 {
@@ -63,20 +68,35 @@ void xenstore_check_new_media_present(in
     qemu_mod_timer(insert_timer, qemu_get_clock(rt_clock) + timeout);
 }
 
-static void waitForDevice(char *fn)
+/*
+ * Make sure that device hotplug is complete before we attempt to open
+ * any devices.  This is for *all* devices, not just block devices.
+ */
+static int waitForDevices(char *fepath)
 { 
-    struct stat sbuf;
-    int status;
-    int uwait = UWAIT_MAX;
+    int timeout = DEVICE_CREATE_TIMEOUT;
+    unsigned int len;
+    char *status = NULL;
+    char *node = NULL;
+    int ret = 0;
+   
+    if (pasprintf(&node, "%s/device-status", fepath) == -1)
+        goto out;
 
     do {
-        status = stat(fn, &sbuf);
-        if (!status) break;
-        usleep(UWAIT);
-        uwait -= UWAIT;
-    } while (uwait > 0);
-
-    return;
+        status = xs_read(xsh, XBT_NULL, node, &len);
+        if (status != NULL) {
+            ret = (strcmp(status, "connected") == 0);
+            goto out;
+        }
+        usleep(DEVICE_CREATE_INC);
+        timeout -= DEVICE_CREATE_INC;
+    } while (timeout > 0);
+
+out:
+    free(status);
+    free(node);
+    return ret;
 }
 
 #define DIRECT_PCI_STR_LEN 160
@@ -104,6 +124,11 @@ void xenstore_parse_domain_config(int hv
     path = xs_get_domain_path(xsh, hvm_domid);
     if (path == NULL) {
         fprintf(logfile, "xs_get_domain_path() error\n");
+        goto out;
+    }
+
+    if (!waitForDevices(path)) {
+        fprintf(logfile, "Timed out waiting for devices\n");
         goto out;
     }
 
@@ -223,13 +248,6 @@ void xenstore_parse_domain_config(int hv
                 continue;
             free(params);
             params = xs_read(xsh, XBT_NULL, buf , &len);
-            if (params) {
-                /* 
-                 * wait for device, on timeout silently fail because we will 
-                 * fail to open below
-                 */
-                waitForDevice(params);
-            }
         }
 
         bs = bs_table[hd_index + (is_scsi ? MAX_DISKS : 0)] = bdrv_new(dev);
diff --git a/tools/python/xen/xend/XendDomainInfo.py 
b/tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py
+++ b/tools/python/xen/xend/XendDomainInfo.py
@@ -834,12 +834,19 @@ class XendDomainInfo:
         return True
 
     def waitForDevices(self):
-        """Wait for this domain's configured devices to connect.
+        """Wait for this domain's configured devices to connect. Write
+        overall status to /device-status for qemu-dm's benefit.
 
         @raise VmError: if any device fails to initialise.
         """
-        for devclass in XendDevices.valid_devices():
-            self.getDeviceController(devclass).waitForDevices()
+        status = "error"
+        try:
+            for devclass in XendDevices.valid_devices():
+                self.getDeviceController(devclass).waitForDevices()
+            status = "connected"
+        finally:
+            self.storeDom("device-status", status)
+
 
     def hvm_destroyPCIDevice(self, vslot):
         log.debug("hvm_destroyPCIDevice called %s", vslot)
@@ -1771,6 +1778,7 @@ class XendDomainInfo:
             try:
                 new_dom = XendDomain.instance().domain_create_from_dict(
                     new_dom_info)
+                new_dom.storeDom("device-status", "connected")
                 for x in prev_vm_xend[0][1]:
                     new_dom._writeVm('xend/%s' % x[0], x[1])
                 new_dom.waitForDevices()

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.