[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH]fix hang on migration



hi,

The migration process may hang if you start another VM while the migration
is under way.

Eg.

Start a VM:

# xm create OVM_EL5U3_X86_PVHVM_4GB/vm.cfg

Wait for the VM to boot up, then start another VM while this VM is migrating:

# xm migrate -l OVM_EL5U3_X86_PVHVM_4GB localhost &
# xm create OVM_EL5U1_X86_HVM_4GB/vm.cfg

The migration will hang at this point:

# xm list
Name                                        ID   Mem VCPUs      State   Time(s)
Domain-0                                     0   543     2     r-----    110.1
OVM_EL5U1_X86_HVM_4GB                        4   256     1     -b----     39.6
OVM_EL5U3_X86_PVHVM_4GB                      5   512     1     --p---      0.0

The migration will only finish after shutting down VM OVM_EL5U1_X86_HVM_4GB.

This is because xend uses a threaded model, so the migration thread runs
in the same context as everything else, and migration uses pipes/sockets.

So the write side of the pipe/socket created during migration is inherited by
any child process forked in the meantime. The reading side of the pipe/socket
never sees EOF while such a child still holds the write side open, so it will
hang if a child is forked and that child never exits (like qemu-dm).

So we should close all inherited open file descriptors in every forked process.

Ian: please review the patch against qemu-dm forking.

Signed-off-by: Zhigang Wang <zhigang.x.wang@xxxxxxxxxx>
Reviewed-by: Xiaowei Hu <xiaowei.hu@xxxxxxxxxx>

thanks,

zhigang




diff -Nurap xen-unstable.orig/tools/python/xen/util/oshelp.py 
xen-unstable/tools/python/xen/util/oshelp.py
--- xen-unstable.orig/tools/python/xen/util/oshelp.py   2009-07-24 
16:07:59.000000000 +0800
+++ xen-unstable/tools/python/xen/util/oshelp.py        2009-07-24 
16:52:32.000000000 +0800
@@ -1,6 +1,19 @@
 import fcntl
 import os
 
+def close_fds(pass_fds=()):
+    try:
+        MAXFD = os.sysconf('SC_OPEN_MAX')
+    except:
+        MAXFD = 256
+    for i in range(3, MAXFD):
+        if i in pass_fds:
+            continue
+        try:
+            os.close(i)
+        except OSError:
+            pass
+
 def fcntl_setfd_cloexec(file, bool):
         f = fcntl.fcntl(file, fcntl.F_GETFD)
         if bool: f |= fcntl.FD_CLOEXEC
diff -Nurap xen-unstable.orig/tools/python/xen/util/xpopen.py 
xen-unstable/tools/python/xen/util/xpopen.py
--- xen-unstable.orig/tools/python/xen/util/xpopen.py   2009-07-24 
16:07:59.000000000 +0800
+++ xen-unstable/tools/python/xen/util/xpopen.py        2009-07-24 
17:17:28.000000000 +0800
@@ -85,7 +85,7 @@ class xPopen3:
 
     sts = -1                    # Child not completed yet
 
-    def __init__(self, cmd, capturestderr=False, bufsize=-1, passfd=()):
+    def __init__(self, cmd, capturestderr=False, bufsize=-1, passfd=(), 
env=None):
         """The parameter 'cmd' is the shell command to execute in a
         sub-process.  The 'capturestderr' flag, if true, specifies that
         the object should capture standard error output of the child process.
@@ -128,6 +128,10 @@ class xPopen3:
                 pass
         try:
             os.execvp(cmd[0], cmd)
+            if env is None:
+                os.execvp(cmd[0], cmd)
+            else:
+                os.execvpe(cmd[0], cmd, env)
         finally:
             os._exit(127)
 
@@ -154,16 +158,26 @@ class xPopen3:
         return self.sts
 
 
-def xpopen2(cmd, bufsize=-1, mode='t', passfd=[]):
+def xpopen2(cmd, bufsize=-1, mode='t', passfd=[], env=None):
     """Execute the shell command 'cmd' in a sub-process.  If 'bufsize' is
     specified, it sets the buffer size for the I/O pipes.  The file objects
     (child_stdout, child_stdin) are returned."""
-    inst = xPopen3(cmd, False, bufsize, passfd)
+    inst = xPopen3(cmd, False, bufsize, passfd, env)
     return inst.fromchild, inst.tochild
 
-def xpopen3(cmd, bufsize=-1, mode='t', passfd=[]):
+def xpopen3(cmd, bufsize=-1, mode='t', passfd=[], env=None):
     """Execute the shell command 'cmd' in a sub-process.  If 'bufsize' is
     specified, it sets the buffer size for the I/O pipes.  The file objects
     (child_stdout, child_stdin, child_stderr) are returned."""
-    inst = xPopen3(cmd, True, bufsize, passfd)
+    inst = xPopen3(cmd, True, bufsize, passfd, env)
     return inst.fromchild, inst.tochild, inst.childerr
+
+def call(*popenargs, **kwargs):
+    """Run command with arguments.  Wait for command to complete, then
+    return the status.
+
+    The arguments are the same as for the xPopen3 constructor.  Example:
+
+    status = call("ls -l")
+    """
+    return xPopen3(*popenargs, **kwargs).wait()
diff -Nurap xen-unstable.orig/tools/python/xen/util/xsm/acm/acm.py 
xen-unstable/tools/python/xen/util/xsm/acm/acm.py
--- xen-unstable.orig/tools/python/xen/util/xsm/acm/acm.py      2009-07-24 
16:07:59.000000000 +0800
+++ xen-unstable/tools/python/xen/util/xsm/acm/acm.py   2009-07-24 
17:27:30.000000000 +0800
@@ -31,7 +31,7 @@ from xen.xend import XendConstants
 from xen.xend import XendOptions
 from xen.xend.XendLogging import log
 from xen.xend.XendError import VmError
-from xen.util import dictio, xsconstants, auxbin
+from xen.util import dictio, xsconstants, auxbin, xpopen
 from xen.xend.XendConstants import *
 
 #global directories and tools for security management
@@ -1710,7 +1710,7 @@ def run_resource_label_change_script(res
                 log.info("Running resource label change script %s: %s" %
                          (script, parms))
                 parms.update(os.environ)
-                os.spawnve(os.P_WAIT, script[0], script, parms)
+                xpopen.call(" ".join(script, params))
         else:
             log.info("No script given for relabeling of resources.")
     if not __script_runner:
diff -Nurap xen-unstable.orig/tools/python/xen/xend/image.py 
xen-unstable/tools/python/xen/xend/image.py
--- xen-unstable.orig/tools/python/xen/xend/image.py    2009-07-24 
16:07:59.000000000 +0800
+++ xen-unstable/tools/python/xen/xend/image.py 2009-07-27 17:37:20.000000000 
+0800
@@ -438,9 +438,7 @@ class ImageHandler:
                 os.dup2(null, 0)
                 os.dup2(logfd, 1)
                 os.dup2(logfd, 2)
-                os.close(null)
-                os.close(logfd)
-                self.sentinel_fifo.close()
+                oshelp.close_fds((sentinel_write.fileno(),))
                 try:
                     os.execve(self.device_model, args, env)
                 except Exception, e:
diff -Nurap xen-unstable.orig/tools/python/xen/xend/server/BlktapController.py 
xen-unstable/tools/python/xen/xend/server/BlktapController.py
--- xen-unstable.orig/tools/python/xen/xend/server/BlktapController.py  
2009-07-24 16:07:59.000000000 +0800
+++ xen-unstable/tools/python/xen/xend/server/BlktapController.py       
2009-07-24 17:40:15.000000000 +0800
@@ -1,9 +1,9 @@
 # Copyright (c) 2005, XenSource Ltd.
 import string, re
-import popen2
 
 from xen.xend.server.blkif import BlkifController
 from xen.xend.XendLogging import log
+from xen.util.xpopen import xPopen3
 
 phantomDev = 0;
 phantomId = 0;
@@ -34,7 +34,7 @@ blktap_disk_types = blktap1_disk_types +
 
 def doexec(args, inputtext=None):
     """Execute a subprocess, then return its return code, stdout and stderr"""
-    proc = popen2.Popen3(args, True)
+    proc = xPopen3(args, True)
     if inputtext != None:
         proc.tochild.write(inputtext)
     stdout = proc.fromchild
diff -Nurap xen-unstable.orig/tools/python/xen/xend/Vifctl.py 
xen-unstable/tools/python/xen/xend/Vifctl.py
--- xen-unstable.orig/tools/python/xen/xend/Vifctl.py   2009-07-24 
16:07:59.000000000 +0800
+++ xen-unstable/tools/python/xen/xend/Vifctl.py        2009-07-24 
17:26:38.000000000 +0800
@@ -18,10 +18,9 @@
 
 """Xend interface to networking control scripts.
 """
-import os
 
 import XendOptions
-
+from xen.util import xpopen
 
 def network(op):
     """Call a network control script.
@@ -33,4 +32,4 @@ def network(op):
     script = XendOptions.instance().get_network_script()
     if script:
         script.insert(1, op)
-        os.spawnv(os.P_WAIT, script[0], script)
+        xpopen.call(script)
diff -Nurap xen-unstable.orig/tools/python/xen/xend/XendBootloader.py 
xen-unstable/tools/python/xen/xend/XendBootloader.py
--- xen-unstable.orig/tools/python/xen/xend/XendBootloader.py   2009-07-24 
16:07:59.000000000 +0800
+++ xen-unstable/tools/python/xen/xend/XendBootloader.py        2009-07-24 
17:43:22.000000000 +0800
@@ -17,7 +17,7 @@ import random
 import shlex
 from xen.xend import sxp
 
-from xen.util import mkdir
+from xen.util import mkdir, oshelp
 from XendLogging import log
 from XendError import VmError
 
@@ -113,6 +113,7 @@ def bootloader(blexec, disk, dom, quiet 
             log.debug("Launching bootloader as %s." % str(args))
             env = os.environ.copy()
             env['TERM'] = 'vt100'
+            oshelp.close_fds()
             os.execvpe(args[0], args, env)
         except OSError, e:
             print e
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.