[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH]fix hang on migration



sorry ;-). attached this time.

Keir Fraser wrote:
> Patch not attached.
> 
> 
> On 29/07/2009 08:50, "Zhigang Wang" <zhigang.x.wang@xxxxxxxxxx> wrote:
> 
>> Hi Keir,
>>
>> The hang happens often when you run many VMs simultaneously, so I'd like
>> this patch to go to 3.4-testing as well.
>>
>> This is the regenerated patch against xen-3.4-testing.
>>
>> Signed-off-by: Zhigang Wang <zhigang.x.wang@xxxxxxxxxx>
>> Reviewed-by: Xiaowei Hu <xiaowei.hu@xxxxxxxxxx>
>>
>> thanks,
>>
>> zhigang
>>
>> Zhigang Wang wrote:
>>> hi,
>>>
>>> the migration process may hang when you start another VM while the
>>> migration is under way.
>>>
>>> Eg.
>>>
>>> Start a VM:
>>>
>>> # xm create OVM_EL5U3_X86_PVHVM_4GB/vm.cfg
>>>
>>> Wait for the VM to boot up, then start another VM while this VM is migrating:
>>>
>>> # xm migrate -l OVM_EL5U3_X86_PVHVM_4GB localhost &
>>> # xm create OVM_EL5U1_X86_HVM_4GB/vm.cfg
>>>
>>> The migration will hang on:
>>>
>>> # xm list
>>> Name                                        ID   Mem VCPUs      State
>>> Time(s)
>>> Domain-0                                     0   543     2     r-----
>>> 110.1
>>> OVM_EL5U1_X86_HVM_4GB                        4   256     1     -b----
>>> 39.6
>>> OVM_EL5U3_X86_PVHVM_4GB                      5   512     1     --p---
>>> 0.0
>>>
>>> The migration will only finish after shutting down VM OVM_EL5U1_X86_HVM_4GB.
>>>
>>> This is because we are using a threaded model in xend, the migration
>>> thread is running in the same context, and we are using pipes/sockets
>>> in migration.
>>>
>>> So the write side of a pipe/socket created during migration will be
>>> inherited by children, and thus the reading side of the pipe/socket will
>>> hang if a child is forked and that child never exits (like qemu-dm).
>>>
>>> So we should close all inherited open file descriptors in every forked
>>> process.
>>>
>>> Ian: please review the patch against qemu-dm forking.
>>>
>>> Signed-off-by: Zhigang Wang <zhigang.x.wang@xxxxxxxxxx>
>>> Reviewed-by: Xiaowei Hu <xiaowei.hu@xxxxxxxxxx>
>>>
>>> thanks,
>>>
>>> zhigang
>>>
>>>
>>>
>>>
>>>
>>>
>>> ------------------------------------------------------------------------
>>>
>>> _______________________________________________
>>> Xen-devel mailing list
>>> Xen-devel@xxxxxxxxxxxxxxxxxxx
>>> http://lists.xensource.com/xen-devel
> 
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxxxxxxxx
> http://lists.xensource.com/xen-devel

diff -Nurap xen-3.4-testing.orig/tools/python/xen/util/oshelp.py 
xen-3.4-testing/tools/python/xen/util/oshelp.py
--- xen-3.4-testing.orig/tools/python/xen/util/oshelp.py        2009-07-29 
15:05:37.000000000 +0800
+++ xen-3.4-testing/tools/python/xen/util/oshelp.py     2009-07-29 
15:14:25.000000000 +0800
@@ -1,6 +1,19 @@
 import fcntl
 import os
 
+def close_fds(pass_fds=()):
+    try:
+        MAXFD = os.sysconf('SC_OPEN_MAX')
+    except:
+        MAXFD = 256
+    for i in range(3, MAXFD):
+        if i in pass_fds:
+            continue
+        try:
+            os.close(i)
+        except OSError:
+            pass
+
 def fcntl_setfd_cloexec(file, bool):
         f = fcntl.fcntl(file, fcntl.F_GETFD)
         if bool: f |= fcntl.FD_CLOEXEC
diff -Nurap xen-3.4-testing.orig/tools/python/xen/util/xpopen.py 
xen-3.4-testing/tools/python/xen/util/xpopen.py
--- xen-3.4-testing.orig/tools/python/xen/util/xpopen.py        2009-07-29 
15:05:37.000000000 +0800
+++ xen-3.4-testing/tools/python/xen/util/xpopen.py     2009-07-29 
15:14:25.000000000 +0800
@@ -85,7 +85,7 @@ class xPopen3:
 
     sts = -1                    # Child not completed yet
 
-    def __init__(self, cmd, capturestderr=False, bufsize=-1, passfd=()):
+    def __init__(self, cmd, capturestderr=False, bufsize=-1, passfd=(), 
env=None):
         """The parameter 'cmd' is the shell command to execute in a
         sub-process.  The 'capturestderr' flag, if true, specifies that
         the object should capture standard error output of the child process.
@@ -128,6 +128,10 @@ class xPopen3:
                 pass
         try:
             os.execvp(cmd[0], cmd)
+            if env is None:
+                os.execvp(cmd[0], cmd)
+            else:
+                os.execvpe(cmd[0], cmd, env)
         finally:
             os._exit(127)
 
@@ -154,16 +158,26 @@ class xPopen3:
         return self.sts
 
 
-def xpopen2(cmd, bufsize=-1, mode='t', passfd=[]):
+def xpopen2(cmd, bufsize=-1, mode='t', passfd=[], env=None):
     """Execute the shell command 'cmd' in a sub-process.  If 'bufsize' is
     specified, it sets the buffer size for the I/O pipes.  The file objects
     (child_stdout, child_stdin) are returned."""
-    inst = xPopen3(cmd, False, bufsize, passfd)
+    inst = xPopen3(cmd, False, bufsize, passfd, env)
     return inst.fromchild, inst.tochild
 
-def xpopen3(cmd, bufsize=-1, mode='t', passfd=[]):
+def xpopen3(cmd, bufsize=-1, mode='t', passfd=[], env=None):
     """Execute the shell command 'cmd' in a sub-process.  If 'bufsize' is
     specified, it sets the buffer size for the I/O pipes.  The file objects
     (child_stdout, child_stdin, child_stderr) are returned."""
-    inst = xPopen3(cmd, True, bufsize, passfd)
+    inst = xPopen3(cmd, True, bufsize, passfd, env)
     return inst.fromchild, inst.tochild, inst.childerr
+
+def call(*popenargs, **kwargs):
+    """Run command with arguments.  Wait for command to complete, then
+    return the status.
+
+    The arguments are the same as for the xPopen3 constructor.  Example:
+
+    status = call("ls -l")
+    """
+    return xPopen3(*popenargs, **kwargs).wait()
diff -Nurap xen-3.4-testing.orig/tools/python/xen/util/xsm/acm/acm.py 
xen-3.4-testing/tools/python/xen/util/xsm/acm/acm.py
--- xen-3.4-testing.orig/tools/python/xen/util/xsm/acm/acm.py   2009-07-29 
15:05:37.000000000 +0800
+++ xen-3.4-testing/tools/python/xen/util/xsm/acm/acm.py        2009-07-29 
15:15:04.000000000 +0800
@@ -31,7 +31,7 @@ from xen.xend import XendConstants
 from xen.xend import XendOptions
 from xen.xend.XendLogging import log
 from xen.xend.XendError import VmError
-from xen.util import dictio, xsconstants
+from xen.util import dictio, xsconstants, xpopen
 from xen.xend.XendConstants import *
 
 #global directories and tools for security management
@@ -1710,7 +1710,7 @@ def run_resource_label_change_script(res
                 log.info("Running resource label change script %s: %s" %
                          (script, parms))
                 parms.update(os.environ)
-                os.spawnve(os.P_WAIT, script[0], script, parms)
+                xpopen.call(" ".join(script, params))
         else:
             log.info("No script given for relabeling of resources.")
     if not __script_runner:
diff -Nurap xen-3.4-testing.orig/tools/python/xen/xend/image.py 
xen-3.4-testing/tools/python/xen/xend/image.py
--- xen-3.4-testing.orig/tools/python/xen/xend/image.py 2009-07-29 
15:05:37.000000000 +0800
+++ xen-3.4-testing/tools/python/xen/xend/image.py      2009-07-29 
15:14:25.000000000 +0800
@@ -423,9 +423,7 @@ class ImageHandler:
                 os.dup2(null, 0)
                 os.dup2(logfd, 1)
                 os.dup2(logfd, 2)
-                os.close(null)
-                os.close(logfd)
-                self.sentinel_fifo.close()
+                oshelp.close_fds((sentinel_write.fileno(),))
                 try:
                     os.execve(self.device_model, args, env)
                 except Exception, e:
diff -Nurap xen-3.4-testing.orig/tools/python/xen/xend/Vifctl.py 
xen-3.4-testing/tools/python/xen/xend/Vifctl.py
--- xen-3.4-testing.orig/tools/python/xen/xend/Vifctl.py        2009-07-29 
15:05:37.000000000 +0800
+++ xen-3.4-testing/tools/python/xen/xend/Vifctl.py     2009-07-29 
15:14:25.000000000 +0800
@@ -18,10 +18,9 @@
 
 """Xend interface to networking control scripts.
 """
-import os
 
 import XendOptions
-
+from xen.util import xpopen
 
 def network(op):
     """Call a network control script.
@@ -33,4 +32,4 @@ def network(op):
     script = XendOptions.instance().get_network_script()
     if script:
         script.insert(1, op)
-        os.spawnv(os.P_WAIT, script[0], script)
+        xpopen.call(script)
diff -Nurap xen-3.4-testing.orig/tools/python/xen/xend/XendBootloader.py 
xen-3.4-testing/tools/python/xen/xend/XendBootloader.py
--- xen-3.4-testing.orig/tools/python/xen/xend/XendBootloader.py        
2009-07-29 15:05:37.000000000 +0800
+++ xen-3.4-testing/tools/python/xen/xend/XendBootloader.py     2009-07-29 
15:14:25.000000000 +0800
@@ -17,7 +17,7 @@ import random
 import shlex
 from xen.xend import sxp
 
-from xen.util import mkdir
+from xen.util import mkdir, oshelp
 from XendLogging import log
 from XendError import VmError
 
@@ -113,6 +113,7 @@ def bootloader(blexec, disk, dom, quiet 
             log.debug("Launching bootloader as %s." % str(args))
             env = os.environ.copy()
             env['TERM'] = 'vt100'
+            oshelp.close_fds()
             os.execvpe(args[0], args, env)
         except OSError, e:
             print e
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.