WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH] - fix/improve error handling for failed suspend/migr

To: xen-devel@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-devel] [PATCH] - fix/improve error handling for failed suspend/migrate
From: Steven Hand <Steven.Hand@xxxxxxxxxxxx>
Date: Thu, 08 May 2008 14:14:34 +0100
Cc: Steven.Hand@xxxxxxxxxxxx
Delivery-date: Thu, 08 May 2008 06:14:56 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
This has been broken since cset 16964:5d84464dc1fc
Also deal better with very early errors (close sender side socket)

Signed-off-by: Steven Hand <steven.hand@xxxxxxxxxxxx>


diff -r b0d7780794eb tools/python/xen/xend/XendCheckpoint.py
--- a/tools/python/xen/xend/XendCheckpoint.py   Thu May 08 13:40:40 2008 +0100
+++ b/tools/python/xen/xend/XendCheckpoint.py   Thu May 08 14:08:39 2008 +0100
@@ -81,8 +81,6 @@ def save(fd, dominfo, network, live, dst
     # thing is useful for debugging.
     dominfo.setName('migrating-' + domain_name)
 
-    done_suspend = 0
-
     try:
         dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP1, domain_name)
 
@@ -110,7 +108,6 @@ def save(fd, dominfo, network, live, dst
                 log.debug("Suspending %d ...", dominfo.getDomid())
                 dominfo.shutdown('suspend')
                 dominfo.waitForShutdown()
-                done_suspend = 1
                 dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP2,
                                        domain_name)
                 log.info("Domain %d suspended.", dominfo.getDomid())
@@ -154,16 +151,9 @@ def save(fd, dominfo, network, live, dst
             pass
 
     except Exception, exn:
-        log.exception("Save failed on domain %s (%s).", domain_name,
+        log.exception("Save failed on domain %s (%s) - resuming.", domain_name,
                       dominfo.getDomid())
-        
-        # If we didn't get as far as suspending the domain (for
-        # example, we couldn't balloon enough memory for the new
-        # domain), then we don't want to re-plumb the devices, as the
-        # domU will not be expecting it.
-        if done_suspend:
-            log.debug("XendCheckpoint.save: resumeDomain")
-            dominfo.resumeDomain()
+        dominfo.resumeDomain()
  
         try:
             dominfo.setName(domain_name)
diff -r b0d7780794eb tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py       Thu May 08 13:40:40 2008 +0100
+++ b/tools/python/xen/xend/XendDomain.py       Thu May 08 14:05:56 2008 +0100
@@ -1308,8 +1308,10 @@ class XendDomain:
 
         sock.send("receive\n")
         sock.recv(80)
-        XendCheckpoint.save(sock.fileno(), dominfo, True, live, dst, node=node)
-        sock.close()
+        try:
+            XendCheckpoint.save(sock.fileno(), dominfo, True, live, dst, 
node=node)
+        finally:
+            sock.close()
 
     def domain_save(self, domid, dst, checkpoint=False):
         """Start saving a domain to file.
diff -r b0d7780794eb tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Thu May 08 13:40:40 2008 +0100
+++ b/tools/python/xen/xend/XendDomainInfo.py   Thu May 08 14:07:20 2008 +0100
@@ -2378,8 +2378,19 @@ class XendDomainInfo:
     def resumeDomain(self):
         log.debug("XendDomainInfo.resumeDomain(%s)", str(self.domid))
 
-        if self.domid is None:
+        # resume a suspended domain (e.g. after live checkpoint, or after
+        # a later error during save or migate); checks that the domain
+        # is currently suspended first so safe to call from anywhere
+
+        xeninfo = dom_get(self.domid)
+        if xeninfo is None: 
             return
+        if not xeninfo['shutdown']:
+            return
+        reason = shutdown_reason(xeninfo['shutdown_reason'])
+        if reason != 'suspend':
+            return
+
         try:
             # could also fetch a parsed note from xenstore
             fast = self.info.get_notes().get('SUSPEND_CANCEL') and 1 or 0

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-devel] [PATCH] - fix/improve error handling for failed suspend/migrate, Steven Hand <=