WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

Re: [Xen-devel] [PATCH] Fix xend xenstore handling

To: "Daniel P. Berrange" <berrange@xxxxxxxxxx>
Subject: Re: [Xen-devel] [PATCH] Fix xend xenstore handling
From: John Levon <levon@xxxxxxxxxxxxxxxxx>
Date: Fri, 21 Dec 2007 15:27:13 +0000
Cc: xen-devel@xxxxxxxxxxxxxxxxxxx
Delivery-date: Fri, 21 Dec 2007 07:28:05 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
In-reply-to: <20071221150803.GA5345@xxxxxxxxxx>
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
References: <20071221145726.GA16971@xxxxxxxxxxxxxxxxxxxxxxx> <20071221150803.GA5345@xxxxxxxxxx>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Mutt/1.5.9i
On Fri, Dec 21, 2007 at 03:08:03PM +0000, Daniel P. Berrange wrote:

> > This is the cause of the "reboot loop" xend failures I reported earlier.
> > Note that I've only tested this patch against 3.1, however the code,
> > and the fix, is the same in unstable. I realise it's late in the cycle
> > but this bug is bad enough to need fixing IMHO.
> 
> I think the entire code segment from the point at which xstransact() is
> created needs to be in a try...finally block to be safe against the
> code throwing exceptions, otherwise you could very occasionally get the
> final xs.abort() being missed in error conditions.

OK, how about this one? (I'm starting testing it now)

regards
john

# HG changeset patch
# User john.levon@xxxxxxx
# Date 1198250774 28800
# Node ID c847f62cbad09ccbf0ba63ee30204228c16d60fb
# Parent  5092403708afc964a908cf1a193f3fc68fb4b950
Fix xend xenstore handling.

xend can get into a situation where two processes are attempting to
interact with the xenstore socket, with disastrous results. Fix the two
bad users of xstransact, add a big warning, and fix the destructor so
future mistakes will be detected earlier.

Signed-off-by: John Levon <john.levon@xxxxxxx>

diff --git a/tools/python/xen/xend/XendDomainInfo.py b/tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py
+++ b/tools/python/xen/xend/XendDomainInfo.py
@@ -1529,18 +1529,19 @@ class XendDomainInfo:
 
         log.debug("Releasing devices")
         t = xstransact("%s/device" % self.dompath)
-        for devclass in XendDevices.valid_devices():
-            for dev in t.list(devclass):
-                try:
-                    log.debug("Removing %s", dev);
-                    self.destroyDevice(devclass, dev, False);
-                except:
-                    # Log and swallow any exceptions in removal --
-                    # there's nothing more we can do.
+        try:
+            for devclass in XendDevices.valid_devices():
+                for dev in t.list(devclass):
+                    try:
+                        log.debug("Removing %s", dev);
+                        self.destroyDevice(devclass, dev, False);
+                    except:
+                        # Log and swallow any exceptions in removal --
+                        # there's nothing more we can do.
                         log.exception("Device release failed: %s; %s; %s",
                                       self.info['name_label'], devclass, dev)
-
-            
+        finally:
+            t.abort()
 
     def getDeviceController(self, name):
         """Get the device controller for this domain, and if it
@@ -1852,16 +1853,18 @@ class XendDomainInfo:
         # build list of phantom devices to be removed after normal devices
         plist = []
         if self.domid is not None:
-            from xen.xend.xenstore.xstransact import xstransact
             t = xstransact("%s/device/vbd" % GetDomainPath(self.domid))
-            for dev in t.list():
-                backend_phantom_vbd = xstransact.Read("%s/device/vbd/%s/phantom_vbd" \
-                                      % (self.dompath, dev))
-                if backend_phantom_vbd is not None:
-                    frontend_phantom_vbd =  xstransact.Read("%s/frontend" \
-                                      % backend_phantom_vbd)
-                    plist.append(backend_phantom_vbd)
-                    plist.append(frontend_phantom_vbd)
+            try:
+                for dev in t.list():
+                    backend_phantom_vbd = xstransact.Read("%s/device/vbd/%s/phantom_vbd" \
+                                          % (self.dompath, dev))
+                    if backend_phantom_vbd is not None:
+                        frontend_phantom_vbd =  xstransact.Read("%s/frontend" \
+                                          % backend_phantom_vbd)
+                        plist.append(backend_phantom_vbd)
+                        plist.append(frontend_phantom_vbd)
+            finally:
+                t.abort()
         return plist
 
     def _cleanup_phantom_devs(self, plist):
diff --git a/tools/python/xen/xend/server/pciif.py b/tools/python/xen/xend/server/pciif.py
--- a/tools/python/xen/xend/server/pciif.py
+++ b/tools/python/xen/xend/server/pciif.py
@@ -22,8 +22,6 @@ from xen.xend import sxp
 from xen.xend import sxp
 from xen.xend.XendError import VmError
 from xen.xend.XendLogging import log
-
-from xen.xend.xenstore.xstransact import xstransact
 
 from xen.xend.server.DevController import DevController
 
diff --git a/tools/python/xen/xend/xenstore/xstransact.py b/tools/python/xen/xend/xenstore/xstransact.py
--- a/tools/python/xen/xend/xenstore/xstransact.py
+++ b/tools/python/xen/xend/xenstore/xstransact.py
@@ -7,8 +7,16 @@
 
 from xen.xend.xenstore.xsutil import xshandle
 
+class xstransact:
+    """WARNING: Be very careful if you're instantiating an xstransact object
+       yourself (i.e. not using the capitalized static helpers like .Read()).
+       It is essential that you clean up the object in place via
+       t.commit/abort(): GC can happen at any time, including contexts where
+       it's not safe to use the shared xenstore socket fd. In particular,
+       if xend forks, and GC occurs, we can have two processes trying to
+       use the same xenstore fd, and all hell breaks loose.
+       """
 
-class xstransact:
 
     def __init__(self, path = ""):
         
@@ -22,8 +30,9 @@ class xstransact:
         self.in_transaction = True
 
     def __del__(self):
+        # see above.
         if self.in_transaction:
-            xshandle().transaction_end(self.transaction, True)
+            raise RuntimeError("ERROR: GC of live transaction")
 
     def commit(self):
         if not self.in_transaction:

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel