[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] initial kdump support for domU, and xswatch question


  • To: xen-devel@xxxxxxxxxxxxxxxxxxx
  • From: Olaf Hering <olaf@xxxxxxxxx>
  • Date: Fri, 21 Jan 2011 17:09:19 +0100
  • Delivery-date: Fri, 21 Jan 2011 08:13:31 -0800
  • List-id: Xen developer discussion <xen-devel.lists.xensource.com>

This week I worked on kdump support with SLES11SP1 as dom0/domU.
I came up with the patch below, which works OK in my testing. There is
also a kernel patch, which is not finished yet (proper crashkernel
detection is still missing).

During my testing and "fine-tuning" I came across an issue that I haven't
figured out yet:
The added function _handleCrashDumpWatch() registers a new watch on
${backend}/state. This watch never triggers for some reason, even though
the values do change. Is registering a watch from within a watch callback
supposed to work?  I would like to send an event from
_handleCrashDumpWatchCallback() when the backend has switched state, to
avoid a hardcoded sleep, similar to what the hotplug scripts do.
Any ideas what is wrong with my attempt?

Olaf


 tools/python/xen/xend/XendDomainInfo.py |  107 ++++++++++++++

--- xen-4.0.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.0.1-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -35,6 +35,7 @@ import stat
 import shutil
 import traceback
 from types import StringTypes
+from threading import Event
 
 import xen.lowlevel.xc
 from xen.util import asserts, auxbin, mkdir
@@ -2391,6 +2392,96 @@ class XendDomainInfo:
         return self.getDeviceController(deviceClass).reconfigureDevice(
             devid, devconfig)
 
+    def _handleCrashDumpWatchCallback(self, arg, ev):
+        log.debug("_handleCrashDumpWatchCallback called with '%s'" % arg)
+        v = None
+        try:
+            v = xstransact.Read(arg)
+        except:
+            log.exception("_handleCrashDumpWatchCallback exception")
+            ev.set()
+            return False
+        if not int(v) == 4:
+            ev.set()
+        return True
+
+    # reset all devices where frontend and backend is in state XenbusStateConnected
+    # protocol:
+    # initial value is 0
+    # crashed guest writes 1, this function resets all devices
+    # this function writes 2, to notify the guest about the finished reset process
+    def _handleCrashDumpWatch(self, arg):
+        log.debug("_handleCrashDumpWatch called with '%s'" % arg)
+        v = None
+        delay = 3.0
+        try:
+            v = xstransact.Read(arg)
+        except:
+            log.exception("_handleCrashDumpWatch exception")
+        log.debug("_handleCrashDumpWatch trigger value %s" % v)
+        if not int(v) == 1:
+            return True
+        try:
+            ev = Event()
+            t = xstransact("%s/device" % self.vmpath)
+            try:
+                for devclass in XendDevices.valid_devices():
+                    for dev in t.list(devclass):
+                        self.crashWatchCallback = backend = frontend = f_state 
= b_state = None
+                        try:
+                            log.debug("Reading dev %s", dev)
+                            frontend = xstransact.Read("%s/device/%s" % 
(self.vmpath, dev), "frontend")
+                            f_state = xstransact.Read(frontend, "state")
+                            backend = xstransact.Read("%s/device/%s" % 
(self.vmpath, dev), "backend")
+                            b_state = xstransact.Read(backend, "state")
+                            log.debug('backend %s(%s) frontend %s(%s)', 
backend, b_state, frontend, f_state)
+                        except:
+                            log.exception("Reading frontend/backend state 
failed: %s; %s; %s",
+                                          self.info['name_label'],
+                                          devclass, dev)
+                            pass
+                        try:
+                            # XenbusStateConnected
+                            if b_state == "4" and f_state == "4":
+                                ev.clear()
+                                self.crashWatchCallback = xswatch(backend + 
'/state', self._handleCrashDumpWatchCallback, ev)
+                                # XenbusStateClosing
+                                log.debug("Set %s to XenbusStateClosing", 
frontend)
+                                xstransact.Write(frontend, "state", "5")
+                                ev.wait(delay)
+                                b_state = xstransact.Read(backend, "state")
+                                log.debug('backend %s/state == %s', backend, 
b_state)
+                                # XenbusStateClosed
+                                log.debug("Set %s to XenbusStateClosed", 
frontend)
+                                xstransact.Write(frontend, "state", "6")
+                                ev.wait(delay)
+                                b_state = xstransact.Read(backend, "state")
+                                log.debug('backend %s/state == %s', backend, 
b_state)
+                                # XenbusStateInitialising
+                                log.debug("Set %s to XenbusStateInitialising", 
frontend)
+                                xstransact.Write(frontend, "state", "1")
+                                ev.wait(delay)
+                                b_state = xstransact.Read(backend, "state")
+                                log.debug('backend %s/state == %s', backend, 
b_state)
+                                if self.crashWatchCallback:
+                                    try:
+                                        self.crashWatchCallback.unwatch()
+                                    except:
+                                        pass
+                        except:
+                            log.debug("state write failed for %s" % frontend)
+                            pass
+            finally:
+                t.abort()
+                log.debug("_handleCrashDumpWatch notify guest")
+                xstransact.Write(self.dompath, 'device-reset-trigger', "2")
+        except:
+            log.debug("_handleCrashDumpWatch exception")
+            pass
+        log.debug("_handleCrashDumpWatch done")
+        # release this watch
+        return False
+
     def _createDevices(self, resume = False):
         """Create the devices for a vm.
 
@@ -2439,6 +2530,12 @@ class XendDomainInfo:
                     self.info['devices'][dev_uuid][1]['devid'] = devid
 
 
+        xstransact.Write(self.dompath, 'device-reset-trigger', "0")
+        xstransact.SetPermissions(self.dompath + '/device-reset-trigger',
+                                     { 'dom': self.getDomid(), 'read': True, 
'write': True })
+        self.crashWatch = xswatch(self.dompath + '/device-reset-trigger',
+                                     self._handleCrashDumpWatch)
+
         if self.image:
             self.image.createDeviceModel(resume)
             self.image.createXenPaging()
@@ -2479,6 +2576,16 @@ class XendDomainInfo:
         finally:
             t.abort()
 
+        try:
+            if self.crashWatch:
+                try:
+                    self.crashWatch.unwatch()
+                except:
+                    pass
+        finally:
+            self.crashWatch = None
+
+
     def getDeviceController(self, name):
         """Get the device controller for this domain, and if it
         doesn't exist, create it.

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.