[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [RFC PATCH 6/7] XendCheckpoint: implement colo
In colo mode, XendCheckpoint.py will communicate with both master and xc_restore. This patch implements this communication. In colo mode, the signature is "GuestColoRestore". Signed-off-by: Ye Wei <wei.ye1987@xxxxxxxxx> Signed-off-by: Jiang Yunhong <yunhong.jiang@xxxxxxxxx> Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx> --- tools/python/xen/xend/XendCheckpoint.py | 138 +++++++++++++++++++++++--------- 1 file changed, 101 insertions(+), 37 deletions(-) diff --git a/tools/python/xen/xend/XendCheckpoint.py b/tools/python/xen/xend/XendCheckpoint.py index fa09757..261d9d1 100644 --- a/tools/python/xen/xend/XendCheckpoint.py +++ b/tools/python/xen/xend/XendCheckpoint.py @@ -25,6 +25,7 @@ from xen.xend.XendConstants import * from xen.xend import XendNode SIGNATURE = "LinuxGuestRecord" +COLO_SIGNATURE = "GuestColoRestore" QEMU_SIGNATURE = "QemuDeviceModelRecord" dm_batch = 512 XC_SAVE = "xc_save" @@ -203,10 +204,15 @@ def restore(xd, fd, dominfo = None, paused = False, relocating = False): signature = read_exact(fd, len(SIGNATURE), "not a valid guest state file: signature read") - if signature != SIGNATURE: + if signature != SIGNATURE and signature != COLO_SIGNATURE: raise XendError("not a valid guest state file: found '%s'" % signature) + if signature == COLO_SIGNATURE: + colo = True + else + colo = False + l = read_exact(fd, sizeof_int, "not a valid guest state file: config size read") vmconfig_size = unpack("!i", l)[0] @@ -305,6 +311,7 @@ def restore(xd, fd, dominfo = None, paused = False, relocating = False): log.debug("[xc_restore]: %s", string.join(cmd)) handler = RestoreInputHandler() + restore_handler = RestoreHandler(fd, colo, dominfo, inputHandler) forkHelper(cmd, fd, handler.handler, True) @@ -321,35 +328,9 @@ def restore(xd, fd, dominfo = None, paused = False, relocating = False): raise XendError('Could not read store MFN') if not is_hvm and handler.console_mfn is None: - raise XendError('Could not read console MFN') - - restore_image.setCpuid() - - # 
xc_restore will wait for source to close connection - - dominfo.completeRestore(handler.store_mfn, handler.console_mfn) - - # - # We shouldn't hold the domains_lock over a waitForDevices - # As this function sometime gets called holding this lock, - # we must release it and re-acquire it appropriately - # - from xen.xend import XendDomain + raise XendError('Could not read console MFN') - lock = True; - try: - XendDomain.instance().domains_lock.release() - except: - lock = False; - - try: - dominfo.waitForDevices() # Wait for backends to set up - finally: - if lock: - XendDomain.instance().domains_lock.acquire() - - if not paused: - dominfo.unpause() + restorehandler.resume(True, paused, None) return dominfo except Exception, exn: @@ -358,23 +339,106 @@ def restore(xd, fd, dominfo = None, paused = False, relocating = False): raise exn +class RestoreHandler: + def __init__(self, fd, colo, dominfo, inputHandler): + self.fd = fd + self.colo = colo + self.firsttime = True + self.inputHandler = inputHandler + self.dominfo = dominfo + + def resume(self, finish, paused, child): + fd = self.fd + dominfo = self.dominfo + handler = self.inputHandler + restore_image.setCpuid() + dominfo.completeRestore(handler.store_mfn, handler.console_mfn) + + if self.colo and not finish: + # notify master that checkpoint finishes + write_exact(fd, "finish", "failed to write finish done") + buf = read_exact(fd, 6, "failed to read resume flag") + if buf != "resume": + return False + + from xen.xend import XendDomain + + if self.firsttime: + lock = True; + try: + XendDomain.instance().domains_lock.release() + except: + lock = False; + + try: + dominfo.waitForDevices() # Wait for backends to set up + finally: + if lock: + XendDomain.instance().domains_lock.acquire() + if not paused: + dominfo.unpause() + else: + # colo + xc.domain_resume(dominfo.domid, 0) + ResumeDomain(dominfo.domid) + + if self.colo and not finish: + child.tochild.write("resume\n") + child.tochild.flush() + buf = 
child.fromchild.readline() + if buf != "resume\n": + return False + if self.firsttime: + util.runcmd("/etc/xen/scripts/HA_fw_runtime.sh slaver") + # notify master side VM resumed + write_exact(fd, "resume", "failed to write resume done"); + + # wait new checkpoint + buf = read_exact(fd, 8, "failed to read continue flag") + if buf != "continue": + return False + + child.tochild.write("suspend\n") + buf = child.fromchild.readline() + if buf != "suspend\n": + return False + + # notify master side suspend done. + write_exact(fd, "suspend", "failed to write suspend done") + buf = read_exact(fd, 5, "failed to read start flag") + if buf != "start": + return False + + child.tochild.write("start\n") + child.tochild.flush() + + self.firsttime = False + class RestoreInputHandler: - def __init__(self): + def __init__(self, colo): self.store_mfn = None self.console_mfn = None - def handler(self, line, _): + def handler(self, line, child, restorehandler): + if line == "finish\n": + # colo + return restorehandler.resume(False, False, child) + m = re.match(r"^(store-mfn) (\d+)$", line) if m: self.store_mfn = int(m.group(2)) - else: - m = re.match(r"^(console-mfn) (\d+)$", line) - if m: - self.console_mfn = int(m.group(2)) + return True + + m = re.match(r"^(console-mfn) (\d+)$", line) + if m: + self.console_mfn = int(m.group(2)) + return True + + return False -def forkHelper(cmd, fd, inputHandler, closeToChild): +def forkHelper(cmd, fd, inputHandler, closeToChild, restorehandler): child = xPopen3(cmd, True, -1, [fd]) if closeToChild: @@ -392,7 +456,7 @@ def forkHelper(cmd, fd, inputHandler, closeToChild): else: line = line.rstrip() log.debug('%s', line) - inputHandler(line, child.tochild) + inputHandler(line, child, restorehandler) except IOError, exn: raise XendError('Error reading from child process for %s: %s' % -- 1.8.0 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |