Another idea (just an RFC):
Use the SCSI reserve method to implement the lock.
The advantages are as follows:
1) It is a mandatory lock, not an advisory lock.
2) It can resolve the stale lock issue.
3) It doesn't need a cluster filesystem.
A third idea is to use SFEX (a component of Linux-HA) to implement the
locking.
It is much like the above method, but it is an advisory lock.
Attached is a patch using the SFEX method; the lock mechanism is still
based on Jim's patch.
The disadvantage of this patch is that it still has issues with live
migration.
Thanks,
Jiaju
diff -Nupr xen-3.3.1-testing.orig/tools/examples/domain-lock-via-sfex
xen-3.3.1-testing/tools/examples/domain-lock-via-sfex
---
xen-3.3.1-testing.orig/tools/examples/domain-lock-via-sfex 1970-01-01
08:00:00.000000000 +0800
+++
xen-3.3.1-testing/tools/examples/domain-lock-via-sfex 2009-06-23
16:33:38.000000000 +0800
@@ -0,0 +1,123 @@
+#!/bin/bash
+
+#error code:
+# 0: success
+# 1: generic error
+# 2: not running
+
+if [ `uname -m` = "x86_64" ]; then
+ SFEX_DAEMON=/usr/lib64/heartbeat/sfex_daemon
+else
+ SFEX_DAEMON=/usr/lib/heartbeat/sfex_daemon
+fi
+COLLISION_TIMEOUT=1
+LOCK_TIMEOUT=10
+MONITOR_INTERVAL=5
+PID_FILE=/var/run/sfex.pid
+
+usage() {
+ echo "usage: domain-lock-via-sfex [-l|-u|-s] -i <vm uuid> -x <sfex
device> index"
+ echo ""
+ echo "-l lock"
+ echo "-u unlock"
+ echo "-s status (default)"
+ echo "-i Virtual Machine Id or UUID"
+ echo "-x SFEX device which used for sfex lock"
+ echo "path A per-VM, unique location where external lock will be
managed"
+ exit 1
+}
+
+create_lock() {
+ local suffix=$1
+ local device=$2
+ local index=$3
+
+ get_status $suffix
+ if [ $? -eq 0 ]; then
+ return 0
+ fi
+
+ $SFEX_DAEMON -i $index -c $COLLISION_TIMEOUT -t $LOCK_TIMEOUT -m
$MONITOR_INTERVAL -r default -d $PID_FILE"_"$suffix $device
+
+ rc=$?
+ if [ $rc -ne 0 ]; then
+ return 1
+ fi
+
+ sleep 10
+ get_status $suffix
+ if [ $? -eq 0 ]; then
+ return 0
+ fi
+ return 1
+}
+
+remove_lock(){
+ local suffix=$1
+
+ /sbin/killproc -p $PID_FILE"_"$suffix $SFEX_DAEMON
+ rc=$?
+ if [ $rc -ne 0 ]; then
+ return $rc
+ fi
+
+ sleep 4
+ get_status $suffix
+ rc=$?
+ if [ $rc -ne 2 ]; then
+ return $rc
+ fi
+ return 0
+
+}
+
+get_status() {
+ local suffix=$1
+
+ /sbin/checkproc -k -p $PID_FILE"_"$suffix $SFEX_DAEMON
+ rc=$?
+ return $rc
+}
+
+mode="status"
+
+while getopts ":lusi:x:" opt; do
+ case $opt in
+ l )
+ mode="lock"
+ ;;
+ u )
+ mode="unlock"
+ ;;
+ s )
+ mode="status"
+ ;;
+ i )
+ vm_uuid=$OPTARG
+ ;;
+ x )
+ vm_sfex_device=$OPTARG
+ ;;
+ \? )
+ usage
+ ;;
+ esac
+done
+
+shift $(($OPTIND - 1))
+vm_index=$1
+[ -z $vm_index ] && usage
+[ -z $vm_uuid ] && usage
+[ -z $vm_sfex_device ] && usage
+
+case $mode in
+ lock )
+ create_lock $vm_uuid $vm_sfex_device $vm_index
+ ;;
+ unlock )
+ remove_lock $vm_uuid
+ ;;
+ status )
+ get_status $vm_uuid
+ ;;
+esac
diff -Nupr xen-3.3.1-testing.orig/tools/examples/Makefile
xen-3.3.1-testing/tools/examples/Makefile
--- xen-3.3.1-testing.orig/tools/examples/Makefile 2009-06-18
01:44:28.000000000 +0800
+++ xen-3.3.1-testing/tools/examples/Makefile 2009-06-18
03:43:39.000000000 +0800
@@ -36,6 +36,7 @@ XEN_SCRIPTS += xen-hotplug-cleanup
XEN_SCRIPTS += external-device-migrate
XEN_SCRIPTS += vscsi
XEN_SCRIPTS += domain-lock
+XEN_SCRIPTS += domain-lock-via-sfex
XEN_SCRIPT_DATA = xen-script-common.sh locking.sh logging.sh
XEN_SCRIPT_DATA += xen-hotplug-common.sh xen-network-common.sh
vif-common.sh
XEN_SCRIPT_DATA += block-common.sh vtpm-common.sh
vtpm-hotplug-common.sh
diff -Nupr xen-3.3.1-testing.orig/tools/examples/xend-config.sxp
xen-3.3.1-testing/tools/examples/xend-config.sxp
---
xen-3.3.1-testing.orig/tools/examples/xend-config.sxp 2009-06-18
01:44:28.000000000 +0800
+++ xen-3.3.1-testing/tools/examples/xend-config.sxp 2009-06-18
02:56:21.000000000 +0800
@@ -266,7 +266,7 @@
# environment that protects shared resources, but may be useful in
# some circumstances nonetheless.
#
-#(xend-domain-lock no)
+(xend-domain-lock yes)
# Path where domain lock is stored if xend-domain-lock is enabled.
# Note: This path must be accessible to all VM Servers participating
@@ -297,4 +297,9 @@
# <xend-domain-lock-path>/<vm-uuid>/lock and write <vm-name>,
<vm-id>,
# and <vm-host> (if supplied) to the lock file in that order.
#
-#(xend-domain-lock-utility domain-lock)
+(xend-domain-lock-utility domain-lock-via-sfex)
+# This is the sfex device, when you enable the domain-lock-via-sfex,
+# you MUST enable this. it should be a partition in shared disk used
+# for locking. Please note that there is no default value for this
+# option, you MUST modify the following configuration!
+(xend-domain-lock-sfex-device /dev/sdb1)
diff -Nupr xen-3.3.1-testing.orig/tools/python/xen/xend/XendConfig.py
xen-3.3.1-testing/tools/python/xen/xend/XendConfig.py
---
xen-3.3.1-testing.orig/tools/python/xen/xend/XendConfig.py 2009-06-18
01:44:28.000000000 +0800
+++
xen-3.3.1-testing/tools/python/xen/xend/XendConfig.py 2009-06-22
18:01:14.000000000 +0800
@@ -188,6 +188,7 @@ XENAPI_CFG_TYPES = {
'VCPUs_max': int,
'VCPUs_at_startup': int,
'VCPUs_live': int,
+ 'sfex_index': int,
'actions_after_shutdown': str,
'actions_after_reboot': str,
'actions_after_crash': str,
@@ -221,6 +222,7 @@ LEGACY_UNSUPPORTED_BY_XENAPI_CFG = [
'vcpu_avail',
'features',
# read/write
+ 'sfex_index',
'on_xend_start',
'on_xend_stop',
# read-only
@@ -241,6 +243,7 @@ LEGACY_CFG_TYPES = {
'memory': int,
'shadow_memory': int,
'maxmem': int,
+ 'sfex_index': int,
'start_time': float,
'cpu_time': float,
'features': str,
@@ -268,6 +271,7 @@ LEGACY_XENSTORE_VM_PARAMS = [
'memory',
'shadow_memory',
'maxmem',
+ 'sfex_index',
'start_time',
'name',
'on_poweroff',
@@ -576,6 +580,10 @@ class XendConfig(dict):
cfg["memory"] = int(sxp.child_value(sxp_cfg, "memory"))
if sxp.child_value(sxp_cfg, "maxmem") != None:
cfg["maxmem"] = int(sxp.child_value(sxp_cfg, "maxmem"))
+
+ # Parse sfex index
+ if sxp.child_value(sxp_cfg, "sfex_index") != None:
+ cfg["sfex_index"] = int(sxp.child_value(sxp_cfg,
"sfex_index"))
# Convert scheduling parameters to vcpus_params
if 'vcpus_params' not in cfg:
@@ -823,6 +831,9 @@ class XendConfig(dict):
self._memory_sanity_check()
+ if "sfex_index" in cfg:
+ self["sfex_index"] = int(cfg["sfex_index"])
+
def update_with(n, o):
if not self.get(n):
self[n] = cfg.get(o, '')
diff -Nupr
xen-3.3.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py
xen-3.3.1-testing/tools/python/xen/xend/XendDomainInfo.py
---
xen-3.3.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py 2009-06-18
01:44:28.000000000 +0800
+++
xen-3.3.1-testing/tools/python/xen/xend/XendDomainInfo.py 2009-06-23
16:53:07.000000000 +0800
@@ -3598,23 +3598,32 @@ class XendDomainInfo:
path = xoptions.get_xend_domain_lock_path()
path = os.path.join(path, self.get_uuid())
+ status = 0
- if self.is_dom_locked(path):
- raise XendError("The VM is locked and appears to be
running on host %s." % self.get_lock_host(path))
+ if xoptions.get_xend_domain_lock_utility() !=
"domain-lock-via-sfex":
+ if self.is_dom_locked(path):
+ raise XendError("The VM is locked and appears to be
running on host %s." % self.get_lock_host(path))
+
+ try:
+ if not os.path.exists(path):
+ mkdir.parents(path, stat.S_IRWXU)
+ except:
+ log.exception("%s could not be created." % path)
+ raise XendError("%s could not be created." % path)
+
+ status = os.system('%s -l -p %s -n %s -i %s %s' % \
+
(xoptions.get_xend_domain_lock_utility(), \
+ XendNode.instance().get_name(), \
+ self.info['name_label'], \
+ self.info['uuid'], \
+ path))
+ else:
+ status = os.system('/etc/xen/scripts/%s -l -i %s -x %s %s' %
\
+
(xoptions.get_xend_domain_lock_utility(), \
+ self.info['uuid'], \
+
xoptions.get_xend_domain_lock_sfex_device(), \
+ self.info['sfex_index']))
- try:
- if not os.path.exists(path):
- mkdir.parents(path, stat.S_IRWXU)
- except:
- log.exception("%s could not be created." % path)
- raise XendError("%s could not be created." % path)
-
- status = os.system('%s -l -p %s -n %s -i %s %s' % \
- (xoptions.get_xend_domain_lock_utility(),
\
- XendNode.instance().get_name(), \
- self.info['name_label'], \
- self.info['uuid'], \
- path))
if status != 0:
raise XendError('Acquire running lock failed: %s' %
status)
@@ -3625,16 +3634,27 @@ class XendDomainInfo:
path = xoptions.get_xend_domain_lock_path()
path = os.path.join(path, self.get_uuid())
- status = os.system('%s -u %s' % \
- (xoptions.get_xend_domain_lock_utility(),
\
- path))
+ status = 0
+
+ if xoptions.get_xend_domain_lock_utility() !=
"domain-lock-via-sfex":
+ status = os.system('%s -u %s' % \
+
(xoptions.get_xend_domain_lock_utility(), \
+ path))
+ if status != 0:
+ log.exception("Release running lock failed: %s" %
status)
+ try:
+ if len(os.listdir(path)) == 0:
+ shutil.rmtree(path)
+ except:
+ log.exception("Failed to remove unmanaged directory
%s." % path)
+ else:
+ status = os.system('/etc/xen/scripts/%s -u -i %s -x %s %s' %
\
+
(xoptions.get_xend_domain_lock_utility(), \
+ self.info['uuid'], \
+ xoptions.get_xend_domain_lock_sfex_device(), \
+ self.info['sfex_index']))
if status != 0:
log.exception("Release running lock failed: %s" % status)
- try:
- if len(os.listdir(path)) == 0:
- shutil.rmtree(path)
- except:
- log.exception("Failed to remove unmanaged directory %s." %
path)
def __str__(self):
diff -Nupr xen-3.3.1-testing.orig/tools/python/xen/xend/XendOptions.py
xen-3.3.1-testing/tools/python/xen/xend/XendOptions.py
---
xen-3.3.1-testing.orig/tools/python/xen/xend/XendOptions.py 2009-06-18
01:44:28.000000000 +0800
+++
xen-3.3.1-testing/tools/python/xen/xend/XendOptions.py 2009-06-19
10:42:57.000000000 +0800
@@ -145,6 +145,9 @@ class XendOptions:
"""Default script to acquire/release domain lock"""
xend_domain_lock_utility = osdep.scripts_dir + "/domain-lock"
+ """Default sfex device used by domain lock """
+ xend_domain_lock_sfex_device = ''
+
def __init__(self):
self.configure()
@@ -382,6 +385,8 @@ class XendOptions:
def get_xend_domain_lock_utility(self):
return self.get_config_string('xend-domain-lock-utility',
self.xend_domain_lock_utility)
+ def get_xend_domain_lock_sfex_device(self):
+ return self.get_config_string('xend-domain-lock-sfex-device',
self.xend_domain_lock_sfex_device)
class XendOptionsFile(XendOptions):
diff -Nupr xen-3.3.1-testing.orig/tools/python/xen/xm/create.py
xen-3.3.1-testing/tools/python/xen/xm/create.py
---
xen-3.3.1-testing.orig/tools/python/xen/xm/create.py 2009-06-18
01:44:28.000000000 +0800
+++ xen-3.3.1-testing/tools/python/xen/xm/create.py 2009-06-22
14:23:55.000000000 +0800
@@ -189,6 +189,10 @@ gopts.var('maxmem', val='MEMORY',
fn=set_int, default=None,
use="Maximum domain memory in MB.")
+gopts.var('sfex_index', val='SFEX',
+ fn=set_int, default=None,
+ use="Sfex index.")
+
gopts.var('shadow_memory', val='MEMORY',
fn=set_int, default=0,
use="Domain shadow memory in MB.")
@@ -884,7 +888,7 @@ def make_config(vals):
if v:
config.append([n, v])
- map(add_conf, ['name', 'memory', 'maxmem', 'shadow_memory',
+ map(add_conf, ['name', 'memory', 'maxmem', 'sfex_index',
'shadow_memory',
'restart', 'on_poweroff',
'on_reboot', 'on_crash', 'vcpus', 'vcpu_avail',
'features',
'on_xend_start', 'on_xend_stop', 'target', 'cpuid',
>>> On 8/5/2009 at 5:33 PM, in message
<20090805093333.GH24960@xxxxxxxxxxxxxxx>,
Pasi Kärkkäinen<pasik@xxxxxx> wrote:
> On Wed, Aug 05, 2009 at 04:39:23PM +0800, Zhigang Wang wrote:
>> Pasi Kärkkäinen wrote:
>> > On Mon, Aug 11, 2008 at 10:45:23AM -0600, Jim Fehlig wrote:
>> >> Ian Jackson wrote:
>> >>> Jim Fehlig writes ("[Xen-devel] [PATCH] [RFC] Add lock on domain
start"):
>> >>>
>> >>>> This patch adds a simple lock mechanism when starting domains
by placing
>> >>>> a lock file in xend-domains-path/<dom_uuid>. The lock file is
removed
>> >>>> when domain is stopped. The motivation for such a mechanism is
to
>> >>>> prevent starting the same domain from multiple hosts.
>> >>>>
>> >>> I think this should be dealt with in your next-layer-up
management
>> >>> tools.
>> >>>
>> >> Perhaps. I wanted to see if there was any interest in having
such a
>> >> feature at the xend layer. If not, I will no longer pursue this
option.
>> >>
>> >
>> > Replying a bit late to this.. I think there is demand for this
feature!
>> >
>> > Many people (mostly in smaller environments) don't want to use
>> > 'next-layer-up' management tools..
>> >
>> >>> Lockfiles are bad because they can become stale.
>> >>>
>> >> Yep. Originally I considered a 'lockless-lock' approach where a
bit is
>> >> set and a counter is spun on a 'reserved' sector of vbd, e.g.
first
>> >> sector. Attempting to attach the vbd to another domain would
fail if
>> >> lock bit is set and counter is incrementing. If counter is not
>> >> incrementing assume lock is stale and proceed. This approach is
>> >> certainly more complex. We support various image formats (raw,
qcow,
>> >> vmdk, ...) and such an approach may mean changing the format
(e.g.
>> >> qcow3). Wouldn't work for existing images. Who is responsible
for
>> >> spinning the counter? Anyhow seemed like a lot of complexity as
>> >> compared to the suggested simple approach with override for stale
lock.
>> >>
>> >
>> > I assume you guys have this patch included in OpenSuse/SLES Xen
rpms.
>> >
>> > Is the latest version available from somewhere?
>> >
>> > -- Pasi
>> I ever seen a patch in SUSE xen rpm. maybe Jim can tell you the
latest
> status.
>>
>
>
http://serverfault.com/questions/21699/how-to-manage-xen-virtual-machines-on-shared-
> san-storage
>
> In that discussion someone says xend-lock stuff can be found from
SLES11 Xen.
>
>> In Oracle VM, we add hooks in xend and use an external locking
>> utility.
>>
>> Currently, we use DLM (distributed lock manager) to manage the domain
>> running lock, which prevents the same VM from being started on two
>> servers simultaneously.
>>
>> We have added hooks to VM start/shutdown/migration to acquire/release
>> the lock.
>>
>> Note that during migration, we release the lock before starting the
>> migration process, and the lock is then acquired on the destination
>> side. There is still a chance for a server other than the destination
>> server to acquire the lock, thus causing the migration to fail.
>>
>
> Hmm.. I guess that also leaves a small time window for disk
corruption? If
> the domU was started on some other host at _exact_ correct (or bad)
time
> when the lock is not held anymore by the migration source host..
>
>> hope someone can give some advice.
>>
>> here is the patch for your reference.
>>
>
> Thanks. Looks like a possible method as well.
>
> -- Pasi
>
>> thanks,
>>
>> zhigang
>
>> diff -Nurp --exclude '*.orig'
xen-3.4.0.bak/tools/examples/xend-config.sxp
> xen-3.4.0/tools/examples/xend-config.sxp
>> --- xen-3.4.0.bak/tools/examples/xend-config.sxp 2009-08-05
16:17:42.000000000 +0800
>> +++ xen-3.4.0/tools/examples/xend-config.sxp 2009-08-04
10:23:17.000000000 +0800
>> @@ -69,6 +69,12 @@
>>
>> (xend-unix-path /var/lib/xend/xend-socket)
>>
>> +# External locking utility for get/release domain running lock. By
default,
>> +# no utility is specified. Thus there will be no lock as VM
running.
>> +# The locking utility should accept:
>> +# <--lock | --unlock> --name <name> --uuid <uuid>
>> +# command line options, and returns zero on success, others on
error.
>> +#(xend-domains-lock-path '')
>>
>> # Address and port xend should use for the legacy TCP XMLRPC
interface,
>> # if xend-tcp-xmlrpc-server is set.
>> diff -Nurp --exclude '*.orig'
> xen-3.4.0.bak/tools/python/xen/xend/XendDomainInfo.py
> xen-3.4.0/tools/python/xen/xend/XendDomainInfo.py
>> ---
xen-3.4.0.bak/tools/python/xen/xend/XendDomainInfo.py 2009-08-05
> 16:17:42.000000000 +0800
>> +++
xen-3.4.0/tools/python/xen/xend/XendDomainInfo.py 2009-08-05
> 16:35:35.000000000 +0800
>> @@ -359,6 +359,8 @@ class XendDomainInfo:
>> @type state_updated: threading.Condition
>> @ivar refresh_shutdown_lock: lock for polling shutdown state
>> @type refresh_shutdown_lock: threading.Condition
>> + @ivar running_lock: lock for running VM
>> + @type running_lock: bool or None
>> @ivar _deviceControllers: device controller cache for this
domain
>> @type _deviceControllers: dict 'string' to DevControllers
>> """
>> @@ -427,6 +429,8 @@ class XendDomainInfo:
>> self.refresh_shutdown_lock = threading.Condition()
>> self._stateSet(DOM_STATE_HALTED)
>>
>> + self.running_lock = None
>> +
>> self._deviceControllers = {}
>>
>> for state in DOM_STATES_OLD:
>> @@ -453,6 +457,7 @@ class XendDomainInfo:
>>
>> if self._stateGet() in (XEN_API_VM_POWER_STATE_HALTED,
> XEN_API_VM_POWER_STATE_SUSPENDED, XEN_API_VM_POWER_STATE_CRASHED):
>> try:
>> + self.acquire_running_lock();
>> XendTask.log_progress(0, 30,
self._constructDomain)
>> XendTask.log_progress(31, 60, self._initDomain)
>>
>> @@ -485,6 +490,7 @@ class XendDomainInfo:
>> state = self._stateGet()
>> if state in (DOM_STATE_SUSPENDED, DOM_STATE_HALTED):
>> try:
>> + self.acquire_running_lock();
>> self._constructDomain()
>>
>> try:
>> @@ -2617,6 +2623,11 @@ class XendDomainInfo:
>>
>> self._stateSet(DOM_STATE_HALTED)
>> self.domid = None # Do not push into _stateSet()!
>> +
>> + try:
>> + self.release_running_lock()
>> + except:
>> + log.exception("Release running lock failed: %s" %
status)
>> finally:
>> self.refresh_shutdown_lock.release()
>>
>> @@ -4073,6 +4084,28 @@ class XendDomainInfo:
>> params.get('burst', '50K'))
>> return 1
>>
>> + def acquire_running_lock(self):
>> + if not self.running_lock:
>> + lock_path = xoptions.get_xend_domains_lock_path()
>> + if lock_path:
>> + status = os.system('%s --lock --name %s --uuid %s'
% \
>> + (lock_path,
self.info['name_label'],
> self.info['uuid']))
>> + if status == 0:
>> + self.running_lock = True
>> + else:
>> + raise XendError('Acquire running lock failed:
%s' %
> status)
>> +
>> + def release_running_lock(self):
>> + if self.running_lock:
>> + lock_path = xoptions.get_xend_domains_lock_path()
>> + if lock_path:
>> + status = os.system('%s --unlock --name %s --uuid
%s' % \
>> + (lock_path,
self.info['name_label'],
> self.info['uuid']))
>> + if status == 0:
>> + self.running_lock = False
>> + else:
>> + raise XendError('Release running lock failed:
%s' %
> status)
>> +
>> def __str__(self):
>> return '<domain id=%s name=%s memory=%s state=%s>' % \
>> (str(self.domid), self.info['name_label'],
>> diff -Nurp --exclude '*.orig'
xen-3.4.0.bak/tools/python/xen/xend/XendDomain.py
> xen-3.4.0/tools/python/xen/xend/XendDomain.py
>> ---
xen-3.4.0.bak/tools/python/xen/xend/XendDomain.py 2009-08-05
16:17:09.000000000
> +0800
>> +++ xen-3.4.0/tools/python/xen/xend/XendDomain.py 2009-08-04
10:23:17.000000000
> +0800
>> @@ -1317,6 +1317,7 @@ class XendDomain:
>>
POWER_STATE_NAMES[dominfo._stateGet()])
>>
>> """ The following call may raise a XendError exception """
>> + dominfo.release_running_lock();
>> dominfo.testMigrateDevices(True, dst)
>>
>> if live:
>> diff -Nurp --exclude '*.orig'
xen-3.4.0.bak/tools/python/xen/xend/XendOptions.py
> xen-3.4.0/tools/python/xen/xend/XendOptions.py
>> ---
xen-3.4.0.bak/tools/python/xen/xend/XendOptions.py 2009-08-05
> 16:17:42.000000000 +0800
>> +++ xen-3.4.0/tools/python/xen/xend/XendOptions.py 2009-08-04
10:23:17.000000000
> +0800
>> @@ -281,6 +281,11 @@ class XendOptions:
>> """
>> return self.get_config_string("xend-domains-path",
> self.xend_domains_path_default)
>>
>> + def get_xend_domains_lock_path(self):
>> + """ Get the path of the lock utility for running domains.
>> + """
>> + return self.get_config_string("xend-domains-lock-path")
>> +
>> def get_xend_state_path(self):
>> """ Get the path for persistent domain configuration
storage
>> """
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxxxxxxxx
> http://lists.xensource.com/xen-devel
advisory
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|