# HG changeset patch # User David Scott # Date 1268739025 0 # Node ID 012936d9d680683cd83cdcd2cd772985de142c50 # Parent d455f3c74966447903add709174b57c8c50cc511 CA-38944: prevent the artificial-reboot-delay being accidentally applied to an API-driven reboot. When the request queueing was fixed, a deliberate race was created between the background event thread and the API VM.reboot (etc). One of these threads wins the race and then the second thread becomes a no-op. Unfortunately the background event thread will apply the artificial-reboot-delay unless it is somehow prevented. We prevent it by storing the next reboot delay in xenstore (rather than the database) and setting it to 0 in the API call path. Signed-off-by: David Scott diff -r d455f3c74966 -r 012936d9d680 ocaml/xapi/events.ml --- a/ocaml/xapi/events.ml Fri Mar 12 15:19:41 2010 +0000 +++ b/ocaml/xapi/events.ml Tue Mar 16 11:30:25 2010 +0000 @@ -103,34 +103,16 @@ with _ -> 0. (* ages ago *) in Unix.gettimeofday () -. start_time + let artificial_reboot_key domid = Hotplug.get_private_path domid ^ "/" ^ Xapi_globs.artificial_reboot_delay + (* When a VM is rebooted too quickly (from within) we insert a delay; on each _contiguous_ quick reboot, this delay doubles *) - let insert_reboot_delay ~__context ~vm = + let calculate_reboot_delay ~__context ~vm domid = let delay_cap = 60 in (* 1 minute cap on reboot delays *) - let other_config = Db.VM.get_other_config ~__context ~self:vm in - let delay = - try - let delay = int_of_string (List.assoc Xapi_globs.last_artificial_reboot_delay_key other_config) in - let next_delay = min (delay*2) delay_cap in (* next delay doubles, capped at delay_cap *) - Db.VM.remove_from_other_config ~__context ~self:vm ~key:Xapi_globs.last_artificial_reboot_delay_key; - Db.VM.add_to_other_config ~__context ~self:vm ~key:Xapi_globs.last_artificial_reboot_delay_key ~value:(string_of_int next_delay); - delay - with _ -> (* no delay on first quick reboot, 2s next time *) - (* in line below, we try to remove key from other-config anyway, just in-case this exn was generated because there _was_ - a last reboot delay key in there, but it had invalid data contained it, causing the int_of_string to fail *) - (try Db.VM.remove_from_other_config ~__context ~self:vm ~key:Xapi_globs.last_artificial_reboot_delay_key with _ -> ()); - Db.VM.add_to_other_config ~__context ~self:vm ~key:Xapi_globs.last_artificial_reboot_delay_key ~value:(string_of_int 2); - 0 in - if Xapi_fist.disable_reboot_delay () - then debug "FIST: disable_reboot_delay" - else begin - debug "Adding artificial delay on reboot for VM: %s. delay time=%d seconds" (Ref.string_of vm) delay; - Thread.delay (float_of_int delay) - end + let delay = try int_of_string (with_xs (fun xs -> xs.Xs.read (artificial_reboot_key domid))) with _ -> 0 in + let next_delay = min (delay * 2 + 2) delay_cap in + delay, next_delay - let clear_reboot_delay ~__context ~vm = - try Db.VM.remove_from_other_config ~__context ~self:vm ~key:Xapi_globs.last_artificial_reboot_delay_key with _ -> () - let perform_destroy ~__context ~vm token = TaskHelper.set_description ~__context "destroy"; Xapi_vm.Shutdown.in_dom0_already_locked { Xapi_vm.TwoPhase.__context = __context; vm=vm; api_call_name="destroy"; clean=false }; @@ -142,13 +124,24 @@ let perform_restart ~__context ~vm token = TaskHelper.set_description ~__context "restart"; - if time_vm_ran_for ~__context ~vm < Xapi_globs.minimum_time_between_reboot_with_no_added_delay - then insert_reboot_delay ~__context ~vm - else clear_reboot_delay ~__context ~vm; - + let domid = Helpers.domid_of_vm ~__context ~self:vm in + let delay, next_delay = + if Xapi_fist.disable_reboot_delay () then begin + debug "FIST: disable_reboot_delay"; + 0, 0 + end else if time_vm_ran_for ~__context ~vm < Xapi_globs.minimum_time_between_reboot_with_no_added_delay then begin + calculate_reboot_delay ~__context ~vm domid + end else 0, 0 in + if delay <> 0 then begin + debug "Adding artificial delay on reboot for VM: %s. delay time=%d seconds" (Ref.string_of vm) delay; + Thread.delay (float_of_int delay); + end; try - Xapi_vm.Reboot.in_dom0_already_locked { Xapi_vm.TwoPhase.__context = __context; vm=vm; api_call_name="reboot"; clean=false }; - update_allowed_ops_using_api ~__context vm + Xapi_vm.Reboot.in_dom0_already_locked { Xapi_vm.TwoPhase.__context = __context; vm=vm; api_call_name="reboot"; clean=false }; + let domid' = Helpers.domid_of_vm ~__context ~self:vm in + assert (domid <> domid'); + (with_xs (fun xs -> xs.Xs.write (artificial_reboot_key domid') (string_of_int next_delay))); + update_allowed_ops_using_api ~__context vm with e -> (* NB this can happen if the user has change the VM configuration to onw which cannot boot (eg not enough memory) and then rebooted inside the guest *) diff -r d455f3c74966 -r 012936d9d680 ocaml/xapi/xapi_globs.ml --- a/ocaml/xapi/xapi_globs.ml Fri Mar 12 15:19:41 2010 +0000 +++ b/ocaml/xapi/xapi_globs.ml Tue Mar 16 11:30:25 2010 +0000 @@ -375,8 +375,8 @@ (* Default backlog supplied to Unix.listen *) let listen_backlog = 128 -(* Key on VM.other_config that records last artificial reboot delay *) -let last_artificial_reboot_delay_key = "last_artificial_reboot_delay" +(* Where the next artificial delay is stored in xenstore *) +let artificial_reboot_delay = "artificial-reboot-delay" (* Xapi script hooks root *) let xapi_hooks_root = "/etc/xapi.d/" diff -r d455f3c74966 -r 012936d9d680 ocaml/xapi/xapi_vm.ml --- a/ocaml/xapi/xapi_vm.ml Fri Mar 12 15:19:41 2010 +0000 +++ b/ocaml/xapi/xapi_vm.ml Tue Mar 16 11:30:25 2010 +0000 @@ -366,6 +366,8 @@ error "VM: %s halted when asked to reboot" (Ref.string_of vm) end else begin debug "%s phase 0/3: no shutdown request required since this is a hard_reboot" api_call_name; + (* Make sure no-one inserts an artificial delay at this point *) + (with_xs (fun xs -> xs.Xs.write (Hotplug.get_private_path domid ^ "/" ^ Xapi_globs.artificial_reboot_delay) "0")); (* The domain might be killed by the event thread. Again, this is ok. *) Helpers.log_exn_continue (Printf.sprintf "Xc.domain_shutdown domid=%d Xc.Reboot" domid) (fun () ->