In the case of delay_for_missed_ticks mode (timer_mode=0), the guest virtual time goes backwards when the vcpu is rescheduled. Therefore the guest's HW timer might go backwards, too. Case 1. SMP guest: 1) vcpu#1 is de-scheduled and the guest time freezes. (TIME 0:0.010) 2) vcpu#2 accesses a timer (0:0.020) 3) vcpu#1 is re-scheduled and the guest time thaws. (0:0.030->0:0.010) 4) vcpu#2 accesses a timer (0:0.015) // Backwards!!! Case 2. asynchronous callback: 1) vcpu#1 is de-scheduled and the guest time freezes. (0:0.010) 2) pmt_timer_callback() is invoked (0:0.025) 3) vcpu#1 is re-scheduled and the guest time thaws. (0:0.030->0:0.010) 4) vcpu#1 accesses the PM-TIMER (0:0.015) // Backwards!!! This patch affects only delay_for_missed_ticks mode (timer_mode=0) and ensures the monotonicity of the following timers: - PIT - HPET - ACPI PM-TIMER The following timers are OK since a vcpu never accesses another vcpu's timer. - Local APIC ( has some callbacks but it's called from pt_intr_post ) - TSC Just in case, these timers could use the new function hvm_get_base_time() too, but don't, since it's a little bit less efficient than hvm_get_guest_time(). Also, tidy up virtual platform timer code. 
Signed-off-by: Kouya Shimura --- xen/arch/x86/hvm/hpet.c | 2 +- xen/arch/x86/hvm/i8254.c | 2 +- xen/arch/x86/hvm/pmtimer.c | 2 +- xen/arch/x86/hvm/vpt.c | 93 ++++++++++++++++++++++++++++++++++------- xen/include/asm-x86/hvm/hvm.h | 2 +- 5 files changed, 82 insertions(+), 19 deletions(-) diff --git a/xen/arch/x86/hvm/hpet.c b/xen/arch/x86/hvm/hpet.c index 4b4b905..fa44d37 100644 --- a/xen/arch/x86/hvm/hpet.c +++ b/xen/arch/x86/hvm/hpet.c @@ -39,7 +39,7 @@ /* Frequency_of_Xen_systeme_time / frequency_of_HPET = 16 */ #define STIME_PER_HPET_TICK 16 #define guest_time_hpet(hpet) \ - (hvm_get_guest_time(vhpet_vcpu(hpet)) / STIME_PER_HPET_TICK) + (hvm_get_base_time(vhpet_vcpu(hpet)) / STIME_PER_HPET_TICK) #define HPET_TN_INT_ROUTE_CAP_SHIFT 32 #define HPET_TN_CFG_BITS_READONLY_OR_RESERVED (HPET_TN_RESERVED | \ diff --git a/xen/arch/x86/hvm/i8254.c b/xen/arch/x86/hvm/i8254.c index c0d6bc2..c45ed88 100644 --- a/xen/arch/x86/hvm/i8254.c +++ b/xen/arch/x86/hvm/i8254.c @@ -54,7 +54,7 @@ static int handle_speaker_io( int dir, uint32_t port, uint32_t bytes, uint32_t *val); #define get_guest_time(v) \ - (is_hvm_vcpu(v) ? hvm_get_guest_time(v) : (u64)get_s_time()) + (is_hvm_vcpu(v) ? 
hvm_get_base_time(v) : (u64)get_s_time()) static int pit_get_count(PITState *pit, int channel) { diff --git a/xen/arch/x86/hvm/pmtimer.c b/xen/arch/x86/hvm/pmtimer.c index 01ae31d..5c25cfb 100644 --- a/xen/arch/x86/hvm/pmtimer.c +++ b/xen/arch/x86/hvm/pmtimer.c @@ -93,7 +93,7 @@ static void pmt_update_time(PMTState *s) ASSERT(spin_is_locked(&s->lock)); /* Update the timer */ - curr_gtime = hvm_get_guest_time(s->vcpu); + curr_gtime = hvm_get_base_time(s->vcpu); tmp = ((curr_gtime - s->last_gtime) * s->scale) + s->not_accounted; s->not_accounted = (uint32_t)tmp; tmr_val += tmp >> 32; diff --git a/xen/arch/x86/hvm/vpt.c b/xen/arch/x86/hvm/vpt.c index 46d3ec6..7a3edf3 100644 --- a/xen/arch/x86/hvm/vpt.c +++ b/xen/arch/x86/hvm/vpt.c @@ -36,6 +36,19 @@ void hvm_init_guest_time(struct domain *d) pl->last_guest_time = 0; } +static inline u64 pt_now(struct pl_time *pl, struct vcpu *v) +{ + u64 now = get_s_time() + pl->stime_offset; + + ASSERT(spin_is_locked(&pl->pl_time_lock)); + + if ( (int64_t)(now - pl->last_guest_time) > 0 ) + pl->last_guest_time = now; + else + now = ++pl->last_guest_time; + return now + v->arch.hvm_vcpu.stime_offset; +} + u64 hvm_get_guest_time(struct vcpu *v) { struct pl_time *pl = &v->domain->arch.hvm_domain.pl_time; @@ -45,19 +58,33 @@ u64 hvm_get_guest_time(struct vcpu *v) ASSERT(is_hvm_vcpu(v)); spin_lock(&pl->pl_time_lock); - now = get_s_time() + pl->stime_offset; - if ( (int64_t)(now - pl->last_guest_time) > 0 ) - pl->last_guest_time = now; - else - now = ++pl->last_guest_time; + now = pt_now(pl, v); spin_unlock(&pl->pl_time_lock); - - return now + v->arch.hvm_vcpu.stime_offset; + return now; } -void hvm_set_guest_time(struct vcpu *v, u64 guest_time) +/* + * This function is used to emulate HW timer counters. In the case of + * delay_for_missed_ticks mode, the guest time once goes backwards to + * the frozen time when the vcpu is rescheduled. 
To avoid decrement + * of a timer counter, return the frozen time while the vcpu is not + * being scheduled. + */ +u64 hvm_get_base_time(struct vcpu *v) { - v->arch.hvm_vcpu.stime_offset += guest_time - hvm_get_guest_time(v); + struct pl_time *pl = &v->domain->arch.hvm_domain.pl_time; + u64 now; + + /* Called from device models shared with PV guests. Be careful. */ + ASSERT(is_hvm_vcpu(v)); + + spin_lock(&pl->pl_time_lock); + if ( v->arch.hvm_vcpu.guest_time ) /* the guest time is frozen */ + now = v->arch.hvm_vcpu.guest_time; + else + now = pt_now(pl, v); + spin_unlock(&pl->pl_time_lock); + return now; } static int pt_irq_vector(struct periodic_time *pt, enum hvm_intsrc src) @@ -138,24 +165,62 @@ static void pt_process_missed_ticks(struct periodic_time *pt) pt->scheduled += missed_ticks * pt->period; } +/* + * N.B. The following three functions, pt_freeze_time(), + * pt_thaw_time() and pt_step_time() never race with each others, + * but race with either hvm_get_guest_time() or hvm_get_base_time(). 
+ */ + static void pt_freeze_time(struct vcpu *v) { + struct pl_time *pl; + if ( !mode_is(v->domain, delay_for_missed_ticks) ) return; - v->arch.hvm_vcpu.guest_time = hvm_get_guest_time(v); + pl = &v->domain->arch.hvm_domain.pl_time; + spin_lock(&pl->pl_time_lock); + v->arch.hvm_vcpu.guest_time = pt_now(pl, v); + spin_unlock(&pl->pl_time_lock); } static void pt_thaw_time(struct vcpu *v) { + struct pl_time *pl; + u64 now, frozen_time = v->arch.hvm_vcpu.guest_time; + +#if 0 /* redundant */ if ( !mode_is(v->domain, delay_for_missed_ticks) ) return; +#endif - if ( v->arch.hvm_vcpu.guest_time == 0 ) + if ( frozen_time == 0 ) return; - hvm_set_guest_time(v, v->arch.hvm_vcpu.guest_time); + ASSERT(mode_is(v->domain, delay_for_missed_ticks)); + + pl = &v->domain->arch.hvm_domain.pl_time; + spin_lock(&pl->pl_time_lock); + now = pt_now(pl, v); + v->arch.hvm_vcpu.stime_offset += frozen_time - now; v->arch.hvm_vcpu.guest_time = 0; + spin_unlock(&pl->pl_time_lock); +} + +static void pt_step_time(struct vcpu *v, u64 guest_time) +{ + struct pl_time *pl; + u64 now; + + if ( !mode_is(v->domain, delay_for_missed_ticks) ) + return; + + pl = &v->domain->arch.hvm_domain.pl_time; + spin_lock(&pl->pl_time_lock); + now = pt_now(pl, v); + if ( now < guest_time ) + v->arch.hvm_vcpu.stime_offset += guest_time - now; + spin_unlock(&pl->pl_time_lock); } void pt_save_timer(struct vcpu *v) @@ -341,9 +406,7 @@ void pt_intr_post(struct vcpu *v, struct hvm_intack intack) } } - if ( mode_is(v->domain, delay_for_missed_ticks) && - (hvm_get_guest_time(v) < pt->last_plt_gtime) ) - hvm_set_guest_time(v, pt->last_plt_gtime); + pt_step_time(v, pt->last_plt_gtime); cb = pt->cb; cb_priv = pt->priv; diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h index 2fa2ea5..f4cd200 100644 --- a/xen/include/asm-x86/hvm/hvm.h +++ b/xen/include/asm-x86/hvm/hvm.h @@ -226,8 +226,8 @@ void hvm_set_guest_tsc(struct vcpu *v, u64 guest_tsc); u64 hvm_get_guest_tsc(struct vcpu *v); void 
hvm_init_guest_time(struct domain *d); -void hvm_set_guest_time(struct vcpu *v, u64 guest_time); u64 hvm_get_guest_time(struct vcpu *v); +u64 hvm_get_base_time(struct vcpu *v); int vmsi_deliver( struct domain *d, int vector, -- 1.7.9.5