[Xen-devel] [PATCH v3 1/1] x86/HVM: Use fixed TSC value when saving or restoring domain
When a domain is saved, each VCPU's TSC value needs to be preserved. To get it
we use hvm_get_guest_tsc(). This routine (either itself or via get_s_time(),
which it may call) calculates the VCPU's TSC based on the host's current TSC
value (by doing a rdtscll()). Since this is done for each VCPU separately, we
end up with unsynchronized TSCs.

Similarly, during a restore each VCPU is assigned its TSC based on the host's
current tick, causing the virtual TSCs to diverge further.

With this, we can easily get into a situation where a guest may see time going
backwards.

Instead of reading a new TSC value for each VCPU when saving/restoring it, we
should use the same value across all VCPUs.
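
A minimal sketch of the idea, outside of Xen (all names below are illustrative,
not the hypervisor's): sample the host TSC once per save/restore and hand that
fixed value to every VCPU, instead of letting each VCPU read the TSC on its own:

  /* Sketch only; assumes x86 and GCC-style inline asm. */
  #include <stdint.h>

  static inline uint64_t rdtsc_now(void)
  {
      uint32_t lo, hi;
      asm volatile ("rdtsc" : "=a" (lo), "=d" (hi));
      return ((uint64_t)hi << 32) | lo;
  }

  /* at_tsc == 0 means "sample the host TSC now", mirroring the patch. */
  static uint64_t guest_tsc_from_host(uint64_t tsc_offset, uint64_t at_tsc)
  {
      uint64_t host_tsc = at_tsc ? at_tsc : rdtsc_now();
      return host_tsc + tsc_offset;
  }

  /* With at_tsc == 0 every VCPU samples the host TSC at a slightly later
   * moment, so the saved values drift apart.  Sampling once up front and
   * reusing that value keeps all saved TSCs consistent. */
  void save_all_vcpus(const uint64_t *offsets, uint64_t *saved, int nr_vcpus)
  {
      uint64_t checkpoint_tsc = rdtsc_now(); /* one sample for the whole save */
      int i;

      for ( i = 0; i < nr_vcpus; i++ )
          saved[i] = guest_tsc_from_host(offsets[i], checkpoint_tsc);
  }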
(As part of the patch, update rdtscll()'s definition so that its local
variables don't clash with the calling code.)
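
To see why the rename matters, here is a hypothetical caller (shown purely for
illustration; rdtscll_old() merely mirrors the previous macro shape): with
locals named 'a' and 'd', a caller variable of the same name is shadowed and
never assigned:

  #define rdtscll_old(val) do { \
      unsigned int a, d; \
      asm volatile ("rdtsc" : "=a" (a), "=d" (d)); \
      (val) = ((unsigned long)a) | (((unsigned long)d) << 32); \
  } while (0)

  unsigned long read_tsc_into_d(void)
  {
      unsigned long d = 0;   /* same name as the macro's local */
      rdtscll_old(d);        /* expands to assign the macro's inner 'd' */
      return d;              /* still 0: the caller's 'd' was shadowed */
  }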
Reported-by: Philippe Coquard <philippe.coquard@xxxxxxxx>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
Reviewed-by: Kevin Tian <kevin.tian@xxxxxxxxx>
---
xen/arch/x86/hvm/hvm.c | 22 +++++++++++++++-------
xen/arch/x86/hvm/save.c | 16 ++++++++++++++++
xen/arch/x86/hvm/svm/svm.c | 13 ++++++++-----
xen/arch/x86/hvm/vmx/vmx.c | 6 +++---
xen/arch/x86/hvm/vmx/vvmx.c | 4 ++--
xen/arch/x86/hvm/vpt.c | 16 ++++++++++------
xen/arch/x86/time.c | 12 ++++++++++--
xen/common/hvm/save.c | 5 +++++
xen/include/asm-x86/domain.h | 2 ++
xen/include/asm-x86/hvm/hvm.h | 11 +++++++----
xen/include/asm-x86/msr.h | 6 +++---
xen/include/xen/hvm/save.h | 2 ++
xen/include/xen/time.h | 1 +
13 files changed, 84 insertions(+), 32 deletions(-)
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 5e89cf5..3711377 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -255,16 +255,20 @@ int hvm_set_guest_pat(struct vcpu *v, u64 guest_pat)
return 1;
}
-void hvm_set_guest_tsc(struct vcpu *v, u64 guest_tsc)
+void hvm_set_guest_tsc_fixed(struct vcpu *v, u64 guest_tsc, u64 at_tsc)
{
uint64_t tsc;
uint64_t delta_tsc;
if ( v->domain->arch.vtsc )
{
- tsc = hvm_get_guest_time(v);
+ tsc = hvm_get_guest_time_fixed(v, at_tsc);
tsc = gtime_to_gtsc(v->domain, tsc);
}
+ else if ( at_tsc )
+ {
+ tsc = at_tsc;
+ }
else
{
rdtscll(tsc);
@@ -275,27 +279,31 @@ void hvm_set_guest_tsc(struct vcpu *v, u64 guest_tsc)
- v->arch.hvm_vcpu.cache_tsc_offset;
v->arch.hvm_vcpu.cache_tsc_offset = delta_tsc;
- hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
+ hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset, at_tsc);
}
void hvm_set_guest_tsc_adjust(struct vcpu *v, u64 tsc_adjust)
{
v->arch.hvm_vcpu.cache_tsc_offset += tsc_adjust
- v->arch.hvm_vcpu.msr_tsc_adjust;
- hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
+ hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset, 0);
v->arch.hvm_vcpu.msr_tsc_adjust = tsc_adjust;
}
-u64 hvm_get_guest_tsc(struct vcpu *v)
+u64 hvm_get_guest_tsc_fixed(struct vcpu *v, uint64_t at_tsc)
{
uint64_t tsc;
if ( v->domain->arch.vtsc )
{
- tsc = hvm_get_guest_time(v);
+ tsc = hvm_get_guest_time_fixed(v, at_tsc);
tsc = gtime_to_gtsc(v->domain, tsc);
v->domain->arch.vtsc_kerncount++;
}
+ else if ( at_tsc )
+ {
+ tsc = at_tsc;
+ }
else
{
rdtscll(tsc);
@@ -3848,7 +3856,7 @@ void hvm_vcpu_reset_state(struct vcpu *v, uint16_t cs, uint16_t ip)
/* Sync AP's TSC with BSP's. */
v->arch.hvm_vcpu.cache_tsc_offset =
v->domain->vcpu[0]->arch.hvm_vcpu.cache_tsc_offset;
- hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
+ hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset, 0);
v->arch.hvm_vcpu.msr_tsc_adjust = 0;
diff --git a/xen/arch/x86/hvm/save.c b/xen/arch/x86/hvm/save.c
index 066fdb2..309a1fd 100644
--- a/xen/arch/x86/hvm/save.c
+++ b/xen/arch/x86/hvm/save.c
@@ -34,6 +34,14 @@ void arch_hvm_save(struct domain *d, struct hvm_save_header *hdr)
/* Save guest's preferred TSC. */
hdr->gtsc_khz = d->arch.tsc_khz;
+
+ /* Time when saving started */
+ rdtscll(d->arch.chkpt_tsc);
+}
+
+void arch_hvm_save_done(struct domain *d)
+{
+ d->arch.chkpt_tsc = 0;
}
int arch_hvm_load(struct domain *d, struct hvm_save_header *hdr)
@@ -67,12 +75,20 @@ int arch_hvm_load(struct domain *d, struct hvm_save_header *hdr)
if ( d->arch.vtsc )
hvm_set_rdtsc_exiting(d, 1);
+ /* Time when restore started */
+ rdtscll(d->arch.chkpt_tsc);
+
/* VGA state is not saved/restored, so we nobble the cache. */
d->arch.hvm_domain.stdvga.cache = 0;
return 0;
}
+void arch_hvm_load_done(struct domain *d)
+{
+ d->arch.chkpt_tsc = 0;
+}
+
/*
* Local variables:
* mode: C
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index 4fd5376..1ec11ba 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -318,7 +318,7 @@ static void svm_save_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
data->msr_efer = v->arch.hvm_vcpu.guest_efer;
data->msr_flags = -1ULL;
- data->tsc = hvm_get_guest_tsc(v);
+ data->tsc = hvm_get_guest_tsc_fixed(v, v->domain->arch.chkpt_tsc);
}
@@ -334,7 +334,7 @@ static void svm_load_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
v->arch.hvm_vcpu.guest_efer = data->msr_efer;
svm_update_guest_efer(v);
- hvm_set_guest_tsc(v, data->tsc);
+ hvm_set_guest_tsc_fixed(v, data->tsc, v->domain->arch.chkpt_tsc);
}
static void svm_save_vmcb_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
@@ -680,7 +680,7 @@ static uint64_t svm_get_tsc_offset(uint64_t host_tsc, uint64_t guest_tsc,
return guest_tsc - offset;
}
-static void svm_set_tsc_offset(struct vcpu *v, u64 offset)
+static void svm_set_tsc_offset(struct vcpu *v, u64 offset, u64 at_tsc)
{
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
struct vmcb_struct *n1vmcb, *n2vmcb;
@@ -688,11 +688,14 @@ static void svm_set_tsc_offset(struct vcpu *v, u64 offset)
struct domain *d = v->domain;
uint64_t host_tsc, guest_tsc;
- guest_tsc = hvm_get_guest_tsc(v);
+ guest_tsc = hvm_get_guest_tsc_fixed(v, at_tsc);
/* Re-adjust the offset value when TSC_RATIO is available */
if ( cpu_has_tsc_ratio && d->arch.vtsc ) {
- rdtscll(host_tsc);
+ if ( at_tsc )
+ host_tsc = at_tsc;
+ else
+ rdtscll(host_tsc);
offset = svm_get_tsc_offset(host_tsc, guest_tsc, vcpu_tsc_ratio(v));
}
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 77ce167..c6e7ba4 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -540,7 +540,7 @@ static void vmx_save_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
data->msr_star = guest_state->msrs[VMX_INDEX_MSR_STAR];
data->msr_syscall_mask = guest_state->msrs[VMX_INDEX_MSR_SYSCALL_MASK];
- data->tsc = hvm_get_guest_tsc(v);
+ data->tsc = hvm_get_guest_tsc_fixed(v, v->domain->arch.chkpt_tsc);
}
static void vmx_load_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
@@ -556,7 +556,7 @@ static void vmx_load_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
v->arch.hvm_vmx.cstar = data->msr_cstar;
v->arch.hvm_vmx.shadow_gs = data->shadow_gs;
- hvm_set_guest_tsc(v, data->tsc);
+ hvm_set_guest_tsc_fixed(v, data->tsc, v->domain->arch.chkpt_tsc);
}
@@ -1052,7 +1052,7 @@ static void vmx_handle_cd(struct vcpu *v, unsigned long value)
}
}
-static void vmx_set_tsc_offset(struct vcpu *v, u64 offset)
+static void vmx_set_tsc_offset(struct vcpu *v, u64 offset, u64 at_tsc)
{
vmx_vmcs_enter(v);
diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c
index 40167d6..e263376 100644
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -1058,7 +1058,7 @@ static void load_shadow_guest_state(struct vcpu *v)
if ( control & VM_ENTRY_LOAD_PERF_GLOBAL_CTRL )
hvm_msr_write_intercept(MSR_CORE_PERF_GLOBAL_CTRL, __get_vvmcs(vvmcs,
GUEST_PERF_GLOBAL_CTRL));
- hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
+ hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset, 0);
vvmcs_to_shadow_bulk(v, ARRAY_SIZE(vmentry_fields), vmentry_fields);
@@ -1259,7 +1259,7 @@ static void load_vvmcs_host_state(struct vcpu *v)
if ( control & VM_EXIT_LOAD_PERF_GLOBAL_CTRL )
hvm_msr_write_intercept(MSR_CORE_PERF_GLOBAL_CTRL, __get_vvmcs(vvmcs,
HOST_PERF_GLOBAL_CTRL));
- hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
+ hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset, 0);
__set_vvmcs(vvmcs, VM_ENTRY_INTR_INFO, 0);
}
diff --git a/xen/arch/x86/hvm/vpt.c b/xen/arch/x86/hvm/vpt.c
index f7af688..38541cf 100644
--- a/xen/arch/x86/hvm/vpt.c
+++ b/xen/arch/x86/hvm/vpt.c
@@ -36,7 +36,7 @@ void hvm_init_guest_time(struct domain *d)
pl->last_guest_time = 0;
}
-u64 hvm_get_guest_time(struct vcpu *v)
+u64 hvm_get_guest_time_fixed(struct vcpu *v, u64 at_tsc)
{
struct pl_time *pl = &v->domain->arch.hvm_domain.pl_time;
u64 now;
@@ -45,11 +45,15 @@ u64 hvm_get_guest_time(struct vcpu *v)
ASSERT(is_hvm_vcpu(v));
spin_lock(&pl->pl_time_lock);
- now = get_s_time() + pl->stime_offset;
- if ( (int64_t)(now - pl->last_guest_time) > 0 )
- pl->last_guest_time = now;
- else
- now = ++pl->last_guest_time;
+ now = get_s_time_fixed(at_tsc) + pl->stime_offset;
+
+ if ( !at_tsc )
+ {
+ if ( (int64_t)(now - pl->last_guest_time) > 0 )
+ pl->last_guest_time = now;
+ else
+ now = ++pl->last_guest_time;
+ }
spin_unlock(&pl->pl_time_lock);
return now + v->arch.hvm_vcpu.stime_offset;
diff --git a/xen/arch/x86/time.c b/xen/arch/x86/time.c
index f904af2..b0bd388 100644
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -715,19 +715,27 @@ static unsigned long get_cmos_time(void)
* System Time
***************************************************************************/
-s_time_t get_s_time(void)
+s_time_t get_s_time_fixed(u64 at_tsc)
{
struct cpu_time *t = &this_cpu(cpu_time);
u64 tsc, delta;
s_time_t now;
- rdtscll(tsc);
+ if ( at_tsc )
+ tsc = at_tsc;
+ else
+ rdtscll(tsc);
delta = tsc - t->local_tsc_stamp;
now = t->stime_local_stamp + scale_delta(delta, &t->tsc_scale);
return now;
}
+s_time_t get_s_time()
+{
+ return get_s_time_fixed(0);
+}
+
uint64_t tsc_ticks2ns(uint64_t ticks)
{
struct cpu_time *t = &this_cpu(cpu_time);
diff --git a/xen/common/hvm/save.c b/xen/common/hvm/save.c
index 6c16399..7db68af 100644
--- a/xen/common/hvm/save.c
+++ b/xen/common/hvm/save.c
@@ -186,6 +186,8 @@ int hvm_save(struct domain *d, hvm_domain_context_t *h)
}
}
+ arch_hvm_save_done(d);
+
/* Save an end-of-file marker */
if ( hvm_save_entry(END, 0, h, &end) != 0 )
{
@@ -236,7 +238,10 @@ int hvm_load(struct domain *d, hvm_domain_context_t *h)
/* Read the typecode of the next entry and check for the end-marker */
desc = (struct hvm_save_descriptor *)(&h->data[h->cur]);
if ( desc->typecode == 0 )
+ {
+ arch_hvm_load_done(d);
return 0;
+ }
/* Find the handler for this entry */
if ( (desc->typecode > HVM_SAVE_CODE_MAX) ||
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 4ff89f0..7274fc1 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -308,6 +308,8 @@ struct arch_domain
(possibly other cases in the future */
uint64_t vtsc_kerncount; /* for hvm, counts all vtsc */
uint64_t vtsc_usercount; /* not used for hvm */
+ uint64_t chkpt_tsc; /* TSC value that VCPUs use to calculate their
+ tsc_offset value. Used during save/restore */
/* Pseudophysical e820 map (XENMEM_memory_map). */
spinlock_t e820_lock;
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index dcc3483..31043b2 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -137,7 +137,7 @@ struct hvm_function_table {
int (*get_guest_pat)(struct vcpu *v, u64 *);
int (*set_guest_pat)(struct vcpu *v, u64);
- void (*set_tsc_offset)(struct vcpu *v, u64 offset);
+ void (*set_tsc_offset)(struct vcpu *v, u64 offset, u64 at_tsc);
void (*inject_trap)(struct hvm_trap *trap);
@@ -232,12 +232,15 @@ bool_t hvm_send_assist_req(struct vcpu *v);
void hvm_get_guest_pat(struct vcpu *v, u64 *guest_pat);
int hvm_set_guest_pat(struct vcpu *v, u64 guest_pat);
-void hvm_set_guest_tsc(struct vcpu *v, u64 guest_tsc);
-u64 hvm_get_guest_tsc(struct vcpu *v);
+void hvm_set_guest_tsc_fixed(struct vcpu *v, u64 guest_tsc, u64 at_tsc);
+#define hvm_set_guest_tsc(v, t) hvm_set_guest_tsc_fixed(v, t, 0)
+u64 hvm_get_guest_tsc_fixed(struct vcpu *v, u64 at_tsc);
+#define hvm_get_guest_tsc(v) hvm_get_guest_tsc_fixed(v, 0)
void hvm_init_guest_time(struct domain *d);
void hvm_set_guest_time(struct vcpu *v, u64 guest_time);
-u64 hvm_get_guest_time(struct vcpu *v);
+u64 hvm_get_guest_time_fixed(struct vcpu *v, u64 at_tsc);
+#define hvm_get_guest_time(v) hvm_get_guest_time_fixed(v, 0)
int vmsi_deliver(
struct domain *d, int vector,
diff --git a/xen/include/asm-x86/msr.h b/xen/include/asm-x86/msr.h
index 61f579a..52cae4b 100644
--- a/xen/include/asm-x86/msr.h
+++ b/xen/include/asm-x86/msr.h
@@ -78,9 +78,9 @@ static inline int wrmsr_safe(unsigned int msr, uint64_t val)
__asm__ __volatile__("rdtsc" : "=a" (low) : : "edx")
#define rdtscll(val) do { \
- unsigned int a,d; \
- asm volatile("rdtsc" : "=a" (a), "=d" (d)); \
- (val) = ((unsigned long)a) | (((unsigned long)d)<<32); \
+ unsigned int _eax, _edx; \
+ asm volatile("rdtsc" : "=a" (_eax), "=d" (_edx)); \
+ (val) = ((unsigned long)_eax) | (((unsigned long)_edx)<<32); \
} while(0)
#define __write_tsc(val) wrmsrl(MSR_IA32_TSC, val)
diff --git a/xen/include/xen/hvm/save.h b/xen/include/xen/hvm/save.h
index ae6f0bb..70522a9 100644
--- a/xen/include/xen/hvm/save.h
+++ b/xen/include/xen/hvm/save.h
@@ -133,6 +133,8 @@ int hvm_load(struct domain *d, hvm_domain_context_t *h);
/* Arch-specific definitions. */
struct hvm_save_header;
void arch_hvm_save(struct domain *d, struct hvm_save_header *hdr);
+void arch_hvm_save_done(struct domain *d);
int arch_hvm_load(struct domain *d, struct hvm_save_header *hdr);
+void arch_hvm_load_done(struct domain *d);
#endif /* __XEN_HVM_SAVE_H__ */
diff --git a/xen/include/xen/time.h b/xen/include/xen/time.h
index 2703454..709501f 100644
--- a/xen/include/xen/time.h
+++ b/xen/include/xen/time.h
@@ -32,6 +32,7 @@ struct vcpu;
typedef s64 s_time_t;
#define PRI_stime PRId64
+s_time_t get_s_time_fixed(u64 at_tick);
s_time_t get_s_time(void);
unsigned long get_localtime(struct domain *d);
uint64_t get_localtime_us(struct domain *d);
--
1.7.1
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel