[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v3 for 4.5] x86/viridian: Freeze time reference counter when domain is paused



In XenServer system test it has become apparent that versions of Windows
that make use of the time reference counter enlightenment cannot cope with
large jumps forward in the value read from the MSR. Specifically,
suspending a very large domain took approx. 45 minutes to complete and
when the domain was resumed it was discovered that the WMI (Windows
Management Instrumentation) service had hung.

The reason a large jump forward is seen by the guest is that, when a guest
is suspended, the guest stops running when the SCHEDOP_suspend hypercall is
made, however the MSR value essentially keeps incrementing until the
tool-stack issues DOMCTL_gethvmcontext.

This patch adds code to freeze the value of the time reference counter
on domain pause and 'thaw' it on domain unpause, but only thaw it if the
domain is not shutting down. The absolute value of the counter is then
saved in the viridian domain context record. This prevents the guest OS
from experiencing large jumps in the value of the MSR and has been shown
to reliably fix the problem with WMI.

Signed-off-by: Paul Durrant <paul.durrant@xxxxxxxxxx>
Cc: Keir Fraser <keir@xxxxxxx>
Cc: Jan Beulich <jbeulich@xxxxxxxx>
Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
v3:
 - Moved viridian calls into new arch_domain functions as suggested by Jan

v2:
 - Added extra check-in comment to explain timing discrepency
 - Fixed viridian_feature_mask() macro
 - Pass vcpu sleep function to do_domain_pause() rather than a bool

 xen/arch/arm/domain.c                  |   12 +++++++
 xen/arch/x86/domain.c                  |   19 ++++++++++
 xen/arch/x86/hvm/viridian.c            |   62 +++++++++++++++++++++++++-------
 xen/common/domain.c                    |   26 ++++++++------
 xen/include/asm-x86/hvm/hvm.h          |    9 ++++-
 xen/include/asm-x86/hvm/viridian.h     |   27 ++++++++++++++
 xen/include/public/arch-x86/hvm/save.h |    1 +
 xen/include/xen/domain.h               |    4 +++
 8 files changed, 136 insertions(+), 24 deletions(-)

diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index 2b53931..5043837 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -580,6 +580,18 @@ void arch_domain_destroy(struct domain *d)
     free_xenheap_page(d->shared_info);
 }
 
+void arch_domain_shutdown(struct domain *d)
+{
+}
+
+void arch_domain_pause(struct domain *d)
+{
+}
+
+void arch_domain_unpause(struct domain *d)
+{
+}
+
 static int is_guest_pv32_psr(uint32_t psr)
 {
     switch (psr & PSR_MODE_MASK)
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 558d8d5..ae0a344 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -51,6 +51,7 @@
 #include <asm/fixmap.h>
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/support.h>
+#include <asm/hvm/viridian.h>
 #include <asm/debugreg.h>
 #include <asm/msr.h>
 #include <asm/traps.h>
@@ -666,6 +667,24 @@ void arch_domain_destroy(struct domain *d)
     psr_free_rmid(d);
 }
 
+void arch_domain_shutdown(struct domain *d)
+{
+    if ( has_viridian_time_ref_count(d) )
+        viridian_time_ref_count_freeze(d);
+}
+
+void arch_domain_pause(struct domain *d)
+{
+    if ( has_viridian_time_ref_count(d) )
+        viridian_time_ref_count_freeze(d);
+}
+
+void arch_domain_unpause(struct domain *d)
+{
+    if ( has_viridian_time_ref_count(d) )
+        viridian_time_ref_count_thaw(d);
+}
+
 unsigned long pv_guest_cr4_fixup(const struct vcpu *v, unsigned long guest_cr4)
 {
     unsigned long hv_cr4_mask, hv_cr4 = real_cr4_to_pv_guest_cr4(read_cr4());
diff --git a/xen/arch/x86/hvm/viridian.c b/xen/arch/x86/hvm/viridian.c
index 6726168..7d55363 100644
--- a/xen/arch/x86/hvm/viridian.c
+++ b/xen/arch/x86/hvm/viridian.c
@@ -289,6 +289,39 @@ int wrmsr_viridian_regs(uint32_t idx, uint64_t val)
     return 1;
 }
 
+static int64_t raw_trc_val(struct domain *d)
+{
+    uint64_t tsc;
+    struct time_scale tsc_to_ns;
+
+    tsc = hvm_get_guest_tsc(pt_global_vcpu_target(d));
+
+    /* convert tsc to count of 100ns periods */
+    set_time_scale(&tsc_to_ns, d->arch.tsc_khz * 1000ul);
+    return scale_delta(tsc, &tsc_to_ns) / 100ul;
+}
+
+void viridian_time_ref_count_freeze(struct domain *d)
+{
+    struct viridian_time_ref_count *trc;
+
+    trc = &d->arch.hvm_domain.viridian.time_ref_count;
+
+    if ( test_and_clear_bit(_TRC_running, &trc->flags) )
+        trc->val = raw_trc_val(d) + trc->off;
+}
+
+void viridian_time_ref_count_thaw(struct domain *d)
+{
+    struct viridian_time_ref_count *trc;
+
+    trc = &d->arch.hvm_domain.viridian.time_ref_count;
+
+    if ( !d->is_shutting_down &&
+         !test_and_set_bit(_TRC_running, &trc->flags) )
+        trc->off = (int64_t)trc->val - raw_trc_val(d);
+}
+
 int rdmsr_viridian_regs(uint32_t idx, uint64_t *val)
 {
     struct vcpu *v = current;
@@ -348,18 +381,19 @@ int rdmsr_viridian_regs(uint32_t idx, uint64_t *val)
 
     case VIRIDIAN_MSR_TIME_REF_COUNT:
     {
-        uint64_t tsc;
-        struct time_scale tsc_to_ns;
+        struct viridian_time_ref_count *trc;
+
+        trc = &d->arch.hvm_domain.viridian.time_ref_count;
 
         if ( !(viridian_feature_mask(d) & HVMPV_time_ref_count) )
             return 0;
 
-        perfc_incr(mshv_rdmsr_time_ref_count);
-        tsc = hvm_get_guest_tsc(pt_global_vcpu_target(d));
+        if ( !test_and_set_bit(_TRC_accessed, &trc->flags) )
+            printk(XENLOG_G_INFO "d%d: VIRIDIAN MSR_TIME_REF_COUNT: 
accessed\n",
+                   d->domain_id);
 
-        /* convert tsc to count of 100ns periods */
-        set_time_scale(&tsc_to_ns, d->arch.tsc_khz * 1000ul);
-        *val = scale_delta(tsc, &tsc_to_ns) / 100ul;
+        perfc_incr(mshv_rdmsr_time_ref_count);
+        *val = raw_trc_val(d) + trc->off;
         break;
     }
 
@@ -450,9 +484,10 @@ static int viridian_save_domain_ctxt(struct domain *d, 
hvm_domain_context_t *h)
     if ( !is_viridian_domain(d) )
         return 0;
 
-    ctxt.hypercall_gpa = d->arch.hvm_domain.viridian.hypercall_gpa.raw;
-    ctxt.guest_os_id   = d->arch.hvm_domain.viridian.guest_os_id.raw;
-
+    ctxt.time_ref_count = d->arch.hvm_domain.viridian.time_ref_count.val;
+    ctxt.hypercall_gpa  = d->arch.hvm_domain.viridian.hypercall_gpa.raw;
+    ctxt.guest_os_id    = d->arch.hvm_domain.viridian.guest_os_id.raw;
+  
     return (hvm_save_entry(VIRIDIAN_DOMAIN, 0, h, &ctxt) != 0);
 }
 
@@ -460,11 +495,12 @@ static int viridian_load_domain_ctxt(struct domain *d, 
hvm_domain_context_t *h)
 {
     struct hvm_viridian_domain_context ctxt;
 
-    if ( hvm_load_entry(VIRIDIAN_DOMAIN, h, &ctxt) != 0 )
+    if ( hvm_load_entry_zeroextend(VIRIDIAN_DOMAIN, h, &ctxt) != 0 )
         return -EINVAL;
 
-    d->arch.hvm_domain.viridian.hypercall_gpa.raw = ctxt.hypercall_gpa;
-    d->arch.hvm_domain.viridian.guest_os_id.raw   = ctxt.guest_os_id;
+    d->arch.hvm_domain.viridian.time_ref_count.val = ctxt.time_ref_count;
+    d->arch.hvm_domain.viridian.hypercall_gpa.raw  = ctxt.hypercall_gpa;
+    d->arch.hvm_domain.viridian.guest_os_id.raw    = ctxt.guest_os_id;
 
     return 0;
 }
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 190998c..7c8e13a 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -706,6 +706,8 @@ void domain_shutdown(struct domain *d, u8 reason)
         v->paused_for_shutdown = 1;
     }
 
+    arch_domain_shutdown(d);
+
     __domain_finalise_shutdown(d);
 
     spin_unlock(&d->shutdown_lock);
@@ -925,32 +927,36 @@ int vcpu_unpause_by_systemcontroller(struct vcpu *v)
     return 0;
 }
 
-void domain_pause(struct domain *d)
+void do_domain_pause(struct domain *d,
+                     void (*sleep_fn)(struct vcpu *v))
 {
     struct vcpu *v;
 
-    ASSERT(d != current->domain);
-
     atomic_inc(&d->pause_count);
 
     for_each_vcpu( d, v )
-        vcpu_sleep_sync(v);
+        sleep_fn(v);
+
+    arch_domain_pause(d);
 }
 
-void domain_pause_nosync(struct domain *d)
+void domain_pause(struct domain *d)
 {
-    struct vcpu *v;
-
-    atomic_inc(&d->pause_count);
+    ASSERT(d != current->domain);
+    do_domain_pause(d, vcpu_sleep_sync);
+}
 
-    for_each_vcpu( d, v )
-        vcpu_sleep_nosync(v);
+void domain_pause_nosync(struct domain *d)
+{
+    do_domain_pause(d, vcpu_sleep_nosync);
 }
 
 void domain_unpause(struct domain *d)
 {
     struct vcpu *v;
 
+    arch_domain_unpause(d);
+
     if ( atomic_dec_and_test(&d->pause_count) )
         for_each_vcpu( d, v )
             vcpu_wake(v);
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index 0d94c48..e3d2d9a 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -340,12 +340,19 @@ static inline unsigned long hvm_get_shadow_gs_base(struct 
vcpu *v)
     return hvm_funcs.get_shadow_gs_base(v);
 }
 
+
+#define has_hvm_params(d) \
+    ((d)->arch.hvm_domain.params != NULL)
+
 #define viridian_feature_mask(d) \
-    ((d)->arch.hvm_domain.params[HVM_PARAM_VIRIDIAN])
+    (has_hvm_params(d) ? (d)->arch.hvm_domain.params[HVM_PARAM_VIRIDIAN] : 0)
 
 #define is_viridian_domain(d) \
     (is_hvm_domain(d) && (viridian_feature_mask(d) & HVMPV_base_freq))
 
+#define has_viridian_time_ref_count(d) \
+    (is_viridian_domain(d) && (viridian_feature_mask(d) & 
HVMPV_time_ref_count))
+
 void hvm_hypervisor_cpuid_leaf(uint32_t sub_idx,
                                uint32_t *eax, uint32_t *ebx,
                                uint32_t *ecx, uint32_t *edx);
diff --git a/xen/include/asm-x86/hvm/viridian.h 
b/xen/include/asm-x86/hvm/viridian.h
index 496da33..4cab2e8 100644
--- a/xen/include/asm-x86/hvm/viridian.h
+++ b/xen/include/asm-x86/hvm/viridian.h
@@ -48,10 +48,24 @@ union viridian_hypercall_gpa
     } fields;
 };
 
+struct viridian_time_ref_count
+{
+    unsigned long flags;
+
+#define _TRC_accessed 0
+#define TRC_accessed (1 << _TRC_accessed)
+#define _TRC_running 1
+#define TRC_running (1 << _TRC_running)
+
+    uint64_t val;
+    int64_t off;
+};
+
 struct viridian_domain
 {
     union viridian_guest_os_id guest_os_id;
     union viridian_hypercall_gpa hypercall_gpa;
+    struct viridian_time_ref_count time_ref_count;
 };
 
 int
@@ -75,4 +89,17 @@ rdmsr_viridian_regs(
 int
 viridian_hypercall(struct cpu_user_regs *regs);
 
+void viridian_time_ref_count_freeze(struct domain *d);
+void viridian_time_ref_count_thaw(struct domain *d);
+
 #endif /* __ASM_X86_HVM_VIRIDIAN_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/public/arch-x86/hvm/save.h 
b/xen/include/public/arch-x86/hvm/save.h
index 16d85a3..88aab7e 100644
--- a/xen/include/public/arch-x86/hvm/save.h
+++ b/xen/include/public/arch-x86/hvm/save.h
@@ -568,6 +568,7 @@ struct hvm_hw_cpu_xsave {
 struct hvm_viridian_domain_context {
     uint64_t hypercall_gpa;
     uint64_t guest_os_id;
+    uint64_t time_ref_count;
 };
 
 DECLARE_HVM_SAVE_TYPE(VIRIDIAN_DOMAIN, 15, struct hvm_viridian_domain_context);
diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
index c5664c2..9215b0e 100644
--- a/xen/include/xen/domain.h
+++ b/xen/include/xen/domain.h
@@ -59,6 +59,10 @@ int arch_domain_create(struct domain *d, unsigned int 
domcr_flags);
 
 void arch_domain_destroy(struct domain *d);
 
+void arch_domain_shutdown(struct domain *d);
+void arch_domain_pause(struct domain *d);
+void arch_domain_unpause(struct domain *d);
+
 int arch_set_info_guest(struct vcpu *, vcpu_guest_context_u);
 void arch_get_info_guest(struct vcpu *, vcpu_guest_context_u);
 
-- 
1.7.10.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.