|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH for 4.5] x86/viridian: Freeze time reference counter when domain is paused
> -----Original Message-----
> From: Andrew Cooper
> Sent: 13 October 2014 11:14
> To: Paul Durrant; xen-devel@xxxxxxxxxxxxxxxxxxxx
> Cc: Keir (Xen.org); Jan Beulich
> Subject: Re: [PATCH for 4.5] x86/viridian: Freeze time reference counter
> when domain is paused
>
> On 10/10/14 10:07, Paul Durrant wrote:
> > In XenServer system test it has become apparent that versions of Windows
> > that make use of the time reference counter enlightenment cannot cope
> with
> > large jumps forward in the value read from the MSR. Specifically,
> > suspending a very large domain took approx. 45 minutes to complete and
> > when the domain was resumed it was discovered that the WMI (Windows
> > Management Instrumentation) service had hung.
>
> It is possibly worth noting in the commit message that the OSes idea of
> the suspend time happens at the SHEDOP_suspend time, while the VMs real
> suspend time is after all the memory is complete, and the toolstack
> calls DOMCTL_gethvmcontext.
>
> For a large VM (or slow network/storage), this is a delta of 45 minutes.
>
Ok.
> >
> > This patch adds code to freeze the value of the time reference counter
> > on domain pause and 'thaw' it on domain unpause, but only if the domain is
> > not shutting down. The absolute value of the counter is then saved in the
> > viridian domain context record. This prevents the guest OS from
> experiencing
> > large jumps in the value of the MSR and has been shown to reliably fix
> > the problem with WMI.
> >
> > Signed-off-by: Paul Durrant <paul.durrant@xxxxxxxxxx>
> > Cc: Keir Fraser <keir@xxxxxxx>
> > Cc: Jan Beulich <jbeulich@xxxxxxxx>
> > Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
> > ---
> > xen/arch/x86/hvm/viridian.c | 62 +++++++++++++++++++++++++--
> -----
> > xen/common/domain.c | 41 +++++++++++++++------
> > xen/include/asm-x86/hvm/hvm.h | 9 ++++-
> > xen/include/asm-x86/hvm/viridian.h | 27 ++++++++++++++
> > xen/include/public/arch-x86/hvm/save.h | 1 +
> > 5 files changed, 116 insertions(+), 24 deletions(-)
> >
> > diff --git a/xen/arch/x86/hvm/viridian.c b/xen/arch/x86/hvm/viridian.c
> > index 6726168..7d55363 100644
> > --- a/xen/arch/x86/hvm/viridian.c
> > +++ b/xen/arch/x86/hvm/viridian.c
> > @@ -289,6 +289,39 @@ int wrmsr_viridian_regs(uint32_t idx, uint64_t val)
> > return 1;
> > }
> >
> > +static int64_t raw_trc_val(struct domain *d)
> > +{
> > + uint64_t tsc;
> > + struct time_scale tsc_to_ns;
> > +
> > + tsc = hvm_get_guest_tsc(pt_global_vcpu_target(d));
> > +
> > + /* convert tsc to count of 100ns periods */
> > + set_time_scale(&tsc_to_ns, d->arch.tsc_khz * 1000ul);
> > + return scale_delta(tsc, &tsc_to_ns) / 100ul;
> > +}
> > +
> > +void viridian_time_ref_count_freeze(struct domain *d)
> > +{
> > + struct viridian_time_ref_count *trc;
> > +
> > + trc = &d->arch.hvm_domain.viridian.time_ref_count;
> > +
> > + if ( test_and_clear_bit(_TRC_running, &trc->flags) )
> > + trc->val = raw_trc_val(d) + trc->off;
> > +}
> > +
> > +void viridian_time_ref_count_thaw(struct domain *d)
> > +{
> > + struct viridian_time_ref_count *trc;
> > +
> > + trc = &d->arch.hvm_domain.viridian.time_ref_count;
> > +
> > + if ( !d->is_shutting_down &&
> > + !test_and_set_bit(_TRC_running, &trc->flags) )
> > + trc->off = (int64_t)trc->val - raw_trc_val(d);
> > +}
> > +
> > int rdmsr_viridian_regs(uint32_t idx, uint64_t *val)
> > {
> > struct vcpu *v = current;
> > @@ -348,18 +381,19 @@ int rdmsr_viridian_regs(uint32_t idx, uint64_t
> *val)
> >
> > case VIRIDIAN_MSR_TIME_REF_COUNT:
> > {
> > - uint64_t tsc;
> > - struct time_scale tsc_to_ns;
> > + struct viridian_time_ref_count *trc;
> > +
> > + trc = &d->arch.hvm_domain.viridian.time_ref_count;
> >
> > if ( !(viridian_feature_mask(d) & HVMPV_time_ref_count) )
> > return 0;
> >
> > - perfc_incr(mshv_rdmsr_time_ref_count);
> > - tsc = hvm_get_guest_tsc(pt_global_vcpu_target(d));
> > + if ( !test_and_set_bit(_TRC_accessed, &trc->flags) )
> > + printk(XENLOG_G_INFO "d%d: VIRIDIAN MSR_TIME_REF_COUNT:
> accessed\n",
> > + d->domain_id);
> >
> > - /* convert tsc to count of 100ns periods */
> > - set_time_scale(&tsc_to_ns, d->arch.tsc_khz * 1000ul);
> > - *val = scale_delta(tsc, &tsc_to_ns) / 100ul;
> > + perfc_incr(mshv_rdmsr_time_ref_count);
> > + *val = raw_trc_val(d) + trc->off;
> > break;
> > }
> >
> > @@ -450,9 +484,10 @@ static int viridian_save_domain_ctxt(struct domain
> *d, hvm_domain_context_t *h)
> > if ( !is_viridian_domain(d) )
> > return 0;
> >
> > - ctxt.hypercall_gpa = d->arch.hvm_domain.viridian.hypercall_gpa.raw;
> > - ctxt.guest_os_id = d->arch.hvm_domain.viridian.guest_os_id.raw;
> > -
> > + ctxt.time_ref_count = d-
> >arch.hvm_domain.viridian.time_ref_count.val;
> > + ctxt.hypercall_gpa = d->arch.hvm_domain.viridian.hypercall_gpa.raw;
> > + ctxt.guest_os_id = d->arch.hvm_domain.viridian.guest_os_id.raw;
> > +
> > return (hvm_save_entry(VIRIDIAN_DOMAIN, 0, h, &ctxt) != 0);
> > }
> >
> > @@ -460,11 +495,12 @@ static int viridian_load_domain_ctxt(struct
> domain *d, hvm_domain_context_t *h)
> > {
> > struct hvm_viridian_domain_context ctxt;
> >
> > - if ( hvm_load_entry(VIRIDIAN_DOMAIN, h, &ctxt) != 0 )
> > + if ( hvm_load_entry_zeroextend(VIRIDIAN_DOMAIN, h, &ctxt) != 0 )
> > return -EINVAL;
> >
> > - d->arch.hvm_domain.viridian.hypercall_gpa.raw = ctxt.hypercall_gpa;
> > - d->arch.hvm_domain.viridian.guest_os_id.raw = ctxt.guest_os_id;
> > + d->arch.hvm_domain.viridian.time_ref_count.val =
> ctxt.time_ref_count;
> > + d->arch.hvm_domain.viridian.hypercall_gpa.raw = ctxt.hypercall_gpa;
> > + d->arch.hvm_domain.viridian.guest_os_id.raw = ctxt.guest_os_id;
> >
> > return 0;
> > }
> > diff --git a/xen/common/domain.c b/xen/common/domain.c
> > index 190998c..af5dc82 100644
> > --- a/xen/common/domain.c
> > +++ b/xen/common/domain.c
> > @@ -43,6 +43,10 @@
> > #include <xen/trace.h>
> > #include <xen/tmem.h>
> >
> > +#ifdef CONFIG_X86
> > +#include <asm/hvm/viridian.h>
> > +#endif
> > +
> > /* Linux config option: propageted to domain0 */
> > /* xen_processor_pmbits: xen control Cx, Px, ... */
> > unsigned int xen_processor_pmbits = XEN_PROCESSOR_PM_PX;
> > @@ -706,6 +710,11 @@ void domain_shutdown(struct domain *d, u8
> reason)
> > v->paused_for_shutdown = 1;
> > }
> >
> > +#ifdef CONFIG_X86
> > + if ( has_viridian_time_ref_count(d) )
> > + viridian_time_ref_count_freeze(d);
> > +#endif
> > +
> > __domain_finalise_shutdown(d);
> >
> > spin_unlock(&d->shutdown_lock);
> > @@ -925,32 +934,44 @@ int vcpu_unpause_by_systemcontroller(struct
> vcpu *v)
> > return 0;
> > }
> >
> > -void domain_pause(struct domain *d)
> > +void do_domain_pause(struct domain *d, bool_t nosync)
> > {
> > struct vcpu *v;
> >
> > - ASSERT(d != current->domain);
> > -
> > atomic_inc(&d->pause_count);
> >
> > for_each_vcpu( d, v )
> > - vcpu_sleep_sync(v);
> > + if ( nosync )
> > + vcpu_sleep_nosync(v);
> > + else
> > + vcpu_sleep_sync(v);
> > +
> > +#ifdef CONFIG_X86
> > + if ( has_viridian_time_ref_count(d) )
> > + viridian_time_ref_count_freeze(d);
> > +#endif
> > }
> >
> > -void domain_pause_nosync(struct domain *d)
> > +void domain_pause(struct domain *d)
> > {
> > - struct vcpu *v;
> > -
> > - atomic_inc(&d->pause_count);
> > + ASSERT(d != current->domain);
> > + do_domain_pause(d, 0);
> > +}
> >
> > - for_each_vcpu( d, v )
> > - vcpu_sleep_nosync(v);
> > +void domain_pause_nosync(struct domain *d)
> > +{
> > + do_domain_pause(d, 1);
> > }
>
> All this fiddling with a boolean nosync parameter might be neater done
> in the same style as I did with __domain_pause_by_systemcontroller(), by
> passing a function pointer.
>
Yeah - probably would be neater. I was in two minds about that.
> >
> > void domain_unpause(struct domain *d)
> > {
> > struct vcpu *v;
> >
> > +#ifdef CONFIG_X86
> > + if ( has_viridian_time_ref_count(d) )
> > + viridian_time_ref_count_thaw(d);
> > +#endif
> > +
> > if ( atomic_dec_and_test(&d->pause_count) )
> > for_each_vcpu( d, v )
> > vcpu_wake(v);
> > diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-
> x86/hvm/hvm.h
> > index 0d94c48..2dda908 100644
> > --- a/xen/include/asm-x86/hvm/hvm.h
> > +++ b/xen/include/asm-x86/hvm/hvm.h
> > @@ -340,12 +340,19 @@ static inline unsigned long
> hvm_get_shadow_gs_base(struct vcpu *v)
> > return hvm_funcs.get_shadow_gs_base(v);
> > }
> >
> > +
> > +#define has_hvm_params(d) \
> > + ((d)->arch.hvm_domain.params != NULL)
> > +
> > #define viridian_feature_mask(d) \
> > - ((d)->arch.hvm_domain.params[HVM_PARAM_VIRIDIAN])
> > + (has_hvm_params(d) && (d)-
> >arch.hvm_domain.params[HVM_PARAM_VIRIDIAN])
>
> The result of this expression is a boolean, not the viridian feature mask.
>
Dammit.
> Furthermore, I don't think that has_hvm_params() is useful. Any HVM
> domain will have params, and without an HVM check, looking into
> d->arch.hvm_domain is invalid.
>
Not true, which is why I made the change. domain_pause() is called on an HVM
domain before the array is allocated.
Paul
> ~Andrew
>
> >
> > #define is_viridian_domain(d) \
> > (is_hvm_domain(d) && (viridian_feature_mask(d) &
> HVMPV_base_freq))
> >
> > +#define has_viridian_time_ref_count(d) \
> > + (is_viridian_domain(d) && (viridian_feature_mask(d) &
> HVMPV_time_ref_count))
> > +
> > void hvm_hypervisor_cpuid_leaf(uint32_t sub_idx,
> > uint32_t *eax, uint32_t *ebx,
> > uint32_t *ecx, uint32_t *edx);
> > diff --git a/xen/include/asm-x86/hvm/viridian.h b/xen/include/asm-
> x86/hvm/viridian.h
> > index 496da33..4cab2e8 100644
> > --- a/xen/include/asm-x86/hvm/viridian.h
> > +++ b/xen/include/asm-x86/hvm/viridian.h
> > @@ -48,10 +48,24 @@ union viridian_hypercall_gpa
> > } fields;
> > };
> >
> > +struct viridian_time_ref_count
> > +{
> > + unsigned long flags;
> > +
> > +#define _TRC_accessed 0
> > +#define TRC_accessed (1 << _TRC_accessed)
> > +#define _TRC_running 1
> > +#define TRC_running (1 << _TRC_running)
> > +
> > + uint64_t val;
> > + int64_t off;
> > +};
> > +
> > struct viridian_domain
> > {
> > union viridian_guest_os_id guest_os_id;
> > union viridian_hypercall_gpa hypercall_gpa;
> > + struct viridian_time_ref_count time_ref_count;
> > };
> >
> > int
> > @@ -75,4 +89,17 @@ rdmsr_viridian_regs(
> > int
> > viridian_hypercall(struct cpu_user_regs *regs);
> >
> > +void viridian_time_ref_count_freeze(struct domain *d);
> > +void viridian_time_ref_count_thaw(struct domain *d);
> > +
> > #endif /* __ASM_X86_HVM_VIRIDIAN_H__ */
> > +
> > +/*
> > + * Local variables:
> > + * mode: C
> > + * c-file-style: "BSD"
> > + * c-basic-offset: 4
> > + * tab-width: 4
> > + * indent-tabs-mode: nil
> > + * End:
> > + */
> > diff --git a/xen/include/public/arch-x86/hvm/save.h
> b/xen/include/public/arch-x86/hvm/save.h
> > index 16d85a3..88aab7e 100644
> > --- a/xen/include/public/arch-x86/hvm/save.h
> > +++ b/xen/include/public/arch-x86/hvm/save.h
> > @@ -568,6 +568,7 @@ struct hvm_hw_cpu_xsave {
> > struct hvm_viridian_domain_context {
> > uint64_t hypercall_gpa;
> > uint64_t guest_os_id;
> > + uint64_t time_ref_count;
> > };
> >
> > DECLARE_HVM_SAVE_TYPE(VIRIDIAN_DOMAIN, 15, struct
> hvm_viridian_domain_context);
>
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |