[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH RFC] x86/xsave: prefer eager clearing of state over eager restoring
Unlike FXRSTOR, XRSTOR allows for setting components to their initial state. Utilize this to clear register state immediately after having saved a vCPU's state (which we don't defer past __context_switch()), considering that
- this supposedly reduces power consumption,
- this might even free up physical registers,
- we don't normally save/restore FPU state for a vCPU on every context switch (in some initial measurements I've observed an approximate 50:50 relation between the two on a not overly heavily loaded system; it's clear anyway that this is heavily dependent on what exactly a vCPU is used for).

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
RFC since the full performance effect is still not very clear.

--- a/xen/arch/x86/i387.c +++ b/xen/arch/x86/i387.c @@ -33,6 +33,7 @@ static inline void fpu_xrstor(struct vcp ok = set_xcr0(v->arch.xcr0_accum | XSTATE_FP_SSE); ASSERT(ok); xrstor(v, mask); + v->arch.xstate_dirty = mask; ok = set_xcr0(v->arch.xcr0 ?: XSTATE_FP_SSE); ASSERT(ok); } @@ -148,6 +149,9 @@ static inline void fpu_xsave(struct vcpu ok = set_xcr0(v->arch.xcr0_accum | XSTATE_FP_SSE); ASSERT(ok); xsave(v, mask); + xstate_load_init(v->arch.xstate_dirty & + v->arch.xsave_area->xsave_hdr.xstate_bv); + v->arch.xstate_dirty = 0; ok = set_xcr0(v->arch.xcr0 ?: XSTATE_FP_SSE); ASSERT(ok); } --- a/xen/arch/x86/spec_ctrl.c +++ b/xen/arch/x86/spec_ctrl.c @@ -616,7 +616,7 @@ void __init init_speculation_mitigations /* Check whether Eager FPU should be enabled by default. */ if ( opt_eager_fpu == -1 ) - opt_eager_fpu = should_use_eager_fpu(); + opt_eager_fpu = !cpu_has_xsave && should_use_eager_fpu(); /* (Re)init BSP state now that default_spec_ctrl_flags has been calculated. 
*/ init_shadow_spec_ctrl_state(); --- a/xen/arch/x86/xstate.c +++ b/xen/arch/x86/xstate.c @@ -734,6 +734,7 @@ int handle_xsetbv(u32 index, u64 new_bv) cr0 &= ~X86_CR0_TS; } xrstor(curr, mask); + curr->arch.xstate_dirty |= mask; if ( cr0 & X86_CR0_TS ) write_cr0(cr0); } @@ -774,12 +775,19 @@ uint64_t read_bndcfgu(void) return xstate->xsave_hdr.xstate_bv & X86_XCR0_BNDCSR ? bndcsr->bndcfgu : 0; } +void xstate_load_init(uint64_t mask) +{ + struct vcpu *v = idle_vcpu[smp_processor_id()]; + struct xsave_struct *xstate = v->arch.xsave_area; + + memset(&xstate->xsave_hdr, 0, sizeof(xstate->xsave_hdr)); + xrstor(v, mask); +} + void xstate_set_init(uint64_t mask) { unsigned long cr0 = read_cr0(); unsigned long xcr0 = this_cpu(xcr0); - struct vcpu *v = idle_vcpu[smp_processor_id()]; - struct xsave_struct *xstate = v->arch.xsave_area; if ( ~xfeature_mask & mask ) { @@ -792,8 +800,7 @@ void xstate_set_init(uint64_t mask) clts(); - memset(&xstate->xsave_hdr, 0, sizeof(xstate->xsave_hdr)); - xrstor(v, mask); + xstate_load_init(mask); if ( cr0 & X86_CR0_TS ) write_cr0(cr0); --- a/xen/include/asm-x86/domain.h +++ b/xen/include/asm-x86/domain.h @@ -559,6 +559,11 @@ struct arch_vcpu * it explicitly enables it via xcr0. */ uint64_t xcr0_accum; + /* + * Accumulated set of components which may currently be dirty, and hence + * should be cleared immediately after saving state. + */ + uint64_t xstate_dirty; /* This variable determines whether nonlazy extended state has been used, * and thus should be saved/restored. 
*/ bool_t nonlazy_xstate_used; --- a/xen/include/asm-x86/xstate.h +++ b/xen/include/asm-x86/xstate.h @@ -95,6 +95,7 @@ uint64_t get_msr_xss(void); uint64_t read_bndcfgu(void); void xsave(struct vcpu *v, uint64_t mask); void xrstor(struct vcpu *v, uint64_t mask); +void xstate_load_init(uint64_t mask); void xstate_set_init(uint64_t mask); bool xsave_enabled(const struct vcpu *v); int __must_check validate_xstate(u64 xcr0, u64 xcr0_accum, _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxxx https://lists.xenproject.org/mailman/listinfo/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |