[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH RFC] x86/xsave: prefer eager clearing of state over eager restoring



Other than FXRSTOR, XRSTOR allows for setting components to their
initial state. Utilize this to clear register state immediately after
having saved a vCPU's state (which we don't defer past
__context_switch()), considering that
- this supposedly reduces power consumption,
- this might even free up physical registers,
- we don't normally save/restore FPU state for a vCPU on every context
  switch (in some initial measurements I've observed an approximate
  50:50 relation between the two on a not overly heavily loaded system;
  it's clear anyway that this is heavily dependent on what exactly a
  vCPU is used for).

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
RFC since the full performance effect is still not very clear.

--- a/xen/arch/x86/i387.c
+++ b/xen/arch/x86/i387.c
@@ -33,6 +33,7 @@ static inline void fpu_xrstor(struct vcp
     ok = set_xcr0(v->arch.xcr0_accum | XSTATE_FP_SSE);
     ASSERT(ok);
     xrstor(v, mask);
+    v->arch.xstate_dirty = mask;
     ok = set_xcr0(v->arch.xcr0 ?: XSTATE_FP_SSE);
     ASSERT(ok);
 }
@@ -148,6 +149,9 @@ static inline void fpu_xsave(struct vcpu
     ok = set_xcr0(v->arch.xcr0_accum | XSTATE_FP_SSE);
     ASSERT(ok);
     xsave(v, mask);
+    xstate_load_init(v->arch.xstate_dirty &
+                     v->arch.xsave_area->xsave_hdr.xstate_bv);
+    v->arch.xstate_dirty = 0;
     ok = set_xcr0(v->arch.xcr0 ?: XSTATE_FP_SSE);
     ASSERT(ok);
 }
--- a/xen/arch/x86/spec_ctrl.c
+++ b/xen/arch/x86/spec_ctrl.c
@@ -616,7 +616,7 @@ void __init init_speculation_mitigations
 
     /* Check whether Eager FPU should be enabled by default. */
     if ( opt_eager_fpu == -1 )
-        opt_eager_fpu = should_use_eager_fpu();
+        opt_eager_fpu = !cpu_has_xsave && should_use_eager_fpu();
 
     /* (Re)init BSP state now that default_spec_ctrl_flags has been 
calculated. */
     init_shadow_spec_ctrl_state();
--- a/xen/arch/x86/xstate.c
+++ b/xen/arch/x86/xstate.c
@@ -734,6 +734,7 @@ int handle_xsetbv(u32 index, u64 new_bv)
             cr0 &= ~X86_CR0_TS;
         }
         xrstor(curr, mask);
+        curr->arch.xstate_dirty |= mask;
         if ( cr0 & X86_CR0_TS )
             write_cr0(cr0);
     }
@@ -774,12 +775,19 @@ uint64_t read_bndcfgu(void)
     return xstate->xsave_hdr.xstate_bv & X86_XCR0_BNDCSR ? bndcsr->bndcfgu : 0;
 }
 
+void xstate_load_init(uint64_t mask)
+{
+    struct vcpu *v = idle_vcpu[smp_processor_id()];
+    struct xsave_struct *xstate = v->arch.xsave_area;
+
+    memset(&xstate->xsave_hdr, 0, sizeof(xstate->xsave_hdr));
+    xrstor(v, mask);
+}
+
 void xstate_set_init(uint64_t mask)
 {
     unsigned long cr0 = read_cr0();
     unsigned long xcr0 = this_cpu(xcr0);
-    struct vcpu *v = idle_vcpu[smp_processor_id()];
-    struct xsave_struct *xstate = v->arch.xsave_area;
 
     if ( ~xfeature_mask & mask )
     {
@@ -792,8 +800,7 @@ void xstate_set_init(uint64_t mask)
 
     clts();
 
-    memset(&xstate->xsave_hdr, 0, sizeof(xstate->xsave_hdr));
-    xrstor(v, mask);
+    xstate_load_init(mask);
 
     if ( cr0 & X86_CR0_TS )
         write_cr0(cr0);
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -559,6 +559,11 @@ struct arch_vcpu
      * it explicitly enables it via xcr0.
      */
     uint64_t xcr0_accum;
+    /*
+     * Accumulated set of components which may currently be dirty, and hence
+     * should be cleared immediately after saving state.
+     */
+    uint64_t xstate_dirty;
     /* This variable determines whether nonlazy extended state has been used,
      * and thus should be saved/restored. */
     bool_t nonlazy_xstate_used;
--- a/xen/include/asm-x86/xstate.h
+++ b/xen/include/asm-x86/xstate.h
@@ -95,6 +95,7 @@ uint64_t get_msr_xss(void);
 uint64_t read_bndcfgu(void);
 void xsave(struct vcpu *v, uint64_t mask);
 void xrstor(struct vcpu *v, uint64_t mask);
+void xstate_load_init(uint64_t mask);
 void xstate_set_init(uint64_t mask);
 bool xsave_enabled(const struct vcpu *v);
 int __must_check validate_xstate(u64 xcr0, u64 xcr0_accum,




_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.