[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v10 11/11] x86/idle: Clear SPEC_CTRL while idle



On contemporary hardware, setting IBRS/STIBP has a performance impact on
adjacent hyperthreads.  It is therefore recommended to clear the setting
before becoming idle, to avoid an idle core preventing adjacent userspace
execution from running at full performance.

Care must be taken to ensure there are no ret or indirect branch instructions
between spec_ctrl_{enter,exit}_idle() invocations, which are forced always
inline.  Care must also be taken to avoid using spec_ctrl_enter_idle() between
flushing caches and becoming idle, in cases where that matters.

Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Reviewed-by: Jan Beulich <jbeulich@xxxxxxxx>
---
 xen/arch/x86/acpi/cpu_idle.c    | 21 +++++++++++++++++++++
 xen/arch/x86/cpu/mwait-idle.c   |  7 +++++++
 xen/arch/x86/domain.c           |  8 ++++++++
 xen/include/asm-x86/spec_ctrl.h | 34 ++++++++++++++++++++++++++++++++++
 4 files changed, 70 insertions(+)

diff --git a/xen/arch/x86/acpi/cpu_idle.c b/xen/arch/x86/acpi/cpu_idle.c
index cb1c5da..3f72bda 100644
--- a/xen/arch/x86/acpi/cpu_idle.c
+++ b/xen/arch/x86/acpi/cpu_idle.c
@@ -55,6 +55,7 @@
 #include <asm/mwait.h>
 #include <xen/notifier.h>
 #include <xen/cpu.h>
+#include <asm/spec_ctrl.h>
 
 /*#define DEBUG_PM_CX*/
 
@@ -414,8 +415,14 @@ void mwait_idle_with_hints(unsigned int eax, unsigned int 
ecx)
      */
     if ( (expires > NOW() || expires == 0) && !softirq_pending(cpu) )
     {
+        struct cpu_info *info = get_cpu_info();
+
         cpumask_set_cpu(cpu, &cpuidle_mwait_flags);
+
+        spec_ctrl_enter_idle(info);
         __mwait(eax, ecx);
+        spec_ctrl_exit_idle(info);
+
         cpumask_clear_cpu(cpu, &cpuidle_mwait_flags);
     }
 
@@ -430,6 +437,8 @@ static void acpi_processor_ffh_cstate_enter(struct 
acpi_processor_cx *cx)
 
 static void acpi_idle_do_entry(struct acpi_processor_cx *cx)
 {
+    struct cpu_info *info = get_cpu_info();
+
     switch ( cx->entry_method )
     {
     case ACPI_CSTATE_EM_FFH:
@@ -437,15 +446,19 @@ static void acpi_idle_do_entry(struct acpi_processor_cx 
*cx)
         acpi_processor_ffh_cstate_enter(cx);
         return;
     case ACPI_CSTATE_EM_SYSIO:
+        spec_ctrl_enter_idle(info);
         /* IO port based C-state */
         inb(cx->address);
         /* Dummy wait op - must do something useless after P_LVL2 read
            because chipsets cannot guarantee that STPCLK# signal
            gets asserted in time to freeze execution properly. */
         inl(pmtmr_ioport);
+        spec_ctrl_exit_idle(info);
         return;
     case ACPI_CSTATE_EM_HALT:
+        spec_ctrl_enter_idle(info);
         safe_halt();
+        spec_ctrl_exit_idle(info);
         local_irq_disable();
         return;
     }
@@ -573,7 +586,13 @@ static void acpi_processor_idle(void)
         if ( pm_idle_save )
             pm_idle_save();
         else
+        {
+            struct cpu_info *info = get_cpu_info();
+
+            spec_ctrl_enter_idle(info);
             safe_halt();
+            spec_ctrl_exit_idle(info);
+        }
         return;
     }
 
@@ -752,6 +771,7 @@ void acpi_dead_idle(void)
          * Otherwise, CPU may still hold dirty data, breaking cache coherency,
          * leading to strange errors.
          */
+        spec_ctrl_enter_idle(get_cpu_info());
         wbinvd();
 
         while ( 1 )
@@ -781,6 +801,7 @@ void acpi_dead_idle(void)
         u32 address = cx->address;
         u32 pmtmr_ioport_local = pmtmr_ioport;
 
+        spec_ctrl_enter_idle(get_cpu_info());
         wbinvd();
 
         while ( 1 )
diff --git a/xen/arch/x86/cpu/mwait-idle.c b/xen/arch/x86/cpu/mwait-idle.c
index 762dff1..e357f29 100644
--- a/xen/arch/x86/cpu/mwait-idle.c
+++ b/xen/arch/x86/cpu/mwait-idle.c
@@ -58,6 +58,7 @@
 #include <asm/hpet.h>
 #include <asm/mwait.h>
 #include <asm/msr.h>
+#include <asm/spec_ctrl.h>
 #include <acpi/cpufreq/cpufreq.h>
 
 #define MWAIT_IDLE_VERSION "0.4.1"
@@ -736,7 +737,13 @@ static void mwait_idle(void)
                if (pm_idle_save)
                        pm_idle_save();
                else
+               {
+                       struct cpu_info *info = get_cpu_info();
+
+                       spec_ctrl_enter_idle(info);
                        safe_halt();
+                       spec_ctrl_exit_idle(info);
+               }
                return;
        }
 
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 12f527b..be6d15f 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -55,6 +55,7 @@
 #include <asm/hvm/viridian.h>
 #include <asm/debugreg.h>
 #include <asm/msr.h>
+#include <asm/spec_ctrl.h>
 #include <asm/traps.h>
 #include <asm/nmi.h>
 #include <asm/mce.h>
@@ -75,9 +76,15 @@ void (*dead_idle) (void) __read_mostly = default_dead_idle;
 
 static void default_idle(void)
 {
+    struct cpu_info *info = get_cpu_info();
+
     local_irq_disable();
     if ( cpu_is_haltable(smp_processor_id()) )
+    {
+        spec_ctrl_enter_idle(info);
         safe_halt();
+        spec_ctrl_exit_idle(info);
+    }
     else
         local_irq_enable();
 }
@@ -89,6 +96,7 @@ void default_dead_idle(void)
      * held by the CPUs spinning here indefinitely, and get discarded by
      * a subsequent INIT.
      */
+    spec_ctrl_enter_idle(get_cpu_info());
     wbinvd();
     for ( ; ; )
         halt();
diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h
index e328b0f..5ab4ff3 100644
--- a/xen/include/asm-x86/spec_ctrl.h
+++ b/xen/include/asm-x86/spec_ctrl.h
@@ -20,7 +20,9 @@
 #ifndef __X86_SPEC_CTRL_H__
 #define __X86_SPEC_CTRL_H__
 
+#include <asm/alternative.h>
 #include <asm/current.h>
+#include <asm/msr-index.h>
 
 void init_speculation_mitigations(void);
 
@@ -35,6 +37,38 @@ static inline void init_shadow_spec_ctrl_state(void)
     info->bti_ist_info = default_bti_ist_info;
 }
 
+/* WARNING! `ret`, `call *`, `jmp *` not safe after this call. */
+static always_inline void spec_ctrl_enter_idle(struct cpu_info *info)
+{
+    uint32_t val = 0;
+
+    /*
+     * Latch the new shadow value, then enable shadowing, then update the MSR.
+     * There are no SMP issues here; only local processor ordering concerns.
+     */
+    info->shadow_spec_ctrl = val;
+    barrier();
+    info->use_shadow_spec_ctrl = true;
+    barrier();
+    asm volatile ( ALTERNATIVE(ASM_NOP3, "wrmsr", X86_FEATURE_XEN_IBRS_SET)
+                   :: "a" (val), "c" (MSR_SPEC_CTRL), "d" (0) : "memory" );
+}
+
+/* WARNING! `ret`, `call *`, `jmp *` not safe before this call. */
+static always_inline void spec_ctrl_exit_idle(struct cpu_info *info)
+{
+    uint32_t val = SPEC_CTRL_IBRS;
+
+    /*
+     * Disable shadowing before updating the MSR.  There are no SMP issues
+     * here; only local processor ordering concerns.
+     */
+    info->use_shadow_spec_ctrl = false;
+    barrier();
+    asm volatile ( ALTERNATIVE(ASM_NOP3, "wrmsr", X86_FEATURE_XEN_IBRS_SET)
+                   :: "a" (val), "c" (MSR_SPEC_CTRL), "d" (0) : "memory" );
+}
+
 #endif /* !__X86_SPEC_CTRL_H__ */
 
 /*
-- 
2.1.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.