[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] [VMX] Add support for Pause-Loop Exiting



[VMX] Add support for Pause-Loop Exiting

New NHM processors will support Pause-Loop Exiting by adding 2 VM-execution
control fields:
PLE_Gap    - upper bound on the amount of time between two successive
             executions of PAUSE in a loop.
PLE_Window - upper bound on the amount of time a guest is allowed to execute in
             a PAUSE loop

If the time between this execution of PAUSE and the previous one exceeds
PLE_Gap, the processor considers this PAUSE to belong to a new loop.
Otherwise, the processor determines the total execution time of this loop
(since the first PAUSE in this loop), and triggers a VM exit if the total time
exceeds PLE_Window.
* Refer to the SDM, volume 3b, sections 21.6.13 & 22.1.3.

Pause-Loop Exiting can be used to detect Lock-Holder Preemption, where one VP
is scheduled out while holding a spinlock, and other VPs waiting for the same
lock are then scheduled in, only to waste CPU time.

Our tests indicate that most spinlocks are held for less than 2^12 cycles.
Performance tests show that with 2X LP over-commitment we can get a +2% perf
improvement for kernel build (even more perf gain with more LPs).

Signed-off-by: Zhai Edwin <edwin.zhai@xxxxxxxxx>

Index: hv/xen/arch/x86/hvm/vmx/vmcs.c
===================================================================
--- hv.orig/xen/arch/x86/hvm/vmx/vmcs.c
+++ hv/xen/arch/x86/hvm/vmx/vmcs.c
@@ -44,6 +44,20 @@ boolean_param("vpid", opt_vpid_enabled);
 static int opt_unrestricted_guest_enabled = 1;
 boolean_param("unrestricted_guest", opt_unrestricted_guest_enabled);
 
+/*
+ * These 2 parameters are used to config the controls for Pause-Loop Exiting:
+ * ple_gap:    upper bound on the amount of time between two successive
+ *             executions of PAUSE in a loop.
+ * ple_window: upper bound on the amount of time a guest is allowed to execute
+ *             in a PAUSE loop.
+ * Time is measured based on a counter that runs at the same rate as the TSC,
+ * refer SDM volume 3b section 21.6.13 & 22.1.3.
+ */
+static unsigned int ple_gap = 41;
+integer_param("ple_gap", ple_gap);
+static unsigned int ple_window = 4096;
+integer_param("ple_window", ple_window);
+
 /* Dynamic (run-time adjusted) execution control flags. */
 u32 vmx_pin_based_exec_control __read_mostly;
 u32 vmx_cpu_based_exec_control __read_mostly;
@@ -140,7 +154,8 @@ static void vmx_init_vmcs_config(void)
         min = 0;
         opt = (SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
                SECONDARY_EXEC_WBINVD_EXITING |
-               SECONDARY_EXEC_ENABLE_EPT);
+               SECONDARY_EXEC_ENABLE_EPT |
+               SECONDARY_EXEC_PAUSE_LOOP_EXITING);
         if ( opt_vpid_enabled )
             opt |= SECONDARY_EXEC_ENABLE_VPID;
         if ( opt_unrestricted_guest_enabled )
@@ -168,6 +183,13 @@ static void vmx_init_vmcs_config(void)
                   SECONDARY_EXEC_UNRESTRICTED_GUEST);
     }
 
+    if ( (_vmx_secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING) &&
+          ple_gap == 0 )
+    {
+        printk("Disable Pause-Loop Exiting.\n");
+        _vmx_secondary_exec_control &= ~ SECONDARY_EXEC_PAUSE_LOOP_EXITING;
+    }
+
 #if defined(__i386__)
     /* If we can't virtualise APIC accesses, the TPR shadow is pointless. */
     if ( !(_vmx_secondary_exec_control &
@@ -556,6 +578,12 @@ static int construct_vmcs(struct vcpu *v
     __vmwrite(VM_EXIT_CONTROLS, vmx_vmexit_control);
     __vmwrite(VM_ENTRY_CONTROLS, vmx_vmentry_control);
 
+    if ( cpu_has_vmx_ple )
+    {
+        __vmwrite(PLE_GAP, ple_gap);
+        __vmwrite(PLE_WINDOW, ple_window);
+    }
+
     if ( cpu_has_vmx_secondary_exec_control )
         __vmwrite(SECONDARY_VM_EXEC_CONTROL,
                   v->arch.hvm_vmx.secondary_exec_control);
Index: hv/xen/arch/x86/hvm/vmx/vmx.c
===================================================================
--- hv.orig/xen/arch/x86/hvm/vmx/vmx.c
+++ hv/xen/arch/x86/hvm/vmx/vmx.c
@@ -2617,6 +2617,13 @@ asmlinkage void vmx_vmexit_handler(struc
         break;
     }
 
+    case EXIT_REASON_PAUSE_INSTRUCTION:
+    {
+        perfc_incr(ple_exits);
+        do_sched_op_compat(SCHEDOP_yield, 0);
+        break;
+    }
+
     default:
     exit_and_crash:
         gdprintk(XENLOG_ERR, "Bad vmexit (reason %x)\n", exit_reason);
Index: hv/xen/include/asm-x86/hvm/vmx/vmcs.h
===================================================================
--- hv.orig/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ hv/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -171,6 +171,7 @@ extern u32 vmx_vmentry_control;
 #define SECONDARY_EXEC_ENABLE_VPID              0x00000020
 #define SECONDARY_EXEC_WBINVD_EXITING           0x00000040
 #define SECONDARY_EXEC_UNRESTRICTED_GUEST       0x00000080
+#define SECONDARY_EXEC_PAUSE_LOOP_EXITING       0x00000400
 extern u32 vmx_secondary_exec_control;
 
 extern bool_t cpu_has_vmx_ins_outs_instr_info;
@@ -200,6 +201,8 @@ extern bool_t cpu_has_vmx_ins_outs_instr
 #define vmx_unrestricted_guest(v)               \
     ((v)->arch.hvm_vmx.secondary_exec_control & \
      SECONDARY_EXEC_UNRESTRICTED_GUEST)
+#define cpu_has_vmx_ple \
+    (vmx_secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING)
 
 /* GUEST_INTERRUPTIBILITY_INFO flags. */
 #define VMX_INTR_SHADOW_STI             0x00000001
@@ -279,6 +282,8 @@ enum vmcs_field {
     VM_ENTRY_INSTRUCTION_LEN        = 0x0000401a,
     TPR_THRESHOLD                   = 0x0000401c,
     SECONDARY_VM_EXEC_CONTROL       = 0x0000401e,
+    PLE_GAP                         = 0x00004020,
+    PLE_WINDOW                      = 0x00004022,
     VM_INSTRUCTION_ERROR            = 0x00004400,
     VM_EXIT_REASON                  = 0x00004402,
     VM_EXIT_INTR_INFO               = 0x00004404,
Index: hv/xen/include/asm-x86/perfc_defn.h
===================================================================
--- hv.orig/xen/include/asm-x86/perfc_defn.h
+++ hv/xen/include/asm-x86/perfc_defn.h
@@ -130,4 +130,6 @@ PERFCOUNTER(mshv_wrmsr_eoi,             
 PERFCOUNTER(realmode_emulations, "realmode instructions emulated")
 PERFCOUNTER(realmode_exits,      "vmexits from realmode")
 
+PERFCOUNTER(ple_exits,    "vmexits from Pause Loop Exiting")
+
 /*#endif*/ /* __XEN_PERFC_DEFN_H__ */

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.