[VMX] Add support for Pause-Loop Exiting
New NHM processors will support Pause-Loop Exiting by adding 2 VM-execution
control fields:
PLE_Gap - upper bound on the amount of time between two successive
executions of PAUSE in a loop.
PLE_Window - upper bound on the amount of time a guest is allowed to execute in
a PAUSE loop
If the time, between this execution of PAUSE and previous one, exceeds the
PLE_Gap, processor consider this PAUSE belongs to a new loop.
Otherwise, processor determins the the total execution time of this loop(since
1st PAUSE in this loop), and triggers a VM exit if total time exceeds the
PLE_Window.
* Refer SDM volume 3b section 21.6.13 & 22.1.3.
Pause-Loop Exiting can be used to detect Lock-Holder Preemption, where one VP
is sched-out after hold a spinlock, then other VPs for same lock are sched-in
to waste the CPU time.
Our tests indicate that most spinlocks are held for less than 2^12 cycles.
Performance tests show that with 2X LP over-commitment we can get +2% perf
improvement for kernel build(Even more perf gain with more LPs).
Signed-off-by: Zhai Edwin <edwin.zhai@xxxxxxxxx>
Index: hv/xen/arch/x86/hvm/vmx/vmcs.c
===================================================================
--- hv.orig/xen/arch/x86/hvm/vmx/vmcs.c
+++ hv/xen/arch/x86/hvm/vmx/vmcs.c
@@ -44,6 +44,20 @@ boolean_param("vpid", opt_vpid_enabled);
static int opt_unrestricted_guest_enabled = 1;
boolean_param("unrestricted_guest", opt_unrestricted_guest_enabled);
+/*
+ * These 2 parameters are used to config the controls for Pause-Loop Exiting:
+ * ple_gap: upper bound on the amount of time between two successive
+ * executions of PAUSE in a loop.
+ * ple_window: upper bound on the amount of time a guest is allowed to execute
+ * in a PAUSE loop.
+ * Time is measured based on a counter that runs at the same rate as the TSC,
+ * refer SDM volume 3b section 21.6.13 & 22.1.3.
+ */
+static unsigned int ple_gap = 41;
+integer_param("ple_gap", ple_gap);
+static unsigned int ple_window = 4096;
+integer_param("ple_window", ple_window);
+
/* Dynamic (run-time adjusted) execution control flags. */
u32 vmx_pin_based_exec_control __read_mostly;
u32 vmx_cpu_based_exec_control __read_mostly;
@@ -140,7 +154,8 @@ static void vmx_init_vmcs_config(void)
min = 0;
opt = (SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
SECONDARY_EXEC_WBINVD_EXITING |
- SECONDARY_EXEC_ENABLE_EPT);
+ SECONDARY_EXEC_ENABLE_EPT |
+ SECONDARY_EXEC_PAUSE_LOOP_EXITING);
if ( opt_vpid_enabled )
opt |= SECONDARY_EXEC_ENABLE_VPID;
if ( opt_unrestricted_guest_enabled )
@@ -168,6 +183,13 @@ static void vmx_init_vmcs_config(void)
SECONDARY_EXEC_UNRESTRICTED_GUEST);
}
+ if ( (_vmx_secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING) &&
+ ple_gap == 0 )
+ {
+ printk("Disable Pause-Loop Exiting.\n");
+ _vmx_secondary_exec_control &= ~ SECONDARY_EXEC_PAUSE_LOOP_EXITING;
+ }
+
#if defined(__i386__)
/* If we can't virtualise APIC accesses, the TPR shadow is pointless. */
if ( !(_vmx_secondary_exec_control &
@@ -556,6 +578,12 @@ static int construct_vmcs(struct vcpu *v
__vmwrite(VM_EXIT_CONTROLS, vmx_vmexit_control);
__vmwrite(VM_ENTRY_CONTROLS, vmx_vmentry_control);
+ if ( cpu_has_vmx_ple )
+ {
+ __vmwrite(PLE_GAP, ple_gap);
+ __vmwrite(PLE_WINDOW, ple_window);
+ }
+
if ( cpu_has_vmx_secondary_exec_control )
__vmwrite(SECONDARY_VM_EXEC_CONTROL,
v->arch.hvm_vmx.secondary_exec_control);
Index: hv/xen/arch/x86/hvm/vmx/vmx.c
===================================================================
--- hv.orig/xen/arch/x86/hvm/vmx/vmx.c
+++ hv/xen/arch/x86/hvm/vmx/vmx.c
@@ -2617,6 +2617,13 @@ asmlinkage void vmx_vmexit_handler(struc
break;
}
+ case EXIT_REASON_PAUSE_INSTRUCTION:
+ {
+ perfc_incr(ple_exits);
+ do_sched_op_compat(SCHEDOP_yield, 0);
+ break;
+ }
+
default:
exit_and_crash:
gdprintk(XENLOG_ERR, "Bad vmexit (reason %x)\n", exit_reason);
Index: hv/xen/include/asm-x86/hvm/vmx/vmcs.h
===================================================================
--- hv.orig/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ hv/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -171,6 +171,7 @@ extern u32 vmx_vmentry_control;
#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
+#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
extern u32 vmx_secondary_exec_control;
extern bool_t cpu_has_vmx_ins_outs_instr_info;
@@ -200,6 +201,8 @@ extern bool_t cpu_has_vmx_ins_outs_instr
#define vmx_unrestricted_guest(v) \
((v)->arch.hvm_vmx.secondary_exec_control & \
SECONDARY_EXEC_UNRESTRICTED_GUEST)
+#define cpu_has_vmx_ple \
+ (vmx_secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING)
/* GUEST_INTERRUPTIBILITY_INFO flags. */
#define VMX_INTR_SHADOW_STI 0x00000001
@@ -279,6 +282,8 @@ enum vmcs_field {
VM_ENTRY_INSTRUCTION_LEN = 0x0000401a,
TPR_THRESHOLD = 0x0000401c,
SECONDARY_VM_EXEC_CONTROL = 0x0000401e,
+ PLE_GAP = 0x00004020,
+ PLE_WINDOW = 0x00004022,
VM_INSTRUCTION_ERROR = 0x00004400,
VM_EXIT_REASON = 0x00004402,
VM_EXIT_INTR_INFO = 0x00004404,
Index: hv/xen/include/asm-x86/perfc_defn.h
===================================================================
--- hv.orig/xen/include/asm-x86/perfc_defn.h
+++ hv/xen/include/asm-x86/perfc_defn.h
@@ -130,4 +130,6 @@ PERFCOUNTER(mshv_wrmsr_eoi,
PERFCOUNTER(realmode_emulations, "realmode instructions emulated")
PERFCOUNTER(realmode_exits, "vmexits from realmode")
+PERFCOUNTER(ple_exits, "vmexits from Pause Loop Exiting")
+
/*#endif*/ /* __XEN_PERFC_DEFN_H__ */
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|