# HG changeset patch
# User Eddie Dong <eddie.dong@xxxxxxxxx>
# Date 1307607849 -28800
# Node ID 3ded99964cdf2a9939f5e938ae110ee67e40412a
# Parent c95338e40c50999e64053ccea0dcd07c23449269
Nested VMX: interrupt/exception handling for n2 guest
Signed-off-by: Qing He <qing.he@xxxxxxxxx>
Signed-off-by: Eddie Dong <eddie.dong@xxxxxxxxx>
Acked-by: Tim Deegan <Tim.Deegan@xxxxxxxxxx>
Committed-by: Tim Deegan <Tim.Deegan@xxxxxxxxxx>
---
diff -r c95338e40c50 -r 3ded99964cdf xen/arch/x86/hvm/vmx/intr.c
--- a/xen/arch/x86/hvm/vmx/intr.c Thu Jun 09 16:24:09 2011 +0800
+++ b/xen/arch/x86/hvm/vmx/intr.c Thu Jun 09 16:24:09 2011 +0800
@@ -35,6 +35,7 @@
#include <asm/hvm/vmx/vmcs.h>
#include <asm/hvm/vpic.h>
#include <asm/hvm/vlapic.h>
+#include <asm/hvm/nestedhvm.h>
#include <public/hvm/ioreq.h>
#include <asm/hvm/trace.h>
@@ -109,6 +110,96 @@
}
}
+/*
+ * Injecting interrupts for nested virtualization
+ *
+ * When injecting virtual interrupts (originated from L0), there are
+ * two major possibilities, within L1 context and within L2 context
+ * 1. L1 context (in_nesting == 0)
+ * Everything is the same as without nested, check RFLAGS.IF to
+ * see if the injection can be done, using VMCS to inject the
+ * interrupt
+ *
+ * 2. L2 context (in_nesting == 1)
+ * Causes a virtual VMExit; RFLAGS.IF is ignored, and whether the
+ * irq is acked depends on intr_ack_on_exit. Injection normally
+ * isn't blocked, except for:
+ * a. context transition
+ * interrupt needs to be blocked at virtual VMEntry time
+ * b. L2 idtv reinjection
+ * if L2 idtv is handled within L0 (e.g. L0 shadow page fault),
+ * it needs to be reinjected without exiting to L1, interrupt
+ * injection should be blocked as well at this point.
+ *
+ * Unfortunately, interrupt blocking in L2 won't work with simple
+ * intr_window_open (which depends on L2's IF). To solve this,
+ * the following algorithm can be used:
+ * v->arch.hvm_vmx.exec_control.VIRTUAL_INTR_PENDING now denotes
+ * only L0 control, physical control may be different from it.
+ * - if in L1, it behaves normally, intr window is written
+ * to physical control as it is
+ * - if in L2, replace it with MTF (or NMI window) if possible
+ * - if MTF/NMI window is not used, intr window can still be
+ * used but may have negative impact on interrupt performance.
+ */
+
+/*
+ * Report whether nested-virtualization state currently forbids injecting
+ * an interrupt into vcpu v.
+ *
+ * Returns hvm_intblk_rflags_ie when injection must wait, and
+ * hvm_intblk_none when nothing nested-specific blocks it.
+ */
+enum hvm_intblk nvmx_intr_blocked(struct vcpu *v)
+{
+    struct nestedvcpu *nv = &vcpu_nestedhvm(v);
+
+    /* Not in guest mode: only a pending virtual VMEntry blocks. */
+    if ( !nestedhvm_vcpu_in_guestmode(v) )
+        return nv->nv_vmentry_pending ? hvm_intblk_rflags_ie
+                                      : hvm_intblk_none;
+
+    /* In guest mode: a pending virtual VMExit or an ongoing switch blocks. */
+    if ( nv->nv_vmexit_pending || nv->nv_vmswitch_in_progress )
+        return hvm_intblk_rflags_ie;
+
+    /* An event is already set up for injection on the next VM entry. */
+    if ( __vmread(VM_ENTRY_INTR_INFO) & INTR_INFO_VALID_MASK )
+        return hvm_intblk_rflags_ie;
+
+    return hvm_intblk_none;
+}
+
+/*
+ * Nested-VMX pre-processing of a pending interrupt (intack) for vcpu v.
+ *
+ * Returns 1 when the interrupt has been dealt with here (either deferred by
+ * opening an interrupt window, or passed to vmx_inject_extint()), in which
+ * case vmx_intr_assist() skips its normal injection path.  Returns 0 when
+ * the caller should continue with the regular path.
+ */
+static int nvmx_intr_intercept(struct vcpu *v, struct hvm_intack intack)
+{
+    u32 vmexit_ctl;
+
+    /* Injection is blocked by nested state: retry once a window opens. */
+    if ( nvmx_intr_blocked(v) != hvm_intblk_none )
+    {
+        enable_intr_window(v, intack);
+        return 1;
+    }
+
+    /* Only PIC/LAPIC interrupts arriving in guest mode are handled here. */
+    if ( !nestedhvm_vcpu_in_guestmode(v) )
+        return 0;
+
+    if ( intack.source != hvm_intsrc_pic &&
+         intack.source != hvm_intsrc_lapic )
+        return 0;
+
+    vmx_inject_extint(intack.vector);
+
+    vmexit_ctl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, VM_EXIT_CONTROLS);
+    if ( !(vmexit_ctl & VM_EXIT_ACK_INTR_ON_EXIT) )
+    {
+        /* Interrupt is not acked on exit: keep the window open for it. */
+        enable_intr_window(v, intack);
+        return 1;
+    }
+
+    /* for now, duplicate the ack path in vmx_intr_assist */
+    hvm_vcpu_ack_pending_irq(v, intack);
+    pt_intr_post(v, intack);
+
+    /* If another interrupt is already pending, ask for a window for it. */
+    intack = hvm_vcpu_has_pending_irq(v);
+    if ( unlikely(intack.source != hvm_intsrc_none) )
+        enable_intr_window(v, intack);
+
+    return 1;
+}
+
asmlinkage void vmx_intr_assist(void)
{
struct hvm_intack intack;
@@ -132,6 +223,9 @@
if ( likely(intack.source == hvm_intsrc_none) )
goto out;
+ if ( unlikely(nvmx_intr_intercept(v, intack)) )
+ goto out;
+
intblk = hvm_interrupt_blocked(v, intack);
if ( intblk == hvm_intblk_tpr )
{
diff -r c95338e40c50 -r 3ded99964cdf xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c Thu Jun 09 16:24:09 2011 +0800
+++ b/xen/arch/x86/hvm/vmx/vmx.c Thu Jun 09 16:24:09 2011 +0800
@@ -1240,6 +1240,31 @@
__ept_sync_domain, d, 1);
}
+/*
+ * Queue an event for the nested guest of vcpu v.
+ *
+ * intr_fields is stored as the pending interruption info and error_code as
+ * its error code, and a virtual VMExit is flagged as pending.  Only one
+ * event can be queued: if a valid one is already recorded, the new event is
+ * not stored and an error is logged instead.
+ */
+void nvmx_enqueue_n2_exceptions(struct vcpu *v,
+            unsigned long intr_fields, int error_code)
+{
+    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
+
+    if ( nvmx->intr.intr_info & INTR_INFO_VALID_MASK )
+    {
+        /* Slot already occupied: report both the new and the queued event. */
+        gdprintk(XENLOG_ERR, "Double Fault on Nested Guest: exception %lx %x"
+                 " on %lx %x\n", intr_fields, error_code,
+                 nvmx->intr.intr_info, nvmx->intr.error_code);
+        return;
+    }
+
+    /* enqueue the exception till the VMCS switch back to L1 */
+    nvmx->intr.intr_info = intr_fields;
+    nvmx->intr.error_code = error_code;
+    vcpu_nestedhvm(v).nv_vmexit_pending = 1;
+}
+
+/*
+ * Trap hook installed as .nhvm_vcpu_vmexit_trap: hands the trap to
+ * nvmx_enqueue_n2_exceptions() and reports NESTEDHVM_VMEXIT_DONE.
+ *
+ * cr2 is part of the hook signature but is not used here.
+ * NOTE(review): trapnr is forwarded as the intr_fields value without
+ * INTR_INFO_VALID_MASK or an event type OR-ed in -- confirm this is intended.
+ */
+static int nvmx_vmexit_exceptions(struct vcpu *v, unsigned int trapnr,
+ int errcode, unsigned long cr2)
+{
+ nvmx_enqueue_n2_exceptions(v, trapnr, errcode);
+ return NESTEDHVM_VMEXIT_DONE;
+}
+
static void __vmx_inject_exception(int trap, int type, int error_code)
{
unsigned long intr_fields;
@@ -1269,11 +1294,16 @@
void vmx_inject_hw_exception(int trap, int error_code)
{
- unsigned long intr_info = __vmread(VM_ENTRY_INTR_INFO);
+ unsigned long intr_info;
struct vcpu *curr = current;
int type = X86_EVENTTYPE_HW_EXCEPTION;
+ if ( nestedhvm_vcpu_in_guestmode(curr) )
+ intr_info = vcpu_2_nvmx(curr).intr.intr_info;
+ else
+ intr_info = __vmread(VM_ENTRY_INTR_INFO);
+
switch ( trap )
{
case TRAP_debug:
@@ -1305,7 +1335,16 @@
error_code = 0;
}
- __vmx_inject_exception(trap, type, error_code);
+ if ( nestedhvm_vcpu_in_guestmode(curr) &&
+ nvmx_intercepts_exception(curr, trap, error_code) )
+ {
+ nvmx_enqueue_n2_exceptions (curr,
+ INTR_INFO_VALID_MASK | (type<<8) | trap,
+ error_code);
+ return;
+ }
+ else
+ __vmx_inject_exception(trap, type, error_code);
if ( trap == TRAP_page_fault )
HVMTRACE_LONG_2D(PF_INJECT, error_code,
@@ -1316,12 +1355,38 @@
+/*
+ * Inject external interrupt vector 'trap' into the current vcpu.
+ *
+ * If the vcpu is in nested guest mode and the virtual VMCS has
+ * PIN_BASED_EXT_INTR_MASK set in its pin-based controls, the interrupt is
+ * queued through nvmx_enqueue_n2_exceptions() instead of being injected
+ * directly.  Otherwise it is injected with __vmx_inject_exception().
+ */
void vmx_inject_extint(int trap)
{
+    struct vcpu *v = current;
+    u32 pin_based_cntrl;
+
+    if ( nestedhvm_vcpu_in_guestmode(v) ) {
+        pin_based_cntrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx,
+                                     PIN_BASED_VM_EXEC_CONTROL);
+        /*
+         * This must be a bitwise test.  A logical AND would be true for any
+         * non-zero control word, queueing the interrupt even when this
+         * control bit is clear.
+         */
+        if ( pin_based_cntrl & PIN_BASED_EXT_INTR_MASK ) {
+            nvmx_enqueue_n2_exceptions (v,
+               INTR_INFO_VALID_MASK | (X86_EVENTTYPE_EXT_INTR<<8) | trap,
+               HVM_DELIVER_NO_ERROR_CODE);
+            return;
+        }
+    }
__vmx_inject_exception(trap, X86_EVENTTYPE_EXT_INTR,
 HVM_DELIVER_NO_ERROR_CODE);
}
+/*
+ * Inject an NMI into the current vcpu.
+ *
+ * If the vcpu is in nested guest mode and the virtual VMCS has
+ * PIN_BASED_NMI_EXITING set in its pin-based controls, the NMI is queued
+ * through nvmx_enqueue_n2_exceptions() instead of being injected directly.
+ * Otherwise it is injected with __vmx_inject_exception().
+ */
void vmx_inject_nmi(void)
{
+    struct vcpu *v = current;
+    u32 pin_based_cntrl;
+
+    if ( nestedhvm_vcpu_in_guestmode(v) ) {
+        pin_based_cntrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx,
+                                     PIN_BASED_VM_EXEC_CONTROL);
+        /*
+         * This must be a bitwise test.  A logical AND would be true for any
+         * non-zero control word, queueing the NMI even when this control
+         * bit is clear.
+         */
+        if ( pin_based_cntrl & PIN_BASED_NMI_EXITING ) {
+            nvmx_enqueue_n2_exceptions (v,
+               INTR_INFO_VALID_MASK | (X86_EVENTTYPE_NMI<<8) | TRAP_nmi,
+               HVM_DELIVER_NO_ERROR_CODE);
+            return;
+        }
+    }
__vmx_inject_exception(2, X86_EVENTTYPE_NMI,
 HVM_DELIVER_NO_ERROR_CODE);
}
@@ -1421,7 +1486,10 @@
.nhvm_vcpu_reset = nvmx_vcpu_reset,
.nhvm_vcpu_guestcr3 = nvmx_vcpu_guestcr3,
.nhvm_vcpu_hostcr3 = nvmx_vcpu_hostcr3,
- .nhvm_vcpu_asid = nvmx_vcpu_asid
+ .nhvm_vcpu_asid = nvmx_vcpu_asid,
+ .nhvm_vmcx_guest_intercepts_trap = nvmx_intercepts_exception,
+ .nhvm_vcpu_vmexit_trap = nvmx_vmexit_exceptions,
+ .nhvm_intr_blocked = nvmx_intr_blocked
};
struct hvm_function_table * __init start_vmx(void)
@@ -2232,7 +2300,8 @@
hvm_maybe_deassert_evtchn_irq();
idtv_info = __vmread(IDT_VECTORING_INFO);
- if ( exit_reason != EXIT_REASON_TASK_SWITCH )
+ if ( !nestedhvm_vcpu_in_guestmode(v) &&
+ exit_reason != EXIT_REASON_TASK_SWITCH )
vmx_idtv_reinject(idtv_info);
switch ( exit_reason )
@@ -2584,6 +2653,9 @@
domain_crash(v->domain);
break;
}
+
+ if ( nestedhvm_vcpu_in_guestmode(v) )
+ nvmx_idtv_handling();
}
asmlinkage void vmx_vmenter_helper(void)
diff -r c95338e40c50 -r 3ded99964cdf xen/arch/x86/hvm/vmx/vvmx.c
--- a/xen/arch/x86/hvm/vmx/vvmx.c Thu Jun 09 16:24:09 2011 +0800
+++ b/xen/arch/x86/hvm/vmx/vvmx.c Thu Jun 09 16:24:09 2011 +0800
@@ -393,6 +393,27 @@
regs->eflags = eflags;
}
+/*
+ * Decide, from the virtual VMCS of vcpu v, whether exception vector 'trap'
+ * (with error code 'error_code') is intercepted.
+ *
+ * The answer is the vector's bit in EXCEPTION_BITMAP.  For page faults that
+ * answer is inverted when (error_code & PAGE_FAULT_ERROR_CODE_MASK) differs
+ * from PAGE_FAULT_ERROR_CODE_MATCH.
+ *
+ * trap must be below 32.  Returns 1 if intercepted, 0 otherwise.
+ */
+int nvmx_intercepts_exception(struct vcpu *v, unsigned int trap,
+                               int error_code)
+{
+    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
+    u32 exception_bitmap, pfec_match, pfec_mask;
+    int r;
+
+    ASSERT ( trap < 32 );
+
+    exception_bitmap = __get_vvmcs(nvcpu->nv_vvmcx, EXCEPTION_BITMAP);
+    /* Unsigned constant: shifting a signed 1 by 31 would be undefined. */
+    r = (exception_bitmap & (1u << trap)) ? 1 : 0;
+
+    if ( trap == TRAP_page_fault ) {
+        pfec_match = __get_vvmcs(nvcpu->nv_vvmcx, PAGE_FAULT_ERROR_CODE_MATCH);
+        pfec_mask  = __get_vvmcs(nvcpu->nv_vvmcx, PAGE_FAULT_ERROR_CODE_MASK);
+        /* A mask/match mismatch flips the meaning of the bitmap bit. */
+        if ( (error_code & pfec_mask) != pfec_match )
+            r = !r;
+    }
+    return r;
+}
+
/*
* Nested VMX uses "strict" condition to exit from
* L2 guest if either L1 VMM or L0 VMM expect to exit.
@@ -464,6 +485,7 @@
__vmwrite(IO_BITMAP_B, virt_to_maddr(bitmap) + PAGE_SIZE);
}
+ /* TODO: change L0 intr window to MTF or NMI window */
__vmwrite(CPU_BASED_VM_EXEC_CONTROL, shadow_cntrl);
}
@@ -836,6 +858,42 @@
__set_vvmcs(vvmcs, VM_ENTRY_INTR_INFO, 0);
}
+/*
+ * Copy the event queued in the nested-VMX state of vcpu v (if any) into the
+ * exit-information fields of the virtual VMCS.
+ *
+ * External interrupts rewrite the exit reason to EXTERNAL_INTERRUPT and
+ * clear the exit qualification.  Hardware exceptions, software interrupts
+ * and software exceptions are recorded with their error code.  Any other
+ * event type (including NMI) is only logged.
+ */
+static void sync_exception_state(struct vcpu *v)
+{
+    struct nestedvcpu *nv = &vcpu_nestedhvm(v);
+    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
+    unsigned int type;
+
+    /* Nothing queued. */
+    if ( !(nvmx->intr.intr_info & INTR_INFO_VALID_MASK) )
+        return;
+
+    type = (nvmx->intr.intr_info & INTR_INFO_INTR_TYPE_MASK) >> 8;
+
+    if ( type == X86_EVENTTYPE_EXT_INTR )
+    {
+        /* rename exit_reason to EXTERNAL_INTERRUPT */
+        __set_vvmcs(nv->nv_vvmcx, VM_EXIT_REASON,
+                    EXIT_REASON_EXTERNAL_INTERRUPT);
+        __set_vvmcs(nv->nv_vvmcx, EXIT_QUALIFICATION, 0);
+        __set_vvmcs(nv->nv_vvmcx, VM_EXIT_INTR_INFO,
+                    nvmx->intr.intr_info);
+    }
+    else if ( type == X86_EVENTTYPE_HW_EXCEPTION ||
+              type == X86_EVENTTYPE_SW_INTERRUPT ||
+              type == X86_EVENTTYPE_SW_EXCEPTION )
+    {
+        /* throw to L1 */
+        __set_vvmcs(nv->nv_vvmcx, VM_EXIT_INTR_INFO,
+                    nvmx->intr.intr_info);
+        __set_vvmcs(nv->nv_vvmcx, VM_EXIT_INTR_ERROR_CODE,
+                    nvmx->intr.error_code);
+    }
+    else
+    {
+        /* X86_EVENTTYPE_NMI and anything else: not handled. */
+        gdprintk(XENLOG_ERR, "Exception state %lx not handled\n",
+                 nvmx->intr.intr_info);
+    }
+}
+
static void virtual_vmexit(struct cpu_user_regs *regs)
{
struct vcpu *v = current;
@@ -846,6 +904,7 @@
sync_vvmcs_ro(v);
sync_vvmcs_guest_state(v, regs);
+ sync_exception_state(v);
vmx_vmcs_switch(v->arch.hvm_vmx.vmcs, nvcpu->nv_n1vmcx);
@@ -1158,3 +1217,38 @@
return X86EMUL_OKAY;
}
+/*
+ * Handle a VM exit that interrupted event delivery (valid
+ * IDT_VECTORING_INFO) on the current vcpu.
+ *
+ * If L0 can solve the fault that causes idt vectoring, the event is set up
+ * for re-injection on the next VM entry; otherwise nothing is done here and
+ * it is passed to L1.
+ */
+void nvmx_idtv_handling(void)
+{
+    struct vcpu *curr = current;
+    struct nestedvmx *nvmx = &vcpu_2_nvmx(curr);
+    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(curr);
+    unsigned int idtv_info = __vmread(IDT_VECTORING_INFO);
+
+    if ( likely(!(idtv_info & INTR_INFO_VALID_MASK)) )
+        return;
+
+    if ( __vmread(VM_EXIT_REASON) == EXIT_REASON_EPT_VIOLATION )
+    {
+        /* EPT violation: leave it alone if a virtual VMExit is pending. */
+        if ( nvcpu->nv_vmexit_pending )
+            return;
+    }
+    else if ( nvmx->intr.intr_info & INTR_INFO_VALID_MASK )
+    {
+        /* Any other exit: leave it alone if an event is already queued. */
+        return;
+    }
+
+    /* Re-inject the interrupted event, with its error code if it has one. */
+    __vmwrite(VM_ENTRY_INTR_INFO, idtv_info & ~INTR_INFO_RESVD_BITS_MASK);
+    if ( idtv_info & INTR_INFO_DELIVER_CODE_MASK )
+        __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE,
+                  __vmread(IDT_VECTORING_ERROR_CODE));
+
+    /*
+     * SDM 23.2.4, if L1 tries to inject a software interrupt
+     * and the delivery fails, VM_EXIT_INSTRUCTION_LEN receives
+     * the value of previous VM_ENTRY_INSTRUCTION_LEN.
+     *
+     * This means EXIT_INSTRUCTION_LEN is always valid here, for
+     * software interrupts both injected by L1, and generated in L2.
+     */
+    __vmwrite(VM_ENTRY_INSTRUCTION_LEN, __vmread(VM_EXIT_INSTRUCTION_LEN));
+}
+
diff -r c95338e40c50 -r 3ded99964cdf xen/include/asm-x86/hvm/vmx/vvmx.h
--- a/xen/include/asm-x86/hvm/vmx/vvmx.h Thu Jun 09 16:24:09 2011 +0800
+++ b/xen/include/asm-x86/hvm/vmx/vvmx.h Thu Jun 09 16:24:09 2011 +0800
@@ -93,6 +93,9 @@
uint64_t nvmx_vcpu_guestcr3(struct vcpu *v);
uint64_t nvmx_vcpu_hostcr3(struct vcpu *v);
uint32_t nvmx_vcpu_asid(struct vcpu *v);
+enum hvm_intblk nvmx_intr_blocked(struct vcpu *v);
+int nvmx_intercepts_exception(struct vcpu *v,
+ unsigned int trap, int error_code);
int nvmx_handle_vmxon(struct cpu_user_regs *regs);
int nvmx_handle_vmxoff(struct cpu_user_regs *regs);
@@ -166,6 +169,7 @@
unsigned long value);
void nvmx_update_exception_bitmap(struct vcpu *v, unsigned long value);
asmlinkage void nvmx_switch_guest(void);
+void nvmx_idtv_handling(void);
#endif /* __ASM_X86_HVM_VVMX_H__ */
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|