WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
[Xen-changelog] [xen-unstable] Nested VMX: interrupt/exception handling for n2 guest

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] Nested VMX: interrupt/exception handling for n2 guest
From: Xen patchbot-unstable <patchbot@xxxxxxx>
Date: Thu, 16 Jun 2011 11:12:56 +0100
Delivery-date: Thu, 16 Jun 2011 03:34:13 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Eddie Dong <eddie.dong@xxxxxxxxx>
# Date 1307607849 -28800
# Node ID 3ded99964cdf2a9939f5e938ae110ee67e40412a
# Parent  c95338e40c50999e64053ccea0dcd07c23449269
Nested VMX: interrupt/exception handling for n2 guest

Signed-off-by: Qing He <qing.he@xxxxxxxxx>
Signed-off-by: Eddie Dong <eddie.dong@xxxxxxxxx>
Acked-by: Tim Deegan <Tim.Deegan@xxxxxxxxxx>
Committed-by: Tim Deegan <Tim.Deegan@xxxxxxxxxx>
---


diff -r c95338e40c50 -r 3ded99964cdf xen/arch/x86/hvm/vmx/intr.c
--- a/xen/arch/x86/hvm/vmx/intr.c       Thu Jun 09 16:24:09 2011 +0800
+++ b/xen/arch/x86/hvm/vmx/intr.c       Thu Jun 09 16:24:09 2011 +0800
@@ -35,6 +35,7 @@
 #include <asm/hvm/vmx/vmcs.h>
 #include <asm/hvm/vpic.h>
 #include <asm/hvm/vlapic.h>
+#include <asm/hvm/nestedhvm.h>
 #include <public/hvm/ioreq.h>
 #include <asm/hvm/trace.h>
 
@@ -109,6 +110,96 @@
     }
 }
 
+/*
+ * Injecting interrupts for nested virtualization
+ *
+ *  When injecting virtual interrupts (originating from L0), there
+ *  are two major cases: within L1 context and within L2 context.
+ *   1. L1 context (in_nesting == 0)
+ *     Everything is the same as without nesting: check RFLAGS.IF to
+ *     see whether the injection can be done, and use the VMCS to
+ *     inject the interrupt.
+ *
+ *   2. L2 context (in_nesting == 1)
+ *     Cause a virtual VMExit; RFLAGS.IF is ignored, and whether to
+ *     ack the irq is decided by intr_ack_on_exit. This normally
+ *     shouldn't block, except for:
+ *    a. context transition
+ *     the interrupt needs to be blocked at virtual VMEntry time
+ *    b. L2 idtv reinjection
+ *     if the L2 idtv event is handled within L0 (e.g. an L0 shadow
+ *     page fault), it needs to be reinjected without exiting to L1;
+ *     interrupt injection should be blocked at this point as well.
+ *
+ *  Unfortunately, interrupt blocking in L2 won't work with a simple
+ *  intr_window_open check (which depends on L2's IF). To solve this,
+ *  the following algorithm is used:
+ *   v->arch.hvm_vmx.exec_control.VIRTUAL_INTR_PENDING now denotes
+ *   only the L0 control; the physical control may differ from it.
+ *       - if in L1, it behaves normally, and the intr window is
+ *         written to the physical control as is
+ *       - if in L2, replace it with MTF (or NMI window) if possible
+ *       - if MTF/NMI window is not used, the intr window can still
+ *         be used, but may hurt interrupt performance.
+ */
+
+enum hvm_intblk nvmx_intr_blocked(struct vcpu *v)
+{
+    int r = hvm_intblk_none;
+    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
+
+    if ( nestedhvm_vcpu_in_guestmode(v) )
+    {
+        if ( nvcpu->nv_vmexit_pending ||
+             nvcpu->nv_vmswitch_in_progress ||
+             (__vmread(VM_ENTRY_INTR_INFO) & INTR_INFO_VALID_MASK) )
+            r = hvm_intblk_rflags_ie;
+    }
+    else if ( nvcpu->nv_vmentry_pending )
+        r = hvm_intblk_rflags_ie;
+
+    return r;
+}
+
+static int nvmx_intr_intercept(struct vcpu *v, struct hvm_intack intack)
+{
+    u32 exit_ctrl;
+
+    if ( nvmx_intr_blocked(v) != hvm_intblk_none )
+    {
+        enable_intr_window(v, intack);
+        return 1;
+    }
+
+    if ( nestedhvm_vcpu_in_guestmode(v) )
+    {
+        if ( intack.source == hvm_intsrc_pic ||
+             intack.source == hvm_intsrc_lapic )
+        {
+            vmx_inject_extint(intack.vector);
+
+            exit_ctrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx,
+                            VM_EXIT_CONTROLS);
+            if ( exit_ctrl & VM_EXIT_ACK_INTR_ON_EXIT )
+            {
+                /* for now, duplicate the ack path in vmx_intr_assist */
+                hvm_vcpu_ack_pending_irq(v, intack);
+                pt_intr_post(v, intack);
+
+                intack = hvm_vcpu_has_pending_irq(v);
+                if ( unlikely(intack.source != hvm_intsrc_none) )
+                    enable_intr_window(v, intack);
+            }
+            else
+                enable_intr_window(v, intack);
+
+            return 1;
+        }
+    }
+
+    return 0;
+}
+
 asmlinkage void vmx_intr_assist(void)
 {
     struct hvm_intack intack;
@@ -132,6 +223,9 @@
         if ( likely(intack.source == hvm_intsrc_none) )
             goto out;
 
+        if ( unlikely(nvmx_intr_intercept(v, intack)) )
+            goto out;
+
         intblk = hvm_interrupt_blocked(v, intack);
         if ( intblk == hvm_intblk_tpr )
         {
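
The intr.c comment block above names three outcomes for an L0-originated
interrupt: inject through the VMCS in L1 context, cause a virtual VMExit
in L2 context, or block (context transition, idtv reinjection). As a
rough self-contained sketch of that decision -- not part of the patch,
and using simplified stand-in types rather than Xen's real ones:

#include <stdio.h>
#include <stdbool.h>

enum outcome { INJECT_VIA_VMCS, VIRTUAL_VMEXIT, BLOCKED };

struct model_vcpu {
    bool in_l2;                /* nestedhvm_vcpu_in_guestmode() */
    bool vmentry_in_progress;  /* blocking case (a): context transition */
    bool idtv_pending;         /* blocking case (b): L2 idtv reinjection */
    bool rflags_if;            /* only consulted in L1 context */
};

static enum outcome decide(const struct model_vcpu *v)
{
    if ( !v->in_l2 )
        /* Case 1: L1 context, RFLAGS.IF gates the injection. */
        return v->rflags_if ? INJECT_VIA_VMCS : BLOCKED;

    /*
     * Case 2: L2 context -- RFLAGS.IF is ignored, but the two
     * blocking sub-cases from the comment block still apply.
     */
    if ( v->vmentry_in_progress || v->idtv_pending )
        return BLOCKED;

    return VIRTUAL_VMEXIT;
}

int main(void)
{
    struct model_vcpu l1  = { .in_l2 = false, .rflags_if = true };
    struct model_vcpu l2  = { .in_l2 = true };
    struct model_vcpu l2b = { .in_l2 = true, .idtv_pending = true };

    printf("L1, IF=1 -> %d (0 = inject via VMCS)\n", decide(&l1));
    printf("L2, idle -> %d (1 = virtual VMExit)\n", decide(&l2));
    printf("L2, idtv -> %d (2 = blocked)\n", decide(&l2b));
    return 0;
}

In the real code the blocked cases are expressed through
nvmx_intr_blocked() returning hvm_intblk_rflags_ie, which makes
vmx_intr_assist fall back to enable_intr_window().
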
diff -r c95338e40c50 -r 3ded99964cdf xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Thu Jun 09 16:24:09 2011 +0800
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Thu Jun 09 16:24:09 2011 +0800
@@ -1240,6 +1240,31 @@
                      __ept_sync_domain, d, 1);
 }
 
+void nvmx_enqueue_n2_exceptions(struct vcpu *v, 
+            unsigned long intr_fields, int error_code)
+{
+    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
+
+    if ( !(nvmx->intr.intr_info & INTR_INFO_VALID_MASK) ) {
+        /* enqueue the exception until the VMCS switches back to L1 */
+        nvmx->intr.intr_info = intr_fields;
+        nvmx->intr.error_code = error_code;
+        vcpu_nestedhvm(v).nv_vmexit_pending = 1;
+        return;
+    }
+    else
+        gdprintk(XENLOG_ERR, "Double Fault on Nested Guest: exception %lx %x "
+                 "on %lx %x\n", intr_fields, error_code,
+                 nvmx->intr.intr_info, nvmx->intr.error_code);
+}
+
+static int nvmx_vmexit_exceptions(struct vcpu *v, unsigned int trapnr,
+                      int errcode, unsigned long cr2)
+{
+    nvmx_enqueue_n2_exceptions(v, trapnr, errcode);
+    return NESTEDHVM_VMEXIT_DONE;
+}
+
 static void __vmx_inject_exception(int trap, int type, int error_code)
 {
     unsigned long intr_fields;
@@ -1269,11 +1294,16 @@
 
 void vmx_inject_hw_exception(int trap, int error_code)
 {
-    unsigned long intr_info = __vmread(VM_ENTRY_INTR_INFO);
+    unsigned long intr_info;
     struct vcpu *curr = current;
 
     int type = X86_EVENTTYPE_HW_EXCEPTION;
 
+    if ( nestedhvm_vcpu_in_guestmode(curr) )
+        intr_info = vcpu_2_nvmx(curr).intr.intr_info;
+    else
+        intr_info = __vmread(VM_ENTRY_INTR_INFO);
+
     switch ( trap )
     {
     case TRAP_debug:
@@ -1305,7 +1335,16 @@
             error_code = 0;
     }
 
-    __vmx_inject_exception(trap, type, error_code);
+    if ( nestedhvm_vcpu_in_guestmode(curr) &&
+         nvmx_intercepts_exception(curr, trap, error_code) )
+    {
+        nvmx_enqueue_n2_exceptions(curr,
+            INTR_INFO_VALID_MASK | (type << 8) | trap,
+            error_code);
+        return;
+    }
+    else
+        __vmx_inject_exception(trap, type, error_code);
 
     if ( trap == TRAP_page_fault )
         HVMTRACE_LONG_2D(PF_INJECT, error_code,
@@ -1316,12 +1355,38 @@
 
 void vmx_inject_extint(int trap)
 {
+    struct vcpu *v = current;
+    u32    pin_based_cntrl;
+
+    if ( nestedhvm_vcpu_in_guestmode(v) ) {
+        pin_based_cntrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, 
+                                     PIN_BASED_VM_EXEC_CONTROL);
+        if ( pin_based_cntrl & PIN_BASED_EXT_INTR_MASK ) {
+            nvmx_enqueue_n2_exceptions(v,
+                INTR_INFO_VALID_MASK | (X86_EVENTTYPE_EXT_INTR << 8) | trap,
+                HVM_DELIVER_NO_ERROR_CODE);
+            return;
+        }
+    }
     __vmx_inject_exception(trap, X86_EVENTTYPE_EXT_INTR,
                            HVM_DELIVER_NO_ERROR_CODE);
 }
 
 void vmx_inject_nmi(void)
 {
+    struct vcpu *v = current;
+    u32    pin_based_cntrl;
+
+    if ( nestedhvm_vcpu_in_guestmode(v) ) {
+        pin_based_cntrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, 
+                                     PIN_BASED_VM_EXEC_CONTROL);
+        if ( pin_based_cntrl & PIN_BASED_NMI_EXITING ) {
+            nvmx_enqueue_n2_exceptions(v,
+                INTR_INFO_VALID_MASK | (X86_EVENTTYPE_NMI << 8) | TRAP_nmi,
+                HVM_DELIVER_NO_ERROR_CODE);
+            return;
+        }
+    }
     __vmx_inject_exception(2, X86_EVENTTYPE_NMI,
                            HVM_DELIVER_NO_ERROR_CODE);
 }
@@ -1421,7 +1486,10 @@
     .nhvm_vcpu_reset      = nvmx_vcpu_reset,
     .nhvm_vcpu_guestcr3   = nvmx_vcpu_guestcr3,
     .nhvm_vcpu_hostcr3    = nvmx_vcpu_hostcr3,
-    .nhvm_vcpu_asid       = nvmx_vcpu_asid
+    .nhvm_vcpu_asid       = nvmx_vcpu_asid,
+    .nhvm_vmcx_guest_intercepts_trap = nvmx_intercepts_exception,
+    .nhvm_vcpu_vmexit_trap = nvmx_vmexit_exceptions,
+    .nhvm_intr_blocked    = nvmx_intr_blocked
 };
 
 struct hvm_function_table * __init start_vmx(void)
@@ -2232,7 +2300,8 @@
     hvm_maybe_deassert_evtchn_irq();
 
     idtv_info = __vmread(IDT_VECTORING_INFO);
-    if ( exit_reason != EXIT_REASON_TASK_SWITCH )
+    if ( !nestedhvm_vcpu_in_guestmode(v) && 
+         exit_reason != EXIT_REASON_TASK_SWITCH )
         vmx_idtv_reinject(idtv_info);
 
     switch ( exit_reason )
@@ -2584,6 +2653,9 @@
         domain_crash(v->domain);
         break;
     }
+
+    if ( nestedhvm_vcpu_in_guestmode(v) )
+        nvmx_idtv_handling();
 }
 
 asmlinkage void vmx_vmenter_helper(void)
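
The guest-mode paths in vmx_inject_extint() and vmx_inject_nmi() above
test single bits of the virtual pin-based VM-execution controls, which
is why a bitwise AND is needed (a logical && against a non-zero mask
constant is true whenever the control word is non-zero). A minimal
standalone demonstration, with the bit positions restated locally per
the Intel SDM rather than taken from Xen's headers:

#include <stdio.h>
#include <stdint.h>

#define PIN_BASED_EXT_INTR_MASK 0x00000001u  /* bit 0: ext-interrupt exiting */
#define PIN_BASED_NMI_EXITING   0x00000008u  /* bit 3: NMI exiting */

int main(void)
{
    /* NMI exiting enabled, external-interrupt exiting disabled. */
    uint32_t ctrl = PIN_BASED_NMI_EXITING;

    /* Bitwise AND tests the individual bit: prints 0, as intended. */
    printf("bitwise: %d\n", !!(ctrl & PIN_BASED_EXT_INTR_MASK));

    /* Logical AND reduces to (ctrl != 0 && const != 0): prints 1,
     * i.e. it would wrongly claim ext-interrupt exiting is enabled. */
    printf("logical: %d\n", !!(ctrl && PIN_BASED_EXT_INTR_MASK));
    return 0;
}
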
diff -r c95338e40c50 -r 3ded99964cdf xen/arch/x86/hvm/vmx/vvmx.c
--- a/xen/arch/x86/hvm/vmx/vvmx.c       Thu Jun 09 16:24:09 2011 +0800
+++ b/xen/arch/x86/hvm/vmx/vvmx.c       Thu Jun 09 16:24:09 2011 +0800
@@ -393,6 +393,27 @@
     regs->eflags = eflags;
 }
 
+int nvmx_intercepts_exception(struct vcpu *v, unsigned int trap,
+                               int error_code)
+{
+    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
+    u32 exception_bitmap, pfec_match = 0, pfec_mask = 0;
+    int r;
+
+    ASSERT(trap < 32);
+
+    exception_bitmap = __get_vvmcs(nvcpu->nv_vvmcx, EXCEPTION_BITMAP);
+    r = (exception_bitmap & (1U << trap)) ? 1 : 0;
+
+    if ( trap == TRAP_page_fault ) {
+        pfec_match = __get_vvmcs(nvcpu->nv_vvmcx, PAGE_FAULT_ERROR_CODE_MATCH);
+        pfec_mask  = __get_vvmcs(nvcpu->nv_vvmcx, PAGE_FAULT_ERROR_CODE_MASK);
+        if ( (error_code & pfec_mask) != pfec_match )
+            r = !r;
+    }
+    return r;
+}
+
 /*
  * Nested VMX uses "strict" condition to exit from 
  * L2 guest if either L1 VMM or L0 VMM expect to exit.
@@ -464,6 +485,7 @@
         __vmwrite(IO_BITMAP_B, virt_to_maddr(bitmap) + PAGE_SIZE);
     }
 
+    /* TODO: change L0 intr window to MTF or NMI window */
     __vmwrite(CPU_BASED_VM_EXEC_CONTROL, shadow_cntrl);
 }
 
@@ -836,6 +858,42 @@
     __set_vvmcs(vvmcs, VM_ENTRY_INTR_INFO, 0);
 }
 
+static void sync_exception_state(struct vcpu *v)
+{
+    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
+    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
+
+    if ( !(nvmx->intr.intr_info & INTR_INFO_VALID_MASK) )
+        return;
+
+    switch ( (nvmx->intr.intr_info & INTR_INFO_INTR_TYPE_MASK) >> 8 )
+    {
+    case X86_EVENTTYPE_EXT_INTR:
+        /* overwrite the exit reason with EXTERNAL_INTERRUPT */
+        __set_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_REASON,
+                    EXIT_REASON_EXTERNAL_INTERRUPT);
+        __set_vvmcs(nvcpu->nv_vvmcx, EXIT_QUALIFICATION, 0);
+        __set_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_INTR_INFO,
+                    nvmx->intr.intr_info);
+        break;
+
+    case X86_EVENTTYPE_HW_EXCEPTION:
+    case X86_EVENTTYPE_SW_INTERRUPT:
+    case X86_EVENTTYPE_SW_EXCEPTION:
+        /* throw to L1 */
+        __set_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_INTR_INFO,
+                    nvmx->intr.intr_info);
+        __set_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_INTR_ERROR_CODE,
+                    nvmx->intr.error_code);
+        break;
+    case X86_EVENTTYPE_NMI:
+    default:
+        gdprintk(XENLOG_ERR, "Exception state %lx not handled\n",
+               nvmx->intr.intr_info); 
+        break;
+    }
+}
+
 static void virtual_vmexit(struct cpu_user_regs *regs)
 {
     struct vcpu *v = current;
@@ -846,6 +904,7 @@
 
     sync_vvmcs_ro(v);
     sync_vvmcs_guest_state(v, regs);
+    sync_exception_state(v);
 
     vmx_vmcs_switch(v->arch.hvm_vmx.vmcs, nvcpu->nv_n1vmcx);
 
@@ -1158,3 +1217,38 @@
     return X86EMUL_OKAY;
 }
 
+void nvmx_idtv_handling(void)
+{
+    struct vcpu *v = current;
+    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
+    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
+    unsigned int idtv_info = __vmread(IDT_VECTORING_INFO);
+
+    if ( likely(!(idtv_info & INTR_INFO_VALID_MASK)) )
+        return;
+
+    /*
+     * If L0 can handle the fault that caused the IDT vectoring, the
+     * event should be reinjected; otherwise, pass it to L1.
+     */
+    if ( (__vmread(VM_EXIT_REASON) != EXIT_REASON_EPT_VIOLATION &&
+          !(nvmx->intr.intr_info & INTR_INFO_VALID_MASK)) ||
+         (__vmread(VM_EXIT_REASON) == EXIT_REASON_EPT_VIOLATION &&
+          !nvcpu->nv_vmexit_pending) )
+    {
+        __vmwrite(VM_ENTRY_INTR_INFO, idtv_info & ~INTR_INFO_RESVD_BITS_MASK);
+        if ( idtv_info & INTR_INFO_DELIVER_CODE_MASK )
+            __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE,
+                      __vmread(IDT_VECTORING_ERROR_CODE));
+        /*
+         * Per SDM 23.2.4, if L1 tries to inject a software interrupt
+         * and the delivery fails, VM_EXIT_INSTRUCTION_LEN receives
+         * the value of the previous VM_ENTRY_INSTRUCTION_LEN.
+         *
+         * This means VM_EXIT_INSTRUCTION_LEN is always valid here, for
+         * software interrupts both injected by L1 and generated in L2.
+         */
+        __vmwrite(VM_ENTRY_INSTRUCTION_LEN, __vmread(VM_EXIT_INSTRUCTION_LEN));
+    }
+}
+
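
nvmx_intercepts_exception() above implements the SDM's page-fault
filtering rule: bit 14 of the exception bitmap decides whether #PF
exits to L1, and a mismatch between the masked error code and
PAGE_FAULT_ERROR_CODE_MATCH inverts that decision. A worked
standalone example, with made-up PFEC policy values:

#include <stdio.h>
#include <stdint.h>

/*
 * Same rule as nvmx_intercepts_exception(): start from bit 14 of the
 * exception bitmap, then invert if the masked error code does not
 * equal the match value.
 */
static int pf_goes_to_l1(int bitmap_bit14, uint32_t pfec_mask,
                         uint32_t pfec_match, uint32_t error_code)
{
    int r = bitmap_bit14 ? 1 : 0;

    if ( (error_code & pfec_mask) != pfec_match )
        r = !r;
    return r;
}

int main(void)
{
    /* Hypothetical L1 policy: intercept only write faults (PFEC bit 1). */
    uint32_t mask = 0x2, match = 0x2;

    printf("write fault (ec=0x2) -> %d (1 = exit to L1)\n",
           pf_goes_to_l1(1, mask, match, 0x2));
    printf("read fault  (ec=0x0) -> %d (0 = stays with L0)\n",
           pf_goes_to_l1(1, mask, match, 0x0));
    return 0;
}
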
diff -r c95338e40c50 -r 3ded99964cdf xen/include/asm-x86/hvm/vmx/vvmx.h
--- a/xen/include/asm-x86/hvm/vmx/vvmx.h        Thu Jun 09 16:24:09 2011 +0800
+++ b/xen/include/asm-x86/hvm/vmx/vvmx.h        Thu Jun 09 16:24:09 2011 +0800
@@ -93,6 +93,9 @@
 uint64_t nvmx_vcpu_guestcr3(struct vcpu *v);
 uint64_t nvmx_vcpu_hostcr3(struct vcpu *v);
 uint32_t nvmx_vcpu_asid(struct vcpu *v);
+enum hvm_intblk nvmx_intr_blocked(struct vcpu *v);
+int nvmx_intercepts_exception(struct vcpu *v, 
+                              unsigned int trap, int error_code);
 
 int nvmx_handle_vmxon(struct cpu_user_regs *regs);
 int nvmx_handle_vmxoff(struct cpu_user_regs *regs);
@@ -166,6 +169,7 @@
                                         unsigned long value);
 void nvmx_update_exception_bitmap(struct vcpu *v, unsigned long value);
 asmlinkage void nvmx_switch_guest(void);
+void nvmx_idtv_handling(void);
 
 #endif /* __ASM_X86_HVM_VVMX_H__ */
 

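For reference, the intr_info words enqueued throughout this patch
follow the VM-entry interruption-information layout: vector in bits
7:0, event type in bits 10:8, valid in bit 31. A small sketch of the
packing used by the nvmx_enqueue_n2_exceptions() calls, with the
constants restated locally for illustration (Xen's own names differ
slightly, e.g. INTR_INFO_INTR_TYPE_MASK):

#include <stdio.h>
#include <stdint.h>

#define INTR_INFO_VALID_MASK       (1u << 31)
#define INTR_INFO_TYPE_MASK        (7u << 8)
#define X86_EVENTTYPE_HW_EXCEPTION 3u

static uint32_t pack_intr_info(uint32_t type, uint32_t vector)
{
    /* Mirrors INTR_INFO_VALID_MASK | (type << 8) | trap in the patch. */
    return INTR_INFO_VALID_MASK | (type << 8) | vector;
}

int main(void)
{
    uint32_t info = pack_intr_info(X86_EVENTTYPE_HW_EXCEPTION, 14); /* #PF */

    printf("intr_info = 0x%08x\n", info);
    printf("  vector  = %u\n", info & 0xffu);
    printf("  type    = %u\n", (info & INTR_INFO_TYPE_MASK) >> 8);
    printf("  valid   = %u\n", !!(info & INTR_INFO_VALID_MASK));
    return 0;
}
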
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
