[Xen-devel] [PATCH 16 of 20] interrupt/exception handling for n2 guest

To: Tim.Deegan@xxxxxxxxxx
Subject: [Xen-devel] [PATCH 16 of 20] interrupt/exception handling for n2 guest
From: Eddie Dong <eddie.dong@xxxxxxxxx>
Date: Thu, 02 Jun 2011 16:57:29 +0800
Cc: xen-devel@xxxxxxxxxxxxxxxxxxx
Delivery-date: Thu, 02 Jun 2011 02:54:10 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
In-reply-to: <patchbomb.1307005033@xxxxxxxxxxxxxxxxxxxx>
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
References: <patchbomb.1307005033@xxxxxxxxxxxxxxxxxxxx>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Eddie Dong <eddie.dong@xxxxxxxxx>
# Date 1307003601 -28800
# Node ID f14f451a780e60e920c057e44fa1bc3ee40495a7
# Parent  bd15acfc9b822ccf27b5c7603e600e5e11733907
interrupt/exception handling for n2 guest

Signed-off-by: Qing He <qing.he@xxxxxxxxx>
Signed-off-by: Eddie Dong <eddie.dong@xxxxxxxxx>
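
For readers new to the nested-virtualization flow, here is a rough, stand-alone sketch (illustrative only, not part of the patch) of the injection decision implemented by nvmx_intr_blocked()/nvmx_intr_intercept() in the intr.c changes below. The struct, field names and enum are hypothetical simplifications, not Xen types:

    #include <stdbool.h>
    #include <stdio.h>

    enum intr_action {
        NORMAL_INJECTION,        /* L1 context: non-nested rules apply      */
        VIRTUAL_VMEXIT_TO_L1,    /* L2 context: deliver by a virtual VMExit */
        BLOCK_FOR_NOW            /* a transition/injection is in flight     */
    };

    struct vcpu_model {                /* hypothetical, simplified vCPU state  */
        bool in_l2_guest_mode;         /* vCPU is currently running L2         */
        bool virtual_vmexit_pending;   /* a virtual VMExit to L1 is pending    */
        bool virtual_vmentry_pending;  /* a virtual VMEntry to L2 is pending   */
        bool entry_event_valid;        /* an event is already queued for entry
                                          (e.g. an idtv event reinjected by L0) */
    };

    /* Route a virtual interrupt originating from L0. */
    static enum intr_action route_virtual_intr(const struct vcpu_model *v)
    {
        if ( v->in_l2_guest_mode )
        {
            /* Block while a context transition or an L0 reinjection is in
             * flight; otherwise deliver by a virtual VMExit, ignoring L2's
             * RFLAGS.IF. */
            if ( v->virtual_vmexit_pending || v->entry_event_valid )
                return BLOCK_FOR_NOW;
            return VIRTUAL_VMEXIT_TO_L1;
        }
        /* In L1 context a pending virtual VMEntry blocks injection... */
        if ( v->virtual_vmentry_pending )
            return BLOCK_FOR_NOW;
        /* ...otherwise the usual non-nested checks decide. */
        return NORMAL_INJECTION;
    }

    int main(void)
    {
        struct vcpu_model v = { .in_l2_guest_mode = true };
        printf("%d\n", route_virtual_intr(&v));  /* VIRTUAL_VMEXIT_TO_L1 */
        return 0;
    }

The real code expresses these states through nv_vmexit_pending, nv_vmswitch_in_progress, nv_vmentry_pending and the valid bit of VM_ENTRY_INTR_INFO.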

diff -r bd15acfc9b82 -r f14f451a780e xen/arch/x86/hvm/vmx/intr.c
--- a/xen/arch/x86/hvm/vmx/intr.c       Thu Jun 02 16:33:21 2011 +0800
+++ b/xen/arch/x86/hvm/vmx/intr.c       Thu Jun 02 16:33:21 2011 +0800
@@ -35,6 +35,7 @@
 #include <asm/hvm/vmx/vmcs.h>
 #include <asm/hvm/vpic.h>
 #include <asm/hvm/vlapic.h>
+#include <asm/hvm/nestedhvm.h>
 #include <public/hvm/ioreq.h>
 #include <asm/hvm/trace.h>
 
@@ -109,6 +110,102 @@ static void enable_intr_window(struct vc
     }
 }
 
+/*
+ * Injecting interrupts for nested virtualization
+ *
+ *  When injecting virtual interrupts (originating from L0), there are
+ *  two major cases: injection in L1 context and injection in L2 context.
+ *   1. L1 context (in_nesting == 0)
+ *     Everything is the same as in the non-nested case: check RFLAGS.IF
+ *     to see whether the injection can be done, and use the VMCS to
+ *     inject the interrupt.
+ *
+ *   2. L2 context (in_nesting == 1)
+ *     The interrupt causes a virtual VMExit; RFLAGS.IF is ignored and
+ *     whether to ack the irq follows intr_ack_on_exit.  Injection is
+ *     normally not blocked, except during:
+ *    a. context transition
+ *     the interrupt needs to be blocked at virtual VMEntry time
+ *    b. L2 idtv reinjection
+ *     if an L2 idtv event is handled within L0 (e.g. an L0 shadow page
+ *     fault), it needs to be reinjected without exiting to L1, and
+ *     interrupt injection must be blocked at that point as well.
+ *
+ *  Unfortunately, interrupt blocking in L2 won't work with simple
+ *  intr_window_open (which depends on L2's IF). To solve this,
+ *  the following algorithm can be used:
+ *   v->arch.hvm_vmx.exec_control.VIRTUAL_INTR_PENDING now denotes
+ *   only the L0 control; the physical control may differ from it.
+ *       - if in L1, it behaves normally, and the intr window is
+ *         written to the physical control as it is
+ *       - if in L2, replace it with MTF (or NMI window) if possible
+ *       - if MTF/NMI window is not used, the intr window can still be
+ *         used but may have a negative impact on interrupt performance.
+ */
+
+enum hvm_intblk nvmx_intr_blocked(struct vcpu *v)
+{
+    int r = hvm_intblk_none;
+    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
+
+    if ( nestedhvm_vcpu_in_guestmode(v) )
+    {
+        if ( nvcpu->nv_vmexit_pending ||
+             nvcpu->nv_vmswitch_in_progress ||
+             (__vmread(VM_ENTRY_INTR_INFO) & INTR_INFO_VALID_MASK) )
+            r = hvm_intblk_rflags_ie;
+    }
+    else if ( nvcpu->nv_vmentry_pending )
+        r = hvm_intblk_rflags_ie;
+
+    return r;
+}
+
+static int nvmx_intr_intercept(struct vcpu *v, struct hvm_intack intack)
+{
+    u32 exit_ctrl;
+
+    /*
+     * TODO:
+     *   - if L1 intr-window exiting == 0
+     *   - vNMI
+     */
+
+    if ( nvmx_intr_blocked(v) != hvm_intblk_none )
+    {
+        enable_intr_window(v, intack);
+        return 1;
+    }
+
+    if ( nestedhvm_vcpu_in_guestmode(v) )
+    {
+        if ( intack.source == hvm_intsrc_pic ||
+                 intack.source == hvm_intsrc_lapic )
+        {
+            vmx_inject_extint(intack.vector);
+
+            exit_ctrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx,
+                            VM_EXIT_CONTROLS);
+            if ( exit_ctrl & VM_EXIT_ACK_INTR_ON_EXIT )
+            {
+                /* for now, duplicate the ack path in vmx_intr_assist */
+                hvm_vcpu_ack_pending_irq(v, intack);
+                pt_intr_post(v, intack);
+
+                intack = hvm_vcpu_has_pending_irq(v);
+                if ( unlikely(intack.source != hvm_intsrc_none) )
+                    enable_intr_window(v, intack);
+            }
+            else
+                enable_intr_window(v, intack);
+
+            return 1;
+        }
+    }
+
+    return 0;
+}
+
 asmlinkage void vmx_intr_assist(void)
 {
     struct hvm_intack intack;
@@ -132,6 +229,9 @@ asmlinkage void vmx_intr_assist(void)
         if ( likely(intack.source == hvm_intsrc_none) )
             goto out;
 
+        if ( unlikely(nvmx_intr_intercept(v, intack)) )
+            goto out;
+
         intblk = hvm_interrupt_blocked(v, intack);
         if ( intblk == hvm_intblk_tpr )
         {
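
The "acknowledge interrupt on exit" handling in nvmx_intr_intercept() above can be hard to follow from the diff alone. A minimal model of that decision is sketched below; the types are hypothetical simplifications, and only the VM_EXIT_ACK_INTR_ON_EXIT value corresponds to the architectural VM-exit control bit:

    #include <stdbool.h>
    #include <stdint.h>

    #define VM_EXIT_ACK_INTR_ON_EXIT  0x00008000u   /* VM-exit controls, bit 15 */

    struct ack_model {                 /* hypothetical, simplified state         */
        uint32_t l1_exit_controls;     /* L1's VM-exit controls (from the vVMCS) */
        bool     another_irq_pending;  /* more interrupts queued behind this one */
        bool     vector_acked;         /* vector consumed at virtual-exit time   */
        bool     intr_window_requested;
    };

    /* Deliver an external interrupt to L1 through a virtual VMExit. */
    static void deliver_to_l1(struct ack_model *m)
    {
        if ( m->l1_exit_controls & VM_EXIT_ACK_INTR_ON_EXIT )
        {
            /* L1 asked for the vector to be acknowledged on exit, so ack it
             * now; if yet another interrupt is pending, request an interrupt
             * window so it is re-evaluated after the virtual exit. */
            m->vector_acked = true;
            if ( m->another_irq_pending )
                m->intr_window_requested = true;
        }
        else
            /* L1 will ack the interrupt itself; keep it pending and open an
             * interrupt window so injection is retried later. */
            m->intr_window_requested = true;
    }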
diff -r bd15acfc9b82 -r f14f451a780e xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Thu Jun 02 16:33:21 2011 +0800
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Thu Jun 02 16:33:21 2011 +0800
@@ -1243,6 +1243,31 @@ void ept_sync_domain(struct domain *d)
                      __ept_sync_domain, d, 1);
 }
 
+void nvmx_enqueue_n2_exceptions(struct vcpu *v, 
+            unsigned long intr_fields, int error_code)
+{
+    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
+
+    if ( !(nvmx->intr.intr_info & INTR_INFO_VALID_MASK) ) {
+        /* enqueue the exception until the VMCS switches back to L1 */
+        nvmx->intr.intr_info = intr_fields;
+        nvmx->intr.error_code = error_code;
+        vcpu_nestedhvm(v).nv_vmexit_pending = 1;
+        return;
+    }
+    else
+        gdprintk(XENLOG_ERR, "Double Fault on Nested Guest: exception %lx %x"
+                 "on %lx %x\n", intr_fields, error_code,
+                 nvmx->intr.intr_info, nvmx->intr.error_code);
+}
+
+static int nvmx_vmexit_exceptions(struct vcpu *v, unsigned int trapnr,
+                      int errcode, unsigned long cr2)
+{
+    nvmx_enqueue_n2_exceptions(v, trapnr, errcode);
+    return NESTEDHVM_VMEXIT_DONE;
+}
+
 static void __vmx_inject_exception(int trap, int type, int error_code)
 {
     unsigned long intr_fields;
@@ -1272,11 +1297,16 @@ static void __vmx_inject_exception(int t
 
 void vmx_inject_hw_exception(int trap, int error_code)
 {
-    unsigned long intr_info = __vmread(VM_ENTRY_INTR_INFO);
+    unsigned long intr_info;
     struct vcpu *curr = current;
 
     int type = X86_EVENTTYPE_HW_EXCEPTION;
 
+    if ( nestedhvm_vcpu_in_guestmode(curr) )
+        intr_info = vcpu_2_nvmx(curr).intr.intr_info;
+    else
+        intr_info = __vmread(VM_ENTRY_INTR_INFO);
+
     switch ( trap )
     {
     case TRAP_debug:
@@ -1308,7 +1338,16 @@ void vmx_inject_hw_exception(int trap, i
             error_code = 0;
     }
 
-    __vmx_inject_exception(trap, type, error_code);
+    if ( nestedhvm_vcpu_in_guestmode(curr) &&
+         nvmx_intercepts_exception(curr, trap, error_code) )
+    {
+        nvmx_enqueue_n2_exceptions(curr,
+            INTR_INFO_VALID_MASK | (type << 8) | trap,
+            error_code);
+        return;
+    }
+    else
+        __vmx_inject_exception(trap, type, error_code);
 
     if ( trap == TRAP_page_fault )
         HVMTRACE_LONG_2D(PF_INJECT, error_code,
@@ -1319,12 +1358,38 @@ void vmx_inject_hw_exception(int trap, i
 
 void vmx_inject_extint(int trap)
 {
+    struct vcpu *v = current;
+    u32    pin_based_cntrl;
+
+    if ( nestedhvm_vcpu_in_guestmode(v) ) {
+        pin_based_cntrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, 
+                                     PIN_BASED_VM_EXEC_CONTROL);
+        if ( pin_based_cntrl & PIN_BASED_EXT_INTR_MASK ) {
+            nvmx_enqueue_n2_exceptions(v,
+               INTR_INFO_VALID_MASK | (X86_EVENTTYPE_EXT_INTR << 8) | trap,
+               HVM_DELIVER_NO_ERROR_CODE);
+            return;
+        }
+    }
     __vmx_inject_exception(trap, X86_EVENTTYPE_EXT_INTR,
                            HVM_DELIVER_NO_ERROR_CODE);
 }
 
 void vmx_inject_nmi(void)
 {
+    struct vcpu *v = current;
+    u32    pin_based_cntrl;
+
+    if ( nestedhvm_vcpu_in_guestmode(v) ) {
+        pin_based_cntrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, 
+                                     PIN_BASED_VM_EXEC_CONTROL);
+        if ( pin_based_cntrl & PIN_BASED_NMI_EXITING ) {
+            nvmx_enqueue_n2_exceptions(v,
+               INTR_INFO_VALID_MASK | (X86_EVENTTYPE_NMI << 8) | TRAP_nmi,
+               HVM_DELIVER_NO_ERROR_CODE);
+            return;
+        }
+    }
     __vmx_inject_exception(2, X86_EVENTTYPE_NMI,
                            HVM_DELIVER_NO_ERROR_CODE);
 }
@@ -1424,7 +1489,10 @@ static struct hvm_function_table __read_
     .nhvm_vcpu_reset      = nvmx_vcpu_reset,
     .nhvm_vcpu_guestcr3   = nvmx_vcpu_guestcr3,
     .nhvm_vcpu_hostcr3    = nvmx_vcpu_hostcr3,
-    .nhvm_vcpu_asid       = nvmx_vcpu_asid
+    .nhvm_vcpu_asid       = nvmx_vcpu_asid,
+    .nhvm_vmcx_guest_intercepts_trap = nvmx_intercepts_exception,
+    .nhvm_vcpu_vmexit_trap = nvmx_vmexit_exceptions,
+    .nhvm_intr_blocked    = nvmx_intr_blocked
 };
 
 struct hvm_function_table * __init start_vmx(void)
@@ -2237,7 +2305,8 @@ asmlinkage void vmx_vmexit_handler(struc
     hvm_maybe_deassert_evtchn_irq();
 
     idtv_info = __vmread(IDT_VECTORING_INFO);
-    if ( exit_reason != EXIT_REASON_TASK_SWITCH )
+    if ( !nestedhvm_vcpu_in_guestmode(v) && 
+         exit_reason != EXIT_REASON_TASK_SWITCH )
         vmx_idtv_reinject(idtv_info);
 
     switch ( exit_reason )
@@ -2585,6 +2654,9 @@ asmlinkage void vmx_vmexit_handler(struc
         domain_crash(v->domain);
         break;
     }
+
+    if ( nestedhvm_vcpu_in_guestmode(v) )
+        nvmx_idtv_handling();
 }
 
 asmlinkage void vmx_vmenter_helper(void)
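
The nested paths added to vmx_inject_extint() and vmx_inject_nmi() above hinge on two pin-based VM-execution control bits set by L1. A short stand-alone illustration of those tests follows; the bit values match the architectural definitions, everything else is a simplification. Note that the tests are bitwise (&): a logical && would make any non-zero control word look like an intercept.

    #include <stdbool.h>
    #include <stdint.h>

    /* Pin-based VM-execution controls (architectural bit positions). */
    #define PIN_BASED_EXT_INTR_MASK  0x00000001u   /* external-interrupt exiting */
    #define PIN_BASED_NMI_EXITING    0x00000008u   /* NMI exiting                */

    /* Does L1 want external interrupts to cause a VMExit from L2? */
    static bool l1_intercepts_extint(uint32_t pin_based_ctrls)
    {
        return (pin_based_ctrls & PIN_BASED_EXT_INTR_MASK) != 0;
    }

    /* Does L1 want NMIs to cause a VMExit from L2? */
    static bool l1_intercepts_nmi(uint32_t pin_based_ctrls)
    {
        return (pin_based_ctrls & PIN_BASED_NMI_EXITING) != 0;
    }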
diff -r bd15acfc9b82 -r f14f451a780e xen/arch/x86/hvm/vmx/vvmx.c
--- a/xen/arch/x86/hvm/vmx/vvmx.c       Thu Jun 02 16:33:21 2011 +0800
+++ b/xen/arch/x86/hvm/vmx/vvmx.c       Thu Jun 02 16:33:21 2011 +0800
@@ -392,6 +392,27 @@ static void vmreturn(struct cpu_user_reg
     regs->eflags = eflags;
 }
 
+int nvmx_intercepts_exception(struct vcpu *v, unsigned int trap,
+                               int error_code)
+{
+    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
+    u32 exception_bitmap, pfec_match = 0, pfec_mask = 0;
+    int r;
+
+    ASSERT(trap < 32);
+
+    exception_bitmap = __get_vvmcs(nvcpu->nv_vvmcx, EXCEPTION_BITMAP);
+    r = (exception_bitmap & (1 << trap)) ? 1 : 0;
+
+    if ( trap == TRAP_page_fault ) {
+        pfec_match = __get_vvmcs(nvcpu->nv_vvmcx, PAGE_FAULT_ERROR_CODE_MATCH);
+        pfec_mask  = __get_vvmcs(nvcpu->nv_vvmcx, PAGE_FAULT_ERROR_CODE_MASK);
+        if ( (error_code & pfec_mask) != pfec_match )
+            r = !r;
+    }
+    return r;
+}
+
 /*
  * Nested VMX uses "strict" condition to exit from 
  * L2 guest if either L1 VMM or L0 VMM expect to exit.
@@ -465,6 +486,7 @@ void nvmx_update_exec_control(struct vcp
         __vmwrite(IO_BITMAP_B, virt_to_maddr(bitmap) + PAGE_SIZE);
     }
 
+    /* TODO: change L0 intr window to MTF or NMI window */
     __vmwrite(CPU_BASED_VM_EXEC_CONTROL, shadow_cntrl);
 }
 
@@ -868,6 +890,42 @@ static void load_vvmcs_host_state(struct
     __set_vvmcs(vvmcs, VM_ENTRY_INTR_INFO, 0);
 }
 
+static void sync_exception_state(struct vcpu *v)
+{
+    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
+    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
+
+    if ( !(nvmx->intr.intr_info & INTR_INFO_VALID_MASK) )
+        return;
+
+    switch ( nvmx->intr.intr_info & INTR_INFO_INTR_TYPE_MASK )
+    {
+    case X86_EVENTTYPE_EXT_INTR:
+        /* override the exit reason: report EXTERNAL_INTERRUPT to L1 */
+        __set_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_REASON,
+                    EXIT_REASON_EXTERNAL_INTERRUPT);
+        __set_vvmcs(nvcpu->nv_vvmcx, EXIT_QUALIFICATION, 0);
+        __set_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_INTR_INFO,
+                    nvmx->intr.intr_info);
+        break;
+
+    case X86_EVENTTYPE_HW_EXCEPTION:
+    case X86_EVENTTYPE_SW_INTERRUPT:
+    case X86_EVENTTYPE_SW_EXCEPTION:
+        /* throw to L1 */
+        __set_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_INTR_INFO,
+                    nvmx->intr.intr_info);
+        __set_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_INTR_ERROR_CODE,
+                    nvmx->intr.error_code);
+        break;
+    case X86_EVENTTYPE_NMI:
+    default:
+        gdprintk(XENLOG_ERR, "Exception state %lx not handled\n",
+               nvmx->intr.intr_info); 
+        break;
+    }
+}
+
 static void virtual_vmexit(struct cpu_user_regs *regs)
 {
     struct vcpu *v = current;
@@ -878,6 +936,7 @@ static void virtual_vmexit(struct cpu_us
 
     sync_vvmcs_ro(v);
     sync_vvmcs_guest_state(v, regs);
+    sync_exception_state(v);
 
     vmx_vmcs_switch(v, v->arch.hvm_vmx.vmcs, nvcpu->nv_n1vmcx);
 
@@ -1169,3 +1228,40 @@ int nvmx_handle_vmwrite(struct cpu_user_
     return X86EMUL_OKAY;
 }
 
+void nvmx_idtv_handling(void)
+{
+    struct vcpu *v = current;
+    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
+    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
+    unsigned int idtv_info = __vmread(IDT_VECTORING_INFO);
+
+    if ( likely(!(idtv_info & INTR_INFO_VALID_MASK)) )
+        return;
+
+    /*
+     * If L0 can resolve the fault that caused the IDT vectoring, the
+     * event should be reinjected into L2; otherwise it is passed to L1.
+     */
+    if ( (__vmread(VM_EXIT_REASON) != EXIT_REASON_EPT_VIOLATION &&
+          !(nvmx->intr.intr_info & INTR_INFO_VALID_MASK)) ||
+         (__vmread(VM_EXIT_REASON) == EXIT_REASON_EPT_VIOLATION &&
+          !nvcpu->nv_vmexit_pending) )
+    {
+        __vmwrite(VM_ENTRY_INTR_INFO, idtv_info & ~INTR_INFO_RESVD_BITS_MASK);
+        if ( idtv_info & INTR_INFO_DELIVER_CODE_MASK )
+            __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE,
+                      __vmread(IDT_VECTORING_ERROR_CODE));
+        /*
+         * SDM 23.2.4, if L1 tries to inject a software interrupt
+         * and the delivery fails, VM_EXIT_INSTRUCTION_LEN receives
+         * the value of previous VM_ENTRY_INSTRUCTION_LEN.
+         *
+         * This means EXIT_INSTRUCTION_LEN is always valid here, for
+         * software interrupts both injected by L1, and generated in L2.
+         */
+        __vmwrite(VM_ENTRY_INSTRUCTION_LEN, __vmread(VM_EXIT_INSTRUCTION_LEN));
+    }
+
+    /* TODO: NMI */
+}
+
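
The reinjection decision in nvmx_idtv_handling() combines the exit reason with the state queued for L1, which is easy to misread in the diff. Below is a stand-alone sketch of just that predicate, under simplified, hypothetical types; only INTR_INFO_VALID_MASK matches the real definition:

    #include <stdbool.h>
    #include <stdint.h>

    #define INTR_INFO_VALID_MASK  0x80000000u   /* bit 31: event info is valid */

    struct idtv_model {                   /* hypothetical, simplified state      */
        uint32_t idt_vectoring_info;      /* IDT_VECTORING_INFO after the exit   */
        bool     exit_is_ept_violation;   /* exit reason was an EPT violation    */
        bool     event_queued_for_l1;     /* nvmx->intr holds an event for L1    */
        bool     virtual_vmexit_pending;  /* this exit is being forwarded to L1  */
    };

    /* Should L0 reinject the interrupted event into L2, rather than letting
     * L1 see it as IDT-vectoring info on a virtual VMExit? */
    static bool reinject_into_l2(const struct idtv_model *m)
    {
        if ( !(m->idt_vectoring_info & INTR_INFO_VALID_MASK) )
            return false;                           /* nothing was in flight */

        if ( m->exit_is_ept_violation )
            /* L0 resolved the violation itself unless the exit is being
             * forwarded to L1 as a virtual VMExit. */
            return !m->virtual_vmexit_pending;

        /* Other exits: reinject unless an event has been queued for L1. */
        return !m->event_queued_for_l1;
    }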
diff -r bd15acfc9b82 -r f14f451a780e xen/include/asm-x86/hvm/vmx/vvmx.h
--- a/xen/include/asm-x86/hvm/vmx/vvmx.h        Thu Jun 02 16:33:21 2011 +0800
+++ b/xen/include/asm-x86/hvm/vmx/vvmx.h        Thu Jun 02 16:33:21 2011 +0800
@@ -93,6 +93,9 @@ int nvmx_vcpu_reset(struct vcpu *v);
 uint64_t nvmx_vcpu_guestcr3(struct vcpu *v);
 uint64_t nvmx_vcpu_hostcr3(struct vcpu *v);
 uint32_t nvmx_vcpu_asid(struct vcpu *v);
+enum hvm_intblk nvmx_intr_blocked(struct vcpu *v);
+int nvmx_intercepts_exception(struct vcpu *v, 
+                              unsigned int trap, int error_code);
 
 int nvmx_handle_vmxon(struct cpu_user_regs *regs);
 int nvmx_handle_vmxoff(struct cpu_user_regs *regs);
@@ -166,6 +169,7 @@ void nvmx_update_secondary_exec_control(
                                         unsigned long value);
 void nvmx_update_exception_bitmap(struct vcpu *v, unsigned long value);
 asmlinkage void nvmx_switch_guest(void);
+void nvmx_idtv_handling(void);
 
 #endif /* __ASM_X86_HVM_VVMX_H__ */
 

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel