
[Xen-devel] [PATCH 10/16] vmx: nest: L1 <-> L2 context switch



This patch adds the mode switch between L1 and L2: when a virtual vmentry is
pending, L2 controls and guest state are loaded from the virtual VMCS (vvmcs)
into the shadow VMCS; when a virtual vmexit is pending, guest state and the
read-only exit fields are synced back into the vvmcs and L1's host state is
restored.
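
At a glance, the dispatch added on the vmentry path is (a condensed sketch of
the code below; the softirq race check and the EFER/paging-mode handling are
omitted here):

/* Called from vmx_asm_do_vmentry before each hardware vmentry. */
void vmx_nest_switch_mode(void)
{
    struct vcpu *v = current;
    struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
    struct cpu_user_regs *regs = guest_cpu_user_regs();

    if ( v->arch.hvm_vcpu.in_nesting && nest->vmexit_pending )
        /* L2 -> L1: sync svmcs guest/read-only fields back into the
         * vvmcs, switch back to the hvmcs and load L1's host state. */
        virtual_vmexit(regs);
    else if ( !v->arch.hvm_vcpu.in_nesting && nest->vmresume_pending )
        /* L1 -> L2: switch from the hvmcs to the svmcs and load
         * controls and guest state from the vvmcs. */
        virtual_vmentry(regs);
}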

Signed-off-by: Qing He <qing.he@xxxxxxxxx>
Signed-off-by: Eddie Dong <eddie.dong@xxxxxxxxx>

---

diff -r 86c36f5c38f2 xen/arch/x86/hvm/vmx/entry.S
--- a/xen/arch/x86/hvm/vmx/entry.S      Wed Sep 08 22:07:15 2010 +0800
+++ b/xen/arch/x86/hvm/vmx/entry.S      Wed Sep 08 22:11:52 2010 +0800
@@ -119,6 +119,7 @@
 .globl vmx_asm_do_vmentry
 vmx_asm_do_vmentry:
         call vmx_intr_assist
+        call vmx_nest_switch_mode
 
         get_current(bx)
         cli
diff -r 86c36f5c38f2 xen/arch/x86/hvm/vmx/nest.c
--- a/xen/arch/x86/hvm/vmx/nest.c       Wed Sep 08 22:07:15 2010 +0800
+++ b/xen/arch/x86/hvm/vmx/nest.c       Wed Sep 08 22:11:52 2010 +0800
@@ -22,6 +22,8 @@
 #include <xen/config.h>
 #include <asm/types.h>
 #include <asm/p2m.h>
+#include <asm/paging.h>
+#include <asm/hvm/support.h>
 #include <asm/hvm/vmx/vmx.h>
 #include <asm/hvm/vmx/vvmcs.h>
 #include <asm/hvm/vmx/nest.h>
@@ -695,3 +697,349 @@
 
     set_shadow_control(nest, EXCEPTION_BITMAP, value);
 }
+
+/*
+ * Nested VMX context switch
+ */
+
+static unsigned long vmcs_gstate_field[] = {
+    /* 16 BITS */
+    GUEST_ES_SELECTOR,
+    GUEST_CS_SELECTOR,
+    GUEST_SS_SELECTOR,
+    GUEST_DS_SELECTOR,
+    GUEST_FS_SELECTOR,
+    GUEST_GS_SELECTOR,
+    GUEST_LDTR_SELECTOR,
+    GUEST_TR_SELECTOR,
+    /* 64 BITS */
+    VMCS_LINK_POINTER,
+    GUEST_IA32_DEBUGCTL,
+#ifndef CONFIG_X86_64
+    VMCS_LINK_POINTER_HIGH,
+    GUEST_IA32_DEBUGCTL_HIGH,
+#endif
+    /* 32 BITS */
+    GUEST_ES_LIMIT,
+    GUEST_CS_LIMIT,
+    GUEST_SS_LIMIT,
+    GUEST_DS_LIMIT,
+    GUEST_FS_LIMIT,
+    GUEST_GS_LIMIT,
+    GUEST_LDTR_LIMIT,
+    GUEST_TR_LIMIT,
+    GUEST_GDTR_LIMIT,
+    GUEST_IDTR_LIMIT,
+    GUEST_ES_AR_BYTES,
+    GUEST_CS_AR_BYTES,
+    GUEST_SS_AR_BYTES,
+    GUEST_DS_AR_BYTES,
+    GUEST_FS_AR_BYTES,
+    GUEST_GS_AR_BYTES,
+    GUEST_LDTR_AR_BYTES,
+    GUEST_TR_AR_BYTES,
+    GUEST_INTERRUPTIBILITY_INFO,
+    GUEST_ACTIVITY_STATE,
+    GUEST_SYSENTER_CS,
+    /* natural */
+    GUEST_ES_BASE,
+    GUEST_CS_BASE,
+    GUEST_SS_BASE,
+    GUEST_DS_BASE,
+    GUEST_FS_BASE,
+    GUEST_GS_BASE,
+    GUEST_LDTR_BASE,
+    GUEST_TR_BASE,
+    GUEST_GDTR_BASE,
+    GUEST_IDTR_BASE,
+    GUEST_DR7,
+    GUEST_RSP,
+    GUEST_RIP,
+    GUEST_RFLAGS,
+    GUEST_PENDING_DBG_EXCEPTIONS,
+    GUEST_SYSENTER_ESP,
+    GUEST_SYSENTER_EIP,
+};
+
+static unsigned long vmcs_ro_field[] = {
+    GUEST_PHYSICAL_ADDRESS,
+    VM_INSTRUCTION_ERROR,
+    VM_EXIT_REASON,
+    VM_EXIT_INTR_INFO,
+    VM_EXIT_INTR_ERROR_CODE,
+    IDT_VECTORING_INFO,
+    IDT_VECTORING_ERROR_CODE,
+    VM_EXIT_INSTRUCTION_LEN,
+    VMX_INSTRUCTION_INFO,
+    EXIT_QUALIFICATION,
+    GUEST_LINEAR_ADDRESS
+};
+
+static struct vmcs_host_to_guest {
+    unsigned long host_field;
+    unsigned long guest_field;
+} vmcs_h2g_field[] = {
+    {HOST_ES_SELECTOR, GUEST_ES_SELECTOR},
+    {HOST_CS_SELECTOR, GUEST_CS_SELECTOR},
+    {HOST_SS_SELECTOR, GUEST_SS_SELECTOR},
+    {HOST_DS_SELECTOR, GUEST_DS_SELECTOR},
+    {HOST_FS_SELECTOR, GUEST_FS_SELECTOR},
+    {HOST_GS_SELECTOR, GUEST_GS_SELECTOR},
+    {HOST_TR_SELECTOR, GUEST_TR_SELECTOR},
+    {HOST_SYSENTER_CS, GUEST_SYSENTER_CS},
+    {HOST_FS_BASE, GUEST_FS_BASE},
+    {HOST_GS_BASE, GUEST_GS_BASE},
+    {HOST_TR_BASE, GUEST_TR_BASE},
+    {HOST_GDTR_BASE, GUEST_GDTR_BASE},
+    {HOST_IDTR_BASE, GUEST_IDTR_BASE},
+    {HOST_SYSENTER_ESP, GUEST_SYSENTER_ESP},
+    {HOST_SYSENTER_EIP, GUEST_SYSENTER_EIP},
+};
+
+static void vvmcs_to_shadow(void *vvmcs, unsigned int field)
+{
+    u64 value;
+
+    value = __get_vvmcs(vvmcs, field);
+    __vmwrite(field, value);
+}
+
+static void vvmcs_from_shadow(void *vvmcs, unsigned int field)
+{
+    u64 value;
+    int rc;
+
+    value = __vmread_safe(field, &rc);
+    if ( !rc )
+        __set_vvmcs(vvmcs, field, value);
+}
+
+static void load_vvmcs_control(struct vmx_nest_struct *nest)
+{
+    u32 exit_control;
+    struct vcpu *v = current;
+
+    /* PIN_BASED, CPU_BASED controls: the union of L0 & L1 */
+    set_shadow_control(nest, PIN_BASED_VM_EXEC_CONTROL,
+                       vmx_pin_based_exec_control);
+    vmx_update_cpu_exec_control(v);
+
+    /* VM_EXIT_CONTROLS: owned by L0 except bits below */
+#define EXIT_CONTROL_GUEST_BITS    ((1<<2) | (1<<18) | (1<<20) | (1<<22))
+    exit_control = __get_vvmcs(nest->vvmcs, VM_EXIT_CONTROLS) &
+                   EXIT_CONTROL_GUEST_BITS;
+    exit_control |= (vmx_vmexit_control & ~EXIT_CONTROL_GUEST_BITS);
+    __vmwrite(VM_EXIT_CONTROLS, exit_control);
+
+    /* VM_ENTRY_CONTROLS: owned by L1 */
+    vvmcs_to_shadow(nest->vvmcs, VM_ENTRY_CONTROLS);
+
+    vmx_update_exception_bitmap(v);
+}
+
+static void load_vvmcs_guest_state(struct vmx_nest_struct *nest)
+{
+    int i;
+
+    /* vvmcs.gstate to svmcs.gstate */
+    for ( i = 0; i < ARRAY_SIZE(vmcs_gstate_field); i++ )
+        vvmcs_to_shadow(nest->vvmcs, vmcs_gstate_field[i]);
+
+    hvm_set_cr0(__get_vvmcs(nest->vvmcs, GUEST_CR0));
+    hvm_set_cr4(__get_vvmcs(nest->vvmcs, GUEST_CR4));
+    hvm_set_cr3(__get_vvmcs(nest->vvmcs, GUEST_CR3));
+
+    vvmcs_to_shadow(nest->vvmcs, VM_ENTRY_INTR_INFO);
+    vvmcs_to_shadow(nest->vvmcs, VM_ENTRY_EXCEPTION_ERROR_CODE);
+    vvmcs_to_shadow(nest->vvmcs, VM_ENTRY_INSTRUCTION_LEN);
+
+    /* XXX: should refer to GUEST_HOST_MASK of both L0 and L1 */
+    vvmcs_to_shadow(nest->vvmcs, CR0_READ_SHADOW);
+    vvmcs_to_shadow(nest->vvmcs, CR4_READ_SHADOW);
+    vvmcs_to_shadow(nest->vvmcs, CR0_GUEST_HOST_MASK);
+    vvmcs_to_shadow(nest->vvmcs, CR4_GUEST_HOST_MASK);
+
+    /* TODO: PDPTRs for nested ept */
+    /* TODO: CR3 target control */
+}
+
+static void virtual_vmentry(struct cpu_user_regs *regs)
+{
+    struct vcpu *v = current;
+    struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
+#ifdef __x86_64__
+    unsigned long lm_l1, lm_l2;
+#endif
+
+    vmx_vmcs_switch_current(v, v->arch.hvm_vmx.vmcs, nest->svmcs);
+
+    v->arch.hvm_vcpu.in_nesting = 1;
+    nest->vmresume_pending = 0;
+    nest->vmresume_in_progress = 1;
+
+#ifdef __x86_64__
+    /*
+     * EFER handling:
+     * hvm_set_efer won't work if CR0.PG = 1, so we change the value
+     * directly to make hvm_long_mode_enabled(v) work in L2.
+     * An additional update_paging_modes call is also needed if
+     * there is a 32/64 switch. v->arch.hvm_vcpu.guest_efer doesn't
+     * need to be saved, since its value on vmexit is determined by
+     * L1 exit_controls
+     */
+    lm_l1 = !!hvm_long_mode_enabled(v);
+    lm_l2 = !!(__get_vvmcs(nest->vvmcs, VM_ENTRY_CONTROLS) &
+                           VM_ENTRY_IA32E_MODE);
+
+    if ( lm_l2 )
+        v->arch.hvm_vcpu.guest_efer |= EFER_LMA | EFER_LME;
+    else
+        v->arch.hvm_vcpu.guest_efer &= ~(EFER_LMA | EFER_LME);
+#endif
+
+    load_vvmcs_control(nest);
+    load_vvmcs_guest_state(nest);
+
+#ifdef __x86_64__
+    if ( lm_l1 != lm_l2 )
+    {
+        paging_update_paging_modes(v);
+    }
+#endif
+
+    regs->rip = __get_vvmcs(nest->vvmcs, GUEST_RIP);
+    regs->rsp = __get_vvmcs(nest->vvmcs, GUEST_RSP);
+    regs->rflags = __get_vvmcs(nest->vvmcs, GUEST_RFLAGS);
+
+    /* TODO: EPT_POINTER */
+}
+
+static void sync_vvmcs_guest_state(struct vmx_nest_struct *nest)
+{
+    int i;
+    unsigned long mask;
+    unsigned long cr;
+
+    /* copy svmcs.gstate back to vvmcs.gstate */
+    for ( i = 0; i < ARRAY_SIZE(vmcs_gstate_field); i++ )
+        vvmcs_from_shadow(nest->vvmcs, vmcs_gstate_field[i]);
+
+    /* SDM 20.6.6: L2 guest execution may change GUEST CR0/CR4 */
+    mask = __get_vvmcs(nest->vvmcs, CR0_GUEST_HOST_MASK);
+    if ( ~mask )
+    {
+        cr = __get_vvmcs(nest->vvmcs, GUEST_CR0);
+        cr = (cr & mask) | (__vmread(GUEST_CR0) & ~mask);
+        __set_vvmcs(nest->vvmcs, GUEST_CR0, cr);
+    }
+
+    mask = __get_vvmcs(nest->vvmcs, CR4_GUEST_HOST_MASK);
+    if ( ~mask )
+    {
+        cr = __get_vvmcs(nest->vvmcs, GUEST_CR4);
+        cr = (cr & mask) | (__vmread(GUEST_CR4) & ~mask);
+        __set_vvmcs(nest->vvmcs, GUEST_CR4, cr);
+    }
+
+    /* CR3 sync if exec doesn't want cr3 load exiting: i.e. nested EPT */
+    if ( !(__get_vvmcs(nest->vvmcs, CPU_BASED_VM_EXEC_CONTROL) &
+           CPU_BASED_CR3_LOAD_EXITING) )
+        vvmcs_from_shadow(nest->vvmcs, GUEST_CR3);
+}
+
+static void sync_vvmcs_ro(struct vmx_nest_struct *nest)
+{
+    int i;
+
+    for ( i = 0; i < ARRAY_SIZE(vmcs_ro_field); i++ )
+        vvmcs_from_shadow(nest->vvmcs, vmcs_ro_field[i]);
+}
+
+static void load_vvmcs_host_state(struct vmx_nest_struct *nest)
+{
+    int i;
+    u64 r;
+
+    for ( i = 0; i < ARRAY_SIZE(vmcs_h2g_field); i++ )
+    {
+        r = __get_vvmcs(nest->vvmcs, vmcs_h2g_field[i].host_field);
+        __vmwrite(vmcs_h2g_field[i].guest_field, r);
+    }
+
+    hvm_set_cr0(__get_vvmcs(nest->vvmcs, HOST_CR0));
+    hvm_set_cr4(__get_vvmcs(nest->vvmcs, HOST_CR4));
+    hvm_set_cr3(__get_vvmcs(nest->vvmcs, HOST_CR3));
+
+    __set_vvmcs(nest->vvmcs, VM_ENTRY_INTR_INFO, 0);
+}
+
+static void virtual_vmexit(struct cpu_user_regs *regs)
+{
+    struct vcpu *v = current;
+    struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
+#ifdef __x86_64__
+    unsigned long lm_l1, lm_l2;
+#endif
+
+    sync_vvmcs_ro(nest);
+    sync_vvmcs_guest_state(nest);
+
+    vmx_vmcs_switch_current(v, v->arch.hvm_vmx.vmcs, nest->hvmcs);
+
+    v->arch.hvm_vcpu.in_nesting = 0;
+    nest->vmexit_pending = 0;
+
+#ifdef __x86_64__
+    lm_l2 = !!hvm_long_mode_enabled(v);
+    lm_l1 = !!(__get_vvmcs(nest->vvmcs, VM_EXIT_CONTROLS) &
+                           VM_EXIT_IA32E_MODE);
+
+    if ( lm_l1 )
+        v->arch.hvm_vcpu.guest_efer |= EFER_LMA | EFER_LME;
+    else
+        v->arch.hvm_vcpu.guest_efer &= ~(EFER_LMA | EFER_LME);
+#endif
+
+    vmx_update_cpu_exec_control(v);
+    vmx_update_exception_bitmap(v);
+
+    load_vvmcs_host_state(nest);
+
+#ifdef __x86_64__
+    if ( lm_l1 != lm_l2 )
+        paging_update_paging_modes(v);
+#endif
+
+    regs->rip = __get_vvmcs(nest->vvmcs, HOST_RIP);
+    regs->rsp = __get_vvmcs(nest->vvmcs, HOST_RSP);
+    regs->rflags = __vmread(GUEST_RFLAGS);
+
+    vmreturn(regs, VMSUCCEED);
+}
+
+asmlinkage void vmx_nest_switch_mode(void)
+{
+    struct vcpu *v = current;
+    struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
+    struct cpu_user_regs *regs = guest_cpu_user_regs();
+
+    /*
+     * A softirq may interrupt us between the point where a virtual
+     * vmentry has just been handled and the true vmentry.  If an L1
+     * virtual interrupt causes another virtual vmexit during this
+     * window, we cannot let that happen or VM_ENTRY_INTR_INFO is lost.
+     */
+    if ( unlikely(nest->vmresume_in_progress) )
+        return;
+
+    if ( v->arch.hvm_vcpu.in_nesting && nest->vmexit_pending )
+    {
+        local_irq_enable();
+        virtual_vmexit(regs);
+    }
+    else if ( !v->arch.hvm_vcpu.in_nesting && nest->vmresume_pending )
+    {
+        local_irq_enable();
+        virtual_vmentry(regs);
+    }
+}
diff -r 86c36f5c38f2 xen/include/asm-x86/hvm/vmx/nest.h
--- a/xen/include/asm-x86/hvm/vmx/nest.h        Wed Sep 08 22:07:15 2010 +0800
+++ b/xen/include/asm-x86/hvm/vmx/nest.h        Wed Sep 08 22:11:52 2010 +0800
@@ -56,6 +56,8 @@
     int                  vmresume_in_progress;
 };
 
+asmlinkage void vmx_nest_switch_mode(void);
+
 int vmx_nest_handle_vmxon(struct cpu_user_regs *regs);
 int vmx_nest_handle_vmxoff(struct cpu_user_regs *regs);
 
