[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 07/17] vmx: nest: handling VMX instruction exits



Add a VMX instruction decoder and emulate the simple VMX instructions;
vmlaunch/vmresume and invept are not handled by this patch.

Signed-off-by: Qing He <qing.he@xxxxxxxxx>

---
 b/xen/arch/x86/hvm/vmx/nest.c      |  502 +++++++++++++++++++++++++++++++++++++
 xen/arch/x86/hvm/vmx/Makefile      |    1 
 xen/arch/x86/hvm/vmx/vmx.c         |   43 ++-
 xen/include/asm-x86/hvm/vmx/nest.h |   10 
 4 files changed, 549 insertions(+), 7 deletions(-)

diff -r 9cb31076d2d0 -r 38a4757e94ef xen/arch/x86/hvm/vmx/Makefile
--- a/xen/arch/x86/hvm/vmx/Makefile     Thu Apr 22 22:30:09 2010 +0800
+++ b/xen/arch/x86/hvm/vmx/Makefile     Thu Apr 22 22:30:09 2010 +0800
@@ -5,3 +5,4 @@
 obj-y += vmx.o
 obj-y += vpmu.o
 obj-y += vpmu_core2.o
+obj-y += nest.o
diff -r 9cb31076d2d0 -r 38a4757e94ef xen/arch/x86/hvm/vmx/nest.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hvm/vmx/nest.c       Thu Apr 22 22:30:09 2010 +0800
@@ -0,0 +1,502 @@
+/*
+ * nest.c: nested virtualization for VMX.
+ *
+ * Copyright (c) 2010, Intel Corporation.
+ * Author: Qing He <qing.he@xxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <xen/config.h>
+#include <asm/types.h>
+#include <asm/hvm/vmx/vmx.h>
+#include <asm/hvm/vmx/vvmcs.h>
+#include <asm/hvm/vmx/nest.h>
+
+/*
+ * VMX instructions support functions
+ */
+
+/*
+ * General-purpose register numbers as they appear in the register fields
+ * (reg1, reg2, base_reg, index_reg) of the VMX instruction-information word.
+ */
+enum vmx_regs_enc {
+    VMX_REG_RAX = 0,
+    VMX_REG_RCX = 1,
+    VMX_REG_RDX = 2,
+    VMX_REG_RBX = 3,
+    VMX_REG_RSP = 4,
+    VMX_REG_RBP = 5,
+    VMX_REG_RSI = 6,
+    VMX_REG_RDI = 7,
+#ifdef CONFIG_X86_64
+    VMX_REG_R8  = 8,
+    VMX_REG_R9  = 9,
+    VMX_REG_R10 = 10,
+    VMX_REG_R11 = 11,
+    VMX_REG_R12 = 12,
+    VMX_REG_R13 = 13,
+    VMX_REG_R14 = 14,
+    VMX_REG_R15 = 15,
+#endif
+};
+
+/*
+ * Segment register numbers as they appear in the `segment' field of the
+ * VMX instruction-information word; used to index sreg_to_index[].
+ */
+enum vmx_sregs_enc {
+    VMX_SREG_ES = 0,
+    VMX_SREG_CS = 1,
+    VMX_SREG_SS = 2,
+    VMX_SREG_DS = 3,
+    VMX_SREG_FS = 4,
+    VMX_SREG_GS = 5,
+};
+
+/*
+ * Translate a VMX segment encoding (enum vmx_sregs_enc) into Xen's
+ * enum x86_segment.  This is a pure lookup table, so keep it file-local and
+ * read-only instead of exporting a writable global.
+ */
+static const enum x86_segment sreg_to_index[] = {
+    [VMX_SREG_ES] = x86_seg_es,
+    [VMX_SREG_CS] = x86_seg_cs,
+    [VMX_SREG_SS] = x86_seg_ss,
+    [VMX_SREG_DS] = x86_seg_ds,
+    [VMX_SREG_FS] = x86_seg_fs,
+    [VMX_SREG_GS] = x86_seg_gs,
+};
+
+/*
+ * Bit-level view of the 32-bit word read from VMX_INSTRUCTION_INFO on a
+ * VMX-instruction exit.  `word' holds the raw value; `fields' overlays it.
+ * NOTE(review): the overlay relies on the compiler allocating bit-fields
+ * starting from the least significant bit, so that the per-field bit
+ * positions noted below hold.
+ */
+union vmx_inst_info {
+    struct {
+        unsigned int scaling           :2; /* bit 0-1 */
+        unsigned int __rsvd0           :1; /* bit 2 */
+        unsigned int reg1              :4; /* bit 3-6 */
+        unsigned int addr_size         :3; /* bit 7-9 */
+        unsigned int memreg            :1; /* bit 10 */
+        unsigned int __rsvd1           :4; /* bit 11-14 */
+        unsigned int segment           :3; /* bit 15-17 */
+        unsigned int index_reg         :4; /* bit 18-21 */
+        unsigned int index_reg_invalid :1; /* bit 22 */
+        unsigned int base_reg          :4; /* bit 23-26 */
+        unsigned int base_reg_invalid  :1; /* bit 27 */
+        unsigned int reg2              :4; /* bit 28-31 */
+    } fields;
+    /* Raw instruction-information value */
+    u32 word;
+};
+
+/* Values for vmx_inst_decoded.type */
+#define VMX_INST_MEMREG_TYPE_MEMORY 0
+#define VMX_INST_MEMREG_TYPE_REG    1
+
+/*
+ * Operands of a VMX instruction as extracted by decode_vmx_inst().
+ */
+struct vmx_inst_decoded {
+    /* One of VMX_INST_MEMREG_TYPE_*: tells which union member is in use */
+    int type;
+    union {
+        /* type == VMX_INST_MEMREG_TYPE_MEMORY */
+        struct {
+            unsigned long mem;  /* address of the memory operand */
+            unsigned int  len;  /* number of bytes to access at mem */
+        };
+        /* type == VMX_INST_MEMREG_TYPE_REG */
+        enum vmx_regs_enc reg1;
+    };
+
+    /* Second register operand; filled in for both operand types */
+    enum vmx_regs_enc reg2;
+};
+
+/*
+ * Outcome of an emulated VMX instruction, turned into guest EFLAGS by
+ * vmreturn().
+ */
+enum vmx_ops_result {
+    VMSUCCEED      = 0, /* all six status flags cleared */
+    VMFAIL_VALID   = 1, /* reported with ZF set */
+    VMFAIL_INVALID = 2, /* reported with CF set */
+};
+
+/*
+ * Helpers generating the `case' arms of reg_read()/reg_write().
+ * They must be expanded inside a switch on an enum vmx_regs_enc value, in a
+ * scope that provides `regs' (struct cpu_user_regs *) and `value'.
+ */
+
+/* case arm storing `value' into regs->reg for encoding VMX_REG_<REG> */
+#define CASE_SET_REG(REG, reg)      \
+    case VMX_REG_ ## REG: regs->reg = value; break
+/* case arm loading regs->reg into `value' for encoding VMX_REG_<REG> */
+#define CASE_GET_REG(REG, reg)      \
+    case VMX_REG_ ## REG: value = regs->reg; break
+
+/* All r8-r15 arms; "S"/"G" is pasted to form CASE_SET_REG/CASE_GET_REG */
+#define CASE_EXTEND_SET_REG         \
+    CASE_EXTEND_REG(S)
+#define CASE_EXTEND_GET_REG         \
+    CASE_EXTEND_REG(G)
+
+/*
+ * r8-r15 are only handled when not building for i386.
+ * NOTE(review): enum vmx_regs_enc guards the same registers with
+ * CONFIG_X86_64 rather than __i386__ - confirm the two always agree.
+ */
+#ifdef __i386__
+#define CASE_EXTEND_REG(T)
+#else
+#define CASE_EXTEND_REG(T)          \
+    CASE_ ## T ## ET_REG(R8, r8);   \
+    CASE_ ## T ## ET_REG(R9, r9);   \
+    CASE_ ## T ## ET_REG(R10, r10); \
+    CASE_ ## T ## ET_REG(R11, r11); \
+    CASE_ ## T ## ET_REG(R12, r12); \
+    CASE_ ## T ## ET_REG(R13, r13); \
+    CASE_ ## T ## ET_REG(R14, r14); \
+    CASE_ ## T ## ET_REG(R15, r15)
+#endif
+
+/*
+ * Fetch the guest general-purpose register selected by a VMX register
+ * encoding.
+ *
+ * regs:  saved guest register frame
+ * index: register encoding
+ *
+ * Returns the register contents, or 0 for an encoding with no case below.
+ */
+static unsigned long reg_read(struct cpu_user_regs *regs,
+                              enum vmx_regs_enc index)
+{
+    /* Assigned by the CASE_GET_REG arms; stays 0 if nothing matches */
+    unsigned long value = 0;
+
+    switch ( index )
+    {
+    /* Arms listed in encoding order */
+    CASE_GET_REG(RAX, eax);
+    CASE_GET_REG(RCX, ecx);
+    CASE_GET_REG(RDX, edx);
+    CASE_GET_REG(RBX, ebx);
+    CASE_GET_REG(RSP, esp);
+    CASE_GET_REG(RBP, ebp);
+    CASE_GET_REG(RSI, esi);
+    CASE_GET_REG(RDI, edi);
+    CASE_EXTEND_GET_REG;
+    default:
+        break;
+    }
+
+    return value;
+}
+
+/*
+ * Store `value' into the guest general-purpose register selected by a VMX
+ * register encoding.  An encoding with no case below is silently ignored.
+ *
+ * regs:  saved guest register frame
+ * index: register encoding
+ * value: new register contents
+ */
+static void reg_write(struct cpu_user_regs *regs,
+                      enum vmx_regs_enc index,
+                      unsigned long value)
+{
+    switch ( index )
+    {
+    /* Arms listed in encoding order */
+    CASE_SET_REG(RAX, eax);
+    CASE_SET_REG(RCX, ecx);
+    CASE_SET_REG(RDX, edx);
+    CASE_SET_REG(RBX, ebx);
+    CASE_SET_REG(RSP, esp);
+    CASE_SET_REG(RBP, ebp);
+    CASE_SET_REG(RSI, esi);
+    CASE_SET_REG(RDI, edi);
+    CASE_EXTEND_SET_REG;
+    default:
+        break;
+    }
+}
+
+/*
+ * Decode the operands of the VMX instruction that caused the current exit.
+ *
+ * The instruction-information word is read from VMX_INSTRUCTION_INFO.  For a
+ * register operand only the register numbers are recorded.  For a memory
+ * operand the address is computed as
+ *     segment base + base + index * scale + displacement
+ * where the displacement is read from EXIT_QUALIFICATION, and the access
+ * length is derived from the address-size field.
+ *
+ * regs:   saved guest register frame (source of base/index registers)
+ * decode: filled in with the decoded operands
+ */
+static void decode_vmx_inst(struct cpu_user_regs *regs,
+                            struct vmx_inst_decoded *decode)
+{
+    struct vcpu *v = current;
+    union vmx_inst_info info;
+    struct segment_register seg;
+    unsigned long base, index, seg_base, disp;
+    int scale;
+
+    info.word = __vmread(VMX_INSTRUCTION_INFO);
+
+    if ( info.fields.memreg )
+    {
+        decode->type = VMX_INST_MEMREG_TYPE_REG;
+        decode->reg1 = info.fields.reg1;
+    }
+    else
+    {
+        decode->type = VMX_INST_MEMREG_TYPE_MEMORY;
+
+        /*
+         * The segment field is 3 bits wide but the translation table has
+         * only 6 entries: never index past its end.
+         */
+        if ( info.fields.segment <
+             sizeof(sreg_to_index) / sizeof(sreg_to_index[0]) )
+        {
+            hvm_get_segment_register(v, sreg_to_index[info.fields.segment],
+                                     &seg);
+            seg_base = seg.base;
+        }
+        else
+        {
+            gdprintk(XENLOG_ERR, "nest: bad segment encoding %u\n",
+                     (unsigned int)info.fields.segment);
+            seg_base = 0;
+        }
+
+        base = info.fields.base_reg_invalid ? 0 :
+            reg_read(regs, info.fields.base_reg);
+
+        index = info.fields.index_reg_invalid ? 0 :
+            reg_read(regs, info.fields.index_reg);
+
+        scale = 1 << info.fields.scaling;
+
+        disp = __vmread(EXIT_QUALIFICATION);
+
+        decode->mem = seg_base + base + index * scale + disp;
+        /*
+         * NOTE(review): the length comes from the address-size field
+         * (0 -> 2 bytes, 1 -> 4, 2 -> 8); confirm this is the intended
+         * operand width for every caller.
+         */
+        decode->len = 1 << (info.fields.addr_size + 1);
+    }
+
+    decode->reg2 = info.fields.reg2;
+}
+
+/*
+ * Report the outcome of an emulated VMX instruction in the guest's EFLAGS:
+ * CF, PF, AF, ZF, SF and OF are all cleared, then ZF is set for
+ * VMFAIL_VALID, CF for VMFAIL_INVALID (or any unknown result), and nothing
+ * for VMSUCCEED.
+ */
+static void vmreturn(struct cpu_user_regs *regs, enum vmx_ops_result res)
+{
+    const unsigned long status_flags =
+        X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
+        X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF;
+    unsigned long result_flag;
+
+    if ( res == VMSUCCEED )
+        result_flag = 0;
+    else if ( res == VMFAIL_VALID )
+        /* TODO: error number of VMFailValid */
+        result_flag = X86_EFLAGS_ZF;
+    else
+        result_flag = X86_EFLAGS_CF;
+
+    regs->eflags = (regs->eflags & ~status_flags) | result_flag;
+}
+
+/*
+ * Retire the currently cached virtual VMCS: VMCLEAR the shadow VMCS (if one
+ * is allocated), write the cached vvmcs page back to guest memory at
+ * nest->gvmcs_pa, and mark the cache invalid.
+ */
+static void __clear_current_vvmcs(struct vmx_nest_struct *nest)
+{
+    if ( nest->svmcs )
+        __vmpclear(virt_to_maddr(nest->svmcs));
+
+    /* Do not lose the guest's VMCS contents silently if write-back fails */
+    if ( hvm_copy_to_guest_phys(nest->gvmcs_pa, nest->vvmcs, PAGE_SIZE) !=
+         HVMCOPY_okay )
+        gdprintk(XENLOG_ERR, "nest: write-back of virtual vmcs failed\n");
+
+    nest->vmcs_invalid = 1;
+}
+
+/*
+ * VMX instructions handling
+ */
+
+/*
+ * Emulate VMXON for a nested guest.
+ *
+ * Reads the VMXON region pointer from the instruction's memory operand,
+ * allocates the virtual VMCS cache (vvmcs) and the shadow VMCS (svmcs) and
+ * seeds the shadow VMCS with a copy of the current vcpu's host VMCS.
+ * Nested state is only committed once everything has succeeded.
+ *
+ * Returns X86EMUL_OKAY when the instruction was emulated (its outcome is
+ * reported to the guest through EFLAGS, see vmreturn()), or
+ * X86EMUL_EXCEPTION when an exception was injected instead.
+ */
+int vmx_nest_handle_vmxon(struct cpu_user_regs *regs)
+{
+    struct vcpu *v = current;
+    struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
+    struct vmx_inst_decoded decode;
+    unsigned long gpa = 0;
+    int rc;
+
+    if ( !v->domain->arch.hvm_domain.nesting_avail )
+        goto invalid_op;
+
+    /*
+     * Already in nested VMX operation: fail the instruction instead of
+     * overwriting (and leaking) the pages allocated by the first VMXON.
+     */
+    if ( nest->guest_vmxon_pa )
+    {
+        vmreturn(regs, nest->vmcs_invalid ? VMFAIL_INVALID : VMFAIL_VALID);
+        goto out;
+    }
+
+    decode_vmx_inst(regs, &decode);
+
+    ASSERT(decode.type == VMX_INST_MEMREG_TYPE_MEMORY);
+    rc = hvm_copy_from_guest_virt(&gpa, decode.mem, decode.len, 0);
+    if ( rc == HVMCOPY_bad_gva_to_gfn )
+        /* The copy routine has queued a #PF: let the guest retry */
+        return X86EMUL_EXCEPTION;
+    if ( rc != HVMCOPY_okay )
+    {
+        vmreturn(regs, VMFAIL_INVALID);
+        goto out;
+    }
+
+    /*
+     * guest_vmxon_pa == 0 is this file's "VMX is off" marker, so a VMXON
+     * region at address 0 cannot be represented; reject it together with
+     * pointers that are not page aligned.
+     */
+    if ( !gpa || (gpa & 0xfff) )
+    {
+        vmreturn(regs, VMFAIL_INVALID);
+        goto out;
+    }
+
+    nest->vvmcs = alloc_xenheap_page();
+    if ( !nest->vvmcs )
+    {
+        gdprintk(XENLOG_ERR, "nest: allocation for virtual vmcs failed\n");
+        vmreturn(regs, VMFAIL_INVALID);
+        goto out;
+    }
+    nest->svmcs = alloc_xenheap_page();
+    if ( !nest->svmcs )
+    {
+        gdprintk(XENLOG_ERR, "nest: allocation for shadow vmcs failed\n");
+        free_xenheap_page(nest->vvmcs);
+        nest->vvmcs = NULL;     /* no dangling pointer left behind */
+        vmreturn(regs, VMFAIL_INVALID);
+        goto out;
+    }
+
+    /*
+     * The cache page may later be copied to guest memory (see
+     * __clear_current_vvmcs()), so it must not carry stale heap contents.
+     */
+    memset(nest->vvmcs, 0, PAGE_SIZE);
+
+    nest->guest_vmxon_pa = gpa;
+    nest->gvmcs_pa = 0;
+    nest->vmcs_invalid = 1;
+
+    /*
+     * `fork' the host vmcs to shadow_vmcs
+     * vmcs_lock is not needed since we are on current
+     */
+    nest->hvmcs = v->arch.hvm_vmx.vmcs;
+    __vmpclear(virt_to_maddr(nest->hvmcs));
+    memcpy(nest->svmcs, nest->hvmcs, PAGE_SIZE);
+    __vmptrld(virt_to_maddr(nest->hvmcs));
+    v->arch.hvm_vmx.launched = 0;
+
+    vmreturn(regs, VMSUCCEED);
+
+out:
+    return X86EMUL_OKAY;
+
+invalid_op:
+    hvm_inject_exception(TRAP_invalid_op, 0, 0);
+    return X86EMUL_EXCEPTION;
+}
+
+/*
+ * Emulate VMXOFF for a nested guest: leave nested VMX operation and release
+ * the virtual and shadow VMCS pages.
+ *
+ * Injects #UD and returns X86EMUL_EXCEPTION if the guest has not executed
+ * VMXON; otherwise reports success through EFLAGS and returns X86EMUL_OKAY.
+ */
+int vmx_nest_handle_vmxoff(struct cpu_user_regs *regs)
+{
+    struct vcpu *v = current;
+    struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
+
+    if ( unlikely(!nest->guest_vmxon_pa) )
+        goto invalid_op;
+
+    nest->guest_vmxon_pa = 0;
+
+    /* svmcs may be absent if its allocation failed during VMXON */
+    if ( nest->svmcs )
+    {
+        __vmpclear(virt_to_maddr(nest->svmcs));
+        free_xenheap_page(nest->svmcs);
+        nest->svmcs = NULL;
+    }
+
+    if ( nest->vvmcs )
+    {
+        free_xenheap_page(nest->vvmcs);
+        nest->vvmcs = NULL;
+    }
+
+    vmreturn(regs, VMSUCCEED);
+    return X86EMUL_OKAY;
+
+invalid_op:
+    hvm_inject_exception(TRAP_invalid_op, 0, 0);
+    return X86EMUL_EXCEPTION;
+}
+
+/*
+ * Emulate VMPTRLD for a nested guest: make the VMCS at the guest-physical
+ * address given by the memory operand the current virtual VMCS and load its
+ * contents into the vvmcs cache.  A previously cached VMCS at a different
+ * address is flushed back to guest memory first.
+ *
+ * Returns X86EMUL_OKAY when the instruction was emulated (outcome reported
+ * through EFLAGS), or X86EMUL_EXCEPTION when an exception was injected.
+ */
+int vmx_nest_handle_vmptrld(struct cpu_user_regs *regs)
+{
+    struct vcpu *v = current;
+    struct vmx_inst_decoded decode;
+    struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
+    unsigned long gpa = 0;
+    int rc;
+
+    if ( unlikely(!nest->guest_vmxon_pa) )
+        goto invalid_op;
+
+    decode_vmx_inst(regs, &decode);
+
+    ASSERT(decode.type == VMX_INST_MEMREG_TYPE_MEMORY);
+    rc = hvm_copy_from_guest_virt(&gpa, decode.mem, decode.len, 0);
+    if ( rc == HVMCOPY_bad_gva_to_gfn )
+        /* The copy routine has queued a #PF: let the guest retry */
+        return X86EMUL_EXCEPTION;
+    if ( rc != HVMCOPY_okay )
+    {
+        vmreturn(regs, VMFAIL_INVALID);
+        goto out;
+    }
+
+    /* The VMXON region and unaligned pointers are not valid VMCS pointers */
+    if ( gpa == nest->guest_vmxon_pa || (gpa & 0xfff) )
+    {
+        vmreturn(regs, VMFAIL_INVALID);
+        goto out;
+    }
+
+    if ( nest->gvmcs_pa != gpa )
+    {
+        /* Switching VMCS: flush the one cached so far, if any */
+        if ( !nest->vmcs_invalid )
+            __clear_current_vvmcs(nest);
+        nest->gvmcs_pa = gpa;
+        ASSERT(nest->vmcs_invalid == 1);
+    }
+
+    if ( nest->vmcs_invalid )
+    {
+        /* Only mark the cache valid if it really holds the guest's VMCS */
+        if ( hvm_copy_from_guest_phys(nest->vvmcs, nest->gvmcs_pa,
+                                      PAGE_SIZE) != HVMCOPY_okay )
+        {
+            vmreturn(regs, VMFAIL_INVALID);
+            goto out;
+        }
+        nest->vmcs_invalid = 0;
+    }
+
+    vmreturn(regs, VMSUCCEED);
+
+out:
+    return X86EMUL_OKAY;
+
+invalid_op:
+    hvm_inject_exception(TRAP_invalid_op, 0, 0);
+    return X86EMUL_EXCEPTION;
+}
+
+/*
+ * Emulate VMPTRST for a nested guest: store the recorded current-VMCS
+ * guest-physical address (nest->gvmcs_pa) to the memory operand.
+ *
+ * Returns X86EMUL_OKAY when the instruction was emulated (outcome reported
+ * through EFLAGS), or X86EMUL_EXCEPTION when an exception was injected.
+ */
+int vmx_nest_handle_vmptrst(struct cpu_user_regs *regs)
+{
+    struct vcpu *v = current;
+    struct vmx_inst_decoded decode;
+    struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
+    unsigned long gpa = 0;
+    int rc;
+
+    if ( unlikely(!nest->guest_vmxon_pa) )
+        goto invalid_op;
+
+    decode_vmx_inst(regs, &decode);
+
+    ASSERT(decode.type == VMX_INST_MEMREG_TYPE_MEMORY);
+
+    gpa = nest->gvmcs_pa;
+
+    rc = hvm_copy_to_guest_virt(decode.mem, &gpa, decode.len, 0);
+    if ( rc == HVMCOPY_bad_gva_to_gfn )
+        /* The copy routine has queued a #PF: let the guest retry */
+        return X86EMUL_EXCEPTION;
+
+    /* Do not claim success if the pointer never reached guest memory */
+    vmreturn(regs, rc == HVMCOPY_okay ? VMSUCCEED : VMFAIL_INVALID);
+    return X86EMUL_OKAY;
+
+invalid_op:
+    hvm_inject_exception(TRAP_invalid_op, 0, 0);
+    return X86EMUL_EXCEPTION;
+}
+
+/*
+ * Emulate VMCLEAR for a nested guest.  If the operand names the current
+ * virtual VMCS and the cache holds valid data, the cache is flushed back to
+ * guest memory and invalidated; clearing any other VMCS is a no-op that
+ * still reports success.
+ *
+ * Returns X86EMUL_OKAY when the instruction was emulated (outcome reported
+ * through EFLAGS), or X86EMUL_EXCEPTION when an exception was injected.
+ */
+int vmx_nest_handle_vmclear(struct cpu_user_regs *regs)
+{
+    struct vcpu *v = current;
+    struct vmx_inst_decoded decode;
+    struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
+    unsigned long gpa = 0;
+    int rc;
+
+    if ( unlikely(!nest->guest_vmxon_pa) )
+        goto invalid_op;
+
+    decode_vmx_inst(regs, &decode);
+
+    ASSERT(decode.type == VMX_INST_MEMREG_TYPE_MEMORY);
+    rc = hvm_copy_from_guest_virt(&gpa, decode.mem, decode.len, 0);
+    if ( rc == HVMCOPY_bad_gva_to_gfn )
+        /* The copy routine has queued a #PF: let the guest retry */
+        return X86EMUL_EXCEPTION;
+    if ( rc != HVMCOPY_okay )
+    {
+        vmreturn(regs, VMFAIL_INVALID);
+        goto out;
+    }
+
+    if ( gpa & 0xfff )
+    {
+        vmreturn(regs, VMFAIL_VALID);
+        goto out;
+    }
+
+    if ( gpa != nest->gvmcs_pa )
+    {
+        gdprintk(XENLOG_ERR, "vmclear gpa not the same with current vmcs\n");
+        vmreturn(regs, VMSUCCEED);
+        goto out;
+    }
+
+    /*
+     * Only flush a cache that holds live data: writing back an invalid
+     * cache would overwrite guest memory with stale contents.
+     */
+    if ( !nest->vmcs_invalid )
+        __clear_current_vvmcs(nest);
+
+    vmreturn(regs, VMSUCCEED);
+
+out:
+    return X86EMUL_OKAY;
+
+invalid_op:
+    hvm_inject_exception(TRAP_invalid_op, 0, 0);
+    return X86EMUL_EXCEPTION;
+}
+
+
+
+/*
+ * Emulate VMREAD for a nested guest: read the field whose encoding is in
+ * register operand reg2 from the vvmcs cache and deliver it to the
+ * destination operand (register reg1 or memory).
+ *
+ * Returns X86EMUL_OKAY when the instruction was emulated (outcome reported
+ * through EFLAGS), or X86EMUL_EXCEPTION when an exception was injected.
+ */
+int vmx_nest_handle_vmread(struct cpu_user_regs *regs)
+{
+    struct vcpu *v = current;
+    struct vmx_inst_decoded decode;
+    struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
+    u64 value = 0;
+    int rc;
+
+    if ( unlikely(!nest->guest_vmxon_pa) )
+        goto invalid_op;
+
+    decode_vmx_inst(regs, &decode);
+
+    value = __get_vvmcs(nest->vvmcs, reg_read(regs, decode.reg2));
+
+    switch ( decode.type )
+    {
+    case VMX_INST_MEMREG_TYPE_MEMORY:
+        rc = hvm_copy_to_guest_virt(decode.mem, &value, decode.len, 0);
+        if ( rc == HVMCOPY_bad_gva_to_gfn )
+            /* The copy routine has queued a #PF: let the guest retry */
+            return X86EMUL_EXCEPTION;
+        if ( rc != HVMCOPY_okay )
+        {
+            /* The value never reached the guest: do not claim success */
+            vmreturn(regs, VMFAIL_INVALID);
+            return X86EMUL_OKAY;
+        }
+        break;
+    case VMX_INST_MEMREG_TYPE_REG:
+        reg_write(regs, decode.reg1, value);
+        break;
+    default:
+        break;
+    }
+
+    vmreturn(regs, VMSUCCEED);
+    return X86EMUL_OKAY;
+
+invalid_op:
+    hvm_inject_exception(TRAP_invalid_op, 0, 0);
+    return X86EMUL_EXCEPTION;
+}
+
+/*
+ * Emulate VMWRITE for a nested guest: fetch the source operand (register
+ * reg1 or memory) and store it into the vvmcs cache field whose encoding is
+ * in register operand reg2.
+ *
+ * Returns X86EMUL_OKAY when the instruction was emulated (outcome reported
+ * through EFLAGS), or X86EMUL_EXCEPTION when an exception was injected.
+ */
+int vmx_nest_handle_vmwrite(struct cpu_user_regs *regs)
+{
+    struct vcpu *v = current;
+    struct vmx_inst_decoded decode;
+    struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
+    u64 value = 0;
+    int rc;
+
+    if ( unlikely(!nest->guest_vmxon_pa) )
+        goto invalid_op;
+
+    decode_vmx_inst(regs, &decode);
+
+    switch ( decode.type )
+    {
+    case VMX_INST_MEMREG_TYPE_MEMORY:
+        rc = hvm_copy_from_guest_virt(&value, decode.mem, decode.len, 0);
+        if ( rc == HVMCOPY_bad_gva_to_gfn )
+            /* The copy routine has queued a #PF: let the guest retry */
+            return X86EMUL_EXCEPTION;
+        if ( rc != HVMCOPY_okay )
+        {
+            /* No source value was obtained: leave the field untouched */
+            vmreturn(regs, VMFAIL_INVALID);
+            return X86EMUL_OKAY;
+        }
+        break;
+    case VMX_INST_MEMREG_TYPE_REG:
+        value = reg_read(regs, decode.reg1);
+        break;
+    default:
+        break;
+    }
+
+    __set_vvmcs(nest->vvmcs, reg_read(regs, decode.reg2), value);
+
+    vmreturn(regs, VMSUCCEED);
+    return X86EMUL_OKAY;
+
+invalid_op:
+    hvm_inject_exception(TRAP_invalid_op, 0, 0);
+    return X86EMUL_EXCEPTION;
+}
diff -r 9cb31076d2d0 -r 38a4757e94ef xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Thu Apr 22 22:30:09 2010 +0800
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Thu Apr 22 22:30:09 2010 +0800
@@ -2605,17 +2605,46 @@
             __update_guest_eip(inst_len);
         break;
 
+    case EXIT_REASON_VMCLEAR:
+        inst_len = __get_instruction_length();
+        if ( vmx_nest_handle_vmclear(regs) == X86EMUL_OKAY )
+            __update_guest_eip(inst_len);
+        break;
+    case EXIT_REASON_VMPTRLD:
+        inst_len = __get_instruction_length();
+        if ( vmx_nest_handle_vmptrld(regs) == X86EMUL_OKAY )
+            __update_guest_eip(inst_len);
+        break;
+    case EXIT_REASON_VMPTRST:
+        inst_len = __get_instruction_length();
+        if ( vmx_nest_handle_vmptrst(regs) == X86EMUL_OKAY )
+            __update_guest_eip(inst_len);
+        break;
+    case EXIT_REASON_VMREAD:
+        inst_len = __get_instruction_length();
+        if ( vmx_nest_handle_vmread(regs) == X86EMUL_OKAY )
+            __update_guest_eip(inst_len);
+        break;
+    case EXIT_REASON_VMWRITE:
+        inst_len = __get_instruction_length();
+        if ( vmx_nest_handle_vmwrite(regs) == X86EMUL_OKAY )
+            __update_guest_eip(inst_len);
+        break;
+    case EXIT_REASON_VMXOFF:
+        inst_len = __get_instruction_length();
+        if ( vmx_nest_handle_vmxoff(regs) == X86EMUL_OKAY )
+            __update_guest_eip(inst_len);
+        break;
+    case EXIT_REASON_VMXON:
+        inst_len = __get_instruction_length();
+        if ( vmx_nest_handle_vmxon(regs) == X86EMUL_OKAY )
+            __update_guest_eip(inst_len);
+        break;
+
     case EXIT_REASON_MWAIT_INSTRUCTION:
     case EXIT_REASON_MONITOR_INSTRUCTION:
-    case EXIT_REASON_VMCLEAR:
     case EXIT_REASON_VMLAUNCH:
-    case EXIT_REASON_VMPTRLD:
-    case EXIT_REASON_VMPTRST:
-    case EXIT_REASON_VMREAD:
     case EXIT_REASON_VMRESUME:
-    case EXIT_REASON_VMWRITE:
-    case EXIT_REASON_VMXOFF:
-    case EXIT_REASON_VMXON:
         vmx_inject_hw_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE);
         break;
 
diff -r 9cb31076d2d0 -r 38a4757e94ef xen/include/asm-x86/hvm/vmx/nest.h
--- a/xen/include/asm-x86/hvm/vmx/nest.h        Thu Apr 22 22:30:09 2010 +0800
+++ b/xen/include/asm-x86/hvm/vmx/nest.h        Thu Apr 22 22:30:09 2010 +0800
@@ -42,4 +42,14 @@
     int                  vmcs_invalid;
 };
 
+/*
+ * Handlers for VM exits caused by VMX instructions executed by the guest.
+ * Each returns X86EMUL_OKAY when the instruction was emulated (the caller
+ * then advances the guest instruction pointer) or X86EMUL_EXCEPTION when an
+ * exception was injected instead.
+ */
+
+/* Entering and leaving nested VMX operation */
+int vmx_nest_handle_vmxon(struct cpu_user_regs *regs);
+int vmx_nest_handle_vmxoff(struct cpu_user_regs *regs);
+
+/* Current-VMCS pointer management */
+int vmx_nest_handle_vmptrld(struct cpu_user_regs *regs);
+int vmx_nest_handle_vmptrst(struct cpu_user_regs *regs);
+int vmx_nest_handle_vmclear(struct cpu_user_regs *regs);
+
+/* Access to fields of the virtual VMCS */
+int vmx_nest_handle_vmread(struct cpu_user_regs *regs);
+int vmx_nest_handle_vmwrite(struct cpu_user_regs *regs);
+
+
 #endif /* __ASM_X86_HVM_NEST_H__ */

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.