WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH 07/17] vmx: nest: handling VMX instruction exits

To: xen-devel@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-devel] [PATCH 07/17] vmx: nest: handling VMX instruction exits
From: Qing He <qing.he@xxxxxxxxx>
Date: Thu, 22 Apr 2010 17:41:19 +0800
Cc: Qing He <qing.he@xxxxxxxxx>
Delivery-date: Thu, 22 Apr 2010 02:52:20 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
In-reply-to: <1271929289-18572-1-git-send-email-qing.he@xxxxxxxxx>
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
References: <1271929289-18572-1-git-send-email-qing.he@xxxxxxxxx>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
Add a VMX instruction decoder and handle the simple VMX instructions
(all of them except vmlaunch/vmresume and invept).

Signed-off-by: Qing He <qing.he@xxxxxxxxx>

---
 b/xen/arch/x86/hvm/vmx/nest.c      |  502 +++++++++++++++++++++++++++++++++++++
 xen/arch/x86/hvm/vmx/Makefile      |    1 
 xen/arch/x86/hvm/vmx/vmx.c         |   43 ++-
 xen/include/asm-x86/hvm/vmx/nest.h |   10 
 4 files changed, 549 insertions(+), 7 deletions(-)

diff -r 9cb31076d2d0 -r 38a4757e94ef xen/arch/x86/hvm/vmx/Makefile
--- a/xen/arch/x86/hvm/vmx/Makefile     Thu Apr 22 22:30:09 2010 +0800
+++ b/xen/arch/x86/hvm/vmx/Makefile     Thu Apr 22 22:30:09 2010 +0800
@@ -5,3 +5,4 @@
 obj-y += vmx.o
 obj-y += vpmu.o
 obj-y += vpmu_core2.o
+obj-y += nest.o
diff -r 9cb31076d2d0 -r 38a4757e94ef xen/arch/x86/hvm/vmx/nest.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hvm/vmx/nest.c       Thu Apr 22 22:30:09 2010 +0800
@@ -0,0 +1,502 @@
+/*
+ * nest.c: nested virtualization for VMX.
+ *
+ * Copyright (c) 2010, Intel Corporation.
+ * Author: Qing He <qing.he@xxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <xen/config.h>
+#include <asm/types.h>
+#include <asm/hvm/vmx/vmx.h>
+#include <asm/hvm/vmx/vvmcs.h>
+#include <asm/hvm/vmx/nest.h>
+
+/*
+ * VMX instructions support functions
+ */
+
+/*
+ * General-purpose register numbers understood by reg_read()/reg_write().
+ * The decoder feeds the reg1, reg2, base_reg and index_reg fields of
+ * union vmx_inst_info straight into those helpers, so the numeric value
+ * of each enumerator matters; the values are spelled out explicitly.
+ */
+enum vmx_regs_enc {
+    VMX_REG_RAX = 0,
+    VMX_REG_RCX = 1,
+    VMX_REG_RDX = 2,
+    VMX_REG_RBX = 3,
+    VMX_REG_RSP = 4,
+    VMX_REG_RBP = 5,
+    VMX_REG_RSI = 6,
+    VMX_REG_RDI = 7,
+#ifdef CONFIG_X86_64
+    VMX_REG_R8  = 8,
+    VMX_REG_R9  = 9,
+    VMX_REG_R10 = 10,
+    VMX_REG_R11 = 11,
+    VMX_REG_R12 = 12,
+    VMX_REG_R13 = 13,
+    VMX_REG_R14 = 14,
+    VMX_REG_R15 = 15,
+#endif
+};
+
+/*
+ * Segment register numbers as they appear in the `segment' field of
+ * union vmx_inst_info.  They are used as indices into sreg_to_index[],
+ * so the numeric values are significant.
+ */
+enum vmx_sregs_enc {
+    VMX_SREG_ES = 0,
+    VMX_SREG_CS = 1,
+    VMX_SREG_SS = 2,
+    VMX_SREG_DS = 3,
+    VMX_SREG_FS = 4,
+    VMX_SREG_GS = 5,
+};
+
+/*
+ * Translation table from a VMX segment register number (enum
+ * vmx_sregs_enc) to the enum x86_segment value handed to
+ * hvm_get_segment_register().  Only VMX_SREG_ES..VMX_SREG_GS have an
+ * entry, so an index must be range-checked before it is used.
+ *
+ * The table is a pure lookup table used by this file only, hence
+ * `static const': it stays out of the global namespace and cannot be
+ * modified at run time.
+ */
+static const enum x86_segment sreg_to_index[] = {
+    [VMX_SREG_ES] = x86_seg_es,
+    [VMX_SREG_CS] = x86_seg_cs,
+    [VMX_SREG_SS] = x86_seg_ss,
+    [VMX_SREG_DS] = x86_seg_ds,
+    [VMX_SREG_FS] = x86_seg_fs,
+    [VMX_SREG_GS] = x86_seg_gs,
+};
+
+/*
+ * View of the 32-bit value that decode_vmx_inst() reads from the
+ * VMX_INSTRUCTION_INFO VMCS field: `word' is the raw value, `fields' the
+ * same bits split into the individual operand descriptors.
+ *
+ * NOTE(review): the bit positions in the comments below only hold if the
+ * compiler allocates bit-fields starting at the least significant bit -
+ * confirm for any new toolchain.
+ */
+union vmx_inst_info {
+    struct {
+        /* index register is multiplied by (1 << scaling) */
+        unsigned int scaling           :2; /* bit 0-1 */
+        unsigned int __rsvd0           :1; /* bit 2 */
+        /* register operand, meaningful when memreg is set */
+        unsigned int reg1              :4; /* bit 3-6 */
+        /* decoder derives the access length as 1 << (addr_size + 1) */
+        unsigned int addr_size         :3; /* bit 7-9 */
+        /* non-zero: operand is a register (reg1); zero: memory operand */
+        unsigned int memreg            :1; /* bit 10 */
+        unsigned int __rsvd1           :4; /* bit 11-14 */
+        /* segment of the memory operand, an index into sreg_to_index[] */
+        unsigned int segment           :3; /* bit 15-17 */
+        unsigned int index_reg         :4; /* bit 18-21 */
+        /* set: index_reg is ignored and contributes 0 to the address */
+        unsigned int index_reg_invalid :1; /* bit 22 */
+        unsigned int base_reg          :4; /* bit 23-26 */
+        /* set: base_reg is ignored and contributes 0 to the address */
+        unsigned int base_reg_invalid  :1; /* bit 27 */
+        /* second register operand, always copied to the decode result */
+        unsigned int reg2              :4; /* bit 28-31 */
+    } fields;
+    u32 word;
+};
+
+/*
+ * Result of decode_vmx_inst().
+ *
+ * `type' selects which member of the anonymous union is valid:
+ *   VMX_INST_MEMREG_TYPE_MEMORY - `mem' holds the computed address of the
+ *                                 memory operand (segment base + base +
+ *                                 index * scale + displacement) and `len'
+ *                                 the length derived from the address-size
+ *                                 field;
+ *   VMX_INST_MEMREG_TYPE_REG    - `reg1' names the register operand.
+ * `reg2' is filled in for both types.
+ */
+struct vmx_inst_decoded {
+#define VMX_INST_MEMREG_TYPE_MEMORY 0
+#define VMX_INST_MEMREG_TYPE_REG    1
+    int type;
+    union {
+        /* valid when type == VMX_INST_MEMREG_TYPE_MEMORY */
+        struct {
+            unsigned long mem;
+            unsigned int  len;
+        };
+        /* valid when type == VMX_INST_MEMREG_TYPE_REG */
+        enum vmx_regs_enc reg1;
+    };
+
+    enum vmx_regs_enc reg2;
+};
+
+/*
+ * Outcome of an emulated VMX instruction, turned into guest EFLAGS by
+ * vmreturn().
+ */
+enum vmx_ops_result {
+    VMSUCCEED      = 0, /* CF, PF, AF, ZF, SF and OF all cleared */
+    VMFAIL_VALID   = 1, /* as above, then ZF set */
+    VMFAIL_INVALID = 2, /* as above, then CF set */
+};
+
+/*
+ * Building blocks for the switch statements in reg_read()/reg_write().
+ * Each CASE_{SET,GET}_REG() expands to one complete `case' arm and
+ * deliberately captures the identifiers `regs' and `value' from the
+ * function it is expanded in.
+ */
+#define CASE_SET_REG(REG, reg)      \
+    case VMX_REG_ ## REG: regs->reg = value; break
+#define CASE_GET_REG(REG, reg)      \
+    case VMX_REG_ ## REG: value = regs->reg; break
+
+#define CASE_EXTEND_SET_REG         \
+    CASE_EXTEND_REG(S)
+#define CASE_EXTEND_GET_REG         \
+    CASE_EXTEND_REG(G)
+
+/*
+ * CASE_EXTEND_REG(T) emits the arms for r8-r15; T is `S' or `G' and is
+ * pasted into CASE_SET_REG / CASE_GET_REG.  The arms are guarded by
+ * CONFIG_X86_64, the same symbol that guards the VMX_REG_R8..R15
+ * enumerators, so the arms exist exactly when the enumerators they name
+ * exist.
+ */
+#ifdef CONFIG_X86_64
+#define CASE_EXTEND_REG(T)          \
+    CASE_ ## T ## ET_REG(R8, r8);   \
+    CASE_ ## T ## ET_REG(R9, r9);   \
+    CASE_ ## T ## ET_REG(R10, r10); \
+    CASE_ ## T ## ET_REG(R11, r11); \
+    CASE_ ## T ## ET_REG(R12, r12); \
+    CASE_ ## T ## ET_REG(R13, r13); \
+    CASE_ ## T ## ET_REG(R14, r14); \
+    CASE_ ## T ## ET_REG(R15, r15)
+#else
+#define CASE_EXTEND_REG(T)
+#endif
+
+/*
+ * Return the value of the guest register selected by @index from the
+ * saved register frame @regs.  An @index that matches no arm yields 0.
+ */
+static unsigned long reg_read(struct cpu_user_regs *regs,
+                              enum vmx_regs_enc index)
+{
+    unsigned long value = 0;
+
+    /* Arms are listed in encoding order; each one assigns to `value'. */
+    switch ( index )
+    {
+    CASE_GET_REG(RAX, eax);
+    CASE_GET_REG(RCX, ecx);
+    CASE_GET_REG(RDX, edx);
+    CASE_GET_REG(RBX, ebx);
+    CASE_GET_REG(RSP, esp);
+    CASE_GET_REG(RBP, ebp);
+    CASE_GET_REG(RSI, esi);
+    CASE_GET_REG(RDI, edi);
+    CASE_EXTEND_GET_REG;
+    default:
+        break;
+    }
+
+    return value;
+}
+
+/*
+ * Store @value into the guest register selected by @index in the saved
+ * register frame @regs.  An @index that matches no arm is ignored.
+ */
+static void reg_write(struct cpu_user_regs *regs,
+                      enum vmx_regs_enc index,
+                      unsigned long value)
+{
+    /* Arms are listed in encoding order; each one consumes `value'. */
+    switch ( index )
+    {
+    CASE_SET_REG(RAX, eax);
+    CASE_SET_REG(RCX, ecx);
+    CASE_SET_REG(RDX, edx);
+    CASE_SET_REG(RBX, ebx);
+    CASE_SET_REG(RSP, esp);
+    CASE_SET_REG(RBP, ebp);
+    CASE_SET_REG(RSI, esi);
+    CASE_SET_REG(RDI, edi);
+    CASE_EXTEND_SET_REG;
+    default:
+        break;
+    }
+}
+
+/*
+ * Decode the operands of the VMX instruction that caused the current VM
+ * exit, using the VMX_INSTRUCTION_INFO and EXIT_QUALIFICATION VMCS fields
+ * and the saved guest registers in @regs.
+ *
+ * On return @decode->type tells whether the first operand is a register
+ * (@decode->reg1) or memory (@decode->mem, @decode->len); @decode->reg2 is
+ * always filled in.  For a memory operand the address is
+ * segment base + base + index * scale + displacement, and the length is
+ * 1 << (addr_size + 1).
+ *
+ * NOTE(review): @decode->len is derived from the address-size field and
+ * can be as large as 256 for the full 3-bit range; callers must not use
+ * it unchecked as a copy length into a fixed-size buffer.
+ */
+static void decode_vmx_inst(struct cpu_user_regs *regs,
+                            struct vmx_inst_decoded *decode)
+{
+    struct vcpu *v = current;
+    union vmx_inst_info info;
+    struct segment_register seg;
+    unsigned long base, index, seg_base, disp;
+    int scale;
+
+    info.word = __vmread(VMX_INSTRUCTION_INFO);
+
+    if ( info.fields.memreg )
+    {
+        decode->type = VMX_INST_MEMREG_TYPE_REG;
+        decode->reg1 = info.fields.reg1;
+    }
+    else
+    {
+        decode->type = VMX_INST_MEMREG_TYPE_MEMORY;
+
+        /*
+         * The segment field is 3 bits wide but sreg_to_index[] only has
+         * entries for 6 encodings: never index past the end of the table.
+         * An out-of-range encoding is logged and treated as base 0.
+         */
+        seg_base = 0;
+        if ( info.fields.segment <
+             sizeof(sreg_to_index) / sizeof(sreg_to_index[0]) )
+        {
+            hvm_get_segment_register(v, sreg_to_index[info.fields.segment],
+                                     &seg);
+            seg_base = seg.base;
+        }
+        else
+            gdprintk(XENLOG_ERR, "nest: bad segment encoding %u\n",
+                     (unsigned int)info.fields.segment);
+
+        base = info.fields.base_reg_invalid ? 0 :
+            reg_read(regs, info.fields.base_reg);
+
+        index = info.fields.index_reg_invalid ? 0 :
+            reg_read(regs, info.fields.index_reg);
+
+        scale = 1 << info.fields.scaling;
+
+        /* The displacement is taken from the exit qualification. */
+        disp = __vmread(EXIT_QUALIFICATION);
+
+        decode->mem = seg_base + base + index * scale + disp;
+        decode->len = 1 << (info.fields.addr_size + 1);
+    }
+
+    decode->reg2 = info.fields.reg2;
+}
+
+/*
+ * Report the outcome @res of an emulated VMX instruction through the
+ * guest's EFLAGS in @regs: CF, PF, AF, ZF, SF and OF are cleared, then ZF
+ * is set for VMFAIL_VALID, or CF for VMFAIL_INVALID and any unknown
+ * result.  VMSUCCEED leaves all six flags clear.
+ */
+static void vmreturn(struct cpu_user_regs *regs, enum vmx_ops_result res)
+{
+    const unsigned long status_flags =
+        X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
+        X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF;
+    unsigned long flags = regs->eflags;
+
+    flags &= ~status_flags;
+
+    if ( res == VMFAIL_VALID )
+        /* TODO: error number of VMFailValid */
+        flags |= X86_EFLAGS_ZF;
+    else if ( res != VMSUCCEED )
+        flags |= X86_EFLAGS_CF;
+
+    regs->eflags = flags;
+}
+
+/*
+ * Retire the currently cached virtual VMCS of @nest: VMCLEAR the shadow
+ * VMCS if one is allocated, copy the cached page back to guest physical
+ * address nest->gvmcs_pa and mark the cache invalid.
+ */
+static void __clear_current_vvmcs(struct vmx_nest_struct *nest)
+{
+    if ( nest->svmcs != NULL )
+        __vmpclear(virt_to_maddr(nest->svmcs));
+
+    /* Write one full page of cached VMCS data back to the guest. */
+    hvm_copy_to_guest_phys(nest->gvmcs_pa, nest->vvmcs, PAGE_SIZE);
+    nest->vmcs_invalid = 1;
+}
+
+/*
+ * VMX instructions handling
+ */
+
+/*
+ * Emulate VMXON for the current vcpu.
+ *
+ * Reads the VMXON region address from the instruction's memory operand,
+ * allocates the virtual-VMCS and shadow-VMCS pages, seeds the shadow VMCS
+ * with a copy of the vcpu's own VMCS and records the guest as being in
+ * VMX operation (nest->guest_vmxon_pa != 0).  Nested state is only
+ * committed once every step that can fail has succeeded.
+ *
+ * Returns X86EMUL_OKAY when the instruction has been completed (success
+ * or VMfail, reported through EFLAGS; the caller then advances the guest
+ * instruction pointer), or X86EMUL_EXCEPTION when an exception is
+ * delivered instead.
+ */
+int vmx_nest_handle_vmxon(struct cpu_user_regs *regs)
+{
+    struct vcpu *v = current;
+    struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
+    struct vmx_inst_decoded decode;
+    void *vvmcs, *svmcs;
+    u64 gpa = 0;
+    int rc;
+
+    if ( !v->domain->arch.hvm_domain.nesting_avail )
+        goto invalid_op;
+
+    /*
+     * VMXON while already in VMX operation must not allocate a second set
+     * of pages (the first set would be leaked): report VMfail instead.
+     */
+    if ( nest->guest_vmxon_pa )
+    {
+        vmreturn(regs, nest->vmcs_invalid ? VMFAIL_INVALID : VMFAIL_VALID);
+        goto out;
+    }
+
+    decode_vmx_inst(regs, &decode);
+
+    ASSERT(decode.type == VMX_INST_MEMREG_TYPE_MEMORY);
+
+    /*
+     * The operand is a 64-bit physical address whatever the address size
+     * of the instruction, so decode.len (derived from the address size)
+     * is not the right copy length.
+     */
+    rc = hvm_copy_from_guest_virt(&gpa, decode.mem, sizeof(gpa), 0);
+    if ( rc == HVMCOPY_bad_gva_to_gfn )
+        /* NOTE(review): the copy helper is expected to have queued #PF. */
+        return X86EMUL_EXCEPTION;
+    if ( rc != HVMCOPY_okay )
+    {
+        vmreturn(regs, VMFAIL_INVALID);
+        goto out;
+    }
+
+    /*
+     * guest_vmxon_pa == 0 means "not in VMX operation" throughout this
+     * file, so address 0 cannot be accepted; a region that is not page
+     * aligned is rejected as well.
+     */
+    if ( !gpa || (gpa & 0xfff) )
+    {
+        vmreturn(regs, VMFAIL_INVALID);
+        goto out;
+    }
+
+    vvmcs = alloc_xenheap_page();
+    if ( !vvmcs )
+    {
+        gdprintk(XENLOG_ERR, "nest: allocation for virtual vmcs failed\n");
+        vmreturn(regs, VMFAIL_INVALID);
+        goto out;
+    }
+    svmcs = alloc_xenheap_page();
+    if ( !svmcs )
+    {
+        gdprintk(XENLOG_ERR, "nest: allocation for shadow vmcs failed\n");
+        free_xenheap_page(vvmcs);
+        vmreturn(regs, VMFAIL_INVALID);
+        goto out;
+    }
+
+    /*
+     * Start from a zeroed cache page so that no previous heap content can
+     * reach the guest before a VMCS has been loaded into it.
+     */
+    memset(vvmcs, 0, PAGE_SIZE);
+
+    /* Nothing can fail from here on: commit the nested state. */
+    nest->vvmcs = vvmcs;
+    nest->svmcs = svmcs;
+    nest->guest_vmxon_pa = gpa;
+    nest->gvmcs_pa = 0;
+    nest->vmcs_invalid = 1;
+
+    /*
+     * `fork' the host vmcs to shadow_vmcs
+     * vmcs_lock is not needed since we are on current
+     */
+    nest->hvmcs = v->arch.hvm_vmx.vmcs;
+    __vmpclear(virt_to_maddr(nest->hvmcs));
+    memcpy(nest->svmcs, nest->hvmcs, PAGE_SIZE);
+    __vmptrld(virt_to_maddr(nest->hvmcs));
+    v->arch.hvm_vmx.launched = 0;
+
+    vmreturn(regs, VMSUCCEED);
+
+out:
+    return X86EMUL_OKAY;
+
+invalid_op:
+    hvm_inject_exception(TRAP_invalid_op, 0, 0);
+    return X86EMUL_EXCEPTION;
+}
+
+/*
+ * Emulate VMXOFF for the current vcpu: leave nested VMX operation,
+ * VMCLEAR the shadow VMCS and release the two pages allocated by VMXON.
+ *
+ * Injects #UD and returns X86EMUL_EXCEPTION if the guest is not in VMX
+ * operation; otherwise reports success through EFLAGS and returns
+ * X86EMUL_OKAY.
+ */
+int vmx_nest_handle_vmxoff(struct cpu_user_regs *regs)
+{
+    struct vcpu *v = current;
+    struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
+
+    if ( unlikely(!nest->guest_vmxon_pa) )
+        goto invalid_op;
+
+    nest->guest_vmxon_pa = 0;
+    __vmpclear(virt_to_maddr(nest->svmcs));
+
+    free_xenheap_page(nest->vvmcs);
+    free_xenheap_page(nest->svmcs);
+
+    /*
+     * Do not leave pointers to freed pages behind: a stale pointer could
+     * otherwise be dereferenced or freed a second time later on.
+     */
+    nest->vvmcs = NULL;
+    nest->svmcs = NULL;
+
+    vmreturn(regs, VMSUCCEED);
+    return X86EMUL_OKAY;
+
+invalid_op:
+    hvm_inject_exception(TRAP_invalid_op, 0, 0);
+    return X86EMUL_EXCEPTION;
+}
+
+/*
+ * Emulate VMPTRLD for the current vcpu: make the VMCS at the guest
+ * physical address given by the memory operand the current virtual VMCS
+ * and load its page into the nest->vvmcs cache.  A different VMCS that is
+ * currently cached is written back to the guest first.
+ *
+ * Injects #UD (X86EMUL_EXCEPTION) when the guest is not in VMX operation.
+ * Otherwise returns X86EMUL_OKAY with success or VMfailInvalid in EFLAGS;
+ * VMfailInvalid is reported for an unaligned address, for the VMXON
+ * region address and when the VMCS page cannot be read.
+ */
+int vmx_nest_handle_vmptrld(struct cpu_user_regs *regs)
+{
+    struct vcpu *v = current;
+    struct vmx_inst_decoded decode;
+    struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
+    u64 gpa = 0;
+    int rc;
+
+    if ( unlikely(!nest->guest_vmxon_pa) )
+        goto invalid_op;
+
+    decode_vmx_inst(regs, &decode);
+
+    ASSERT(decode.type == VMX_INST_MEMREG_TYPE_MEMORY);
+
+    /*
+     * The operand is a 64-bit physical address whatever the address size
+     * of the instruction, so decode.len is not the right copy length.
+     */
+    rc = hvm_copy_from_guest_virt(&gpa, decode.mem, sizeof(gpa), 0);
+    if ( rc == HVMCOPY_bad_gva_to_gfn )
+        /* NOTE(review): the copy helper is expected to have queued #PF. */
+        return X86EMUL_EXCEPTION;
+    if ( rc != HVMCOPY_okay )
+    {
+        vmreturn(regs, VMFAIL_INVALID);
+        goto out;
+    }
+
+    if ( gpa == nest->guest_vmxon_pa || gpa & 0xfff )
+    {
+        vmreturn(regs, VMFAIL_INVALID);
+        goto out;
+    }
+
+    if ( nest->gvmcs_pa != gpa )
+    {
+        /* Write the previously cached VMCS back before switching. */
+        if ( !nest->vmcs_invalid )
+            __clear_current_vvmcs(nest);
+        nest->gvmcs_pa = gpa;
+        ASSERT(nest->vmcs_invalid == 1);
+    }
+
+    if ( nest->vmcs_invalid )
+    {
+        /*
+         * Only mark the cache valid when the page was really read;
+         * otherwise it would hold data unrelated to this VMCS.
+         */
+        if ( hvm_copy_from_guest_phys(nest->vvmcs, nest->gvmcs_pa,
+                                      PAGE_SIZE) != HVMCOPY_okay )
+        {
+            vmreturn(regs, VMFAIL_INVALID);
+            goto out;
+        }
+        nest->vmcs_invalid = 0;
+    }
+
+    vmreturn(regs, VMSUCCEED);
+
+out:
+    return X86EMUL_OKAY;
+
+invalid_op:
+    hvm_inject_exception(TRAP_invalid_op, 0, 0);
+    return X86EMUL_EXCEPTION;
+}
+
+/*
+ * Emulate VMPTRST for the current vcpu: store nest->gvmcs_pa, the guest
+ * physical address recorded by the last VMPTRLD, to the 64-bit memory
+ * operand.
+ *
+ * Injects #UD (X86EMUL_EXCEPTION) when the guest is not in VMX operation.
+ * Otherwise returns X86EMUL_OKAY with the result in EFLAGS, or
+ * X86EMUL_EXCEPTION when the store faulted.
+ */
+int vmx_nest_handle_vmptrst(struct cpu_user_regs *regs)
+{
+    struct vcpu *v = current;
+    struct vmx_inst_decoded decode;
+    struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
+    u64 gpa = 0;
+    int rc;
+
+    if ( unlikely(!nest->guest_vmxon_pa) )
+        goto invalid_op;
+
+    decode_vmx_inst(regs, &decode);
+
+    ASSERT(decode.type == VMX_INST_MEMREG_TYPE_MEMORY);
+
+    gpa = nest->gvmcs_pa;
+
+    /*
+     * The destination is a 64-bit memory operand whatever the address
+     * size of the instruction, so decode.len is not the right length.
+     */
+    rc = hvm_copy_to_guest_virt(decode.mem, &gpa, sizeof(gpa), 0);
+    if ( rc == HVMCOPY_bad_gva_to_gfn )
+        /* NOTE(review): the copy helper is expected to have queued #PF. */
+        return X86EMUL_EXCEPTION;
+
+    vmreturn(regs, rc == HVMCOPY_okay ? VMSUCCEED : VMFAIL_INVALID);
+    return X86EMUL_OKAY;
+
+invalid_op:
+    hvm_inject_exception(TRAP_invalid_op, 0, 0);
+    return X86EMUL_EXCEPTION;
+}
+
+/*
+ * Emulate VMCLEAR for the current vcpu.
+ *
+ * If the operand names the VMCS that is currently cached in nest->vvmcs,
+ * the cache is written back to the guest and invalidated.  VMCLEAR of any
+ * other (page-aligned) address is logged and reported as success without
+ * further action.
+ *
+ * Injects #UD (X86EMUL_EXCEPTION) when the guest is not in VMX operation.
+ * Otherwise returns X86EMUL_OKAY with the result in EFLAGS, or
+ * X86EMUL_EXCEPTION when reading the operand faulted.
+ */
+int vmx_nest_handle_vmclear(struct cpu_user_regs *regs)
+{
+    struct vcpu *v = current;
+    struct vmx_inst_decoded decode;
+    struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
+    u64 gpa = 0;
+    int rc;
+
+    if ( unlikely(!nest->guest_vmxon_pa) )
+        goto invalid_op;
+
+    decode_vmx_inst(regs, &decode);
+
+    ASSERT(decode.type == VMX_INST_MEMREG_TYPE_MEMORY);
+
+    /*
+     * The operand is a 64-bit physical address whatever the address size
+     * of the instruction, so decode.len is not the right copy length.
+     */
+    rc = hvm_copy_from_guest_virt(&gpa, decode.mem, sizeof(gpa), 0);
+    if ( rc == HVMCOPY_bad_gva_to_gfn )
+        /* NOTE(review): the copy helper is expected to have queued #PF. */
+        return X86EMUL_EXCEPTION;
+    if ( rc != HVMCOPY_okay )
+    {
+        vmreturn(regs, VMFAIL_INVALID);
+        goto out;
+    }
+
+    if ( gpa & 0xfff )
+    {
+        vmreturn(regs, VMFAIL_VALID);
+        goto out;
+    }
+
+    if ( gpa != nest->gvmcs_pa )
+    {
+        gdprintk(XENLOG_ERR, "vmclear gpa not the same with current vmcs\n");
+        vmreturn(regs, VMSUCCEED);
+        goto out;
+    }
+
+    /*
+     * Only write the cache back when it actually holds this VMCS.  With
+     * an invalid cache the page content is stale or was never loaded, and
+     * flushing it would overwrite guest memory with unrelated data.
+     */
+    if ( !nest->vmcs_invalid )
+        __clear_current_vvmcs(nest);
+
+    vmreturn(regs, VMSUCCEED);
+
+out:
+    return X86EMUL_OKAY;
+
+invalid_op:
+    hvm_inject_exception(TRAP_invalid_op, 0, 0);
+    return X86EMUL_EXCEPTION;
+}
+
+
+
+/*
+ * Emulate VMREAD for the current vcpu: read the field whose encoding is
+ * held in register operand reg2 from the cached virtual VMCS and store it
+ * to the first operand (register reg1 or memory).
+ *
+ * Injects #UD (X86EMUL_EXCEPTION) when the guest is not in VMX operation.
+ * Reports VMfailInvalid when no VMCS is loaded in the cache.  Otherwise
+ * returns X86EMUL_OKAY with the result in EFLAGS, or X86EMUL_EXCEPTION
+ * when the store to a memory operand faulted.
+ *
+ * NOTE(review): the field encoding comes straight from a guest register;
+ * whether __get_vvmcs() validates it cannot be seen from this file.
+ */
+int vmx_nest_handle_vmread(struct cpu_user_regs *regs)
+{
+    struct vcpu *v = current;
+    struct vmx_inst_decoded decode;
+    struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
+    u64 value = 0;
+    unsigned int len;
+    int rc;
+
+    if ( unlikely(!nest->guest_vmxon_pa) )
+        goto invalid_op;
+
+    /*
+     * Without a loaded VMCS the cache page holds stale or unrelated
+     * data that must not be handed to the guest.
+     */
+    if ( nest->vmcs_invalid )
+    {
+        vmreturn(regs, VMFAIL_INVALID);
+        return X86EMUL_OKAY;
+    }
+
+    decode_vmx_inst(regs, &decode);
+
+    value = __get_vvmcs(nest->vvmcs, reg_read(regs, decode.reg2));
+
+    switch ( decode.type )
+    {
+    case VMX_INST_MEMREG_TYPE_MEMORY:
+        /* Never copy more bytes than `value' actually holds. */
+        len = decode.len > sizeof(value) ? sizeof(value) : decode.len;
+        rc = hvm_copy_to_guest_virt(decode.mem, &value, len, 0);
+        if ( rc == HVMCOPY_bad_gva_to_gfn )
+            /* NOTE(review): the copy helper should have queued #PF. */
+            return X86EMUL_EXCEPTION;
+        if ( rc != HVMCOPY_okay )
+        {
+            vmreturn(regs, VMFAIL_INVALID);
+            return X86EMUL_OKAY;
+        }
+        break;
+    case VMX_INST_MEMREG_TYPE_REG:
+        reg_write(regs, decode.reg1, value);
+        break;
+    }
+
+    vmreturn(regs, VMSUCCEED);
+    return X86EMUL_OKAY;
+
+invalid_op:
+    hvm_inject_exception(TRAP_invalid_op, 0, 0);
+    return X86EMUL_EXCEPTION;
+}
+
+/*
+ * Emulate VMWRITE for the current vcpu: take the value from the first
+ * operand (register reg1 or memory) and store it into the field of the
+ * cached virtual VMCS whose encoding is held in register operand reg2.
+ *
+ * Injects #UD (X86EMUL_EXCEPTION) when the guest is not in VMX operation.
+ * Reports VMfailInvalid when no VMCS is loaded in the cache.  Otherwise
+ * returns X86EMUL_OKAY with the result in EFLAGS, or X86EMUL_EXCEPTION
+ * when reading a memory operand faulted.
+ *
+ * NOTE(review): the field encoding comes straight from a guest register;
+ * whether __set_vvmcs() validates it cannot be seen from this file.
+ */
+int vmx_nest_handle_vmwrite(struct cpu_user_regs *regs)
+{
+    struct vcpu *v = current;
+    struct vmx_inst_decoded decode;
+    struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
+    u64 value = 0;
+    unsigned int len;
+    int rc;
+
+    if ( unlikely(!nest->guest_vmxon_pa) )
+        goto invalid_op;
+
+    /*
+     * Without a loaded VMCS there is nothing to write to: the update
+     * would be lost as soon as a VMCS is loaded into the cache.
+     */
+    if ( nest->vmcs_invalid )
+    {
+        vmreturn(regs, VMFAIL_INVALID);
+        return X86EMUL_OKAY;
+    }
+
+    decode_vmx_inst(regs, &decode);
+
+    switch ( decode.type )
+    {
+    case VMX_INST_MEMREG_TYPE_MEMORY:
+        /* Never copy more bytes than `value' can hold. */
+        len = decode.len > sizeof(value) ? sizeof(value) : decode.len;
+        rc = hvm_copy_from_guest_virt(&value, decode.mem, len, 0);
+        if ( rc == HVMCOPY_bad_gva_to_gfn )
+            /* NOTE(review): the copy helper should have queued #PF. */
+            return X86EMUL_EXCEPTION;
+        if ( rc != HVMCOPY_okay )
+        {
+            vmreturn(regs, VMFAIL_INVALID);
+            return X86EMUL_OKAY;
+        }
+        break;
+    case VMX_INST_MEMREG_TYPE_REG:
+        value = reg_read(regs, decode.reg1);
+        break;
+    }
+
+    __set_vvmcs(nest->vvmcs, reg_read(regs, decode.reg2), value);
+
+    vmreturn(regs, VMSUCCEED);
+    return X86EMUL_OKAY;
+
+invalid_op:
+    hvm_inject_exception(TRAP_invalid_op, 0, 0);
+    return X86EMUL_EXCEPTION;
+}
diff -r 9cb31076d2d0 -r 38a4757e94ef xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Thu Apr 22 22:30:09 2010 +0800
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Thu Apr 22 22:30:09 2010 +0800
@@ -2605,17 +2605,46 @@
             __update_guest_eip(inst_len);
         break;
 
+    case EXIT_REASON_VMCLEAR:
+        inst_len = __get_instruction_length();
+        if ( vmx_nest_handle_vmclear(regs) == X86EMUL_OKAY )
+            __update_guest_eip(inst_len);
+        break;
+    case EXIT_REASON_VMPTRLD:
+        inst_len = __get_instruction_length();
+        if ( vmx_nest_handle_vmptrld(regs) == X86EMUL_OKAY )
+            __update_guest_eip(inst_len);
+        break;
+    case EXIT_REASON_VMPTRST:
+        inst_len = __get_instruction_length();
+        if ( vmx_nest_handle_vmptrst(regs) == X86EMUL_OKAY )
+            __update_guest_eip(inst_len);
+        break;
+    case EXIT_REASON_VMREAD:
+        inst_len = __get_instruction_length();
+        if ( vmx_nest_handle_vmread(regs) == X86EMUL_OKAY )
+            __update_guest_eip(inst_len);
+        break;
+    case EXIT_REASON_VMWRITE:
+        inst_len = __get_instruction_length();
+        if ( vmx_nest_handle_vmwrite(regs) == X86EMUL_OKAY )
+            __update_guest_eip(inst_len);
+        break;
+    case EXIT_REASON_VMXOFF:
+        inst_len = __get_instruction_length();
+        if ( vmx_nest_handle_vmxoff(regs) == X86EMUL_OKAY )
+            __update_guest_eip(inst_len);
+        break;
+    case EXIT_REASON_VMXON:
+        inst_len = __get_instruction_length();
+        if ( vmx_nest_handle_vmxon(regs) == X86EMUL_OKAY )
+            __update_guest_eip(inst_len);
+        break;
+
     case EXIT_REASON_MWAIT_INSTRUCTION:
     case EXIT_REASON_MONITOR_INSTRUCTION:
-    case EXIT_REASON_VMCLEAR:
     case EXIT_REASON_VMLAUNCH:
-    case EXIT_REASON_VMPTRLD:
-    case EXIT_REASON_VMPTRST:
-    case EXIT_REASON_VMREAD:
     case EXIT_REASON_VMRESUME:
-    case EXIT_REASON_VMWRITE:
-    case EXIT_REASON_VMXOFF:
-    case EXIT_REASON_VMXON:
         vmx_inject_hw_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE);
         break;
 
diff -r 9cb31076d2d0 -r 38a4757e94ef xen/include/asm-x86/hvm/vmx/nest.h
--- a/xen/include/asm-x86/hvm/vmx/nest.h        Thu Apr 22 22:30:09 2010 +0800
+++ b/xen/include/asm-x86/hvm/vmx/nest.h        Thu Apr 22 22:30:09 2010 +0800
@@ -42,4 +42,14 @@
     int                  vmcs_invalid;
 };
 
+/*
+ * Handlers for VM exits caused by VMX instructions executed by the guest.
+ * Each returns X86EMUL_OKAY when the instruction was emulated (the exit
+ * handler in vmx.c then advances the guest instruction pointer) and
+ * X86EMUL_EXCEPTION when an exception was injected instead.
+ */
+
+/* Entering and leaving nested VMX operation. */
+int vmx_nest_handle_vmxon(struct cpu_user_regs *regs);
+int vmx_nest_handle_vmxoff(struct cpu_user_regs *regs);
+
+/* Selecting, querying and clearing the current virtual VMCS. */
+int vmx_nest_handle_vmptrld(struct cpu_user_regs *regs);
+int vmx_nest_handle_vmptrst(struct cpu_user_regs *regs);
+int vmx_nest_handle_vmclear(struct cpu_user_regs *regs);
+
+/* Field access to the current virtual VMCS. */
+int vmx_nest_handle_vmread(struct cpu_user_regs *regs);
+int vmx_nest_handle_vmwrite(struct cpu_user_regs *regs);
+
+
 #endif /* __ASM_X86_HVM_NEST_H__ */

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel