RE: [Xen-devel] [PATCH 06/16] vmx: nest: handling VMX instruction exits

To: Tim Deegan <Tim.Deegan@xxxxxxxxxx>, "He, Qing" <qing.he@xxxxxxxxx>
Subject: RE: [Xen-devel] [PATCH 06/16] vmx: nest: handling VMX instruction exits
From: "Dong, Eddie" <eddie.dong@xxxxxxxxx>
Date: Wed, 15 Sep 2010 12:55:26 +0800
Cc: "xen-devel@xxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxx>, "Dong, Eddie" <eddie.dong@xxxxxxxxx>
In-reply-to: <20100913111001.GD3844@xxxxxxxxxxxxxxxxxxxxxxx>
References: <1283959344-3837-1-git-send-email-qing.he@xxxxxxxxx> <1283959344-3837-7-git-send-email-qing.he@xxxxxxxxx> <20100913111001.GD3844@xxxxxxxxxxxxxxxxxxxxxxx>
Tim Deegan wrote:
> At 16:22 +0100 on 08 Sep (1283962934), Qing He wrote:
>> diff -r f1c1d3077337 xen/arch/x86/hvm/vmx/nest.c
>> +/*
>> + * VMX instructions support functions
>> + */
>> +
>> +enum vmx_regs_enc {
>> +    VMX_REG_RAX,
>> +    VMX_REG_RCX,
>> +    VMX_REG_RDX,
>> +    VMX_REG_RBX,
>> +    VMX_REG_RSP,
>> +    VMX_REG_RBP,
>> +    VMX_REG_RSI,
>> +    VMX_REG_RDI,
>> +#ifdef CONFIG_X86_64
>> +    VMX_REG_R8,
>> +    VMX_REG_R9,
>> +    VMX_REG_R10,
>> +    VMX_REG_R11,
>> +    VMX_REG_R12,
>> +    VMX_REG_R13,
>> +    VMX_REG_R14,
>> +    VMX_REG_R15,
>> +#endif
>> +};
>> +
>> +enum vmx_sregs_enc {
>> +    VMX_SREG_ES,
>> +    VMX_SREG_CS,
>> +    VMX_SREG_SS,
>> +    VMX_SREG_DS,
>> +    VMX_SREG_FS,
>> +    VMX_SREG_GS,
>> +};
>> +
>> +enum x86_segment sreg_to_index[] = {
>> +    [VMX_SREG_ES] = x86_seg_es,
>> +    [VMX_SREG_CS] = x86_seg_cs,
>> +    [VMX_SREG_SS] = x86_seg_ss,
>> +    [VMX_SREG_DS] = x86_seg_ds,
>> +    [VMX_SREG_FS] = x86_seg_fs,
>> +    [VMX_SREG_GS] = x86_seg_gs,
>> +};
>
> Since you dislike adding new namespaces and translations, I'm sure you
> can get rid of these. :)  It might even simplify some of the macros
> below.

True, there is some duplication here. We can reuse the following definition
from x86_emulate.c:


static enum x86_segment
decode_segment(uint8_t modrm_reg)
{
    switch ( modrm_reg )
    {
    case 0: return x86_seg_es;
    case 1: return x86_seg_cs;
    case 2: return x86_seg_ss;
    case 3: return x86_seg_ds;
    case 4: return x86_seg_fs;
    case 5: return x86_seg_gs;
    default: break;
    }
    return decode_segment_failed;
}

BTW, should we use macros rather than magic numbers here? And can we modify
the x86_segment definitions to use the same name space?
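
For illustration, a rough sketch of what the reuse could look like inside
decode_vmx_inst() (this assumes decode_segment() is made non-static and
shared via a header, which is not the case today). The segment encoding in
the VMX instruction information field matches the ModRM reg encoding, so
the local vmx_sregs_enc/sreg_to_index[] pair could go away:

    /* Hypothetical: decode_segment() shared from x86_emulate.c */
    enum x86_segment seg_idx = decode_segment(info.fields.segment);

    if ( seg_idx == decode_segment_failed )
        goto gp_fault;

    hvm_get_segment_register(v, seg_idx, &seg);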

>
>> +union vmx_inst_info {
>> +    struct {
>> +        unsigned int scaling           :2; /* bit 0-1 */
>> +        unsigned int __rsvd0           :1; /* bit 2 */
>> +        unsigned int reg1              :4; /* bit 3-6 */
>> +        unsigned int addr_size         :3; /* bit 7-9 */
>> +        unsigned int memreg            :1; /* bit 10 */
>> +        unsigned int __rsvd1           :4; /* bit 11-14 */
>> +        unsigned int segment           :3; /* bit 15-17 */
>> +        unsigned int index_reg         :4; /* bit 18-21 */
>> +        unsigned int index_reg_invalid :1; /* bit 22 */
>> +        unsigned int base_reg          :4; /* bit 23-26 */
>> +        unsigned int base_reg_invalid  :1; /* bit 27 */
>> +        unsigned int reg2              :4; /* bit 28-31 */
>> +    } fields;
>> +    u32 word;
>> +};
>> +
>> +struct vmx_inst_decoded {
>> +#define VMX_INST_MEMREG_TYPE_MEMORY 0
>> +#define VMX_INST_MEMREG_TYPE_REG    1
>> +    int type;
>> +    union {
>> +        struct {
>> +            unsigned long mem;
>> +            unsigned int  len;
>> +        };
>> +        enum vmx_regs_enc reg1;
>> +    };
>> +
>> +    enum vmx_regs_enc reg2;
>> +};
>> +
>> +enum vmx_ops_result {
>> +    VMSUCCEED,
>> +    VMFAIL_VALID,
>> +    VMFAIL_INVALID,
>> +};
>> +
>> +#define CASE_SET_REG(REG, reg)      \
>> +    case VMX_REG_ ## REG: regs->reg = value; break
>> +#define CASE_GET_REG(REG, reg)      \
>> +    case VMX_REG_ ## REG: value = regs->reg; break
>> +
>> +#define CASE_EXTEND_SET_REG         \
>> +    CASE_EXTEND_REG(S)
>> +#define CASE_EXTEND_GET_REG         \
>> +    CASE_EXTEND_REG(G)
>> +
>> +#ifdef __i386__
>> +#define CASE_EXTEND_REG(T)
>> +#else
>> +#define CASE_EXTEND_REG(T)          \
>> +    CASE_ ## T ## ET_REG(R8, r8);   \
>> +    CASE_ ## T ## ET_REG(R9, r9);   \
>> +    CASE_ ## T ## ET_REG(R10, r10); \
>> +    CASE_ ## T ## ET_REG(R11, r11); \
>> +    CASE_ ## T ## ET_REG(R12, r12); \
>> +    CASE_ ## T ## ET_REG(R13, r13); \
>> +    CASE_ ## T ## ET_REG(R14, r14); \
>> +    CASE_ ## T ## ET_REG(R15, r15)
>> +#endif
>> +
>> +static unsigned long reg_read(struct cpu_user_regs *regs,
>> +                              enum vmx_regs_enc index)
>> +{
>> +    unsigned long value = 0;
>> +
>> +    switch ( index ) {
>> +    CASE_GET_REG(RAX, eax);
>> +    CASE_GET_REG(RCX, ecx);
>> +    CASE_GET_REG(RDX, edx);
>> +    CASE_GET_REG(RBX, ebx);
>> +    CASE_GET_REG(RBP, ebp);
>> +    CASE_GET_REG(RSI, esi);
>> +    CASE_GET_REG(RDI, edi);
>> +    CASE_GET_REG(RSP, esp);
>> +    CASE_EXTEND_GET_REG;
>> +    default:
>> +        break;
>> +    }
>> +
>> +    return value;
>> +}
>> +
>> +static void reg_write(struct cpu_user_regs *regs,
>> +                      enum vmx_regs_enc index,
>> +                      unsigned long value)
>> +{
>> +    switch ( index ) {
>> +    CASE_SET_REG(RAX, eax);
>> +    CASE_SET_REG(RCX, ecx);
>> +    CASE_SET_REG(RDX, edx);
>> +    CASE_SET_REG(RBX, ebx);
>> +    CASE_SET_REG(RBP, ebp);
>> +    CASE_SET_REG(RSI, esi);
>> +    CASE_SET_REG(RDI, edi);
>> +    CASE_SET_REG(RSP, esp);
>> +    CASE_EXTEND_SET_REG;
>> +    default:
>> +        break;
>> +    }
>> +}
>> +
>> +static int decode_vmx_inst(struct cpu_user_regs *regs,
>> +                           struct vmx_inst_decoded *decode)
>> +{
>> +    struct vcpu *v = current;
>> +    union vmx_inst_info info;
>> +    struct segment_register seg;
>> +    unsigned long base, index, seg_base, disp, offset;
>> +    int scale;
>> +
>> +    info.word = __vmread(VMX_INSTRUCTION_INFO);
>> +
>> +    if ( info.fields.memreg ) {
>> +        decode->type = VMX_INST_MEMREG_TYPE_REG;
>> +        decode->reg1 = info.fields.reg1;
>> +    }
>> +    else
>> +    {
>> +        decode->type = VMX_INST_MEMREG_TYPE_MEMORY;
>> +        hvm_get_segment_register(v, sreg_to_index[info.fields.segment], &seg);
>> +        /* TODO: segment type check */
>
> Indeed. :)
>
>> +        seg_base = seg.base;
>> +
>> +        base = info.fields.base_reg_invalid ? 0 :
>> +            reg_read(regs, info.fields.base_reg);
>> +
>> +        index = info.fields.index_reg_invalid ? 0 :
>> +            reg_read(regs, info.fields.index_reg);
>> +
>> +        scale = 1 << info.fields.scaling;
>> +
>> +        disp = __vmread(EXIT_QUALIFICATION);
>> +
>> +        offset = base + index * scale + disp;
>> +        if ( offset > seg.limit )
>
> DYM if ( offset + len > limit )?
>
> Would it be worth trying to fold this into the existing x86_emulate
> code, which already has careful memory access checks?


Can you give more details? Do you mean reconstructing this check around
hvm_virtual_to_linear_addr?
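
If the suggestion is just to reuse the existing access checks, here is an
untested sketch of how decode_vmx_inst() might call
hvm_virtual_to_linear_addr() (based on its current interface in hvm.c)
instead of the open-coded limit test:

/*
 * Sketch only: let hvm_virtual_to_linear_addr() do the limit, canonical
 * and attribute checks; passing the operand length also covers the last
 * byte of the access.  The access type would be hvm_access_write for
 * VMPTRST and hvm_access_read for the other instructions.
 */
unsigned long linear;

decode->len = 1 << (info.fields.addr_size + 1);
if ( !hvm_virtual_to_linear_addr(sreg_to_index[info.fields.segment], &seg,
                                 base + index * scale + disp, decode->len,
                                 hvm_access_read,
                                 16 << info.fields.addr_size, &linear) )
    goto gp_fault;
decode->mem = linear;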


>
>> +            goto gp_fault;
>> +
>> +        decode->mem = seg_base + base + index * scale + disp;
>> +        decode->len = 1 << (info.fields.addr_size + 1);
>> +    }
>> +
>> +    decode->reg2 = info.fields.reg2;
>> +
>> +    return X86EMUL_OKAY;
>> +
>> +gp_fault:
>> +    hvm_inject_exception(TRAP_gp_fault, 0, 0);
>> +    return X86EMUL_EXCEPTION;
>> +}
>> +
>> +static int vmx_inst_check_privilege(struct cpu_user_regs *regs)
>> +{
>> +    struct vcpu *v = current;
>> +    struct segment_register cs;
>> +
>> +    hvm_get_segment_register(v, x86_seg_cs, &cs);
>> +
>> +    if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) ||
>> +         !(v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_VMXE) ||
>> +         (regs->eflags & X86_EFLAGS_VM) ||
>> +         (hvm_long_mode_enabled(v) && cs.attr.fields.l == 0) )
>> +        goto invalid_op;
>> +
>> +    if ( (cs.sel & 3) > 0 )
>> +        goto gp_fault;
>> +
>> +    return X86EMUL_OKAY;
>> +
>> +invalid_op:
>> +    hvm_inject_exception(TRAP_invalid_op, 0, 0);
>> +    return X86EMUL_EXCEPTION;
>> +
>> +gp_fault:
>> +    hvm_inject_exception(TRAP_gp_fault, 0, 0);
>> +    return X86EMUL_EXCEPTION;
>> +}
>> +
>> +static void vmreturn(struct cpu_user_regs *regs, enum vmx_ops_result res)
>> +{
>> +    unsigned long eflags = regs->eflags;
>> +    unsigned long mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
>> +                         X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF;
>> +
>> +    eflags &= ~mask;
>> +
>> +    switch ( res ) {
>> +    case VMSUCCEED:
>> +        break;
>> +    case VMFAIL_VALID:
>> +        /* TODO: error number, useful for guest VMM debugging */
>> +        eflags |= X86_EFLAGS_ZF;
>> +        break;
>> +    case VMFAIL_INVALID:
>> +    default:
>> +        eflags |= X86_EFLAGS_CF;
>> +        break;
>> +    }
>> +
>> +    regs->eflags = eflags;
>> +}
>> +
>> +static int __clear_current_vvmcs(struct vmx_nest_struct *nest)
>> +{
>> +    int rc;
>> +
>> +    if ( nest->svmcs )
>> +        __vmpclear(virt_to_maddr(nest->svmcs));
>> +
>> +#if !CONFIG_VVMCS_MAPPING
>> +    rc = hvm_copy_to_guest_phys(nest->gvmcs_pa, nest->vvmcs, PAGE_SIZE);
>> +    if ( rc != HVMCOPY_okay )
>> +        return X86EMUL_EXCEPTION;
>> +#endif
>> +
>> +    nest->vmcs_valid = 0;
>> +
>> +    return X86EMUL_OKAY;
>> +}
>> +
>> +/*
>> + * VMX instructions handling
>> + */
>> +
>> +int vmx_nest_handle_vmxon(struct cpu_user_regs *regs)
>> +{
>> +    struct vcpu *v = current;
>> +    struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
>> +    struct vmx_inst_decoded decode;
>> +    unsigned long gpa = 0;
>> +    int rc;
>> +
>> +    if ( !is_nested_avail(v->domain) )
>> +        goto invalid_op;
>> +
>> +    rc = vmx_inst_check_privilege(regs);
>> +    if ( rc != X86EMUL_OKAY )
>> +        return rc;
>
> I think you could fold these checks and the goto target into
> decode_vmx_inst(), just to avoid repeating them in every
> vmx_nest_handle_* function.
>
>> +    rc = decode_vmx_inst(regs, &decode);
>> +    if ( rc != X86EMUL_OKAY )
>> +        return rc;
>> +
>> +    ASSERT(decode.type == VMX_INST_MEMREG_TYPE_MEMORY);
>> +    rc = hvm_copy_from_guest_virt(&gpa, decode.mem, decode.len, 0);
>> +    if ( rc != HVMCOPY_okay )
>> +        return X86EMUL_EXCEPTION;
>> +
>> +    nest->guest_vmxon_pa = gpa;
>> +    nest->gvmcs_pa = 0;
>> +    nest->vmcs_valid = 0;
>> +#if !CONFIG_VVMCS_MAPPING
>> +    nest->vvmcs = alloc_xenheap_page();
>> +    if ( !nest->vvmcs )
>> +    {
>> +        gdprintk(XENLOG_ERR, "nest: allocation for virtual vmcs failed\n");
>> +        vmreturn(regs, VMFAIL_INVALID);
>> +        goto out;
>> +    }
>> +#endif
>> +    nest->svmcs = alloc_xenheap_page();
>> +    if ( !nest->svmcs )
>> +    {
>> +        gdprintk(XENLOG_ERR, "nest: allocation for shadow vmcs failed\n");
>> +        free_xenheap_page(nest->vvmcs);
>> +        vmreturn(regs, VMFAIL_INVALID);
>> +        goto out;
>> +    }
>> +
>> +    /*
>> +     * `fork' the host vmcs to shadow_vmcs
>> +     * vmcs_lock is not needed since we are on current
>> +     */
>> +    nest->hvmcs = v->arch.hvm_vmx.vmcs;
>> +    __vmpclear(virt_to_maddr(nest->hvmcs));
>> +    memcpy(nest->svmcs, nest->hvmcs, PAGE_SIZE);
>> +    __vmptrld(virt_to_maddr(nest->hvmcs));
>> +    v->arch.hvm_vmx.launched = 0;
>> +
>> +    vmreturn(regs, VMSUCCEED);
>> +
>> +out:
>> +    return X86EMUL_OKAY;
>> +
>> +invalid_op:
>> +    hvm_inject_exception(TRAP_invalid_op, 0, 0);
>> +    return X86EMUL_EXCEPTION;
>> +}
>> +
>> +int vmx_nest_handle_vmxoff(struct cpu_user_regs *regs)
>> +{
>> +    struct vcpu *v = current;
>> +    struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
>> +    int rc;
>> +
>> +    if ( unlikely(!nest->guest_vmxon_pa) )
>> +        goto invalid_op;
>> +
>> +    rc = vmx_inst_check_privilege(regs);
>> +    if ( rc != X86EMUL_OKAY )
>> +        return rc;
>> +
>> +    nest->guest_vmxon_pa = 0;
>> +    __vmpclear(virt_to_maddr(nest->svmcs));
>> +
>> +#if !CONFIG_VVMCS_MAPPING
>> +    free_xenheap_page(nest->vvmcs);
>> +#endif
>> +    free_xenheap_page(nest->svmcs);
>
> These also need to be freed on domain teardown.
>
>> +    vmreturn(regs, VMSUCCEED);
>> +    return X86EMUL_OKAY;
>> +
>> +invalid_op:
>> +    hvm_inject_exception(TRAP_invalid_op, 0, 0);
>> +    return X86EMUL_EXCEPTION;
>> +}
>> +
>> +int vmx_nest_handle_vmptrld(struct cpu_user_regs *regs)
>> +{
>> +    struct vcpu *v = current;
>> +    struct vmx_inst_decoded decode;
>> +    struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
>> +    unsigned long gpa = 0;
>> +    int rc;
>> +
>> +    if ( unlikely(!nest->guest_vmxon_pa) )
>> +        goto invalid_op;
>> +
>> +    rc = vmx_inst_check_privilege(regs);
>> +    if ( rc != X86EMUL_OKAY )
>> +        return rc;
>> +
>> +    rc = decode_vmx_inst(regs, &decode);
>> +    if ( rc != X86EMUL_OKAY )
>> +        return rc;
>> +
>> +    ASSERT(decode.type == VMX_INST_MEMREG_TYPE_MEMORY);
>> +    rc = hvm_copy_from_guest_virt(&gpa, decode.mem, decode.len, 0);
>> +    if ( rc != HVMCOPY_okay )
>> +        return X86EMUL_EXCEPTION;
>> +
>> +    if ( gpa == nest->guest_vmxon_pa || gpa & 0xfff )
>> +    {
>> +        vmreturn(regs, VMFAIL_INVALID);
>> +        goto out;
>> +    }
>> +
>> +    if ( nest->gvmcs_pa != gpa )
>> +    {
>> +        if ( nest->vmcs_valid )
>> +        {
>> +            rc = __clear_current_vvmcs(nest);
>> +            if ( rc != X86EMUL_OKAY )
>> +                return rc;
>> +        }
>> +        nest->gvmcs_pa = gpa;
>> +        ASSERT(nest->vmcs_valid == 0);
>> +    }
>> +
>> +
>> +    if ( !nest->vmcs_valid )
>> +    {
>> +#if CONFIG_VVMCS_MAPPING
>> +        unsigned long mfn;
>> +        p2m_type_t p2mt;
>> +
>> +        mfn = mfn_x(gfn_to_mfn(p2m_get_hostp2m(v->domain),
>> +                               nest->gvmcs_pa >> PAGE_SHIFT, &p2mt));
>> +        nest->vvmcs = map_domain_page_global(mfn);
>> +#else
>> +        rc = hvm_copy_from_guest_phys(nest->vvmcs, nest->gvmcs_pa, PAGE_SIZE);
>> +        if ( rc != HVMCOPY_okay )
>> +            return X86EMUL_EXCEPTION;
>> +#endif
>> +        nest->vmcs_valid = 1;
>> +    }
>> +
>> +    vmreturn(regs, VMSUCCEED);
>> +
>> +out:
>> +    return X86EMUL_OKAY;
>> +
>> +invalid_op:
>> +    hvm_inject_exception(TRAP_invalid_op, 0, 0);
>> +    return X86EMUL_EXCEPTION;
>> +}
>> +
>> +int vmx_nest_handle_vmptrst(struct cpu_user_regs *regs)
>> +{
>> +    struct vcpu *v = current;
>> +    struct vmx_inst_decoded decode;
>> +    struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
>> +    unsigned long gpa = 0;
>> +    int rc;
>> +
>> +    if ( unlikely(!nest->guest_vmxon_pa) )
>> +        goto invalid_op;
>> +
>> +    rc = vmx_inst_check_privilege(regs);
>> +    if ( rc != X86EMUL_OKAY )
>> +        return rc;
>> +
>> +    rc = decode_vmx_inst(regs, &decode);
>> +    if ( rc != X86EMUL_OKAY )
>> +        return rc;
>> +
>> +    ASSERT(decode.type == VMX_INST_MEMREG_TYPE_MEMORY);
>> +
>> +    gpa = nest->gvmcs_pa;
>> +
>> +    rc = hvm_copy_to_guest_virt(decode.mem, &gpa, decode.len, 0);
>> +    if ( rc != HVMCOPY_okay )
>> +        return X86EMUL_EXCEPTION;
>> +
>> +    vmreturn(regs, VMSUCCEED);
>> +    return X86EMUL_OKAY;
>> +
>> +invalid_op:
>> +    hvm_inject_exception(TRAP_invalid_op, 0, 0);
>> +    return X86EMUL_EXCEPTION;
>> +}
>> +
>> +int vmx_nest_handle_vmclear(struct cpu_user_regs *regs)
>> +{
>> +    struct vcpu *v = current;
>> +    struct vmx_inst_decoded decode;
>> +    struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
>> +    unsigned long gpa = 0;
>> +    int rc;
>> +
>> +    if ( unlikely(!nest->guest_vmxon_pa) )
>> +        goto invalid_op;
>> +
>> +    rc = vmx_inst_check_privilege(regs);
>> +    if ( rc != X86EMUL_OKAY )
>> +        return rc;
>> +
>> +    rc = decode_vmx_inst(regs, &decode);
>> +    if ( rc != X86EMUL_OKAY )
>> +        return rc;
>> +
>> +    ASSERT(decode.type == VMX_INST_MEMREG_TYPE_MEMORY);
>> +    rc = hvm_copy_from_guest_virt(&gpa, decode.mem, decode.len, 0);
>
> Is it guaranteed that decode.len is always <= sizeof gpa here, even
> with a malicious guest?

Reusing hvm_virtual_to_linear_addr, with the last byte of the access taken
into account, may be the best choice :)
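
Until that is done, one simple guard (an untested sketch, not part of this
patch) would be to rely on the architectural operand size: VMXON, VMPTRLD,
VMPTRST and VMCLEAR all take an m64 operand, so the handlers could copy a
fixed 8 bytes into a 64-bit variable rather than trusting the decoded
length:

/* Sketch: the memory operand of these instructions is architecturally 64-bit */
u64 gpa = 0;

if ( hvm_copy_from_guest_virt(&gpa, decode.mem, sizeof(gpa), 0) != HVMCOPY_okay )
    return X86EMUL_EXCEPTION;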

Thx, Eddie

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
