SVM: use generic instruction decoding

... instead of custom handling. To facilitate this, break out init code
from _hvm_emulate_one() into the new hvm_emulate_init(), and make
hvmemul_insn_fetch() globally available.

Signed-off-by: Jan Beulich
Reviewed-by: Andrew Cooper
---
v3: Add comment explaining debug/non-debug build differences. Use
    unsigned int for opc_tab[] index variable.
v2: Add comment to caller field. Rename REG_POISON to PTR_POISON.
    Align opc_tab[] initializer lines.

--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -835,7 +835,7 @@ static int hvmemul_read(
         container_of(ctxt, struct hvm_emulate_ctxt, ctxt));
 }
 
-static int hvmemul_insn_fetch(
+int hvmemul_insn_fetch(
     enum x86_segment seg,
     unsigned long offset,
     void *p_data,
@@ -1766,15 +1766,14 @@ static const struct x86_emulate_ops hvm_
     .vmfunc        = hvmemul_vmfunc,
 };
 
-static int _hvm_emulate_one(struct hvm_emulate_ctxt *hvmemul_ctxt,
-    const struct x86_emulate_ops *ops)
+void hvm_emulate_init(
+    struct hvm_emulate_ctxt *hvmemul_ctxt,
+    const unsigned char *insn_buf,
+    unsigned int insn_bytes)
 {
-    struct cpu_user_regs *regs = hvmemul_ctxt->ctxt.regs;
     struct vcpu *curr = current;
-    uint32_t new_intr_shadow, pfec = PFEC_page_present;
-    struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
+    unsigned int pfec = PFEC_page_present;
     unsigned long addr;
-    int rc;
 
     if ( hvm_long_mode_enabled(curr) &&
          hvmemul_ctxt->seg_reg[x86_seg_cs].attr.fields.l )
@@ -1792,14 +1791,14 @@ static int _hvm_emulate_one(struct hvm_e
     if ( hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.dpl == 3 )
         pfec |= PFEC_user_mode;
 
-    hvmemul_ctxt->insn_buf_eip = regs->eip;
-    if ( !vio->mmio_insn_bytes )
+    hvmemul_ctxt->insn_buf_eip = hvmemul_ctxt->ctxt.regs->eip;
+    if ( !insn_bytes )
     {
         hvmemul_ctxt->insn_buf_bytes =
             hvm_get_insn_bytes(curr, hvmemul_ctxt->insn_buf) ?:
             (hvm_virtual_to_linear_addr(x86_seg_cs,
                                         &hvmemul_ctxt->seg_reg[x86_seg_cs],
-                                        regs->eip,
+                                        hvmemul_ctxt->insn_buf_eip,
                                         sizeof(hvmemul_ctxt->insn_buf),
                                         hvm_access_insn_fetch,
                                         hvmemul_ctxt->ctxt.addr_size,
@@ -1811,11 +1810,24 @@ static int _hvm_emulate_one(struct hvm_e
     }
     else
     {
-        hvmemul_ctxt->insn_buf_bytes = vio->mmio_insn_bytes;
-        memcpy(hvmemul_ctxt->insn_buf, vio->mmio_insn, vio->mmio_insn_bytes);
+        hvmemul_ctxt->insn_buf_bytes = insn_bytes;
+        memcpy(hvmemul_ctxt->insn_buf, insn_buf, insn_bytes);
     }
 
     hvmemul_ctxt->exn_pending = 0;
+}
+
+static int _hvm_emulate_one(struct hvm_emulate_ctxt *hvmemul_ctxt,
+    const struct x86_emulate_ops *ops)
+{
+    const struct cpu_user_regs *regs = hvmemul_ctxt->ctxt.regs;
+    struct vcpu *curr = current;
+    uint32_t new_intr_shadow;
+    struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
+    int rc;
+
+    hvm_emulate_init(hvmemul_ctxt, vio->mmio_insn, vio->mmio_insn_bytes);
+
     vio->mmio_retry = 0;
 
     if ( cpu_has_vmx )
--- a/xen/arch/x86/hvm/svm/emulate.c
+++ b/xen/arch/x86/hvm/svm/emulate.c
@@ -15,7 +15,7 @@
  * this program; If not, see <http://www.gnu.org/licenses/>.
  */
 
-#include <xen/config.h>
+#include <xen/err.h>
 #include <xen/init.h>
 #include <xen/lib.h>
 #include <xen/trace.h>
@@ -26,41 +26,6 @@
 #include <asm/hvm/svm/svm.h>
 #include <asm/hvm/svm/vmcb.h>
 
-static unsigned int is_prefix(u8 opc)
-{
-    switch ( opc )
-    {
-    case 0x66:
-    case 0x67:
-    case 0x2E:
-    case 0x3E:
-    case 0x26:
-    case 0x64:
-    case 0x65:
-    case 0x36:
-    case 0xF0:
-    case 0xF3:
-    case 0xF2:
-    case 0x40 ... 0x4f:
-        return 1;
-    }
-    return 0;
-}
-
-static unsigned long svm_rip2pointer(struct vcpu *v, unsigned long *limit)
-{
-    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
-    unsigned long p = vmcb->cs.base + vmcb->rip;
-
-    if ( !(vmcb->cs.attr.fields.l && hvm_long_mode_enabled(v)) )
-    {
-        *limit = vmcb->cs.limit;
-        return (u32)p; /* mask to 32 bits */
-    }
-    *limit = ~0UL;
-    return p;
-}
-
 static unsigned long svm_nextrip_insn_length(struct vcpu *v)
 {
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
@@ -89,141 +54,104 @@ static unsigned long svm_nextrip_insn_le
     return vmcb->nextrip - vmcb->rip;
 }
 
-/* First byte: Length. Following bytes: Opcode bytes. */
-#define MAKE_INSTR(nm, ...) static const u8 OPCODE_##nm[] = { __VA_ARGS__ }
-MAKE_INSTR(INVD,   2, 0x0f, 0x08);
-MAKE_INSTR(WBINVD, 2, 0x0f, 0x09);
-MAKE_INSTR(CPUID,  2, 0x0f, 0xa2);
-MAKE_INSTR(RDMSR,  2, 0x0f, 0x32);
-MAKE_INSTR(WRMSR,  2, 0x0f, 0x30);
-MAKE_INSTR(VMCALL, 3, 0x0f, 0x01, 0xd9);
-MAKE_INSTR(HLT,    1, 0xf4);
-MAKE_INSTR(INT3,   1, 0xcc);
-MAKE_INSTR(RDTSC,  2, 0x0f, 0x31);
-MAKE_INSTR(PAUSE,  1, 0x90);
-MAKE_INSTR(XSETBV, 3, 0x0f, 0x01, 0xd1);
-MAKE_INSTR(VMRUN,  3, 0x0f, 0x01, 0xd8);
-MAKE_INSTR(VMLOAD, 3, 0x0f, 0x01, 0xda);
-MAKE_INSTR(VMSAVE, 3, 0x0f, 0x01, 0xdb);
-MAKE_INSTR(STGI,   3, 0x0f, 0x01, 0xdc);
-MAKE_INSTR(CLGI,   3, 0x0f, 0x01, 0xdd);
-MAKE_INSTR(INVLPGA,3, 0x0f, 0x01, 0xdf);
-
-static const u8 *const opc_bytes[INSTR_MAX_COUNT] =
-{
-    [INSTR_INVD]    = OPCODE_INVD,
-    [INSTR_WBINVD]  = OPCODE_WBINVD,
-    [INSTR_CPUID]   = OPCODE_CPUID,
-    [INSTR_RDMSR]   = OPCODE_RDMSR,
-    [INSTR_WRMSR]   = OPCODE_WRMSR,
-    [INSTR_VMCALL]  = OPCODE_VMCALL,
-    [INSTR_HLT]     = OPCODE_HLT,
-    [INSTR_INT3]    = OPCODE_INT3,
-    [INSTR_RDTSC]   = OPCODE_RDTSC,
-    [INSTR_PAUSE]   = OPCODE_PAUSE,
-    [INSTR_XSETBV]  = OPCODE_XSETBV,
-    [INSTR_VMRUN]   = OPCODE_VMRUN,
-    [INSTR_VMLOAD]  = OPCODE_VMLOAD,
-    [INSTR_VMSAVE]  = OPCODE_VMSAVE,
-    [INSTR_STGI]    = OPCODE_STGI,
-    [INSTR_CLGI]    = OPCODE_CLGI,
-    [INSTR_INVLPGA] = OPCODE_INVLPGA,
+static const struct {
+    unsigned int opcode;
+    struct {
+        unsigned int rm:3;
+        unsigned int reg:3;
+        unsigned int mod:2;
+#define MODRM(mod, reg, rm) { rm, reg, mod }
+    } modrm;
+} opc_tab[INSTR_MAX_COUNT] = {
+    [INSTR_PAUSE]   = { X86EMUL_OPC_F3(0, 0x90) },
+    [INSTR_INT3]    = { X86EMUL_OPC(   0, 0xcc) },
+    [INSTR_HLT]     = { X86EMUL_OPC(   0, 0xf4) },
+    [INSTR_XSETBV]  = { X86EMUL_OPC(0x0f, 0x01), MODRM(3, 2, 1) },
+    [INSTR_VMRUN]   = { X86EMUL_OPC(0x0f, 0x01), MODRM(3, 3, 0) },
+    [INSTR_VMCALL]  = { X86EMUL_OPC(0x0f, 0x01), MODRM(3, 3, 1) },
+    [INSTR_VMLOAD]  = { X86EMUL_OPC(0x0f, 0x01), MODRM(3, 3, 2) },
+    [INSTR_VMSAVE]  = { X86EMUL_OPC(0x0f, 0x01), MODRM(3, 3, 3) },
+    [INSTR_STGI]    = { X86EMUL_OPC(0x0f, 0x01), MODRM(3, 3, 4) },
+    [INSTR_CLGI]    = { X86EMUL_OPC(0x0f, 0x01), MODRM(3, 3, 5) },
+    [INSTR_INVLPGA] = { X86EMUL_OPC(0x0f, 0x01), MODRM(3, 3, 7) },
+    [INSTR_INVD]    = { X86EMUL_OPC(0x0f, 0x08) },
+    [INSTR_WBINVD]  = { X86EMUL_OPC(0x0f, 0x09) },
+    [INSTR_WRMSR]   = { X86EMUL_OPC(0x0f, 0x30) },
+    [INSTR_RDTSC]   = { X86EMUL_OPC(0x0f, 0x31) },
+    [INSTR_RDMSR]   = { X86EMUL_OPC(0x0f, 0x32) },
+    [INSTR_CPUID]   = { X86EMUL_OPC(0x0f, 0xa2) },
 };
 
-static bool_t fetch(const struct vmcb_struct *vmcb, u8 *buf,
-                    unsigned long addr, unsigned int len)
-{
-    uint32_t pfec = (vmcb_get_cpl(vmcb) == 3) ? PFEC_user_mode : 0;
-
-    switch ( hvm_fetch_from_guest_virt(buf, addr, len, pfec) )
-    {
-    case HVMCOPY_okay:
-        break;
-    case HVMCOPY_bad_gva_to_gfn:
-        /* OK just to give up; we'll have injected #PF already */
-        return 0;
-    default:
-        /* Not OK: fetches from non-RAM pages are not supportable. */
-        gdprintk(XENLOG_WARNING, "Bad instruction fetch at %#lx (%#lx)\n",
-                 vmcb->rip, addr);
-        hvm_inject_hw_exception(TRAP_gp_fault, 0);
-        return 0;
-    }
-    return 1;
-}
-
 int __get_instruction_length_from_list(struct vcpu *v,
         const enum instruction_index *list, unsigned int list_count)
 {
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
-    unsigned int i, j, inst_len = 0;
-    enum instruction_index instr = 0;
-    u8 buf[MAX_INST_LEN];
-    const u8 *opcode = NULL;
-    unsigned long fetch_addr, fetch_limit;
-    unsigned int fetch_len, max_len;
-
+    struct hvm_emulate_ctxt ctxt;
+    struct x86_emulate_state *state;
+    unsigned int inst_len, j, modrm_rm, modrm_reg;
+    int modrm_mod;
+
+    /*
+     * In debug builds, always use x86_decode_insn() and compare with
+     * hardware.
+     */
+#ifdef NDEBUG
     if ( (inst_len = svm_nextrip_insn_length(v)) != 0 )
         return inst_len;
 
     if ( vmcb->exitcode == VMEXIT_IOIO )
         return vmcb->exitinfo2 - vmcb->rip;
+#endif
 
-    /* Fetch up to the next page break; we'll fetch from the next page
-     * later if we have to. */
-    fetch_addr = svm_rip2pointer(v, &fetch_limit);
-    if ( vmcb->rip > fetch_limit )
-        return 0;
-    max_len = min(fetch_limit - vmcb->rip + 1, MAX_INST_LEN + 0UL);
-    fetch_len = min_t(unsigned int, max_len,
-                      PAGE_SIZE - (fetch_addr & ~PAGE_MASK));
-    if ( !fetch(vmcb, buf, fetch_addr, fetch_len) )
+    ASSERT(v == current);
+    hvm_emulate_prepare(&ctxt, guest_cpu_user_regs());
+    hvm_emulate_init(&ctxt, NULL, 0);
+    state = x86_decode_insn(&ctxt.ctxt, hvmemul_insn_fetch);
+    if ( IS_ERR_OR_NULL(state) )
         return 0;
 
-    while ( (inst_len < max_len) && is_prefix(buf[inst_len]) )
-    {
-        inst_len++;
-        if ( inst_len >= fetch_len )
-        {
-            if ( !fetch(vmcb, buf + fetch_len, fetch_addr + fetch_len,
-                        max_len - fetch_len) )
-                return 0;
-            fetch_len = max_len;
-        }
+    inst_len = x86_insn_length(state, &ctxt.ctxt);
+    modrm_mod = x86_insn_modrm(state, &modrm_rm, &modrm_reg);
+    x86_emulate_free_state(state);
+#ifndef NDEBUG
+    if ( vmcb->exitcode == VMEXIT_IOIO )
+        j = vmcb->exitinfo2 - vmcb->rip;
+    else
+        j = svm_nextrip_insn_length(v);
+    if ( j && j != inst_len )
+    {
+        gprintk(XENLOG_WARNING, "insn-len[%02x]=%u (exp %u)\n",
+                ctxt.ctxt.opcode, inst_len, j);
+        return j;
     }
+#endif
 
     for ( j = 0; j < list_count; j++ )
     {
-        instr = list[j];
-        opcode = opc_bytes[instr];
+        unsigned int instr = list[j];
 
-        for ( i = 0; (i < opcode[0]) && ((inst_len + i) < max_len); i++ )
+        if ( instr >= ARRAY_SIZE(opc_tab) )
+        {
+            ASSERT_UNREACHABLE();
+            break;
+        }
+        if ( opc_tab[instr].opcode == ctxt.ctxt.opcode )
         {
-            if ( (inst_len + i) >= fetch_len )
-            {
-                if ( !fetch(vmcb, buf + fetch_len, fetch_addr + fetch_len,
-                            max_len - fetch_len) )
-                    return 0;
-                fetch_len = max_len;
-            }
+            if ( !opc_tab[instr].modrm.mod )
+                return inst_len;
 
-            if ( buf[inst_len+i] != opcode[i+1] )
-                goto mismatch;
+            if ( modrm_mod == opc_tab[instr].modrm.mod &&
+                 (modrm_rm & 7) == opc_tab[instr].modrm.rm &&
+                 (modrm_reg & 7) == opc_tab[instr].modrm.reg )
+                return inst_len;
         }
-        goto done;
-    mismatch: ;
     }
 
     gdprintk(XENLOG_WARNING,
-             "%s: Mismatch between expected and actual instruction bytes: "
+             "%s: Mismatch between expected and actual instruction: "
              "eip = %lx\n", __func__, (unsigned long)vmcb->rip);
     hvm_inject_hw_exception(TRAP_gp_fault, 0);
     return 0;
-
- done:
-    inst_len += opcode[0];
-    ASSERT(inst_len <= max_len);
-    return inst_len;
 }
 
 /*
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -382,9 +382,9 @@ struct operand {
     } mem;
 };
 #ifdef __x86_64__
-#define REG_POISON ((unsigned long *) 0x8086000000008086UL) /* non-canonical */
+#define PTR_POISON ((void *)0x8086000000008086UL) /* non-canonical */
 #else
-#define REG_POISON NULL /* 32-bit builds are for user-space, so NULL is OK. */
+#define PTR_POISON NULL /* 32-bit builds are for user-space, so NULL is OK. */
 #endif
 
 typedef union {
@@ -1646,6 +1646,14 @@ struct x86_emulate_state {
 
     unsigned long eip;
     struct cpu_user_regs *regs;
+
+#ifndef NDEBUG
+    /*
+     * Track caller of x86_decode_insn() to spot missing as well as
+     * premature calls to x86_emulate_free_state().
+     */
+    void *caller;
+#endif
 };
 
 /* Helper definitions. */
@@ -1673,6 +1681,11 @@ x86_decode_onebyte(
 
     switch ( ctxt->opcode )
     {
+    case 0x90: /* nop / pause */
+        if ( repe_prefix() )
+            ctxt->opcode |= X86EMUL_OPC_F3(0, 0);
+        break;
+
     case 0x9a: /* call (far, absolute) */
     case 0xea: /* jmp (far, absolute) */
         generate_exception_if(mode_64bit(), EXC_UD, -1);
@@ -1780,7 +1793,7 @@ x86_decode(
     override_seg = -1;
     ea.type = OP_MEM;
     ea.mem.seg = x86_seg_ds;
-    ea.reg = REG_POISON;
+    ea.reg = PTR_POISON;
 
     state->regs = ctxt->regs;
     state->eip = ctxt->regs->eip;
@@ -2267,8 +2280,8 @@ x86_emulate(
     struct x86_emulate_state state;
    int rc;
     uint8_t b, d;
-    struct operand src = { .reg = REG_POISON };
-    struct operand dst = { .reg = REG_POISON };
+    struct operand src = { .reg = PTR_POISON };
+    struct operand dst = { .reg = PTR_POISON };
     enum x86_swint_type swint_type;
     struct x86_emulate_stub stub = {};
     DECLARE_ALIGNED(mmval_t, mmval);
@@ -2861,8 +2874,9 @@ x86_emulate(
         break;
 
     case 0x90: /* nop / xchg %%r8,%%rax */
+    case X86EMUL_OPC_F3(0, 0x90): /* pause / xchg %%r8,%%rax */
         if ( !(rex_prefix & 1) )
-            break; /* nop */
+            break; /* nop / pause */
         /* fall through */
 
     case 0x91 ... 0x97: /* xchg reg,%%rax */
@@ -5199,3 +5213,89 @@ x86_emulate(
 #undef vex
 #undef override_seg
 #undef ea
+
+#ifdef __XEN__
+
+#include <xen/err.h>
+
+struct x86_emulate_state *
+x86_decode_insn(
+    struct x86_emulate_ctxt *ctxt,
+    int (*insn_fetch)(
+        enum x86_segment seg, unsigned long offset,
+        void *p_data, unsigned int bytes,
+        struct x86_emulate_ctxt *ctxt))
+{
+    static DEFINE_PER_CPU(struct x86_emulate_state, state);
+    struct x86_emulate_state *state = &this_cpu(state);
+    const struct x86_emulate_ops ops = {
+        .insn_fetch = insn_fetch,
+        .read       = x86emul_unhandleable_rw,
+        .write      = PTR_POISON,
+        .cmpxchg    = PTR_POISON,
+    };
+    int rc = x86_decode(state, ctxt, &ops);
+
+    if ( unlikely(rc != X86EMUL_OKAY) )
+        return ERR_PTR(-rc);
+
+#ifndef NDEBUG
+    /*
+     * While we avoid memory allocation (by use of per-CPU data) above,
+     * nevertheless make sure callers properly release the state structure
+     * for forward compatibility.
+     */
+ */ + if ( state->caller ) + { + printk(XENLOG_ERR "Unreleased emulation state acquired by %ps\n", + state->caller); + dump_execution_state(); + } + state->caller = __builtin_return_address(0); +#endif + + return state; +} + +static inline void check_state(const struct x86_emulate_state *state) +{ +#ifndef NDEBUG + ASSERT(state->caller); +#endif +} + +#ifndef NDEBUG +void x86_emulate_free_state(struct x86_emulate_state *state) +{ + check_state(state); + state->caller = NULL; +} +#endif + +int +x86_insn_modrm(const struct x86_emulate_state *state, + unsigned int *rm, unsigned int *reg) +{ + check_state(state); + + if ( !(state->desc & ModRM) ) + return -EINVAL; + + if ( rm ) + *rm = state->modrm_rm; + if ( reg ) + *reg = state->modrm_reg; + + return state->modrm_mod; +} + +unsigned int +x86_insn_length(const struct x86_emulate_state *state, + const struct x86_emulate_ctxt *ctxt) +{ + check_state(state); + + return state->eip - ctxt->regs->eip; +} + +#endif --- a/xen/arch/x86/x86_emulate/x86_emulate.h +++ b/xen/arch/x86/x86_emulate/x86_emulate.h @@ -532,4 +532,29 @@ x86emul_unhandleable_rw( unsigned int bytes, struct x86_emulate_ctxt *ctxt); +#ifdef __XEN__ + +struct x86_emulate_state * +x86_decode_insn( + struct x86_emulate_ctxt *ctxt, + int (*insn_fetch)( + enum x86_segment seg, unsigned long offset, + void *p_data, unsigned int bytes, + struct x86_emulate_ctxt *ctxt)); + +int +x86_insn_modrm(const struct x86_emulate_state *state, + unsigned int *rm, unsigned int *reg); +unsigned int +x86_insn_length(const struct x86_emulate_state *state, + const struct x86_emulate_ctxt *ctxt); + +#ifdef NDEBUG +static inline void x86_emulate_free_state(struct x86_emulate_state *state) {} +#else +void x86_emulate_free_state(struct x86_emulate_state *state); +#endif + +#endif + #endif /* __X86_EMULATE_H__ */ --- a/xen/include/asm-x86/hvm/emulate.h +++ b/xen/include/asm-x86/hvm/emulate.h @@ -54,6 +54,10 @@ void hvm_mem_access_emulate_one(enum emu void hvm_emulate_prepare( struct hvm_emulate_ctxt *hvmemul_ctxt, struct cpu_user_regs *regs); +void hvm_emulate_init( + struct hvm_emulate_ctxt *hvmemul_ctxt, + const unsigned char *insn_buf, + unsigned int insn_bytes); void hvm_emulate_writeback( struct hvm_emulate_ctxt *hvmemul_ctxt); struct segment_register *hvmemul_get_seg_reg( @@ -61,6 +65,11 @@ struct segment_register *hvmemul_get_seg struct hvm_emulate_ctxt *hvmemul_ctxt); int hvm_emulate_one_mmio(unsigned long mfn, unsigned long gla); +int hvmemul_insn_fetch(enum x86_segment seg, + unsigned long offset, + void *p_data, + unsigned int bytes, + struct x86_emulate_ctxt *ctxt); int hvmemul_do_pio_buffer(uint16_t port, unsigned int size, uint8_t dir,