x86/emulate: add support for {,v}movd {,x}mm,r/m32 and {,v}movq {,x}mm,r/m64

From: Zhi Wang

Found that a Windows driver was using the SSE2 instruction MOVD.

Signed-off-by: Zhi Wang
Signed-off-by: Mihai Donțu
Signed-off-by: Jan Beulich
---
v4: Re-base on decoding changes. Address Andrew's and my own review
    comments (where still applicable). #UD when vex.l is set. Various
    adjustments to the test tool change.

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -973,6 +973,296 @@ int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+    printf("%-40s", "Testing movd %%mm3,32(%%ecx)...");
+    if ( stack_exec && cpu_has_mmx )
+    {
+        decl_insn(movd_to_mem);
+
+        asm volatile ( "pcmpeqb %%mm3, %%mm3\n"
+                       put_insn(movd_to_mem, "movd %%mm3, 32(%0)")
+                       :: "c" (NULL) );
+
+        memset(res, 0xbd, 64);
+        set_insn(movd_to_mem);
+        regs.ecx = (unsigned long)res;
+        regs.edx = 0;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(movd_to_mem) ||
+             res[8] + 1 ||
+             memcmp(res, res + 9, 28) ||
+             memcmp(res, res + 6, 8) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing movd %%xmm2,32(%%edx)...");
+    if ( stack_exec && cpu_has_sse2 )
+    {
+        decl_insn(movd_to_mem2);
+
+        asm volatile ( "pcmpeqb %%xmm2, %%xmm2\n"
+                       put_insn(movd_to_mem2, "movd %%xmm2, 32(%0)")
+                       :: "d" (NULL) );
+
+        memset(res, 0xdb, 64);
+        set_insn(movd_to_mem2);
+        regs.ecx = 0;
+        regs.edx = (unsigned long)res;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(movd_to_mem2) ||
+             res[8] + 1 ||
+             memcmp(res, res + 9, 28) ||
+             memcmp(res, res + 6, 8) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing vmovd %%xmm1,32(%%ecx)...");
+    if ( stack_exec && cpu_has_avx )
+    {
+        decl_insn(vmovd_to_mem);
+
+        asm volatile ( "pcmpeqb %%xmm1, %%xmm1\n"
+                       put_insn(vmovd_to_mem, "vmovd %%xmm1, 32(%0)")
+                       :: "c" (NULL) );
+
+        memset(res, 0xbd, 64);
+        set_insn(vmovd_to_mem);
+        regs.ecx = (unsigned long)res;
+        regs.edx = 0;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(vmovd_to_mem) ||
+             res[8] + 1 ||
+             memcmp(res, res + 9, 28) ||
+             memcmp(res, res + 6, 8) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing movd %%mm3,%%ebx...");
+    if ( stack_exec && cpu_has_mmx )
+    {
+        decl_insn(movd_to_reg);
+
+        /*
+         * Intentionally not specifying "b" as an input (or even output) here
+         * to not keep the compiler from using the variable, which in turn
+         * allows noticing whether the emulator touches the actual register
+         * instead of the regs field.
+         */
+        asm volatile ( "pcmpeqb %%mm3, %%mm3\n"
+                       put_insn(movd_to_reg, "movd %%mm3, %%ebx")
+                       :: );
+
+        set_insn(movd_to_reg);
+#ifdef __x86_64__
+        regs.rbx = 0xbdbdbdbdbdbdbdbdUL;
+#else
+        regs.ebx = 0xbdbdbdbdUL;
+#endif
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || !check_eip(movd_to_reg) ||
+             regs.ebx != 0xffffffff )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing movd %%xmm2,%%ebx...");
+    if ( stack_exec && cpu_has_sse2 )
+    {
+        decl_insn(movd_to_reg2);
+
+        /* See comment next to movd above. */
+        asm volatile ( "pcmpeqb %%xmm2, %%xmm2\n"
+                       put_insn(movd_to_reg2, "movd %%xmm2, %%ebx")
+                       :: );
+
+        set_insn(movd_to_reg2);
+#ifdef __x86_64__
+        regs.rbx = 0xbdbdbdbdbdbdbdbdUL;
+#else
+        regs.ebx = 0xbdbdbdbdUL;
+#endif
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || !check_eip(movd_to_reg2) ||
+             regs.ebx != 0xffffffff )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing vmovd %%xmm1,%%ebx...");
+    if ( stack_exec && cpu_has_avx )
+    {
+        decl_insn(vmovd_to_reg);
+
+        /* See comment next to movd above. */
+        asm volatile ( "pcmpeqb %%xmm1, %%xmm1\n"
+                       put_insn(vmovd_to_reg, "vmovd %%xmm1, %%ebx")
+                       :: );
+
+        set_insn(vmovd_to_reg);
+#ifdef __x86_64__
+        regs.rbx = 0xbdbdbdbdbdbdbdbdUL;
+#else
+        regs.ebx = 0xbdbdbdbdUL;
+#endif
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || !check_eip(vmovd_to_reg) ||
+             regs.ebx != 0xffffffff )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+#ifdef __x86_64__
+    printf("%-40s", "Testing movq %%mm3,32(%%ecx)...");
+    if ( stack_exec && cpu_has_mmx )
+    {
+        decl_insn(movq_to_mem3);
+
+        asm volatile ( "pcmpeqb %%mm3, %%mm3\n"
+                       put_insn(movq_to_mem3, "rex64 movd %%mm3, 32(%0)")
+                       :: "c" (NULL) );
+
+        memset(res, 0xbd, 64);
+        set_insn(movq_to_mem3);
+        regs.ecx = (unsigned long)res;
+        regs.edx = 0;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(movq_to_mem3) ||
+             *((long *)res + 4) + 1 ||
+             memcmp(res, res + 10, 24) ||
+             memcmp(res, res + 6, 8) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing movq %%xmm2,32(%%edx)...");
+    if ( stack_exec )
+    {
+        decl_insn(movq_to_mem4);
+
+        asm volatile ( "pcmpeqb %%xmm2, %%xmm2\n"
+                       put_insn(movq_to_mem4, "rex64 movd %%xmm2, 32(%0)")
+                       :: "d" (NULL) );
+
+        memset(res, 0xdb, 64);
+        set_insn(movq_to_mem4);
+        regs.ecx = 0;
+        regs.edx = (unsigned long)res;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(movq_to_mem4) ||
+             *((long *)res + 4) + 1 ||
+             memcmp(res, res + 10, 24) ||
+             memcmp(res, res + 6, 8) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing vmovq %%xmm1,32(%%ecx)...");
+    if ( stack_exec && cpu_has_avx )
+    {
+        decl_insn(vmovq_to_mem2);
+
+        asm volatile ( "pcmpeqb %%xmm1, %%xmm1\n"
+#if 0 /* This doesn't work, as the assembler will pick opcode D6. */
+                       put_insn(vmovq_to_mem2, "vmovq %%xmm1, 32(%0)")
+#else
+                       put_insn(vmovq_to_mem2, ".byte 0xc4, 0xe1, 0xf9, 0x7e, 0x49, 0x20")
+#endif
+                       :: "c" (NULL) );
+
+        memset(res, 0xbd, 64);
+        set_insn(vmovq_to_mem2);
+        regs.ecx = (unsigned long)res;
+        regs.edx = 0;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(vmovq_to_mem2) ||
+             *((long *)res + 4) + 1 ||
+             memcmp(res, res + 10, 24) ||
+             memcmp(res, res + 6, 8) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing movq %%mm3,%%rbx...");
+    if ( stack_exec && cpu_has_mmx )
+    {
+        decl_insn(movq_to_reg);
+
+        /* See comment next to movd above. */
+        asm volatile ( "pcmpeqb %%mm3, %%mm3\n"
+                       put_insn(movq_to_reg, "movq %%mm3, %%rbx")
+                       :: );
+
+        set_insn(movq_to_reg);
+        regs.rbx = 0xbdbdbdbdbdbdbdbdUL;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || regs.rbx + 1 || !check_eip(movq_to_reg) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing movq %%xmm2,%%rbx...");
+    if ( stack_exec )
+    {
+        decl_insn(movq_to_reg2);
+
+        /* See comment next to movd above. */
+        asm volatile ( "pcmpeqb %%xmm2, %%xmm2\n"
+                       put_insn(movq_to_reg2, "movq %%xmm2, %%rbx")
+                       :: );
+
+        set_insn(movq_to_reg2);
+        regs.rbx = 0xbdbdbdbdbdbdbdbdUL;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || regs.rbx + 1 || !check_eip(movq_to_reg2) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing vmovq %%xmm1,%%rbx...");
+    if ( stack_exec && cpu_has_avx )
+    {
+        decl_insn(vmovq_to_reg);
+
+        /* See comment next to movd above. */
+        asm volatile ( "pcmpeqb %%xmm1, %%xmm1\n"
+                       put_insn(vmovq_to_reg, "vmovq %%xmm1, %%rbx")
+                       :: );
+
+        set_insn(vmovq_to_reg);
+        regs.rbx = 0xbdbdbdbdbdbdbdbdUL;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || regs.rbx + 1 || !check_eip(vmovq_to_reg) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+#endif
+
 #undef decl_insn
 #undef put_insn
 #undef set_insn

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -223,7 +223,7 @@ static const opcode_desc_t twobyte_table
     /* 0x70 - 0x7F */
     SrcImmByte|ModRM, SrcImmByte|ModRM, SrcImmByte|ModRM, SrcImmByte|ModRM,
     ModRM, ModRM, ModRM, ImplicitOps,
-    ModRM, ModRM, 0, 0, ModRM, ModRM, ModRM, ImplicitOps|ModRM,
+    ModRM, ModRM, 0, 0, ModRM, ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM,
     /* 0x80 - 0x87 */
     DstImplicit|SrcImm, DstImplicit|SrcImm,
     DstImplicit|SrcImm, DstImplicit|SrcImm,
@@ -2291,6 +2291,10 @@ x86_decode(
         return X86EMUL_UNHANDLEABLE;
     }
 
+    if ( op_bytes == 2 &&
+         (ctxt->opcode & X86EMUL_OPC_PFX_MASK) == X86EMUL_OPC_66(0, 0) )
+        op_bytes = 4;
+
  done:
     return rc;
 }
@@ -4772,6 +4776,12 @@ x86_emulate(
                              /* vmovdqa ymm/m256,ymm */
     case X86EMUL_OPC_VEX_F3(0x0f, 0x6f): /* vmovdqu xmm/m128,xmm */
                              /* vmovdqu ymm/m256,ymm */
+    case X86EMUL_OPC(0x0f, 0x7e):        /* movd mm,r/m32 */
+                                         /* movq mm,r/m64 */
+    case X86EMUL_OPC_66(0x0f, 0x7e):     /* movd xmm,r/m32 */
+                                         /* movq xmm,r/m64 */
+    case X86EMUL_OPC_VEX_66(0x0f, 0x7e): /* vmovd xmm,r/m32 */
+                                         /* vmovq xmm,r/m64 */
     case X86EMUL_OPC(0x0f, 0x7f):        /* movq mm,mm/m64 */
     case X86EMUL_OPC_66(0x0f, 0x7f):     /* movdqa xmm,xmm/m128 */
     case X86EMUL_OPC_VEX_66(0x0f, 0x7f): /* vmovdqa xmm,xmm/m128 */
@@ -4822,10 +4832,16 @@ x86_emulate(
             get_fpu(X86EMUL_FPU_ymm, &fic);
             ea.bytes = 16 << vex.l;
         }
-        if ( b == 0xd6 )
+        switch ( b )
         {
+        case 0x7e:
+            generate_exception_if(vex.l, EXC_UD, -1);
+            ea.bytes = op_bytes;
+            break;
+        case 0xd6:
             generate_exception_if(vex.l, EXC_UD, -1);
             ea.bytes = 8;
+            break;
         }
         if ( ea.type == OP_MEM )
         {
@@ -4836,15 +4852,22 @@ x86_emulate(
             if ( b == 0x6f )
                 rc = ops->read(ea.mem.seg, ea.mem.off+0, mmvalp,
                                ea.bytes, ctxt);
-            /* convert memory operand to (%rAX) */
+        }
+        if ( ea.type == OP_MEM || b == 0x7e )
+        {
+            /* Convert memory operand or GPR destination to (%rAX) */
             rex_prefix &= ~REX_B;
             vex.b = 1;
             buf[4] &= 0x38;
+            if ( ea.type == OP_MEM )
+                ea.reg = (void *)mmvalp;
+            else /* Ensure zero-extension of a 32-bit result. */
+                *ea.reg = 0;
         }
         if ( !rc )
         {
             copy_REX_VEX(buf, rex_prefix, vex);
-            asm volatile ( "call *%0" : : "r" (stub.func), "a" (mmvalp)
+            asm volatile ( "call *%0" : : "r" (stub.func), "a" (ea.reg)
                            : "memory" );
         }
         put_fpu(&fic);
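
For reference, the "*ea.reg = 0" added in the last hunk mirrors the architectural behaviour the stub path relies on: a MOVD write to a 32-bit GPR destination zero-extends into the full 64-bit register. A minimal standalone sketch of that behaviour, purely illustrative and not part of the patch (it assumes an x86-64 build with SSE2 available):

#include <inttypes.h>
#include <stdio.h>

int main(void)
{
    uint64_t gpr = 0xbdbdbdbdbdbdbdbdULL;

    /* Fill xmm2 with all ones, then movd it to the 32-bit half of "gpr". */
    asm volatile ( "pcmpeqb %%xmm2, %%xmm2\n\t"
                   "movd %%xmm2, %k0"
                   : "+r" (gpr) :: "xmm2" );

    /* The 32-bit write zero-extends: this prints 0x00000000ffffffff, the
     * value the movd_to_reg* tests above expect to find in regs.ebx/rbx. */
    printf("%#018" PRIx64 "\n", gpr);
    return 0;
}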