x86emul: support load forms of {,V}MOV{D,Q}

Signed-off-by: Jan Beulich

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -823,6 +823,29 @@ int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+    printf("%-40s", "Testing movq 32(%ecx),%xmm1...");
+    if ( stack_exec && cpu_has_sse2 )
+    {
+        decl_insn(movq_from_mem2);
+
+        asm volatile ( "pcmpeqb %%xmm1, %%xmm1\n"
+                       put_insn(movq_from_mem2, "movq 32(%0), %%xmm1")
+                       :: "c" (NULL) );
+
+        set_insn(movq_from_mem2);
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(movq_from_mem2) )
+            goto fail;
+        asm ( "pcmpgtb %%xmm0, %%xmm0\n\t"
+              "pcmpeqb %%xmm1, %%xmm0\n\t"
+              "pmovmskb %%xmm0, %0" : "=r" (rc) );
+        if ( rc != 0xffff ) /* every byte of %xmm1 must be zero */
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
     printf("%-40s", "Testing vmovq %xmm1,32(%edx)...");
     if ( stack_exec && cpu_has_avx )
     {
@@ -847,6 +870,29 @@ int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+    printf("%-40s", "Testing vmovq 32(%edx),%xmm0...");
+    if ( stack_exec && cpu_has_avx )
+    {
+        decl_insn(vmovq_from_mem);
+
+        asm volatile ( "pcmpeqb %%xmm0, %%xmm0\n"
+                       put_insn(vmovq_from_mem, "vmovq 32(%0), %%xmm0")
+                       :: "d" (NULL) );
+
+        set_insn(vmovq_from_mem);
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(vmovq_from_mem) )
+            goto fail;
+        asm ( "pcmpgtb %%xmm1, %%xmm1\n\t"
+              "pcmpeqb %%xmm0, %%xmm1\n\t"
+              "pmovmskb %%xmm1, %0" : "=r" (rc) );
+        if ( rc != 0xffff ) /* every byte of %xmm0 must be zero */
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
     printf("%-40s", "Testing movdqu %xmm2,(%ecx)...");
     if ( stack_exec && cpu_has_sse2 )
     {
@@ -1083,6 +1129,33 @@ int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+    printf("%-40s", "Testing movd 32(%ecx),%mm4...");
+    if ( stack_exec && cpu_has_mmx )
+    {
+        decl_insn(movd_from_mem);
+
+        asm volatile ( "pcmpgtb %%mm4, %%mm4\n"
+                       put_insn(movd_from_mem, "movd 32(%0), %%mm4")
+                       :: "c" (NULL) );
+
+        set_insn(movd_from_mem);
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(movd_from_mem) )
+            goto fail;
+        asm ( "pxor %%mm2,%%mm2\n\t"
+              "pcmpeqb %%mm4, %%mm2\n\t"
+              "pmovmskb %%mm2, %0" : "=r" (rc) );
+        if ( rc != 0xf0 ) /* bytes 4-7 zero, bytes 0-3 non-zero */
+            goto fail;
+        asm ( "pcmpeqb %%mm4, %%mm3\n\t"
+              "pmovmskb %%mm3, %0" : "=r" (rc) );
+        if ( rc != 0x0f ) /* bytes 0-3 equal %mm3's, bytes 4-7 differ */
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
     printf("%-40s", "Testing movd %xmm2,32(%edx)...");
     if ( stack_exec && cpu_has_sse2 )
     {
@@ -1107,6 +1180,34 @@ int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+    printf("%-40s", "Testing movd 32(%edx),%xmm3...");
+    if ( stack_exec && cpu_has_sse2 )
+    {
+        decl_insn(movd_from_mem2);
+
+        asm volatile ( "pcmpeqb %%xmm3, %%xmm3\n"
+                       put_insn(movd_from_mem2, "movd 32(%0), %%xmm3")
+                       :: "d" (NULL) );
+
+        set_insn(movd_from_mem2);
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(movd_from_mem2) )
+            goto fail;
+        asm ( "pxor %%xmm1,%%xmm1\n\t"
+              "pcmpeqb %%xmm3, %%xmm1\n\t"
+              "pmovmskb %%xmm1, %0" : "=r" (rc) );
+        if ( rc != 0xfff0 ) /* bytes 4-15 zero, bytes 0-3 non-zero */
+            goto fail;
+        asm ( "pcmpeqb %%xmm2, %%xmm2\n\t"
+              "pcmpeqb %%xmm3, %%xmm2\n\t"
+              "pmovmskb %%xmm2, %0" : "=r" (rc) );
+        if ( rc != 0x000f ) /* bytes 0-3 are 0xff, bytes 4-15 are not */
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
     printf("%-40s", "Testing vmovd %xmm1,32(%ecx)...");
     if ( stack_exec && cpu_has_avx )
     {
@@ -1131,6 +1232,34 @@ int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+    printf("%-40s", "Testing vmovd 32(%ecx),%xmm2...");
+    if ( stack_exec && cpu_has_avx )
+    {
+        decl_insn(vmovd_from_mem);
+
+        asm volatile ( "pcmpeqb %%xmm2, %%xmm2\n"
+                       put_insn(vmovd_from_mem, "vmovd 32(%0), %%xmm2")
+                       :: "c" (NULL) );
+
+        set_insn(vmovd_from_mem);
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(vmovd_from_mem) )
+            goto fail;
+        asm ( "pxor %%xmm0,%%xmm0\n\t"
+              "pcmpeqb %%xmm2, %%xmm0\n\t"
+              "pmovmskb %%xmm0, %0" : "=r" (rc) );
+        if ( rc != 0xfff0 ) /* bytes 4-15 zero, bytes 0-3 non-zero */
+            goto fail;
+        asm ( "pcmpeqb %%xmm1, %%xmm1\n\t"
+              "pcmpeqb %%xmm2, %%xmm1\n\t"
+              "pmovmskb %%xmm1, %0" : "=r" (rc) );
+        if ( rc != 0x000f ) /* bytes 0-3 are 0xff, bytes 4-15 are not */
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
     printf("%-40s", "Testing movd %mm3,%ebx...");
     if ( stack_exec && cpu_has_mmx )
     {
@@ -1161,6 +1290,34 @@ int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+    printf("%-40s", "Testing movd %ebx,%mm4...");
+    if ( stack_exec && cpu_has_mmx )
+    {
+        decl_insn(movd_from_reg);
+
+        /* See comment next to movd above. */
+        asm volatile ( "pcmpgtb %%mm4, %%mm4\n"
+                       put_insn(movd_from_reg, "movd %%ebx, %%mm4")
+                       :: );
+
+        set_insn(movd_from_reg);
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || !check_eip(movd_from_reg) )
+            goto fail;
+        asm ( "pxor %%mm2,%%mm2\n\t"
+              "pcmpeqb %%mm4, %%mm2\n\t"
+              "pmovmskb %%mm2, %0" : "=r" (rc) );
+        if ( rc != 0xf0 ) /* bytes 4-7 zero, bytes 0-3 non-zero */
+            goto fail;
+        asm ( "pcmpeqb %%mm4, %%mm3\n\t"
+              "pmovmskb %%mm3, %0" : "=r" (rc) );
+        if ( rc != 0x0f ) /* bytes 0-3 equal %mm3's, bytes 4-7 differ */
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
     printf("%-40s", "Testing movd %xmm2,%ebx...");
     if ( stack_exec && cpu_has_sse2 )
     {
@@ -1186,6 +1343,35 @@ int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+    printf("%-40s", "Testing movd %ebx,%xmm3...");
+    if ( stack_exec && cpu_has_sse2 )
+    {
+        decl_insn(movd_from_reg2);
+
+        /* See comment next to movd above. */
+        asm volatile ( "pcmpgtb %%xmm3, %%xmm3\n"
+                       put_insn(movd_from_reg2, "movd %%ebx, %%xmm3")
+                       :: );
+
+        set_insn(movd_from_reg2);
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || !check_eip(movd_from_reg2) )
+            goto fail;
+        asm ( "pxor %%xmm1,%%xmm1\n\t"
+              "pcmpeqb %%xmm3, %%xmm1\n\t"
+              "pmovmskb %%xmm1, %0" : "=r" (rc) );
+        if ( rc != 0xfff0 ) /* bytes 4-15 zero, bytes 0-3 non-zero */
+            goto fail;
+        asm ( "pcmpeqb %%xmm2, %%xmm2\n\t"
+              "pcmpeqb %%xmm3, %%xmm2\n\t"
+              "pmovmskb %%xmm2, %0" : "=r" (rc) );
+        if ( rc != 0x000f ) /* bytes 0-3 are 0xff, bytes 4-15 are not */
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
     printf("%-40s", "Testing vmovd %xmm1,%ebx...");
     if ( stack_exec && cpu_has_avx )
     {
@@ -1208,6 +1394,35 @@ int main(int argc, char **argv)
             goto fail;
         printf("okay\n");
     }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing vmovd %ebx,%xmm2...");
+    if ( stack_exec && cpu_has_avx )
+    {
+        decl_insn(vmovd_from_reg);
+
+        /* See comment next to movd above. */
+        asm volatile ( "pcmpgtb %%xmm2, %%xmm2\n"
+                       put_insn(vmovd_from_reg, "vmovd %%ebx, %%xmm2")
+                       :: );
+
+        set_insn(vmovd_from_reg);
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || !check_eip(vmovd_from_reg) )
+            goto fail;
+        asm ( "pxor %%xmm0,%%xmm0\n\t"
+              "pcmpeqb %%xmm2, %%xmm0\n\t"
+              "pmovmskb %%xmm0, %0" : "=r" (rc) );
+        if ( rc != 0xfff0 ) /* bytes 4-15 zero, bytes 0-3 non-zero */
+            goto fail;
+        asm ( "pcmpeqb %%xmm1, %%xmm1\n\t"
+              "pcmpeqb %%xmm2, %%xmm1\n\t"
+              "pmovmskb %%xmm1, %0" : "=r" (rc) );
+        if ( rc != 0x000f ) /* bytes 0-3 are 0xff, bytes 4-15 are not */
+            goto fail;
+        printf("okay\n");
+    }
     else
         printf("skipped\n");
 
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -4995,6 +4995,12 @@ x86_emulate(
                                          /* vmovntdq ymm,m256 */
         fail_if(ea.type != OP_MEM);
         /* fall through */
+    case X86EMUL_OPC(0x0f, 0x6e):        /* movd r/m32,mm */
+                                         /* movq r/m64,mm */
+    case X86EMUL_OPC_66(0x0f, 0x6e):     /* movd r/m32,xmm */
+                                         /* movq r/m64,xmm */
+    case X86EMUL_OPC_VEX_66(0x0f, 0x6e): /* vmovd r/m32,xmm */
+                                         /* vmovq r/m64,xmm */
     case X86EMUL_OPC(0x0f, 0x6f):        /* movq mm/m64,mm */
     case X86EMUL_OPC_66(0x0f, 0x6f):     /* movdqa xmm/m128,xmm */
     case X86EMUL_OPC_F3(0x0f, 0x6f):     /* movdqu xmm/m128,xmm */
@@ -5008,6 +5014,8 @@ x86_emulate(
                                          /* movq xmm,r/m64 */
     case X86EMUL_OPC_VEX_66(0x0f, 0x7e): /* vmovd xmm,r/m32 */
                                          /* vmovq xmm,r/m64 */
+    case X86EMUL_OPC_F3(0x0f, 0x7e):     /* movq xmm/m64,xmm */
+    case X86EMUL_OPC_VEX_F3(0x0f, 0x7e): /* vmovq xmm/m64,xmm */
     case X86EMUL_OPC(0x0f, 0x7f):        /* movq mm,mm/m64 */
     case X86EMUL_OPC_66(0x0f, 0x7f):     /* movdqa xmm,xmm/m128 */
     case X86EMUL_OPC_VEX_66(0x0f, 0x7f): /* vmovdqa xmm,xmm/m128 */
@@ -5019,6 +5027,7 @@ x86_emulate(
     case X86EMUL_OPC_VEX_66(0x0f, 0xd6): /* vmovq xmm,xmm/m64 */
     {
         uint8_t *buf = get_stub(stub);
+        bool load = false; /* set when the r/m operand is the source */
 
         fic.insn_bytes = 5;
         buf[0] = 0x3e;
@@ -5062,8 +5071,20 @@ x86_emulate(
         {
         case 0x7e:
             generate_exception_if(vex.l, EXC_UD);
-            ea.bytes = op_bytes;
+            if ( vex.pfx != vex_f3 )
+                ea.bytes = op_bytes;
+            else
+            {
+                buf[0] = 0xf3; /* stub gets an F3 prefix byte */
+                ea.bytes = 8; /* {,v}movq xmm/m64,xmm: 64-bit operand */
+                /* fall through - case 0x6f shares the assignment below */
+        case 0x6f:
+                load = true;
+            }
             break;
+        case 0x6e:
+            load = true;
+            /* fall through */
         case 0xd6:
             generate_exception_if(vex.l, EXC_UD);
             ea.bytes = 8;
@@ -5081,13 +5102,13 @@ x86_emulate(
                                   !is_aligned(ea.mem.seg, ea.mem.off, ea.bytes,
                                               ctxt, ops),
                                   EXC_GP, 0);
-            if ( b == 0x6f )
+            if ( load ) /* memory source: read it into mmvalp */
                 rc = ops->read(ea.mem.seg, ea.mem.off+0, mmvalp,
                                ea.bytes, ctxt);
             else
                 fail_if(!ops->write); /* Check before running the stub. */
         }
-        if ( ea.type == OP_MEM || b == 0x7e )
+        if ( ea.type == OP_MEM || ((b & ~0x10) == 0x6e && vex.pfx != vex_f3) )
         {
             /* Convert memory operand or GPR destination to (%rAX) */
             rex_prefix &= ~REX_B;
@@ -5095,7 +5116,7 @@ x86_emulate(
             buf[4] &= 0x38;
             if ( ea.type == OP_MEM )
                 ea.reg = (void *)mmvalp;
-            else /* Ensure zero-extension of a 32-bit result. */
+            else if ( !load ) /* Ensure zero-extension of a 32-bit result. */
                 *ea.reg = 0;
         }
         if ( !rc )
@@ -5106,7 +5127,7 @@ x86_emulate(
         }
         put_fpu(&fic);
         put_stub(stub);
-        if ( !rc && (b != 0x6f) && (ea.type == OP_MEM) )
+        if ( !rc && !load && (ea.type == OP_MEM) ) /* store forms only */
         {
             ASSERT(ops->write); /* See the fail_if() above. */
             rc = ops->write(ea.mem.seg, ea.mem.off, mmvalp,