x86/emulate: add support for {,v}movq xmm,xmm/m64

From: Mihai Donțu

Signed-off-by: Mihai Donțu
Signed-off-by: Jan Beulich
---
v4: Re-base on decoding changes. Address my own review comments (where
    still applicable). #UD when vex.l is set. Various adjustments to the
    test tool change.

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -713,6 +713,54 @@ int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+    printf("%-40s", "Testing movq %%xmm0,32(%%ecx)...");
+    if ( stack_exec && cpu_has_sse2 )
+    {
+        decl_insn(movq_to_mem2);
+
+        asm volatile ( "pcmpgtb %%xmm0, %%xmm0\n"
+                       put_insn(movq_to_mem2, "movq %%xmm0, 32(%0)")
+                       :: "c" (NULL) );
+
+        memset(res, 0xbd, 64);
+        set_insn(movq_to_mem2);
+        regs.ecx = (unsigned long)res;
+        regs.edx = 0;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(movq_to_mem2) ||
+             *((uint64_t *)res + 4) ||
+             memcmp(res, res + 10, 24) ||
+             memcmp(res, res + 6, 8) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing vmovq %%xmm1,32(%%edx)...");
+    if ( stack_exec && cpu_has_avx )
+    {
+        decl_insn(vmovq_to_mem);
+
+        asm volatile ( "pcmpgtb %%xmm1, %%xmm1\n"
+                       put_insn(vmovq_to_mem, "vmovq %%xmm1, 32(%0)")
+                       :: "d" (NULL) );
+
+        memset(res, 0xdb, 64);
+        set_insn(vmovq_to_mem);
+        regs.ecx = 0;
+        regs.edx = (unsigned long)res;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(vmovq_to_mem) ||
+             *((uint64_t *)res + 4) ||
+             memcmp(res, res + 10, 24) ||
+             memcmp(res, res + 6, 8) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
     printf("%-40s", "Testing movdqu %xmm2,(%ecx)...");
     if ( stack_exec && cpu_has_sse2 )
     {
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -269,7 +269,7 @@ static const opcode_desc_t twobyte_table
     ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
     ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
     /* 0xD0 - 0xDF */
-    ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM,
+    ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ImplicitOps|ModRM, ModRM,
     ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM,
     /* 0xE0 - 0xEF */
     ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ImplicitOps|ModRM,
@@ -4779,6 +4779,8 @@ x86_emulate(
     case X86EMUL_OPC_F3(0x0f, 0x7f):     /* movdqu xmm,xmm/m128 */
     case X86EMUL_OPC_VEX_F3(0x0f, 0x7f): /* vmovdqu xmm,xmm/m128 */
                                          /* vmovdqu ymm,ymm/m256 */
+    case X86EMUL_OPC_66(0x0f, 0xd6):     /* movq xmm,xmm/m64 */
+    case X86EMUL_OPC_VEX_66(0x0f, 0xd6): /* vmovq xmm,xmm/m64 */
     {
         uint8_t *buf = get_stub(stub);
         struct fpu_insn_ctxt fic = { .insn_bytes = 5 };
@@ -4796,7 +4798,8 @@ x86_emulate(
         case vex_66:
         case vex_f3:
             host_and_vcpu_must_have(sse2);
-            buf[0] = 0x66; /* movdqa */
+            /* Converting movdqu to movdqa here: Our buffer is aligned. */
+            buf[0] = 0x66;
             get_fpu(X86EMUL_FPU_xmm, &fic);
             ea.bytes = 16;
             break;
@@ -4819,6 +4822,11 @@ x86_emulate(
             get_fpu(X86EMUL_FPU_ymm, &fic);
             ea.bytes = 16 << vex.l;
         }
+        if ( b == 0xd6 )
+        {
+            generate_exception_if(vex.l, EXC_UD, -1);
+            ea.bytes = 8;
+        }
         if ( ea.type == OP_MEM )
        {
             generate_exception_if((vex.pfx == vex_66) &&
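
[Illustrative note, not part of the patch: the new test cases rely on the
store form of movq writing exactly the low 64 bits of the XMM register and
leaving the surrounding bytes untouched. The standalone sketch below
reproduces that expectation natively with SSE2 intrinsics; the buffer size,
fill value and offset are borrowed from the movq test above, and
_mm_storel_epi64() is simply the intrinsic spelling of such a 64-bit store.]

/*
 * Hypothetical standalone sketch: checks that a movq store from an XMM
 * register modifies only bytes 32..39 of a 64-byte buffer, mirroring the
 * memcmp()-based checks in the emulator test above.
 */
#include <emmintrin.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    uint8_t buf[64], expect[64];
    /*
     * pcmpgtb %xmm0,%xmm0 in the test clears the register (a byte is never
     * greater than itself); _mm_setzero_si128() gives the same value.
     */
    __m128i zero = _mm_setzero_si128();

    memset(buf, 0xbd, sizeof(buf));
    memset(expect, 0xbd, sizeof(expect));
    memset(expect + 32, 0x00, 8);          /* only bytes 32..39 may change */

    /* movq %xmm0, 32(buf): stores the low 64 bits only. */
    _mm_storel_epi64((__m128i *)(buf + 32), zero);

    printf("%s\n", memcmp(buf, expect, sizeof(buf)) ? "mismatch" : "okay");
    return 0;
}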