x86emul: support {,V}MOVNTDQA

Signed-off-by: Jan Beulich

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -1608,6 +1608,74 @@ int main(int argc, char **argv)
             goto fail;
 #if 0 /* Don't use AVX2 instructions for now */
         asm ( "vpcmpeqb %%ymm2, %%ymm2, %%ymm2\n\t"
+              "vpcmpeqb %%ymm4, %%ymm2, %%ymm0\n\t"
+              "vpmovmskb %%ymm0, %0" : "=r" (rc) );
+#else
+        asm ( "vextractf128 $1, %%ymm4, %%xmm3\n\t"
+              "vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n\t"
+              "vpcmpeqb %%xmm4, %%xmm2, %%xmm0\n\t"
+              "vpcmpeqb %%xmm3, %%xmm2, %%xmm1\n\t"
+              "vpmovmskb %%xmm0, %0\n\t"
+              "vpmovmskb %%xmm1, %1" : "=r" (rc), "=r" (i) );
+        rc |= i << 16;
+#endif
+        if ( ~rc )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing movntdqa 16(%edx),%xmm4...");
+    if ( stack_exec && cpu_has_sse4_1 )
+    {
+        decl_insn(movntdqa);
+
+        asm volatile ( "pcmpgtb %%xmm4, %%xmm4\n"
+                       put_insn(movntdqa, "movntdqa 16(%0), %%xmm4")
+                       :: "d" (NULL) );
+
+        set_insn(movntdqa);
+        memset(res, 0x55, 64);
+        memset(res + 4, 0xff, 16);
+        regs.edx = (unsigned long)res;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(movntdqa) )
+            goto fail;
+        asm ( "pcmpeqb %%xmm2, %%xmm2\n\t"
+              "pcmpeqb %%xmm4, %%xmm2\n\t"
+              "pmovmskb %%xmm2, %0" : "=r" (rc) );
+        if ( rc != 0xffff )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing vmovntdqa (%ecx),%ymm4...");
+    if ( stack_exec && cpu_has_avx2 )
+    {
+        decl_insn(vmovntdqa);
+
+#if 0 /* Don't use AVX2 instructions for now */
+        asm volatile ( "vpxor %%ymm4, %%ymm4, %%ymm4\n"
+                       put_insn(vmovntdqa, "vmovntdqa (%0), %%ymm4")
+                       :: "c" (NULL) );
+#else
+        asm volatile ( "vpxor %xmm4, %xmm4, %xmm4\n"
+                       put_insn(vmovntdqa,
+                                ".byte 0xc4, 0xe2, 0x7d, 0x2a, 0x21") );
+#endif
+
+        set_insn(vmovntdqa);
+        memset(res, 0x55, 96);
+        memset(res + 8, 0xff, 32);
+        regs.ecx = (unsigned long)(res + 8);
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(vmovntdqa) )
+            goto fail;
+#if 0 /* Don't use AVX2 instructions for now */
+        asm ( "vpcmpeqb %%ymm2, %%ymm2, %%ymm2\n\t"
               "vpcmpeqb %%ymm4, %%ymm2, %%ymm0\n\t"
               "vpmovmskb %%ymm0, %0" : "=r" (rc) );
 #else
--- a/tools/tests/x86_emulator/x86_emulate.h
+++ b/tools/tests/x86_emulator/x86_emulate.h
@@ -77,6 +77,12 @@ static int cpuid(
     (ecx & (1U << 0)) != 0; \
 })
 
+#define cpu_has_sse4_1 ({ \
+    unsigned int eax = 1, ecx = 0; \
+    emul_test_cpuid(&eax, &eax, &ecx, &eax, NULL); \
+    (ecx & (1U << 19)) != 0; \
+})
+
 #define cpu_has_xsave ({ \
     unsigned int eax = 1, ecx = 0; \
     emul_test_cpuid(&eax, &eax, &ecx, &eax, NULL); \
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1298,6 +1298,7 @@ static bool vcpu_has(
 #define vcpu_has_sse2()        vcpu_has(         1, EDX, 26, ctxt, ops)
 #define vcpu_has_sse3()        vcpu_has(         1, ECX,  0, ctxt, ops)
 #define vcpu_has_cx16()        vcpu_has(         1, ECX, 13, ctxt, ops)
+#define vcpu_has_sse4_1()      vcpu_has(         1, ECX, 19, ctxt, ops)
 #define vcpu_has_sse4_2()      vcpu_has(         1, ECX, 20, ctxt, ops)
 #define vcpu_has_movbe()       vcpu_has(         1, ECX, 22, ctxt, ops)
 #define vcpu_has_avx()         vcpu_has(         1, ECX, 28, ctxt, ops)
@@ -1305,6 +1306,7 @@ static bool vcpu_has(
 #define vcpu_has_lzcnt()       vcpu_has(0x80000001, ECX,  5, ctxt, ops)
 #define vcpu_has_misalignsse() vcpu_has(0x80000001, ECX,  7, ctxt, ops)
 #define vcpu_has_bmi1()        vcpu_has(         7, EBX,  3, ctxt, ops)
+#define vcpu_has_avx2()        vcpu_has(         7, EBX,  5, ctxt, ops)
 #define vcpu_has_hle()         vcpu_has(         7, EBX,  4, ctxt, ops)
 #define vcpu_has_rtm()         vcpu_has(         7, EBX, 11, ctxt, ops)
 #define vcpu_has_smap()        vcpu_has(         7, EBX, 20, ctxt, ops)
@@ -5005,6 +5007,7 @@ x86_emulate(
     case X86EMUL_OPC_VEX_66(0x0f, 0x6e): /* vmovd r/m32,xmm */
                                          /* vmovq r/m64,xmm */
     case X86EMUL_OPC(0x0f, 0x6f):        /* movq mm/m64,mm */
+    movdqa:
     case X86EMUL_OPC_66(0x0f, 0x6f):     /* movdqa xmm/m128,xmm */
     case X86EMUL_OPC_F3(0x0f, 0x6f):     /* movdqu xmm/m128,xmm */
     case X86EMUL_OPC_VEX_66(0x0f, 0x6f): /* vmovdqa xmm/m128,xmm */
@@ -5442,6 +5445,22 @@ x86_emulate(
         }
         break;
 
+    case X86EMUL_OPC_66(0x0f38, 0x2a):     /* movntdqa m128,xmm */
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x2a): /* vmovntdqa m128,xmm */
+                                           /* vmovntdqa m256,ymm */
+        fail_if(ea.type != OP_MEM);
+        /* Ignore the non-temporal hint for now, using movdqa instead. */
+        b = 0x6f;
+        if ( !vex.opcx )
+            vcpu_must_have(sse4_1);
+        else
+        {
+            vex.opcx = vex_0f;
+            if ( vex.l )
+                vcpu_must_have(avx2);
+        }
+        goto movdqa;
+
     case X86EMUL_OPC(0x0f38, 0xf0): /* movbe m,r */
     case X86EMUL_OPC(0x0f38, 0xf1): /* movbe r,m */
         vcpu_must_have(movbe);