
[Xen-devel] [PATCH 3/3] x86emul: support {,V}MOVNTDQA
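
Emulate these by replaying them through the existing {,v}movdqa handling,
i.e. the non-temporal hint is ignored for now. The test harness gains
coverage for both the legacy (SSE4.1) form and the VEX form (requiring
AVX2 for the 256-bit variant).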



Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -1608,6 +1608,74 @@ int main(int argc, char **argv)
             goto fail;
 #if 0 /* Don't use AVX2 instructions for now */
         asm ( "vpcmpeqb %%ymm2, %%ymm2, %%ymm2\n\t"
+              "vpcmpeqb %%ymm4, %%ymm2, %%ymm0\n\t"
+              "vpmovmskb %%ymm0, %0" : "=r" (rc) );
+#else
+        asm ( "vextractf128 $1, %%ymm4, %%xmm3\n\t"
+              "vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n\t"
+              "vpcmpeqb %%xmm4, %%xmm2, %%xmm0\n\t"
+              "vpcmpeqb %%xmm3, %%xmm2, %%xmm1\n\t"
+              "vpmovmskb %%xmm0, %0\n\t"
+              "vpmovmskb %%xmm1, %1" : "=r" (rc), "=r" (i) );
+        rc |= i << 16;
+#endif
+        if ( ~rc )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing movntdqa 16(%edx),%xmm4...");
+    if ( stack_exec && cpu_has_sse4_1 )
+    {
+        decl_insn(movntdqa);
+
+        asm volatile ( "pcmpgtb %%xmm4, %%xmm4\n"
+                       put_insn(movntdqa, "movntdqa 16(%0), %%xmm4")
+                       :: "d" (NULL) );
+
+        set_insn(movntdqa);
+        memset(res, 0x55, 64);
+        memset(res + 4, 0xff, 16);
+        regs.edx = (unsigned long)res;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(movntdqa) )
+            goto fail;
+        asm ( "pcmpeqb %%xmm2, %%xmm2\n\t"
+              "pcmpeqb %%xmm4, %%xmm2\n\t"
+              "pmovmskb %%xmm2, %0" : "=r" (rc) );
+        if ( rc != 0xffff )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing vmovntdqa (%ecx),%ymm4...");
+    if ( stack_exec && cpu_has_avx2 )
+    {
+        decl_insn(vmovntdqa);
+
+#if 0 /* Don't use AVX2 instructions for now */
+        asm volatile ( "vpxor %%ymm4, %%ymm4, %%ymm4\n"
+                       put_insn(vmovntdqa, "vmovntdqa (%0), %%ymm4")
+                       :: "c" (NULL) );
+#else
+        asm volatile ( "vpxor %xmm4, %xmm4, %xmm4\n"
+                       put_insn(vmovntdqa,
+                                ".byte 0xc4, 0xe2, 0x7d, 0x2a, 0x21") );
+#endif
+
+        set_insn(vmovntdqa);
+        memset(res, 0x55, 96);
+        memset(res + 8, 0xff, 32);
+        regs.ecx = (unsigned long)(res + 8);
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(vmovntdqa) )
+            goto fail;
+#if 0 /* Don't use AVX2 instructions for now */
+        asm ( "vpcmpeqb %%ymm2, %%ymm2, %%ymm2\n\t"
               "vpcmpeqb %%ymm4, %%ymm2, %%ymm0\n\t"
               "vpmovmskb %%ymm0, %0" : "=r" (rc) );
 #else
--- a/tools/tests/x86_emulator/x86_emulate.h
+++ b/tools/tests/x86_emulator/x86_emulate.h
@@ -77,6 +77,12 @@ static int cpuid(
     (ecx & (1U << 0)) != 0; \
 })
 
+#define cpu_has_sse4_1 ({ \
+    unsigned int eax = 1, ecx = 0; \
+    emul_test_cpuid(&eax, &eax, &ecx, &eax, NULL); \
+    (ecx & (1U << 19)) != 0; \
+})
+
 #define cpu_has_xsave ({ \
     unsigned int eax = 1, ecx = 0; \
     emul_test_cpuid(&eax, &eax, &ecx, &eax, NULL); \
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1298,6 +1298,7 @@ static bool vcpu_has(
 #define vcpu_has_sse2()        vcpu_has(         1, EDX, 26, ctxt, ops)
 #define vcpu_has_sse3()        vcpu_has(         1, ECX,  0, ctxt, ops)
 #define vcpu_has_cx16()        vcpu_has(         1, ECX, 13, ctxt, ops)
+#define vcpu_has_sse4_1()      vcpu_has(         1, ECX, 19, ctxt, ops)
 #define vcpu_has_sse4_2()      vcpu_has(         1, ECX, 20, ctxt, ops)
 #define vcpu_has_movbe()       vcpu_has(         1, ECX, 22, ctxt, ops)
 #define vcpu_has_avx()         vcpu_has(         1, ECX, 28, ctxt, ops)
@@ -1305,6 +1306,7 @@ static bool vcpu_has(
 #define vcpu_has_lzcnt()       vcpu_has(0x80000001, ECX,  5, ctxt, ops)
 #define vcpu_has_misalignsse() vcpu_has(0x80000001, ECX,  7, ctxt, ops)
 #define vcpu_has_bmi1()        vcpu_has(         7, EBX,  3, ctxt, ops)
 #define vcpu_has_hle()         vcpu_has(         7, EBX,  4, ctxt, ops)
+#define vcpu_has_avx2()        vcpu_has(         7, EBX,  5, ctxt, ops)
 #define vcpu_has_rtm()         vcpu_has(         7, EBX, 11, ctxt, ops)
 #define vcpu_has_smap()        vcpu_has(         7, EBX, 20, ctxt, ops)
@@ -5005,6 +5007,7 @@ x86_emulate(
     case X86EMUL_OPC_VEX_66(0x0f, 0x6e): /* vmovd r/m32,xmm */
                                          /* vmovq r/m64,xmm */
     case X86EMUL_OPC(0x0f, 0x6f):        /* movq mm/m64,mm */
+    movdqa:
     case X86EMUL_OPC_66(0x0f, 0x6f):     /* movdqa xmm/m128,xmm */
     case X86EMUL_OPC_F3(0x0f, 0x6f):     /* movdqu xmm/m128,xmm */
     case X86EMUL_OPC_VEX_66(0x0f, 0x6f): /* vmovdqa xmm/m128,xmm */
@@ -5442,6 +5445,22 @@ x86_emulate(
         }
         break;
 
+    case X86EMUL_OPC_66(0x0f38, 0x2a): /* movntdqa m128,xmm */
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x2a): /* vmovntdqa m128,xmm */
+                                           /* vmovntdqa m256,ymm */
+        fail_if(ea.type != OP_MEM);
+        /* Ignore the non-temporal hint for now, using movdqa instead. */
+        b = 0x6f;
+        if ( !vex.opcx )
+            vcpu_must_have(sse4_1);
+        else
+        {
+            vex.opcx = vex_0f;
+            if ( vex.l )
+                vcpu_must_have(avx2);
+        }
+        goto movdqa;
+
     case X86EMUL_OPC(0x0f38, 0xf0): /* movbe m,r */
     case X86EMUL_OPC(0x0f38, 0xf1): /* movbe r,m */
         vcpu_must_have(movbe);
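
For readers wanting to double-check the expectations encoded in the tests
above: MOVNTDQA is SSE4.1's streaming (non-temporal) aligned 128-bit load,
which is what justifies falling back to MOVDQA in the emulator. Below is a
minimal, hypothetical user-space sketch (not part of the patch; assumes a
host with SSE4.1 and a compiler invoked with e.g. -msse4.1) mirroring the
harness' pcmpeqb/pmovmskb comparison via the corresponding intrinsic:

#include <smmintrin.h> /* SSE4.1: _mm_stream_load_si128() */
#include <stdio.h>
#include <string.h>

int main(void)
{
    /* MOVNTDQA requires a 16-byte aligned source; __m128i guarantees it. */
    __m128i src;
    memset(&src, 0xff, sizeof(src));

    /* The streaming load the patch emulates via a plain movdqa. */
    __m128i val = _mm_stream_load_si128(&src);

    /* Same check as the harness: compare with all-ones, expect 0xffff. */
    int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(val, _mm_set1_epi8(-1)));

    printf("%s\n", mask == 0xffff ? "okay" : "fail");
    return mask != 0xffff;
}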

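The new cpu_has_sse4_1 / vcpu_has_{sse4_1,avx2} predicates simply test the
architectural CPUID bits used in the patch (leaf 1 ECX[19] for SSE4.1,
leaf 7 EBX[5] for AVX2). For completeness, an illustrative standalone
version of the same checks (a sketch only, not the harness'
emul_test_cpuid() nor the emulator's vcpu_has()):

#include <stdio.h>

static void cpuid(unsigned int leaf, unsigned int subleaf,
                  unsigned int *eax, unsigned int *ebx,
                  unsigned int *ecx, unsigned int *edx)
{
    asm volatile ( "cpuid"
                   : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
                   : "0" (leaf), "2" (subleaf) );
}

int main(void)
{
    unsigned int eax, ebx, ecx, edx;

    cpuid(1, 0, &eax, &ebx, &ecx, &edx);
    printf("sse4.1: %u\n", (ecx >> 19) & 1); /* matches cpu_has_sse4_1 */
    printf("sse4.2: %u\n", (ecx >> 20) & 1); /* matches vcpu_has_sse4_2 */

    cpuid(7, 0, &eax, &ebx, &ecx, &edx);
    printf("avx2  : %u\n", (ebx >> 5) & 1);  /* matches vcpu_has_avx2 */
    return 0;
}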

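Similarly, since the 256-bit test case has to hand-assemble its insn
(".byte 0xc4, 0xe2, 0x7d, 0x2a, 0x21" above), here is an illustrative
breakdown of those five bytes following the architectural 3-byte VEX
layout (a standalone sketch for the reader, not the emulator's decoder):

#include <stdio.h>

int main(void)
{
    /* vmovntdqa (%rcx),%ymm4, as emitted by put_insn() in the test. */
    const unsigned char insn[] = { 0xc4, 0xe2, 0x7d, 0x2a, 0x21 };

    printf("map (mmmmm): %u\n", insn[1] & 0x1f);     /* 2 -> 0f38 map */
    printf("vex.w      : %u\n", (insn[2] >> 7) & 1); /* 0 */
    printf("vex.l      : %u\n", (insn[2] >> 2) & 1); /* 1 -> 256-bit (ymm) */
    printf("pp         : %u\n", insn[2] & 3);        /* 1 -> 0x66 prefix */
    printf("opcode     : %#x\n", insn[3]);           /* 0x2a */
    printf("modrm.reg  : %u\n", (insn[4] >> 3) & 7); /* 4 -> %ymm4 */
    printf("modrm.rm   : %u\n", insn[4] & 7);        /* 1 -> (%rcx), mod=0 */
    return 0;
}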