[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH] x86: AVX instruction emulation fixes



Il 28/08/2013 10:32, Jan Beulich ha scritto:
- we used the C4/C5 (first prefix) byte instead of the apparent ModR/M
  one as the second prefix byte
- early decoding normalized vex.reg, thus corrupting it for the main
  consumer (copy_REX_VEX()), resulting in #UD on the two-operand
  instructions we emulate

Also add respective test cases to the testing utility plus
- fix get_fpu() (the fall-through order was inverted)
- add cpu_has_avx2, even if it's currently unused (as in the new test
  cases I decided to refrain from using AVX2 instructions in order to
  be able to actually run all the tests on the hardware I have)
- slightly tweak cpu_has_avx to more consistently express the outputs
  we don't care about (sinking them all into the same variable)

This patch include the solution for full SSE support needed to solve this problem?
http://bugs.xenproject.org/xen/bug/11

Thanks for any reply.


Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -94,13 +94,25 @@ static inline uint64_t xgetbv(uint32_t x
 }
 
 #define cpu_has_avx ({ \
-    unsigned int eax = 1, ecx = 0, edx; \
-    cpuid(&eax, &edx, &ecx, &edx, NULL); \
+    unsigned int eax = 1, ecx = 0; \
+    cpuid(&eax, &eax, &ecx, &eax, NULL); \
     if ( !(ecx & (1U << 27)) || ((xgetbv(0) & 6) != 6) ) \
         ecx = 0; \
     (ecx & (1U << 28)) != 0; \
 })
 
+#define cpu_has_avx2 ({ \
+    unsigned int eax = 1, ebx, ecx = 0; \
+    cpuid(&eax, &ebx, &ecx, &eax, NULL); \
+    if ( !(ecx & (1U << 27)) || ((xgetbv(0) & 6) != 6) ) \
+        ebx = 0; \
+    else { \
+        eax = 7, ecx = 0; \
+        cpuid(&eax, &ebx, &ecx, &eax, NULL); \
+    } \
+    (ebx & (1U << 5)) != 0; \
+})
+
 int get_fpu(
     void (*exception_callback)(void *, struct cpu_user_regs *),
     void *exception_callback_arg,
@@ -111,14 +123,14 @@ int get_fpu(
     {
     case X86EMUL_FPU_fpu:
         break;
-    case X86EMUL_FPU_ymm:
-        if ( cpu_has_avx )
+    case X86EMUL_FPU_mmx:
+        if ( cpu_has_mmx )
             break;
     case X86EMUL_FPU_xmm:
         if ( cpu_has_sse )
             break;
-    case X86EMUL_FPU_mmx:
-        if ( cpu_has_mmx )
+    case X86EMUL_FPU_ymm:
+        if ( cpu_has_avx )
             break;
     default:
         return X86EMUL_UNHANDLEABLE;
@@ -629,6 +641,73 @@ int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+    printf("%-40s", "Testing vmovdqu %ymm2,(%ecx)...");
+    if ( stack_exec && cpu_has_avx )
+    {
+        extern const unsigned char vmovdqu_to_mem[];
+
+        asm volatile ( "vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n"
+                       ".pushsection .test, \"a\", @progbits\n"
+                       "vmovdqu_to_mem: vmovdqu %%ymm2, (%0)\n"
+                       ".popsection" :: "c" (NULL) );
+
+        memcpy(instr, vmovdqu_to_mem, 15);
+        memset(res, 0x55, 128);
+        memset(res + 16, 0xff, 16);
+        memset(res + 20, 0x00, 16);
+        regs.eip    = (unsigned long)&instr[0];
+        regs.ecx    = (unsigned long)res;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 16, 64) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing vmovdqu (%edx),%ymm4...");
+    if ( stack_exec && cpu_has_avx )
+    {
+        extern const unsigned char vmovdqu_from_mem[];
+
+#if 0 /* Don't use AVX2 instructions for now */
+        asm volatile ( "vpcmpgtb %%ymm4, %%ymm4, %%ymm4\n"
+#else
+        asm volatile ( "vpcmpgtb %%xmm4, %%xmm4, %%xmm4\n\t"
+                       "vinsertf128 $1, %%xmm4, %%ymm4, %%ymm4\n"
+#endif
+                       ".pushsection .test, \"a\", @progbits\n"
+                       "vmovdqu_from_mem: vmovdqu (%0), %%ymm4\n"
+                       ".popsection" :: "d" (NULL) );
+
+        memcpy(instr, vmovdqu_from_mem, 15);
+        memset(res + 4, 0xff, 16);
+        regs.eip    = (unsigned long)&instr[0];
+        regs.ecx    = 0;
+        regs.edx    = (unsigned long)res;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY )
+            goto fail;
+#if 0 /* Don't use AVX2 instructions for now */
+        asm ( "vpcmpeqb %%ymm2, %%ymm2, %%ymm2\n\t"
+              "vpcmpeqb %%ymm4, %%ymm2, %%ymm0\n\t"
+              "vpmovmskb %%ymm1, %0" : "=r" (rc) );
+#else
+        asm ( "vextractf128 $1, %%ymm4, %%xmm3\n\t"
+              "vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n\t"
+              "vpcmpeqb %%xmm4, %%xmm2, %%xmm0\n\t"
+              "vpcmpeqb %%xmm3, %%xmm2, %%xmm1\n\t"
+              "vpmovmskb %%xmm0, %0\n\t"
+              "vpmovmskb %%xmm1, %1" : "=r" (rc), "=r" (i) );
+        rc |= i << 16;
+#endif
+        if ( rc != 0xffffffff )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
     printf("%-40s", "Testing movsd %xmm5,(%ecx)...");
     memset(res, 0x77, 64);
     memset(res + 10, 0x66, 8);
@@ -683,6 +762,59 @@ int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+    printf("%-40s", "Testing vmovsd %xmm5,(%ecx)...");
+    memset(res, 0x88, 64);
+    memset(res + 10, 0x77, 8);
+    if ( stack_exec && cpu_has_avx )
+    {
+        extern const unsigned char vmovsd_to_mem[];
+
+        asm volatile ( "vbroadcastsd %0, %%ymm5\n"
+                       ".pushsection .test, \"a\", @progbits\n"
+                       "vmovsd_to_mem: vmovsd %%xmm5, (%1)\n"
+                       ".popsection" :: "m" (res[10]), "c" (NULL) );
+
+        memcpy(instr, vmovsd_to_mem, 15);
+        regs.eip    = (unsigned long)&instr[0];
+        regs.ecx    = (unsigned long)(res + 2);
+        regs.edx    = 0;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+    {
+        printf("skipped\n");
+        memset(res + 2, 0x77, 8);
+    }
+
+    printf("%-40s", "Testing vmovaps (%edx),%ymm7...");
+    if ( stack_exec && cpu_has_avx )
+    {
+        extern const unsigned char vmovaps_from_mem[];
+
+        asm volatile ( "vxorps %%ymm7, %%ymm7, %%ymm7\n"
+                       ".pushsection .test, \"a\", @progbits\n"
+                       "vmovaps_from_mem: vmovaps (%0), %%ymm7\n"
+                       ".popsection" :: "d" (NULL) );
+
+        memcpy(instr, vmovaps_from_mem, 15);
+        regs.eip    = (unsigned long)&instr[0];
+        regs.ecx    = 0;
+        regs.edx    = (unsigned long)res;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY )
+            goto fail;
+        asm ( "vcmpeqps %1, %%ymm7, %%ymm0\n\t"
+              "vmovmskps %%ymm0, %0" : "=r" (rc) : "m" (res[8]) );
+        if ( rc != 0xff )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
     for ( j = 1; j <= 2; j++ )
     {
 #if defined(__i386__)
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1454,10 +1454,10 @@ x86_emulate(
                 /* VEX */
                 generate_exception_if(rex_prefix || vex.pfx, EXC_UD, -1);
 
-                vex.raw[0] = b;
+                vex.raw[0] = modrm;
                 if ( b & 1 )
                 {
-                    vex.raw[1] = b;
+                    vex.raw[1] = modrm;
                     vex.opcx = vex_0f;
                     vex.x = 1;
                     vex.b = 1;
@@ -1479,10 +1479,7 @@ x86_emulate(
                         }
                     }
                 }
-                vex.reg ^= 0xf;
-                if ( !mode_64bit() )
-                    vex.reg &= 0x7;
-                else if ( !vex.r )
+                if ( mode_64bit() && !vex.r )
                     rex_prefix |= REX_R;
 
                 fail_if(vex.opcx != vex_0f);
@@ -3899,8 +3896,9 @@ x86_emulate(
         else
         {
             fail_if((vex.opcx != vex_0f) ||
-                    (vex.reg && ((ea.type == OP_MEM) ||
-                                 !(vex.pfx & VEX_PREFIX_SCALAR_MASK))));
+                    ((vex.reg != 0xf) &&
+                     ((ea.type == OP_MEM) ||
+                      !(vex.pfx & VEX_PREFIX_SCALAR_MASK))));
             vcpu_must_have_avx();
             get_fpu(X86EMUL_FPU_ymm, &fic);
             ea.bytes = 16 << vex.l;
@@ -4168,7 +4166,7 @@ x86_emulate(
         }
         else
         {
-            fail_if((vex.opcx != vex_0f) || vex.reg ||
+            fail_if((vex.opcx != vex_0f) || (vex.reg != 0xf) ||
                     ((vex.pfx != vex_66) && (vex.pfx != vex_f3)));
             vcpu_must_have_avx();
             get_fpu(X86EMUL_FPU_ymm, &fic);




_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.