[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] x86emul: support AVX-VNNI



These are VEX-encoded equivalents of the EVEX-encoded AVX512-VNNI ISA
extension.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
SDE: -spr

--- a/tools/libs/light/libxl_cpuid.c
+++ b/tools/libs/light/libxl_cpuid.c
@@ -226,6 +226,7 @@ int libxl_cpuid_parse_config(libxl_cpuid
         {"core-caps",    0x00000007,  0, CPUID_REG_EDX, 30,  1},
         {"ssbd",         0x00000007,  0, CPUID_REG_EDX, 31,  1},
 
+        {"avx-vnni",     0x00000007,  1, CPUID_REG_EAX,  4,  1},
         {"avx512-bf16",  0x00000007,  1, CPUID_REG_EAX,  5,  1},
 
         {"lahfsahf",     0x80000001, NA, CPUID_REG_ECX,  0,  1},
--- a/tools/misc/xen-cpuid.c
+++ b/tools/misc/xen-cpuid.c
@@ -175,7 +175,7 @@ static const char *const str_7d0[32] =
 
 static const char *const str_7a1[32] =
 {
-    /* 4 */                 [ 5] = "avx512-bf16",
+    [ 4] = "avx-vnni",      [ 5] = "avx512-bf16",
 };
 
 static const struct {
--- a/tools/tests/x86_emulator/predicates.c
+++ b/tools/tests/x86_emulator/predicates.c
@@ -1335,6 +1335,10 @@ static const struct vex {
     { { 0x45 }, 2, T, R, pfx_66, Wn, Ln }, /* vpsrlv{d,q} */
     { { 0x46 }, 2, T, R, pfx_66, W0, Ln }, /* vpsravd */
     { { 0x47 }, 2, T, R, pfx_66, Wn, Ln }, /* vpsllv{d,q} */
+    { { 0x50 }, 2, T, R, pfx_66, W0, Ln }, /* vpdpbusd */
+    { { 0x51 }, 2, T, R, pfx_66, W0, Ln }, /* vpdpbusds */
+    { { 0x52 }, 2, T, R, pfx_66, W0, Ln }, /* vpdpwssd */
+    { { 0x53 }, 2, T, R, pfx_66, W0, Ln }, /* vpdpwssds */
     { { 0x58 }, 2, T, R, pfx_66, W0, Ln }, /* vpbroadcastd */
     { { 0x59 }, 2, T, R, pfx_66, W0, Ln }, /* vpbroadcastq */
     { { 0x5a }, 2, F, R, pfx_66, W0, L1 }, /* vbroadcasti128 */
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -5028,6 +5028,61 @@ int main(int argc, char **argv)
         printf("okay\n");
     }
 
+    printf("%-40s", "Testing vpdpwssd (%ecx),%{y,z}mmA,%{y,z}mmB...");
+    if ( stack_exec && cpu_has_avx512_vnni && cpu_has_avx_vnni )
+    {
+        /* Do the same operation two ways and compare the results. */
+        decl_insn(vpdpwssd_vex1);
+        decl_insn(vpdpwssd_vex2);
+        decl_insn(vpdpwssd_evex);
+
+        for ( i = 0; i < 24; ++i )
+            res[i] = i | (~i << 16);
+
+        asm volatile ( "vmovdqu32 32(%0), %%zmm1\n\t"
+                       "vextracti64x4 $1, %%zmm1, %%ymm2\n\t"
+                       "vpxor %%xmm0, %%xmm0, %%xmm3\n\t"
+                       "vpxor %%xmm0, %%xmm0, %%xmm4\n\t"
+                       "vpxor %%xmm0, %%xmm0, %%xmm5\n"
+                       put_insn(vpdpwssd_vex1,
+                                /* %{vex%} vpdpwssd (%1), %%ymm1, %%ymm3" */
+                                ".byte 0xc4, 0xe2, 0x75, 0x52, 0x19") "\n"
+                       put_insn(vpdpwssd_vex2,
+                                /* "%{vex%} vpdpwssd 32(%1), %%ymm2, %%ymm4" */
+                                ".byte 0xc4, 0xe2, 0x6d, 0x52, 0x61, 0x20") 
"\n"
+                       put_insn(vpdpwssd_evex,
+                                /* "vpdpwssd (%1), %%zmm1, %%zmm5" */
+                                ".byte 0x62, 0xf2, 0x75, 0x48, 0x52, 0x29")
+                       :: "r" (res), "c" (NULL) );
+
+        set_insn(vpdpwssd_vex1);
+        regs.ecx = (unsigned long)res;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(vpdpwssd_vex1) )
+            goto fail;
+
+        set_insn(vpdpwssd_vex2);
+        regs.ecx = (unsigned long)res;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(vpdpwssd_vex2) )
+            goto fail;
+
+        set_insn(vpdpwssd_evex);
+        regs.ecx = (unsigned long)res;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(vpdpwssd_evex) )
+            goto fail;
+
+        asm ( "vinserti64x4 $1, %%ymm4, %%zmm3, %%zmm0\n\t"
+              "vpcmpeqd %%zmm0, %%zmm5, %%k0\n\t"
+              "kmovw %%k0, %0" : "=g" (rc) );
+        if ( rc != 0xffff )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
     printf("%-40s", "Testing invpcid 16(%ecx),%%edx...");
     if ( stack_exec )
     {
--- a/tools/tests/x86_emulator/x86-emulate.h
+++ b/tools/tests/x86_emulator/x86-emulate.h
@@ -170,6 +170,7 @@ static inline bool xcr0_mask(uint64_t ma
 #define cpu_has_avx512_4fmaps (cp.feat.avx512_4fmaps && xcr0_mask(0xe6))
 #define cpu_has_avx512_vp2intersect (cp.feat.avx512_vp2intersect && 
xcr0_mask(0xe6))
 #define cpu_has_serialize  cp.feat.serialize
+#define cpu_has_avx_vnni   (cp.feat.avx_vnni && xcr0_mask(6))
 #define cpu_has_avx512_bf16 (cp.feat.avx512_bf16 && xcr0_mask(0xe6))
 
 #define cpu_has_xgetbv1   (cpu_has_xsave && cp.xstate.xgetbv1)
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -2008,6 +2008,7 @@ amd_like(const struct x86_emulate_ctxt *
 #define vcpu_has_avx512_4fmaps() (ctxt->cpuid->feat.avx512_4fmaps)
 #define vcpu_has_avx512_vp2intersect() (ctxt->cpuid->feat.avx512_vp2intersect)
 #define vcpu_has_serialize()   (ctxt->cpuid->feat.serialize)
+#define vcpu_has_avx_vnni()    (ctxt->cpuid->feat.avx_vnni)
 #define vcpu_has_avx512_bf16() (ctxt->cpuid->feat.avx512_bf16)
 
 #define vcpu_must_have(feat) \
@@ -9453,6 +9454,14 @@ x86_emulate(
         generate_exception_if(vex.l, EXC_UD);
         goto simd_0f_avx;
 
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x50): /* vpdpbusd 
[xy]mm/mem,[xy]mm,[xy]mm */
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x51): /* vpdpbusds 
[xy]mm/mem,[xy]mm,[xy]mm */
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x52): /* vpdpwssd 
[xy]mm/mem,[xy]mm,[xy]mm */
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x53): /* vpdpwssds 
[xy]mm/mem,[xy]mm,[xy]mm */
+        host_and_vcpu_must_have(avx_vnni);
+        generate_exception_if(vex.w, EXC_UD);
+        goto simd_0f_avx;
+
     case X86EMUL_OPC_EVEX_66(0x0f38, 0x50): /* vpdpbusd 
[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
     case X86EMUL_OPC_EVEX_66(0x0f38, 0x51): /* vpdpbusds 
[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
     case X86EMUL_OPC_EVEX_66(0x0f38, 0x52): /* vpdpwssd 
[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -133,6 +133,7 @@
 #define cpu_has_serialize       boot_cpu_has(X86_FEATURE_SERIALIZE)
 
 /* CPUID level 0x00000007:1.eax */
+#define cpu_has_avx_vnni        boot_cpu_has(X86_FEATURE_AVX_VNNI)
 #define cpu_has_avx512_bf16     boot_cpu_has(X86_FEATURE_AVX512_BF16)
 
 /* Synthesized. */
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -273,6 +273,7 @@ XEN_CPUFEATURE(CORE_CAPS,     9*32+30) /
 XEN_CPUFEATURE(SSBD,          9*32+31) /*A  MSR_SPEC_CTRL.SSBD available */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:1.eax, word 10 */
+XEN_CPUFEATURE(AVX_VNNI,     10*32+ 4) /*A  AVX-VNNI Instructions */
 XEN_CPUFEATURE(AVX512_BF16,  10*32+ 5) /*A  AVX512 BFloat16 Instructions */
 
 #endif /* XEN_CPUFEATURE */
--- a/xen/tools/gen-cpuid.py
+++ b/xen/tools/gen-cpuid.py
@@ -252,7 +252,7 @@ def crunch_numbers(state):
         # feature flags.  If want to use AVX512, AVX2 must be supported and
         # enabled.  Certain later extensions, acting on 256-bit vectors of
         # integers, better depend on AVX2 than AVX.
-        AVX2: [AVX512F, VAES, VPCLMULQDQ],
+        AVX2: [AVX512F, VAES, VPCLMULQDQ, AVX_VNNI],
 
         # AVX512F is taken to mean hardware support for 512bit registers
         # (which in practice depends on the EVEX prefix to encode) as well



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.