
[PATCH 06/11] x86emul: handle AVX512-FP16 Map6 misc insns


  • To: "xen-devel@xxxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxxx>
  • From: Jan Beulich <jbeulich@xxxxxxxx>
  • Date: Wed, 15 Jun 2022 12:29:20 +0200
  • Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, Wei Liu <wl@xxxxxxx>, Roger Pau Monné <roger.pau@xxxxxxxxxx>
  • Delivery-date: Wed, 15 Jun 2022 10:38:16 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

While, as before, this leverages the fact that the Map6 encoding space is a
very sparse clone of the "0f38" one, switch around the simd_size override
for opcode 2D. This way fewer separate overrides are needed.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
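
To illustrate the opcode 2D change in decode.c below: when a shared
attribute table serves both encoding spaces, letting the table default
cover the common form means only the odd one out needs a per-opcode
fixup. A minimal stand-alone sketch of that pattern, with hypothetical
names (this is not the actual Xen decoder code):

/* Sketch only: hypothetical names, not the Xen decoder structures.  The
 * shared table entry for opcode 2D now defaults to the scalar form used
 * by the EVEX encodings (vscalefs{s,d}, vscalefsh), so only the lone VEX
 * encoding (vmaskmovpd) needs an override back to the packed form.
 */
#include <stdio.h>

enum simd_size { simd_packed_fp, simd_scalar_vexw };

/* As if read from a 0f38/Map6 attribute table entry for opcode 2D. */
static const enum simd_size opc_2d_default = simd_scalar_vexw;

static enum simd_size decode_2d(int is_vex_encoded)
{
    enum simd_size size = opc_2d_default;

    if ( is_vex_encoded )    /* vmaskmovpd: the single remaining override */
        size = simd_packed_fp;

    return size;
}

int main(void)
{
    printf("EVEX 2D: %s, VEX 2D: %s\n",
           decode_2d(0) == simd_scalar_vexw ? "scalar" : "packed",
           decode_2d(1) == simd_packed_fp ? "packed" : "scalar");
    return 0;
}

Before this change the split went the other way: the table default was the
packed form and the EVEX scalar user needed its own case in decode_0f38().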

--- a/tools/tests/x86_emulator/evex-disp8.c
+++ b/tools/tests/x86_emulator/evex-disp8.c
@@ -646,6 +646,8 @@ static const struct test avx512_fp16_all
     INSN(fnmsub231sh,   66, map6, bf,    el, fp16, el),
     INSN(fpclassph,       , 0f3a, 66,    vl, fp16, vl),
     INSN(fpclasssh,       , 0f3a, 67,    el, fp16, el),
+    INSN(getexpph,      66, map6, 42,    vl, fp16, vl),
+    INSN(getexpsh,      66, map6, 43,    el, fp16, el),
     INSN(getmantph,       , 0f3a, 26,    vl, fp16, vl),
     INSN(getmantsh,       , 0f3a, 27,    el, fp16, el),
     INSN(maxph,           , map5, 5f,    vl, fp16, vl),
@@ -656,10 +658,16 @@ static const struct test avx512_fp16_all
     INSN(movsh,         f3, map5, 11,    el, fp16, el),
     INSN(mulph,           , map5, 59,    vl, fp16, vl),
     INSN(mulsh,         f3, map5, 59,    el, fp16, el),
+    INSN(rcpph,         66, map6, 4c,    vl, fp16, vl),
+    INSN(rcpsh,         66, map6, 4d,    el, fp16, el),
     INSN(reduceph,        , 0f3a, 56,    vl, fp16, vl),
     INSN(reducesh,        , 0f3a, 57,    el, fp16, el),
     INSN(rndscaleph,      , 0f3a, 08,    vl, fp16, vl),
     INSN(rndscalesh,      , 0f3a, 0a,    el, fp16, el),
+    INSN(rsqrtph,       66, map6, 4e,    vl, fp16, vl),
+    INSN(rsqrtsh,       66, map6, 4f,    el, fp16, el),
+    INSN(scalefph,      66, map6, 2c,    vl, fp16, vl),
+    INSN(scalefsh,      66, map6, 2d,    el, fp16, el),
     INSN(sqrtph,          , map5, 51,    vl, fp16, vl),
     INSN(sqrtsh,        f3, map5, 51,    el, fp16, el),
     INSN(subph,           , map5, 5c,    vl, fp16, vl),
--- a/tools/tests/x86_emulator/predicates.c
+++ b/tools/tests/x86_emulator/predicates.c
@@ -2050,6 +2050,14 @@ static const struct evex {
     { { 0x6e }, 2, T, R, pfx_66, WIG, L0 }, /* vmovw */
     { { 0x7e }, 2, T, W, pfx_66, WIG, L0 }, /* vmovw */
 }, evex_map6[] = {
+    { { 0x2c }, 2, T, R, pfx_66, W0, Ln }, /* vscalefph */
+    { { 0x2d }, 2, T, R, pfx_66, W0, LIG }, /* vscalefsh */
+    { { 0x42 }, 2, T, R, pfx_66, W0, Ln }, /* vgetexpph */
+    { { 0x43 }, 2, T, R, pfx_66, W0, LIG }, /* vgetexpsh */
+    { { 0x4c }, 2, T, R, pfx_66, W0, Ln }, /* vrcpph */
+    { { 0x4d }, 2, T, R, pfx_66, W0, LIG }, /* vrcpsh */
+    { { 0x4e }, 2, T, R, pfx_66, W0, Ln }, /* vrsqrtph */
+    { { 0x4f }, 2, T, R, pfx_66, W0, LIG }, /* vrsqrtsh */
     { { 0x96 }, 2, T, R, pfx_66, W0, Ln }, /* vfmaddsub132ph */
     { { 0x97 }, 2, T, R, pfx_66, W0, Ln }, /* vfmsubadd132ph */
     { { 0x98 }, 2, T, R, pfx_66, W0, Ln }, /* vfmadd132ph */
--- a/xen/arch/x86/x86_emulate/decode.c
+++ b/xen/arch/x86/x86_emulate/decode.c
@@ -358,7 +358,7 @@ static const struct ext0f38_table {
     [0x2a] = { .simd_size = simd_packed_int, .two_op = 1, .d8s = d8s_vl },
     [0x2b] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
     [0x2c] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
-    [0x2d] = { .simd_size = simd_packed_fp, .d8s = d8s_dq },
+    [0x2d] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
     [0x2e ... 0x2f] = { .simd_size = simd_packed_fp, .to_mem = 1 },
     [0x30] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_2 },
     [0x31] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_4 },
@@ -909,8 +909,8 @@ decode_0f38(struct x86_emulate_state *s,
         ctxt->opcode |= MASK_INSR(s->vex.pfx, X86EMUL_OPC_PFX_MASK);
         break;
 
-    case X86EMUL_OPC_EVEX_66(0, 0x2d): /* vscalefs{s,d} */
-        s->simd_size = simd_scalar_vexw;
+    case X86EMUL_OPC_VEX_66(0, 0x2d): /* vmaskmovpd */
+        s->simd_size = simd_packed_fp;
         break;
 
     case X86EMUL_OPC_EVEX_66(0, 0x7a): /* vpbroadcastb */
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -7780,6 +7780,8 @@ x86_emulate(
         generate_exception_if(evex.w, EXC_UD);
         goto avx512f_all_fp;
 
+    case X86EMUL_OPC_EVEX_66(6, 0x2c): /* vscalefph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(6, 0x42): /* vgetexpph [xyz]mm/mem,[xyz]mm{k} */
     case X86EMUL_OPC_EVEX_66(6, 0x96): /* vfmaddsub132ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
     case X86EMUL_OPC_EVEX_66(6, 0x97): /* vfmsubadd132ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
     case X86EMUL_OPC_EVEX_66(6, 0x98): /* vfmadd132ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
@@ -7804,6 +7806,8 @@ x86_emulate(
             avx512_vlen_check(false);
         goto simd_zmm;
 
+    case X86EMUL_OPC_EVEX_66(6, 0x2d): /* vscalefsh xmm/m16,xmm,xmm{k} */
+    case X86EMUL_OPC_EVEX_66(6, 0x43): /* vgetexpsh xmm/m16,xmm,xmm{k} */
     case X86EMUL_OPC_EVEX_66(6, 0x99): /* vfmadd132sh xmm/m16,xmm,xmm{k} */
     case X86EMUL_OPC_EVEX_66(6, 0x9b): /* vfmsub132sh xmm/m16,xmm,xmm{k} */
     case X86EMUL_OPC_EVEX_66(6, 0x9d): /* vfnmadd132sh xmm/m16,xmm,xmm{k} */
@@ -7823,6 +7827,19 @@ x86_emulate(
             avx512_vlen_check(true);
         goto simd_zmm;
 
+    case X86EMUL_OPC_EVEX_66(6, 0x4c): /* vrcpph [xyz]mm/mem,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(6, 0x4e): /* vrsqrtph [xyz]mm/mem,[xyz]mm{k} */
+        host_and_vcpu_must_have(avx512_fp16);
+        generate_exception_if(evex.w, EXC_UD);
+        goto avx512f_no_sae;
+
+    case X86EMUL_OPC_EVEX_66(6, 0x4d): /* vrcpsh xmm/m16,xmm,xmm{k} */
+    case X86EMUL_OPC_EVEX_66(6, 0x4f): /* vrsqrtsh xmm/m16,xmm,xmm{k} */
+        host_and_vcpu_must_have(avx512_fp16);
+        generate_exception_if(evex.w || evex.brs, EXC_UD);
+        avx512_vlen_check(true);
+        goto simd_zmm;
+
     case X86EMUL_OPC_XOP(08, 0x85): /* vpmacssww xmm,xmm/m128,xmm,xmm */
     case X86EMUL_OPC_XOP(08, 0x86): /* vpmacsswd xmm,xmm/m128,xmm,xmm */
     case X86EMUL_OPC_XOP(08, 0x87): /* vpmacssdql xmm,xmm/m128,xmm,xmm */