[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 09/11] x86emul: handle AVX512-FP16 floating point conversion insns


  • To: "xen-devel@xxxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxxx>
  • From: Jan Beulich <jbeulich@xxxxxxxx>
  • Date: Wed, 15 Jun 2022 12:30:50 +0200
  • Arc-authentication-results: i=1; mx.microsoft.com 1; spf=pass smtp.mailfrom=suse.com; dmarc=pass action=none header.from=suse.com; dkim=pass header.d=suse.com; arc=none
  • Arc-message-signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector9901; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1; bh=k2dOvs6g6kpvHhFE2Cq6miTN2SdtjeNE6N4Fh2/FJ04=; b=Q3JUbM4c/zh9J4OIqT5nY/7nWOP4SlMe0Fx3s/q7pr6nHe1C8o6EqXk+60TPR0Zkrffi2RDSBkCoJITgpgq9nMa9+vIET3sfI3b5qUlZUzbPAHiqbJgNOHNi3xcRuOqvAJ56ObSE51LJZL16643UAOMoAptPLgEmKJyTPAddBKNAp6K90PLZrYmzzHa4flSCQ1fenKK+I9x0gPEH508yj7dvbl9R5qidKgqb9qPJ5OgnbDXb+4C3ku7Szj39MG8S8JUV7JEvg2Np1/LfGPKAq41WUrwS6HgoAIESsRCAdYMpP0q5wlroNwo2PLXBKUjBEsg1UIgTLwg2M80DaywgpQ==
  • Arc-seal: i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none; b=DE0NnyhkLT+KxeYdCD6VvVynLmARSPYuxsy2X0bbwIPJrypbjKwYjUZQKcf37uka9z7chsfR8oJ6KrsAAdRhIaqQBG+TkVm9tpQhivpo4ESCCUUe+We8LloTiooCX9ez33ujcuq1iSyJSt41GuoPuc4vAqLXJNwuCUI28uG2h+bUc7OW0AaWlSDEXS9cNbsQgvgmVKqqv9nIXekrfNJV+51Hin3FWLMW7l4NK6JYBzLFKX1VtKX32//K1NaOXm4DkEozXY46tHhsVbxJuwOu0qkObG5LQpQe3mG87utKT3Z6mhs3Fk54hFyG7zOZO+jMYKYqvpfc8Ug3fh3sWKZ4TA==
  • Authentication-results: dkim=none (message not signed) header.d=none;dmarc=none action=none header.from=suse.com;
  • Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, Wei Liu <wl@xxxxxxx>, Roger Pau Monné <roger.pau@xxxxxxxxxx>
  • Delivery-date: Wed, 15 Jun 2022 10:31:27 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/tools/tests/x86_emulator/evex-disp8.c
+++ b/tools/tests/x86_emulator/evex-disp8.c
@@ -612,8 +612,16 @@ static const struct test avx512_fp16_all
     INSN(cmpph,           , 0f3a, c2,    vl, fp16, vl),
     INSN(cmpsh,         f3, 0f3a, c2,    el, fp16, el),
     INSN(comish,          , map5, 2f,    el, fp16, el),
+    INSN(cvtpd2ph,      66, map5, 5a,    vl,    q, vl),
+    INSN(cvtph2pd,        , map5, 5a,  vl_4, fp16, vl),
+    INSN(cvtph2psx,     66, map6, 13,  vl_2, fp16, vl),
     INSN(cvtph2uw,        , map5, 7d,    vl, fp16, vl),
     INSN(cvtph2w,       66, map5, 7d,    vl, fp16, vl),
+    INSN(cvtps2phx,     66, map5, 1d,    vl,    d, vl),
+    INSN(cvtsd2sh,      f2, map5, 5a,    el,    q, el),
+    INSN(cvtsh2sd,      f3, map5, 5a,    el, fp16, el),
+    INSN(cvtsh2ss,        , map6, 13,    el, fp16, el),
+    INSN(cvtss2sh,        , map5, 1d,    el,    d, el),
     INSN(cvttph2uw,       , map5, 7c,    vl, fp16, vl),
     INSN(cvttph2w,      66, map5, 7c,    vl, fp16, vl),
     INSN(cvtuw2ph,      f2, map5, 7d,    vl, fp16, vl),
--- a/tools/tests/x86_emulator/predicates.c
+++ b/tools/tests/x86_emulator/predicates.c
@@ -2031,6 +2031,8 @@ static const struct evex {
 }, evex_map5[] = {
     { { 0x10 }, 2, T, R, pfx_f3, W0, LIG }, /* vmovsh */
     { { 0x11 }, 2, T, W, pfx_f3, W0, LIG }, /* vmovsh */
+    { { 0x1d }, 2, T, R, pfx_66, W0, Ln }, /* vcvtps2phx */
+    { { 0x1d }, 2, T, R, pfx_no, W0, LIG }, /* vcvtss2sh */
     { { 0x2e }, 2, T, R, pfx_no, W0, LIG }, /* vucomish */
     { { 0x2f }, 2, T, R, pfx_no, W0, LIG }, /* vcomish */
     { { 0x51 }, 2, T, R, pfx_no, W0, Ln }, /* vsqrtph */
@@ -2039,6 +2041,10 @@ static const struct evex {
     { { 0x58 }, 2, T, R, pfx_f3, W0, LIG }, /* vaddsh */
     { { 0x59 }, 2, T, R, pfx_no, W0, Ln }, /* vmulph */
     { { 0x59 }, 2, T, R, pfx_f3, W0, LIG }, /* vmulsh */
+    { { 0x5a }, 2, T, R, pfx_no, W0, Ln }, /* vcvtph2pd */
+    { { 0x5a }, 2, T, R, pfx_66, W1, Ln }, /* vcvtpd2ph */
+    { { 0x5a }, 2, T, R, pfx_f3, W0, LIG }, /* vcvtsh2sd */
+    { { 0x5a }, 2, T, R, pfx_f2, W1, LIG }, /* vcvtsd2sh */
     { { 0x5c }, 2, T, R, pfx_no, W0, Ln }, /* vsubph */
     { { 0x5c }, 2, T, R, pfx_f3, W0, LIG }, /* vsubsh */
     { { 0x5d }, 2, T, R, pfx_no, W0, Ln }, /* vminph */
@@ -2056,6 +2062,8 @@ static const struct evex {
     { { 0x7d }, 2, T, R, pfx_f2, W0, Ln }, /* vcvtuwph */
     { { 0x7e }, 2, T, W, pfx_66, WIG, L0 }, /* vmovw */
 }, evex_map6[] = {
+    { { 0x13 }, 2, T, R, pfx_66, W0, Ln }, /* vcvtph2psx */
+    { { 0x13 }, 2, T, R, pfx_no, W0, LIG }, /* vcvtsh2ss */
     { { 0x2c }, 2, T, R, pfx_66, W0, Ln }, /* vscalefph */
     { { 0x2d }, 2, T, R, pfx_66, W0, LIG }, /* vscalefsh */
     { { 0x42 }, 2, T, R, pfx_66, W0, Ln }, /* vgetexpph */
--- a/xen/arch/x86/x86_emulate/decode.c
+++ b/xen/arch/x86/x86_emulate/decode.c
@@ -224,7 +224,9 @@ static const struct twobyte_table {
     [0x14 ... 0x15] = { DstImplicit|SrcMem|ModRM, simd_packed_fp, d8s_vl },
     [0x16] = { DstImplicit|SrcMem|ModRM|Mov, simd_other, 3 },
     [0x17] = { DstMem|SrcImplicit|ModRM|Mov, simd_other, 3 },
-    [0x18 ... 0x1f] = { ImplicitOps|ModRM },
+    [0x18 ... 0x1c] = { ImplicitOps|ModRM },
+    [0x1d] = { ImplicitOps|ModRM, simd_none, d8s_vl },
+    [0x1e ... 0x1f] = { ImplicitOps|ModRM },
     [0x20 ... 0x21] = { DstMem|SrcImplicit|ModRM },
     [0x22 ... 0x23] = { DstImplicit|SrcMem|ModRM },
     [0x28] = { DstImplicit|SrcMem|ModRM|Mov, simd_packed_fp, d8s_vl },
@@ -1474,6 +1476,19 @@ int x86emul_decode(struct x86_emulate_st
                     s->fp16 = true;
                 break;
 
+            case 0x1d: /* vcvtps2phx / vcvtss2sh */
+                if ( s->evex.pfx & VEX_PREFIX_SCALAR_MASK )
+                    break;
+                d = DstReg | SrcMem;
+                if ( s->evex.pfx & VEX_PREFIX_DOUBLE_MASK )
+                {
+                    s->simd_size = simd_packed_fp;
+                    d |= TwoOp;
+                }
+                else
+                    s->simd_size = simd_scalar_vexw;
+                break;
+
             case 0x2e: case 0x2f: /* v{,u}comish */
                 if ( !s->evex.pfx )
                     s->fp16 = true;
@@ -1497,6 +1512,15 @@ int x86emul_decode(struct x86_emulate_st
             }
 
             disp8scale = decode_disp8scale(twobyte_table[b].d8s, s);
+
+            switch ( b )
+            {
+            case 0x5a: /* vcvtph2pd needs special casing */
+                if ( !s->evex.pfx && !s->evex.brs )
+                    disp8scale -= 2;
+                break;
+            }
+
             break;
 
         case ext_map6:
@@ -1513,6 +1537,17 @@ int x86emul_decode(struct x86_emulate_st
                     s->fp16 = true;
                 break;
 
+            case 0x13: /* vcvtph2psx / vcvtsh2ss */
+                if ( s->evex.pfx & VEX_PREFIX_SCALAR_MASK )
+                    break;
+                s->fp16 = true;
+                if ( !(s->evex.pfx & VEX_PREFIX_DOUBLE_MASK) )
+                {
+                    s->simd_size = simd_scalar_vexw;
+                    d &= ~TwoOp;
+                }
+                break;
+
             case 0x56: case 0x57: /* vf{,c}maddc{p,s}h */
             case 0xd6: case 0xd7: /* vf{,c}mulc{p,s}h */
                 break;
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -7780,14 +7780,25 @@ x86_emulate(
         generate_exception_if(evex.w, EXC_UD);
         goto avx512f_all_fp;
 
+    CASE_SIMD_ALL_FP(_EVEX, 5, 0x5a):  /* vcvtp{h,d}2p{h,d} 
[xyz]mm/mem,[xyz]mm{k} */
+                                       /* vcvts{h,d}2s{h,d} xmm/mem,xmm,xmm{k} 
*/
+        host_and_vcpu_must_have(avx512_fp16);
+        if ( vex.pfx & VEX_PREFIX_SCALAR_MASK )
+            d &= ~TwoOp;
+        op_bytes = 2 << (((evex.pfx & VEX_PREFIX_SCALAR_MASK) ? 0 : 1 + 
evex.lr) +
+                         2 * evex.w);
+        goto avx512f_all_fp;
+
     case X86EMUL_OPC_EVEX   (5, 0x7c): /* vcvttph2uw [xyz]mm/mem,[xyz]mm{k} */
     case X86EMUL_OPC_EVEX_66(5, 0x7c): /* vcvttph2w [xyz]mm/mem,[xyz]mm{k} */
     case X86EMUL_OPC_EVEX   (5, 0x7d): /* vcvtph2uw [xyz]mm/mem,[xyz]mm{k} */
     case X86EMUL_OPC_EVEX_66(5, 0x7d): /* vcvtph2w [xyz]mm/mem,[xyz]mm{k} */
     case X86EMUL_OPC_EVEX_F3(5, 0x7d): /* vcvtw2ph [xyz]mm/mem,[xyz]mm{k} */
     case X86EMUL_OPC_EVEX_F2(5, 0x7d): /* vcvtuw2ph [xyz]mm/mem,[xyz]mm{k} */
-        op_bytes = 16 << evex.lr;
+    case X86EMUL_OPC_EVEX_66(6, 0x13): /* vcvtph2psx [xy]mm/mem,[xyz]mm{k} */
+        op_bytes = 8 << ((ext == ext_map5) + evex.lr);
         /* fall through */
+    case X86EMUL_OPC_EVEX_66(5, 0x1d): /* vcvtps2phx [xyz]mm/mem,[xy]mm{k} */
     case X86EMUL_OPC_EVEX_66(6, 0x2c): /* vscalefph 
[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
     case X86EMUL_OPC_EVEX_66(6, 0x42): /* vgetexpph [xyz]mm/mem,[xyz]mm{k} */
     case X86EMUL_OPC_EVEX_66(6, 0x96): /* vfmaddsub132ph 
[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
@@ -7814,6 +7825,8 @@ x86_emulate(
             avx512_vlen_check(false);
         goto simd_zmm;
 
+    case X86EMUL_OPC_EVEX(5, 0x1d):    /* vcvtss2sh xmm/mem,xmm,xmm{k} */
+    case X86EMUL_OPC_EVEX(6, 0x13):    /* vcvtsh2ss xmm/mem,xmm,xmm{k} */
     case X86EMUL_OPC_EVEX_66(6, 0x2d): /* vscalefsh xmm/m16,xmm,xmm{k} */
     case X86EMUL_OPC_EVEX_66(6, 0x43): /* vgetexpsh xmm/m16,xmm,xmm{k} */
     case X86EMUL_OPC_EVEX_66(6, 0x99): /* vfmadd132sh xmm/m16,xmm,xmm{k} */




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.