x86emul: support MOVDIR{I,64B} insns Note that the ISA extensions document revision 035 doesn't specify exception behavior for ModRM.mod != 0b11; assuming #UD here. Signed-off-by: Jan Beulich --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -548,6 +548,8 @@ static const struct ext0f38_table { [0xf1] = { .to_mem = 1, .two_op = 1 }, [0xf2 ... 0xf3] = {}, [0xf5 ... 0xf7] = {}, + [0xf8] = { .simd_size = simd_other }, + [0xf9] = { .to_mem = 1 }, }; /* Shift values between src and dst sizes of pmov{s,z}x{b,w,d}{w,d,q}. */ @@ -1902,6 +1904,8 @@ in_protmode( #define vcpu_has_avx512_bitalg() (ctxt->cpuid->feat.avx512_bitalg) #define vcpu_has_avx512_vpopcntdq() (ctxt->cpuid->feat.avx512_vpopcntdq) #define vcpu_has_rdpid() (ctxt->cpuid->feat.rdpid) +#define vcpu_has_movdiri() (ctxt->cpuid->feat.movdiri) +#define vcpu_has_movdir64b() (ctxt->cpuid->feat.movdir64b) #define vcpu_has_avx512_4vnniw() (ctxt->cpuid->feat.avx512_4vnniw) #define vcpu_has_avx512_4fmaps() (ctxt->cpuid->feat.avx512_4fmaps) @@ -2693,10 +2697,12 @@ x86_decode_0f38( { case 0x00 ... 0xef: case 0xf2 ... 0xf5: - case 0xf7 ... 0xff: + case 0xf7 ... 0xf8: + case 0xfa ... 0xff: op_bytes = 0; /* fall through */ case 0xf6: /* adcx / adox */ + case 0xf9: /* movdiri */ ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK); break; @@ -9896,6 +9902,32 @@ x86_emulate( : "0" ((uint32_t)src.val), "rm" (_regs.edx) ); break; + case X86EMUL_OPC_66(0x0f38, 0xf8): /* movdir64b r,m512 */ + vcpu_must_have(movdir64b); + generate_exception_if(ea.type != OP_MEM, EXC_UD); + src.val = truncate_ea(*dst.reg); + generate_exception_if(!is_aligned(x86_seg_es, src.val, 64, ctxt, ops), + EXC_GP, 0); + /* Ignore the non-temporal behavior for now. */ + fail_if(!ops->write); + BUILD_BUG_ON(sizeof(*mmvalp) < 64); + if ( (rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp, 64, + ctxt)) != X86EMUL_OKAY || + (rc = ops->write(x86_seg_es, src.val, mmvalp, 64, + ctxt)) != X86EMUL_OKAY ) + goto done; + state->simd_size = simd_none; + sfence = true; + break; + + case X86EMUL_OPC(0x0f38, 0xf9): /* movdiri mem,r */ + vcpu_must_have(movdiri); + generate_exception_if(dst.type != OP_MEM, EXC_UD); + /* Ignore the non-temporal behavior for now. */ + dst.val = src.val; + sfence = true; + break; + case X86EMUL_OPC_VEX_66(0x0f3a, 0x00): /* vpermq $imm8,ymm/m256,ymm */ case X86EMUL_OPC_VEX_66(0x0f3a, 0x01): /* vpermpd $imm8,ymm/m256,ymm */ generate_exception_if(!vex.l || !vex.w, EXC_UD); --- a/xen/include/public/arch-x86/cpufeatureset.h +++ b/xen/include/public/arch-x86/cpufeatureset.h @@ -237,6 +237,8 @@ XEN_CPUFEATURE(AVX512_BITALG, 6*32+12) / XEN_CPUFEATURE(AVX512_VPOPCNTDQ, 6*32+14) /*A POPCNT for vectors of DW/QW */ XEN_CPUFEATURE(RDPID, 6*32+22) /*A RDPID instruction */ XEN_CPUFEATURE(CLDEMOTE, 6*32+25) /*A CLDEMOTE instruction */ +XEN_CPUFEATURE(MOVDIRI, 6*32+27) /*A MOVDIRI instruction */ +XEN_CPUFEATURE(MOVDIR64B, 6*32+28) /*A MOVDIR64B instruction */ /* AMD-defined CPU features, CPUID level 0x80000007.edx, word 7 */ XEN_CPUFEATURE(ITSC, 7*32+ 8) /* Invariant TSC */ --- a/tools/tests/x86_emulator/test_x86_emulator.c +++ b/tools/tests/x86_emulator/test_x86_emulator.c @@ -2196,6 +2196,36 @@ int main(int argc, char **argv) goto fail; printf("okay\n"); + printf("%-40s", "Testing movdiri %edx,(%ecx)..."); + instr[0] = 0x0f; instr[1] = 0x38; instr[2] = 0xf9; instr[3] = 0x11; + regs.eip = (unsigned long)&instr[0]; + regs.ecx = (unsigned long)memset(res, -1, 16); + regs.edx = 0x44332211; + rc = x86_emulate(&ctxt, &emulops); + if ( (rc != X86EMUL_OKAY) || + (regs.eip != (unsigned long)&instr[4]) || + res[0] != 0x44332211 || ~res[1] ) + goto fail; + printf("okay\n"); + + printf("%-40s", "Testing movdir64b 144(%edx),%ecx..."); + instr[0] = 0x66; instr[1] = 0x0f; instr[2] = 0x38; instr[3] = 0xf8; + instr[4] = 0x8a; instr[5] = 0x90; instr[8] = instr[7] = instr[6] = 0; + regs.eip = (unsigned long)&instr[0]; + for ( i = 0; i < 64; ++i ) + res[i] = i - 20; + regs.edx = (unsigned long)res; + regs.ecx = (unsigned long)(res + 16); + rc = x86_emulate(&ctxt, &emulops); + if ( (rc != X86EMUL_OKAY) || + (regs.eip != (unsigned long)&instr[9]) || + res[15] != -5 || res[32] != 12 ) + goto fail; + for ( i = 16; i < 32; ++i ) + if ( res[i] != i ) + goto fail; + printf("okay\n"); + printf("%-40s", "Testing movq %mm3,(%ecx)..."); if ( stack_exec && cpu_has_mmx ) { --- a/tools/tests/x86_emulator/x86-emulate.c +++ b/tools/tests/x86_emulator/x86-emulate.c @@ -76,6 +76,8 @@ bool emul_test_init(void) cp.feat.adx = true; cp.feat.avx512pf = cp.feat.avx512f; cp.feat.rdpid = true; + cp.feat.movdiri = true; + cp.feat.movdir64b = true; cp.extd.clzero = true; if ( cpu_has_xsave ) @@ -137,15 +139,15 @@ int emul_test_cpuid( res->c |= 1U << 22; /* - * The emulator doesn't itself use ADCX/ADOX/RDPID nor the S/G prefetch - * insns, so we can always run the respective tests. + * The emulator doesn't itself use ADCX/ADOX/RDPID/MOVDIR* nor the S/G + * prefetch insns, so we can always run the respective tests. */ if ( leaf == 7 && subleaf == 0 ) { res->b |= (1U << 10) | (1U << 19); if ( res->b & (1U << 16) ) res->b |= 1U << 26; - res->c |= 1U << 22; + res->c |= (1U << 22) | (1U << 27) | (1U << 28); } /*