Xen project Mailing List

Re: [Xen-devel] [PATCH 04/17] x86emul: support F16C insns

From: George Dunlap <george.dunlap@xxxxxxxxxx>

Date: Thu, 14 Sep 2017 10:13:52 +0100

Cc: xen-devel <xen-devel@xxxxxxxxxxxxxxxxxxxx>, Andrew Cooper <andrew.cooper3@xxxxxxxxxx>

Delivery-date: Thu, 14 Sep 2017 09:14:23 +0000

List-id: Xen developer discussion <xen-devel.lists.xen.org>

On Wed, Sep 13, 2017 at 6:10 PM, George Dunlap <george.dunlap@xxxxxxxxxx> wrote: > On Wed, Jun 21, 2017 at 1:01 PM, Jan Beulich <JBeulich@xxxxxxxx> wrote: >> Note that this avoids emulating the behavior of VCVTPS2PH found on at >> least some Intel CPUs, which update MXCSR even when the memory write >> faults. >> >> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx> >> >> --- a/tools/tests/x86_emulator/test_x86_emulator.c >> +++ b/tools/tests/x86_emulator/test_x86_emulator.c >> @@ -3028,6 +3028,47 @@ int main(int argc, char **argv) >> printf("skipped\n"); >> #endif >> >> + printf("%-40s", "Testing vcvtph2ps (%ecx),%ymm1..."); >> + if ( stack_exec && cpu_has_f16c ) >> + { >> + decl_insn(vcvtph2ps); >> + decl_insn(vcvtps2ph); >> + >> + asm volatile ( "vxorps %%xmm1, %%xmm1, %%xmm1\n" >> + put_insn(vcvtph2ps, "vcvtph2ps (%0), %%ymm1") >> + :: "c" (NULL) ); >> + >> + set_insn(vcvtph2ps); >> + res[1] = 0x40003c00; /* (1.0, 2.0) */ >> + res[2] = 0x44004200; /* (3.0, 4.0) */ >> + res[3] = 0x3400b800; /* (-.5, .25) */ >> + res[4] = 0xbc000000; /* (0.0, -1.) */ >> + memset(res + 5, 0xff, 16); >> + regs.ecx = (unsigned long)(res + 1); >> + rc = x86_emulate(&ctxt, &emulops); >> + asm volatile ( "vmovups %%ymm1, %0" : "=m" (res[16]) ); >> + if ( rc != X86EMUL_OKAY || !check_eip(vcvtph2ps) ) >> + goto fail; >> + printf("okay\n"); >> + >> + printf("%-40s", "Testing vcvtps2ph $0,%ymm1,(%edx)..."); >> + asm volatile ( "vmovups %0, %%ymm1\n" >> + put_insn(vcvtps2ph, "vcvtps2ph $0, %%ymm1, (%1)") >> + :: "m" (res[16]), "d" (NULL) ); >> + >> + set_insn(vcvtps2ph); >> + memset(res + 7, 0, 32); >> + regs.edx = (unsigned long)(res + 7); >> + rc = x86_emulate(&ctxt, &emulops); >> + if ( rc != X86EMUL_OKAY || !check_eip(vcvtps2ph) || >> + memcmp(res + 1, res + 7, 16) || >> + res[11] || res[12] || res[13] || res[14] ) >> + goto fail; >> + printf("okay\n"); >> + } >> + else >> + printf("skipped\n"); >> + >> #undef decl_insn >> #undef put_insn >> #undef set_insn >> --- a/tools/tests/x86_emulator/x86_emulate.h >> +++ b/tools/tests/x86_emulator/x86_emulate.h >> @@ -127,6 +127,14 @@ static inline uint64_t xgetbv(uint32_t x >> (res.c & (1U << 28)) != 0; \ >> }) >> >> +#define cpu_has_f16c ({ \ >> + struct cpuid_leaf res; \ >> + emul_test_cpuid(1, 0, &res, NULL); \ >> + if ( !(res.c & (1U << 27)) || ((xgetbv(0) & 6) != 6) ) \ >> + res.c = 0; \ >> + (res.c & (1U << 29)) != 0; \ >> +}) >> + >> #define cpu_has_avx2 ({ \ >> struct cpuid_leaf res; \ >> emul_test_cpuid(1, 0, &res, NULL); \ >> --- a/xen/arch/x86/x86_emulate/x86_emulate.c >> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c >> @@ -369,6 +369,7 @@ static const struct { >> [0x00 ... 0x0b] = { .simd_size = simd_packed_int }, >> [0x0c ... 0x0f] = { .simd_size = simd_packed_fp }, >> [0x10] = { .simd_size = simd_packed_int }, >> + [0x13] = { .simd_size = simd_other, .two_op = 1 }, >> [0x14 ... 0x15] = { .simd_size = simd_packed_fp }, >> [0x17] = { .simd_size = simd_packed_int, .two_op = 1 }, >> [0x18 ... 0x19] = { .simd_size = simd_scalar_fp, .two_op = 1 }, >> @@ -411,6 +412,7 @@ static const struct { >> [0x14 ... 0x17] = { .simd_size = simd_none, .to_mem = 1, .two_op = 1 }, >> [0x18] = { .simd_size = simd_128 }, >> [0x19] = { .simd_size = simd_128, .to_mem = 1, .two_op = 1 }, >> + [0x1d] = { .simd_size = simd_other, .to_mem = 1, .two_op = 1 }, >> [0x20] = { .simd_size = simd_none }, >> [0x21] = { .simd_size = simd_other }, >> [0x22] = { .simd_size = simd_none }, >> @@ -1601,6 +1603,7 @@ static bool vcpu_has( >> #define vcpu_has_popcnt() vcpu_has( 1, ECX, 23, ctxt, ops) >> #define vcpu_has_aesni() vcpu_has( 1, ECX, 25, ctxt, ops) >> #define vcpu_has_avx() vcpu_has( 1, ECX, 28, ctxt, ops) >> +#define vcpu_has_f16c() vcpu_has( 1, ECX, 29, ctxt, ops) >> #define vcpu_has_rdrand() vcpu_has( 1, ECX, 30, ctxt, ops) >> #define vcpu_has_mmxext() (vcpu_has(0x80000001, EDX, 22, ctxt, ops) || \ >> vcpu_has_sse()) >> @@ -7216,6 +7219,12 @@ x86_emulate( >> host_and_vcpu_must_have(sse4_1); >> goto simd_0f38_common; >> >> + case X86EMUL_OPC_VEX_66(0x0f38, 0x13): /* vcvtph2ps xmm/mem,{x,y}mm */ >> + generate_exception_if(vex.w, EXC_UD); >> + host_and_vcpu_must_have(f16c); >> + op_bytes = 8 << vex.l; >> + goto simd_0f_ymm; >> + >> case X86EMUL_OPC_VEX_66(0x0f38, 0x20): /* vpmovsxbw xmm/mem,{x,y}mm */ >> case X86EMUL_OPC_VEX_66(0x0f38, 0x21): /* vpmovsxbd xmm/mem,{x,y}mm */ >> case X86EMUL_OPC_VEX_66(0x0f38, 0x22): /* vpmovsxbq xmm/mem,{x,y}mm */ >> @@ -7607,6 +7616,50 @@ x86_emulate( >> opc = init_prefixes(stub); >> goto pextr; >> >> + case X86EMUL_OPC_VEX_66(0x0f3a, 0x1d): /* vcvtps2ph >> $imm8,{x,y}mm,xmm/mem */ > > On the whole this stanza looks plausible; just a few comments... > >> + { >> + uint32_t mxcsr; >> + >> + generate_exception_if(vex.w || vex.reg != 0xf, EXC_UD); >> + host_and_vcpu_must_have(f16c); >> + fail_if(!ops->write); >> + >> + opc = init_prefixes(stub); >> + opc[0] = b; >> + opc[1] = modrm; >> + if ( ea.type == OP_MEM ) >> + { >> + /* Convert memory operand to (%rAX). */ >> + vex.b = 1; >> + opc[1] &= 0x38; >> + } > > First of all, I'm not a fan of modifying the actual decoded vex prefix > here. I realize that as it happens vex.b won't be read again; but it > just seems like risky behavior. Would it make more sense to make a > copy of vex and modify that? > > Secondly, the logic here is confusing: you first copy the decoded > modrm byte, then modify one bit of the decoded vex prefix, then modify > the copied modrm byte, then (below) copy the (potentially) modified > vex prefix. > > It seems like it would be a lot more clear to have the if before the > init_prefixes, and have it look like this: > > if ( ea.type == OP_MEM ) > { > /* Convert blah */ > vex.b = 1; > modrm &= 0x38 > } I see now -- you're using a construct that is common all over the code. I think the construct could probably use changing, but currently for readability it's probably better to follow suit. -George _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx https://lists.xen.org/xen-devel

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.