[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 04/17] x86emul: support F16C insns



On Wed, Sep 13, 2017 at 6:10 PM, George Dunlap <george.dunlap@xxxxxxxxxx> wrote:
> On Wed, Jun 21, 2017 at 1:01 PM, Jan Beulich <JBeulich@xxxxxxxx> wrote:
>> Note that this avoids emulating the behavior of VCVTPS2PH found on at
>> least some Intel CPUs, which update MXCSR even when the memory write
>> faults.
>>
>> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
>>
>> --- a/tools/tests/x86_emulator/test_x86_emulator.c
>> +++ b/tools/tests/x86_emulator/test_x86_emulator.c
>> @@ -3028,6 +3028,47 @@ int main(int argc, char **argv)
>>          printf("skipped\n");
>>  #endif
>>
>> +    printf("%-40s", "Testing vcvtph2ps (%ecx),%ymm1...");
>> +    if ( stack_exec && cpu_has_f16c )
>> +    {
>> +        decl_insn(vcvtph2ps);
>> +        decl_insn(vcvtps2ph);
>> +
>> +        asm volatile ( "vxorps %%xmm1, %%xmm1, %%xmm1\n"
>> +                       put_insn(vcvtph2ps, "vcvtph2ps (%0), %%ymm1")
>> +                       :: "c" (NULL) );
>> +
>> +        set_insn(vcvtph2ps);
>> +        res[1] = 0x40003c00; /* (1.0, 2.0) */
>> +        res[2] = 0x44004200; /* (3.0, 4.0) */
>> +        res[3] = 0x3400b800; /* (-.5, .25) */
>> +        res[4] = 0xbc000000; /* (0.0, -1.) */
>> +        memset(res + 5, 0xff, 16);
>> +        regs.ecx = (unsigned long)(res + 1);
>> +        rc = x86_emulate(&ctxt, &emulops);
>> +        asm volatile ( "vmovups %%ymm1, %0" : "=m" (res[16]) );
>> +        if ( rc != X86EMUL_OKAY || !check_eip(vcvtph2ps) )
>> +            goto fail;
>> +        printf("okay\n");
>> +
>> +        printf("%-40s", "Testing vcvtps2ph $0,%ymm1,(%edx)...");
>> +        asm volatile ( "vmovups %0, %%ymm1\n"
>> +                       put_insn(vcvtps2ph, "vcvtps2ph $0, %%ymm1, (%1)")
>> +                       :: "m" (res[16]), "d" (NULL) );
>> +
>> +        set_insn(vcvtps2ph);
>> +        memset(res + 7, 0, 32);
>> +        regs.edx = (unsigned long)(res + 7);
>> +        rc = x86_emulate(&ctxt, &emulops);
>> +        if ( rc != X86EMUL_OKAY || !check_eip(vcvtps2ph) ||
>> +             memcmp(res + 1, res + 7, 16) ||
>> +             res[11] || res[12] || res[13] || res[14] )
>> +            goto fail;
>> +        printf("okay\n");
>> +    }
>> +    else
>> +        printf("skipped\n");
>> +
>>  #undef decl_insn
>>  #undef put_insn
>>  #undef set_insn
>> --- a/tools/tests/x86_emulator/x86_emulate.h
>> +++ b/tools/tests/x86_emulator/x86_emulate.h
>> @@ -127,6 +127,14 @@ static inline uint64_t xgetbv(uint32_t x
>>      (res.c & (1U << 28)) != 0; \
>>  })
>>
>> +#define cpu_has_f16c ({ \
>> +    struct cpuid_leaf res; \
>> +    emul_test_cpuid(1, 0, &res, NULL); \
>> +    if ( !(res.c & (1U << 27)) || ((xgetbv(0) & 6) != 6) ) \
>> +        res.c = 0; \
>> +    (res.c & (1U << 29)) != 0; \
>> +})
>> +
>>  #define cpu_has_avx2 ({ \
>>      struct cpuid_leaf res; \
>>      emul_test_cpuid(1, 0, &res, NULL); \
>> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
>> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
>> @@ -369,6 +369,7 @@ static const struct {
>>      [0x00 ... 0x0b] = { .simd_size = simd_packed_int },
>>      [0x0c ... 0x0f] = { .simd_size = simd_packed_fp },
>>      [0x10] = { .simd_size = simd_packed_int },
>> +    [0x13] = { .simd_size = simd_other, .two_op = 1 },
>>      [0x14 ... 0x15] = { .simd_size = simd_packed_fp },
>>      [0x17] = { .simd_size = simd_packed_int, .two_op = 1 },
>>      [0x18 ... 0x19] = { .simd_size = simd_scalar_fp, .two_op = 1 },
>> @@ -411,6 +412,7 @@ static const struct {
>>      [0x14 ... 0x17] = { .simd_size = simd_none, .to_mem = 1, .two_op = 1 },
>>      [0x18] = { .simd_size = simd_128 },
>>      [0x19] = { .simd_size = simd_128, .to_mem = 1, .two_op = 1 },
>> +    [0x1d] = { .simd_size = simd_other, .to_mem = 1, .two_op = 1 },
>>      [0x20] = { .simd_size = simd_none },
>>      [0x21] = { .simd_size = simd_other },
>>      [0x22] = { .simd_size = simd_none },
>> @@ -1601,6 +1603,7 @@ static bool vcpu_has(
>>  #define vcpu_has_popcnt()      vcpu_has(         1, ECX, 23, ctxt, ops)
>>  #define vcpu_has_aesni()       vcpu_has(         1, ECX, 25, ctxt, ops)
>>  #define vcpu_has_avx()         vcpu_has(         1, ECX, 28, ctxt, ops)
>> +#define vcpu_has_f16c()        vcpu_has(         1, ECX, 29, ctxt, ops)
>>  #define vcpu_has_rdrand()      vcpu_has(         1, ECX, 30, ctxt, ops)
>>  #define vcpu_has_mmxext()     (vcpu_has(0x80000001, EDX, 22, ctxt, ops) || \
>>                                 vcpu_has_sse())
>> @@ -7216,6 +7219,12 @@ x86_emulate(
>>          host_and_vcpu_must_have(sse4_1);
>>          goto simd_0f38_common;
>>
>> +    case X86EMUL_OPC_VEX_66(0x0f38, 0x13): /* vcvtph2ps xmm/mem,{x,y}mm */
>> +        generate_exception_if(vex.w, EXC_UD);
>> +        host_and_vcpu_must_have(f16c);
>> +        op_bytes = 8 << vex.l;
>> +        goto simd_0f_ymm;
>> +
>>      case X86EMUL_OPC_VEX_66(0x0f38, 0x20): /* vpmovsxbw xmm/mem,{x,y}mm */
>>      case X86EMUL_OPC_VEX_66(0x0f38, 0x21): /* vpmovsxbd xmm/mem,{x,y}mm */
>>      case X86EMUL_OPC_VEX_66(0x0f38, 0x22): /* vpmovsxbq xmm/mem,{x,y}mm */
>> @@ -7607,6 +7616,50 @@ x86_emulate(
>>          opc = init_prefixes(stub);
>>          goto pextr;
>>
>> +    case X86EMUL_OPC_VEX_66(0x0f3a, 0x1d): /* vcvtps2ph 
>> $imm8,{x,y}mm,xmm/mem */
>
> On the whole this stanza looks plausible; just a few comments...
>
>> +    {
>> +        uint32_t mxcsr;
>> +
>> +        generate_exception_if(vex.w || vex.reg != 0xf, EXC_UD);
>> +        host_and_vcpu_must_have(f16c);
>> +        fail_if(!ops->write);
>> +
>> +        opc = init_prefixes(stub);
>> +        opc[0] = b;
>> +        opc[1] = modrm;
>> +        if ( ea.type == OP_MEM )
>> +        {
>> +            /* Convert memory operand to (%rAX). */
>> +            vex.b = 1;
>> +            opc[1] &= 0x38;
>> +        }
>
> First of all, I'm not a fan of modifying the actual decoded vex prefix
> here.  I realize that as it happens vex.b won't be read again; but it
> just seems like risky behavior.  Would it make more sense to make a
> copy of vex and modify that?
>
> Secondly, the logic here is confusing: you first copy the decoded
> modrm byte, then modify one bit of the decoded vex prefix, then modify
> the copied modrm byte, then (below) copy the (potentially) modified
> vex prefix.
>
> It seems like it would be a lot more clear to have the if before the
> init_prefixes, and have it look like this:
>
> if ( ea.type == OP_MEM )
> {
>   /* Convert blah */
>   vex.b = 1;
>   modrm &= 0x38
> }

I see now -- you're using a construct that is common all over the code.

I think the construct could probably use changing, but currently for
readability it's probably better to follow suit.

 -George

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.