[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v4 02/17] x86emul: support MMX/SSE{, 2, 3} moves



On 28/02/17 12:50, Jan Beulich wrote:
> Previously supported insns are being converted to the new model, and
> several new ones are being added.
>
> To keep the stub handling reasonably simple, integrate SET_SSE_PREFIX()
> into copy_REX_VEX(), at once switching the stubs to use an empty REX
> prefix instead of a double DS: one (no byte registers are being
> accessed, so an empty REX prefix has no effect), except (of course) for
> the 32-bit test harness build.

Why switch a %ds override to REX?  There doesn't appear to be any benefit.

> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -364,11 +369,6 @@ enum vex_pfx {
>  
>  static const uint8_t sse_prefix[] = { 0x66, 0xf3, 0xf2 };
>  
> -#define SET_SSE_PREFIX(dst, vex_pfx) do { \
> -    if ( vex_pfx ) \
> -        (dst) = sse_prefix[(vex_pfx) - 1]; \
> -} while (0)
> -
>  union vex {
>      uint8_t raw[2];
>      struct {
> @@ -383,15 +383,35 @@ union vex {
>      };
>  };
>  
> +#ifdef __x86_64__
> +# define PFX2 REX_PREFIX
> +#else
> +# define PFX2 0x3e
> +#endif
> +#define PFX_BYTES 3
> +#define init_prefixes(stub) ({ \
> +    uint8_t *buf_ = get_stub(stub); \
> +    buf_[0] = 0x3e; \
> +    buf_[1] = PFX2; \
> +    buf_[2] = 0x0f; \
> +    buf_ + 3; \
> +})
> +
>  #define copy_REX_VEX(ptr, rex, vex) do { \
>      if ( (vex).opcx != vex_none ) \
>      { \
>          if ( !mode_64bit() ) \
>              vex.reg |= 8; \
> -        ptr[0] = 0xc4, ptr[1] = (vex).raw[0], ptr[2] = (vex).raw[1]; \
> +        (ptr)[0 - PFX_BYTES] = 0xc4; \
> +        (ptr)[1 - PFX_BYTES] = (vex).raw[0]; \
> +        (ptr)[2 - PFX_BYTES] = (vex).raw[1]; \
> +    } \
> +    else \
> +    { \
> +        if ( (vex).pfx ) \
> +            (ptr)[0 - PFX_BYTES] = sse_prefix[(vex).pfx - 1]; \
> +        (ptr)[1 - PFX_BYTES] |= rex; \

This is no longer guarded by mode_64bit().  Won't this result in %ds |
rex in the 32bit test stubs?

>      } \
> -    else if ( mode_64bit() ) \
> -        ptr[1] = rex | REX_PREFIX; \
>  } while (0)
>  
>  union evex {
> @@ -5429,6 +5496,57 @@ x86_emulate(
>          singlestep = _regs._eflags & X86_EFLAGS_TF;
>          break;
>  
> +    CASE_SIMD_PACKED_FP(, 0x0f, 0x50):     /* movmskp{s,d} xmm,reg */
> +    CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x50): /* vmovmskp{s,d} {x,y}mm,reg */
> +    CASE_SIMD_PACKED_INT(0x0f, 0xd7):      /* pmovmskb {,x}mm,reg */
> +    case X86EMUL_OPC_VEX_66(0x0f, 0xd7):   /* vpmovmskb {x,y}mm,reg */
> +        generate_exception_if(ea.type != OP_REG, EXC_UD);
> +
> +        if ( vex.opcx == vex_none )
> +        {
> +            if ( vex.pfx & VEX_PREFIX_DOUBLE_MASK )
> +                vcpu_must_have(sse2);
> +            else
> +            {
> +                if ( b != 0x50 )
> +                    host_and_vcpu_must_have(mmx);
> +                vcpu_must_have(sse);
> +            }
> +            if ( b == 0x50 || (vex.pfx & VEX_PREFIX_DOUBLE_MASK) )
> +                get_fpu(X86EMUL_FPU_xmm, &fic);
> +            else
> +                get_fpu(X86EMUL_FPU_mmx, &fic);
> +        }
> +        else
> +        {
> +            generate_exception_if(vex.reg != 0xf, EXC_UD);

Isn't this TwoOp?

> +            if ( b == 0x50 || !vex.l )
> +                host_and_vcpu_must_have(avx);
> +            else
> +                host_and_vcpu_must_have(avx2);
> +            get_fpu(X86EMUL_FPU_ymm, &fic);
> +        }
> +
> +        opc = init_prefixes(stub);
> +        opc[0] = b;
> +        /* Convert GPR destination to %rAX. */
> +        rex_prefix &= ~REX_R;
> +        vex.r = 1;
> +        if ( !mode_64bit() )
> +            vex.w = 0;
> +        opc[1] = modrm & 0xc7;
> +        fic.insn_bytes = PFX_BYTES + 2;
> +        opc[2] = 0xc3;
> +
> +        copy_REX_VEX(opc, rex_prefix, vex);
> +        invoke_stub("", "", "=a" (dst.val) : [dummy] "i" (0));
> +
> +        put_stub(stub);
> +        put_fpu(&fic);
> +
> +        dst.bytes = 4;

Somewhere there should probably be an ASSERT() that state->simd_size is
0, so we don't try to invoke the stub twice.

> +        break;
> +
>      CASE_SIMD_PACKED_INT(0x0f, 0x60):    /* punpcklbw {,x}mm/mem,{,x}mm */
>      case X86EMUL_OPC_VEX_66(0x0f, 0x60): /* vpunpcklbw 
> {x,y}mm/mem,{x,y}mm,{x,y}mm */
>      CASE_SIMD_PACKED_INT(0x0f, 0x61):    /* punpcklwd {,x}mm/mem,{,x}mm */
> @@ -6553,23 +6686,14 @@ x86_emulate(
>  
>      if ( state->simd_size )
>      {
> -#ifdef __XEN__
> -        uint8_t *buf = stub.ptr;
> -#else
> -        uint8_t *buf = get_stub(stub);
> -#endif
> -
>          generate_exception_if(!op_bytes, EXC_UD);
>          generate_exception_if(vex.opcx && (d & TwoOp) && vex.reg != 0xf,
>                                EXC_UD);
>  
> -        if ( !buf )
> +        if ( !opc )
>              BUG();
> -        if ( vex.opcx == vex_none )
> -            SET_SSE_PREFIX(buf[0], vex.pfx);
> -
> -        buf[fic.insn_bytes] = 0xc3;
> -        copy_REX_VEX(buf, rex_prefix, vex);
> +        opc[fic.insn_bytes - PFX_BYTES] = 0xc3;

fic.insn_bytes - PFX_BYTES is in the middle of the opcode, isn't it?

~Andrew

> +        copy_REX_VEX(opc, rex_prefix, vex);
>  
>          if ( ea.type == OP_MEM )
>          {


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.