[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

RE: [PATCH v6 03/10] x86emul: support MOVDIR{I,64B} insns



> -----Original Message-----
> From: Jan Beulich <jbeulich@xxxxxxxx>
> Sent: 14 April 2020 12:45
> To: xen-devel@xxxxxxxxxxxxxxxxxxxx
> Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>; Wei Liu <wl@xxxxxxx>; Roger 
> Pau Monne
> <roger.pau@xxxxxxxxxx>; Paul Durrant <paul@xxxxxxx>
> Subject: [PATCH v6 03/10] x86emul: support MOVDIR{I,64B} insns
> 
> Introduce a new blk() hook, paralleling the rmw() one in a certain way,
> but being intended for larger data sizes, and hence its HVM intermediate
> handling function doesn't fall back to splitting the operation if the
> requested virtual address can't be mapped.
> 
> Note that SDM revision 071 doesn't specify exception behavior for
> ModRM.mod == 0b11; assuming #UD here.
> 
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

hvm/emulate part...

Reviewed-by: Paul Durrant <paul@xxxxxxx>

> ---
> v6: Fold MOVDIRI and MOVDIR64B changes again. Use blk() for both. All
>     tags dropped.
> v5: Introduce/use ->blk() hook. Correct asm() operands.
> v4: Split MOVDIRI and MOVDIR64B and move this one ahead. Re-base.
> v3: Update description.
> ---
> (SDE: -tnt)
> 
> --- a/tools/tests/x86_emulator/test_x86_emulator.c
> +++ b/tools/tests/x86_emulator/test_x86_emulator.c
> @@ -652,6 +652,18 @@ static int cmpxchg(
>      return X86EMUL_OKAY;
>  }
> 
> +static int blk(
> +    enum x86_segment seg,
> +    unsigned long offset,
> +    void *p_data,
> +    unsigned int bytes,
> +    uint32_t *eflags,
> +    struct x86_emulate_state *state,
> +    struct x86_emulate_ctxt *ctxt)
> +{
> +    return x86_emul_blk((void *)offset, p_data, bytes, eflags, state, ctxt);
> +}
> +
>  static int read_segment(
>      enum x86_segment seg,
>      struct segment_register *reg,
> @@ -2339,6 +2351,54 @@ int main(int argc, char **argv)
>          goto fail;
>      printf("okay\n");
> 
> +    emulops.blk = blk;
> +
> +    printf("%-40s", "Testing movdiri %edx,(%ecx)...");
> +    if ( stack_exec && cpu_has_movdiri )
> +    {
> +        instr[0] = 0x0f; instr[1] = 0x38; instr[2] = 0xf9; instr[3] = 0x11;
> +
> +        regs.eip = (unsigned long)&instr[0];
> +        regs.ecx = (unsigned long)memset(res, -1, 16);
> +        regs.edx = 0x44332211;
> +
> +        rc = x86_emulate(&ctxt, &emulops);
> +        if ( (rc != X86EMUL_OKAY) ||
> +             (regs.eip != (unsigned long)&instr[4]) ||
> +             res[0] != 0x44332211 || ~res[1] )
> +            goto fail;
> +        printf("okay\n");
> +    }
> +    else
> +        printf("skipped\n");
> +
> +    printf("%-40s", "Testing movdir64b 144(%edx),%ecx...");
> +    if ( stack_exec && cpu_has_movdir64b )
> +    {
> +        instr[0] = 0x66; instr[1] = 0x0f; instr[2] = 0x38; instr[3] = 0xf8;
> +        instr[4] = 0x8a; instr[5] = 0x90; instr[8] = instr[7] = instr[6] = 0;
> +
> +        regs.eip = (unsigned long)&instr[0];
> +        for ( i = 0; i < 64; ++i )
> +            res[i] = i - 20;
> +        regs.edx = (unsigned long)res;
> +        regs.ecx = (unsigned long)(res + 16);
> +
> +        rc = x86_emulate(&ctxt, &emulops);
> +        if ( (rc != X86EMUL_OKAY) ||
> +             (regs.eip != (unsigned long)&instr[9]) ||
> +             res[15] != -5 || res[32] != 12 )
> +            goto fail;
> +        for ( i = 16; i < 32; ++i )
> +            if ( res[i] != i )
> +                goto fail;
> +        printf("okay\n");
> +    }
> +    else
> +        printf("skipped\n");
> +
> +    emulops.blk = NULL;
> +
>      printf("%-40s", "Testing movq %mm3,(%ecx)...");
>      if ( stack_exec && cpu_has_mmx )
>      {
> --- a/tools/tests/x86_emulator/x86-emulate.h
> +++ b/tools/tests/x86_emulator/x86-emulate.h
> @@ -154,6 +154,8 @@ static inline bool xcr0_mask(uint64_t ma
>  #define cpu_has_avx512_vnni (cp.feat.avx512_vnni && xcr0_mask(0xe6))
>  #define cpu_has_avx512_bitalg (cp.feat.avx512_bitalg && xcr0_mask(0xe6))
>  #define cpu_has_avx512_vpopcntdq (cp.feat.avx512_vpopcntdq && 
> xcr0_mask(0xe6))
> +#define cpu_has_movdiri    cp.feat.movdiri
> +#define cpu_has_movdir64b  cp.feat.movdir64b
>  #define cpu_has_avx512_4vnniw (cp.feat.avx512_4vnniw && xcr0_mask(0xe6))
>  #define cpu_has_avx512_4fmaps (cp.feat.avx512_4fmaps && xcr0_mask(0xe6))
>  #define cpu_has_avx512_bf16 (cp.feat.avx512_bf16 && xcr0_mask(0xe6))
> --- a/xen/arch/x86/Makefile
> +++ b/xen/arch/x86/Makefile
> @@ -250,12 +250,13 @@ $(BASEDIR)/include/asm-x86/asm-macros.h:
>  # sure we pick up changes when the compiler used has changed.)
>  ifeq ($(MAKECMDGOALS),asm-offsets.s)
> 
> -as-ISA-list := CLWB EPT FSGSBASE INVPCID RDRAND RDSEED SSE4_2 VMX XSAVEOPT
> +as-ISA-list := CLWB EPT FSGSBASE INVPCID MOVDIR RDRAND RDSEED SSE4_2 VMX 
> XSAVEOPT
> 
>  CLWB-insn    := clwb (%rax)
>  EPT-insn     := invept (%rax),%rax
>  FSGSBASE-insn        := rdfsbase %rax
>  INVPCID-insn := invpcid (%rax),%rax
> +MOVDIR-insn  := movdiri %rax,(%rax)
>  RDRAND-insn  := rdrand %eax
>  RDSEED-insn  := rdseed %eax
>  SSE4_2-insn  := crc32 %eax,%eax
> --- a/xen/arch/x86/hvm/emulate.c
> +++ b/xen/arch/x86/hvm/emulate.c
> @@ -1409,6 +1409,44 @@ static int hvmemul_rmw(
>      return rc;
>  }
> 
> +static int hvmemul_blk(
> +    enum x86_segment seg,
> +    unsigned long offset,
> +    void *p_data,
> +    unsigned int bytes,
> +    uint32_t *eflags,
> +    struct x86_emulate_state *state,
> +    struct x86_emulate_ctxt *ctxt)
> +{
> +    struct hvm_emulate_ctxt *hvmemul_ctxt =
> +        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
> +    unsigned long addr;
> +    uint32_t pfec = PFEC_page_present | PFEC_write_access;
> +    int rc;
> +    void *mapping = NULL;
> +
> +    rc = hvmemul_virtual_to_linear(
> +        seg, offset, bytes, NULL, hvm_access_write, hvmemul_ctxt, &addr);
> +    if ( rc != X86EMUL_OKAY || !bytes )
> +        return rc;
> +
> +    if ( is_x86_system_segment(seg) )
> +        pfec |= PFEC_implicit;
> +    else if ( hvmemul_ctxt->seg_reg[x86_seg_ss].dpl == 3 )
> +        pfec |= PFEC_user_mode;
> +
> +    mapping = hvmemul_map_linear_addr(addr, bytes, pfec, hvmemul_ctxt);
> +    if ( IS_ERR(mapping) )
> +        return ~PTR_ERR(mapping);
> +    if ( !mapping )
> +        return X86EMUL_UNHANDLEABLE;
> +
> +    rc = x86_emul_blk(mapping, p_data, bytes, eflags, state, ctxt);
> +    hvmemul_unmap_linear_addr(mapping, addr, bytes, hvmemul_ctxt);
> +
> +    return rc;
> +}
> +
>  static int hvmemul_write_discard(
>      enum x86_segment seg,
>      unsigned long offset,
> @@ -2475,6 +2513,7 @@ static const struct x86_emulate_ops hvm_
>      .write         = hvmemul_write,
>      .rmw           = hvmemul_rmw,
>      .cmpxchg       = hvmemul_cmpxchg,
> +    .blk           = hvmemul_blk,
>      .validate      = hvmemul_validate,
>      .rep_ins       = hvmemul_rep_ins,
>      .rep_outs      = hvmemul_rep_outs,
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -548,6 +548,8 @@ static const struct ext0f38_table {
>      [0xf1] = { .to_mem = 1, .two_op = 1 },
>      [0xf2 ... 0xf3] = {},
>      [0xf5 ... 0xf7] = {},
> +    [0xf8] = { .simd_size = simd_other },
> +    [0xf9] = { .to_mem = 1, .two_op = 1 /* Mov */ },
>  };
> 
>  /* Shift values between src and dst sizes of pmov{s,z}x{b,w,d}{w,d,q}. */
> @@ -851,6 +853,9 @@ struct x86_emulate_state {
>          rmw_xchg,
>          rmw_xor,
>      } rmw;
> +    enum {
> +        blk_movdir,
> +    } blk;
>      uint8_t modrm, modrm_mod, modrm_reg, modrm_rm;
>      uint8_t sib_index, sib_scale;
>      uint8_t rex_prefix;
> @@ -1914,6 +1919,8 @@ amd_like(const struct x86_emulate_ctxt *
>  #define vcpu_has_avx512_bitalg() (ctxt->cpuid->feat.avx512_bitalg)
>  #define vcpu_has_avx512_vpopcntdq() (ctxt->cpuid->feat.avx512_vpopcntdq)
>  #define vcpu_has_rdpid()       (ctxt->cpuid->feat.rdpid)
> +#define vcpu_has_movdiri()     (ctxt->cpuid->feat.movdiri)
> +#define vcpu_has_movdir64b()   (ctxt->cpuid->feat.movdir64b)
>  #define vcpu_has_avx512_4vnniw() (ctxt->cpuid->feat.avx512_4vnniw)
>  #define vcpu_has_avx512_4fmaps() (ctxt->cpuid->feat.avx512_4fmaps)
>  #define vcpu_has_avx512_bf16() (ctxt->cpuid->feat.avx512_bf16)
> @@ -2722,10 +2729,12 @@ x86_decode_0f38(
>      {
>      case 0x00 ... 0xef:
>      case 0xf2 ... 0xf5:
> -    case 0xf7 ... 0xff:
> +    case 0xf7 ... 0xf8:
> +    case 0xfa ... 0xff:
>          op_bytes = 0;
>          /* fall through */
>      case 0xf6: /* adcx / adox */
> +    case 0xf9: /* movdiri */
>          ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
>          break;
> 
> @@ -10171,6 +10180,34 @@ x86_emulate(
>                              : "0" ((uint32_t)src.val), "rm" (_regs.edx) );
>          break;
> 
> +    case X86EMUL_OPC_66(0x0f38, 0xf8): /* movdir64b r,m512 */
> +        host_and_vcpu_must_have(movdir64b);
> +        generate_exception_if(ea.type != OP_MEM, EXC_UD);
> +        src.val = truncate_ea(*dst.reg);
> +        generate_exception_if(!is_aligned(x86_seg_es, src.val, 64, ctxt, 
> ops),
> +                              EXC_GP, 0);
> +        fail_if(!ops->blk);
> +        state->blk = blk_movdir;
> +        BUILD_BUG_ON(sizeof(*mmvalp) < 64);
> +        if ( (rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp, 64,
> +                             ctxt)) != X86EMUL_OKAY ||
> +             (rc = ops->blk(x86_seg_es, src.val, mmvalp, 64, &_regs.eflags,
> +                            state, ctxt)) != X86EMUL_OKAY )
> +            goto done;
> +        state->simd_size = simd_none;
> +        break;
> +
> +    case X86EMUL_OPC(0x0f38, 0xf9): /* movdiri mem,r */
> +        host_and_vcpu_must_have(movdiri);
> +        generate_exception_if(dst.type != OP_MEM, EXC_UD);
> +        fail_if(!ops->blk);
> +        state->blk = blk_movdir;
> +        if ( (rc = ops->blk(dst.mem.seg, dst.mem.off, &src.val, op_bytes,
> +                            &_regs.eflags, state, ctxt)) != X86EMUL_OKAY )
> +            goto done;
> +        dst.type = OP_NONE;
> +        break;
> +
>  #ifndef X86EMUL_NO_SIMD
> 
>      case X86EMUL_OPC_VEX_66(0x0f3a, 0x00): /* vpermq $imm8,ymm/m256,ymm */
> @@ -11429,6 +11466,77 @@ int x86_emul_rmw(
> 
>      return X86EMUL_OKAY;
>  }
> +
> +int x86_emul_blk(
> +    void *ptr,
> +    void *data,
> +    unsigned int bytes,
> +    uint32_t *eflags,
> +    struct x86_emulate_state *state,
> +    struct x86_emulate_ctxt *ctxt)
> +{
> +    switch ( state->blk )
> +    {
> +        /*
> +         * Throughout this switch(), memory clobbers are used to compensate
> +         * that other operands may not properly express the (full) memory
> +         * ranges covered.
> +         */
> +    case blk_movdir:
> +        switch ( bytes )
> +        {
> +#ifdef __x86_64__
> +        case sizeof(uint32_t):
> +# ifdef HAVE_AS_MOVDIR
> +            asm ( "movdiri %0, (%1)"
> +                 :: "r" (*(uint32_t *)data), "r" (ptr) : "memory" );
> +# else
> +            /* movdiri %esi, (%rdi) */
> +            asm ( ".byte 0x0f, 0x38, 0xf9, 0x37"
> +                  :: "S" (*(uint32_t *)data), "D" (ptr) : "memory" );
> +# endif
> +            break;
> +#endif
> +
> +        case sizeof(unsigned long):
> +#ifdef HAVE_AS_MOVDIR
> +            asm ( "movdiri %0, (%1)"
> +                 :: "r" (*(unsigned long *)data), "r" (ptr) : "memory" );
> +#else
> +            /* movdiri %rsi, (%rdi) */
> +            asm ( ".byte 0x48, 0x0f, 0x38, 0xf9, 0x37"
> +                  :: "S" (*(unsigned long *)data), "D" (ptr) : "memory" );
> +#endif
> +            break;
> +
> +        case 64:
> +            if ( ((unsigned long)ptr & 0x3f) )
> +            {
> +                ASSERT_UNREACHABLE();
> +                return X86EMUL_UNHANDLEABLE;
> +            }
> +#ifdef HAVE_AS_MOVDIR
> +            asm ( "movdir64b (%0), %1" :: "r" (data), "r" (ptr) : "memory" );
> +#else
> +            /* movdir64b (%rsi), %rdi */
> +            asm ( ".byte 0x66, 0x0f, 0x38, 0xf8, 0x3e"
> +                  :: "S" (data), "D" (ptr) : "memory" );
> +#endif
> +            break;
> +
> +        default:
> +            ASSERT_UNREACHABLE();
> +            return X86EMUL_UNHANDLEABLE;
> +        }
> +        break;
> +
> +    default:
> +        ASSERT_UNREACHABLE();
> +        return X86EMUL_UNHANDLEABLE;
> +    }
> +
> +    return X86EMUL_OKAY;
> +}
> 
>  static void __init __maybe_unused build_assertions(void)
>  {
> --- a/xen/arch/x86/x86_emulate/x86_emulate.h
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.h
> @@ -310,6 +310,22 @@ struct x86_emulate_ops
>          struct x86_emulate_ctxt *ctxt);
> 
>      /*
> +     * blk: Emulate a large (block) memory access.
> +     * @p_data: [IN/OUT] (optional) Pointer to source/destination buffer.
> +     * @eflags: [IN/OUT] Pointer to EFLAGS to be updated according to
> +     *                   instruction effects.
> +     * @state:  [IN/OUT] Pointer to (opaque) emulator state.
> +     */
> +    int (*blk)(
> +        enum x86_segment seg,
> +        unsigned long offset,
> +        void *p_data,
> +        unsigned int bytes,
> +        uint32_t *eflags,
> +        struct x86_emulate_state *state,
> +        struct x86_emulate_ctxt *ctxt);
> +
> +    /*
>       * validate: Post-decode, pre-emulate hook to allow caller controlled
>       * filtering.
>       */
> @@ -793,6 +809,14 @@ x86_emul_rmw(
>      unsigned int bytes,
>      uint32_t *eflags,
>      struct x86_emulate_state *state,
> +    struct x86_emulate_ctxt *ctxt);
> +int
> +x86_emul_blk(
> +    void *ptr,
> +    void *data,
> +    unsigned int bytes,
> +    uint32_t *eflags,
> +    struct x86_emulate_state *state,
>      struct x86_emulate_ctxt *ctxt);
> 
>  static inline void x86_emul_hw_exception(
> --- a/xen/include/asm-x86/cpufeature.h
> +++ b/xen/include/asm-x86/cpufeature.h
> @@ -120,6 +120,8 @@
>  #define cpu_has_avx512_bitalg   boot_cpu_has(X86_FEATURE_AVX512_BITALG)
>  #define cpu_has_avx512_vpopcntdq boot_cpu_has(X86_FEATURE_AVX512_VPOPCNTDQ)
>  #define cpu_has_rdpid           boot_cpu_has(X86_FEATURE_RDPID)
> +#define cpu_has_movdiri         boot_cpu_has(X86_FEATURE_MOVDIRI)
> +#define cpu_has_movdir64b       boot_cpu_has(X86_FEATURE_MOVDIR64B)
> 
>  /* CPUID level 0x80000007.edx */
>  #define cpu_has_itsc            boot_cpu_has(X86_FEATURE_ITSC)
> --- a/xen/include/public/arch-x86/cpufeatureset.h
> +++ b/xen/include/public/arch-x86/cpufeatureset.h
> @@ -237,6 +237,8 @@ XEN_CPUFEATURE(AVX512_BITALG, 6*32+12) /
>  XEN_CPUFEATURE(AVX512_VPOPCNTDQ, 6*32+14) /*A  POPCNT for vectors of DW/QW */
>  XEN_CPUFEATURE(RDPID,         6*32+22) /*A  RDPID instruction */
>  XEN_CPUFEATURE(CLDEMOTE,      6*32+25) /*A  CLDEMOTE instruction */
> +XEN_CPUFEATURE(MOVDIRI,       6*32+27) /*A  MOVDIRI instruction */
> +XEN_CPUFEATURE(MOVDIR64B,     6*32+28) /*A  MOVDIR64B instruction */
> 
>  /* AMD-defined CPU features, CPUID level 0x80000007.edx, word 7 */
>  XEN_CPUFEATURE(ITSC,          7*32+ 8) /*   Invariant TSC */





 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.