
[Xen-devel] Ping: [PATCH 1/6] x86emul: generalize wbinvd() hook



On 01.07.2019 13:55, Jan Beulich wrote:
> The hook is already in use for other purposes, and emulating e.g.
> CLFLUSH by issuing WBINVD is, well, not very nice. Rename the hook and
> add parameters. Use lighter-weight flushing insns when possible in
> hvmemul_cache_op().
> 
> hvmemul_cache_op() treats x86emul_invd the same as x86emul_wbinvd to
> retain the original behavior, but I'm not sure this is what we want in
> the long run.
> 
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

Paul,

any chance I could get your ack (or otherwise) here? I thought I had
answered the one question you raised to your satisfaction.

Thanks, Jan

> ---
> v2: Use cache_op() as hook name. Convert macros to inline functions in
>      system.h. Re-base.
> ---
> I was unsure about PREFETCH* and CLDEMOTE - both are cache-management
> insns too, but the emulator currently treats them as NOPs without
> invoking any hooks.
> I was also uncertain about the new cache_flush_permitted() instance -
> generally I think it wouldn't be too bad if we allowed line flushes in
> all cases, in which case the checks in the ->wbinvd_intercept() handlers
> would suffice (as they did until now).
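
To put the "lighter-weight flushing" point from the description into code:
hvmemul_cache_op() degrades from CLWB through CLFLUSHOPT down to CLFLUSH,
and only falls back to the full ->wbinvd_intercept() when even CLFLUSH is
unavailable. A condensed sketch of the fallback chain (not the literal
patch hunks - see hvmemul_cache_op() in the diff below for the real thing):

    /* Flush-strength fallback, condensed from hvmemul_cache_op(). */
    if ( cpu_has_clflush )
    {
        if ( op == x86emul_clwb && cpu_has_clwb )
            clwb(mapping);           /* write line back, keep it cached */
        else if ( op == x86emul_clflushopt && cpu_has_clflushopt )
            clflushopt(mapping);     /* weakly ordered line flush */
        else
            clflush(mapping);        /* architectural baseline */
    }
    else
        alternative_vcall(hvm_funcs.wbinvd_intercept); /* whole cache */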
> 
> --- a/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
> +++ b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
> @@ -382,10 +382,13 @@ static int fuzz_invlpg(
>       return maybe_fail(ctxt, "invlpg", false);
>   }
>   
> -static int fuzz_wbinvd(
> +static int fuzz_cache_op(
> +    enum x86emul_cache_op op,
> +    enum x86_segment seg,
> +    unsigned long offset,
>       struct x86_emulate_ctxt *ctxt)
>   {
> -    return maybe_fail(ctxt, "wbinvd", true);
> +    return maybe_fail(ctxt, "cache-management", true);
>   }
>   
>   static int fuzz_write_io(
> @@ -620,7 +623,7 @@ static const struct x86_emulate_ops all_
>       SET(read_xcr),
>       SET(read_msr),
>       SET(write_msr),
> -    SET(wbinvd),
> +    SET(cache_op),
>       SET(invlpg),
>       .get_fpu    = emul_test_get_fpu,
>       .put_fpu    = emul_test_put_fpu,
> @@ -729,7 +732,7 @@ enum {
>       HOOK_read_xcr,
>       HOOK_read_msr,
>       HOOK_write_msr,
> -    HOOK_wbinvd,
> +    HOOK_cache_op,
>       HOOK_cpuid,
>       HOOK_inject_hw_exception,
>       HOOK_inject_sw_interrupt,
> @@ -773,7 +776,7 @@ static void disable_hooks(struct x86_emu
>       MAYBE_DISABLE_HOOK(read_xcr);
>       MAYBE_DISABLE_HOOK(read_msr);
>       MAYBE_DISABLE_HOOK(write_msr);
> -    MAYBE_DISABLE_HOOK(wbinvd);
> +    MAYBE_DISABLE_HOOK(cache_op);
>       MAYBE_DISABLE_HOOK(cpuid);
>       MAYBE_DISABLE_HOOK(get_fpu);
>       MAYBE_DISABLE_HOOK(invlpg);
> --- a/xen/arch/x86/Rules.mk
> +++ b/xen/arch/x86/Rules.mk
> @@ -19,7 +19,9 @@ $(call as-option-add,CFLAGS,CC,"crc32 %e
>   $(call as-option-add,CFLAGS,CC,"invept (%rax)$$(comma)%rax",-DHAVE_AS_EPT)
>   $(call as-option-add,CFLAGS,CC,"rdrand %eax",-DHAVE_AS_RDRAND)
>   $(call as-option-add,CFLAGS,CC,"rdfsbase %rax",-DHAVE_AS_FSGSBASE)
> +$(call as-option-add,CFLAGS,CC,"xsaveopt (%rax)",-DHAVE_AS_XSAVEOPT)
>   $(call as-option-add,CFLAGS,CC,"rdseed %eax",-DHAVE_AS_RDSEED)
> +$(call as-option-add,CFLAGS,CC,"clwb (%rax)",-DHAVE_AS_CLWB)
>   $(call as-option-add,CFLAGS,CC,".equ \"x\"$$(comma)1", \
>                        -U__OBJECT_LABEL__ -DHAVE_AS_QUOTED_SYM \
>                        '-D__OBJECT_LABEL__=$(subst $(BASEDIR)/,,$(CURDIR))/$$@')
> --- a/xen/arch/x86/hvm/emulate.c
> +++ b/xen/arch/x86/hvm/emulate.c
> @@ -25,6 +25,7 @@
>   #include <asm/hvm/trace.h>
>   #include <asm/hvm/support.h>
>   #include <asm/hvm/svm/svm.h>
> +#include <asm/iocap.h>
>   #include <asm/vm_event.h>
>   
>   static void hvmtrace_io_assist(const ioreq_t *p)
> @@ -555,16 +556,12 @@ static void *hvmemul_map_linear_addr(
>       mfn_t *mfn = &hvmemul_ctxt->mfn[0];
>   
>       /*
> -     * The caller has no legitimate reason for trying a zero-byte write, but
> -     * all other code here is written to work if the check below was dropped.
> -     *
> -     * The maximum write size depends on the number of adjacent mfns[] which
> +     * The maximum access size depends on the number of adjacent mfns[] which
>        * can be vmap()'d, accounting for possible misalignment within the region.
>        * The higher level emulation callers are responsible for ensuring that
> -     * mfns[] is large enough for the requested write size.
> +     * mfns[] is large enough for the requested access size.
>        */
> -    if ( bytes == 0 ||
> -         nr_frames > ARRAY_SIZE(hvmemul_ctxt->mfn) )
> +    if ( nr_frames > ARRAY_SIZE(hvmemul_ctxt->mfn) )
>       {
>           ASSERT_UNREACHABLE();
>           goto unhandleable;
> @@ -669,8 +666,6 @@ static void hvmemul_unmap_linear_addr(
>       unsigned int i;
>       mfn_t *mfn = &hvmemul_ctxt->mfn[0];
>   
> -    ASSERT(bytes > 0);
> -
>       if ( nr_frames == 1 )
>           unmap_domain_page(mapping);
>       else
> @@ -1473,7 +1468,10 @@ static int hvmemul_write_msr_discard(
>       return X86EMUL_OKAY;
>   }
>   
> -static int hvmemul_wbinvd_discard(
> +static int hvmemul_cache_op_discard(
> +    enum x86emul_cache_op op,
> +    enum x86_segment seg,
> +    unsigned long offset,
>       struct x86_emulate_ctxt *ctxt)
>   {
>       return X86EMUL_OKAY;
> @@ -2149,10 +2147,65 @@ static int hvmemul_write_msr(
>       return rc;
>   }
>   
> -static int hvmemul_wbinvd(
> +static int hvmemul_cache_op(
> +    enum x86emul_cache_op op,
> +    enum x86_segment seg,
> +    unsigned long offset,
>       struct x86_emulate_ctxt *ctxt)
>   {
> -    alternative_vcall(hvm_funcs.wbinvd_intercept);
> +    struct hvm_emulate_ctxt *hvmemul_ctxt =
> +        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
> +    unsigned long addr, reps = 1;
> +    uint32_t pfec = PFEC_page_present;
> +    int rc;
> +    void *mapping;
> +
> +    if ( !cache_flush_permitted(current->domain) )
> +        return X86EMUL_OKAY;
> +
> +    switch ( op )
> +    {
> +    case x86emul_clflush:
> +    case x86emul_clflushopt:
> +    case x86emul_clwb:
> +        ASSERT(!is_x86_system_segment(seg));
> +
> +        rc = hvmemul_virtual_to_linear(seg, offset, 0, &reps,
> +                                       hvm_access_read, hvmemul_ctxt, &addr);
> +        if ( rc != X86EMUL_OKAY )
> +            break;
> +
> +        if ( hvmemul_ctxt->seg_reg[x86_seg_ss].dpl == 3 )
> +            pfec |= PFEC_user_mode;
> +
> +        mapping = hvmemul_map_linear_addr(addr, 0, pfec, hvmemul_ctxt,
> +                                          current->arch.hvm.data_cache);
> +        if ( mapping == ERR_PTR(~X86EMUL_EXCEPTION) )
> +            return X86EMUL_EXCEPTION;
> +        if ( IS_ERR_OR_NULL(mapping) )
> +            break;
> +
> +        if ( cpu_has_clflush )
> +        {
> +            if ( op == x86emul_clwb && cpu_has_clwb )
> +                clwb(mapping);
> +            else if ( op == x86emul_clflushopt && cpu_has_clflushopt )
> +                clflushopt(mapping);
> +            else
> +                clflush(mapping);
> +
> +            hvmemul_unmap_linear_addr(mapping, addr, 0, hvmemul_ctxt);
> +            break;
> +        }
> +
> +        hvmemul_unmap_linear_addr(mapping, addr, 0, hvmemul_ctxt);
> +        /* fall through */
> +    case x86emul_invd:
> +    case x86emul_wbinvd:
> +        alternative_vcall(hvm_funcs.wbinvd_intercept);
> +        break;
> +    }
> +
>       return X86EMUL_OKAY;
>   }
>   
> @@ -2353,7 +2406,7 @@ static const struct x86_emulate_ops hvm_
>       .write_xcr     = hvmemul_write_xcr,
>       .read_msr      = hvmemul_read_msr,
>       .write_msr     = hvmemul_write_msr,
> -    .wbinvd        = hvmemul_wbinvd,
> +    .cache_op      = hvmemul_cache_op,
>       .cpuid         = x86emul_cpuid,
>       .get_fpu       = hvmemul_get_fpu,
>       .put_fpu       = hvmemul_put_fpu,
> @@ -2380,7 +2433,7 @@ static const struct x86_emulate_ops hvm_
>       .write_xcr     = hvmemul_write_xcr,
>       .read_msr      = hvmemul_read_msr,
>       .write_msr     = hvmemul_write_msr_discard,
> -    .wbinvd        = hvmemul_wbinvd_discard,
> +    .cache_op      = hvmemul_cache_op_discard,
>       .cpuid         = x86emul_cpuid,
>       .get_fpu       = hvmemul_get_fpu,
>       .put_fpu       = hvmemul_put_fpu,
> --- a/xen/arch/x86/pv/emul-priv-op.c
> +++ b/xen/arch/x86/pv/emul-priv-op.c
> @@ -1118,9 +1118,11 @@ static int write_msr(unsigned int reg, u
>       return X86EMUL_UNHANDLEABLE;
>   }
>   
> -/* Name it differently to avoid clashing with wbinvd() */
> -static int _wbinvd(struct x86_emulate_ctxt *ctxt)
> +static int cache_op(enum x86emul_cache_op op, enum x86_segment seg,
> +                    unsigned long offset, struct x86_emulate_ctxt *ctxt)
>   {
> +    ASSERT(op == x86emul_wbinvd);
> +
>       /* Ignore the instruction if unprivileged. */
>       if ( !cache_flush_permitted(current->domain) )
>           /*
> @@ -1238,7 +1240,7 @@ static const struct x86_emulate_ops priv
>       .read_msr            = read_msr,
>       .write_msr           = write_msr,
>       .cpuid               = x86emul_cpuid,
> -    .wbinvd              = _wbinvd,
> +    .cache_op            = cache_op,
>   };
>   
>   int pv_emulate_privileged_op(struct cpu_user_regs *regs)
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -5933,8 +5933,11 @@ x86_emulate(
>       case X86EMUL_OPC(0x0f, 0x08): /* invd */
>       case X86EMUL_OPC(0x0f, 0x09): /* wbinvd */
>           generate_exception_if(!mode_ring0(), EXC_GP, 0);
> -        fail_if(ops->wbinvd == NULL);
> -        if ( (rc = ops->wbinvd(ctxt)) != 0 )
> +        fail_if(!ops->cache_op);
> +        if ( (rc = ops->cache_op(b == 0x09 ? x86emul_wbinvd
> +                                           : x86emul_invd,
> +                                 x86_seg_none, 0,
> +                                 ctxt)) != X86EMUL_OKAY )
>               goto done;
>           break;
>   
> @@ -7801,8 +7804,9 @@ x86_emulate(
>               /* else clwb */
>               fail_if(!vex.pfx);
>               vcpu_must_have(clwb);
> -            fail_if(!ops->wbinvd);
> -            if ( (rc = ops->wbinvd(ctxt)) != X86EMUL_OKAY )
> +            fail_if(!ops->cache_op);
> +            if ( (rc = ops->cache_op(x86emul_clwb, ea.mem.seg, ea.mem.off,
> +                                     ctxt)) != X86EMUL_OKAY )
>                   goto done;
>               break;
>           case 7:
> @@ -7818,8 +7822,11 @@ x86_emulate(
>                   vcpu_must_have(clflush);
>               else
>                   vcpu_must_have(clflushopt);
> -            fail_if(ops->wbinvd == NULL);
> -            if ( (rc = ops->wbinvd(ctxt)) != 0 )
> +            fail_if(!ops->cache_op);
> +            if ( (rc = ops->cache_op(vex.pfx ? x86emul_clflushopt
> +                                             : x86emul_clflush,
> +                                     ea.mem.seg, ea.mem.off,
> +                                     ctxt)) != X86EMUL_OKAY )
>                   goto done;
>               break;
>           default:
> --- a/xen/arch/x86/x86_emulate/x86_emulate.h
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.h
> @@ -176,6 +176,14 @@ enum x86_emulate_fpu_type {
>       X86EMUL_FPU_none
>   };
>   
> +enum x86emul_cache_op {
> +    x86emul_clflush,
> +    x86emul_clflushopt,
> +    x86emul_clwb,
> +    x86emul_invd,
> +    x86emul_wbinvd,
> +};
> +
>   struct x86_emulate_state;
>   
>   /*
> @@ -452,8 +460,15 @@ struct x86_emulate_ops
>           uint64_t val,
>           struct x86_emulate_ctxt *ctxt);
>   
> -    /* wbinvd: Write-back and invalidate cache contents. */
> -    int (*wbinvd)(
> +    /*
> +     * cache_op: Write-back and/or invalidate cache contents.
> +     *
> +     * @seg:@offset applicable only to some of enum x86emul_cache_op.
> +     */
> +    int (*cache_op)(
> +        enum x86emul_cache_op op,
> +        enum x86_segment seg,
> +        unsigned long offset,
>           struct x86_emulate_ctxt *ctxt);
>   
>       /* cpuid: Emulate CPUID via given set of EAX-EDX inputs/outputs. */
> --- a/xen/include/asm-x86/cpufeature.h
> +++ b/xen/include/asm-x86/cpufeature.h
> @@ -102,6 +102,8 @@
>   #define cpu_has_rdseed          boot_cpu_has(X86_FEATURE_RDSEED)
>   #define cpu_has_smap            boot_cpu_has(X86_FEATURE_SMAP)
>   #define cpu_has_avx512_ifma     boot_cpu_has(X86_FEATURE_AVX512_IFMA)
> +#define cpu_has_clflushopt      boot_cpu_has(X86_FEATURE_CLFLUSHOPT)
> +#define cpu_has_clwb            boot_cpu_has(X86_FEATURE_CLWB)
>   #define cpu_has_avx512er        boot_cpu_has(X86_FEATURE_AVX512ER)
>   #define cpu_has_avx512cd        boot_cpu_has(X86_FEATURE_AVX512CD)
>   #define cpu_has_sha             boot_cpu_has(X86_FEATURE_SHA)
> --- a/xen/include/asm-x86/system.h
> +++ b/xen/include/asm-x86/system.h
> @@ -21,6 +21,23 @@ static inline void clflush(const void *p
>       asm volatile ( "clflush %0" :: "m" (*(const char *)p) );
>   }
>   
> +static inline void clflushopt(const void *p)
> +{
> +    asm volatile ( "data16 clflush %0" :: "m" (*(const char *)p) );
> +}
> +
> +static inline void clwb(const void *p)
> +{
> +#if defined(HAVE_AS_CLWB)
> +    asm volatile ( "clwb %0" :: "m" (*(const char *)p) );
> +#elif defined(HAVE_AS_XSAVEOPT)
> +    asm volatile ( "data16 xsaveopt %0" :: "m" (*(const char *)p) );
> +#else
> +    asm volatile ( ".byte 0x66, 0x0f, 0xae, 0x32"
> +                   :: "d" (p), "m" (*(const char *)p) );
> +#endif
> +}
> +
>   #define xchg(ptr,v) \
>       ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
>   
> 
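
A note on the clwb() fallbacks in the system.h hunk, since the encodings
are not obvious: CLFLUSHOPT is the CLFLUSH opcode (0F AE /7) with a 66
prefix, and CLWB shares XSAVEOPT's 0F AE /6 opcode, again with a 66 prefix -
hence the "data16" forms for assemblers lacking the mnemonics. The raw-byte
variant hand-encodes the same CLWB instruction; an annotated copy (code as
in the patch, comments mine):

    static inline void clwb(const void *p)
    {
        /*
         * 66 0F AE /6 is CLWB. ModRM 0x32 = mod 00, reg 110 (/6),
         * rm 010 (%rdx), so the "d" constraint supplies the address
         * in %rdx; the "m" operand merely tells the compiler that
         * the pointed-to memory is accessed.
         */
        asm volatile ( ".byte 0x66, 0x0f, 0xae, 0x32"
                       :: "d" (p), "m" (*(const char *)p) );
    }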

