x86emul: support 3DNow! insns Yes, recent AMD CPUs don't support them anymore, but I think we should nevertheless cope. Signed-off-by: Jan Beulich --- a/.gitignore +++ b/.gitignore @@ -223,7 +223,7 @@ tools/security/xensec_tool tools/tests/x86_emulator/*.bin tools/tests/x86_emulator/*.tmp +tools/tests/x86_emulator/3dnow*.[ch] tools/tests/x86_emulator/asm tools/tests/x86_emulator/avx*.[ch] tools/tests/x86_emulator/blowfish.h --- a/tools/tests/x86_emulator/Makefile +++ b/tools/tests/x86_emulator/Makefile @@ -11,7 +11,7 @@ all: $(TARGET) run: $(TARGET) ./$(TARGET) -SIMD := sse sse2 sse4 avx avx2 xop +SIMD := 3dnow sse sse2 sse4 avx avx2 xop FMA := fma4 fma SG := avx2-sg TESTCASES := blowfish $(SIMD) $(FMA) $(SG) @@ -19,6 +19,9 @@ TESTCASES := blowfish $(SIMD) $(FMA) $(S blowfish-cflags := "" blowfish-cflags-x86_32 := "-mno-accumulate-outgoing-args -Dstatic=" +3dnow-vecs := 8 +3dnow-ints := +3dnow-flts := 4 sse-vecs := 16 sse-ints := sse-flts := 4 @@ -49,8 +52,13 @@ xop-ints := 1 2 4 8 xop-flts := $(avx-flts) # For AVX and later, have the compiler avoid XMM0 to widen coverage of -# the VEX.vvvv checks in the emulator. -non-sse = $(if $(filter sse%,$(1)),,-ffixed-xmm0) +# the VEX.vvvv checks in the emulator. For 3DNow!, however, force SSE +# use for floating point operations, to avoid mixing MMX and FPU register +# uses. Also enable 3DNow! extensions, but note that we can't use 3dnowa +# as the test flavor right away since -m3dnowa is understood only +# by gcc 7.x and newer (older ones want a specific machine model instead). 
+3dnowa := $(call cc-option,$(CC),-m3dnowa,-march=k8) +non-sse = $(if $(filter sse%,$(1)),,$(if $(filter 3dnow%,$(1)),-msse -mfpmath=sse $(3dnowa),-ffixed-xmm0)) define simd-defs $(1)-cflags := \ @@ -81,8 +89,9 @@ $(addsuffix .h,$(TESTCASES)): %.h: %.c t $(foreach arch,$(filter-out $(XEN_COMPILE_ARCH),x86_32) $(XEN_COMPILE_ARCH), \ for cflags in $($*-cflags) $($*-cflags-$(arch)); do \ $(MAKE) -f testcase.mk TESTCASE=$* XEN_TARGET_ARCH=$(arch) $*-cflags="$$cflags" all; \ + prefix=$(shell echo $(subst -,_,$*) | sed -e 's,^\([0-9]\),_\1,'); \ flavor=$$(echo $${cflags} | sed -e 's, .*,,' -e 'y,-=,__,') ; \ - (echo "static const unsigned int $(subst -,_,$*)_$(arch)$${flavor}[] = {"; \ + (echo "static const unsigned int $${prefix}_$(arch)$${flavor}[] = {"; \ od -v -t x $*.bin | sed -e 's/^[0-9]* /0x/' -e 's/ /, 0x/g' -e 's/$$/,/'; \ echo "};") >>$@.new; \ rm -f $*.bin; \ --- a/tools/tests/x86_emulator/simd.c +++ b/tools/tests/x86_emulator/simd.c @@ -48,6 +48,8 @@ static inline bool _to_bool(byte_vec_t b #if VEC_SIZE == FLOAT_SIZE # define to_int(x) ((vec_t){ (int)(x)[0] }) +#elif VEC_SIZE == 8 && FLOAT_SIZE == 4 && defined(__3dNOW__) +# define to_int(x) __builtin_ia32_pi2fd(__builtin_ia32_pf2id(x)) /* float -> int -> float round trip, like the SSE2 variant below */ #elif VEC_SIZE == 16 && defined(__SSE2__) # if FLOAT_SIZE == 4 # define to_int(x) __builtin_ia32_cvtdq2ps(__builtin_ia32_cvtps2dq(x)) @@ -70,7 +72,24 @@ static inline bool _to_bool(byte_vec_t b }) #endif -#if FLOAT_SIZE == 4 && defined(__SSE__) +#if VEC_SIZE == 8 && FLOAT_SIZE == 4 && defined(__3dNOW_A__) +# define max __builtin_ia32_pfmax +# define min __builtin_ia32_pfmin +# define recip(x) ({ /* pfrcp each element separately, then refine via pfrcpit1/pfrcpit2 */ \ + vec_t t_ = __builtin_ia32_pfrcp(x); \ + touch(x); \ + t_[1] = __builtin_ia32_pfrcp(__builtin_ia32_pswapdsf(x))[0]; /* element 1: swap the halves, keep result element 0 */ \ + touch(x); \ + __builtin_ia32_pfrcpit2(__builtin_ia32_pfrcpit1(t_, x), t_); \ +}) +# define rsqrt(x) ({ /* pfrsqrt each element separately before refining */ \ + vec_t t_ = __builtin_ia32_pfrsqrt(x); \ + touch(x); \ + t_[1] = __builtin_ia32_pfrsqrt(__builtin_ia32_pswapdsf(x))[0]; /* element 1: swap the halves, keep result element 0 */ \ + touch(x); \ + 
__builtin_ia32_pfrcpit2(__builtin_ia32_pfrsqit1(__builtin_ia32_pfmul(t_, t_), x), t_); \ +}) +#elif FLOAT_SIZE == 4 && defined(__SSE__) # if VEC_SIZE == 32 && defined(__AVX__) # define broadcast(x) ({ float t_ = (x); __builtin_ia32_vbroadcastss256(&t_); }) # define max(x, y) __builtin_ia32_maxps256(x, y) --- a/tools/tests/x86_emulator/test_x86_emulator.c +++ b/tools/tests/x86_emulator/test_x86_emulator.c @@ -5,6 +5,7 @@ #include "x86_emulate.h" #include "blowfish.h" +#include "3dnow.h" #include "sse.h" #include "sse2.h" #include "sse4.h" @@ -28,6 +29,11 @@ static bool blowfish_check_regs(const st return regs->eax == 2 && regs->edx == 1; } +static bool simd_check__3dnow(void) +{ + return cpu_has_3dnow_ext && cpu_has_sse; /* test blob is built with 3DNow! extensions plus -msse -mfpmath=sse */ +} + static bool simd_check_sse(void) { return cpu_has_sse; @@ -117,6 +123,7 @@ static const struct { #else # define SIMD(desc, feat, form) SIMD_(32, desc, feat, form) #endif + SIMD(3DNow! single, _3dnow, 8f4), SIMD(SSE scalar single, sse, f4), SIMD(SSE packed single, sse, 16f4), SIMD(SSE2 scalar single, sse2, f4), --- a/tools/tests/x86_emulator/x86_emulate.h +++ b/tools/tests/x86_emulator/x86_emulate.h @@ -166,6 +166,12 @@ static inline uint64_t xgetbv(uint32_t x (res.b & (1U << 8)) != 0; \ }) +#define cpu_has_3dnow_ext ({ \ + struct cpuid_leaf res; \ + emul_test_cpuid(0x80000001, 0, &res, NULL); \ + (res.d & (1U << 30)) != 0; \ +}) + #define cpu_has_sse4a ({ \ struct cpuid_leaf res; \ emul_test_cpuid(0x80000001, 0, &res, NULL); \ --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -355,6 +355,36 @@ static const struct { [0xff] = { ModRM } }; +static const uint16_t _3dnow_table[16] = { /* row: imm1[7:4], bit within row: imm1[3:0] */ + [0x0] = (1 << 0xd) /* pi2fd */, + [0x1] = (1 << 0xd) /* pf2id */, + [0x9] = (1 << 0x0) /* pfcmpge */ | + (1 << 0x4) /* pfmin */ | + (1 << 0x6) /* pfrcp */ | + (1 << 0x7) /* pfrsqrt */ | + (1 << 0xa) /* pfsub */ | + (1 << 0xe) /* pfadd */, + [0xa] = (1 << 0x0) /* pfcmpgt */ | + (1 << 0x4) /* pfmax */ | + (1 << 0x6) /* 
pfrcpit1 */ | + (1 << 0x7) /* pfrsqit1 */ | + (1 << 0xa) /* pfsubr */ | + (1 << 0xe) /* pfacc */, + [0xb] = (1 << 0x0) /* pfcmpeq */ | + (1 << 0x4) /* pfmul */ | + (1 << 0x6) /* pfrcpit2 */ | + (1 << 0x7) /* pmulhrw */ | + (1 << 0xf) /* pavgusb */, +}; + +static const uint16_t _3dnow_ext_table[16] = { /* same layout as _3dnow_table; these opcodes need the 3DNow! extensions */ + [0x0] = (1 << 0xc) /* pi2fw */, + [0x1] = (1 << 0xc) /* pf2iw */, + [0x8] = (1 << 0xa) /* pfnacc */ | + (1 << 0xe) /* pfpnacc */, + [0xb] = (1 << 0xb) /* pswapd */, +}; + /* * "two_op" and "four_op" below refer to the number of register operands * (one of which possibly also allowing to be a memory one). The named @@ -1670,6 +1700,8 @@ static bool vcpu_has( #define vcpu_has_rdrand() vcpu_has( 1, ECX, 30, ctxt, ops) #define vcpu_has_mmxext() (vcpu_has(0x80000001, EDX, 22, ctxt, ops) || \ vcpu_has_sse()) +#define vcpu_has_3dnow_ext() vcpu_has(0x80000001, EDX, 30, ctxt, ops) +#define vcpu_has_3dnow() vcpu_has(0x80000001, EDX, 31, ctxt, ops) #define vcpu_has_lahf_lm() vcpu_has(0x80000001, ECX, 0, ctxt, ops) #define vcpu_has_cr8_legacy() vcpu_has(0x80000001, ECX, 4, ctxt, ops) #define vcpu_has_lzcnt() vcpu_has(0x80000001, ECX, 5, ctxt, ops) @@ -5480,6 +5512,26 @@ x86_emulate( case X86EMUL_OPC(0x0f, 0x19) ... X86EMUL_OPC(0x0f, 0x1f): /* nop */ break; + case X86EMUL_OPC(0x0f, 0x0e): /* femms */ + host_and_vcpu_must_have(3dnow); + asm volatile ( "femms" ); + break; + + case X86EMUL_OPC(0x0f, 0x0f): /* 3DNow! 
*/ + if ( _3dnow_ext_table[(imm1 >> 4) & 0xf] & (1 << (imm1 & 0xf)) ) /* row = imm1[7:4], bit = imm1[3:0]; extension opcodes are looked up first */ + host_and_vcpu_must_have(3dnow_ext); + else if ( _3dnow_table[(imm1 >> 4) & 0xf] & (1 << (imm1 & 0xf)) ) + host_and_vcpu_must_have(3dnow); + else + generate_exception(EXC_UD); /* imm1 is listed in neither table */ + + get_fpu(X86EMUL_FPU_mmx, &fic); + + d = DstReg | SrcMem; + op_bytes = 8; + state->simd_size = simd_other; + goto simd_0f_imm8; + #define CASE_SIMD_PACKED_INT(pfx, opc) \ case X86EMUL_OPC(pfx, opc): \ case X86EMUL_OPC_66(pfx, opc) --- a/xen/include/asm-x86/cpufeature.h +++ b/xen/include/asm-x86/cpufeature.h @@ -71,6 +71,8 @@ && boot_cpu_has(X86_FEATURE_FFXSR)) #define cpu_has_page1gb boot_cpu_has(X86_FEATURE_PAGE1GB) #define cpu_has_rdtscp boot_cpu_has(X86_FEATURE_RDTSCP) +#define cpu_has_3dnow_ext boot_cpu_has(X86_FEATURE_3DNOWEXT) +#define cpu_has_3dnow boot_cpu_has(X86_FEATURE_3DNOW) /* CPUID level 0x80000001.ecx */ #define cpu_has_cmp_legacy boot_cpu_has(X86_FEATURE_CMP_LEGACY)