|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [PATCH for-4.21 1/2] x86/AMD: avoid REP MOVSB for Zen3/4
Along with Zen2 (which doesn't expose ERMS), both families reportedly
suffer from sub-optimal aliasing detection when deciding whether REP MOVSB
can actually be carried out the accelerated way. Therefore we want to
avoid its use in the common case (memset(), copy_page_hot()).
Reported-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
Question is whether merely avoiding REP MOVSB (but not REP MOVSQ) is going
to be good enough.
--- a/xen/arch/x86/copy_page.S
+++ b/xen/arch/x86/copy_page.S
@@ -57,6 +57,6 @@ END(copy_page_cold)
.endm
FUNC(copy_page_hot)
- ALTERNATIVE copy_page_movsq, copy_page_movsb, X86_FEATURE_ERMS
+ ALTERNATIVE copy_page_movsq, copy_page_movsb, X86_FEATURE_XEN_REP_MOVSB
RET
END(copy_page_hot)
--- a/xen/arch/x86/cpu/amd.c
+++ b/xen/arch/x86/cpu/amd.c
@@ -1386,6 +1386,10 @@ static void cf_check init_amd(struct cpu
check_syscfg_dram_mod_en();
+ if (c == &boot_cpu_data && cpu_has(c, X86_FEATURE_ERMS)
+ && c->family != 0x19 /* Zen3/4 */)
+ setup_force_cpu_cap(X86_FEATURE_XEN_REP_MOVSB);
+
amd_log_freq(c);
}
--- a/xen/arch/x86/cpu/intel.c
+++ b/xen/arch/x86/cpu/intel.c
@@ -684,6 +684,9 @@ static void cf_check init_intel(struct c
*/
if (c == &boot_cpu_data && c->vfm == INTEL_SKYLAKE_X)
setup_clear_cpu_cap(X86_FEATURE_CLWB);
+
+ if (c == &boot_cpu_data && cpu_has(c, X86_FEATURE_ERMS))
+ setup_force_cpu_cap(X86_FEATURE_XEN_REP_MOVSB);
}
const struct cpu_dev __initconst_cf_clobber intel_cpu_dev = {
--- a/xen/arch/x86/include/asm/cpufeatures.h
+++ b/xen/arch/x86/include/asm/cpufeatures.h
@@ -7,7 +7,7 @@
#define FSCAPINTS FEATURESET_NR_ENTRIES
/* Synthetic words follow the featureset words. */
-#define X86_NR_SYNTH 1
+#define X86_NR_SYNTH 2
#define X86_SYNTH(x) (FSCAPINTS * 32 + (x))
/* Synthetic features */
@@ -43,6 +43,7 @@ XEN_CPUFEATURE(IBPB_ENTRY_PV, X86_SY
XEN_CPUFEATURE(IBPB_ENTRY_HVM, X86_SYNTH(29)) /* MSR_PRED_CMD used by Xen
for HVM */
XEN_CPUFEATURE(USE_VMCALL, X86_SYNTH(30)) /* Use VMCALL instead of
VMMCALL */
XEN_CPUFEATURE(PDX_COMPRESSION, X86_SYNTH(31)) /* PDX compression */
+XEN_CPUFEATURE(XEN_REP_MOVSB, X86_SYNTH(32)) /* REP MOVSB used for
memcpy() and alike. */
/* Bug words follow the synthetic words. */
#define X86_NR_BUG 1
--- a/xen/arch/x86/memcpy.S
+++ b/xen/arch/x86/memcpy.S
@@ -10,7 +10,7 @@ FUNC(memcpy)
* precautions were taken).
*/
ALTERNATIVE "and $7, %edx; shr $3, %rcx", \
- STR(rep movsb; RET), X86_FEATURE_ERMS
+ STR(rep movsb; RET), X86_FEATURE_XEN_REP_MOVSB
rep movsq
or %edx, %ecx
jz 1f
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |