[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH] x86: also use tzcnt instead of bsf in __scanbit()
On 26/01/15 12:03, Jan Beulich wrote: > ... when available, i.e. by runtime patching. This saves the > conditional move, having a back-to-back dependency on BSF's (EFLAGS) > result. > > The need to include asm/cpufeatures.h from asm/bitops.h requires a > workaround for an otherwise resulting circular header file dependency: > Provide a mode by which the including site of the former header can > request to only get the X86_FEATURE_* defines (and very little more) > from it, allowing it to nevertheless be included in its entirety later > on. > > While doing this I also noticed that the function's "max" parameter was > pointlessly "unsigned long" - the function only returning > "unsigned int", this can't be of any use, and hence gets converted at > once, along with the necessary adjustments to CMOVZ's output operands. > > Note that while only alternative_io() is needed by this change (and > hence gets pulled over from Linux), for completeness its input-only > counterpart alternative_input() gets added as well. > > Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx> Reviewed-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> > > --- a/xen/include/asm-x86/alternative.h > +++ b/xen/include/asm-x86/alternative.h > @@ -12,6 +12,7 @@ > .byte \alt_len > .endm > #else > +#include <xen/stringify.h> > #include <xen/types.h> > > struct alt_instr { > @@ -73,6 +74,26 @@ extern void alternative_instructions(voi > #define alternative(oldinstr, newinstr, feature) \ > asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : > "memory") > > +/* > + * Alternative inline assembly with input. > + * > + * Pecularities: > + * No memory clobber here. > + * Argument numbers start with 1. > + * Best is to use constraints that are fixed size (like (%1) ... "r") > + * If you use variable sized constraints like "m" or "g" in the > + * replacement make sure to pad to the worst case length. > + * Leaving an unused argument 0 to keep API compatibility. > + */ > +#define alternative_input(oldinstr, newinstr, feature, input...) \ > + asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \ > + : : "i" (0), ## input) > + > +/* Like alternative_input, but with a single output argument */ > +#define alternative_io(oldinstr, newinstr, feature, output, input...) > \ > + asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \ > + : output : "i" (0), ## input) > + > #endif /* __ASSEMBLY__ */ > > #endif /* __X86_ALTERNATIVE_H__ */ > --- a/xen/include/asm-x86/bitops.h > +++ b/xen/include/asm-x86/bitops.h > @@ -5,7 +5,9 @@ > * Copyright 1992, Linus Torvalds. > */ > > -#include <xen/config.h> > +#include <asm/alternative.h> > +#define X86_FEATURES_ONLY > +#include <asm/cpufeature.h> > > /* > * We specify the memory operand as both input and output because the memory > @@ -313,9 +315,17 @@ extern unsigned int __find_first_zero_bi > extern unsigned int __find_next_zero_bit( > const unsigned long *addr, unsigned int size, unsigned int offset); > > -static inline unsigned int __scanbit(unsigned long val, unsigned long max) > +static inline unsigned int __scanbit(unsigned long val, unsigned int max) > { > - asm ( "bsf %1,%0 ; cmovz %2,%0" : "=&r" (val) : "r" (val), "r" (max) ); > + if ( __builtin_constant_p(max) && max == BITS_PER_LONG ) > + alternative_io("bsf %[in],%[out]; cmovz %[max],%k[out]", > + "rep; bsf %[in],%[out]", > + X86_FEATURE_BMI1, > + [out] "=&r" (val), > + [in] "r" (val), [max] "r" (max)); > + else > + asm ( "bsf %1,%0 ; cmovz %2,%k0" > + : "=&r" (val) : "r" (val), "r" (max) ); > return (unsigned int)val; > } > > --- a/xen/include/asm-x86/cpufeature.h > +++ b/xen/include/asm-x86/cpufeature.h > @@ -5,10 +5,8 @@ > */ > > #ifndef __ASM_I386_CPUFEATURE_H > +#ifndef X86_FEATURES_ONLY > #define __ASM_I386_CPUFEATURE_H > - > -#ifndef __ASSEMBLY__ > -#include <xen/bitops.h> > #endif > > #define NCAPINTS 8 /* N 32-bit words worth of info */ > @@ -155,7 +153,9 @@ > #define X86_FEATURE_ADX (7*32+19) /* ADCX, ADOX instructions */ > #define X86_FEATURE_SMAP (7*32+20) /* Supervisor Mode Access Prevention > */ > > -#ifndef __ASSEMBLY__ > +#if !defined(__ASSEMBLY__) && !defined(X86_FEATURES_ONLY) > +#include <xen/bitops.h> > + > #define cpu_has(c, bit) test_bit(bit, (c)->x86_capability) > #define boot_cpu_has(bit) test_bit(bit, boot_cpu_data.x86_capability) > #define cpufeat_mask(idx) (1u << ((idx) & 31)) > @@ -262,6 +262,8 @@ struct cpuid4_info { > int cpuid4_cache_lookup(int index, struct cpuid4_info *this_leaf); > #endif > > +#undef X86_FEATURES_ONLY > + > #endif /* __ASM_I386_CPUFEATURE_H */ > > /* > > _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |