[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH] x86: also use tzcnt instead of bsf in __scanbit()



On 26/01/15 12:03, Jan Beulich wrote:
> ... when available, i.e. by runtime patching. This saves the
> conditional move, having a back-to-back dependency on BSF's (EFLAGS)
> result.
>
> The need to include asm/cpufeatures.h from asm/bitops.h requires a
> workaround for an otherwise resulting circular header file dependency:
> Provide a mode by which the including site of the former header can
> request to only get the X86_FEATURE_* defines (and very little more)
> from it, allowing it to nevertheless be included in its entirety later
> on.
>
> While doing this I also noticed that the function's "max" parameter was
> pointlessly "unsigned long" - the function only returning
> "unsigned int", this can't be of any use, and hence gets converted at
> once, along with the necessary adjustments to CMOVZ's output operands.
>
> Note that while only alternative_io() is needed by this change (and
> hence gets pulled over from Linux), for completeness its input-only
> counterpart alternative_input() gets added as well.
>
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

Reviewed-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>

>
> --- a/xen/include/asm-x86/alternative.h
> +++ b/xen/include/asm-x86/alternative.h
> @@ -12,6 +12,7 @@
>          .byte \alt_len
>  .endm
>  #else
> +#include <xen/stringify.h>
>  #include <xen/types.h>
>  
>  struct alt_instr {
> @@ -73,6 +74,26 @@ extern void alternative_instructions(voi
>  #define alternative(oldinstr, newinstr, feature)                        \
>          asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : 
> "memory")
>  
> +/*
> + * Alternative inline assembly with input.
> + *
> + * Pecularities:
> + * No memory clobber here.
> + * Argument numbers start with 1.
> + * Best is to use constraints that are fixed size (like (%1) ... "r")
> + * If you use variable sized constraints like "m" or "g" in the
> + * replacement make sure to pad to the worst case length.
> + * Leaving an unused argument 0 to keep API compatibility.
> + */
> +#define alternative_input(oldinstr, newinstr, feature, input...)     \
> +     asm volatile (ALTERNATIVE(oldinstr, newinstr, feature)          \
> +             : : "i" (0), ## input)
> +
> +/* Like alternative_input, but with a single output argument */
> +#define alternative_io(oldinstr, newinstr, feature, output, input...)        
> \
> +     asm volatile (ALTERNATIVE(oldinstr, newinstr, feature)          \
> +             : output : "i" (0), ## input)
> +
>  #endif  /*  __ASSEMBLY__  */
>  
>  #endif /* __X86_ALTERNATIVE_H__ */
> --- a/xen/include/asm-x86/bitops.h
> +++ b/xen/include/asm-x86/bitops.h
> @@ -5,7 +5,9 @@
>   * Copyright 1992, Linus Torvalds.
>   */
>  
> -#include <xen/config.h>
> +#include <asm/alternative.h>
> +#define X86_FEATURES_ONLY
> +#include <asm/cpufeature.h>
>  
>  /*
>   * We specify the memory operand as both input and output because the memory
> @@ -313,9 +315,17 @@ extern unsigned int __find_first_zero_bi
>  extern unsigned int __find_next_zero_bit(
>      const unsigned long *addr, unsigned int size, unsigned int offset);
>  
> -static inline unsigned int __scanbit(unsigned long val, unsigned long max)
> +static inline unsigned int __scanbit(unsigned long val, unsigned int max)
>  {
> -    asm ( "bsf %1,%0 ; cmovz %2,%0" : "=&r" (val) : "r" (val), "r" (max) );
> +    if ( __builtin_constant_p(max) && max == BITS_PER_LONG )
> +        alternative_io("bsf %[in],%[out]; cmovz %[max],%k[out]",
> +                       "rep; bsf %[in],%[out]",
> +                       X86_FEATURE_BMI1,
> +                       [out] "=&r" (val),
> +                       [in] "r" (val), [max] "r" (max));
> +    else
> +        asm ( "bsf %1,%0 ; cmovz %2,%k0"
> +              : "=&r" (val) : "r" (val), "r" (max) );
>      return (unsigned int)val;
>  }
>  
> --- a/xen/include/asm-x86/cpufeature.h
> +++ b/xen/include/asm-x86/cpufeature.h
> @@ -5,10 +5,8 @@
>   */
>  
>  #ifndef __ASM_I386_CPUFEATURE_H
> +#ifndef X86_FEATURES_ONLY
>  #define __ASM_I386_CPUFEATURE_H
> -
> -#ifndef __ASSEMBLY__
> -#include <xen/bitops.h>
>  #endif
>  
>  #define NCAPINTS     8       /* N 32-bit words worth of info */
> @@ -155,7 +153,9 @@
>  #define X86_FEATURE_ADX              (7*32+19) /* ADCX, ADOX instructions */
>  #define X86_FEATURE_SMAP     (7*32+20) /* Supervisor Mode Access Prevention 
> */
>  
> -#ifndef __ASSEMBLY__
> +#if !defined(__ASSEMBLY__) && !defined(X86_FEATURES_ONLY)
> +#include <xen/bitops.h>
> +
>  #define cpu_has(c, bit)              test_bit(bit, (c)->x86_capability)
>  #define boot_cpu_has(bit)    test_bit(bit, boot_cpu_data.x86_capability)
>  #define cpufeat_mask(idx)       (1u << ((idx) & 31))
> @@ -262,6 +262,8 @@ struct cpuid4_info {
>  int cpuid4_cache_lookup(int index, struct cpuid4_info *this_leaf);
>  #endif
>  
> +#undef X86_FEATURES_ONLY
> +
>  #endif /* __ASM_I386_CPUFEATURE_H */
>  
>  /* 
>
>



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.