x86: use tzcnt instead of bsf Following a compiler change made in 2012, make use of the fact that for non-zero input BSF and TZCNT produce the same numeric result (only the EFLAGS settings differ), and that CPUs which do not know TZCNT will treat the instruction as BSF (i.e. they ignore what looks like a REP prefix to them). The instruction is therefore emitted as "rep; bsf" in the asm() statements touched here. The assumption here is that TZCNT never performs worse than BSF. Also extend the asm() input constraint in find_first_set_bit() to allow memory operands. Signed-off-by: Jan Beulich --- Thanks to Andrew for noticing that I forgot to post this for Xen after a similar change got accepted into the Linux kernel. --- a/xen/arch/x86/bitops.c +++ b/xen/arch/x86/bitops.c @@ -62,7 +62,7 @@ unsigned int __find_first_zero_bit( " je 2f\n\t" " xor -"STR(BITS_PER_LONG/8)"(%2),%3\n\t" " jz 1b\n\t" - " bsf %3,%0\n\t" + " rep; bsf %3,%0\n\t" " lea -"STR(BITS_PER_LONG/8)"(%2),%2\n\t" "2: sub %%ebx,%%edi\n\t" " shl $3,%%edi\n\t" --- a/xen/arch/x86/hvm/vpic.c +++ b/xen/arch/x86/hvm/vpic.c @@ -56,7 +56,7 @@ static int vpic_get_priority(struct hvm_ return VPIC_PRIO_NONE; /* prio = ffs(mask ROR vpic->priority_add); */ - asm ( "ror %%cl,%b1 ; bsf %1,%0" + asm ( "ror %%cl,%b1 ; rep; bsf %1,%0" : "=r" (prio) : "q" ((uint32_t)mask), "c" (vpic->priority_add) ); return prio; } --- a/xen/include/asm-x86/bitops.h +++ b/xen/include/asm-x86/bitops.h @@ -382,7 +382,7 @@ static inline unsigned int __scanbit(uns */ static inline unsigned int find_first_set_bit(unsigned long word) { - asm ( "bsf %1,%0" : "=r" (word) : "r" (word) ); + asm ( "rep; bsf %1,%0" : "=r" (word) : "rm" (word) ); return (unsigned int)word; }