summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorChristophe Leroy <christophe.leroy@csgroup.eu>2020-10-22 14:05:46 +0000
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2021-01-09 13:37:37 +0100
commit404653452c4382236f0ff57f88a2dbad668f8ed7 (patch)
tree60807a5d07001668ffbdc5c1ed96973d2198605e /arch
parent5603bcc51324476bafd4bf4112857d93b4909993 (diff)
powerpc/bitops: Fix possible undefined behaviour with fls() and fls64()
[ Upstream commit 1891ef21d92c4801ea082ee8ed478e304ddc6749 ] fls() and fls64() are using __builtin_ctz() and _builtin_ctzll(). On powerpc, those builtins trivially use ctlzw and ctlzd power instructions. Allthough those instructions provide the expected result with input argument 0, __builtin_ctz() and __builtin_ctzll() are documented as undefined for value 0. The easiest fix would be to use fls() and fls64() functions defined in include/asm-generic/bitops/builtin-fls.h and include/asm-generic/bitops/fls64.h, but GCC output is not optimal: 00000388 <testfls>: 388: 2c 03 00 00 cmpwi r3,0 38c: 41 82 00 10 beq 39c <testfls+0x14> 390: 7c 63 00 34 cntlzw r3,r3 394: 20 63 00 20 subfic r3,r3,32 398: 4e 80 00 20 blr 39c: 38 60 00 00 li r3,0 3a0: 4e 80 00 20 blr 000003b0 <testfls64>: 3b0: 2c 03 00 00 cmpwi r3,0 3b4: 40 82 00 1c bne 3d0 <testfls64+0x20> 3b8: 2f 84 00 00 cmpwi cr7,r4,0 3bc: 38 60 00 00 li r3,0 3c0: 4d 9e 00 20 beqlr cr7 3c4: 7c 83 00 34 cntlzw r3,r4 3c8: 20 63 00 20 subfic r3,r3,32 3cc: 4e 80 00 20 blr 3d0: 7c 63 00 34 cntlzw r3,r3 3d4: 20 63 00 40 subfic r3,r3,64 3d8: 4e 80 00 20 blr When the input of fls(x) is a constant, just check x for nullity and return either 0 or __builtin_clz(x). Otherwise, use cntlzw instruction directly. For fls64() on PPC64, do the same but with __builtin_clzll() and cntlzd instruction. On PPC32, lets take the generic fls64() which will use our fls(). The result is as expected: 00000388 <testfls>: 388: 7c 63 00 34 cntlzw r3,r3 38c: 20 63 00 20 subfic r3,r3,32 390: 4e 80 00 20 blr 000003a0 <testfls64>: 3a0: 2c 03 00 00 cmpwi r3,0 3a4: 40 82 00 10 bne 3b4 <testfls64+0x14> 3a8: 7c 83 00 34 cntlzw r3,r4 3ac: 20 63 00 20 subfic r3,r3,32 3b0: 4e 80 00 20 blr 3b4: 7c 63 00 34 cntlzw r3,r3 3b8: 20 63 00 40 subfic r3,r3,64 3bc: 4e 80 00 20 blr Fixes: 2fcff790dcb4 ("powerpc: Use builtin functions for fls()/__fls()/fls64()") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Acked-by: Segher Boessenkool <segher@kernel.crashing.org> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/348c2d3f19ffcff8abe50d52513f989c4581d000.1603375524.git.christophe.leroy@csgroup.eu Signed-off-by: Sasha Levin <sashal@kernel.org>
Diffstat (limited to 'arch')
-rw-r--r--arch/powerpc/include/asm/bitops.h23
1 files changed, 21 insertions, 2 deletions
diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h
index b750ffef83c7..0ec93d940d12 100644
--- a/arch/powerpc/include/asm/bitops.h
+++ b/arch/powerpc/include/asm/bitops.h
@@ -220,15 +220,34 @@ static __inline__ void __clear_bit_unlock(int nr, volatile unsigned long *addr)
*/
static __inline__ int fls(unsigned int x)
{
- return 32 - __builtin_clz(x);
+ int lz;
+
+ if (__builtin_constant_p(x))
+ return x ? 32 - __builtin_clz(x) : 0;
+ asm("cntlzw %0,%1" : "=r" (lz) : "r" (x));
+ return 32 - lz;
}
#include <asm-generic/bitops/builtin-__fls.h>
+/*
+ * 64-bit can do this using one cntlzd (count leading zeroes doubleword)
+ * instruction; for 32-bit we use the generic version, which does two
+ * 32-bit fls calls.
+ */
+#ifdef CONFIG_PPC64
static __inline__ int fls64(__u64 x)
{
- return 64 - __builtin_clzll(x);
+ int lz;
+
+ if (__builtin_constant_p(x))
+ return x ? 64 - __builtin_clzll(x) : 0;
+ asm("cntlzd %0,%1" : "=r" (lz) : "r" (x));
+ return 64 - lz;
}
+#else
+#include <asm-generic/bitops/fls64.h>
+#endif
#ifdef CONFIG_PPC64
unsigned int __arch_hweight8(unsigned int w);