From a9ebf306f52c756c4f9e50ee9a60cd6389d71344 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 1 Feb 2017 16:39:38 +0100 Subject: locking/atomic: Introduce atomic_try_cmpxchg() Add a new cmpxchg interface: bool try_cmpxchg(u{8,16,32,64} *ptr, u{8,16,32,64} *val, u{8,16,32,64} new); Where the boolean returns the result of the compare; and thus if the exchange happened; and in case of failure, the new value of *ptr is returned in *val. This allows simplification/improvement of loops like: for (;;) { new = val $op $imm; old = cmpxchg(ptr, val, new); if (old == val) break; val = old; } into: do { } while (!try_cmpxchg(ptr, &val, val $op $imm)); while also generating better code (GCC6 and onwards). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/atomic.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'include/linux/atomic.h') diff --git a/include/linux/atomic.h b/include/linux/atomic.h index e71835bf60a9..aae5953817d6 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -423,6 +423,27 @@ #endif #endif /* atomic_cmpxchg_relaxed */ +#ifndef atomic_try_cmpxchg + +#define __atomic_try_cmpxchg(type, _p, _po, _n) \ +({ \ + typeof(_po) __po = (_po); \ + typeof(*(_po)) __o = *__po; \ + *__po = atomic_cmpxchg##type((_p), __o, (_n)); \ + (*__po == __o); \ +}) + +#define atomic_try_cmpxchg(_p, _po, _n) __atomic_try_cmpxchg(, _p, _po, _n) +#define atomic_try_cmpxchg_relaxed(_p, _po, _n) __atomic_try_cmpxchg(_relaxed, _p, _po, _n) +#define atomic_try_cmpxchg_acquire(_p, _po, _n) __atomic_try_cmpxchg(_acquire, _p, _po, _n) +#define atomic_try_cmpxchg_release(_p, _po, _n) __atomic_try_cmpxchg(_release, _p, _po, _n) + +#else /* atomic_try_cmpxchg */ +#define atomic_try_cmpxchg_relaxed atomic_try_cmpxchg +#define atomic_try_cmpxchg_acquire atomic_try_cmpxchg +#define atomic_try_cmpxchg_release atomic_try_cmpxchg +#endif /* atomic_try_cmpxchg */ + /* cmpxchg_relaxed */ #ifndef cmpxchg_relaxed #define cmpxchg_relaxed cmpxchg @@ -996,6 +1017,27 @@ static inline int atomic_dec_if_positive(atomic_t *v) #endif #endif /* atomic64_cmpxchg_relaxed */ +#ifndef atomic64_try_cmpxchg + +#define __atomic64_try_cmpxchg(type, _p, _po, _n) \ +({ \ + typeof(_po) __po = (_po); \ + typeof(*(_po)) __o = *__po; \ + *__po = atomic64_cmpxchg##type((_p), __o, (_n)); \ + (*__po == __o); \ +}) + +#define atomic64_try_cmpxchg(_p, _po, _n) __atomic64_try_cmpxchg(, _p, _po, _n) +#define atomic64_try_cmpxchg_relaxed(_p, _po, _n) __atomic64_try_cmpxchg(_relaxed, _p, _po, _n) +#define atomic64_try_cmpxchg_acquire(_p, _po, _n) __atomic64_try_cmpxchg(_acquire, _p, _po, _n) +#define atomic64_try_cmpxchg_release(_p, _po, _n) __atomic64_try_cmpxchg(_release, _p, _po, _n) + +#else /* atomic64_try_cmpxchg */ +#define atomic64_try_cmpxchg_relaxed atomic64_try_cmpxchg +#define atomic64_try_cmpxchg_acquire atomic64_try_cmpxchg +#define atomic64_try_cmpxchg_release atomic64_try_cmpxchg +#endif /* atomic64_try_cmpxchg */ + #ifndef atomic64_andnot static inline void atomic64_andnot(long long i, atomic64_t *v) { -- cgit v1.2.3 From 44fe84459faf1a7781595b7c64cd36daf2f2827d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 27 Mar 2017 13:54:38 +0200 Subject: locking/atomic: Fix atomic_try_cmpxchg() semantics Dmitry noted that the new atomic_try_cmpxchg() primitive is broken when the old pointer doesn't point to the local stack. He writes: "Consider a classical lock-free stack push: node->next = atomic_read(&head); do { } while (!atomic_try_cmpxchg(&head, &node->next, node)); This code is broken with the current implementation, the problem is with unconditional update of *__po. In case of success it writes the same value back into *__po, but in case of cmpxchg success we might have lose ownership of some memory locations and potentially over what __po has pointed to. The same holds for the re-read of *__po. " He also points out that this makes it surprisingly different from the similar C/C++ atomic operation. After investigating the code-gen differences caused by this patch; and a number of alternatives (Linus dislikes this interface lots), we arrived at these results (size x86_64-defconfig/vmlinux): GCC-6.3.0: 10735757 cmpxchg 10726413 try_cmpxchg 10730509 try_cmpxchg + patch 10730445 try_cmpxchg-linus GCC-7 (20170327): 10709514 cmpxchg 10704266 try_cmpxchg 10704266 try_cmpxchg + patch 10704394 try_cmpxchg-linus From this we see that the patch has the advantage of better code-gen on GCC-7 and keeps the interface roughly consistent with the C language variant. Reported-by: Dmitry Vyukov Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Fixes: a9ebf306f52c ("locking/atomic: Introduce atomic_try_cmpxchg()") Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cmpxchg.h | 5 +++-- include/linux/atomic.h | 16 ++++++++++------ 2 files changed, 13 insertions(+), 8 deletions(-) (limited to 'include/linux/atomic.h') diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h index fb961db51a2a..d90296d061e8 100644 --- a/arch/x86/include/asm/cmpxchg.h +++ b/arch/x86/include/asm/cmpxchg.h @@ -212,8 +212,9 @@ extern void __add_wrong_size(void) default: \ __cmpxchg_wrong_size(); \ } \ - *_old = __old; \ - success; \ + if (unlikely(!success)) \ + *_old = __old; \ + likely(success); \ }) #define __try_cmpxchg(ptr, pold, new, size) \ diff --git a/include/linux/atomic.h b/include/linux/atomic.h index aae5953817d6..c56be7410130 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -428,9 +428,11 @@ #define __atomic_try_cmpxchg(type, _p, _po, _n) \ ({ \ typeof(_po) __po = (_po); \ - typeof(*(_po)) __o = *__po; \ - *__po = atomic_cmpxchg##type((_p), __o, (_n)); \ - (*__po == __o); \ + typeof(*(_po)) __r, __o = *__po; \ + __r = atomic_cmpxchg##type((_p), __o, (_n)); \ + if (unlikely(__r != __o)) \ + *__po = __r; \ + likely(__r == __o); \ }) #define atomic_try_cmpxchg(_p, _po, _n) __atomic_try_cmpxchg(, _p, _po, _n) @@ -1022,9 +1024,11 @@ static inline int atomic_dec_if_positive(atomic_t *v) #define __atomic64_try_cmpxchg(type, _p, _po, _n) \ ({ \ typeof(_po) __po = (_po); \ - typeof(*(_po)) __o = *__po; \ - *__po = atomic64_cmpxchg##type((_p), __o, (_n)); \ - (*__po == __o); \ + typeof(*(_po)) __r, __o = *__po; \ + __r = atomic64_cmpxchg##type((_p), __o, (_n)); \ + if (unlikely(__r != __o)) \ + *__po = __r; \ + likely(__r == __o); \ }) #define atomic64_try_cmpxchg(_p, _po, _n) __atomic64_try_cmpxchg(, _p, _po, _n) -- cgit v1.2.3