Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull locking updates from Ingo Molnar: "The locking tree was busier in this cycle than the usual pattern - a couple of major projects happened to coincide. The main changes are: - implement the atomic_fetch_{add,sub,and,or,xor}() API natively across all SMP architectures (Peter Zijlstra) - add atomic_fetch_{inc/dec}() as well, using the generic primitives (Davidlohr Bueso) - optimize various aspects of rwsems (Jason Low, Davidlohr Bueso, Waiman Long) - optimize smp_cond_load_acquire() on arm64 and implement LSE based atomic{,64}_fetch_{add,sub,and,andnot,or,xor}{,_relaxed,_acquire,_release}() on arm64 (Will Deacon) - introduce smp_acquire__after_ctrl_dep() and fix various barrier mis-uses and bugs (Peter Zijlstra) - after discovering ancient spin_unlock_wait() barrier bugs in its implementation and usage, strengthen its semantics and update/fix usage sites (Peter Zijlstra) - optimize mutex_trylock() fastpath (Peter Zijlstra) - ... misc fixes and cleanups" * 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (67 commits) locking/atomic: Introduce inc/dec variants for the atomic_fetch_$op() API locking/barriers, arch/arm64: Implement LDXR+WFE based smp_cond_load_acquire() locking/static_keys: Fix non static symbol Sparse warning locking/qspinlock: Use __this_cpu_dec() instead of full-blown this_cpu_dec() locking/atomic, arch/tile: Fix tilepro build locking/atomic, arch/m68k: Remove comment locking/atomic, arch/arc: Fix build locking/Documentation: Clarify limited control-dependency scope locking/atomic, arch/rwsem: Employ atomic_long_fetch_add() locking/atomic, arch/qrwlock: Employ atomic_fetch_add_acquire() locking/atomic, arch/mips: Convert to _relaxed atomics locking/atomic, arch/alpha: Convert to _relaxed atomics locking/atomic: Remove the deprecated atomic_{set,clear}_mask() functions locking/atomic: Remove linux/atomic.h:atomic_fetch_or() locking/atomic: Implement atomic{,64,_long}_fetch_{add,sub,and,andnot,or,xor}{,_relaxed,_acquire,_release}() locking/atomic: Fix atomic64_relaxed() bits locking/atomic, arch/xtensa: Implement atomic_fetch_{add,sub,and,or,xor}() locking/atomic, arch/x86: Implement atomic{,64}_fetch_{add,sub,and,or,xor}() locking/atomic, arch/tile: Implement atomic{,64}_fetch_{add,sub,and,or,xor}() locking/atomic, arch/sparc: Implement atomic{,64}_fetch_{add,sub,and,or,xor}() ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2016-07-25 12:41:29 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2016-07-25 12:41:29 -0700
commit: c86ad14d305d2429c3da19462440bac50c183def (patch)
tree: bd794cd72476661faf82c440063c217bb978ce44 /arch/arm
parent: a2303849a6b4b7ba59667091e00d6bb194071d9a (diff)
parent: f06628638cf6e75f179742b6c1b35076965b9fdd (diff)
2 files changed, 113 insertions, 12 deletions
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index 9e10c4567eb4..66d0e215a773 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -77,8 +77,36 @@ static inline int atomic_##op##_return_relaxed(int i, atomic_t *v)	\
 	return result;							\
 }
 
+#define ATOMIC_FETCH_OP(op, c_op, asm_op)				\
+static inline int atomic_fetch_##op##_relaxed(int i, atomic_t *v)	\
+{									\
+	unsigned long tmp;						\
+	int result, val;						\
+									\
+	prefetchw(&v->counter);						\
+									\
+	__asm__ __volatile__("@ atomic_fetch_" #op "\n"			\
+"1:	ldrex	%0, [%4]\n"						\
+"	" #asm_op "	%1, %0, %5\n"					\
+"	strex	%2, %1, [%4]\n"						\
+"	teq	%2, #0\n"						\
+"	bne	1b"							\
+	: "=&r" (result), "=&r" (val), "=&r" (tmp), "+Qo" (v->counter)	\
+	: "r" (&v->counter), "Ir" (i)					\
+	: "cc");							\
+									\
+	return result;							\
+}
+
 #define atomic_add_return_relaxed	atomic_add_return_relaxed
 #define atomic_sub_return_relaxed	atomic_sub_return_relaxed
+#define atomic_fetch_add_relaxed	atomic_fetch_add_relaxed
+#define atomic_fetch_sub_relaxed	atomic_fetch_sub_relaxed
+
+#define atomic_fetch_and_relaxed	atomic_fetch_and_relaxed
+#define atomic_fetch_andnot_relaxed	atomic_fetch_andnot_relaxed
+#define atomic_fetch_or_relaxed		atomic_fetch_or_relaxed
+#define atomic_fetch_xor_relaxed	atomic_fetch_xor_relaxed
 
 static inline int atomic_cmpxchg_relaxed(atomic_t *ptr, int old, int new)
 {
@@ -159,6 +187,20 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 	return val;							\
 }
 
+#define ATOMIC_FETCH_OP(op, c_op, asm_op)				\
+static inline int atomic_fetch_##op(int i, atomic_t *v)			\
+{									\
+	unsigned long flags;						\
+	int val;							\
+									\
+	raw_local_irq_save(flags);					\
+	val = v->counter;						\
+	v->counter c_op i;						\
+	raw_local_irq_restore(flags);					\
+									\
+	return val;							\
+}
+
 static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 {
 	int ret;
@@ -187,19 +229,26 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 
 #define ATOMIC_OPS(op, c_op, asm_op)					\
 	ATOMIC_OP(op, c_op, asm_op)					\
-	ATOMIC_OP_RETURN(op, c_op, asm_op)
+	ATOMIC_OP_RETURN(op, c_op, asm_op)				\
+	ATOMIC_FETCH_OP(op, c_op, asm_op)
 
 ATOMIC_OPS(add, +=, add)
 ATOMIC_OPS(sub, -=, sub)
 
 #define atomic_andnot atomic_andnot
 
-ATOMIC_OP(and, &=, and)
-ATOMIC_OP(andnot, &= ~, bic)
-ATOMIC_OP(or,  |=, orr)
-ATOMIC_OP(xor, ^=, eor)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, c_op, asm_op)					\
+	ATOMIC_OP(op, c_op, asm_op)					\
+	ATOMIC_FETCH_OP(op, c_op, asm_op)
+
+ATOMIC_OPS(and, &=, and)
+ATOMIC_OPS(andnot, &= ~, bic)
+ATOMIC_OPS(or,  |=, orr)
+ATOMIC_OPS(xor, ^=, eor)
 
 #undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
@@ -317,24 +366,61 @@ atomic64_##op##_return_relaxed(long long i, atomic64_t *v)		\
 	return result;							\
 }
 
+#define ATOMIC64_FETCH_OP(op, op1, op2)					\
+static inline long long							\
+atomic64_fetch_##op##_relaxed(long long i, atomic64_t *v)		\
+{									\
+	long long result, val;						\
+	unsigned long tmp;						\
+									\
+	prefetchw(&v->counter);						\
+									\
+	__asm__ __volatile__("@ atomic64_fetch_" #op "\n"		\
+"1:	ldrexd	%0, %H0, [%4]\n"					\
+"	" #op1 " %Q1, %Q0, %Q5\n"					\
+"	" #op2 " %R1, %R0, %R5\n"					\
+"	strexd	%2, %1, %H1, [%4]\n"					\
+"	teq	%2, #0\n"						\
+"	bne	1b"							\
+	: "=&r" (result), "=&r" (val), "=&r" (tmp), "+Qo" (v->counter)	\
+	: "r" (&v->counter), "r" (i)					\
+	: "cc");							\
+									\
+	return result;							\
+}
+
 #define ATOMIC64_OPS(op, op1, op2)					\
 	ATOMIC64_OP(op, op1, op2)					\
-	ATOMIC64_OP_RETURN(op, op1, op2)
+	ATOMIC64_OP_RETURN(op, op1, op2)				\
+	ATOMIC64_FETCH_OP(op, op1, op2)
 
 ATOMIC64_OPS(add, adds, adc)
 ATOMIC64_OPS(sub, subs, sbc)
 
 #define atomic64_add_return_relaxed	atomic64_add_return_relaxed
 #define atomic64_sub_return_relaxed	atomic64_sub_return_relaxed
+#define atomic64_fetch_add_relaxed	atomic64_fetch_add_relaxed
+#define atomic64_fetch_sub_relaxed	atomic64_fetch_sub_relaxed
+
+#undef ATOMIC64_OPS
+#define ATOMIC64_OPS(op, op1, op2)					\
+	ATOMIC64_OP(op, op1, op2)					\
+	ATOMIC64_FETCH_OP(op, op1, op2)
 
 #define atomic64_andnot atomic64_andnot
 
-ATOMIC64_OP(and, and, and)
-ATOMIC64_OP(andnot, bic, bic)
-ATOMIC64_OP(or,  orr, orr)
-ATOMIC64_OP(xor, eor, eor)
+ATOMIC64_OPS(and, and, and)
+ATOMIC64_OPS(andnot, bic, bic)
+ATOMIC64_OPS(or,  orr, orr)
+ATOMIC64_OPS(xor, eor, eor)
+
+#define atomic64_fetch_and_relaxed	atomic64_fetch_and_relaxed
+#define atomic64_fetch_andnot_relaxed	atomic64_fetch_andnot_relaxed
+#define atomic64_fetch_or_relaxed	atomic64_fetch_or_relaxed
+#define atomic64_fetch_xor_relaxed	atomic64_fetch_xor_relaxed
 
 #undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
 
diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h
index 0fa418463f49..4bec45442072 100644
--- a/arch/arm/include/asm/spinlock.h
+++ b/arch/arm/include/asm/spinlock.h
@@ -6,6 +6,8 @@
 #endif
 
 #include <linux/prefetch.h>
+#include <asm/barrier.h>
+#include <asm/processor.h>
 
 /*
  * sev and wfe are ARMv6K extensions.  Uniprocessor ARMv6 may not have the K
@@ -50,8 +52,21 @@ static inline void dsb_sev(void)
  * memory.
  */
 
-#define arch_spin_unlock_wait(lock) \
-	do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+	u16 owner = READ_ONCE(lock->tickets.owner);
+
+	for (;;) {
+		arch_spinlock_t tmp = READ_ONCE(*lock);
+
+		if (tmp.tickets.owner == tmp.tickets.next ||
+		    tmp.tickets.owner != owner)
+			break;
+
+		wfe();
+	}
+	smp_acquire__after_ctrl_dep();
+}
 
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
author	Linus Torvalds <torvalds@linux-foundation.org>	2016-07-25 12:41:29 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-07-25 12:41:29 -0700
commit	c86ad14d305d2429c3da19462440bac50c183def (patch)
tree	bd794cd72476661faf82c440063c217bb978ce44 /arch/arm
parent	a2303849a6b4b7ba59667091e00d6bb194071d9a (diff)
parent	f06628638cf6e75f179742b6c1b35076965b9fdd (diff)