summaryrefslogtreecommitdiff
path: root/arch/powerpc/include/asm
diff options
context:
space:
mode:
authorNicholas Piggin <npiggin@gmail.com>2020-07-24 23:14:20 +1000
committerMichael Ellerman <mpe@ellerman.id.au>2020-07-27 00:01:23 +1000
commitaa65ff6b18e0366db1790609956a4ac7308c5668 (patch)
tree98fe1ff898cadd1de9508cb6213750838b641289 /arch/powerpc/include/asm
parent12d0b9d6c843e7dbe739ebefcf16c7e4a45e4e78 (diff)
powerpc/64s: Implement queued spinlocks and rwlocks
These have shown significantly improved performance and fairness when spinlock contention is moderate to high on very large systems. With this series including subsequent patches, on a 16 socket 1536 thread POWER9, a stress test such as same-file open/close from all CPUs gets big speedups, 11620op/s aggregate with simple spinlocks vs 384158op/s (33x faster), where the difference in throughput between the fastest and slowest thread goes from 7x to 1.4x. Thanks to the fast path being identical in terms of atomics and barriers (after a subsequent optimisation patch), single threaded performance is not changed (no measurable difference). On smaller systems, performance and fairness seems to be generally improved. Using dbench on tmpfs as a test (that starts to run into kernel spinlock contention), a 2-socket OpenPOWER POWER9 system was tested with bare metal and KVM guest configurations. Results can be found here: https://github.com/linuxppc/issues/issues/305#issuecomment-663487453 Observations are: - Queued spinlocks are equal when contention is insignificant, as expected and as measured with microbenchmarks. - When there is contention, on bare metal queued spinlocks have better throughput and max latency at all points. - When virtualised, queued spinlocks are slightly worse approaching peak throughput, but significantly better throughput and max latency at all points beyond peak, until queued spinlock maximum latency rises when clients are 2x vCPUs. The regressions haven't been analysed very well yet, there are a lot of things that can be tuned, particularly the paravirtualised locking, but the numbers already look like a good net win even on relatively small systems. Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Acked-by: Waiman Long <longman@redhat.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20200724131423.1362108-4-npiggin@gmail.com
Diffstat (limited to 'arch/powerpc/include/asm')
-rw-r--r--arch/powerpc/include/asm/Kbuild1
-rw-r--r--arch/powerpc/include/asm/qspinlock.h25
-rw-r--r--arch/powerpc/include/asm/spinlock.h5
-rw-r--r--arch/powerpc/include/asm/spinlock_types.h5
4 files changed, 36 insertions, 0 deletions
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index dadbcf3a0b1e..27c2268dfd6c 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -6,5 +6,6 @@ generated-y += syscall_table_spu.h
generic-y += export.h
generic-y += local64.h
generic-y += mcs_spinlock.h
+generic-y += qrwlock.h
generic-y += vtime.h
generic-y += early_ioremap.h
diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h
new file mode 100644
index 000000000000..c49e33e24edd
--- /dev/null
+++ b/arch/powerpc/include/asm/qspinlock.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_QSPINLOCK_H
+#define _ASM_POWERPC_QSPINLOCK_H
+
+#include <asm-generic/qspinlock_types.h>
+
+#define _Q_PENDING_LOOPS (1 << 9) /* not tuned */
+
+#define smp_mb__after_spinlock() smp_mb()
+
+static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
+{
+ /*
+ * This barrier was added to simple spinlocks by commit 51d7d5205d338,
+ * but it should now be possible to remove it, asm arm64 has done with
+ * commit c6f5d02b6a0f.
+ */
+ smp_mb();
+ return atomic_read(&lock->val);
+}
+#define queued_spin_is_locked queued_spin_is_locked
+
+#include <asm-generic/qspinlock.h>
+
+#endif /* _ASM_POWERPC_QSPINLOCK_H */
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index 21357fe05fe0..434615f1d761 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -3,7 +3,12 @@
#define __ASM_SPINLOCK_H
#ifdef __KERNEL__
+#ifdef CONFIG_PPC_QUEUED_SPINLOCKS
+#include <asm/qspinlock.h>
+#include <asm/qrwlock.h>
+#else
#include <asm/simple_spinlock.h>
+#endif
#endif /* __KERNEL__ */
#endif /* __ASM_SPINLOCK_H */
diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h
index 3906f52dae65..c5d742f18021 100644
--- a/arch/powerpc/include/asm/spinlock_types.h
+++ b/arch/powerpc/include/asm/spinlock_types.h
@@ -6,6 +6,11 @@
# error "please don't include this file directly"
#endif
+#ifdef CONFIG_PPC_QUEUED_SPINLOCKS
+#include <asm-generic/qspinlock_types.h>
+#include <asm-generic/qrwlock_types.h>
+#else
#include <asm/simple_spinlock_types.h>
+#endif
#endif