author    Kent Overstreet <kent.overstreet@linux.dev>  2024-01-26 12:18:05 -0500
committer Kent Overstreet <kent.overstreet@linux.dev>  2024-05-07 16:00:37 -0400
commit    fb3e29a7c0876c9c0cd732df04e31c43203ea790 (patch)
tree      099ac80a904cbd0657bc4e2e9c399fd035d4e5ba
parent    fb0f40125feec3de7ef4524600ac83946207117e (diff)
eytzinger: Promote to include/linux/
eytzinger trees are a faster alternative to binary search. They're a bit
more expensive to set up, but lookups perform much better assuming the
tree isn't entirely in cache.

Binary search is a worst case scenario for branch prediction and
prefetching, but eytzinger trees have children adjacent in memory and
thus we can prefetch before knowing the result of a comparison.

An eytzinger tree is a binary tree laid out in an array, with the same
geometry as the usual binary heap construction, but used as a search
tree instead.

Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
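To make the layout concrete, here is a minimal standalone sketch of an
exact-match lookup over a zero-based ("eytzinger0") array. The helper name
is hypothetical and this is not the include/linux/eytzinger.h API; it only
illustrates why the layout prefetches well: node i's children sit at 2*i+1
and 2*i+2, adjacent in memory, so one prefetch covers both candidates for
the next step.

/*
 * Hypothetical sketch, not the include/linux/eytzinger.h API:
 * exact-match lookup in a zero-based eytzinger array.
 */
static int eytzinger0_lookup_sketch(const int *base, size_t n, int search)
{
	size_t i = 0;

	while (i < n) {
		/* both children are adjacent: prefetch before comparing */
		__builtin_prefetch(&base[2 * i + 1]);

		if (base[i] == search)
			return (int)i;
		/* descend right (2*i+2) when search is larger, else left */
		i = 2 * i + 1 + (base[i] < search);
	}
	return -1; /* not present */
}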
-rw-r--r--  MAINTAINERS                                                          6
-rw-r--r--  fs/bcachefs/Makefile                                                 1
-rw-r--r--  fs/bcachefs/bset.c                                                   2
-rw-r--r--  fs/bcachefs/eytzinger.c                                            234
-rw-r--r--  fs/bcachefs/journal_seq_blacklist.c                                  3
-rw-r--r--  fs/bcachefs/replicas.h                                               3
-rw-r--r--  fs/bcachefs/super-io.h                                               2
-rw-r--r--  fs/bcachefs/time_stats.c                                             2
-rw-r--r--  fs/bcachefs/util.c                                                   3
-rw-r--r--  include/linux/eytzinger.h (renamed from fs/bcachefs/eytzinger.h)     6
-rw-r--r--  lib/sort.c                                                          89
11 files changed, 106 insertions, 245 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index e88a9a24a5eb..b2fd98f16e6f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8152,6 +8152,12 @@ L: iommu@lists.linux.dev
S: Maintained
F: drivers/iommu/exynos-iommu.c
+EYTZINGER TREE LIB
+M: Kent Overstreet <kent.overstreet@linux.dev>
+L: linux-bcachefs@vger.kernel.org
+S: Maintained
+F: include/linux/eytzinger.h
+
F2FS FILE SYSTEM
M: Jaegeuk Kim <jaegeuk@kernel.org>
M: Chao Yu <chao@kernel.org>
diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile
index 66ca0bbee639..73b23f60c33d 100644
--- a/fs/bcachefs/Makefile
+++ b/fs/bcachefs/Makefile
@@ -38,7 +38,6 @@ bcachefs-y := \
error.o \
extents.o \
extent_update.o \
- eytzinger.o \
fs.o \
fs-common.o \
fs-ioctl.o \
diff --git a/fs/bcachefs/bset.c b/fs/bcachefs/bset.c
index 3bb477840eab..ff796d1edb73 100644
--- a/fs/bcachefs/bset.c
+++ b/fs/bcachefs/bset.c
@@ -9,12 +9,12 @@
#include "bcachefs.h"
#include "btree_cache.h"
#include "bset.h"
-#include "eytzinger.h"
#include "trace.h"
#include "util.h"
#include <asm/unaligned.h>
#include <linux/console.h>
+#include <linux/eytzinger.h>
#include <linux/random.h>
#include <linux/prefetch.h>
diff --git a/fs/bcachefs/eytzinger.c b/fs/bcachefs/eytzinger.c
deleted file mode 100644
index 0f955c3c76a7..000000000000
--- a/fs/bcachefs/eytzinger.c
+++ /dev/null
@@ -1,234 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-#include "eytzinger.h"
-
-/**
- * is_aligned - is this pointer & size okay for word-wide copying?
- * @base: pointer to data
- * @size: size of each element
- * @align: required alignment (typically 4 or 8)
- *
- * Returns true if elements can be copied using word loads and stores.
- * The size must be a multiple of the alignment, and the base address must
- * be if we do not have CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS.
- *
- * For some reason, gcc doesn't know to optimize "if (a & mask || b & mask)"
- * to "if ((a | b) & mask)", so we do that by hand.
- */
-__attribute_const__ __always_inline
-static bool is_aligned(const void *base, size_t size, unsigned char align)
-{
- unsigned char lsbits = (unsigned char)size;
-
- (void)base;
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
- lsbits |= (unsigned char)(uintptr_t)base;
-#endif
- return (lsbits & (align - 1)) == 0;
-}
-
-/**
- * swap_words_32 - swap two elements in 32-bit chunks
- * @a: pointer to the first element to swap
- * @b: pointer to the second element to swap
- * @n: element size (must be a multiple of 4)
- *
- * Exchange the two objects in memory. This exploits base+index addressing,
- * which basically all CPUs have, to minimize loop overhead computations.
- *
- * For some reason, on x86 gcc 7.3.0 adds a redundant test of n at the
- * bottom of the loop, even though the zero flag is still valid from the
- * subtract (since the intervening mov instructions don't alter the flags).
- * Gcc 8.1.0 doesn't have that problem.
- */
-static void swap_words_32(void *a, void *b, size_t n)
-{
- do {
- u32 t = *(u32 *)(a + (n -= 4));
- *(u32 *)(a + n) = *(u32 *)(b + n);
- *(u32 *)(b + n) = t;
- } while (n);
-}
-
-/**
- * swap_words_64 - swap two elements in 64-bit chunks
- * @a: pointer to the first element to swap
- * @b: pointer to the second element to swap
- * @n: element size (must be a multiple of 8)
- *
- * Exchange the two objects in memory. This exploits base+index
- * addressing, which basically all CPUs have, to minimize loop overhead
- * computations.
- *
- * We'd like to use 64-bit loads if possible. If they're not, emulating
- * one requires base+index+4 addressing which x86 has but most other
- * processors do not. If CONFIG_64BIT, we definitely have 64-bit loads,
- * but it's possible to have 64-bit loads without 64-bit pointers (e.g.
- * x32 ABI). Are there any cases the kernel needs to worry about?
- */
-static void swap_words_64(void *a, void *b, size_t n)
-{
- do {
-#ifdef CONFIG_64BIT
- u64 t = *(u64 *)(a + (n -= 8));
- *(u64 *)(a + n) = *(u64 *)(b + n);
- *(u64 *)(b + n) = t;
-#else
- /* Use two 32-bit transfers to avoid base+index+4 addressing */
- u32 t = *(u32 *)(a + (n -= 4));
- *(u32 *)(a + n) = *(u32 *)(b + n);
- *(u32 *)(b + n) = t;
-
- t = *(u32 *)(a + (n -= 4));
- *(u32 *)(a + n) = *(u32 *)(b + n);
- *(u32 *)(b + n) = t;
-#endif
- } while (n);
-}
-
-/**
- * swap_bytes - swap two elements a byte at a time
- * @a: pointer to the first element to swap
- * @b: pointer to the second element to swap
- * @n: element size
- *
- * This is the fallback if alignment doesn't allow using larger chunks.
- */
-static void swap_bytes(void *a, void *b, size_t n)
-{
- do {
- char t = ((char *)a)[--n];
- ((char *)a)[n] = ((char *)b)[n];
- ((char *)b)[n] = t;
- } while (n);
-}
-
-/*
- * The values are arbitrary as long as they can't be confused with
- * a pointer, but small integers make for the smallest compare
- * instructions.
- */
-#define SWAP_WORDS_64 (swap_r_func_t)0
-#define SWAP_WORDS_32 (swap_r_func_t)1
-#define SWAP_BYTES (swap_r_func_t)2
-#define SWAP_WRAPPER (swap_r_func_t)3
-
-struct wrapper {
- cmp_func_t cmp;
- swap_func_t swap_func;
-};
-
-/*
- * The function pointer is last to make tail calls most efficient if the
- * compiler decides not to inline this function.
- */
-static void do_swap(void *a, void *b, size_t size, swap_r_func_t swap_func, const void *priv)
-{
- if (swap_func == SWAP_WRAPPER) {
- ((const struct wrapper *)priv)->swap_func(a, b, (int)size);
- return;
- }
-
- if (swap_func == SWAP_WORDS_64)
- swap_words_64(a, b, size);
- else if (swap_func == SWAP_WORDS_32)
- swap_words_32(a, b, size);
- else if (swap_func == SWAP_BYTES)
- swap_bytes(a, b, size);
- else
- swap_func(a, b, (int)size, priv);
-}
-
-#define _CMP_WRAPPER ((cmp_r_func_t)0L)
-
-static int do_cmp(const void *a, const void *b, cmp_r_func_t cmp, const void *priv)
-{
- if (cmp == _CMP_WRAPPER)
- return ((const struct wrapper *)priv)->cmp(a, b);
- return cmp(a, b, priv);
-}
-
-static inline int eytzinger0_do_cmp(void *base, size_t n, size_t size,
- cmp_r_func_t cmp_func, const void *priv,
- size_t l, size_t r)
-{
- return do_cmp(base + inorder_to_eytzinger0(l, n) * size,
- base + inorder_to_eytzinger0(r, n) * size,
- cmp_func, priv);
-}
-
-static inline void eytzinger0_do_swap(void *base, size_t n, size_t size,
- swap_r_func_t swap_func, const void *priv,
- size_t l, size_t r)
-{
- do_swap(base + inorder_to_eytzinger0(l, n) * size,
- base + inorder_to_eytzinger0(r, n) * size,
- size, swap_func, priv);
-}
-
-void eytzinger0_sort_r(void *base, size_t n, size_t size,
- cmp_r_func_t cmp_func,
- swap_r_func_t swap_func,
- const void *priv)
-{
- int i, c, r;
-
- /* called from 'sort' without swap function, let's pick the default */
- if (swap_func == SWAP_WRAPPER && !((struct wrapper *)priv)->swap_func)
- swap_func = NULL;
-
- if (!swap_func) {
- if (is_aligned(base, size, 8))
- swap_func = SWAP_WORDS_64;
- else if (is_aligned(base, size, 4))
- swap_func = SWAP_WORDS_32;
- else
- swap_func = SWAP_BYTES;
- }
-
- /* heapify */
- for (i = n / 2 - 1; i >= 0; --i) {
- for (r = i; r * 2 + 1 < n; r = c) {
- c = r * 2 + 1;
-
- if (c + 1 < n &&
- eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0)
- c++;
-
- if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0)
- break;
-
- eytzinger0_do_swap(base, n, size, swap_func, priv, r, c);
- }
- }
-
- /* sort */
- for (i = n - 1; i > 0; --i) {
- eytzinger0_do_swap(base, n, size, swap_func, priv, 0, i);
-
- for (r = 0; r * 2 + 1 < i; r = c) {
- c = r * 2 + 1;
-
- if (c + 1 < i &&
- eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0)
- c++;
-
- if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0)
- break;
-
- eytzinger0_do_swap(base, n, size, swap_func, priv, r, c);
- }
- }
-}
-
-void eytzinger0_sort(void *base, size_t n, size_t size,
- cmp_func_t cmp_func,
- swap_func_t swap_func)
-{
- struct wrapper w = {
- .cmp = cmp_func,
- .swap_func = swap_func,
- };
-
- return eytzinger0_sort_r(base, n, size, _CMP_WRAPPER, SWAP_WRAPPER, &w);
-}
diff --git a/fs/bcachefs/journal_seq_blacklist.c b/fs/bcachefs/journal_seq_blacklist.c
index 37a024e034d4..bdba48abae9c 100644
--- a/fs/bcachefs/journal_seq_blacklist.c
+++ b/fs/bcachefs/journal_seq_blacklist.c
@@ -2,10 +2,11 @@
#include "bcachefs.h"
#include "btree_iter.h"
-#include "eytzinger.h"
#include "journal_seq_blacklist.h"
#include "super-io.h"
+#include <linux/eytzinger.h>
+
/*
* journal_seq_blacklist machinery:
*
diff --git a/fs/bcachefs/replicas.h b/fs/bcachefs/replicas.h
index 654a4b26d3a3..983cce782ac2 100644
--- a/fs/bcachefs/replicas.h
+++ b/fs/bcachefs/replicas.h
@@ -3,9 +3,10 @@
#define _BCACHEFS_REPLICAS_H
#include "bkey.h"
-#include "eytzinger.h"
#include "replicas_types.h"
+#include <linux/eytzinger.h>
+
void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *);
void bch2_replicas_entry_to_text(struct printbuf *,
struct bch_replicas_entry_v1 *);
diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h
index 95e80e06316b..f37620919e11 100644
--- a/fs/bcachefs/super-io.h
+++ b/fs/bcachefs/super-io.h
@@ -3,12 +3,12 @@
#define _BCACHEFS_SUPER_IO_H
#include "extents.h"
-#include "eytzinger.h"
#include "super_types.h"
#include "super.h"
#include "sb-members.h"
#include <asm/byteorder.h>
+#include <linux/eytzinger.h>
static inline bool bch2_version_compatible(u16 version)
{
diff --git a/fs/bcachefs/time_stats.c b/fs/bcachefs/time_stats.c
index 4508e9dcbee2..e3be1a4bfc6e 100644
--- a/fs/bcachefs/time_stats.c
+++ b/fs/bcachefs/time_stats.c
@@ -7,7 +7,7 @@
#include <linux/time.h>
#include <linux/spinlock.h>
-#include "eytzinger.h"
+#include <linux/eytzinger.h>
#include "time_stats.h"
static const struct time_unit time_units[] = {
diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c
index 92c6ad75e702..7d245881f444 100644
--- a/fs/bcachefs/util.c
+++ b/fs/bcachefs/util.c
@@ -11,6 +11,7 @@
#include <linux/console.h>
#include <linux/ctype.h>
#include <linux/debugfs.h>
+#include <linux/eytzinger.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/log2.h>
@@ -23,8 +24,6 @@
#include <linux/types.h>
#include <linux/sched/clock.h>
-#include "eytzinger.h"
-#include "mean_and_variance.h"
#include "util.h"
static const char si_units[] = "?kMGTPEZY";
diff --git a/fs/bcachefs/eytzinger.h b/include/linux/eytzinger.h
index 24840aee335c..a4122567b3a2 100644
--- a/fs/bcachefs/eytzinger.h
+++ b/include/linux/eytzinger.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _EYTZINGER_H
-#define _EYTZINGER_H
+#ifndef _LINUX_EYTZINGER_H
+#define _LINUX_EYTZINGER_H
#include <linux/bitops.h>
#include <linux/log2.h>
@@ -303,4 +303,4 @@ void eytzinger0_sort_r(void *, size_t, size_t,
cmp_r_func_t, swap_r_func_t, const void *);
void eytzinger0_sort(void *, size_t, size_t, cmp_func_t, swap_func_t);
-#endif /* _EYTZINGER_H */
+#endif /* _LINUX_EYTZINGER_H */
diff --git a/lib/sort.c b/lib/sort.c
index a0509088f82a..f7b3c8bb1a82 100644
--- a/lib/sort.c
+++ b/lib/sort.c
@@ -300,3 +300,92 @@ void sort(void *base, size_t num, size_t size,
return sort_r(base, num, size, _CMP_WRAPPER, SWAP_WRAPPER, &w);
}
EXPORT_SYMBOL(sort);
+
+#include <linux/eytzinger.h>
+
+static inline int eytzinger0_do_cmp(void *base, size_t n, size_t size,
+ cmp_r_func_t cmp_func, const void *priv,
+ size_t l, size_t r)
+{
+ return do_cmp(base + inorder_to_eytzinger0(l, n) * size,
+ base + inorder_to_eytzinger0(r, n) * size,
+ cmp_func, priv);
+}
+
+static inline void eytzinger0_do_swap(void *base, size_t n, size_t size,
+ swap_r_func_t swap_func, const void *priv,
+ size_t l, size_t r)
+{
+ do_swap(base + inorder_to_eytzinger0(l, n) * size,
+ base + inorder_to_eytzinger0(r, n) * size,
+ size, swap_func, priv);
+}
+
+void eytzinger0_sort_r(void *base, size_t n, size_t size,
+ cmp_r_func_t cmp_func,
+ swap_r_func_t swap_func,
+ const void *priv)
+{
+ int i, c, r;
+
+ /* called from 'sort' without swap function, let's pick the default */
+ if (swap_func == SWAP_WRAPPER && !((struct wrapper *)priv)->swap)
+ swap_func = NULL;
+
+ if (!swap_func) {
+ if (is_aligned(base, size, 8))
+ swap_func = SWAP_WORDS_64;
+ else if (is_aligned(base, size, 4))
+ swap_func = SWAP_WORDS_32;
+ else
+ swap_func = SWAP_BYTES;
+ }
+
+ /* heapify */
+ for (i = n / 2 - 1; i >= 0; --i) {
+ for (r = i; r * 2 + 1 < n; r = c) {
+ c = r * 2 + 1;
+
+ if (c + 1 < n &&
+ eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0)
+ c++;
+
+ if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0)
+ break;
+
+ eytzinger0_do_swap(base, n, size, swap_func, priv, r, c);
+ }
+ }
+
+ /* sort */
+ for (i = n - 1; i > 0; --i) {
+ eytzinger0_do_swap(base, n, size, swap_func, priv, 0, i);
+
+ for (r = 0; r * 2 + 1 < i; r = c) {
+ c = r * 2 + 1;
+
+ if (c + 1 < i &&
+ eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0)
+ c++;
+
+ if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0)
+ break;
+
+ eytzinger0_do_swap(base, n, size, swap_func, priv, r, c);
+ }
+ }
+}
+EXPORT_SYMBOL_GPL(eytzinger0_sort_r);
+
+void eytzinger0_sort(void *base, size_t n, size_t size,
+ cmp_func_t cmp_func,
+ swap_func_t swap_func)
+{
+ struct wrapper w = {
+ .cmp = cmp_func,
+ .swap = swap_func,
+ };
+
+ return eytzinger0_sort_r(base, n, size, _CMP_WRAPPER, SWAP_WRAPPER, &w);
+}
+EXPORT_SYMBOL_GPL(eytzinger0_sort);
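
With eytzinger0_sort() and eytzinger0_sort_r() now exported from lib/sort.c,
callers outside bcachefs can build an eytzinger0 array directly. A usage
sketch under stated assumptions (kernel-style code; cmp_u32 and
eytzinger0_sort_example are made-up names for illustration, not part of the
patch):

/*
 * Illustrative usage, not from the patch: sort a plain array into
 * eytzinger0 order with the newly exported helper.  Passing a NULL
 * swap_func lets eytzinger0_sort_r pick a word-sized default swap.
 */
static int cmp_u32(const void *a, const void *b)
{
	u32 x = *(const u32 *)a, y = *(const u32 *)b;

	return x < y ? -1 : x > y;
}

static void eytzinger0_sort_example(void)
{
	u32 v[] = { 7, 1, 5, 3, 9, 2, 8 };

	eytzinger0_sort(v, ARRAY_SIZE(v), sizeof(v[0]), cmp_u32, NULL);
	/* v now holds the same values laid out as an eytzinger0 search tree */
}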