author	Kent Overstreet <kent.overstreet@linux.dev>	2024-02-05 23:09:25 -0500
committer	Kent Overstreet <kent.overstreet@linux.dev>	2024-02-06 01:07:16 -0500
commit	f3f005c76eb5636542a8f5b137bd1904d57e8f86 (patch)
tree	ecada3604cb1668411386264ed92d0e61536a93a /include/linux
parent	1ef396b684a419b5a50ab215103486d189068800 (diff)
Update bcachefs sources to 50847e296b34 bcachefs: Check subvol <-> inode pointers in check_inode()
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to 'include/linux')
-rw-r--r--	include/linux/darray.h	128
-rw-r--r--	include/linux/darray_types.h	22
-rw-r--r--	include/linux/eytzinger.h	289
-rw-r--r--	include/linux/mean_and_variance.h	203
-rw-r--r--	include/linux/mempool.h	13
-rw-r--r--	include/linux/spinlock.h	66
-rw-r--r--	include/linux/spinlock_types.h	65
-rw-r--r--	include/linux/thread_with_file.h	15
-rw-r--r--	include/linux/thread_with_file_types.h	0
-rw-r--r--	include/linux/time.h	0
-rw-r--r--	include/linux/time_stats.h	167
-rw-r--r--	include/linux/types.h	5
12 files changed, 908 insertions, 65 deletions
diff --git a/include/linux/darray.h b/include/linux/darray.h
new file mode 100644
index 00000000..ff167eb7
--- /dev/null
+++ b/include/linux/darray.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * (C) 2022-2024 Kent Overstreet <kent.overstreet@linux.dev>
+ */
+#ifndef _LINUX_DARRAY_H
+#define _LINUX_DARRAY_H
+
+/*
+ * Dynamic arrays
+ *
+ * Inspired by CCAN's darray
+ */
+
+#include <linux/darray_types.h>
+#include <linux/slab.h>
+
+int __darray_resize_slowpath(darray_char *, size_t, size_t, gfp_t);
+
+static inline int __darray_resize(darray_char *d, size_t element_size,
+ size_t new_size, gfp_t gfp)
+{
+ return unlikely(new_size > d->size)
+ ? __darray_resize_slowpath(d, element_size, new_size, gfp)
+ : 0;
+}
+
+#define darray_resize_gfp(_d, _new_size, _gfp) \
+ unlikely(__darray_resize((darray_char *) (_d), sizeof((_d)->data[0]), (_new_size), _gfp))
+
+#define darray_resize(_d, _new_size) \
+ darray_resize_gfp(_d, _new_size, GFP_KERNEL)
+
+static inline int __darray_make_room(darray_char *d, size_t t_size, size_t more, gfp_t gfp)
+{
+ return __darray_resize(d, t_size, d->nr + more, gfp);
+}
+
+#define darray_make_room_gfp(_d, _more, _gfp) \
+ __darray_make_room((darray_char *) (_d), sizeof((_d)->data[0]), (_more), _gfp)
+
+#define darray_make_room(_d, _more) \
+ darray_make_room_gfp(_d, _more, GFP_KERNEL)
+
+#define darray_room(_d) ((_d).size - (_d).nr)
+
+#define darray_top(_d) ((_d).data[(_d).nr])
+
+#define darray_push_gfp(_d, _item, _gfp) \
+({ \
+ int _ret = darray_make_room_gfp((_d), 1, _gfp); \
+ \
+ if (!_ret) \
+ (_d)->data[(_d)->nr++] = (_item); \
+ _ret; \
+})
+
+#define darray_push(_d, _item) darray_push_gfp(_d, _item, GFP_KERNEL)
+
+#define darray_pop(_d) ((_d)->data[--(_d)->nr])
+
+#define darray_first(_d) ((_d).data[0])
+#define darray_last(_d) ((_d).data[(_d).nr - 1])
+
+/* Insert/remove items into the middle of a darray: */
+
+#define array_insert_item(_array, _nr, _pos, _new_item) \
+do { \
+ memmove(&(_array)[(_pos) + 1], \
+ &(_array)[(_pos)], \
+ sizeof((_array)[0]) * ((_nr) - (_pos))); \
+ (_nr)++; \
+ (_array)[(_pos)] = (_new_item); \
+} while (0)
+
+#define array_remove_items(_array, _nr, _pos, _nr_to_remove) \
+do { \
+ (_nr) -= (_nr_to_remove); \
+ memmove(&(_array)[(_pos)], \
+ &(_array)[(_pos) + (_nr_to_remove)], \
+ sizeof((_array)[0]) * ((_nr) - (_pos))); \
+} while (0)
+
+#define array_remove_item(_array, _nr, _pos) \
+ array_remove_items(_array, _nr, _pos, 1)
+
+#define darray_insert_item(_d, pos, _item) \
+({ \
+ size_t _pos = (pos); \
+ int _ret = darray_make_room((_d), 1); \
+ \
+ if (!_ret) \
+ array_insert_item((_d)->data, (_d)->nr, _pos, (_item)); \
+ _ret; \
+})
+
+#define darray_remove_items(_d, _pos, _nr_to_remove) \
+ array_remove_items((_d)->data, (_d)->nr, (_pos) - (_d)->data, _nr_to_remove)
+
+#define darray_remove_item(_d, _pos) \
+ darray_remove_items(_d, _pos, 1)
+
+/* Iteration: */
+
+#define __darray_for_each(_d, _i) \
+ for ((_i) = (_d).data; _i < (_d).data + (_d).nr; _i++)
+
+#define darray_for_each(_d, _i) \
+ for (typeof(&(_d).data[0]) _i = (_d).data; _i < (_d).data + (_d).nr; _i++)
+
+#define darray_for_each_reverse(_d, _i) \
+ for (typeof(&(_d).data[0]) _i = (_d).data + (_d).nr - 1; _i >= (_d).data; --_i)
+
+#define darray_init(_d) \
+do { \
+ (_d)->nr = 0; \
+ (_d)->size = ARRAY_SIZE((_d)->preallocated); \
+ (_d)->data = (_d)->size ? (_d)->preallocated : NULL; \
+} while (0)
+
+#define darray_exit(_d) \
+do { \
+ if (!ARRAY_SIZE((_d)->preallocated) || \
+ (_d)->data != (_d)->preallocated) \
+ kvfree((_d)->data); \
+ darray_init(_d); \
+} while (0)
+
+#endif /* _LINUX_DARRAY_H */
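
For context, a minimal usage sketch of the darray API added above; the function and variable names here are illustrative, only the DARRAY()/darray_*() macros come from darray.h:

#include <linux/darray.h>
#include <linux/printk.h>

static int collect_evens(void)
{
	DARRAY(u64) nums = {};			/* nr = 0, size = 0, data = NULL */
	int ret = 0;

	for (u64 v = 0; v < 100; v++) {
		if (v & 1)
			continue;

		ret = darray_push(&nums, v);	/* grows the backing buffer as needed */
		if (ret)			/* -ENOMEM if the resize failed */
			goto out;
	}

	darray_for_each(nums, i)		/* i is a u64 * cursor over nums.data */
		pr_info("even: %llu\n", *i);
out:
	darray_exit(&nums);			/* frees the heap buffer, resets the darray */
	return ret;
}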
diff --git a/include/linux/darray_types.h b/include/linux/darray_types.h
new file mode 100644
index 00000000..a400a0c3
--- /dev/null
+++ b/include/linux/darray_types.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * (C) 2022-2024 Kent Overstreet <kent.overstreet@linux.dev>
+ */
+#ifndef _LINUX_DARRAY_TYPES_H
+#define _LINUX_DARRAY_TYPES_H
+
+#include <linux/types.h>
+
+#define DARRAY_PREALLOCATED(_type, _nr) \
+struct { \
+ size_t nr, size; \
+ _type *data; \
+ _type preallocated[_nr]; \
+}
+
+#define DARRAY(_type) DARRAY_PREALLOCATED(_type, 0)
+
+typedef DARRAY(char) darray_char;
+typedef DARRAY(char *) darray_str;
+
+#endif /* _LINUX_DARRAY_TYPES_H */
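
The DARRAY_PREALLOCATED() variant embeds a small inline buffer so short arrays never touch the allocator. A hypothetical sketch; the typedef and function names are made up for illustration:

#include <linux/darray.h>

/* up to 8 entries live inline in the struct itself */
typedef DARRAY_PREALLOCATED(int, 8) small_int_darray;

static int example(void)
{
	small_int_darray d;
	int ret = 0;

	darray_init(&d);		/* d.data points at d.preallocated, d.size == 8 */

	for (int i = 0; i < 16; i++) {
		ret = darray_push(&d, i);	/* the first 8 pushes never allocate */
		if (ret)
			break;
	}

	darray_exit(&d);		/* frees only if we outgrew the inline buffer */
	return ret;
}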
diff --git a/include/linux/eytzinger.h b/include/linux/eytzinger.h
new file mode 100644
index 00000000..9565a5c2
--- /dev/null
+++ b/include/linux/eytzinger.h
@@ -0,0 +1,289 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_EYTZINGER_H
+#define _LINUX_EYTZINGER_H
+
+#include <linux/bitops.h>
+#include <linux/log2.h>
+
+#ifdef EYTZINGER_DEBUG
+#define EYTZINGER_BUG_ON(cond) BUG_ON(cond)
+#else
+#define EYTZINGER_BUG_ON(cond)
+#endif
+
+/*
+ * Traversal for trees in eytzinger layout - a full binary tree laid out in an
+ * array.
+ *
+ * Consider using an eytzinger tree any time you would otherwise be doing binary
+ * search over an array. Binary search is a worst case scenario for branch
+ * prediction and prefetching, but in an eytzinger tree every node's children
+ * are adjacent in memory, thus we can prefetch children before knowing the
+ * result of the comparison, assuming multiple nodes fit on a cacheline.
+ *
+ * Two variants are provided, for one-based indexing and zero-based indexing.
+ *
+ * Zero-based indexing is more convenient, but one-based indexing has better
+ * alignment and thus better performance because each new level of the tree
+ * starts at a power of two, and thus if element 0 was cacheline aligned, each
+ * new level will be as well.
+ */
+
+static inline unsigned eytzinger1_child(unsigned i, unsigned child)
+{
+ EYTZINGER_BUG_ON(child > 1);
+
+ return (i << 1) + child;
+}
+
+static inline unsigned eytzinger1_left_child(unsigned i)
+{
+ return eytzinger1_child(i, 0);
+}
+
+static inline unsigned eytzinger1_right_child(unsigned i)
+{
+ return eytzinger1_child(i, 1);
+}
+
+static inline unsigned eytzinger1_first(unsigned size)
+{
+ return rounddown_pow_of_two(size);
+}
+
+static inline unsigned eytzinger1_last(unsigned size)
+{
+ return rounddown_pow_of_two(size + 1) - 1;
+}
+
+/*
+ * eytzinger1_next() and eytzinger1_prev() have the nice properties that
+ *
+ * eytzinger1_next(0) == eytzinger1_first()
+ * eytzinger1_prev(0) == eytzinger1_last()
+ *
+ * eytzinger1_prev(eytzinger1_first()) == 0
+ * eytzinger1_next(eytzinger1_last()) == 0
+ */
+
+static inline unsigned eytzinger1_next(unsigned i, unsigned size)
+{
+ EYTZINGER_BUG_ON(i > size);
+
+ if (eytzinger1_right_child(i) <= size) {
+ i = eytzinger1_right_child(i);
+
+ i <<= __fls(size + 1) - __fls(i);
+ i >>= i > size;
+ } else {
+ i >>= ffz(i) + 1;
+ }
+
+ return i;
+}
+
+static inline unsigned eytzinger1_prev(unsigned i, unsigned size)
+{
+ EYTZINGER_BUG_ON(i > size);
+
+ if (eytzinger1_left_child(i) <= size) {
+ i = eytzinger1_left_child(i) + 1;
+
+ i <<= __fls(size + 1) - __fls(i);
+ i -= 1;
+ i >>= i > size;
+ } else {
+ i >>= __ffs(i) + 1;
+ }
+
+ return i;
+}
+
+static inline unsigned eytzinger1_extra(unsigned size)
+{
+ return (size + 1 - rounddown_pow_of_two(size)) << 1;
+}
+
+static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size,
+ unsigned extra)
+{
+ unsigned b = __fls(i);
+ unsigned shift = __fls(size) - b;
+ int s;
+
+ EYTZINGER_BUG_ON(!i || i > size);
+
+ i ^= 1U << b;
+ i <<= 1;
+ i |= 1;
+ i <<= shift;
+
+ /*
+ * sign bit trick:
+ *
+ * if (i > extra)
+ * i -= (i - extra) >> 1;
+ */
+ s = extra - i;
+ i += (s >> 1) & (s >> 31);
+
+ return i;
+}
+
+static inline unsigned __inorder_to_eytzinger1(unsigned i, unsigned size,
+ unsigned extra)
+{
+ unsigned shift;
+ int s;
+
+ EYTZINGER_BUG_ON(!i || i > size);
+
+ /*
+ * sign bit trick:
+ *
+ * if (i > extra)
+ * i += i - extra;
+ */
+ s = extra - i;
+ i -= s & (s >> 31);
+
+ shift = __ffs(i);
+
+ i >>= shift + 1;
+ i |= 1U << (__fls(size) - shift);
+
+ return i;
+}
+
+static inline unsigned eytzinger1_to_inorder(unsigned i, unsigned size)
+{
+ return __eytzinger1_to_inorder(i, size, eytzinger1_extra(size));
+}
+
+static inline unsigned inorder_to_eytzinger1(unsigned i, unsigned size)
+{
+ return __inorder_to_eytzinger1(i, size, eytzinger1_extra(size));
+}
+
+#define eytzinger1_for_each(_i, _size) \
+ for (unsigned (_i) = eytzinger1_first((_size)); \
+ (_i) != 0; \
+ (_i) = eytzinger1_next((_i), (_size)))
+
+/* Zero based indexing version: */
+
+static inline unsigned eytzinger0_child(unsigned i, unsigned child)
+{
+ EYTZINGER_BUG_ON(child > 1);
+
+ return (i << 1) + 1 + child;
+}
+
+static inline unsigned eytzinger0_left_child(unsigned i)
+{
+ return eytzinger0_child(i, 0);
+}
+
+static inline unsigned eytzinger0_right_child(unsigned i)
+{
+ return eytzinger0_child(i, 1);
+}
+
+static inline unsigned eytzinger0_first(unsigned size)
+{
+ return eytzinger1_first(size) - 1;
+}
+
+static inline unsigned eytzinger0_last(unsigned size)
+{
+ return eytzinger1_last(size) - 1;
+}
+
+static inline unsigned eytzinger0_next(unsigned i, unsigned size)
+{
+ return eytzinger1_next(i + 1, size) - 1;
+}
+
+static inline unsigned eytzinger0_prev(unsigned i, unsigned size)
+{
+ return eytzinger1_prev(i + 1, size) - 1;
+}
+
+static inline unsigned eytzinger0_extra(unsigned size)
+{
+ return eytzinger1_extra(size);
+}
+
+static inline unsigned __eytzinger0_to_inorder(unsigned i, unsigned size,
+ unsigned extra)
+{
+ return __eytzinger1_to_inorder(i + 1, size, extra) - 1;
+}
+
+static inline unsigned __inorder_to_eytzinger0(unsigned i, unsigned size,
+ unsigned extra)
+{
+ return __inorder_to_eytzinger1(i + 1, size, extra) - 1;
+}
+
+static inline unsigned eytzinger0_to_inorder(unsigned i, unsigned size)
+{
+ return __eytzinger0_to_inorder(i, size, eytzinger0_extra(size));
+}
+
+static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size)
+{
+ return __inorder_to_eytzinger0(i, size, eytzinger0_extra(size));
+}
+
+#define eytzinger0_for_each(_i, _size) \
+ for (unsigned (_i) = eytzinger0_first((_size)); \
+ (_i) != -1; \
+ (_i) = eytzinger0_next((_i), (_size)))
+
+/* return greatest node <= @search, or -1 if not found */
+static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size,
+ cmp_func_t cmp, const void *search)
+{
+ unsigned i, n = 0;
+
+ if (!nr)
+ return -1;
+
+ do {
+ i = n;
+ n = eytzinger0_child(i, cmp(search, base + i * size) >= 0);
+ } while (n < nr);
+
+ if (n & 1) {
+ /* @i was greater than @search, return previous node: */
+
+ if (i == eytzinger0_first(nr))
+ return -1;
+
+ return eytzinger0_prev(i, nr);
+ } else {
+ return i;
+ }
+}
+
+#define eytzinger0_find(base, nr, size, _cmp, search) \
+({ \
+ void *_base = (base); \
+ const void *_search = (search); \
+ size_t _nr = (nr); \
+ size_t _size = (size); \
+ size_t _i = 0; \
+ int _res; \
+ \
+ while (_i < _nr && \
+ (_res = _cmp(_search, _base + _i * _size, _size))) \
+ _i = eytzinger0_child(_i, _res > 0); \
+ _i; \
+})
+
+void eytzinger0_sort_r(void *, size_t, size_t,
+ cmp_r_func_t, swap_r_func_t, const void *);
+void eytzinger0_sort(void *, size_t, size_t, cmp_func_t, swap_func_t);
+
+#endif /* _LINUX_EYTZINGER_H */
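
As a concrete illustration of the layout and the search helper (array contents and names below are illustrative): the sorted values 1..7 stored in zero-based eytzinger order are { 4, 2, 6, 1, 3, 5, 7 }; element 0 is the root, and the children of element i are elements 2i+1 and 2i+2.

#include <linux/eytzinger.h>
#include <linux/kernel.h>
#include <linux/printk.h>

/* a cmp_func_t over u32 keys; eytzinger0_find_le() calls it as cmp(search, element) */
static int cmp_u32(const void *l, const void *r)
{
	u32 a = *(const u32 *) l, b = *(const u32 *) r;

	return a < b ? -1 : a > b;
}

static void example(void)
{
	u32 tree[] = { 4, 2, 6, 1, 3, 5, 7 };	/* 1..7 in eytzinger0 layout */
	u32 search = 5;
	ssize_t idx;

	/* in-order iteration: prints 1, 2, 3, ..., 7 */
	eytzinger0_for_each(i, ARRAY_SIZE(tree))
		pr_info("%u\n", tree[i]);

	/* index of the greatest element <= 5; here idx == 5 and tree[idx] == 5 */
	idx = eytzinger0_find_le(tree, ARRAY_SIZE(tree), sizeof(tree[0]),
				 cmp_u32, &search);
	pr_info("found at %zd\n", idx);
}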
diff --git a/include/linux/mean_and_variance.h b/include/linux/mean_and_variance.h
new file mode 100644
index 00000000..4fcf062d
--- /dev/null
+++ b/include/linux/mean_and_variance.h
@@ -0,0 +1,203 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef MEAN_AND_VARIANCE_H_
+#define MEAN_AND_VARIANCE_H_
+
+#include <linux/types.h>
+#include <linux/limits.h>
+#include <linux/math.h>
+#include <linux/math64.h>
+
+#define SQRT_U64_MAX 4294967295ULL
+
+/*
+ * u128_u: u128 user mode, because not all architectures support a real int128
+ * type
+ *
+ * We don't use this version in userspace, because in userspace we link with
+ * Rust and rustc has issues with u128.
+ */
+
+#if defined(__SIZEOF_INT128__) && defined(__KERNEL__) && !defined(CONFIG_PARISC)
+
+typedef struct {
+ unsigned __int128 v;
+} __aligned(16) u128_u;
+
+static inline u128_u u64_to_u128(u64 a)
+{
+ return (u128_u) { .v = a };
+}
+
+static inline u64 u128_lo(u128_u a)
+{
+ return a.v;
+}
+
+static inline u64 u128_hi(u128_u a)
+{
+ return a.v >> 64;
+}
+
+static inline u128_u u128_add(u128_u a, u128_u b)
+{
+ a.v += b.v;
+ return a;
+}
+
+static inline u128_u u128_sub(u128_u a, u128_u b)
+{
+ a.v -= b.v;
+ return a;
+}
+
+static inline u128_u u128_shl(u128_u a, s8 shift)
+{
+ a.v <<= shift;
+ return a;
+}
+
+static inline u128_u u128_square(u64 a)
+{
+ u128_u b = u64_to_u128(a);
+
+ b.v *= b.v;
+ return b;
+}
+
+#else
+
+typedef struct {
+ u64 hi, lo;
+} __aligned(16) u128_u;
+
+/* conversions */
+
+static inline u128_u u64_to_u128(u64 a)
+{
+ return (u128_u) { .lo = a };
+}
+
+static inline u64 u128_lo(u128_u a)
+{
+ return a.lo;
+}
+
+static inline u64 u128_hi(u128_u a)
+{
+ return a.hi;
+}
+
+/* arithmetic */
+
+static inline u128_u u128_add(u128_u a, u128_u b)
+{
+ u128_u c;
+
+ c.lo = a.lo + b.lo;
+ c.hi = a.hi + b.hi + (c.lo < a.lo);
+ return c;
+}
+
+static inline u128_u u128_sub(u128_u a, u128_u b)
+{
+ u128_u c;
+
+ c.lo = a.lo - b.lo;
+ c.hi = a.hi - b.hi - (c.lo > a.lo);
+ return c;
+}
+
+static inline u128_u u128_shl(u128_u i, s8 shift)
+{
+ u128_u r;
+
+ r.lo = i.lo << shift;
+ if (shift < 64)
+ r.hi = (i.hi << shift) | (i.lo >> (64 - shift));
+ else {
+ r.hi = i.lo << (shift - 64);
+ r.lo = 0;
+ }
+ return r;
+}
+
+static inline u128_u u128_square(u64 i)
+{
+ u128_u r;
+ u64 h = i >> 32, l = i & U32_MAX;
+
+ r = u128_shl(u64_to_u128(h*h), 64);
+ r = u128_add(r, u128_shl(u64_to_u128(h*l), 32));
+ r = u128_add(r, u128_shl(u64_to_u128(l*h), 32));
+ r = u128_add(r, u64_to_u128(l*l));
+ return r;
+}
+
+#endif
+
+static inline u128_u u64s_to_u128(u64 hi, u64 lo)
+{
+ u128_u c = u64_to_u128(hi);
+
+ c = u128_shl(c, 64);
+ c = u128_add(c, u64_to_u128(lo));
+ return c;
+}
+
+u128_u u128_div(u128_u n, u64 d);
+
+struct mean_and_variance {
+ s64 n;
+ s64 sum;
+ u128_u sum_squares;
+};
+
+/* exponentially weighted variant */
+struct mean_and_variance_weighted {
+ s64 mean;
+ u64 variance;
+};
+
+/**
+ * fast_divpow2() - fast approximation for n / (1 << d)
+ * @n: numerator
+ * @d: log2 of the denominator, i.e. the denominator is (1 << @d).
+ *
+ * note: this rounds towards 0.
+ */
+static inline s64 fast_divpow2(s64 n, u8 d)
+{
+ return (n + ((n < 0) ? ((1 << d) - 1) : 0)) >> d;
+}
+
+/**
+ * mean_and_variance_update() - update a mean_and_variance struct @s with a new sample @v
+ * @s: the mean_and_variance to update.
+ * @v: the new sample.
+ *
+ * see linked pdf equation 12.
+ */
+static inline void
+mean_and_variance_update(struct mean_and_variance *s, s64 v)
+{
+ s->n++;
+ s->sum += v;
+ s->sum_squares = u128_add(s->sum_squares, u128_square(abs(v)));
+}
+
+s64 mean_and_variance_get_mean(struct mean_and_variance s);
+u64 mean_and_variance_get_variance(struct mean_and_variance s1);
+u32 mean_and_variance_get_stddev(struct mean_and_variance s);
+
+void mean_and_variance_weighted_update(struct mean_and_variance_weighted *s,
+ s64 v, bool initted, u8 weight);
+
+s64 mean_and_variance_weighted_get_mean(struct mean_and_variance_weighted s,
+ u8 weight);
+u64 mean_and_variance_weighted_get_variance(struct mean_and_variance_weighted s,
+ u8 weight);
+u32 mean_and_variance_weighted_get_stddev(struct mean_and_variance_weighted s,
+ u8 weight);
+
+#endif // MEAN_AND_VARIANCE_H_
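
A sketch of how these helpers fit together; the sample values and names are illustrative, and since the get_*() helpers are implemented out of line, the exact variance convention is assumed here:

#include <linux/mean_and_variance.h>
#include <linux/kernel.h>
#include <linux/printk.h>

static void example(void)
{
	struct mean_and_variance latency = {};
	s64 samples[] = { 100, 120, 80, 110, 90 };

	for (unsigned i = 0; i < ARRAY_SIZE(samples); i++)
		mean_and_variance_update(&latency, samples[i]);

	/* mean is 100 for the samples above; stddev is ~14 assuming the
	 * out-of-line helper computes the population variance */
	pr_info("mean %lli stddev %u\n",
		mean_and_variance_get_mean(latency),
		mean_and_variance_get_stddev(latency));
}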
diff --git a/include/linux/mempool.h b/include/linux/mempool.h
index 506da24d..37325170 100644
--- a/include/linux/mempool.h
+++ b/include/linux/mempool.h
@@ -90,6 +90,19 @@ static inline mempool_t *mempool_create_kmalloc_pool(int min_nr, size_t size)
(void *) size);
}
+void *mempool_kvmalloc(gfp_t gfp_mask, void *pool_data);
+void mempool_kvfree(void *element, void *pool_data);
+
+static inline int mempool_init_kvmalloc_pool(mempool_t *pool, int min_nr, size_t size)
+{
+ return mempool_init(pool, min_nr, mempool_kvmalloc, mempool_kvfree, (void *) size);
+}
+
+static inline mempool_t *mempool_create_kvmalloc_pool(int min_nr, size_t size)
+{
+ return mempool_create(min_nr, mempool_kvmalloc, mempool_kvfree, (void *) size);
+}
+
/*
* A mempool_alloc_t and mempool_free_t for a simple page allocator that
* allocates pages of the order specified by pool_data
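
A sketch of how the new kvmalloc-backed constructors might be used; the element size and reserve depth below are arbitrary. kvmalloc() falls back to vmalloc() for large allocations, which is the point of this pool type:

#include <linux/mempool.h>
#include <linux/errno.h>

static mempool_t *my_pool;

static int example_init(void)
{
	/* keep at least 2 elements of 64k in reserve, allocated with kvmalloc() */
	my_pool = mempool_create_kvmalloc_pool(2, 64 * 1024);
	if (!my_pool)
		return -ENOMEM;
	return 0;
}

static void example_use(void)
{
	/* with GFP_KERNEL this waits for a free element rather than failing */
	void *buf = mempool_alloc(my_pool, GFP_KERNEL);

	/* ... use buf ... */
	mempool_free(buf, my_pool);
}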
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 6c4a623c..28ce667b 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -1,65 +1 @@
-#ifndef __TOOLS_LINUX_SPINLOCK_H
-#define __TOOLS_LINUX_SPINLOCK_H
-
-#include <linux/atomic.h>
-#include <pthread.h>
-
-typedef struct {
- pthread_mutex_t lock;
-} raw_spinlock_t;
-
-#define __RAW_SPIN_LOCK_UNLOCKED(name) (raw_spinlock_t) { .lock = PTHREAD_MUTEX_INITIALIZER }
-
-static inline void raw_spin_lock_init(raw_spinlock_t *lock)
-{
- pthread_mutex_init(&lock->lock, NULL);
-}
-
-static inline bool raw_spin_trylock(raw_spinlock_t *lock)
-{
- return !pthread_mutex_trylock(&lock->lock);
-}
-
-static inline void raw_spin_lock(raw_spinlock_t *lock)
-{
- pthread_mutex_lock(&lock->lock);
-}
-
-static inline void raw_spin_unlock(raw_spinlock_t *lock)
-{
- pthread_mutex_unlock(&lock->lock);
-}
-
-#define raw_spin_lock_irq(lock) raw_spin_lock(lock)
-#define raw_spin_unlock_irq(lock) raw_spin_unlock(lock)
-
-#define raw_spin_lock_irqsave(lock, flags) \
-do { \
- flags = 0; \
- raw_spin_lock(lock); \
-} while (0)
-
-#define raw_spin_unlock_irqrestore(lock, flags) raw_spin_unlock(lock)
-
-typedef raw_spinlock_t spinlock_t;
-
-#define __SPIN_LOCK_UNLOCKED(name) __RAW_SPIN_LOCK_UNLOCKED(name)
-
-#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
-
-#define spin_lock_init(lock) raw_spin_lock_init(lock)
-#define spin_lock(lock) raw_spin_lock(lock)
-#define spin_unlock(lock) raw_spin_unlock(lock)
-
-#define spin_lock_nested(lock, n) spin_lock(lock)
-
-#define spin_lock_bh(lock) raw_spin_lock(lock)
-#define spin_unlock_bh(lock) raw_spin_unlock(lock)
-
-#define spin_lock_irq(lock) raw_spin_lock(lock)
-#define spin_unlock_irq(lock) raw_spin_unlock(lock)
-
-#define spin_lock_irqsave(lock, flags) raw_spin_lock_irqsave(lock, flags)
-#define spin_unlock_irqrestore(lock, flags) raw_spin_unlock_irqrestore(lock, flags)
-
-#endif /* __TOOLS_LINUX_SPINLOCK_H */
+#include "linux/spinlock_types.h"
diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h
new file mode 100644
index 00000000..6c4a623c
--- /dev/null
+++ b/include/linux/spinlock_types.h
@@ -0,0 +1,65 @@
+#ifndef __TOOLS_LINUX_SPINLOCK_H
+#define __TOOLS_LINUX_SPINLOCK_H
+
+#include <linux/atomic.h>
+#include <pthread.h>
+
+typedef struct {
+ pthread_mutex_t lock;
+} raw_spinlock_t;
+
+#define __RAW_SPIN_LOCK_UNLOCKED(name) (raw_spinlock_t) { .lock = PTHREAD_MUTEX_INITIALIZER }
+
+static inline void raw_spin_lock_init(raw_spinlock_t *lock)
+{
+ pthread_mutex_init(&lock->lock, NULL);
+}
+
+static inline bool raw_spin_trylock(raw_spinlock_t *lock)
+{
+ return !pthread_mutex_trylock(&lock->lock);
+}
+
+static inline void raw_spin_lock(raw_spinlock_t *lock)
+{
+ pthread_mutex_lock(&lock->lock);
+}
+
+static inline void raw_spin_unlock(raw_spinlock_t *lock)
+{
+ pthread_mutex_unlock(&lock->lock);
+}
+
+#define raw_spin_lock_irq(lock) raw_spin_lock(lock)
+#define raw_spin_unlock_irq(lock) raw_spin_unlock(lock)
+
+#define raw_spin_lock_irqsave(lock, flags) \
+do { \
+ flags = 0; \
+ raw_spin_lock(lock); \
+} while (0)
+
+#define raw_spin_unlock_irqrestore(lock, flags) raw_spin_unlock(lock)
+
+typedef raw_spinlock_t spinlock_t;
+
+#define __SPIN_LOCK_UNLOCKED(name) __RAW_SPIN_LOCK_UNLOCKED(name)
+
+#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
+
+#define spin_lock_init(lock) raw_spin_lock_init(lock)
+#define spin_lock(lock) raw_spin_lock(lock)
+#define spin_unlock(lock) raw_spin_unlock(lock)
+
+#define spin_lock_nested(lock, n) spin_lock(lock)
+
+#define spin_lock_bh(lock) raw_spin_lock(lock)
+#define spin_unlock_bh(lock) raw_spin_unlock(lock)
+
+#define spin_lock_irq(lock) raw_spin_lock(lock)
+#define spin_unlock_irq(lock) raw_spin_unlock(lock)
+
+#define spin_lock_irqsave(lock, flags) raw_spin_lock_irqsave(lock, flags)
+#define spin_unlock_irqrestore(lock, flags) raw_spin_unlock_irqrestore(lock, flags)
+
+#endif /* __TOOLS_LINUX_SPINLOCK_H */
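
Since this is the userspace (bcachefs-tools) shim, every spinlock variant boils down to a pthread mutex, and the irq/bh flavours do nothing extra. A minimal sketch of the emulated API; names are illustrative:

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(counter_lock);
static unsigned long counter;

static void bump(void)
{
	unsigned long flags;

	/* in this shim, "disabling interrupts" just takes the pthread mutex
	 * and sets flags to 0 */
	spin_lock_irqsave(&counter_lock, flags);
	counter++;
	spin_unlock_irqrestore(&counter_lock, flags);
}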
diff --git a/include/linux/thread_with_file.h b/include/linux/thread_with_file.h
new file mode 100644
index 00000000..2a66e762
--- /dev/null
+++ b/include/linux/thread_with_file.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * (C) 2022-2024 Kent Overstreet <kent.overstreet@linux.dev>
+ */
+#ifndef _LINUX_THREAD_WITH_FILE_H
+#define _LINUX_THREAD_WITH_FILE_H
+
+struct stdio_redirect;
+
+__printf(3, 0)
+static inline void stdio_redirect_vprintf(struct stdio_redirect *s, bool nonblocking, const char *msg, va_list args) {}
+__printf(3, 4)
+static inline void stdio_redirect_printf(struct stdio_redirect *s, bool nonblocking, const char *msg, ...) {}
+
+#endif /* _LINUX_THREAD_WITH_FILE_H */
diff --git a/include/linux/thread_with_file_types.h b/include/linux/thread_with_file_types.h
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/include/linux/thread_with_file_types.h
diff --git a/include/linux/time.h b/include/linux/time.h
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/include/linux/time.h
diff --git a/include/linux/time_stats.h b/include/linux/time_stats.h
new file mode 100644
index 00000000..6df2b34a
--- /dev/null
+++ b/include/linux/time_stats.h
@@ -0,0 +1,167 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * time_stats - collect statistics on events that have a duration, with nicely
+ * formatted textual output on demand
+ *
+ * - percpu buffering of event collection: cheap enough to shotgun
+ * everywhere without worrying about overhead
+ *
+ * tracks:
+ * - number of events
+ * - maximum event duration ever seen
+ * - sum of all event durations
+ * - average event duration, standard and weighted
+ * - standard deviation of event durations, standard and weighted
+ * and analogous statistics for the frequency of events
+ *
+ * We provide both mean and weighted mean (exponentially weighted), and standard
+ * deviation and weighted standard deviation, to give an efficient-to-compute
+ * view of current behaviour versus average behaviour - "did this event source
+ * just become wonky, or is this typical?".
+ *
+ * Particularly useful for tracking down latency issues.
+ */
+#ifndef _LINUX_TIME_STATS_H
+#define _LINUX_TIME_STATS_H
+
+#include <linux/mean_and_variance.h>
+#include <linux/sched/clock.h>
+#include <linux/spinlock_types.h>
+#include <linux/string.h>
+
+struct time_unit {
+ const char *name;
+ u64 nsecs;
+};
+
+/*
+ * given a nanosecond value, pick the preferred time units for printing:
+ */
+const struct time_unit *pick_time_units(u64 ns);
+
+/*
+ * quantiles - do not use:
+ *
+ * Only enabled if time_stats->have_quantiles has been manually set - don't
+ * use in new code.
+ */
+
+#define NR_QUANTILES 15
+#define QUANTILE_IDX(i) inorder_to_eytzinger0(i, NR_QUANTILES)
+#define QUANTILE_FIRST eytzinger0_first(NR_QUANTILES)
+#define QUANTILE_LAST eytzinger0_last(NR_QUANTILES)
+
+struct quantiles {
+ struct quantile_entry {
+ u64 m;
+ u64 step;
+ } entries[NR_QUANTILES];
+};
+
+struct time_stat_buffer {
+ unsigned nr;
+ struct time_stat_buffer_entry {
+ u64 start;
+ u64 end;
+ } entries[31];
+};
+
+struct time_stats {
+ spinlock_t lock;
+ bool have_quantiles;
+ /* all fields are in nanoseconds */
+ u64 min_duration;
+ u64 max_duration;
+ u64 total_duration;
+ u64 max_freq;
+ u64 min_freq;
+ u64 last_event;
+ u64 last_event_start;
+
+ struct mean_and_variance duration_stats;
+ struct mean_and_variance freq_stats;
+
+/* default weight for weighted mean and variance calculations */
+#define TIME_STATS_MV_WEIGHT 8
+
+ struct mean_and_variance_weighted duration_stats_weighted;
+ struct mean_and_variance_weighted freq_stats_weighted;
+ struct time_stat_buffer __percpu *buffer;
+
+ u64 start_time;
+};
+
+struct time_stats_quantiles {
+ struct time_stats stats;
+ struct quantiles quantiles;
+};
+
+static inline struct quantiles *time_stats_to_quantiles(struct time_stats *stats)
+{
+ return stats->have_quantiles
+ ? &container_of(stats, struct time_stats_quantiles, stats)->quantiles
+ : NULL;
+}
+
+void __time_stats_clear_buffer(struct time_stats *, struct time_stat_buffer *);
+void __time_stats_update(struct time_stats *stats, u64, u64);
+
+/**
+ * time_stats_update - collect a new event being tracked
+ *
+ * @stats: time_stats to update
+ * @start: start time of the event, as recorded by local_clock()
+ *
+ * The end time of the event is taken to be the current time.
+ */
+static inline void time_stats_update(struct time_stats *stats, u64 start)
+{
+ __time_stats_update(stats, start, local_clock());
+}
+
+/**
+ * track_event_change - track state change events
+ *
+ * @stats: time_stats to update
+ * @v: the new state, true or false
+ *
+ * Use this when tracking time stats for state changes, e.g. resource X
+ * becoming blocked/unblocked.
+ */
+static inline bool track_event_change(struct time_stats *stats, bool v)
+{
+ if (v != !!stats->last_event_start) {
+ if (!v) {
+ time_stats_update(stats, stats->last_event_start);
+ stats->last_event_start = 0;
+ } else {
+ stats->last_event_start = local_clock() ?: 1;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+#define TIME_STATS_PRINT_NO_ZEROES (1U << 0) /* print nothing if zero count */
+struct seq_buf;
+void time_stats_to_seq_buf(struct seq_buf *, struct time_stats *,
+ const char *epoch_name, unsigned int flags);
+void time_stats_to_json(struct seq_buf *, struct time_stats *,
+ const char *epoch_name, unsigned int flags);
+
+void time_stats_exit(struct time_stats *);
+void time_stats_init(struct time_stats *);
+
+static inline void time_stats_quantiles_exit(struct time_stats_quantiles *statq)
+{
+ time_stats_exit(&statq->stats);
+}
+static inline void time_stats_quantiles_init(struct time_stats_quantiles *statq)
+{
+ time_stats_init(&statq->stats);
+ statq->stats.have_quantiles = true;
+ memset(&statq->quantiles, 0, sizeof(statq->quantiles));
+}
+
+#endif /* _LINUX_TIME_STATS_H */
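
A sketch of the intended usage pattern (the struct placement and names here are illustrative): record local_clock() at the start of the operation and hand it to time_stats_update() when the operation finishes.

#include <linux/time_stats.h>

static struct time_stats read_times;

static void timed_read(void)
{
	u64 start = local_clock();

	/* ... the work being measured ... */

	/* records the duration (now - start) plus the time since the previous
	 * event, updating the mean/stddev and the percpu buffer */
	time_stats_update(&read_times, start);
}

static int setup(void)
{
	time_stats_init(&read_times);
	return 0;
}

static void teardown(void)
{
	time_stats_exit(&read_times);
}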
diff --git a/include/linux/types.h b/include/linux/types.h
index ce454e26..6ae97c42 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -8,6 +8,7 @@
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
+#include <linux/posix_types.h>
#define __SANE_USERSPACE_TYPES__ /* For PPC64, to get LL64 types */
#include <asm/types.h>
@@ -77,6 +78,10 @@ typedef __u64 __bitwise __be64;
typedef u64 sector_t;
+typedef void (*swap_r_func_t)(void *a, void *b, int size, const void *priv);
+typedef void (*swap_func_t)(void *a, void *b, int size);
+
+typedef int (*cmp_r_func_t)(const void *a, const void *b, const void *priv);
typedef int (*cmp_func_t)(const void *a, const void *b);
typedef unsigned int __bitwise slab_flags_t;