author    Kent Overstreet <kent.overstreet@linux.dev>    2024-02-05 23:09:25 -0500
committer Kent Overstreet <kent.overstreet@linux.dev>    2024-02-06 01:07:16 -0500
commit    f3f005c76eb5636542a8f5b137bd1904d57e8f86 (patch)
tree      ecada3604cb1668411386264ed92d0e61536a93a /linux
parent    1ef396b684a419b5a50ab215103486d189068800 (diff)
Update bcachefs sources to 50847e296b34 bcachefs: Check subvol <-> inode pointers in check_inode()
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to 'linux')
-rw-r--r--   linux/darray.c              27
-rw-r--r--   linux/mean_and_variance.c  167
-rw-r--r--   linux/mempool.c             13
-rw-r--r--   linux/sort.c               368
-rw-r--r--   linux/time_stats.c         373
5 files changed, 948 insertions, 0 deletions
diff --git a/linux/darray.c b/linux/darray.c
new file mode 100644
index 00000000..80e77959
--- /dev/null
+++ b/linux/darray.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * (C) 2022-2024 Kent Overstreet <kent.overstreet@linux.dev>
+ */
+
+#include <linux/darray.h>
+#include <linux/log2.h>
+#include <linux/slab.h>
+
+int __darray_resize_slowpath(darray_char *d, size_t element_size, size_t new_size, gfp_t gfp)
+{
+ if (new_size > d->size) {
+ new_size = roundup_pow_of_two(new_size);
+
+ void *data = kvmalloc_array(new_size, element_size, gfp);
+ if (!data)
+ return -ENOMEM;
+
+ memcpy(data, d->data, d->size * element_size);
+ if (d->data != d->preallocated)
+ kvfree(d->data);
+ d->data = data;
+ d->size = new_size;
+ }
+
+ return 0;
+}
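A minimal userspace sketch (not part of this commit) of the growth policy the slowpath above implements: capacity always jumps to the next power of two, so repeated appends amortize to O(1) per element. The real code allocates fresh storage and memcpy()s rather than realloc(), because d->data may still point at the inline d->preallocated buffer; the names below are hypothetical.

#include <stdlib.h>

struct sketch_darray {
	int	*data;
	size_t	size;	/* allocated elements */
	size_t	nr;	/* used elements */
};

/* grow to the next power of two >= new_size, as the slowpath does */
static int sketch_resize(struct sketch_darray *d, size_t new_size)
{
	if (new_size <= d->size)
		return 0;

	size_t rounded = 1;
	while (rounded < new_size)
		rounded *= 2;

	int *data = realloc(d->data, rounded * sizeof(*data));
	if (!data)
		return -1;

	d->data = data;
	d->size = rounded;
	return 0;
}

static int sketch_push(struct sketch_darray *d, int v)
{
	if (sketch_resize(d, d->nr + 1))
		return -1;
	d->data[d->nr++] = v;
	return 0;
}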
diff --git a/linux/mean_and_variance.c b/linux/mean_and_variance.c
new file mode 100644
index 00000000..b93d150d
--- /dev/null
+++ b/linux/mean_and_variance.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Functions for incremental mean and variance.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * Copyright © 2022 Daniel B. Hill
+ *
+ * Author: Daniel B. Hill <daniel@gluo.nz>
+ *
+ * Description:
+ *
+ * This includes some incremental algorithms for mean and variance calculation.
+ *
+ * Derived from the paper: https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf
+ *
+ * Create a struct; for the weighted variant, the weight is passed to the update/get functions (weight = 2^k).
+ *
+ * Use mean_and_variance[_weighted]_update() on the struct to update its state.
+ *
+ * Use the mean_and_variance[_weighted]_get_* functions to calculate the mean and variance; some computation
+ * is deferred to these functions for performance reasons.
+ *
+ * See lib/math/mean_and_variance_test.c for examples of usage.
+ *
+ * DO NOT access the mean and variance fields of the weighted variants directly.
+ * DO NOT change the weight after calling update.
+ */
+
+#include <linux/bug.h>
+#include <linux/compiler.h>
+#include <linux/export.h>
+#include <linux/limits.h>
+#include <linux/math.h>
+#include <linux/math64.h>
+#include <linux/mean_and_variance.h>
+#include <linux/module.h>
+
+u128_u u128_div(u128_u n, u64 d)
+{
+ u128_u r;
+ u64 rem;
+ u64 hi = u128_hi(n);
+ u64 lo = u128_lo(n);
+ u64 h = hi & ((u64) U32_MAX << 32);
+ u64 l = (hi & (u64) U32_MAX) << 32;
+
+ r = u128_shl(u64_to_u128(div64_u64_rem(h, d, &rem)), 64);
+ r = u128_add(r, u128_shl(u64_to_u128(div64_u64_rem(l + (rem << 32), d, &rem)), 32));
+ r = u128_add(r, u64_to_u128(div64_u64_rem(lo + (rem << 32), d, &rem)));
+ return r;
+}
+EXPORT_SYMBOL_GPL(u128_div);
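The division above is schoolbook long division: the 128-bit numerator is consumed in 32-bit chunks so each partial numerator fits div64_u64_rem(). A half-width illustration of the same idea (not from the source), dividing a 64-bit value by a 32-bit divisor in two 32-bit steps:

#include <stdint.h>

static uint64_t div_u64_by_u32_sketch(uint64_t n, uint32_t d)
{
	uint32_t hi = n >> 32, lo = (uint32_t)n;

	/* quotient and remainder of the high chunk */
	uint64_t q_hi = hi / d;
	uint64_t rem  = hi % d;

	/* bring down the low chunk; rem < d, so this fits in 64 bits */
	uint64_t q_lo = ((rem << 32) | lo) / d;

	return (q_hi << 32) | q_lo;
}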
+
+/**
+ * mean_and_variance_get_mean() - get mean from @s
+ * @s: mean and variance number of samples and their sums
+ */
+s64 mean_and_variance_get_mean(struct mean_and_variance s)
+{
+ return s.n ? div64_u64(s.sum, s.n) : 0;
+}
+EXPORT_SYMBOL_GPL(mean_and_variance_get_mean);
+
+/**
+ * mean_and_variance_get_variance() - get variance from @s1
+ * @s1: mean and variance number of samples and sums
+ *
+ * see linked pdf equation 12.
+ */
+u64 mean_and_variance_get_variance(struct mean_and_variance s1)
+{
+ if (s1.n) {
+ u128_u s2 = u128_div(s1.sum_squares, s1.n);
+ u64 s3 = abs(mean_and_variance_get_mean(s1));
+
+ return u128_lo(u128_sub(s2, u128_square(s3)));
+ } else {
+ return 0;
+ }
+}
+EXPORT_SYMBOL_GPL(mean_and_variance_get_variance);
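In other words, this evaluates the population-variance identity Var(X) = E[X^2] - (E[X])^2: s2 is E[X^2], computed from the 128-bit sum of squares so the intermediate value cannot overflow, and s3 is |E[X]|.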
+
+/**
+ * mean_and_variance_get_stddev() - get standard deviation from @s
+ * @s: mean and variance number of samples and their sums
+ */
+u32 mean_and_variance_get_stddev(struct mean_and_variance s)
+{
+ return int_sqrt64(mean_and_variance_get_variance(s));
+}
+EXPORT_SYMBOL_GPL(mean_and_variance_get_stddev);
+
+/**
+ * mean_and_variance_weighted_update() - exponentially weighted variant of mean_and_variance_update()
+ * @s: mean and variance number of samples and their sums
+ * @x: new value to include in the &mean_and_variance_weighted
+ *
+ * see linked pdf: function derived from equations 140-143 where alpha = 2^w.
+ * values are stored bitshifted for performance and added precision.
+ */
+void mean_and_variance_weighted_update(struct mean_and_variance_weighted *s,
+ s64 x, bool initted, u8 weight)
+{
+ // previous weighted variance.
+ u8 w = weight;
+ u64 var_w0 = s->variance;
+ // new value weighted.
+ s64 x_w = x << w;
+ s64 diff_w = x_w - s->mean;
+ s64 diff = fast_divpow2(diff_w, w);
+ // new mean weighted.
+ s64 u_w1 = s->mean + diff;
+
+ if (!initted) {
+ s->mean = x_w;
+ s->variance = 0;
+ } else {
+ s->mean = u_w1;
+ s->variance = ((var_w0 << w) - var_w0 + ((diff_w * (x_w - u_w1)) >> w)) >> w;
+ }
+}
+EXPORT_SYMBOL_GPL(mean_and_variance_weighted_update);
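The update above is an exponentially weighted moving average in which each new sample contributes with weight 1/2^w, kept in fixed point: the stored mean is scaled by 2^w so the small per-sample corrections are not lost to integer truncation. A stripped-down sketch of just the mean half (illustration only, not the committed code):

#include <stdint.h>

struct ewma_sketch {
	int64_t mean_shifted;	/* mean scaled by 2^w */
};

static void ewma_update_sketch(struct ewma_sketch *s, int64_t x, uint8_t w)
{
	int64_t x_shifted = x << w;

	/* mean += (x - mean) / 2^w, all in the scaled domain; the real code
	 * uses fast_divpow2() so negative differences round toward zero */
	s->mean_shifted += (x_shifted - s->mean_shifted) >> w;
}

static int64_t ewma_read_sketch(const struct ewma_sketch *s, uint8_t w)
{
	return s->mean_shifted >> w;
}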
+
+/**
+ * mean_and_variance_weighted_get_mean() - get mean from @s
+ * @s: mean and variance number of samples and their sums
+ */
+s64 mean_and_variance_weighted_get_mean(struct mean_and_variance_weighted s,
+ u8 weight)
+{
+ return fast_divpow2(s.mean, weight);
+}
+EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_mean);
+
+/**
+ * mean_and_variance_weighted_get_variance() - get variance from @s
+ * @s: mean and variance number of samples and their sums
+ */
+u64 mean_and_variance_weighted_get_variance(struct mean_and_variance_weighted s,
+ u8 weight)
+{
+	// always positive, so we don't need fast_divpow2()
+ return s.variance >> weight;
+}
+EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_variance);
+
+/**
+ * mean_and_variance_weighted_get_stddev() - get standard deviation from @s
+ * @s: mean and variance number of samples and their sums
+ */
+u32 mean_and_variance_weighted_get_stddev(struct mean_and_variance_weighted s,
+ u8 weight)
+{
+ return int_sqrt64(mean_and_variance_weighted_get_variance(s, weight));
+}
+EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_stddev);
+
+MODULE_AUTHOR("Daniel B. Hill");
+MODULE_LICENSE("GPL");
diff --git a/linux/mempool.c b/linux/mempool.c
index 74d4fbb3..74ed17bf 100644
--- a/linux/mempool.c
+++ b/linux/mempool.c
@@ -522,6 +522,19 @@ void mempool_kfree(void *element, void *pool_data)
}
EXPORT_SYMBOL(mempool_kfree);
+void *mempool_kvmalloc(gfp_t gfp_mask, void *pool_data)
+{
+ size_t size = (size_t)pool_data;
+ return kvmalloc(size, gfp_mask);
+}
+EXPORT_SYMBOL(mempool_kvmalloc);
+
+void mempool_kvfree(void *element, void *pool_data)
+{
+ kvfree(element);
+}
+EXPORT_SYMBOL(mempool_kvfree);
+
/*
* A simple mempool-backed page allocator that allocates pages
* of the order specified by pool_data.
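These two helpers mirror mempool_kmalloc()/mempool_kfree(), but back the pool with kvmalloc()/kvfree() so large element sizes can fall back to vmalloc. A hedged sketch of the intended call pattern (the 16 KiB element size and min_nr of 2 are made up for illustration):

#include <linux/mempool.h>

static int example_init(void)
{
	/* at least two 16 KiB buffers, backed by kvmalloc()/kvfree() */
	mempool_t *pool = mempool_create(2, mempool_kvmalloc, mempool_kvfree,
					 (void *)(size_t)(16 << 10));
	void *buf;

	if (!pool)
		return -ENOMEM;

	buf = mempool_alloc(pool, GFP_KERNEL);
	/* ... use buf ... */
	mempool_free(buf, pool);
	mempool_destroy(pool);
	return 0;
}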
diff --git a/linux/sort.c b/linux/sort.c
new file mode 100644
index 00000000..ffa4817b
--- /dev/null
+++ b/linux/sort.c
@@ -0,0 +1,368 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A fast, small, non-recursive O(n log n) sort for the Linux kernel
+ *
+ * This performs n*log2(n) + 0.37*n + o(n) comparisons on average,
+ * and 1.5*n*log2(n) + O(n) in the (very contrived) worst case.
+ *
+ * Glibc qsort() manages n*log2(n) - 1.26*n for random inputs (1.63*n
+ * better) at the expense of stack usage and much larger code to avoid
+ * quicksort's O(n^2) worst case.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/types.h>
+#include <linux/export.h>
+#include <linux/sort.h>
+
+/**
+ * is_aligned - is this pointer & size okay for word-wide copying?
+ * @base: pointer to data
+ * @size: size of each element
+ * @align: required alignment (typically 4 or 8)
+ *
+ * Returns true if elements can be copied using word loads and stores.
+ * The size must be a multiple of the alignment, and the base address must
+ * be if we do not have CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS.
+ *
+ * For some reason, gcc doesn't know to optimize "if (a & mask || b & mask)"
+ * to "if ((a | b) & mask)", so we do that by hand.
+ */
+__attribute_const__ __always_inline
+static bool is_aligned(const void *base, size_t size, unsigned char align)
+{
+ unsigned char lsbits = (unsigned char)size;
+
+ (void)base;
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ lsbits |= (unsigned char)(uintptr_t)base;
+#endif
+ return (lsbits & (align - 1)) == 0;
+}
+
+/**
+ * swap_words_32 - swap two elements in 32-bit chunks
+ * @a: pointer to the first element to swap
+ * @b: pointer to the second element to swap
+ * @n: element size (must be a multiple of 4)
+ *
+ * Exchange the two objects in memory. This exploits base+index addressing,
+ * which basically all CPUs have, to minimize loop overhead computations.
+ *
+ * For some reason, on x86 gcc 7.3.0 adds a redundant test of n at the
+ * bottom of the loop, even though the zero flag is still valid from the
+ * subtract (since the intervening mov instructions don't alter the flags).
+ * Gcc 8.1.0 doesn't have that problem.
+ */
+static void swap_words_32(void *a, void *b, size_t n)
+{
+ do {
+ u32 t = *(u32 *)(a + (n -= 4));
+ *(u32 *)(a + n) = *(u32 *)(b + n);
+ *(u32 *)(b + n) = t;
+ } while (n);
+}
+
+/**
+ * swap_words_64 - swap two elements in 64-bit chunks
+ * @a: pointer to the first element to swap
+ * @b: pointer to the second element to swap
+ * @n: element size (must be a multiple of 8)
+ *
+ * Exchange the two objects in memory. This exploits base+index
+ * addressing, which basically all CPUs have, to minimize loop overhead
+ * computations.
+ *
+ * We'd like to use 64-bit loads if possible. If they're not, emulating
+ * one requires base+index+4 addressing which x86 has but most other
+ * processors do not. If CONFIG_64BIT, we definitely have 64-bit loads,
+ * but it's possible to have 64-bit loads without 64-bit pointers (e.g.
+ * x32 ABI). Are there any cases the kernel needs to worry about?
+ */
+static void swap_words_64(void *a, void *b, size_t n)
+{
+ do {
+#ifdef CONFIG_64BIT
+ u64 t = *(u64 *)(a + (n -= 8));
+ *(u64 *)(a + n) = *(u64 *)(b + n);
+ *(u64 *)(b + n) = t;
+#else
+ /* Use two 32-bit transfers to avoid base+index+4 addressing */
+ u32 t = *(u32 *)(a + (n -= 4));
+ *(u32 *)(a + n) = *(u32 *)(b + n);
+ *(u32 *)(b + n) = t;
+
+ t = *(u32 *)(a + (n -= 4));
+ *(u32 *)(a + n) = *(u32 *)(b + n);
+ *(u32 *)(b + n) = t;
+#endif
+ } while (n);
+}
+
+/**
+ * swap_bytes - swap two elements a byte at a time
+ * @a: pointer to the first element to swap
+ * @b: pointer to the second element to swap
+ * @n: element size
+ *
+ * This is the fallback if alignment doesn't allow using larger chunks.
+ */
+static void swap_bytes(void *a, void *b, size_t n)
+{
+ do {
+ char t = ((char *)a)[--n];
+ ((char *)a)[n] = ((char *)b)[n];
+ ((char *)b)[n] = t;
+ } while (n);
+}
+
+/*
+ * The values are arbitrary as long as they can't be confused with
+ * a pointer, but small integers make for the smallest compare
+ * instructions.
+ */
+#define SWAP_WORDS_64 (swap_r_func_t)0
+#define SWAP_WORDS_32 (swap_r_func_t)1
+#define SWAP_BYTES (swap_r_func_t)2
+#define SWAP_WRAPPER (swap_r_func_t)3
+
+struct wrapper {
+ cmp_func_t cmp;
+ swap_func_t swap;
+};
+
+/*
+ * The function pointer is last to make tail calls most efficient if the
+ * compiler decides not to inline this function.
+ */
+static void do_swap(void *a, void *b, size_t size, swap_r_func_t swap_func, const void *priv)
+{
+ if (swap_func == SWAP_WRAPPER) {
+ ((const struct wrapper *)priv)->swap(a, b, (int)size);
+ return;
+ }
+
+ if (swap_func == SWAP_WORDS_64)
+ swap_words_64(a, b, size);
+ else if (swap_func == SWAP_WORDS_32)
+ swap_words_32(a, b, size);
+ else if (swap_func == SWAP_BYTES)
+ swap_bytes(a, b, size);
+ else
+ swap_func(a, b, (int)size, priv);
+}
+
+#define _CMP_WRAPPER ((cmp_r_func_t)0L)
+
+static int do_cmp(const void *a, const void *b, cmp_r_func_t cmp, const void *priv)
+{
+ if (cmp == _CMP_WRAPPER)
+ return ((const struct wrapper *)priv)->cmp(a, b);
+ return cmp(a, b, priv);
+}
+
+/**
+ * parent - given the offset of the child, find the offset of the parent.
+ * @i: the offset of the heap element whose parent is sought. Non-zero.
+ * @lsbit: a precomputed 1-bit mask, equal to "size & -size"
+ * @size: size of each element
+ *
+ * In terms of array indexes, the parent of element j = @i/@size is simply
+ * (j-1)/2. But when working in byte offsets, we can't use implicit
+ * truncation of integer divides.
+ *
+ * Fortunately, we only need one bit of the quotient, not the full divide.
+ * @size has a least significant bit. That bit will be clear if @i is
+ * an even multiple of @size, and set if it's an odd multiple.
+ *
+ * Logically, we're doing "if (i & lsbit) i -= size;", but since the
+ * branch is unpredictable, it's done with a bit of clever branch-free
+ * code instead.
+ */
+__attribute_const__ __always_inline
+static size_t parent(size_t i, unsigned int lsbit, size_t size)
+{
+ i -= size;
+ i -= size & -(i & lsbit);
+ return i / 2;
+}
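A quick worked check of the branch-free parent() above (illustration, not from the source): with size = 12 the precomputed lsbit is 4; element 4 lives at byte offset 48, and its heap parent is element (4-1)/2 = 1 at offset 12.

#include <assert.h>
#include <stddef.h>

/* same computation as parent(), spelled out for checking by hand */
static size_t parent_sketch(size_t i, unsigned int lsbit, size_t size)
{
	i -= size;
	i -= size & -(i & lsbit);	/* subtracts size only for odd multiples */
	return i / 2;
}

int main(void)
{
	/* element 4 at offset 48 -> element 1 at offset 12 */
	assert(parent_sketch(48, 4, 12) == 12);
	/* element 3 at offset 36 -> element 1 at offset 12 as well */
	assert(parent_sketch(36, 4, 12) == 12);
	return 0;
}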
+
+/**
+ * sort_r - sort an array of elements
+ * @base: pointer to data to sort
+ * @num: number of elements
+ * @size: size of each element
+ * @cmp_func: pointer to comparison function
+ * @swap_func: pointer to swap function or NULL
+ * @priv: third argument passed to comparison function
+ *
+ * This function does a heapsort on the given array. You may provide
+ * a swap_func function if you need to do something more than a memory
+ * copy (e.g. fix up pointers or auxiliary data), but the built-in swap
+ * avoids a slow retpoline and so is significantly faster.
+ *
+ * Sorting time is O(n log n) both on average and worst-case. While
+ * quicksort is slightly faster on average, it suffers from exploitable
+ * O(n*n) worst-case behavior and extra memory requirements that make
+ * it less suitable for kernel use.
+ */
+void sort_r(void *base, size_t num, size_t size,
+ cmp_r_func_t cmp_func,
+ swap_r_func_t swap_func,
+ const void *priv)
+{
+ /* pre-scale counters for performance */
+ size_t n = num * size, a = (num/2) * size;
+ const unsigned int lsbit = size & -size; /* Used to find parent */
+
+ if (!a) /* num < 2 || size == 0 */
+ return;
+
+ /* called from 'sort' without swap function, let's pick the default */
+ if (swap_func == SWAP_WRAPPER && !((struct wrapper *)priv)->swap)
+ swap_func = NULL;
+
+ if (!swap_func) {
+ if (is_aligned(base, size, 8))
+ swap_func = SWAP_WORDS_64;
+ else if (is_aligned(base, size, 4))
+ swap_func = SWAP_WORDS_32;
+ else
+ swap_func = SWAP_BYTES;
+ }
+
+ /*
+ * Loop invariants:
+ * 1. elements [a,n) satisfy the heap property (compare greater than
+ * all of their children),
+ * 2. elements [n,num*size) are sorted, and
+ * 3. a <= b <= c <= d <= n (whenever they are valid).
+ */
+ for (;;) {
+ size_t b, c, d;
+
+ if (a) /* Building heap: sift down --a */
+ a -= size;
+ else if (n -= size) /* Sorting: Extract root to --n */
+ do_swap(base, base + n, size, swap_func, priv);
+ else /* Sort complete */
+ break;
+
+ /*
+ * Sift element at "a" down into heap. This is the
+ * "bottom-up" variant, which significantly reduces
+ * calls to cmp_func(): we find the sift-down path all
+ * the way to the leaves (one compare per level), then
+ * backtrack to find where to insert the target element.
+ *
+ * Because elements tend to sift down close to the leaves,
+ * this uses fewer compares than doing two per level
+ * on the way down. (A bit more than half as many on
+ * average, 3/4 worst-case.)
+ */
+ for (b = a; c = 2*b + size, (d = c + size) < n;)
+ b = do_cmp(base + c, base + d, cmp_func, priv) >= 0 ? c : d;
+ if (d == n) /* Special case last leaf with no sibling */
+ b = c;
+
+ /* Now backtrack from "b" to the correct location for "a" */
+ while (b != a && do_cmp(base + a, base + b, cmp_func, priv) >= 0)
+ b = parent(b, lsbit, size);
+ c = b; /* Where "a" belongs */
+ while (b != a) { /* Shift it into place */
+ b = parent(b, lsbit, size);
+ do_swap(base + b, base + c, size, swap_func, priv);
+ }
+ }
+}
+EXPORT_SYMBOL(sort_r);
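A usage sketch (not part of this commit): sort_r() threads @priv through to the comparator, so one comparison function can be parameterized, here by sort direction.

#include <linux/sort.h>
#include <linux/kernel.h>

static int cmp_int_dir(const void *a, const void *b, const void *priv)
{
	int x = *(const int *)a, y = *(const int *)b;
	int d = (x > y) - (x < y);

	return *(const bool *)priv ? d : -d;
}

static void sort_example(void)
{
	int v[] = { 3, 1, 4, 1, 5 };
	bool ascending = true;

	/* NULL swap_func: let sort_r() pick the built-in word-wide swap */
	sort_r(v, ARRAY_SIZE(v), sizeof(v[0]), cmp_int_dir, NULL, &ascending);
}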
+
+#include <linux/eytzinger.h>
+
+static inline int eytzinger0_do_cmp(void *base, size_t n, size_t size,
+ cmp_r_func_t cmp_func, const void *priv,
+ size_t l, size_t r)
+{
+ return do_cmp(base + inorder_to_eytzinger0(l, n) * size,
+ base + inorder_to_eytzinger0(r, n) * size,
+ cmp_func, priv);
+}
+
+static inline void eytzinger0_do_swap(void *base, size_t n, size_t size,
+ swap_r_func_t swap_func, const void *priv,
+ size_t l, size_t r)
+{
+ do_swap(base + inorder_to_eytzinger0(l, n) * size,
+ base + inorder_to_eytzinger0(r, n) * size,
+ size, swap_func, priv);
+}
+
+void eytzinger0_sort_r(void *base, size_t n, size_t size,
+ cmp_r_func_t cmp_func,
+ swap_r_func_t swap_func,
+ const void *priv)
+{
+ int i, c, r;
+
+ /* called from 'sort' without swap function, let's pick the default */
+ if (swap_func == SWAP_WRAPPER && !((struct wrapper *)priv)->swap)
+ swap_func = NULL;
+
+ if (!swap_func) {
+ if (is_aligned(base, size, 8))
+ swap_func = SWAP_WORDS_64;
+ else if (is_aligned(base, size, 4))
+ swap_func = SWAP_WORDS_32;
+ else
+ swap_func = SWAP_BYTES;
+ }
+
+ /* heapify */
+ for (i = n / 2 - 1; i >= 0; --i) {
+ for (r = i; r * 2 + 1 < n; r = c) {
+ c = r * 2 + 1;
+
+ if (c + 1 < n &&
+ eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0)
+ c++;
+
+ if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0)
+ break;
+
+ eytzinger0_do_swap(base, n, size, swap_func, priv, r, c);
+ }
+ }
+
+ /* sort */
+ for (i = n - 1; i > 0; --i) {
+ eytzinger0_do_swap(base, n, size, swap_func, priv, 0, i);
+
+ for (r = 0; r * 2 + 1 < i; r = c) {
+ c = r * 2 + 1;
+
+ if (c + 1 < i &&
+ eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0)
+ c++;
+
+ if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0)
+ break;
+
+ eytzinger0_do_swap(base, n, size, swap_func, priv, r, c);
+ }
+ }
+}
+EXPORT_SYMBOL_GPL(eytzinger0_sort_r);
+
+void eytzinger0_sort(void *base, size_t n, size_t size,
+ cmp_func_t cmp_func,
+ swap_func_t swap_func)
+{
+ struct wrapper w = {
+ .cmp = cmp_func,
+ .swap = swap_func,
+ };
+
+ return eytzinger0_sort_r(base, n, size, _CMP_WRAPPER, SWAP_WRAPPER, &w);
+}
+EXPORT_SYMBOL_GPL(eytzinger0_sort);
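Usage sketch (illustration only): unlike sort(), eytzinger0_sort() leaves the array in eytzinger (breadth-first) order rather than ascending order, the layout bcachefs uses for cache-friendly binary search.

#include <linux/eytzinger.h>
#include <linux/sort.h>

static int cmp_u64(const void *a, const void *b)
{
	u64 x = *(const u64 *)a, y = *(const u64 *)b;

	return (x > y) - (x < y);
}

static void eytzinger_example(u64 *keys, size_t nr)
{
	/* NULL swap_func again selects the built-in word-wide swap */
	eytzinger0_sort(keys, nr, sizeof(keys[0]), cmp_u64, NULL);
}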
diff --git a/linux/time_stats.c b/linux/time_stats.c
new file mode 100644
index 00000000..0b90c80c
--- /dev/null
+++ b/linux/time_stats.c
@@ -0,0 +1,373 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/eytzinger.h>
+#include <linux/jiffies.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/preempt.h>
+#include <linux/time.h>
+#include <linux/time_stats.h>
+#include <linux/spinlock.h>
+
+static const struct time_unit time_units[] = {
+ { "ns", 1 },
+ { "us", NSEC_PER_USEC },
+ { "ms", NSEC_PER_MSEC },
+ { "s", NSEC_PER_SEC },
+ { "m", (u64) NSEC_PER_SEC * 60},
+ { "h", (u64) NSEC_PER_SEC * 3600},
+ { "d", (u64) NSEC_PER_SEC * 3600 * 24},
+ { "w", (u64) NSEC_PER_SEC * 3600 * 24 * 7},
+ { "y", (u64) NSEC_PER_SEC * ((3600 * 24 * 7 * 365) + (3600 * (24 / 4) * 7))}, /* 365.25d */
+ { "eon", U64_MAX },
+};
+
+const struct time_unit *pick_time_units(u64 ns)
+{
+ const struct time_unit *u;
+
+ for (u = time_units;
+ u + 1 < time_units + ARRAY_SIZE(time_units) &&
+ ns >= u[1].nsecs << 1;
+ u++)
+ ;
+
+ return u;
+}
+EXPORT_SYMBOL_GPL(pick_time_units);
+
+static void quantiles_update(struct quantiles *q, u64 v)
+{
+ unsigned i = 0;
+
+ while (i < ARRAY_SIZE(q->entries)) {
+ struct quantile_entry *e = q->entries + i;
+
+ if (unlikely(!e->step)) {
+ e->m = v;
+ e->step = max_t(unsigned, v / 2, 1024);
+ } else if (e->m > v) {
+ e->m = e->m >= e->step
+ ? e->m - e->step
+ : 0;
+ } else if (e->m < v) {
+ e->m = e->m + e->step > e->m
+ ? e->m + e->step
+ : U32_MAX;
+ }
+
+ if ((e->m > v ? e->m - v : v - e->m) < e->step)
+ e->step = max_t(unsigned, e->step / 2, 1);
+
+ if (v >= e->m)
+ break;
+
+ i = eytzinger0_child(i, v > e->m);
+ }
+}
+
+static inline void time_stats_update_one(struct time_stats *stats,
+ u64 start, u64 end)
+{
+ u64 duration, freq;
+ bool initted = stats->last_event != 0;
+
+ if (time_after64(end, start)) {
+ struct quantiles *quantiles = time_stats_to_quantiles(stats);
+
+ duration = end - start;
+ mean_and_variance_update(&stats->duration_stats, duration);
+ mean_and_variance_weighted_update(&stats->duration_stats_weighted,
+ duration, initted, TIME_STATS_MV_WEIGHT);
+ stats->max_duration = max(stats->max_duration, duration);
+ stats->min_duration = min(stats->min_duration, duration);
+ stats->total_duration += duration;
+
+ if (quantiles)
+ quantiles_update(quantiles, duration);
+ }
+
+ if (stats->last_event && time_after64(end, stats->last_event)) {
+ freq = end - stats->last_event;
+ mean_and_variance_update(&stats->freq_stats, freq);
+ mean_and_variance_weighted_update(&stats->freq_stats_weighted,
+ freq, initted, TIME_STATS_MV_WEIGHT);
+ stats->max_freq = max(stats->max_freq, freq);
+ stats->min_freq = min(stats->min_freq, freq);
+ }
+
+ stats->last_event = end;
+}
+
+void __time_stats_clear_buffer(struct time_stats *stats,
+ struct time_stat_buffer *b)
+{
+ for (struct time_stat_buffer_entry *i = b->entries;
+ i < b->entries + ARRAY_SIZE(b->entries);
+ i++)
+ time_stats_update_one(stats, i->start, i->end);
+ b->nr = 0;
+}
+EXPORT_SYMBOL_GPL(__time_stats_clear_buffer);
+
+static noinline void time_stats_clear_buffer(struct time_stats *stats,
+ struct time_stat_buffer *b)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&stats->lock, flags);
+ __time_stats_clear_buffer(stats, b);
+ spin_unlock_irqrestore(&stats->lock, flags);
+}
+
+void __time_stats_update(struct time_stats *stats, u64 start, u64 end)
+{
+ unsigned long flags;
+
+ if (!stats->buffer) {
+ spin_lock_irqsave(&stats->lock, flags);
+ time_stats_update_one(stats, start, end);
+
+ if (mean_and_variance_weighted_get_mean(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT) < 32 &&
+ stats->duration_stats.n > 1024)
+ stats->buffer =
+ alloc_percpu_gfp(struct time_stat_buffer,
+ GFP_ATOMIC);
+ spin_unlock_irqrestore(&stats->lock, flags);
+ } else {
+ struct time_stat_buffer *b;
+
+ preempt_disable();
+ b = this_cpu_ptr(stats->buffer);
+
+ BUG_ON(b->nr >= ARRAY_SIZE(b->entries));
+ b->entries[b->nr++] = (struct time_stat_buffer_entry) {
+ .start = start,
+ .end = end
+ };
+
+ if (unlikely(b->nr == ARRAY_SIZE(b->entries)))
+ time_stats_clear_buffer(stats, b);
+ preempt_enable();
+ }
+}
+EXPORT_SYMBOL_GPL(__time_stats_update);
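A usage sketch (do_work() is a hypothetical stand-in): callers time an operation with local_clock() and hand both endpoints to __time_stats_update(); once events become frequent, the percpu buffer path above takes over and the spinlock is only taken when a buffer fills.

#include <linux/time_stats.h>
#include <linux/sched/clock.h>

/* assumed to have been set up with time_stats_init() */
static struct time_stats example_stats;

static void timed_op(void)
{
	u64 start = local_clock();

	do_work();	/* hypothetical: whatever is being measured */

	__time_stats_update(&example_stats, start, local_clock());
}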
+
+#include <linux/seq_buf.h>
+
+static void seq_buf_time_units_aligned(struct seq_buf *out, u64 ns)
+{
+ const struct time_unit *u = pick_time_units(ns);
+
+ seq_buf_printf(out, "%8llu %s", div64_u64(ns, u->nsecs), u->name);
+}
+
+static inline u64 time_stats_lifetime(const struct time_stats *stats)
+{
+ return local_clock() - stats->start_time;
+}
+
+void time_stats_to_seq_buf(struct seq_buf *out, struct time_stats *stats,
+ const char *epoch_name, unsigned int flags)
+{
+ struct quantiles *quantiles = time_stats_to_quantiles(stats);
+ s64 f_mean = 0, d_mean = 0;
+ u64 f_stddev = 0, d_stddev = 0;
+ u64 lifetime = time_stats_lifetime(stats);
+
+ if (stats->buffer) {
+ int cpu;
+
+ spin_lock_irq(&stats->lock);
+ for_each_possible_cpu(cpu)
+ __time_stats_clear_buffer(stats, per_cpu_ptr(stats->buffer, cpu));
+ spin_unlock_irq(&stats->lock);
+ }
+
+ if (stats->freq_stats.n) {
+ /* avoid divide by zero */
+ f_mean = mean_and_variance_get_mean(stats->freq_stats);
+ f_stddev = mean_and_variance_get_stddev(stats->freq_stats);
+ d_mean = mean_and_variance_get_mean(stats->duration_stats);
+ d_stddev = mean_and_variance_get_stddev(stats->duration_stats);
+ } else if (flags & TIME_STATS_PRINT_NO_ZEROES) {
+ /* unless we didn't want zeroes anyway */
+ return;
+ }
+
+ seq_buf_printf(out, "count: %llu\n", stats->duration_stats.n);
+ seq_buf_printf(out, "lifetime: ");
+ seq_buf_time_units_aligned(out, lifetime);
+ seq_buf_printf(out, "\n");
+
+ seq_buf_printf(out, " since %-12s recent\n", epoch_name);
+
+ seq_buf_printf(out, "duration of events\n");
+
+ seq_buf_printf(out, " min: ");
+ seq_buf_time_units_aligned(out, stats->min_duration);
+ seq_buf_printf(out, "\n");
+
+ seq_buf_printf(out, " max: ");
+ seq_buf_time_units_aligned(out, stats->max_duration);
+ seq_buf_printf(out, "\n");
+
+ seq_buf_printf(out, " total: ");
+ seq_buf_time_units_aligned(out, stats->total_duration);
+ seq_buf_printf(out, "\n");
+
+ seq_buf_printf(out, " mean: ");
+ seq_buf_time_units_aligned(out, d_mean);
+ seq_buf_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT));
+ seq_buf_printf(out, "\n");
+
+ seq_buf_printf(out, " stddev: ");
+ seq_buf_time_units_aligned(out, d_stddev);
+ seq_buf_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT));
+ seq_buf_printf(out, "\n");
+
+ seq_buf_printf(out, "time between events\n");
+
+ seq_buf_printf(out, " min: ");
+ seq_buf_time_units_aligned(out, stats->min_freq);
+ seq_buf_printf(out, "\n");
+
+ seq_buf_printf(out, " max: ");
+ seq_buf_time_units_aligned(out, stats->max_freq);
+ seq_buf_printf(out, "\n");
+
+ seq_buf_printf(out, " mean: ");
+ seq_buf_time_units_aligned(out, f_mean);
+ seq_buf_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT));
+ seq_buf_printf(out, "\n");
+
+ seq_buf_printf(out, " stddev: ");
+ seq_buf_time_units_aligned(out, f_stddev);
+ seq_buf_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT));
+ seq_buf_printf(out, "\n");
+
+ if (quantiles) {
+ int i = eytzinger0_first(NR_QUANTILES);
+ const struct time_unit *u =
+ pick_time_units(quantiles->entries[i].m);
+ u64 last_q = 0;
+
+ seq_buf_printf(out, "quantiles (%s):\t", u->name);
+ eytzinger0_for_each(i, NR_QUANTILES) {
+ bool is_last = eytzinger0_next(i, NR_QUANTILES) == -1;
+
+ u64 q = max(quantiles->entries[i].m, last_q);
+ seq_buf_printf(out, "%llu ", div_u64(q, u->nsecs));
+ if (is_last)
+ seq_buf_printf(out, "\n");
+ last_q = q;
+ }
+ }
+}
+EXPORT_SYMBOL_GPL(time_stats_to_seq_buf);
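Rendering sketch (illustration; the buffer and the "mount" epoch name are made up): the stats are formatted into a seq_buf backed by an ordinary character buffer.

#include <linux/seq_buf.h>
#include <linux/time_stats.h>

static void print_stats(struct time_stats *stats, char *buf, size_t len)
{
	struct seq_buf out;

	seq_buf_init(&out, buf, len);
	time_stats_to_seq_buf(&out, stats, "mount", 0);
}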
+
+void time_stats_to_json(struct seq_buf *out, struct time_stats *stats,
+ const char *epoch_name, unsigned int flags)
+{
+ struct quantiles *quantiles = time_stats_to_quantiles(stats);
+ s64 f_mean = 0, d_mean = 0;
+ u64 f_stddev = 0, d_stddev = 0;
+
+ if (stats->buffer) {
+ int cpu;
+
+ spin_lock_irq(&stats->lock);
+ for_each_possible_cpu(cpu)
+ __time_stats_clear_buffer(stats, per_cpu_ptr(stats->buffer, cpu));
+ spin_unlock_irq(&stats->lock);
+ }
+
+ if (stats->freq_stats.n) {
+ /* avoid divide by zero */
+ f_mean = mean_and_variance_get_mean(stats->freq_stats);
+ f_stddev = mean_and_variance_get_stddev(stats->freq_stats);
+ d_mean = mean_and_variance_get_mean(stats->duration_stats);
+ d_stddev = mean_and_variance_get_stddev(stats->duration_stats);
+ } else if (flags & TIME_STATS_PRINT_NO_ZEROES) {
+ /* unless we didn't want zeroes anyway */
+ return;
+ }
+
+ seq_buf_printf(out, "{\n");
+ seq_buf_printf(out, " \"epoch\": \"%s\",\n", epoch_name);
+ seq_buf_printf(out, " \"count\": %llu,\n", stats->duration_stats.n);
+
+ seq_buf_printf(out, " \"duration_ns\": {\n");
+ seq_buf_printf(out, " \"min\": %llu,\n", stats->min_duration);
+ seq_buf_printf(out, " \"max\": %llu,\n", stats->max_duration);
+ seq_buf_printf(out, " \"total\": %llu,\n", stats->total_duration);
+ seq_buf_printf(out, " \"mean\": %llu,\n", d_mean);
+ seq_buf_printf(out, " \"stddev\": %llu\n", d_stddev);
+ seq_buf_printf(out, " },\n");
+
+ d_mean = mean_and_variance_weighted_get_mean(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT);
+ d_stddev = mean_and_variance_weighted_get_stddev(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT);
+
+ seq_buf_printf(out, " \"duration_ewma_ns\": {\n");
+ seq_buf_printf(out, " \"mean\": %llu,\n", d_mean);
+ seq_buf_printf(out, " \"stddev\": %llu\n", d_stddev);
+ seq_buf_printf(out, " },\n");
+
+ seq_buf_printf(out, " \"frequency_ns\": {\n");
+ seq_buf_printf(out, " \"min\": %llu,\n", stats->min_freq);
+ seq_buf_printf(out, " \"max\": %llu,\n", stats->max_freq);
+ seq_buf_printf(out, " \"mean\": %llu,\n", f_mean);
+ seq_buf_printf(out, " \"stddev\": %llu\n", f_stddev);
+ seq_buf_printf(out, " },\n");
+
+ f_mean = mean_and_variance_weighted_get_mean(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT);
+ f_stddev = mean_and_variance_weighted_get_stddev(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT);
+
+ seq_buf_printf(out, " \"frequency_ewma_ns\": {\n");
+ seq_buf_printf(out, " \"mean\": %llu,\n", f_mean);
+ seq_buf_printf(out, " \"stddev\": %llu\n", f_stddev);
+
+	if (quantiles) {
+		int i;
+		u64 last_q = 0;
+
+ /* close frequency_ewma_ns but signal more items */
+ seq_buf_printf(out, " },\n");
+
+ seq_buf_printf(out, " \"quantiles_ns\": [\n");
+ eytzinger0_for_each(i, NR_QUANTILES) {
+ bool is_last = eytzinger0_next(i, NR_QUANTILES) == -1;
+
+ u64 q = max(quantiles->entries[i].m, last_q);
+ seq_buf_printf(out, " %llu", q);
+ if (!is_last)
+ seq_buf_printf(out, ", ");
+ last_q = q;
+ }
+ seq_buf_printf(out, " ]\n");
+ } else {
+ /* close frequency_ewma_ns without dumping further */
+ seq_buf_printf(out, " }\n");
+ }
+
+ seq_buf_printf(out, "}\n");
+}
+EXPORT_SYMBOL_GPL(time_stats_to_json);
+
+void time_stats_exit(struct time_stats *stats)
+{
+ free_percpu(stats->buffer);
+}
+EXPORT_SYMBOL_GPL(time_stats_exit);
+
+void time_stats_init(struct time_stats *stats)
+{
+ memset(stats, 0, sizeof(*stats));
+ stats->min_duration = U64_MAX;
+ stats->min_freq = U64_MAX;
+ stats->start_time = local_clock();
+ spin_lock_init(&stats->lock);
+}
+EXPORT_SYMBOL_GPL(time_stats_init);
+
+MODULE_AUTHOR("Kent Overstreet");
+MODULE_LICENSE("GPL");