Diffstat (limited to 'include')
-rw-r--r--  include/linux/list_nulls.h        |  145
-rw-r--r--  include/linux/overflow.h          |  346
-rw-r--r--  include/linux/poison.h            |   85
-rw-r--r--  include/linux/random.h            |    1
-rw-r--r--  include/linux/rcupdate.h          |   28
-rw-r--r--  include/linux/rhashtable-types.h  |  135
-rw-r--r--  include/linux/rhashtable.h        | 1205
-rw-r--r--  include/linux/six.h               |    1
-rw-r--r--  include/linux/slab.h              |    1
-rw-r--r--  include/linux/types.h             |    2
10 files changed, 1749 insertions, 200 deletions
diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h
new file mode 100644
index 00000000..fa6e8471
--- /dev/null
+++ b/include/linux/list_nulls.h
@@ -0,0 +1,145 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_LIST_NULLS_H
+#define _LINUX_LIST_NULLS_H
+
+#include <linux/poison.h>
+#include <linux/const.h>
+
+/*
+ * Special version of lists, where end of list is not a NULL pointer,
+ * but a 'nulls' marker, which can have many different values.
+ * (up to 2^31 different values guaranteed on all platforms)
+ *
+ * In the standard hlist, termination of a list is the NULL pointer.
+ * In this special 'nulls' variant, we use the fact that objects stored in
+ * a list are aligned on a word (4 or 8 bytes alignment).
+ * We therefore use the least significant bit of 'ptr' :
+ * Set to 1 : This is a 'nulls' end-of-list marker (ptr >> 1)
+ * Set to 0 : This is a pointer to some object (ptr)
+ */
+
+struct hlist_nulls_head {
+ struct hlist_nulls_node *first;
+};
+
+struct hlist_nulls_node {
+ struct hlist_nulls_node *next, **pprev;
+};
+#define NULLS_MARKER(value) (1UL | (((long)value) << 1))
+#define INIT_HLIST_NULLS_HEAD(ptr, nulls) \
+ ((ptr)->first = (struct hlist_nulls_node *) NULLS_MARKER(nulls))
+
+#define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member)
+
+#define hlist_nulls_entry_safe(ptr, type, member) \
+ ({ typeof(ptr) ____ptr = (ptr); \
+ !is_a_nulls(____ptr) ? hlist_nulls_entry(____ptr, type, member) : NULL; \
+ })
+/**
+ * is_a_nulls - Test if a ptr is a nulls marker
+ * @ptr: ptr to be tested
+ *
+ */
+static inline int is_a_nulls(const struct hlist_nulls_node *ptr)
+{
+ return ((unsigned long)ptr & 1);
+}
+
+/**
+ * get_nulls_value - Get the 'nulls' value of the end of chain
+ * @ptr: end of chain
+ *
+ * Should be called only if is_a_nulls(ptr);
+ */
+static inline unsigned long get_nulls_value(const struct hlist_nulls_node *ptr)
+{
+ return ((unsigned long)ptr) >> 1;
+}
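
As a quick illustration of the encoding above (not part of the patch; the value 7 is arbitrary and BUG_ON() is assumed to be available), a marker round-trips through the helpers like this:

static inline void nulls_marker_example(void)
{
	struct hlist_nulls_head head;

	INIT_HLIST_NULLS_HEAD(&head, 7);	/* first = NULLS_MARKER(7) */

	BUG_ON(!is_a_nulls(head.first));
	BUG_ON(get_nulls_value(head.first) != 7);
}
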
+
+/**
+ * hlist_nulls_unhashed - Has node been removed and reinitialized?
+ * @h: Node to be checked
+ *
+ * Note that not all removal functions will leave a node in unhashed state.
+ * For example, hlist_del_init_rcu() leaves the node in unhashed state,
+ * but hlist_nulls_del() does not.
+ */
+static inline int hlist_nulls_unhashed(const struct hlist_nulls_node *h)
+{
+ return !h->pprev;
+}
+
+/**
+ * hlist_nulls_unhashed_lockless - Has node been removed and reinitialized?
+ * @h: Node to be checked
+ *
+ * Note that not all removal functions will leave a node in unhashed state.
+ * For example, hlist_del_init_rcu() leaves the node in unhashed state,
+ * but hlist_nulls_del() does not. Unlike hlist_nulls_unhashed(), this
+ * function may be used locklessly.
+ */
+static inline int hlist_nulls_unhashed_lockless(const struct hlist_nulls_node *h)
+{
+ return !READ_ONCE(h->pprev);
+}
+
+static inline int hlist_nulls_empty(const struct hlist_nulls_head *h)
+{
+ return is_a_nulls(READ_ONCE(h->first));
+}
+
+static inline void hlist_nulls_add_head(struct hlist_nulls_node *n,
+ struct hlist_nulls_head *h)
+{
+ struct hlist_nulls_node *first = h->first;
+
+ n->next = first;
+ WRITE_ONCE(n->pprev, &h->first);
+ h->first = n;
+ if (!is_a_nulls(first))
+ WRITE_ONCE(first->pprev, &n->next);
+}
+
+static inline void __hlist_nulls_del(struct hlist_nulls_node *n)
+{
+ struct hlist_nulls_node *next = n->next;
+ struct hlist_nulls_node **pprev = n->pprev;
+
+ WRITE_ONCE(*pprev, next);
+ if (!is_a_nulls(next))
+ WRITE_ONCE(next->pprev, pprev);
+}
+
+static inline void hlist_nulls_del(struct hlist_nulls_node *n)
+{
+ __hlist_nulls_del(n);
+ WRITE_ONCE(n->pprev, LIST_POISON2);
+}
+
+/**
+ * hlist_nulls_for_each_entry - iterate over list of given type
+ * @tpos: the type * to use as a loop cursor.
+ * @pos: the &struct hlist_nulls_node to use as a loop cursor.
+ * @head: the head for your list.
+ * @member: the name of the hlist_nulls_node within the struct.
+ *
+ */
+#define hlist_nulls_for_each_entry(tpos, pos, head, member) \
+ for (pos = (head)->first; \
+ (!is_a_nulls(pos)) && \
+ ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1;}); \
+ pos = pos->next)
+
+/**
+ * hlist_nulls_for_each_entry_from - iterate over a hlist continuing from current point
+ * @tpos: the type * to use as a loop cursor.
+ * @pos: the &struct hlist_nulls_node to use as a loop cursor.
+ * @member: the name of the hlist_nulls_node within the struct.
+ *
+ */
+#define hlist_nulls_for_each_entry_from(tpos, pos, member) \
+ for (; (!is_a_nulls(pos)) && \
+ ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1;}); \
+ pos = pos->next)
+
+#endif
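
Not part of the patch, but a minimal sketch of how a caller embeds an hlist_nulls_node and uses the helpers above (struct item and the nulls value 1 are invented for illustration):

struct item {
	int				key;
	struct hlist_nulls_node		node;
};

static inline void hlist_nulls_example(void)
{
	struct hlist_nulls_head head;
	struct hlist_nulls_node *pos;
	struct item a = { .key = 42 }, *cur;

	INIT_HLIST_NULLS_HEAD(&head, 1);	/* empty list, nulls value 1 */
	hlist_nulls_add_head(&a.node, &head);

	hlist_nulls_for_each_entry(cur, pos, &head, node) {
		/* single iteration: cur == &a, cur->key == 42 */
	}
}
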
diff --git a/include/linux/overflow.h b/include/linux/overflow.h
new file mode 100644
index 00000000..ef74051d
--- /dev/null
+++ b/include/linux/overflow.h
@@ -0,0 +1,346 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+#ifndef __LINUX_OVERFLOW_H
+#define __LINUX_OVERFLOW_H
+
+#include <linux/compiler.h>
+#include <linux/limits.h>
+
+/*
+ * In the fallback code below, we need to compute the minimum and
+ * maximum values representable in a given type. These macros may also
+ * be useful elsewhere, so we provide them outside the
+ * COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW block.
+ *
+ * It would seem more obvious to do something like
+ *
+ * #define type_min(T) (T)(is_signed_type(T) ? (T)1 << (8*sizeof(T)-1) : 0)
+ * #define type_max(T) (T)(is_signed_type(T) ? ((T)1 << (8*sizeof(T)-1)) - 1 : ~(T)0)
+ *
+ * Unfortunately, the middle expressions, strictly speaking, have
+ * undefined behaviour, and at least some versions of gcc warn about
+ * the type_max expression (but not if -fsanitize=undefined is in
+ * effect; in that case, the warning is deferred to runtime...).
+ *
+ * The slightly excessive casting in type_min is to make sure the
+ * macros also produce sensible values for the exotic type _Bool. [The
+ * overflow checkers only almost work for _Bool, but that's
+ * a-feature-not-a-bug, since people shouldn't be doing arithmetic on
+ * _Bools. Besides, the gcc builtins don't allow _Bool* as third
+ * argument.]
+ *
+ * Idea stolen from
+ * https://mail-index.netbsd.org/tech-misc/2007/02/05/0000.html -
+ * credit to Christian Biere.
+ */
+#define is_signed_type(type) (((type)(-1)) < (type)1)
+#define __type_half_max(type) ((type)1 << (8*sizeof(type) - 1 - is_signed_type(type)))
+#define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T)))
+#define type_min(T) ((T)((T)-type_max(T)-(T)1))
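
For orientation (an illustrative check, not part of the patch; it assumes the kernel's u8/s8 types and BUILD_BUG_ON()), the macros evaluate to the usual limits:

static inline void type_limits_example(void)
{
	BUILD_BUG_ON(type_max(u8) != 255);
	BUILD_BUG_ON(type_min(u8) != 0);
	BUILD_BUG_ON(type_max(s8) != 127);
	BUILD_BUG_ON(type_min(s8) != -128);
}
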
+
+/*
+ * Avoids triggering -Wtype-limits compilation warning,
+ * while using unsigned data types to check a < 0.
+ */
+#define is_non_negative(a) ((a) > 0 || (a) == 0)
+#define is_negative(a) (!(is_non_negative(a)))
+
+/*
+ * Allows for effectively applying __must_check to a macro so we can have
+ * both the type-agnostic benefits of the macros while also being able to
+ * enforce that the return value is, in fact, checked.
+ */
+static inline bool __must_check __must_check_overflow(bool overflow)
+{
+ return unlikely(overflow);
+}
+
+#ifdef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW
+/*
+ * For simplicity and code hygiene, the fallback code below insists on
+ * a, b and *d having the same type (similar to the min() and max()
+ * macros), whereas gcc's type-generic overflow checkers accept
+ * different types. Hence we don't just make check_add_overflow an
+ * alias for __builtin_add_overflow, but add type checks similar to
+ * below.
+ */
+#define check_add_overflow(a, b, d) __must_check_overflow(({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ __builtin_add_overflow(__a, __b, __d); \
+}))
+
+#define check_sub_overflow(a, b, d) __must_check_overflow(({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ __builtin_sub_overflow(__a, __b, __d); \
+}))
+
+#define check_mul_overflow(a, b, d) __must_check_overflow(({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ __builtin_mul_overflow(__a, __b, __d); \
+}))
+
+#else
+
+
+/* Checking for unsigned overflow is relatively easy without causing UB. */
+#define __unsigned_add_overflow(a, b, d) ({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ *__d = __a + __b; \
+ *__d < __a; \
+})
+#define __unsigned_sub_overflow(a, b, d) ({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ *__d = __a - __b; \
+ __a < __b; \
+})
+/*
+ * If one of a or b is a compile-time constant, this avoids a division.
+ */
+#define __unsigned_mul_overflow(a, b, d) ({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ *__d = __a * __b; \
+ __builtin_constant_p(__b) ? \
+ __b > 0 && __a > type_max(typeof(__a)) / __b : \
+ __a > 0 && __b > type_max(typeof(__b)) / __a; \
+})
+
+/*
+ * For signed types, detecting overflow is much harder, especially if
+ * we want to avoid UB. But the interface of these macros is such that
+ * we must provide a result in *d, and in fact we must produce the
+ * result promised by gcc's builtins, which is simply the possibly
+ * wrapped-around value. Fortunately, we can just formally do the
+ * operations in the widest relevant unsigned type (u64) and then
+ * truncate the result - gcc is smart enough to generate the same code
+ * with and without the (u64) casts.
+ */
+
+/*
+ * Adding two signed integers can overflow only if they have the same
+ * sign, and overflow has happened iff the result has the opposite
+ * sign.
+ */
+#define __signed_add_overflow(a, b, d) ({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ *__d = (u64)__a + (u64)__b; \
+ (((~(__a ^ __b)) & (*__d ^ __a)) \
+ & type_min(typeof(__a))) != 0; \
+})
+
+/*
+ * Subtraction is similar, except that overflow can now happen only
+ * when the signs are opposite. In this case, overflow has happened if
+ * the result has the opposite sign of a.
+ */
+#define __signed_sub_overflow(a, b, d) ({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ *__d = (u64)__a - (u64)__b; \
+ ((((__a ^ __b)) & (*__d ^ __a)) \
+ & type_min(typeof(__a))) != 0; \
+})
+
+/*
+ * Signed multiplication is rather hard. gcc always follows C99, so
+ * division is truncated towards 0. This means that we can write the
+ * overflow check like this:
+ *
+ * (a > 0 && (b > MAX/a || b < MIN/a)) ||
+ * (a < -1 && (b > MIN/a || b < MAX/a)) ||
+ * (a == -1 && b == MIN)
+ *
+ * The redundant casts of -1 are to silence an annoying -Wtype-limits
+ * (included in -Wextra) warning: When the type is u8 or u16, the
+ * __b_c_e in check_mul_overflow obviously selects
+ * __unsigned_mul_overflow, but unfortunately gcc still parses this
+ * code and warns about the limited range of __b.
+ */
+
+#define __signed_mul_overflow(a, b, d) ({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ typeof(a) __tmax = type_max(typeof(a)); \
+ typeof(a) __tmin = type_min(typeof(a)); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ *__d = (u64)__a * (u64)__b; \
+ (__b > 0 && (__a > __tmax/__b || __a < __tmin/__b)) || \
+ (__b < (typeof(__b))-1 && (__a > __tmin/__b || __a < __tmax/__b)) || \
+ (__b == (typeof(__b))-1 && __a == __tmin); \
+})
+
+
+#define check_add_overflow(a, b, d) __must_check_overflow( \
+ __builtin_choose_expr(is_signed_type(typeof(a)), \
+ __signed_add_overflow(a, b, d), \
+ __unsigned_add_overflow(a, b, d)))
+
+#define check_sub_overflow(a, b, d) __must_check_overflow( \
+ __builtin_choose_expr(is_signed_type(typeof(a)), \
+ __signed_sub_overflow(a, b, d), \
+ __unsigned_sub_overflow(a, b, d)))
+
+#define check_mul_overflow(a, b, d) __must_check_overflow( \
+ __builtin_choose_expr(is_signed_type(typeof(a)), \
+ __signed_mul_overflow(a, b, d), \
+ __unsigned_mul_overflow(a, b, d)))
+
+#endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */
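
Whichever branch is compiled, the calling convention is the same: the (possibly wrapped) result is always written to *d and the return value says whether it overflowed. A hedged sketch (the helper name and the -EOVERFLOW policy are invented for illustration):

static inline int add_sizes(size_t a, size_t b, size_t *out)
{
	if (check_add_overflow(a, b, out))
		return -EOVERFLOW;	/* *out holds the wrapped value; don't use it */
	return 0;
}
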
+
+/** check_shl_overflow() - Calculate a left-shifted value and check overflow
+ *
+ * @a: Value to be shifted
+ * @s: How many bits left to shift
+ * @d: Pointer to where to store the result
+ *
+ * Computes *@d = (@a << @s)
+ *
+ * Returns true if '*d' cannot hold the result or when 'a << s' doesn't
+ * make sense. Example conditions:
+ * - 'a << s' causes bits to be lost when stored in *d.
+ * - 's' is garbage (e.g. negative) or so large that the result of
+ * 'a << s' is guaranteed to be 0.
+ * - 'a' is negative.
+ * - 'a << s' sets the sign bit, if any, in '*d'.
+ *
+ * '*d' will hold the results of the attempted shift, but is not
+ * considered "safe for use" if false is returned.
+ */
+#define check_shl_overflow(a, s, d) __must_check_overflow(({ \
+ typeof(a) _a = a; \
+ typeof(s) _s = s; \
+ typeof(d) _d = d; \
+ u64 _a_full = _a; \
+ unsigned int _to_shift = \
+ is_non_negative(_s) && _s < 8 * sizeof(*d) ? _s : 0; \
+ *_d = (_a_full << _to_shift); \
+ (_to_shift != _s || is_negative(*_d) || is_negative(_a) || \
+ (*_d >> _to_shift) != _a); \
+}))
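
A sketch of typical use (the helper name is invented; any shift that loses bits, or a garbage shift count, is reported as overflow):

static inline int blocks_to_bytes(u64 nr_blocks, unsigned int block_bits,
				  u64 *bytes)
{
	if (check_shl_overflow(nr_blocks, block_bits, bytes))
		return -EOVERFLOW;
	return 0;
}
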
+
+/**
+ * array_size() - Calculate size of 2-dimensional array.
+ *
+ * @a: dimension one
+ * @b: dimension two
+ *
+ * Calculates size of 2-dimensional array: @a * @b.
+ *
+ * Returns: number of bytes needed to represent the array or SIZE_MAX on
+ * overflow.
+ */
+static inline __must_check size_t array_size(size_t a, size_t b)
+{
+ size_t bytes;
+
+ if (check_mul_overflow(a, b, &bytes))
+ return SIZE_MAX;
+
+ return bytes;
+}
+
+/**
+ * array3_size() - Calculate size of 3-dimensional array.
+ *
+ * @a: dimension one
+ * @b: dimension two
+ * @c: dimension three
+ *
+ * Calculates size of 3-dimensional array: @a * @b * @c.
+ *
+ * Returns: number of bytes needed to represent the array or SIZE_MAX on
+ * overflow.
+ */
+static inline __must_check size_t array3_size(size_t a, size_t b, size_t c)
+{
+ size_t bytes;
+
+ if (check_mul_overflow(a, b, &bytes))
+ return SIZE_MAX;
+ if (check_mul_overflow(bytes, c, &bytes))
+ return SIZE_MAX;
+
+ return bytes;
+}
+
+/*
+ * Compute a*b+c, returning SIZE_MAX on overflow. Internal helper for
+ * struct_size() below.
+ */
+static inline __must_check size_t __ab_c_size(size_t a, size_t b, size_t c)
+{
+ size_t bytes;
+
+ if (check_mul_overflow(a, b, &bytes))
+ return SIZE_MAX;
+ if (check_add_overflow(bytes, c, &bytes))
+ return SIZE_MAX;
+
+ return bytes;
+}
+
+/**
+ * struct_size() - Calculate size of structure with trailing array.
+ * @p: Pointer to the structure.
+ * @member: Name of the array member.
+ * @count: Number of elements in the array.
+ *
+ * Calculates size of memory needed for structure @p followed by an
+ * array of @count number of @member elements.
+ *
+ * Return: number of bytes needed or SIZE_MAX on overflow.
+ */
+#define struct_size(p, member, count) \
+ __ab_c_size(count, \
+ sizeof(*(p)->member) + __must_be_array((p)->member),\
+ sizeof(*(p)))
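
The usual pattern is sizing an allocation for a structure with a trailing flexible array, roughly like this (struct entry is invented; kmalloc()/GFP_KERNEL as in the kernel proper):

struct entry {
	size_t	nr;
	u32	vals[];
};

static inline struct entry *alloc_entry(size_t nr)
{
	struct entry *e = kmalloc(struct_size(e, vals, nr), GFP_KERNEL);

	if (e)
		e->nr = nr;
	return e;
}
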
+
+/**
+ * flex_array_size() - Calculate size of a flexible array member
+ * within an enclosing structure.
+ *
+ * @p: Pointer to the structure.
+ * @member: Name of the flexible array member.
+ * @count: Number of elements in the array.
+ *
+ * Calculates size of a flexible array of @count number of @member
+ * elements, at the end of structure @p.
+ *
+ * Return: number of bytes needed or SIZE_MAX on overflow.
+ */
+#define flex_array_size(p, member, count) \
+ array_size(count, \
+ sizeof(*(p)->member) + __must_be_array((p)->member))
+
+#endif /* __LINUX_OVERFLOW_H */
diff --git a/include/linux/poison.h b/include/linux/poison.h
new file mode 100644
index 00000000..dc8ae5d8
--- /dev/null
+++ b/include/linux/poison.h
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_POISON_H
+#define _LINUX_POISON_H
+
+/********** include/linux/list.h **********/
+
+/*
+ * Architectures might want to move the poison pointer offset
+ * into some well-recognized area such as 0xdead000000000000,
+ * that is also not mappable by user-space exploits:
+ */
+#ifdef CONFIG_ILLEGAL_POINTER_VALUE
+# define POISON_POINTER_DELTA _AC(CONFIG_ILLEGAL_POINTER_VALUE, UL)
+#else
+# define POISON_POINTER_DELTA 0
+#endif
+
+/*
+ * These are non-NULL pointers that will result in page faults
+ * under normal circumstances, used to verify that nobody uses
+ * non-initialized list entries.
+ */
+#define LIST_POISON1 ((void *) 0x100 + POISON_POINTER_DELTA)
+#define LIST_POISON2 ((void *) 0x122 + POISON_POINTER_DELTA)
+
+/********** include/linux/timer.h **********/
+#define TIMER_ENTRY_STATIC ((void *) 0x300 + POISON_POINTER_DELTA)
+
+/********** mm/page_poison.c **********/
+#ifdef CONFIG_PAGE_POISONING_ZERO
+#define PAGE_POISON 0x00
+#else
+#define PAGE_POISON 0xaa
+#endif
+
+/********** mm/page_alloc.c ************/
+
+#define TAIL_MAPPING ((void *) 0x400 + POISON_POINTER_DELTA)
+
+/********** mm/slab.c **********/
+/*
+ * Magic nums for obj red zoning.
+ * Placed in the first word before and the first word after an obj.
+ */
+#define RED_INACTIVE 0x09F911029D74E35BULL /* when obj is inactive */
+#define RED_ACTIVE 0xD84156C5635688C0ULL /* when obj is active */
+
+#define SLUB_RED_INACTIVE 0xbb
+#define SLUB_RED_ACTIVE 0xcc
+
+/* ...and for poisoning */
+#define POISON_INUSE 0x5a /* for use-uninitialised poisoning */
+#define POISON_FREE 0x6b /* for use-after-free poisoning */
+#define POISON_END 0xa5 /* end-byte of poisoning */
+
+/********** arch/$ARCH/mm/init.c **********/
+#define POISON_FREE_INITMEM 0xcc
+
+/********** arch/ia64/hp/common/sba_iommu.c **********/
+/*
+ * arch/ia64/hp/common/sba_iommu.c uses a 16-byte poison string with a
+ * value of "SBAIOMMU POISON\0" for spill-over poisoning.
+ */
+
+/********** fs/jbd/journal.c **********/
+#define JBD_POISON_FREE 0x5b
+#define JBD2_POISON_FREE 0x5c
+
+/********** drivers/base/dmapool.c **********/
+#define POOL_POISON_FREED 0xa7 /* !inuse */
+#define POOL_POISON_ALLOCATED 0xa9 /* !initted */
+
+/********** drivers/atm/ **********/
+#define ATM_POISON_FREE 0x12
+#define ATM_POISON 0xdeadbeef
+
+/********** kernel/mutexes **********/
+#define MUTEX_DEBUG_INIT 0x11
+#define MUTEX_DEBUG_FREE 0x22
+#define MUTEX_POISON_WW_CTX ((void *) 0x500 + POISON_POINTER_DELTA)
+
+/********** security/ **********/
+#define KEY_DESTROY 0xbd
+
+#endif
diff --git a/include/linux/random.h b/include/linux/random.h
index c38ae46d..28c595a0 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -45,6 +45,7 @@ static inline type get_random_##type(void) \
get_random_type(int);
get_random_type(long);
+get_random_type(u32);
get_random_type(u64);
#endif /* _LINUX_RANDOM_H */
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index c99d78a8..ae292241 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -13,4 +13,32 @@
#define RCU_INIT_POINTER(p, v) WRITE_ONCE(p, v)
+/* Has the specified rcu_head structure been handed to call_rcu()? */
+
+/**
+ * rcu_head_init - Initialize rcu_head for rcu_head_after_call_rcu()
+ * @rhp: The rcu_head structure to initialize.
+ *
+ * If you intend to invoke rcu_head_after_call_rcu() to test whether a
+ * given rcu_head structure has already been passed to call_rcu(), then
+ * you must also invoke this rcu_head_init() function on it just after
+ * allocating that structure. Calls to this function must not race with
+ * calls to call_rcu(), rcu_head_after_call_rcu(), or callback invocation.
+ */
+static inline void rcu_head_init(struct rcu_head *rhp)
+{
+ rhp->func = (void *)~0L;
+}
+
+static inline bool
+rcu_head_after_call_rcu(struct rcu_head *rhp,
+ void (*f)(struct rcu_head *head))
+{
+ void (*func)(struct rcu_head *head) = READ_ONCE(rhp->func);
+
+ if (func == f)
+ return true;
+ return false;
+}
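
A sketch of the intended protocol (the enclosing object and callback are invented): call rcu_head_init() right after allocation, then use rcu_head_after_call_rcu() to make sure the callback is only queued once:

struct obj {
	struct rcu_head	rcu;
	int		payload;
};

static void obj_free_rcu(struct rcu_head *rhp)
{
	kfree(container_of(rhp, struct obj, rcu));
}

static void obj_maybe_free(struct obj *o)
{
	/* assumes rcu_head_init(&o->rcu) was called at allocation time */
	if (!rcu_head_after_call_rcu(&o->rcu, obj_free_rcu))
		call_rcu(&o->rcu, obj_free_rcu);
}
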
+
#endif /* __TOOLS_LINUX_RCUPDATE_H */
diff --git a/include/linux/rhashtable-types.h b/include/linux/rhashtable-types.h
new file mode 100644
index 00000000..57467cbf
--- /dev/null
+++ b/include/linux/rhashtable-types.h
@@ -0,0 +1,135 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Resizable, Scalable, Concurrent Hash Table
+ *
+ * Simple structures that might be needed in include
+ * files.
+ */
+
+#ifndef _LINUX_RHASHTABLE_TYPES_H
+#define _LINUX_RHASHTABLE_TYPES_H
+
+#include <linux/atomic.h>
+#include <linux/compiler.h>
+#include <linux/mutex.h>
+#include <linux/workqueue.h>
+
+struct rhash_head {
+ struct rhash_head __rcu *next;
+};
+
+struct rhlist_head {
+ struct rhash_head rhead;
+ struct rhlist_head __rcu *next;
+};
+
+struct bucket_table;
+
+/**
+ * struct rhashtable_compare_arg - Key for the function rhashtable_compare
+ * @ht: Hash table
+ * @key: Key to compare against
+ */
+struct rhashtable_compare_arg {
+ struct rhashtable *ht;
+ const void *key;
+};
+
+typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed);
+typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 len, u32 seed);
+typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg,
+ const void *obj);
+
+/**
+ * struct rhashtable_params - Hash table construction parameters
+ * @nelem_hint: Hint on number of elements, should be 75% of desired size
+ * @key_len: Length of key
+ * @key_offset: Offset of key in struct to be hashed
+ * @head_offset: Offset of rhash_head in struct to be hashed
+ * @max_size: Maximum size while expanding
+ * @min_size: Minimum size while shrinking
+ * @automatic_shrinking: Enable automatic shrinking of tables
+ * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash)
+ * @obj_hashfn: Function to hash object
+ * @obj_cmpfn: Function to compare key with object
+ */
+struct rhashtable_params {
+ u16 nelem_hint;
+ u16 key_len;
+ u16 key_offset;
+ u16 head_offset;
+ unsigned int max_size;
+ u16 min_size;
+ bool automatic_shrinking;
+ rht_hashfn_t hashfn;
+ rht_obj_hashfn_t obj_hashfn;
+ rht_obj_cmpfn_t obj_cmpfn;
+};
+
+/**
+ * struct rhashtable - Hash table handle
+ * @tbl: Bucket table
+ * @key_len: Key length for hashfn
+ * @max_elems: Maximum number of elements in table
+ * @p: Configuration parameters
+ * @rhlist: True if this is an rhltable
+ * @run_work: Deferred worker to expand/shrink asynchronously
+ * @mutex: Mutex to protect current/future table swapping
+ * @lock: Spin lock to protect walker list
+ * @nelems: Number of elements in table
+ */
+struct rhashtable {
+ struct bucket_table __rcu *tbl;
+ unsigned int key_len;
+ unsigned int max_elems;
+ struct rhashtable_params p;
+ bool rhlist;
+ struct work_struct run_work;
+ struct mutex mutex;
+ spinlock_t lock;
+ atomic_t nelems;
+};
+
+/**
+ * struct rhltable - Hash table with duplicate objects in a list
+ * @ht: Underlying rhashtable
+ */
+struct rhltable {
+ struct rhashtable ht;
+};
+
+/**
+ * struct rhashtable_walker - Hash table walker
+ * @list: List entry on list of walkers
+ * @tbl: The table that we were walking over
+ */
+struct rhashtable_walker {
+ struct list_head list;
+ struct bucket_table *tbl;
+};
+
+/**
+ * struct rhashtable_iter - Hash table iterator
+ * @ht: Table to iterate through
+ * @p: Current pointer
+ * @list: Current hash list pointer
+ * @walker: Associated rhashtable walker
+ * @slot: Current slot
+ * @skip: Number of entries to skip in slot
+ */
+struct rhashtable_iter {
+ struct rhashtable *ht;
+ struct rhash_head *p;
+ struct rhlist_head *list;
+ struct rhashtable_walker walker;
+ unsigned int slot;
+ unsigned int skip;
+ bool end_of_table;
+};
+
+int rhashtable_init(struct rhashtable *ht,
+ const struct rhashtable_params *params);
+int rhltable_init(struct rhltable *hlt,
+ const struct rhashtable_params *params);
+
+#endif /* _LINUX_RHASHTABLE_TYPES_H */
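
A sketch of how a user typically fills these in for an object with an embedded rhash_head (struct my_obj and my_params are invented for illustration):

struct my_obj {
	u32			key;
	struct rhash_head	node;
};

static const struct rhashtable_params my_params = {
	.key_len		= sizeof(u32),
	.key_offset		= offsetof(struct my_obj, key),
	.head_offset		= offsetof(struct my_obj, node),
	.automatic_shrinking	= true,
};

/* then, e.g.:  rhashtable_init(&my_table, &my_params); */
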
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 8dbe1533..6cf8c257 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -1,7 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Resizable, Scalable, Concurrent Hash Table
*
- * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au>
+ * Copyright (c) 2015-2016 Herbert Xu <herbert@gondor.apana.org.au>
* Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
* Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
*
@@ -17,92 +18,93 @@
#ifndef _LINUX_RHASHTABLE_H
#define _LINUX_RHASHTABLE_H
-#include <linux/atomic.h>
-#include <linux/cache.h>
-#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/jhash.h>
-#include <linux/workqueue.h>
-#include <linux/mutex.h>
-#include <linux/spinlock.h>
+#include <linux/list_nulls.h>
#include <linux/rcupdate.h>
+#include <linux/workqueue.h>
+#include <linux/rculist.h>
+#include <linux/bit_spinlock.h>
-#define RHT_BASE_BITS 4
-#define RHT_HASH_BITS 27
-#define RHT_BASE_SHIFT RHT_HASH_BITS
-#define RHT_HASH_RESERVED_SPACE (RHT_BASE_BITS + 1)
+#define BIT(nr) (1UL << (nr))
-struct rhash_head {
- struct rhash_head __rcu *next;
-};
+#include <linux/rhashtable-types.h>
+/*
+ * Objects in an rhashtable have an embedded struct rhash_head
+ * which is linked into a hash chain from the hash table - or one
+ * of two or more hash tables when the rhashtable is being resized.
+ * The end of the chain is marked with a special nulls marker which has
+ * the least significant bit set but otherwise stores the address of
+ * the hash bucket. This allows us to be sure we've found the end
+ * of the right list.
+ * The value stored in the hash bucket has BIT(0) used as a lock bit.
+ * This bit must be atomically set before any changes are made to
+ * the chain. To avoid dereferencing this pointer without clearing
+ * the bit first, we use an opaque 'struct rhash_lock_head *' for the
+ * pointer stored in the bucket. This struct needs to be defined so
+ * that rcu_dereference() works on it, but it has no content so a
+ * cast is needed for it to be useful. This ensures it isn't
+ * used by mistake without clearing the lock bit first.
+ */
+struct rhash_lock_head {};
+/* Maximum chain length before rehash
+ *
+ * The maximum (not average) chain length grows with the size of the hash
+ * table, at a rate of (log N)/(log log N).
+ *
+ * The value of 16 is selected so that even if the hash table grew to
+ * 2^32 you would not expect the maximum chain length to exceed it
+ * unless we are under attack (or extremely unlucky).
+ *
+ * As this limit is only to detect attacks, we don't need to set it to a
+ * lower value as you'd need the chain length to vastly exceed 16 to have
+ * any real effect on the system.
+ */
+#define RHT_ELASTICITY 16u
+
+/**
+ * struct bucket_table - Table of hash buckets
+ * @size: Number of hash buckets
+ * @nest: Number of bits of first-level nested table.
+ * @rehash: Current bucket being rehashed
+ * @hash_rnd: Random seed to fold into hash
+ * @walkers: List of active walkers
+ * @rcu: RCU structure for freeing the table
+ * @future_tbl: Table under construction during rehashing
+ * @ntbl: Nested table used when out of memory.
+ * @buckets: size * hash buckets
+ */
struct bucket_table {
unsigned int size;
- unsigned int rehash;
+ unsigned int nest;
u32 hash_rnd;
- unsigned int locks_mask;
- spinlock_t *locks;
struct list_head walkers;
struct rcu_head rcu;
struct bucket_table __rcu *future_tbl;
- struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp;
-};
-
-struct rhashtable_compare_arg {
- struct rhashtable *ht;
- const void *key;
+ struct rhash_lock_head __rcu *buckets[] ____cacheline_aligned_in_smp;
};
-typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed);
-typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 len, u32 seed);
-typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg,
- const void *obj);
-
-struct rhashtable_params {
- size_t nelem_hint;
- size_t key_len;
- size_t key_offset;
- size_t head_offset;
- unsigned int insecure_max_entries;
- unsigned int max_size;
- unsigned int min_size;
- u32 nulls_base;
- bool insecure_elasticity;
- bool automatic_shrinking;
- size_t locks_mul;
- rht_hashfn_t hashfn;
- rht_obj_hashfn_t obj_hashfn;
- rht_obj_cmpfn_t obj_cmpfn;
-};
-
-struct rhashtable {
- struct bucket_table __rcu *tbl;
- atomic_t nelems;
- unsigned int key_len;
- unsigned int elasticity;
- struct rhashtable_params p;
- struct work_struct run_work;
- struct mutex mutex;
- spinlock_t lock;
-};
-
-struct rhashtable_walker {
- struct list_head list;
- struct bucket_table *tbl;
-};
-
-#define NULLS_MARKER(value) (1UL | (((long)value) << 1))
-
-static inline unsigned long rht_marker(const struct rhashtable *ht, u32 hash)
-{
- return NULLS_MARKER(ht->p.nulls_base + hash);
-}
-
-#define INIT_RHT_NULLS_HEAD(ptr, ht, hash) \
- ((ptr) = (typeof(ptr)) rht_marker(ht, hash))
+/*
+ * NULLS_MARKER() expects a hash value with the low
+ * bits most likely to be significant, and it discards
+ * the msb.
+ * We give it an address, in which the bottom bit is
+ * always 0, and the msb might be significant.
+ * So we shift the address down one bit to align with
+ * expectations and avoid losing a significant bit.
+ *
+ * We never store the NULLS_MARKER in the hash table
+ * itself as we need the lsb for locking.
+ * Instead we store a NULL.
+ */
+#define RHT_NULLS_MARKER(ptr) \
+ ((void *)NULLS_MARKER(((unsigned long) (ptr)) >> 1))
+#define INIT_RHT_NULLS_HEAD(ptr) \
+ ((ptr) = NULL)
static inline bool rht_is_a_nulls(const struct rhash_head *ptr)
{
@@ -118,37 +120,45 @@ static inline void *rht_obj(const struct rhashtable *ht,
static inline unsigned int rht_bucket_index(const struct bucket_table *tbl,
unsigned int hash)
{
- return (hash >> RHT_HASH_RESERVED_SPACE) & (tbl->size - 1);
+ return hash & (tbl->size - 1);
}
-static inline unsigned int rht_key_hashfn(
- struct rhashtable *ht, const struct bucket_table *tbl,
- const void *key, const struct rhashtable_params params)
+static inline unsigned int rht_key_get_hash(struct rhashtable *ht,
+ const void *key, const struct rhashtable_params params,
+ unsigned int hash_rnd)
{
unsigned int hash;
/* params must be equal to ht->p if it isn't constant. */
if (!__builtin_constant_p(params.key_len))
- hash = ht->p.hashfn(key, ht->key_len, tbl->hash_rnd);
+ hash = ht->p.hashfn(key, ht->key_len, hash_rnd);
else if (params.key_len) {
unsigned int key_len = params.key_len;
if (params.hashfn)
- hash = params.hashfn(key, key_len, tbl->hash_rnd);
+ hash = params.hashfn(key, key_len, hash_rnd);
else if (key_len & (sizeof(u32) - 1))
- hash = jhash(key, key_len, tbl->hash_rnd);
+ hash = jhash(key, key_len, hash_rnd);
else
- hash = jhash2(key, key_len / sizeof(u32),
- tbl->hash_rnd);
+ hash = jhash2(key, key_len / sizeof(u32), hash_rnd);
} else {
unsigned int key_len = ht->p.key_len;
if (params.hashfn)
- hash = params.hashfn(key, key_len, tbl->hash_rnd);
+ hash = params.hashfn(key, key_len, hash_rnd);
else
- hash = jhash(key, key_len, tbl->hash_rnd);
+ hash = jhash(key, key_len, hash_rnd);
}
+ return hash;
+}
+
+static inline unsigned int rht_key_hashfn(
+ struct rhashtable *ht, const struct bucket_table *tbl,
+ const void *key, const struct rhashtable_params params)
+{
+ unsigned int hash = rht_key_get_hash(ht, key, params, tbl->hash_rnd);
+
return rht_bucket_index(tbl, hash);
}
@@ -165,6 +175,11 @@ static inline unsigned int rht_head_hashfn(
rht_key_hashfn(ht, tbl, ptr + params.key_offset, params);
}
+/**
+ * rht_grow_above_75 - returns true if nelems > 0.75 * table-size
+ * @ht: hash table
+ * @tbl: current table
+ */
static inline bool rht_grow_above_75(const struct rhashtable *ht,
const struct bucket_table *tbl)
{
@@ -173,6 +188,11 @@ static inline bool rht_grow_above_75(const struct rhashtable *ht,
(!ht->p.max_size || tbl->size < ht->p.max_size);
}
+/**
+ * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size
+ * @ht: hash table
+ * @tbl: current table
+ */
static inline bool rht_shrink_below_30(const struct rhashtable *ht,
const struct bucket_table *tbl)
{
@@ -181,6 +201,11 @@ static inline bool rht_shrink_below_30(const struct rhashtable *ht,
tbl->size > ht->p.min_size;
}
+/**
+ * rht_grow_above_100 - returns true if nelems > table-size
+ * @ht: hash table
+ * @tbl: current table
+ */
static inline bool rht_grow_above_100(const struct rhashtable *ht,
const struct bucket_table *tbl)
{
@@ -188,62 +213,353 @@ static inline bool rht_grow_above_100(const struct rhashtable *ht,
(!ht->p.max_size || tbl->size < ht->p.max_size);
}
+/**
+ * rht_grow_above_max - returns true if table is above maximum
+ * @ht: hash table
+ * @tbl: current table
+ */
static inline bool rht_grow_above_max(const struct rhashtable *ht,
const struct bucket_table *tbl)
{
- return ht->p.insecure_max_entries &&
- atomic_read(&ht->nelems) >= ht->p.insecure_max_entries;
+ return atomic_read(&ht->nelems) >= ht->max_elems;
}
-static inline spinlock_t *rht_bucket_lock(const struct bucket_table *tbl,
- unsigned int hash)
+#ifdef CONFIG_PROVE_LOCKING
+int lockdep_rht_mutex_is_held(struct rhashtable *ht);
+int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash);
+#else
+static inline int lockdep_rht_mutex_is_held(struct rhashtable *ht)
{
- return &tbl->locks[hash & tbl->locks_mask];
+ return 1;
}
-int rhashtable_insert_rehash(struct rhashtable *, struct bucket_table *);
-struct bucket_table *rhashtable_insert_slow(struct rhashtable *,
- const void *,
- struct rhash_head *,
- struct bucket_table *);
+static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl,
+ u32 hash)
+{
+ return 1;
+}
+#endif /* CONFIG_PROVE_LOCKING */
+
+void *rhashtable_insert_slow(struct rhashtable *ht, const void *key,
+ struct rhash_head *obj);
-int rhashtable_init(struct rhashtable *, const struct rhashtable_params *);
-void rhashtable_destroy(struct rhashtable *);
+void rhashtable_walk_enter(struct rhashtable *ht,
+ struct rhashtable_iter *iter);
+void rhashtable_walk_exit(struct rhashtable_iter *iter);
+int rhashtable_walk_start_check(struct rhashtable_iter *iter) __acquires(RCU);
-#define rht_dereference(p, ht) rcu_dereference(p)
-#define rht_dereference_rcu(p, ht) rcu_dereference(p)
-#define rht_dereference_bucket(p, tbl, hash) rcu_dereference(p)
-#define rht_dereference_bucket_rcu(p, tbl, hash) rcu_dereference(p)
+static inline void rhashtable_walk_start(struct rhashtable_iter *iter)
+{
+ (void)rhashtable_walk_start_check(iter);
+}
+
+void *rhashtable_walk_next(struct rhashtable_iter *iter);
+void *rhashtable_walk_peek(struct rhashtable_iter *iter);
+void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU);
+
+void rhashtable_free_and_destroy(struct rhashtable *ht,
+ void (*free_fn)(void *ptr, void *arg),
+ void *arg);
+void rhashtable_destroy(struct rhashtable *ht);
+
+struct rhash_lock_head __rcu **rht_bucket_nested(
+ const struct bucket_table *tbl, unsigned int hash);
+struct rhash_lock_head __rcu **__rht_bucket_nested(
+ const struct bucket_table *tbl, unsigned int hash);
+struct rhash_lock_head __rcu **rht_bucket_nested_insert(
+ struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash);
+
+#define rht_dereference(p, ht) \
+ rcu_dereference(p)
+
+#define rht_dereference_rcu(p, ht) \
+ rcu_dereference(p)
+
+#define rht_dereference_bucket(p, tbl, hash) \
+ rcu_dereference(p)
+
+#define rht_dereference_bucket_rcu(p, tbl, hash) \
+ rcu_dereference(p)
#define rht_entry(tpos, pos, member) \
({ tpos = container_of(pos, typeof(*tpos), member); 1; })
-#define rht_for_each_continue(pos, head, tbl, hash) \
- for (pos = rht_dereference_bucket(head, tbl, hash); \
- !rht_is_a_nulls(pos); \
+static inline struct rhash_lock_head __rcu *const *rht_bucket(
+ const struct bucket_table *tbl, unsigned int hash)
+{
+ return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) :
+ &tbl->buckets[hash];
+}
+
+static inline struct rhash_lock_head __rcu **rht_bucket_var(
+ struct bucket_table *tbl, unsigned int hash)
+{
+ return unlikely(tbl->nest) ? __rht_bucket_nested(tbl, hash) :
+ &tbl->buckets[hash];
+}
+
+static inline struct rhash_lock_head __rcu **rht_bucket_insert(
+ struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash)
+{
+ return unlikely(tbl->nest) ? rht_bucket_nested_insert(ht, tbl, hash) :
+ &tbl->buckets[hash];
+}
+
+/*
+ * We lock a bucket by setting BIT(0) in the pointer - this is always
+ * zero in real pointers. The NULLS mark is never stored in the bucket,
+ * rather we store NULL if the bucket is empty.
+ * bit_spin_locks do not handle contention well, but the whole point
+ * of the hashtable design is to achieve minimum per-bucket contention.
+ * A nested hash table might not have a bucket pointer. In that case
+ * we cannot get a lock. For remove and replace the bucket cannot be
+ * interesting and doesn't need locking.
+ * For insert we allocate the bucket if this is the last bucket_table,
+ * and then take the lock.
+ * Sometimes we unlock a bucket by writing a new pointer there. In that
+ * case we don't need to unlock, but we do need to reset state such as
+ * local_bh. For that we have rht_assign_unlock(). As rcu_assign_pointer()
+ * provides the same release semantics that bit_spin_unlock() provides,
+ * this is safe.
+ * When we write to a bucket without unlocking, we use rht_assign_locked().
+ */
+
+static inline void rht_lock(struct bucket_table *tbl,
+ struct rhash_lock_head __rcu **bkt)
+{
+ bit_spin_lock(0, (unsigned long *)bkt);
+}
+
+static inline void rht_lock_nested(struct bucket_table *tbl,
+ struct rhash_lock_head __rcu **bucket,
+ unsigned int subclass)
+{
+ bit_spin_lock(0, (unsigned long *)bucket);
+}
+
+static inline void rht_unlock(struct bucket_table *tbl,
+ struct rhash_lock_head __rcu **bkt)
+{
+ bit_spin_unlock(0, (unsigned long *)bkt);
+}
+
+static inline struct rhash_head *__rht_ptr(
+ struct rhash_lock_head *p, struct rhash_lock_head __rcu *const *bkt)
+{
+ return (struct rhash_head *)
+ ((unsigned long)p & ~BIT(0) ?:
+ (unsigned long)RHT_NULLS_MARKER(bkt));
+}
+
+/*
+ * Where 'bkt' is a bucket and might be locked:
+ * rht_ptr_rcu() dereferences that pointer and clears the lock bit.
+ * rht_ptr() dereferences in a context where the bucket is locked.
+ * rht_ptr_exclusive() dereferences in a context where exclusive
+ * access is guaranteed, such as when destroying the table.
+ */
+static inline struct rhash_head *rht_ptr_rcu(
+ struct rhash_lock_head __rcu *const *bkt)
+{
+ return __rht_ptr(rcu_dereference(*bkt), bkt);
+}
+
+static inline struct rhash_head *rht_ptr(
+ struct rhash_lock_head __rcu *const *bkt,
+ struct bucket_table *tbl,
+ unsigned int hash)
+{
+ return __rht_ptr(rht_dereference_bucket(*bkt, tbl, hash), bkt);
+}
+
+static inline struct rhash_head *rht_ptr_exclusive(
+ struct rhash_lock_head __rcu *const *bkt)
+{
+ return __rht_ptr(rcu_dereference(*bkt), bkt);
+}
+
+static inline void rht_assign_locked(struct rhash_lock_head __rcu **bkt,
+ struct rhash_head *obj)
+{
+ if (rht_is_a_nulls(obj))
+ obj = NULL;
+ rcu_assign_pointer(*bkt, (void *)((unsigned long)obj | BIT(0)));
+}
+
+static inline void rht_assign_unlock(struct bucket_table *tbl,
+ struct rhash_lock_head __rcu **bkt,
+ struct rhash_head *obj)
+{
+ if (rht_is_a_nulls(obj))
+ obj = NULL;
+ rcu_assign_pointer(*bkt, (void *)obj);
+ preempt_enable();
+ __release(bitlock);
+}
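
Condensed into a sketch (example_push_front is invented and omits the growth checks, nelems accounting and nested-table insert path that the real code below performs), the lock/publish pattern looks like:

static inline void example_push_front(struct bucket_table *tbl,
				      unsigned int hash,
				      struct rhash_head *obj)
{
	struct rhash_lock_head __rcu **bkt = rht_bucket_var(tbl, hash);

	if (!bkt)		/* nested table without an allocated bucket */
		return;

	rht_lock(tbl, bkt);
	RCU_INIT_POINTER(obj->next, rht_ptr(bkt, tbl, hash));
	/* publishing the new head also releases the bucket lock */
	rht_assign_unlock(tbl, bkt, obj);
}
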
+
+/**
+ * rht_for_each_from - iterate over hash chain from given head
+ * @pos: the &struct rhash_head to use as a loop cursor.
+ * @head: the &struct rhash_head to start from
+ * @tbl: the &struct bucket_table
+ * @hash: the hash value / bucket index
+ */
+#define rht_for_each_from(pos, head, tbl, hash) \
+ for (pos = head; \
+ !rht_is_a_nulls(pos); \
pos = rht_dereference_bucket((pos)->next, tbl, hash))
+/**
+ * rht_for_each - iterate over hash chain
+ * @pos: the &struct rhash_head to use as a loop cursor.
+ * @tbl: the &struct bucket_table
+ * @hash: the hash value / bucket index
+ */
#define rht_for_each(pos, tbl, hash) \
- rht_for_each_continue(pos, (tbl)->buckets[hash], tbl, hash)
+ rht_for_each_from(pos, rht_ptr(rht_bucket(tbl, hash), tbl, hash), \
+ tbl, hash)
+
+/**
+ * rht_for_each_entry_from - iterate over hash chain from given head
+ * @tpos: the type * to use as a loop cursor.
+ * @pos: the &struct rhash_head to use as a loop cursor.
+ * @head: the &struct rhash_head to start from
+ * @tbl: the &struct bucket_table
+ * @hash: the hash value / bucket index
+ * @member: name of the &struct rhash_head within the hashable struct.
+ */
+#define rht_for_each_entry_from(tpos, pos, head, tbl, hash, member) \
+ for (pos = head; \
+ (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \
+ pos = rht_dereference_bucket((pos)->next, tbl, hash))
-#define rht_for_each_rcu_continue(pos, head, tbl, hash) \
+/**
+ * rht_for_each_entry - iterate over hash chain of given type
+ * @tpos: the type * to use as a loop cursor.
+ * @pos: the &struct rhash_head to use as a loop cursor.
+ * @tbl: the &struct bucket_table
+ * @hash: the hash value / bucket index
+ * @member: name of the &struct rhash_head within the hashable struct.
+ */
+#define rht_for_each_entry(tpos, pos, tbl, hash, member) \
+ rht_for_each_entry_from(tpos, pos, \
+ rht_ptr(rht_bucket(tbl, hash), tbl, hash), \
+ tbl, hash, member)
+
+/**
+ * rht_for_each_entry_safe - safely iterate over hash chain of given type
+ * @tpos: the type * to use as a loop cursor.
+ * @pos: the &struct rhash_head to use as a loop cursor.
+ * @next: the &struct rhash_head to use as next in loop cursor.
+ * @tbl: the &struct bucket_table
+ * @hash: the hash value / bucket index
+ * @member: name of the &struct rhash_head within the hashable struct.
+ *
+ * This hash chain list-traversal primitive allows for the looped code to
+ * remove the loop cursor from the list.
+ */
+#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member) \
+ for (pos = rht_ptr(rht_bucket(tbl, hash), tbl, hash), \
+ next = !rht_is_a_nulls(pos) ? \
+ rht_dereference_bucket(pos->next, tbl, hash) : NULL; \
+ (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \
+ pos = next, \
+ next = !rht_is_a_nulls(pos) ? \
+ rht_dereference_bucket(pos->next, tbl, hash) : NULL)
+
+/**
+ * rht_for_each_rcu_from - iterate over rcu hash chain from given head
+ * @pos: the &struct rhash_head to use as a loop cursor.
+ * @head: the &struct rhash_head to start from
+ * @tbl: the &struct bucket_table
+ * @hash: the hash value / bucket index
+ *
+ * This hash chain list-traversal primitive may safely run concurrently with
+ * the _rcu mutation primitives such as rhashtable_insert() as long as the
+ * traversal is guarded by rcu_read_lock().
+ */
+#define rht_for_each_rcu_from(pos, head, tbl, hash) \
for (({barrier(); }), \
- pos = rht_dereference_bucket_rcu(head, tbl, hash); \
+ pos = head; \
!rht_is_a_nulls(pos); \
pos = rcu_dereference_raw(pos->next))
-#define rht_for_each_rcu(pos, tbl, hash) \
- rht_for_each_rcu_continue(pos, (tbl)->buckets[hash], tbl, hash)
+/**
+ * rht_for_each_rcu - iterate over rcu hash chain
+ * @pos: the &struct rhash_head to use as a loop cursor.
+ * @tbl: the &struct bucket_table
+ * @hash: the hash value / bucket index
+ *
+ * This hash chain list-traversal primitive may safely run concurrently with
+ * the _rcu mutation primitives such as rhashtable_insert() as long as the
+ * traversal is guarded by rcu_read_lock().
+ */
+#define rht_for_each_rcu(pos, tbl, hash) \
+ for (({barrier(); }), \
+ pos = rht_ptr_rcu(rht_bucket(tbl, hash)); \
+ !rht_is_a_nulls(pos); \
+ pos = rcu_dereference_raw(pos->next))
-#define rht_for_each_entry_rcu_continue(tpos, pos, head, tbl, hash, member) \
+/**
+ * rht_for_each_entry_rcu_from - iterate over rcu hash chain from given head
+ * @tpos: the type * to use as a loop cursor.
+ * @pos: the &struct rhash_head to use as a loop cursor.
+ * @head: the &struct rhash_head to start from
+ * @tbl: the &struct bucket_table
+ * @hash: the hash value / bucket index
+ * @member: name of the &struct rhash_head within the hashable struct.
+ *
+ * This hash chain list-traversal primitive may safely run concurrently with
+ * the _rcu mutation primitives such as rhashtable_insert() as long as the
+ * traversal is guarded by rcu_read_lock().
+ */
+#define rht_for_each_entry_rcu_from(tpos, pos, head, tbl, hash, member) \
for (({barrier(); }), \
- pos = rht_dereference_bucket_rcu(head, tbl, hash); \
+ pos = head; \
(!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \
pos = rht_dereference_bucket_rcu(pos->next, tbl, hash))
-#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \
- rht_for_each_entry_rcu_continue(tpos, pos, (tbl)->buckets[hash],\
- tbl, hash, member)
+/**
+ * rht_for_each_entry_rcu - iterate over rcu hash chain of given type
+ * @tpos: the type * to use as a loop cursor.
+ * @pos: the &struct rhash_head to use as a loop cursor.
+ * @tbl: the &struct bucket_table
+ * @hash: the hash value / bucket index
+ * @member: name of the &struct rhash_head within the hashable struct.
+ *
+ * This hash chain list-traversal primitive may safely run concurrently with
+ * the _rcu mutation primitives such as rhashtable_insert() as long as the
+ * traversal is guarded by rcu_read_lock().
+ */
+#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \
+ rht_for_each_entry_rcu_from(tpos, pos, \
+ rht_ptr_rcu(rht_bucket(tbl, hash)), \
+ tbl, hash, member)
+
+/**
+ * rhl_for_each_rcu - iterate over rcu hash table list
+ * @pos: the &struct rhlist_head to use as a loop cursor.
+ * @list: the head of the list
+ *
+ * This hash chain list-traversal primitive should be used on the
+ * list returned by rhltable_lookup.
+ */
+#define rhl_for_each_rcu(pos, list) \
+ for (pos = list; pos; pos = rcu_dereference_raw(pos->next))
+
+/**
+ * rhl_for_each_entry_rcu - iterate over rcu hash table list of given type
+ * @tpos: the type * to use as a loop cursor.
+ * @pos: the &struct rhlist_head to use as a loop cursor.
+ * @list: the head of the list
+ * @member: name of the &struct rhlist_head within the hashable struct.
+ *
+ * This hash chain list-traversal primitive should be used on the
+ * list returned by rhltable_lookup.
+ */
+#define rhl_for_each_entry_rcu(tpos, pos, list, member) \
+ for (pos = list; pos && rht_entry(tpos, pos, member); \
+ pos = rcu_dereference_raw(pos->next))
static inline int rhashtable_compare(struct rhashtable_compare_arg *arg,
const void *obj)
@@ -254,7 +570,8 @@ static inline int rhashtable_compare(struct rhashtable_compare_arg *arg,
return memcmp(ptr + ht->p.key_offset, arg->key, ht->p.key_len);
}
-static inline void *rhashtable_lookup_fast(
+/* Internal function, do not use. */
+static inline struct rhash_head *__rhashtable_lookup(
struct rhashtable *ht, const void *key,
const struct rhashtable_params params)
{
@@ -262,23 +579,27 @@ static inline void *rhashtable_lookup_fast(
.ht = ht,
.key = key,
};
- const struct bucket_table *tbl;
+ struct rhash_lock_head __rcu *const *bkt;
+ struct bucket_table *tbl;
struct rhash_head *he;
unsigned int hash;
- rcu_read_lock();
-
tbl = rht_dereference_rcu(ht->tbl, ht);
restart:
hash = rht_key_hashfn(ht, tbl, key, params);
- rht_for_each_rcu(he, tbl, hash) {
- if (params.obj_cmpfn ?
- params.obj_cmpfn(&arg, rht_obj(ht, he)) :
- rhashtable_compare(&arg, rht_obj(ht, he)))
- continue;
- rcu_read_unlock();
- return rht_obj(ht, he);
- }
+ bkt = rht_bucket(tbl, hash);
+ do {
+ rht_for_each_rcu_from(he, rht_ptr_rcu(bkt), tbl, hash) {
+ if (params.obj_cmpfn ?
+ params.obj_cmpfn(&arg, rht_obj(ht, he)) :
+ rhashtable_compare(&arg, rht_obj(ht, he)))
+ continue;
+ return he;
+ }
+ /* An object might have been moved to a different hash chain,
+ * while we walk along it - better check and retry.
+ */
+ } while (he != RHT_NULLS_MARKER(bkt));
/* Ensure we see any new tables. */
smp_rmb();
@@ -286,150 +607,594 @@ restart:
tbl = rht_dereference_rcu(tbl->future_tbl, ht);
if (unlikely(tbl))
goto restart;
- rcu_read_unlock();
return NULL;
}
-static inline int __rhashtable_insert_fast(
- struct rhashtable *ht, const void *key, struct rhash_head *obj,
+/**
+ * rhashtable_lookup - search hash table
+ * @ht: hash table
+ * @key: the pointer to the key
+ * @params: hash table parameters
+ *
+ * Computes the hash value for the key and traverses the bucket chain looking
+ * for an entry with an identical key. The first matching entry is returned.
+ *
+ * This must only be called under the RCU read lock.
+ *
+ * Returns the first entry on which the compare function returned true.
+ */
+static inline void *rhashtable_lookup(
+ struct rhashtable *ht, const void *key,
+ const struct rhashtable_params params)
+{
+ struct rhash_head *he = __rhashtable_lookup(ht, key, params);
+
+ return he ? rht_obj(ht, he) : NULL;
+}
+
+/**
+ * rhashtable_lookup_fast - search hash table, without RCU read lock
+ * @ht: hash table
+ * @key: the pointer to the key
+ * @params: hash table parameters
+ *
+ * Computes the hash value for the key and traverses the bucket chain looking
+ * for an entry with an identical key. The first matching entry is returned.
+ *
+ * Only use this function when you have other mechanisms guaranteeing
+ * that the object won't go away after the RCU read lock is released.
+ *
+ * Returns the first entry on which the compare function returned true.
+ */
+static inline void *rhashtable_lookup_fast(
+ struct rhashtable *ht, const void *key,
+ const struct rhashtable_params params)
+{
+ void *obj;
+
+ rcu_read_lock();
+ obj = rhashtable_lookup(ht, key, params);
+ rcu_read_unlock();
+
+ return obj;
+}
+
+/**
+ * rhltable_lookup - search hash list table
+ * @hlt: hash table
+ * @key: the pointer to the key
+ * @params: hash table parameters
+ *
+ * Computes the hash value for the key and traverses the bucket chain looking
+ * for an entry with an identical key. All matching entries are returned
+ * in a list.
+ *
+ * This must only be called under the RCU read lock.
+ *
+ * Returns the list of entries that match the given key.
+ */
+static inline struct rhlist_head *rhltable_lookup(
+ struct rhltable *hlt, const void *key,
const struct rhashtable_params params)
{
+ struct rhash_head *he = __rhashtable_lookup(&hlt->ht, key, params);
+
+ return he ? container_of(he, struct rhlist_head, rhead) : NULL;
+}
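
Together with rhl_for_each_entry_rcu() above, duplicates are typically walked like this (struct my_dup and the params argument are invented for illustration):

struct my_dup {
	u32			key;
	struct rhlist_head	list_node;
};

static void walk_duplicates(struct rhltable *hlt, const u32 *key,
			    const struct rhashtable_params params)
{
	struct rhlist_head *list, *pos;
	struct my_dup *cur;

	rcu_read_lock();
	list = rhltable_lookup(hlt, key, params);
	rhl_for_each_entry_rcu(cur, pos, list, list_node) {
		/* every cur on the list has a key equal to *key */
	}
	rcu_read_unlock();
}
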
+
+/* Internal function, please use rhashtable_insert_fast() instead. This
+ * function returns the existing element already in the hash table if there
+ * is a clash, otherwise it returns an error via ERR_PTR().
+ */
+static inline void *__rhashtable_insert_fast(
+ struct rhashtable *ht, const void *key, struct rhash_head *obj,
+ const struct rhashtable_params params, bool rhlist)
+{
struct rhashtable_compare_arg arg = {
.ht = ht,
.key = key,
};
- struct bucket_table *tbl, *new_tbl;
+ struct rhash_lock_head __rcu **bkt;
+ struct rhash_head __rcu **pprev;
+ struct bucket_table *tbl;
struct rhash_head *head;
- spinlock_t *lock;
- unsigned int elasticity;
unsigned int hash;
- int err;
+ int elasticity;
+ void *data;
-restart:
rcu_read_lock();
tbl = rht_dereference_rcu(ht->tbl, ht);
+ hash = rht_head_hashfn(ht, tbl, obj, params);
+ elasticity = RHT_ELASTICITY;
+ bkt = rht_bucket_insert(ht, tbl, hash);
+ data = ERR_PTR(-ENOMEM);
+ if (!bkt)
+ goto out;
+ pprev = NULL;
+ rht_lock(tbl, bkt);
- /* All insertions must grab the oldest table containing
- * the hashed bucket that is yet to be rehashed.
- */
- for (;;) {
- hash = rht_head_hashfn(ht, tbl, obj, params);
- lock = rht_bucket_lock(tbl, hash);
- spin_lock_bh(lock);
+ if (unlikely(rcu_access_pointer(tbl->future_tbl))) {
+slow_path:
+ rht_unlock(tbl, bkt);
+ rcu_read_unlock();
+ return rhashtable_insert_slow(ht, key, obj);
+ }
- if (tbl->rehash <= hash)
- break;
+ rht_for_each_from(head, rht_ptr(bkt, tbl, hash), tbl, hash) {
+ struct rhlist_head *plist;
+ struct rhlist_head *list;
- spin_unlock_bh(lock);
- tbl = rht_dereference_rcu(tbl->future_tbl, ht);
- }
+ elasticity--;
+ if (!key ||
+ (params.obj_cmpfn ?
+ params.obj_cmpfn(&arg, rht_obj(ht, head)) :
+ rhashtable_compare(&arg, rht_obj(ht, head)))) {
+ pprev = &head->next;
+ continue;
+ }
- new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
- if (unlikely(new_tbl)) {
- tbl = rhashtable_insert_slow(ht, key, obj, new_tbl);
- if (!IS_ERR_OR_NULL(tbl))
- goto slow_path;
+ data = rht_obj(ht, head);
- err = PTR_ERR(tbl);
- goto out;
- }
+ if (!rhlist)
+ goto out_unlock;
- err = -E2BIG;
- if (unlikely(rht_grow_above_max(ht, tbl)))
- goto out;
- if (unlikely(rht_grow_above_100(ht, tbl))) {
-slow_path:
- spin_unlock_bh(lock);
- err = rhashtable_insert_rehash(ht, tbl);
- rcu_read_unlock();
- if (err)
- return err;
+ list = container_of(obj, struct rhlist_head, rhead);
+ plist = container_of(head, struct rhlist_head, rhead);
- goto restart;
+ RCU_INIT_POINTER(list->next, plist);
+ head = rht_dereference_bucket(head->next, tbl, hash);
+ RCU_INIT_POINTER(list->rhead.next, head);
+ if (pprev) {
+ rcu_assign_pointer(*pprev, obj);
+ rht_unlock(tbl, bkt);
+ } else
+ rht_assign_unlock(tbl, bkt, obj);
+ data = NULL;
+ goto out;
}
- err = -EEXIST;
- elasticity = ht->elasticity;
- rht_for_each(head, tbl, hash) {
- if (key &&
- unlikely(!(params.obj_cmpfn ?
- params.obj_cmpfn(&arg, rht_obj(ht, head)) :
- rhashtable_compare(&arg, rht_obj(ht, head)))))
- goto out;
- if (!--elasticity)
- goto slow_path;
- }
+ if (elasticity <= 0)
+ goto slow_path;
+
+ data = ERR_PTR(-E2BIG);
+ if (unlikely(rht_grow_above_max(ht, tbl)))
+ goto out_unlock;
- err = 0;
+ if (unlikely(rht_grow_above_100(ht, tbl)))
+ goto slow_path;
- head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
+ /* Inserting at head of list makes unlocking free. */
+ head = rht_ptr(bkt, tbl, hash);
RCU_INIT_POINTER(obj->next, head);
+ if (rhlist) {
+ struct rhlist_head *list;
- rcu_assign_pointer(tbl->buckets[hash], obj);
+ list = container_of(obj, struct rhlist_head, rhead);
+ RCU_INIT_POINTER(list->next, NULL);
+ }
atomic_inc(&ht->nelems);
+ rht_assign_unlock(tbl, bkt, obj);
+
if (rht_grow_above_75(ht, tbl))
schedule_work(&ht->run_work);
+ data = NULL;
out:
- spin_unlock_bh(lock);
rcu_read_unlock();
- return err;
+ return data;
+
+out_unlock:
+ rht_unlock(tbl, bkt);
+ goto out;
}
+/**
+ * rhashtable_insert_fast - insert object into hash table
+ * @ht: hash table
+ * @obj: pointer to hash head inside object
+ * @params: hash table parameters
+ *
+ * Will take the per bucket bitlock to protect against mutual mutations
+ * on the same bucket. Multiple insertions may occur in parallel unless
+ * they map to the same bucket.
+ *
+ * It is safe to call this function from atomic context.
+ *
+ * Will trigger an automatic deferred table resizing if residency in the
+ * table grows beyond 70%.
+ */
+static inline int rhashtable_insert_fast(
+ struct rhashtable *ht, struct rhash_head *obj,
+ const struct rhashtable_params params)
+{
+ void *ret;
+
+ ret = __rhashtable_insert_fast(ht, NULL, obj, params, false);
+ if (IS_ERR(ret))
+ return PTR_ERR(ret);
+
+ return ret == NULL ? 0 : -EEXIST;
+}
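
Reusing the hypothetical my_obj/my_params from the rhashtable-types example above, a caller would do something like:

static int add_obj(struct rhashtable *ht, struct my_obj *obj)
{
	/* 0 on success, -EEXIST if an object with the same key is hashed */
	return rhashtable_insert_fast(ht, &obj->node, my_params);
}
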
+
+/**
+ * rhltable_insert_key - insert object into hash list table
+ * @hlt: hash list table
+ * @key: the pointer to the key
+ * @list: pointer to hash list head inside object
+ * @params: hash table parameters
+ *
+ * Will take the per bucket bitlock to protect against mutual mutations
+ * on the same bucket. Multiple insertions may occur in parallel unless
+ * they map to the same bucket.
+ *
+ * It is safe to call this function from atomic context.
+ *
+ * Will trigger an automatic deferred table resizing if residency in the
+ * table grows beyond 70%.
+ */
+static inline int rhltable_insert_key(
+ struct rhltable *hlt, const void *key, struct rhlist_head *list,
+ const struct rhashtable_params params)
+{
+ return PTR_ERR(__rhashtable_insert_fast(&hlt->ht, key, &list->rhead,
+ params, true));
+}
+
+/**
+ * rhltable_insert - insert object into hash list table
+ * @hlt: hash list table
+ * @list: pointer to hash list head inside object
+ * @params: hash table parameters
+ *
+ * Will take the per bucket bitlock to protect against mutual mutations
+ * on the same bucket. Multiple insertions may occur in parallel unless
+ * they map to the same bucket.
+ *
+ * It is safe to call this function from atomic context.
+ *
+ * Will trigger an automatic deferred table resizing if residency in the
+ * table grows beyond 70%.
+ */
+static inline int rhltable_insert(
+ struct rhltable *hlt, struct rhlist_head *list,
+ const struct rhashtable_params params)
+{
+ const char *key = rht_obj(&hlt->ht, &list->rhead);
+
+ key += params.key_offset;
+
+ return rhltable_insert_key(hlt, key, list, params);
+}
+
+/**
+ * rhashtable_lookup_insert_fast - lookup and insert object into hash table
+ * @ht: hash table
+ * @obj: pointer to hash head inside object
+ * @params: hash table parameters
+ *
+ * This lookup function may only be used for fixed-key hash tables (key_len
+ * parameter set). It will BUG() if used inappropriately.
+ *
+ * It is safe to call this function from atomic context.
+ *
+ * Will trigger an automatic deferred table resizing if residency in the
+ * table grows beyond 70%.
+ */
static inline int rhashtable_lookup_insert_fast(
struct rhashtable *ht, struct rhash_head *obj,
const struct rhashtable_params params)
{
const char *key = rht_obj(ht, obj);
+ void *ret;
BUG_ON(ht->p.obj_hashfn);
- return __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj,
- params);
+ ret = __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params,
+ false);
+ if (IS_ERR(ret))
+ return PTR_ERR(ret);
+
+ return ret == NULL ? 0 : -EEXIST;
}
-static inline int __rhashtable_remove_fast(
+/**
+ * rhashtable_lookup_get_insert_fast - lookup and insert object into hash table
+ * @ht: hash table
+ * @obj: pointer to hash head inside object
+ * @params: hash table parameters
+ *
+ * Just like rhashtable_lookup_insert_fast(), but this function returns the
+ * object if it exists, NULL if it does not and the insertion was successful,
+ * and an ERR_PTR otherwise.
+ */
+static inline void *rhashtable_lookup_get_insert_fast(
+ struct rhashtable *ht, struct rhash_head *obj,
+ const struct rhashtable_params params)
+{
+ const char *key = rht_obj(ht, obj);
+
+ BUG_ON(ht->p.obj_hashfn);
+
+ return __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params,
+ false);
+}
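A sketch of the return-value handling, reusing the hypothetical struct foo/foo_params from the earlier insert sketch; unlike rhashtable_lookup_insert_fast(), the caller gets the already-hashed object back instead of a bare -EEXIST.

	static struct foo *foo_get_or_add(struct rhashtable *ht, struct foo *new)
	{
		struct foo *old;

		old = rhashtable_lookup_get_insert_fast(ht, &new->node, foo_params);
		if (IS_ERR(old))
			return old;	/* insertion failed: -ENOMEM, -E2BIG, ... */
		if (old)
			return old;	/* an object with this key already exists */
		return new;		/* 'new' was inserted */
	}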
+
+/**
+ * rhashtable_lookup_insert_key - search and insert object into hash table
+ * with explicit key
+ * @ht: hash table
+ * @key: key
+ * @obj: pointer to hash head inside object
+ * @params: hash table parameters
+ *
+ * Lookups may occur in parallel with hashtable mutations and resizing.
+ *
+ * Will trigger an automatic deferred table resizing if residency in the
+ * table grows beyond 70%.
+ *
+ * Returns zero on success.
+ */
+static inline int rhashtable_lookup_insert_key(
+ struct rhashtable *ht, const void *key, struct rhash_head *obj,
+ const struct rhashtable_params params)
+{
+ void *ret;
+
+ BUG_ON(!ht->p.obj_hashfn || !key);
+
+ ret = __rhashtable_insert_fast(ht, key, obj, params, false);
+ if (IS_ERR(ret))
+ return PTR_ERR(ret);
+
+ return ret == NULL ? 0 : -EEXIST;
+}
+
+/**
+ * rhashtable_lookup_get_insert_key - lookup and insert object into hash table
+ * @ht: hash table
+ * @key: key
+ * @obj: pointer to hash head inside object
+ * @params: hash table parameters
+ *
+ * Just like rhashtable_lookup_insert_key(), but this function returns the
+ * object if it exists, NULL if it does not and the insertion was successful,
+ * and an ERR_PTR otherwise.
+ */
+static inline void *rhashtable_lookup_get_insert_key(
+ struct rhashtable *ht, const void *key, struct rhash_head *obj,
+ const struct rhashtable_params params)
+{
+ BUG_ON(!ht->p.obj_hashfn || !key);
+
+ return __rhashtable_insert_fast(ht, key, obj, params, false);
+}
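The explicit-key variants require .obj_hashfn in the params, as the BUG_ON()s above enforce (and in practice .obj_cmpfn as well). A hedged calling sketch, where ht, key, obj and my_params are purely illustrative:

	existing = rhashtable_lookup_get_insert_key(ht, key, &obj->node,
						    my_params);
	if (IS_ERR(existing))
		return PTR_ERR(existing);
	if (existing)
		kfree(obj);	/* another object already carries this key */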
+
+/* Internal function, please use rhashtable_remove_fast() instead */
+static inline int __rhashtable_remove_fast_one(
struct rhashtable *ht, struct bucket_table *tbl,
- struct rhash_head *obj, const struct rhashtable_params params)
+ struct rhash_head *obj, const struct rhashtable_params params,
+ bool rhlist)
{
+ struct rhash_lock_head __rcu **bkt;
struct rhash_head __rcu **pprev;
struct rhash_head *he;
- spinlock_t * lock;
unsigned int hash;
int err = -ENOENT;
hash = rht_head_hashfn(ht, tbl, obj, params);
- lock = rht_bucket_lock(tbl, hash);
+ bkt = rht_bucket_var(tbl, hash);
+ if (!bkt)
+ return -ENOENT;
+ pprev = NULL;
+ rht_lock(tbl, bkt);
- spin_lock_bh(lock);
+ rht_for_each_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) {
+ struct rhlist_head *list;
+
+ list = container_of(he, struct rhlist_head, rhead);
- pprev = &tbl->buckets[hash];
- rht_for_each(he, tbl, hash) {
if (he != obj) {
+ struct rhlist_head __rcu **lpprev;
+
pprev = &he->next;
- continue;
+
+ if (!rhlist)
+ continue;
+
+ do {
+ lpprev = &list->next;
+ list = rht_dereference_bucket(list->next,
+ tbl, hash);
+ } while (list && obj != &list->rhead);
+
+ if (!list)
+ continue;
+
+ list = rht_dereference_bucket(list->next, tbl, hash);
+ RCU_INIT_POINTER(*lpprev, list);
+ err = 0;
+ break;
}
- rcu_assign_pointer(*pprev, obj->next);
+ obj = rht_dereference_bucket(obj->next, tbl, hash);
+ err = 1;
+
+ if (rhlist) {
+ list = rht_dereference_bucket(list->next, tbl, hash);
+ if (list) {
+ RCU_INIT_POINTER(list->rhead.next, obj);
+ obj = &list->rhead;
+ err = 0;
+ }
+ }
+
+ if (pprev) {
+ rcu_assign_pointer(*pprev, obj);
+ rht_unlock(tbl, bkt);
+ } else {
+ rht_assign_unlock(tbl, bkt, obj);
+ }
+ goto unlocked;
+ }
+
+ rht_unlock(tbl, bkt);
+unlocked:
+ if (err > 0) {
+ atomic_dec(&ht->nelems);
+ if (unlikely(ht->p.automatic_shrinking &&
+ rht_shrink_below_30(ht, tbl)))
+ schedule_work(&ht->run_work);
err = 0;
- break;
}
- spin_unlock_bh(lock);
+ return err;
+}
+
+/* Internal function, please use rhashtable_remove_fast() instead */
+static inline int __rhashtable_remove_fast(
+ struct rhashtable *ht, struct rhash_head *obj,
+ const struct rhashtable_params params, bool rhlist)
+{
+ struct bucket_table *tbl;
+ int err;
+
+ rcu_read_lock();
+
+ tbl = rht_dereference_rcu(ht->tbl, ht);
+
+ /* Because we have already taken (and released) the bucket
+ * lock in old_tbl, if we find that future_tbl is not yet
+ * visible then that guarantees the entry to still be in
+ * the old tbl if it exists.
+ */
+ while ((err = __rhashtable_remove_fast_one(ht, tbl, obj, params,
+ rhlist)) &&
+ (tbl = rht_dereference_rcu(tbl->future_tbl, ht)))
+ ;
+
+ rcu_read_unlock();
return err;
}
+/**
+ * rhashtable_remove_fast - remove object from hash table
+ * @ht: hash table
+ * @obj: pointer to hash head inside object
+ * @params: hash table parameters
+ *
+ * Since the hash chain is single linked, the removal operation needs to
+ * walk the bucket chain upon removal. The removal operation is thus
+ * considerably slow if the hash table is not correctly sized.
+ *
+ * Will automatically shrink the table if permitted when residency drops
+ * below 30%.
+ *
+ * Returns zero on success, -ENOENT if the entry could not be found.
+ */
static inline int rhashtable_remove_fast(
struct rhashtable *ht, struct rhash_head *obj,
const struct rhashtable_params params)
{
+ return __rhashtable_remove_fast(ht, obj, params, false);
+}
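A removal sketch, again with the hypothetical struct foo; it additionally assumes foo embeds a struct rcu_head named rcu, since RCU readers may still be traversing the chain when the object is unhashed.

	static void foo_del(struct rhashtable *ht, struct foo *obj)
	{
		if (rhashtable_remove_fast(ht, &obj->node, foo_params) == 0)
			kfree_rcu(obj, rcu);	/* defer the free past concurrent readers */
	}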
+
+/**
+ * rhltable_remove - remove object from hash list table
+ * @hlt: hash list table
+ * @list: pointer to hash list head inside object
+ * @params: hash table parameters
+ *
+ * Since the hash chain is single linked, the removal operation needs to
+ * walk the bucket chain upon removal. The removal operation is thus
+ * considerably slow if the hash table is not correctly sized.
+ *
+ * Will automatically shrink the table if permitted when residency drops
+ * below 30%.
+ *
+ * Returns zero on success, -ENOENT if the entry could not be found.
+ */
+static inline int rhltable_remove(
+ struct rhltable *hlt, struct rhlist_head *list,
+ const struct rhashtable_params params)
+{
+ return __rhashtable_remove_fast(&hlt->ht, &list->rhead, params, true);
+}
+
+/* Internal function, please use rhashtable_replace_fast() instead */
+static inline int __rhashtable_replace_fast(
+ struct rhashtable *ht, struct bucket_table *tbl,
+ struct rhash_head *obj_old, struct rhash_head *obj_new,
+ const struct rhashtable_params params)
+{
+ struct rhash_lock_head __rcu **bkt;
+ struct rhash_head __rcu **pprev;
+ struct rhash_head *he;
+ unsigned int hash;
+ int err = -ENOENT;
+
+ /* Minimally, the old and new objects must have the same hash
+ * (which should mean identifiers are the same).
+ */
+ hash = rht_head_hashfn(ht, tbl, obj_old, params);
+ if (hash != rht_head_hashfn(ht, tbl, obj_new, params))
+ return -EINVAL;
+
+ bkt = rht_bucket_var(tbl, hash);
+ if (!bkt)
+ return -ENOENT;
+
+ pprev = NULL;
+ rht_lock(tbl, bkt);
+
+ rht_for_each_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) {
+ if (he != obj_old) {
+ pprev = &he->next;
+ continue;
+ }
+
+ rcu_assign_pointer(obj_new->next, obj_old->next);
+ if (pprev) {
+ rcu_assign_pointer(*pprev, obj_new);
+ rht_unlock(tbl, bkt);
+ } else {
+ rht_assign_unlock(tbl, bkt, obj_new);
+ }
+ err = 0;
+ goto unlocked;
+ }
+
+ rht_unlock(tbl, bkt);
+
+unlocked:
+ return err;
+}
+
+/**
+ * rhashtable_replace_fast - replace an object in hash table
+ * @ht: hash table
+ * @obj_old: pointer to hash head inside object being replaced
+ * @obj_new: pointer to hash head inside object which is new
+ * @params: hash table parameters
+ *
+ * Replacing an object doesn't affect the number of elements in the hash table
+ * or bucket, so we don't need to worry about shrinking or expanding the
+ * table here.
+ *
+ * Returns zero on success, -ENOENT if the entry could not be found,
+ * -EINVAL if hash is not the same for the old and new objects.
+ */
+static inline int rhashtable_replace_fast(
+ struct rhashtable *ht, struct rhash_head *obj_old,
+ struct rhash_head *obj_new,
+ const struct rhashtable_params params)
+{
struct bucket_table *tbl;
int err;
@@ -442,22 +1207,62 @@ static inline int rhashtable_remove_fast(
* visible then that guarantees the entry to still be in
* the old tbl if it exists.
*/
- while ((err = __rhashtable_remove_fast(ht, tbl, obj, params)) &&
+ while ((err = __rhashtable_replace_fast(ht, tbl, obj_old,
+ obj_new, params)) &&
(tbl = rht_dereference_rcu(tbl->future_tbl, ht)))
;
- if (err)
- goto out;
-
- atomic_dec(&ht->nelems);
- if (unlikely(ht->p.automatic_shrinking &&
- rht_shrink_below_30(ht, tbl)))
- schedule_work(&ht->run_work);
-
-out:
rcu_read_unlock();
return err;
}
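A replacement sketch under the same hypothetical struct foo (with the rcu member assumed earlier); both objects must hash to the same key or -EINVAL is returned.

	err = rhashtable_replace_fast(ht, &old->node, &new->node, foo_params);
	if (!err)
		kfree_rcu(old, rcu);	/* 'new' has taken 'old's place in the chain */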
+/**
+ * rhltable_walk_enter - Initialise an iterator
+ * @hlt: Table to walk over
+ * @iter: Hash table iterator
+ *
+ * This function prepares a hash table walk.
+ *
+ * Note that if you restart a walk after rhashtable_walk_stop you
+ * may see the same object twice. Also, you may miss objects if
+ * there are removals in between rhashtable_walk_stop and the next
+ * call to rhashtable_walk_start.
+ *
+ * For a completely stable walk you should construct your own data
+ * structure outside the hash table.
+ *
+ * This function may be called from any process context, including
+ * non-preemptable context, but cannot be called from softirq or
+ * hardirq context.
+ *
+ * You must call rhashtable_walk_exit after this function returns.
+ */
+static inline void rhltable_walk_enter(struct rhltable *hlt,
+ struct rhashtable_iter *iter)
+{
+ return rhashtable_walk_enter(&hlt->ht, iter);
+}
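A walk sketch over the hypothetical struct bar rhltable from earlier; the rhashtable_walk_start/next/stop/exit helpers are part of the same walker API this wrapper feeds into.

	struct rhashtable_iter iter;
	struct bar *obj;

	rhltable_walk_enter(hlt, &iter);
	rhashtable_walk_start(&iter);

	while ((obj = rhashtable_walk_next(&iter)) != NULL) {
		if (IS_ERR(obj)) {
			if (PTR_ERR(obj) == -EAGAIN)
				continue;	/* table resized; entries may repeat */
			break;
		}
		/* use obj; the walk is not stable across stop/start */
	}

	rhashtable_walk_stop(&iter);
	rhashtable_walk_exit(&iter);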
+
+/**
+ * rhltable_free_and_destroy - free elements and destroy hash list table
+ * @hlt: the hash list table to destroy
+ * @free_fn: callback to release resources of element
+ * @arg: pointer passed to free_fn
+ *
+ * See documentation for rhashtable_free_and_destroy.
+ */
+static inline void rhltable_free_and_destroy(struct rhltable *hlt,
+ void (*free_fn)(void *ptr,
+ void *arg),
+ void *arg)
+{
+ return rhashtable_free_and_destroy(&hlt->ht, free_fn, arg);
+}
+
+static inline void rhltable_destroy(struct rhltable *hlt)
+{
+ return rhltable_free_and_destroy(hlt, NULL, NULL);
+}
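Finally, a teardown sketch: free_fn receives each remaining containing object (here the hypothetical struct bar), not the rhlist_head, so a plain kfree() is enough when the objects were kmalloc'ed.

	static void bar_free_fn(void *ptr, void *arg)
	{
		kfree(ptr);	/* ptr is the containing struct bar */
	}

	static void bar_table_exit(struct rhltable *hlt)
	{
		/* frees every element still in the table, then destroys it */
		rhltable_free_and_destroy(hlt, bar_free_fn, NULL);
	}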
+
#endif /* _LINUX_RHASHTABLE_H */
diff --git a/include/linux/six.h b/include/linux/six.h
index 0e6df059..477c33eb 100644
--- a/include/linux/six.h
+++ b/include/linux/six.h
@@ -196,6 +196,7 @@ void six_lock_increment(struct six_lock *, enum six_lock_type);
void six_lock_wakeup_all(struct six_lock *);
+void six_lock_pcpu_free_rcu(struct six_lock *);
void six_lock_pcpu_free(struct six_lock *);
void six_lock_pcpu_alloc(struct six_lock *);
diff --git a/include/linux/slab.h b/include/linux/slab.h
index b8a1235b..775b7e3a 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -66,6 +66,7 @@ static inline void *krealloc(void *old, size_t size, gfp_t flags)
#define kzfree(p) free(p)
#define kvmalloc(size, flags) kmalloc(size, flags)
+#define kvzalloc(size, flags) kzalloc(size, flags)
#define kvfree(p) kfree(p)
static inline struct page *alloc_pages(gfp_t flags, unsigned int order)
diff --git a/include/linux/types.h b/include/linux/types.h
index 1e125550..c9886cba 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -11,6 +11,8 @@
#define __SANE_USERSPACE_TYPES__ /* For PPC64, to get LL64 types */
#include <asm/types.h>
+#include <linux/cache.h>
+
#define BITS_PER_LONG __BITS_PER_LONG
struct page;