summaryrefslogtreecommitdiff
path: root/linux
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@linux.dev>2024-01-16 17:00:02 -0500
committerKent Overstreet <kent.overstreet@linux.dev>2024-01-16 17:17:23 -0500
commitb5fd066153c40a70a29caa1ea7987723ab687763 (patch)
tree6d43a8b0a90d549a54c65565ac96c92b3e84b594 /linux
parent06ff8b55b70fda44d91b31b5511fafd1680a8934 (diff)
Move c_src dirs back to toplevel
We just wanted c sourcefiles out of the top level, not c source directories. Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to 'linux')
-rw-r--r--linux/atomic64.c188
-rw-r--r--linux/bio.c395
-rw-r--r--linux/blkdev.c428
-rw-r--r--linux/closure.c211
-rw-r--r--linux/crc64.c56
-rw-r--r--linux/crc64table.h135
-rw-r--r--linux/crypto/api.c114
-rw-r--r--linux/crypto/chacha20_generic.c111
-rw-r--r--linux/crypto/poly1305_generic.c88
-rw-r--r--linux/crypto/sha256_generic.c81
-rw-r--r--linux/fs.c14
-rw-r--r--linux/generic-radix-tree.c307
-rw-r--r--linux/int_sqrt.c71
-rw-r--r--linux/kstrtox.c357
-rw-r--r--linux/kstrtox.h7
-rw-r--r--linux/kthread.c139
-rw-r--r--linux/llist.c92
-rw-r--r--linux/mempool.c541
-rw-r--r--linux/preempt.c37
-rw-r--r--linux/ratelimit.c69
-rw-r--r--linux/rhashtable.c1237
-rw-r--r--linux/sched.c133
-rw-r--r--linux/semaphore.c191
-rw-r--r--linux/seq_buf.c152
-rw-r--r--linux/shrinker.c140
-rw-r--r--linux/siphash.c552
-rw-r--r--linux/string.c112
-rw-r--r--linux/string_helpers.c130
-rw-r--r--linux/timer.c329
-rw-r--r--linux/wait.c250
-rw-r--r--linux/workqueue.c346
-rw-r--r--linux/xxhash.c500
-rw-r--r--linux/zstd_compress_module.c157
-rw-r--r--linux/zstd_decompress_module.c103
34 files changed, 7773 insertions, 0 deletions
diff --git a/linux/atomic64.c b/linux/atomic64.c
new file mode 100644
index 00000000..4654d092
--- /dev/null
+++ b/linux/atomic64.c
@@ -0,0 +1,188 @@
+/*
+ * Generic implementation of 64-bit atomics using spinlocks,
+ * useful on processors that don't have 64-bit atomic instructions.
+ *
+ * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/types.h>
+#include <linux/cache.h>
+#include <linux/spinlock.h>
+#include <linux/atomic.h>
+
+#ifdef ATOMIC64_SPINLOCK
+
+/*
+ * We use a hashed array of spinlocks to provide exclusive access
+ * to each atomic64_t variable. Since this is expected to used on
+ * systems with small numbers of CPUs (<= 4 or so), we use a
+ * relatively small array of 16 spinlocks to avoid wasting too much
+ * memory on the spinlock array.
+ */
+#define NR_LOCKS 16
+
+/*
+ * Ensure each lock is in a separate cacheline.
+ */
+static union {
+ raw_spinlock_t lock;
+ char pad[L1_CACHE_BYTES];
+} atomic64_lock[NR_LOCKS] ____cacheline_aligned_in_smp = {
+ [0 ... (NR_LOCKS - 1)] = {
+ .lock = __RAW_SPIN_LOCK_UNLOCKED(atomic64_lock.lock),
+ },
+};
+
+static inline raw_spinlock_t *lock_addr(const atomic64_t *v)
+{
+ unsigned long addr = (unsigned long) v;
+
+ addr >>= L1_CACHE_SHIFT;
+ addr ^= (addr >> 8) ^ (addr >> 16);
+ return &atomic64_lock[addr & (NR_LOCKS - 1)].lock;
+}
+
+long long atomic64_read(const atomic64_t *v)
+{
+ unsigned long flags;
+ raw_spinlock_t *lock = lock_addr(v);
+ long long val;
+
+ raw_spin_lock_irqsave(lock, flags);
+ val = v->counter;
+ raw_spin_unlock_irqrestore(lock, flags);
+ return val;
+}
+
+void atomic64_set(atomic64_t *v, long long i)
+{
+ unsigned long flags;
+ raw_spinlock_t *lock = lock_addr(v);
+
+ raw_spin_lock_irqsave(lock, flags);
+ v->counter = i;
+ raw_spin_unlock_irqrestore(lock, flags);
+}
+
+#define ATOMIC64_OP(op, c_op) \
+void atomic64_##op(long long a, atomic64_t *v) \
+{ \
+ unsigned long flags; \
+ raw_spinlock_t *lock = lock_addr(v); \
+ \
+ raw_spin_lock_irqsave(lock, flags); \
+ v->counter c_op a; \
+ raw_spin_unlock_irqrestore(lock, flags); \
+}
+
+#define ATOMIC64_OP_RETURN(op, c_op) \
+long long atomic64_##op##_return(long long a, atomic64_t *v) \
+{ \
+ unsigned long flags; \
+ raw_spinlock_t *lock = lock_addr(v); \
+ long long val; \
+ \
+ raw_spin_lock_irqsave(lock, flags); \
+ val = (v->counter c_op a); \
+ raw_spin_unlock_irqrestore(lock, flags); \
+ return val; \
+}
+
+#define ATOMIC64_FETCH_OP(op, c_op) \
+long long atomic64_fetch_##op(long long a, atomic64_t *v) \
+{ \
+ unsigned long flags; \
+ raw_spinlock_t *lock = lock_addr(v); \
+ long long val; \
+ \
+ raw_spin_lock_irqsave(lock, flags); \
+ val = v->counter; \
+ v->counter c_op a; \
+ raw_spin_unlock_irqrestore(lock, flags); \
+ return val; \
+}
+
+#define ATOMIC64_OPS(op, c_op) \
+ ATOMIC64_OP(op, c_op) \
+ ATOMIC64_OP_RETURN(op, c_op) \
+ ATOMIC64_FETCH_OP(op, c_op)
+
+ATOMIC64_OPS(add, +=)
+ATOMIC64_OPS(sub, -=)
+
+#undef ATOMIC64_OPS
+#define ATOMIC64_OPS(op, c_op) \
+ ATOMIC64_OP(op, c_op) \
+ ATOMIC64_OP_RETURN(op, c_op) \
+ ATOMIC64_FETCH_OP(op, c_op)
+
+ATOMIC64_OPS(and, &=)
+ATOMIC64_OPS(or, |=)
+ATOMIC64_OPS(xor, ^=)
+
+#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
+#undef ATOMIC64_OP_RETURN
+#undef ATOMIC64_OP
+
+long long atomic64_dec_if_positive(atomic64_t *v)
+{
+ unsigned long flags;
+ raw_spinlock_t *lock = lock_addr(v);
+ long long val;
+
+ raw_spin_lock_irqsave(lock, flags);
+ val = v->counter - 1;
+ if (val >= 0)
+ v->counter = val;
+ raw_spin_unlock_irqrestore(lock, flags);
+ return val;
+}
+
+long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n)
+{
+ unsigned long flags;
+ raw_spinlock_t *lock = lock_addr(v);
+ long long val;
+
+ raw_spin_lock_irqsave(lock, flags);
+ val = v->counter;
+ if (val == o)
+ v->counter = n;
+ raw_spin_unlock_irqrestore(lock, flags);
+ return val;
+}
+
+long long atomic64_xchg(atomic64_t *v, long long new)
+{
+ unsigned long flags;
+ raw_spinlock_t *lock = lock_addr(v);
+ long long val;
+
+ raw_spin_lock_irqsave(lock, flags);
+ val = v->counter;
+ v->counter = new;
+ raw_spin_unlock_irqrestore(lock, flags);
+ return val;
+}
+
+int atomic64_add_unless(atomic64_t *v, long long a, long long u)
+{
+ unsigned long flags;
+ raw_spinlock_t *lock = lock_addr(v);
+ int ret = 0;
+
+ raw_spin_lock_irqsave(lock, flags);
+ if (v->counter != u) {
+ v->counter += a;
+ ret = 1;
+ }
+ raw_spin_unlock_irqrestore(lock, flags);
+ return ret;
+}
+
+#endif
diff --git a/linux/bio.c b/linux/bio.c
new file mode 100644
index 00000000..93a791c4
--- /dev/null
+++ b/linux/bio.c
@@ -0,0 +1,395 @@
+/*
+ * Copyright (C) 2001 Jens Axboe <axboe@kernel.dk>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public Licens
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
+ *
+ */
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+
+static const struct {
+ int err;
+ const char *name;
+} blk_errors[] = {
+ [BLK_STS_OK] = { 0, "" },
+ [BLK_STS_NOTSUPP] = { -EOPNOTSUPP, "operation not supported" },
+ [BLK_STS_TIMEOUT] = { -ETIMEDOUT, "timeout" },
+ [BLK_STS_NOSPC] = { -ENOSPC, "critical space allocation" },
+ [BLK_STS_TRANSPORT] = { -ENOLINK, "recoverable transport" },
+ [BLK_STS_TARGET] = { -EREMOTEIO, "critical target" },
+ [BLK_STS_NEXUS] = { -EBADE, "critical nexus" },
+ [BLK_STS_MEDIUM] = { -ENODATA, "critical medium" },
+ [BLK_STS_PROTECTION] = { -EILSEQ, "protection" },
+ [BLK_STS_RESOURCE] = { -ENOMEM, "kernel resource" },
+ [BLK_STS_AGAIN] = { -EAGAIN, "nonblocking retry" },
+
+ /* device mapper special case, should not leak out: */
+ [BLK_STS_DM_REQUEUE] = { -EREMCHG, "dm internal retry" },
+
+ /* everything else not covered above: */
+ [BLK_STS_IOERR] = { -EIO, "I/O" },
+};
+
+int blk_status_to_errno(blk_status_t status)
+{
+ int idx = (__force int)status;
+
+ if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors)))
+ return -EIO;
+ return blk_errors[idx].err;
+}
+
+const char *blk_status_to_str(blk_status_t status)
+{
+ int idx = (__force int)status;
+
+ if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors)))
+ return "(invalid error)";
+ return blk_errors[idx].name;
+}
+
+void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
+ struct bio *src, struct bvec_iter *src_iter)
+{
+ struct bio_vec src_bv, dst_bv;
+ void *src_p, *dst_p;
+ unsigned bytes;
+
+ while (src_iter->bi_size && dst_iter->bi_size) {
+ src_bv = bio_iter_iovec(src, *src_iter);
+ dst_bv = bio_iter_iovec(dst, *dst_iter);
+
+ bytes = min(src_bv.bv_len, dst_bv.bv_len);
+
+ src_p = kmap_atomic(src_bv.bv_page);
+ dst_p = kmap_atomic(dst_bv.bv_page);
+
+ memcpy(dst_p + dst_bv.bv_offset,
+ src_p + src_bv.bv_offset,
+ bytes);
+
+ kunmap_atomic(dst_p);
+ kunmap_atomic(src_p);
+
+ flush_dcache_page(dst_bv.bv_page);
+
+ bio_advance_iter(src, src_iter, bytes);
+ bio_advance_iter(dst, dst_iter, bytes);
+ }
+}
+
+/**
+ * bio_copy_data - copy contents of data buffers from one bio to another
+ * @src: source bio
+ * @dst: destination bio
+ *
+ * Stops when it reaches the end of either @src or @dst - that is, copies
+ * min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of bios).
+ */
+void bio_copy_data(struct bio *dst, struct bio *src)
+{
+ struct bvec_iter src_iter = src->bi_iter;
+ struct bvec_iter dst_iter = dst->bi_iter;
+
+ bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
+}
+
+void zero_fill_bio_iter(struct bio *bio, struct bvec_iter start)
+{
+ unsigned long flags;
+ struct bio_vec bv;
+ struct bvec_iter iter;
+
+ __bio_for_each_segment(bv, bio, iter, start) {
+ char *data = bvec_kmap_irq(&bv, &flags);
+ memset(data, 0, bv.bv_len);
+ bvec_kunmap_irq(data, &flags);
+ }
+}
+
+static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp)
+{
+ bio_set_flag(bio, BIO_CLONED);
+ bio->bi_ioprio = bio_src->bi_ioprio;
+ bio->bi_iter = bio_src->bi_iter;
+ return 0;
+}
+
+struct bio *bio_alloc_clone(struct block_device *bdev, struct bio *bio_src,
+ gfp_t gfp, struct bio_set *bs)
+{
+ struct bio *bio;
+
+ bio = bio_alloc_bioset(bdev, 0, bio_src->bi_opf, gfp, bs);
+ if (!bio)
+ return NULL;
+
+ if (__bio_clone(bio, bio_src, gfp) < 0) {
+ bio_put(bio);
+ return NULL;
+ }
+ bio->bi_io_vec = bio_src->bi_io_vec;
+
+ return bio;
+}
+
+struct bio *bio_split(struct bio *bio, int sectors,
+ gfp_t gfp, struct bio_set *bs)
+{
+ struct bio *split = NULL;
+
+ BUG_ON(sectors <= 0);
+ BUG_ON(sectors >= bio_sectors(bio));
+
+ split = bio_alloc_clone(bio->bi_bdev, bio, gfp, bs);
+ if (!split)
+ return NULL;
+
+ split->bi_iter.bi_size = sectors << 9;
+
+ bio_advance(bio, split->bi_iter.bi_size);
+
+ return split;
+}
+
+void bio_free_pages(struct bio *bio)
+{
+ struct bvec_iter_all iter;
+ struct bio_vec *bvec;
+
+ bio_for_each_segment_all(bvec, bio, iter)
+ __free_page(bvec->bv_page);
+}
+
+void bio_advance(struct bio *bio, unsigned bytes)
+{
+ bio_advance_iter(bio, &bio->bi_iter, bytes);
+}
+
+static void bio_free(struct bio *bio)
+{
+ struct bio_set *bs = bio->bi_pool;
+
+ if (bs) {
+ if (bio->bi_max_vecs > BIO_INLINE_VECS)
+ mempool_free(bio->bi_io_vec, &bs->bvec_pool);
+
+ mempool_free((void *) bio - bs->front_pad, &bs->bio_pool);
+ } else {
+ kfree(bio);
+ }
+}
+
+void bio_put(struct bio *bio)
+{
+ if (!bio_flagged(bio, BIO_REFFED))
+ bio_free(bio);
+ else {
+ BUG_ON(!atomic_read(&bio->__bi_cnt));
+
+ /*
+ * last put frees it
+ */
+ if (atomic_dec_and_test(&bio->__bi_cnt))
+ bio_free(bio);
+ }
+}
+
+int bio_add_page(struct bio *bio, struct page *page,
+ unsigned int len, unsigned int off)
+{
+ struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt];
+
+ WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
+ WARN_ON_ONCE(bio->bi_vcnt >= bio->bi_max_vecs);
+
+ bv->bv_page = page;
+ bv->bv_offset = off;
+ bv->bv_len = len;
+
+ bio->bi_iter.bi_size += len;
+ bio->bi_vcnt++;
+ return len;
+}
+
+static inline bool bio_remaining_done(struct bio *bio)
+{
+ /*
+ * If we're not chaining, then ->__bi_remaining is always 1 and
+ * we always end io on the first invocation.
+ */
+ if (!bio_flagged(bio, BIO_CHAIN))
+ return true;
+
+ BUG_ON(atomic_read(&bio->__bi_remaining) <= 0);
+
+ if (atomic_dec_and_test(&bio->__bi_remaining)) {
+ bio_clear_flag(bio, BIO_CHAIN);
+ return true;
+ }
+
+ return false;
+}
+
+static struct bio *__bio_chain_endio(struct bio *bio)
+{
+ struct bio *parent = bio->bi_private;
+
+ if (!parent->bi_status)
+ parent->bi_status = bio->bi_status;
+ bio_put(bio);
+ return parent;
+}
+
+static void bio_chain_endio(struct bio *bio)
+{
+ bio_endio(__bio_chain_endio(bio));
+}
+
+void bio_endio(struct bio *bio)
+{
+again:
+ if (!bio_remaining_done(bio))
+ return;
+
+ /*
+ * Need to have a real endio function for chained bios, otherwise
+ * various corner cases will break (like stacking block devices that
+ * save/restore bi_end_io) - however, we want to avoid unbounded
+ * recursion and blowing the stack. Tail call optimization would
+ * handle this, but compiling with frame pointers also disables
+ * gcc's sibling call optimization.
+ */
+ if (bio->bi_end_io == bio_chain_endio) {
+ bio = __bio_chain_endio(bio);
+ goto again;
+ }
+
+ if (bio->bi_end_io)
+ bio->bi_end_io(bio);
+}
+
+void bio_reset(struct bio *bio, struct block_device *bdev, unsigned int opf)
+{
+ unsigned long flags = bio->bi_flags & (~0UL << BIO_RESET_BITS);
+
+ memset(bio, 0, BIO_RESET_BYTES);
+ bio->bi_bdev = bdev;
+ bio->bi_opf = opf;
+ bio->bi_flags = flags;
+ atomic_set(&bio->__bi_remaining, 1);
+}
+
+struct bio *bio_kmalloc(unsigned int nr_iovecs, gfp_t gfp_mask)
+{
+ struct bio *bio;
+
+ bio = kmalloc(sizeof(struct bio) +
+ sizeof(struct bio_vec) * nr_iovecs, gfp_mask);
+ if (unlikely(!bio))
+ return NULL;
+ bio_init(bio, NULL, nr_iovecs ? bio->bi_inline_vecs : NULL, nr_iovecs, 0);
+ bio->bi_pool = NULL;
+ return bio;
+}
+
+static struct bio_vec *bvec_alloc(mempool_t *pool, int *nr_vecs,
+ gfp_t gfp_mask)
+{
+ *nr_vecs = roundup_pow_of_two(*nr_vecs);
+ /*
+ * Try a slab allocation first for all smaller allocations. If that
+ * fails and __GFP_DIRECT_RECLAIM is set retry with the mempool.
+ * The mempool is sized to handle up to BIO_MAX_VECS entries.
+ */
+ if (*nr_vecs < BIO_MAX_VECS) {
+ struct bio_vec *bvl;
+
+ bvl = kmalloc(sizeof(*bvl) * *nr_vecs, gfp_mask);
+ if (likely(bvl))
+ return bvl;
+ *nr_vecs = BIO_MAX_VECS;
+ }
+
+ return mempool_alloc(pool, gfp_mask);
+}
+
+struct bio *bio_alloc_bioset(struct block_device *bdev,
+ unsigned nr_iovecs,
+ unsigned opf,
+ gfp_t gfp_mask,
+ struct bio_set *bs)
+{
+ struct bio *bio;
+ void *p;
+
+ if (nr_iovecs > BIO_MAX_VECS)
+ return NULL;
+
+ p = mempool_alloc(&bs->bio_pool, gfp_mask);
+ if (unlikely(!p))
+ return NULL;
+
+ bio = p + bs->front_pad;
+ if (nr_iovecs > BIO_INLINE_VECS) {
+ struct bio_vec *bvl = NULL;
+
+ bvl = bvec_alloc(&bs->bvec_pool, &nr_iovecs, gfp_mask);
+ if (unlikely(!bvl))
+ goto err_free;
+
+ bio_init(bio, bdev, bvl, nr_iovecs, opf);
+ } else if (nr_iovecs) {
+ bio_init(bio, bdev, bio->bi_inline_vecs, BIO_INLINE_VECS, opf);
+ } else {
+ bio_init(bio, bdev, NULL, 0, opf);
+ }
+
+ bio->bi_pool = bs;
+ return bio;
+
+err_free:
+ mempool_free(p, &bs->bio_pool);
+ return NULL;
+}
+
+void bioset_exit(struct bio_set *bs)
+{
+ mempool_exit(&bs->bio_pool);
+ mempool_exit(&bs->bvec_pool);
+}
+
+int bioset_init(struct bio_set *bs,
+ unsigned int pool_size,
+ unsigned int front_pad,
+ int flags)
+{
+ int ret;
+
+ bs->front_pad = front_pad;
+ if (flags & BIOSET_NEED_BVECS)
+ bs->back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
+ else
+ bs->back_pad = 0;
+
+ ret = mempool_init_kmalloc_pool(&bs->bio_pool, pool_size, bs->front_pad +
+ sizeof(struct bio) + bs->back_pad) ?:
+ mempool_init_kmalloc_pool(&bs->bvec_pool, pool_size,
+ sizeof(struct bio_vec) * BIO_MAX_VECS);
+ if (ret)
+ bioset_exit(bs);
+ return ret;
+}
diff --git a/linux/blkdev.c b/linux/blkdev.c
new file mode 100644
index 00000000..61f23362
--- /dev/null
+++ b/linux/blkdev.c
@@ -0,0 +1,428 @@
+
+#include <alloca.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <unistd.h>
+
+#include <libaio.h>
+
+#ifdef CONFIG_VALGRIND
+#include <valgrind/memcheck.h>
+#endif
+
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/completion.h>
+#include <linux/fs.h>
+#include <linux/kthread.h>
+
+#include "tools-util.h"
+
+struct fops {
+ void (*init)(void);
+ void (*cleanup)(void);
+ void (*read)(struct bio *bio, struct iovec * iov, unsigned i);
+ void (*write)(struct bio *bio, struct iovec * iov, unsigned i);
+};
+
+static struct fops *fops;
+static io_context_t aio_ctx;
+static atomic_t running_requests;
+
+void generic_make_request(struct bio *bio)
+{
+ struct iovec *iov;
+ struct bvec_iter iter;
+ struct bio_vec bv;
+ ssize_t ret;
+ unsigned i;
+
+ if (bio->bi_opf & REQ_PREFLUSH) {
+ ret = fdatasync(bio->bi_bdev->bd_fd);
+ if (ret) {
+ fprintf(stderr, "fsync error: %m\n");
+ bio->bi_status = BLK_STS_IOERR;
+ bio_endio(bio);
+ return;
+ }
+ }
+
+ i = 0;
+ bio_for_each_segment(bv, bio, iter)
+ i++;
+
+ iov = alloca(sizeof(*iov) * i);
+
+ i = 0;
+ bio_for_each_segment(bv, bio, iter) {
+ void *start = page_address(bv.bv_page) + bv.bv_offset;
+ size_t len = bv.bv_len;
+
+ iov[i++] = (struct iovec) {
+ .iov_base = start,
+ .iov_len = len,
+ };
+
+#ifdef CONFIG_VALGRIND
+ /* To be pedantic it should only be on IO completion. */
+ if (bio_op(bio) == REQ_OP_READ)
+ VALGRIND_MAKE_MEM_DEFINED(start, len);
+#endif
+ }
+
+ switch (bio_op(bio)) {
+ case REQ_OP_READ:
+ fops->read(bio, iov, i);
+ break;
+ case REQ_OP_WRITE:
+ fops->write(bio, iov, i);
+ break;
+ case REQ_OP_FLUSH:
+ ret = fsync(bio->bi_bdev->bd_fd);
+ if (ret)
+ die("fsync error: %m");
+ bio_endio(bio);
+ break;
+ default:
+ BUG();
+ }
+}
+
+static void submit_bio_wait_endio(struct bio *bio)
+{
+ complete(bio->bi_private);
+}
+
+int submit_bio_wait(struct bio *bio)
+{
+ struct completion done;
+
+ init_completion(&done);
+ bio->bi_private = &done;
+ bio->bi_end_io = submit_bio_wait_endio;
+ bio->bi_opf |= REQ_SYNC;
+ submit_bio(bio);
+ wait_for_completion(&done);
+
+ return blk_status_to_errno(bio->bi_status);
+}
+
+int blkdev_issue_discard(struct block_device *bdev,
+ sector_t sector, sector_t nr_sects,
+ gfp_t gfp_mask)
+{
+ return 0;
+}
+
+int blkdev_issue_zeroout(struct block_device *bdev,
+ sector_t sector, sector_t nr_sects,
+ gfp_t gfp_mask, unsigned flags)
+{
+ /* Not yet implemented: */
+ BUG();
+}
+
+unsigned bdev_logical_block_size(struct block_device *bdev)
+{
+ struct stat statbuf;
+ unsigned blksize;
+ int ret;
+
+ ret = fstat(bdev->bd_fd, &statbuf);
+ BUG_ON(ret);
+
+ if (!S_ISBLK(statbuf.st_mode))
+ return statbuf.st_blksize;
+
+ xioctl(bdev->bd_fd, BLKPBSZGET, &blksize);
+ return blksize;
+}
+
+sector_t get_capacity(struct gendisk *disk)
+{
+ struct block_device *bdev =
+ container_of(disk, struct block_device, __bd_disk);
+ struct stat statbuf;
+ u64 bytes;
+ int ret;
+
+ ret = fstat(bdev->bd_fd, &statbuf);
+ BUG_ON(ret);
+
+ if (!S_ISBLK(statbuf.st_mode))
+ return statbuf.st_size >> 9;
+
+ ret = ioctl(bdev->bd_fd, BLKGETSIZE64, &bytes);
+ BUG_ON(ret);
+
+ return bytes >> 9;
+}
+
+void blkdev_put(struct block_device *bdev, void *holder)
+{
+ fdatasync(bdev->bd_fd);
+ close(bdev->bd_fd);
+ free(bdev);
+}
+
+struct block_device *blkdev_get_by_path(const char *path, blk_mode_t mode,
+ void *holder, const struct blk_holder_ops *hop)
+{
+ struct block_device *bdev;
+ int fd, flags = 0;
+
+ if ((mode & (BLK_OPEN_READ|BLK_OPEN_WRITE)) == (BLK_OPEN_READ|BLK_OPEN_WRITE))
+ flags = O_RDWR;
+ else if (mode & BLK_OPEN_READ)
+ flags = O_RDONLY;
+ else if (mode & BLK_OPEN_WRITE)
+ flags = O_WRONLY;
+
+ if (!(mode & BLK_OPEN_BUFFERED))
+ flags |= O_DIRECT;
+
+ if (mode & BLK_OPEN_EXCL)
+ flags |= O_EXCL;
+
+ fd = open(path, flags);
+ if (fd < 0)
+ return ERR_PTR(-errno);
+
+ bdev = malloc(sizeof(*bdev));
+ memset(bdev, 0, sizeof(*bdev));
+
+ strncpy(bdev->name, path, sizeof(bdev->name));
+ bdev->name[sizeof(bdev->name) - 1] = '\0';
+
+ bdev->bd_dev = xfstat(fd).st_rdev;
+ bdev->bd_fd = fd;
+ bdev->bd_holder = holder;
+ bdev->bd_disk = &bdev->__bd_disk;
+ bdev->bd_disk->bdi = &bdev->bd_disk->__bdi;
+ bdev->queue.backing_dev_info = bdev->bd_disk->bdi;
+
+ return bdev;
+}
+
+void bdput(struct block_device *bdev)
+{
+ BUG();
+}
+
+int lookup_bdev(const char *path, dev_t *dev)
+{
+ return -EINVAL;
+}
+
+static void io_fallback(void)
+{
+ fops++;
+ if (fops->init == NULL)
+ die("no fallback possible, something is very wrong");
+ fops->init();
+}
+
+static void sync_check(struct bio *bio, int ret)
+{
+ if (ret != bio->bi_iter.bi_size) {
+ die("IO error: %s\n", strerror(-ret));
+ }
+
+ if (bio->bi_opf & REQ_FUA) {
+ ret = fdatasync(bio->bi_bdev->bd_fd);
+ if (ret)
+ die("fsync error: %s\n", strerror(-ret));
+ }
+ bio_endio(bio);
+}
+
+static void sync_init(void) {}
+
+static void sync_cleanup(void)
+{
+ /* not necessary? */
+ sync();
+}
+
+static void sync_read(struct bio *bio, struct iovec * iov, unsigned i)
+{
+
+ ssize_t ret = preadv(bio->bi_bdev->bd_fd, iov, i,
+ bio->bi_iter.bi_sector << 9);
+ sync_check(bio, ret);
+}
+
+static void sync_write(struct bio *bio, struct iovec * iov, unsigned i)
+{
+ ssize_t ret = pwritev2(bio->bi_bdev->bd_fd, iov, i,
+ bio->bi_iter.bi_sector << 9,
+ bio->bi_opf & REQ_FUA ? RWF_SYNC : 0);
+ sync_check(bio, ret);
+}
+
+static DECLARE_WAIT_QUEUE_HEAD(aio_events_completed);
+
+static int aio_completion_thread(void *arg)
+{
+ struct io_event events[8], *ev;
+ int ret;
+ bool stop = false;
+
+ while (!stop) {
+ ret = io_getevents(aio_ctx, 1, ARRAY_SIZE(events),
+ events, NULL);
+
+ if (ret < 0 && ret == -EINTR)
+ continue;
+ if (ret < 0)
+ die("io_getevents() error: %s", strerror(-ret));
+ if (ret)
+ wake_up(&aio_events_completed);
+
+ for (ev = events; ev < events + ret; ev++) {
+ struct bio *bio = (struct bio *) ev->data;
+
+ /* This should only happen during blkdev_cleanup() */
+ if (!bio) {
+ BUG_ON(atomic_read(&running_requests) != 0);
+ stop = true;
+ continue;
+ }
+
+ if (ev->res != bio->bi_iter.bi_size)
+ bio->bi_status = BLK_STS_IOERR;
+
+ bio_endio(bio);
+ atomic_dec(&running_requests);
+ }
+ }
+
+ return 0;
+}
+
+static struct task_struct *aio_task = NULL;
+
+static void aio_init(void)
+{
+ struct task_struct *p;
+ long err = io_setup(256, &aio_ctx);
+ if (!err) {
+ p = kthread_run(aio_completion_thread, NULL, "aio_completion");
+ BUG_ON(IS_ERR(p));
+
+ aio_task = p;
+
+ } else if (err == -ENOSYS) {
+ io_fallback();
+ } else {
+ die("io_setup() error: %s", strerror(err));
+ }
+}
+
+static void aio_cleanup(void)
+{
+ struct task_struct *p = NULL;
+ swap(aio_task, p);
+ get_task_struct(p);
+
+ /* I mean, really?! IO_CMD_NOOP is even defined, but not implemented. */
+ int fds[2];
+ int ret = pipe(fds);
+ if (ret != 0)
+ die("pipe err: %s", strerror(ret));
+
+ /* Wake up the completion thread with spurious work. */
+ int junk = 0;
+ struct iocb iocb = {
+ .aio_lio_opcode = IO_CMD_PWRITE,
+ .data = NULL, /* Signal to stop */
+ .aio_fildes = fds[1],
+ .u.c.buf = &junk,
+ .u.c.nbytes = 1,
+ }, *iocbp = &iocb;
+ ret = io_submit(aio_ctx, 1, &iocbp);
+ if (ret != 1)
+ die("io_submit cleanup err: %s", strerror(-ret));
+
+ ret = kthread_stop(p);
+ BUG_ON(ret);
+
+ put_task_struct(p);
+
+ close(fds[0]);
+ close(fds[1]);
+}
+
+static void aio_op(struct bio *bio, struct iovec *iov, unsigned i, int opcode)
+{
+ ssize_t ret;
+ struct iocb iocb = {
+ .data = bio,
+ .aio_fildes = bio->bi_bdev->bd_fd,
+ .aio_rw_flags = bio->bi_opf & REQ_FUA ? RWF_SYNC : 0,
+ .aio_lio_opcode = opcode,
+ .u.c.buf = iov,
+ .u.c.nbytes = i,
+ .u.c.offset = bio->bi_iter.bi_sector << 9,
+
+ }, *iocbp = &iocb;
+
+ atomic_inc(&running_requests);
+
+ wait_event(aio_events_completed,
+ (ret = io_submit(aio_ctx, 1, &iocbp)) != -EAGAIN);;
+
+ if (ret != 1)
+ die("io_submit err: %s", strerror(-ret));
+}
+
+static void aio_read(struct bio *bio, struct iovec *iov, unsigned i)
+{
+ aio_op(bio, iov, i, IO_CMD_PREADV);
+}
+
+static void aio_write(struct bio *bio, struct iovec * iov, unsigned i)
+{
+ aio_op(bio, iov, i, IO_CMD_PWRITEV);
+}
+
+
+/* not implemented */
+static void uring_init(void) {
+ io_fallback();
+}
+
+struct fops fops_list[] = {
+ {
+ .init = uring_init,
+ }, {
+ .init = aio_init,
+ .cleanup = aio_cleanup,
+ .read = aio_read,
+ .write = aio_write,
+ }, {
+ .init = sync_init,
+ .cleanup = sync_cleanup,
+ .read = sync_read,
+ .write = sync_write,
+ }, {
+ /* NULL */
+ }
+};
+
+__attribute__((constructor(102)))
+static void blkdev_init(void)
+{
+ fops = fops_list;
+ fops->init();
+}
+
+__attribute__((destructor(102)))
+static void blkdev_cleanup(void)
+{
+ fops->cleanup();
+}
diff --git a/linux/closure.c b/linux/closure.c
new file mode 100644
index 00000000..c1654055
--- /dev/null
+++ b/linux/closure.c
@@ -0,0 +1,211 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Asynchronous refcounty things
+ *
+ * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
+ * Copyright 2012 Google, Inc.
+ */
+
+#include <linux/closure.h>
+#include <linux/debugfs.h>
+#include <linux/export.h>
+#include <linux/rcupdate.h>
+#include <linux/seq_file.h>
+#include <linux/sched/debug.h>
+
+static inline void closure_put_after_sub(struct closure *cl, int flags)
+{
+ int r = flags & CLOSURE_REMAINING_MASK;
+
+ BUG_ON(flags & CLOSURE_GUARD_MASK);
+ BUG_ON(!r && (flags & ~CLOSURE_DESTRUCTOR));
+
+ if (!r) {
+ smp_acquire__after_ctrl_dep();
+
+ cl->closure_get_happened = false;
+
+ if (cl->fn && !(flags & CLOSURE_DESTRUCTOR)) {
+ atomic_set(&cl->remaining,
+ CLOSURE_REMAINING_INITIALIZER);
+ closure_queue(cl);
+ } else {
+ struct closure *parent = cl->parent;
+ closure_fn *destructor = cl->fn;
+
+ closure_debug_destroy(cl);
+
+ if (destructor)
+ destructor(&cl->work);
+
+ if (parent)
+ closure_put(parent);
+ }
+ }
+}
+
+/* For clearing flags with the same atomic op as a put */
+void closure_sub(struct closure *cl, int v)
+{
+ closure_put_after_sub(cl, atomic_sub_return_release(v, &cl->remaining));
+}
+EXPORT_SYMBOL(closure_sub);
+
+/*
+ * closure_put - decrement a closure's refcount
+ */
+void closure_put(struct closure *cl)
+{
+ closure_put_after_sub(cl, atomic_dec_return_release(&cl->remaining));
+}
+EXPORT_SYMBOL(closure_put);
+
+/*
+ * closure_wake_up - wake up all closures on a wait list, without memory barrier
+ */
+void __closure_wake_up(struct closure_waitlist *wait_list)
+{
+ struct llist_node *list;
+ struct closure *cl, *t;
+ struct llist_node *reverse = NULL;
+
+ list = llist_del_all(&wait_list->list);
+
+ /* We first reverse the list to preserve FIFO ordering and fairness */
+ reverse = llist_reverse_order(list);
+
+ /* Then do the wakeups */
+ llist_for_each_entry_safe(cl, t, reverse, list) {
+ closure_set_waiting(cl, 0);
+ closure_sub(cl, CLOSURE_WAITING + 1);
+ }
+}
+EXPORT_SYMBOL(__closure_wake_up);
+
+/**
+ * closure_wait - add a closure to a waitlist
+ * @waitlist: will own a ref on @cl, which will be released when
+ * closure_wake_up() is called on @waitlist.
+ * @cl: closure pointer.
+ *
+ */
+bool closure_wait(struct closure_waitlist *waitlist, struct closure *cl)
+{
+ if (atomic_read(&cl->remaining) & CLOSURE_WAITING)
+ return false;
+
+ cl->closure_get_happened = true;
+ closure_set_waiting(cl, _RET_IP_);
+ atomic_add(CLOSURE_WAITING + 1, &cl->remaining);
+ llist_add(&cl->list, &waitlist->list);
+
+ return true;
+}
+EXPORT_SYMBOL(closure_wait);
+
+struct closure_syncer {
+ struct task_struct *task;
+ int done;
+};
+
+static CLOSURE_CALLBACK(closure_sync_fn)
+{
+ struct closure *cl = container_of(ws, struct closure, work);
+ struct closure_syncer *s = cl->s;
+ struct task_struct *p;
+
+ rcu_read_lock();
+ p = READ_ONCE(s->task);
+ s->done = 1;
+ wake_up_process(p);
+ rcu_read_unlock();
+}
+
+void __sched __closure_sync(struct closure *cl)
+{
+ struct closure_syncer s = { .task = current };
+
+ cl->s = &s;
+ continue_at(cl, closure_sync_fn, NULL);
+
+ while (1) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ if (s.done)
+ break;
+ schedule();
+ }
+
+ __set_current_state(TASK_RUNNING);
+}
+EXPORT_SYMBOL(__closure_sync);
+
+#ifdef CONFIG_DEBUG_CLOSURES
+
+static LIST_HEAD(closure_list);
+static DEFINE_SPINLOCK(closure_list_lock);
+
+void closure_debug_create(struct closure *cl)
+{
+ unsigned long flags;
+
+ BUG_ON(cl->magic == CLOSURE_MAGIC_ALIVE);
+ cl->magic = CLOSURE_MAGIC_ALIVE;
+
+ spin_lock_irqsave(&closure_list_lock, flags);
+ list_add(&cl->all, &closure_list);
+ spin_unlock_irqrestore(&closure_list_lock, flags);
+}
+EXPORT_SYMBOL(closure_debug_create);
+
+void closure_debug_destroy(struct closure *cl)
+{
+ unsigned long flags;
+
+ BUG_ON(cl->magic != CLOSURE_MAGIC_ALIVE);
+ cl->magic = CLOSURE_MAGIC_DEAD;
+
+ spin_lock_irqsave(&closure_list_lock, flags);
+ list_del(&cl->all);
+ spin_unlock_irqrestore(&closure_list_lock, flags);
+}
+EXPORT_SYMBOL(closure_debug_destroy);
+
+static int debug_show(struct seq_file *f, void *data)
+{
+ struct closure *cl;
+
+ spin_lock_irq(&closure_list_lock);
+
+ list_for_each_entry(cl, &closure_list, all) {
+ int r = atomic_read(&cl->remaining);
+
+ seq_printf(f, "%p: %pS -> %pS p %p r %i ",
+ cl, (void *) cl->ip, cl->fn, cl->parent,
+ r & CLOSURE_REMAINING_MASK);
+
+ seq_printf(f, "%s%s\n",
+ test_bit(WORK_STRUCT_PENDING_BIT,
+ work_data_bits(&cl->work)) ? "Q" : "",
+ r & CLOSURE_RUNNING ? "R" : "");
+
+ if (r & CLOSURE_WAITING)
+ seq_printf(f, " W %pS\n",
+ (void *) cl->waiting_on);
+
+ seq_puts(f, "\n");
+ }
+
+ spin_unlock_irq(&closure_list_lock);
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(debug);
+
+static int __init closure_debug_init(void)
+{
+ debugfs_create_file("closures", 0400, NULL, NULL, &debug_fops);
+ return 0;
+}
+late_initcall(closure_debug_init)
+
+#endif
diff --git a/linux/crc64.c b/linux/crc64.c
new file mode 100644
index 00000000..0ef8ae6a
--- /dev/null
+++ b/linux/crc64.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Normal 64-bit CRC calculation.
+ *
+ * This is a basic crc64 implementation following ECMA-182 specification,
+ * which can be found from,
+ * http://www.ecma-international.org/publications/standards/Ecma-182.htm
+ *
+ * Dr. Ross N. Williams has a great document to introduce the idea of CRC
+ * algorithm, here the CRC64 code is also inspired by the table-driven
+ * algorithm and detail example from this paper. This paper can be found
+ * from,
+ * http://www.ross.net/crc/download/crc_v3.txt
+ *
+ * crc64table[256] is the lookup table of a table-driven 64-bit CRC
+ * calculation, which is generated by gen_crc64table.c in kernel build
+ * time. The polynomial of crc64 arithmetic is from ECMA-182 specification
+ * as well, which is defined as,
+ *
+ * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 +
+ * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 +
+ * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
+ * x^7 + x^4 + x + 1
+ *
+ * Copyright 2018 SUSE Linux.
+ * Author: Coly Li <colyli@suse.de>
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include "crc64table.h"
+
+MODULE_DESCRIPTION("CRC64 calculations");
+MODULE_LICENSE("GPL v2");
+
+/**
+ * crc64_be - Calculate bitwise big-endian ECMA-182 CRC64
+ * @crc: seed value for computation. 0 or (u64)~0 for a new CRC calculation,
+ or the previous crc64 value if computing incrementally.
+ * @p: pointer to buffer over which CRC64 is run
+ * @len: length of buffer @p
+ */
+u64 __pure crc64_be(u64 crc, const void *p, size_t len)
+{
+ size_t i, t;
+
+ const unsigned char *_p = p;
+
+ for (i = 0; i < len; i++) {
+ t = ((crc >> 56) ^ (*_p++)) & 0xFF;
+ crc = crc64table[t] ^ (crc << 8);
+ }
+
+ return crc;
+}
+EXPORT_SYMBOL_GPL(crc64_be);
diff --git a/linux/crc64table.h b/linux/crc64table.h
new file mode 100644
index 00000000..9964164d
--- /dev/null
+++ b/linux/crc64table.h
@@ -0,0 +1,135 @@
+/* this file is generated - do not edit */
+
+#include <linux/types.h>
+#include <linux/cache.h>
+
+static const u64 ____cacheline_aligned crc64table[256] = {
+ 0x0000000000000000ULL, 0x42f0e1eba9ea3693ULL,
+ 0x85e1c3d753d46d26ULL, 0xc711223cfa3e5bb5ULL,
+ 0x493366450e42ecdfULL, 0x0bc387aea7a8da4cULL,
+ 0xccd2a5925d9681f9ULL, 0x8e224479f47cb76aULL,
+ 0x9266cc8a1c85d9beULL, 0xd0962d61b56fef2dULL,
+ 0x17870f5d4f51b498ULL, 0x5577eeb6e6bb820bULL,
+ 0xdb55aacf12c73561ULL, 0x99a54b24bb2d03f2ULL,
+ 0x5eb4691841135847ULL, 0x1c4488f3e8f96ed4ULL,
+ 0x663d78ff90e185efULL, 0x24cd9914390bb37cULL,
+ 0xe3dcbb28c335e8c9ULL, 0xa12c5ac36adfde5aULL,
+ 0x2f0e1eba9ea36930ULL, 0x6dfeff5137495fa3ULL,
+ 0xaaefdd6dcd770416ULL, 0xe81f3c86649d3285ULL,
+ 0xf45bb4758c645c51ULL, 0xb6ab559e258e6ac2ULL,
+ 0x71ba77a2dfb03177ULL, 0x334a9649765a07e4ULL,
+ 0xbd68d2308226b08eULL, 0xff9833db2bcc861dULL,
+ 0x388911e7d1f2dda8ULL, 0x7a79f00c7818eb3bULL,
+ 0xcc7af1ff21c30bdeULL, 0x8e8a101488293d4dULL,
+ 0x499b3228721766f8ULL, 0x0b6bd3c3dbfd506bULL,
+ 0x854997ba2f81e701ULL, 0xc7b97651866bd192ULL,
+ 0x00a8546d7c558a27ULL, 0x4258b586d5bfbcb4ULL,
+ 0x5e1c3d753d46d260ULL, 0x1cecdc9e94ace4f3ULL,
+ 0xdbfdfea26e92bf46ULL, 0x990d1f49c77889d5ULL,
+ 0x172f5b3033043ebfULL, 0x55dfbadb9aee082cULL,
+ 0x92ce98e760d05399ULL, 0xd03e790cc93a650aULL,
+ 0xaa478900b1228e31ULL, 0xe8b768eb18c8b8a2ULL,
+ 0x2fa64ad7e2f6e317ULL, 0x6d56ab3c4b1cd584ULL,
+ 0xe374ef45bf6062eeULL, 0xa1840eae168a547dULL,
+ 0x66952c92ecb40fc8ULL, 0x2465cd79455e395bULL,
+ 0x3821458aada7578fULL, 0x7ad1a461044d611cULL,
+ 0xbdc0865dfe733aa9ULL, 0xff3067b657990c3aULL,
+ 0x711223cfa3e5bb50ULL, 0x33e2c2240a0f8dc3ULL,
+ 0xf4f3e018f031d676ULL, 0xb60301f359dbe0e5ULL,
+ 0xda050215ea6c212fULL, 0x98f5e3fe438617bcULL,
+ 0x5fe4c1c2b9b84c09ULL, 0x1d14202910527a9aULL,
+ 0x93366450e42ecdf0ULL, 0xd1c685bb4dc4fb63ULL,
+ 0x16d7a787b7faa0d6ULL, 0x5427466c1e109645ULL,
+ 0x4863ce9ff6e9f891ULL, 0x0a932f745f03ce02ULL,
+ 0xcd820d48a53d95b7ULL, 0x8f72eca30cd7a324ULL,
+ 0x0150a8daf8ab144eULL, 0x43a04931514122ddULL,
+ 0x84b16b0dab7f7968ULL, 0xc6418ae602954ffbULL,
+ 0xbc387aea7a8da4c0ULL, 0xfec89b01d3679253ULL,
+ 0x39d9b93d2959c9e6ULL, 0x7b2958d680b3ff75ULL,
+ 0xf50b1caf74cf481fULL, 0xb7fbfd44dd257e8cULL,
+ 0x70eadf78271b2539ULL, 0x321a3e938ef113aaULL,
+ 0x2e5eb66066087d7eULL, 0x6cae578bcfe24bedULL,
+ 0xabbf75b735dc1058ULL, 0xe94f945c9c3626cbULL,
+ 0x676dd025684a91a1ULL, 0x259d31cec1a0a732ULL,
+ 0xe28c13f23b9efc87ULL, 0xa07cf2199274ca14ULL,
+ 0x167ff3eacbaf2af1ULL, 0x548f120162451c62ULL,
+ 0x939e303d987b47d7ULL, 0xd16ed1d631917144ULL,
+ 0x5f4c95afc5edc62eULL, 0x1dbc74446c07f0bdULL,
+ 0xdaad56789639ab08ULL, 0x985db7933fd39d9bULL,
+ 0x84193f60d72af34fULL, 0xc6e9de8b7ec0c5dcULL,
+ 0x01f8fcb784fe9e69ULL, 0x43081d5c2d14a8faULL,
+ 0xcd2a5925d9681f90ULL, 0x8fdab8ce70822903ULL,
+ 0x48cb9af28abc72b6ULL, 0x0a3b7b1923564425ULL,
+ 0x70428b155b4eaf1eULL, 0x32b26afef2a4998dULL,
+ 0xf5a348c2089ac238ULL, 0xb753a929a170f4abULL,
+ 0x3971ed50550c43c1ULL, 0x7b810cbbfce67552ULL,
+ 0xbc902e8706d82ee7ULL, 0xfe60cf6caf321874ULL,
+ 0xe224479f47cb76a0ULL, 0xa0d4a674ee214033ULL,
+ 0x67c58448141f1b86ULL, 0x253565a3bdf52d15ULL,
+ 0xab1721da49899a7fULL, 0xe9e7c031e063acecULL,
+ 0x2ef6e20d1a5df759ULL, 0x6c0603e6b3b7c1caULL,
+ 0xf6fae5c07d3274cdULL, 0xb40a042bd4d8425eULL,
+ 0x731b26172ee619ebULL, 0x31ebc7fc870c2f78ULL,
+ 0xbfc9838573709812ULL, 0xfd39626eda9aae81ULL,
+ 0x3a28405220a4f534ULL, 0x78d8a1b9894ec3a7ULL,
+ 0x649c294a61b7ad73ULL, 0x266cc8a1c85d9be0ULL,
+ 0xe17dea9d3263c055ULL, 0xa38d0b769b89f6c6ULL,
+ 0x2daf4f0f6ff541acULL, 0x6f5faee4c61f773fULL,
+ 0xa84e8cd83c212c8aULL, 0xeabe6d3395cb1a19ULL,
+ 0x90c79d3fedd3f122ULL, 0xd2377cd44439c7b1ULL,
+ 0x15265ee8be079c04ULL, 0x57d6bf0317edaa97ULL,
+ 0xd9f4fb7ae3911dfdULL, 0x9b041a914a7b2b6eULL,
+ 0x5c1538adb04570dbULL, 0x1ee5d94619af4648ULL,
+ 0x02a151b5f156289cULL, 0x4051b05e58bc1e0fULL,
+ 0x87409262a28245baULL, 0xc5b073890b687329ULL,
+ 0x4b9237f0ff14c443ULL, 0x0962d61b56fef2d0ULL,
+ 0xce73f427acc0a965ULL, 0x8c8315cc052a9ff6ULL,
+ 0x3a80143f5cf17f13ULL, 0x7870f5d4f51b4980ULL,
+ 0xbf61d7e80f251235ULL, 0xfd913603a6cf24a6ULL,
+ 0x73b3727a52b393ccULL, 0x31439391fb59a55fULL,
+ 0xf652b1ad0167feeaULL, 0xb4a25046a88dc879ULL,
+ 0xa8e6d8b54074a6adULL, 0xea16395ee99e903eULL,
+ 0x2d071b6213a0cb8bULL, 0x6ff7fa89ba4afd18ULL,
+ 0xe1d5bef04e364a72ULL, 0xa3255f1be7dc7ce1ULL,
+ 0x64347d271de22754ULL, 0x26c49cccb40811c7ULL,
+ 0x5cbd6cc0cc10fafcULL, 0x1e4d8d2b65facc6fULL,
+ 0xd95caf179fc497daULL, 0x9bac4efc362ea149ULL,
+ 0x158e0a85c2521623ULL, 0x577eeb6e6bb820b0ULL,
+ 0x906fc95291867b05ULL, 0xd29f28b9386c4d96ULL,
+ 0xcedba04ad0952342ULL, 0x8c2b41a1797f15d1ULL,
+ 0x4b3a639d83414e64ULL, 0x09ca82762aab78f7ULL,
+ 0x87e8c60fded7cf9dULL, 0xc51827e4773df90eULL,
+ 0x020905d88d03a2bbULL, 0x40f9e43324e99428ULL,
+ 0x2cffe7d5975e55e2ULL, 0x6e0f063e3eb46371ULL,
+ 0xa91e2402c48a38c4ULL, 0xebeec5e96d600e57ULL,
+ 0x65cc8190991cb93dULL, 0x273c607b30f68faeULL,
+ 0xe02d4247cac8d41bULL, 0xa2dda3ac6322e288ULL,
+ 0xbe992b5f8bdb8c5cULL, 0xfc69cab42231bacfULL,
+ 0x3b78e888d80fe17aULL, 0x7988096371e5d7e9ULL,
+ 0xf7aa4d1a85996083ULL, 0xb55aacf12c735610ULL,
+ 0x724b8ecdd64d0da5ULL, 0x30bb6f267fa73b36ULL,
+ 0x4ac29f2a07bfd00dULL, 0x08327ec1ae55e69eULL,
+ 0xcf235cfd546bbd2bULL, 0x8dd3bd16fd818bb8ULL,
+ 0x03f1f96f09fd3cd2ULL, 0x41011884a0170a41ULL,
+ 0x86103ab85a2951f4ULL, 0xc4e0db53f3c36767ULL,
+ 0xd8a453a01b3a09b3ULL, 0x9a54b24bb2d03f20ULL,
+ 0x5d45907748ee6495ULL, 0x1fb5719ce1045206ULL,
+ 0x919735e51578e56cULL, 0xd367d40ebc92d3ffULL,
+ 0x1476f63246ac884aULL, 0x568617d9ef46bed9ULL,
+ 0xe085162ab69d5e3cULL, 0xa275f7c11f7768afULL,
+ 0x6564d5fde549331aULL, 0x279434164ca30589ULL,
+ 0xa9b6706fb8dfb2e3ULL, 0xeb46918411358470ULL,
+ 0x2c57b3b8eb0bdfc5ULL, 0x6ea7525342e1e956ULL,
+ 0x72e3daa0aa188782ULL, 0x30133b4b03f2b111ULL,
+ 0xf7021977f9cceaa4ULL, 0xb5f2f89c5026dc37ULL,
+ 0x3bd0bce5a45a6b5dULL, 0x79205d0e0db05dceULL,
+ 0xbe317f32f78e067bULL, 0xfcc19ed95e6430e8ULL,
+ 0x86b86ed5267cdbd3ULL, 0xc4488f3e8f96ed40ULL,
+ 0x0359ad0275a8b6f5ULL, 0x41a94ce9dc428066ULL,
+ 0xcf8b0890283e370cULL, 0x8d7be97b81d4019fULL,
+ 0x4a6acb477bea5a2aULL, 0x089a2aacd2006cb9ULL,
+ 0x14dea25f3af9026dULL, 0x562e43b4931334feULL,
+ 0x913f6188692d6f4bULL, 0xd3cf8063c0c759d8ULL,
+ 0x5dedc41a34bbeeb2ULL, 0x1f1d25f19d51d821ULL,
+ 0xd80c07cd676f8394ULL, 0x9afce626ce85b507ULL,
+};
diff --git a/linux/crypto/api.c b/linux/crypto/api.c
new file mode 100644
index 00000000..2f3ab2b5
--- /dev/null
+++ b/linux/crypto/api.c
@@ -0,0 +1,114 @@
+/*
+ * Cryptographic API for algorithms (i.e., low-level API).
+ *
+ * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/rwsem.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include <crypto/algapi.h>
+
+static LIST_HEAD(crypto_alg_list);
+static DECLARE_RWSEM(crypto_alg_sem);
+
+struct crypto_type {
+};
+
+int crypto_register_alg(struct crypto_alg *alg)
+{
+ down_write(&crypto_alg_sem);
+ list_add(&alg->cra_list, &crypto_alg_list);
+ up_write(&crypto_alg_sem);
+
+ return 0;
+}
+
+static void *crypto_alloc_tfm(const char *name,
+ const struct crypto_type *type)
+{
+ struct crypto_alg *alg;
+
+ down_read(&crypto_alg_sem);
+ list_for_each_entry(alg, &crypto_alg_list, cra_list)
+ if (alg->cra_type == type && !strcmp(alg->cra_name, name))
+ goto found;
+
+ alg = ERR_PTR(-ENOENT);
+found:
+ up_read(&crypto_alg_sem);
+
+ if (IS_ERR(alg))
+ return ERR_CAST(alg);
+
+ return alg->alloc_tfm() ?: ERR_PTR(-ENOMEM);
+}
+
+/* skcipher: */
+
+static const struct crypto_type crypto_skcipher_type2 = {
+};
+
+struct crypto_skcipher *crypto_alloc_skcipher(const char *name,
+ u32 type, u32 mask)
+{
+ return crypto_alloc_tfm(name, &crypto_skcipher_type2);
+}
+
+int crypto_register_skcipher(struct skcipher_alg *alg)
+{
+ alg->base.cra_type = &crypto_skcipher_type2;
+
+ return crypto_register_alg(&alg->base);
+}
+
+/* shash: */
+
+#include <crypto/hash.h>
+
+static int shash_finup(struct shash_desc *desc, const u8 *data,
+ unsigned len, u8 *out)
+{
+ return crypto_shash_update(desc, data, len) ?:
+ crypto_shash_final(desc, out);
+}
+
+static int shash_digest(struct shash_desc *desc, const u8 *data,
+ unsigned len, u8 *out)
+{
+ return crypto_shash_init(desc) ?:
+ crypto_shash_finup(desc, data, len, out);
+}
+
+static const struct crypto_type crypto_shash_type = {
+};
+
+struct crypto_shash *crypto_alloc_shash(const char *name,
+ u32 type, u32 mask)
+{
+ return crypto_alloc_tfm(name, &crypto_shash_type);
+}
+
+int crypto_register_shash(struct shash_alg *alg)
+{
+ alg->base.cra_type = &crypto_shash_type;
+
+ if (!alg->finup)
+ alg->finup = shash_finup;
+ if (!alg->digest)
+ alg->digest = shash_digest;
+
+ return crypto_register_alg(&alg->base);
+}
diff --git a/linux/crypto/chacha20_generic.c b/linux/crypto/chacha20_generic.c
new file mode 100644
index 00000000..914189e7
--- /dev/null
+++ b/linux/crypto/chacha20_generic.c
@@ -0,0 +1,111 @@
+/*
+ * ChaCha20 256-bit cipher algorithm, RFC7539
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/byteorder.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/scatterlist.h>
+#include <asm/unaligned.h>
+
+#include <linux/crypto.h>
+#include <crypto/algapi.h>
+#include <crypto/chacha.h>
+#include <crypto/skcipher.h>
+
+#include <sodium/crypto_stream_chacha20.h>
+
+static struct skcipher_alg alg;
+
+struct chacha20_tfm {
+ struct crypto_skcipher tfm;
+ u32 key[8];
+};
+
+static int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int keysize)
+{
+ struct chacha20_tfm *ctx =
+ container_of(tfm, struct chacha20_tfm, tfm);
+ int i;
+
+ if (keysize != CHACHA_KEY_SIZE)
+ return -EINVAL;
+
+ for (i = 0; i < ARRAY_SIZE(ctx->key); i++)
+ ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32));
+
+ return 0;
+}
+
+static int crypto_chacha20_crypt(struct skcipher_request *req)
+{
+ struct chacha20_tfm *ctx =
+ container_of(req->tfm, struct chacha20_tfm, tfm.base);
+ struct scatterlist *sg = req->src;
+ unsigned nbytes = req->cryptlen;
+ u32 iv[4];
+ int ret;
+
+ BUG_ON(req->src != req->dst);
+
+ memcpy(iv, req->iv, sizeof(iv));
+
+ while (1) {
+ ret = crypto_stream_chacha20_xor_ic(sg_virt(sg),
+ sg_virt(sg),
+ sg->length,
+ (void *) &iv[2],
+ iv[0] | ((u64) iv[1] << 32),
+ (void *) ctx->key);
+ BUG_ON(ret);
+
+ nbytes -= sg->length;
+
+ if (sg_is_last(sg))
+ break;
+
+ BUG_ON(sg->length % CHACHA_BLOCK_SIZE);
+ iv[0] += sg->length / CHACHA_BLOCK_SIZE;
+ sg = sg_next(sg);
+ };
+
+ BUG_ON(nbytes);
+
+ return 0;
+}
+
+static void *crypto_chacha20_alloc_tfm(void)
+{
+ struct chacha20_tfm *tfm = kzalloc(sizeof(*tfm), GFP_KERNEL);
+
+ if (!tfm)
+ return NULL;
+
+ tfm->tfm.base.alg = &alg.base;
+ tfm->tfm.setkey = crypto_chacha20_setkey;
+ tfm->tfm.encrypt = crypto_chacha20_crypt;
+ tfm->tfm.decrypt = crypto_chacha20_crypt;
+ tfm->tfm.ivsize = CHACHA_IV_SIZE;
+ tfm->tfm.keysize = CHACHA_KEY_SIZE;
+
+ return tfm;
+}
+
+static struct skcipher_alg alg = {
+ .base.cra_name = "chacha20",
+ .base.alloc_tfm = crypto_chacha20_alloc_tfm,
+};
+
+__attribute__((constructor(110)))
+static int chacha20_generic_mod_init(void)
+{
+ return crypto_register_skcipher(&alg);
+}
diff --git a/linux/crypto/poly1305_generic.c b/linux/crypto/poly1305_generic.c
new file mode 100644
index 00000000..acb554c0
--- /dev/null
+++ b/linux/crypto/poly1305_generic.c
@@ -0,0 +1,88 @@
+/*
+ * Poly1305 authenticator algorithm, RFC7539
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * Based on public domain code by Andrew Moon and Daniel J. Bernstein.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/byteorder.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <asm/unaligned.h>
+
+#include <linux/crypto.h>
+#include <crypto/algapi.h>
+#include <crypto/hash.h>
+#include <crypto/poly1305.h>
+
+static struct shash_alg poly1305_alg;
+
+struct poly1305_desc_ctx {
+ bool key_done;
+ crypto_onetimeauth_poly1305_state s;
+};
+
+static int poly1305_init(struct shash_desc *desc)
+{
+ struct poly1305_desc_ctx *state = (void *) desc->ctx;
+
+ state->key_done = false;
+ return 0;
+}
+
+static int poly1305_update(struct shash_desc *desc,
+ const u8 *src, unsigned len)
+{
+ struct poly1305_desc_ctx *state = (void *) desc->ctx;
+
+ if (!state->key_done) {
+ BUG_ON(len != crypto_onetimeauth_poly1305_KEYBYTES);
+
+ state->key_done = true;
+ return crypto_onetimeauth_poly1305_init(&state->s, src);
+ }
+
+ return crypto_onetimeauth_poly1305_update(&state->s, src, len);
+}
+
+static int poly1305_final(struct shash_desc *desc, u8 *out)
+{
+ struct poly1305_desc_ctx *state = (void *) desc->ctx;
+
+ return crypto_onetimeauth_poly1305_final(&state->s, out);
+}
+
+static void *poly1305_alloc_tfm(void)
+{
+ struct crypto_shash *tfm = kzalloc(sizeof(*tfm), GFP_KERNEL);
+
+ if (!tfm)
+ return NULL;
+
+ tfm->base.alg = &poly1305_alg.base;
+ tfm->descsize = sizeof(struct poly1305_desc_ctx);
+ return tfm;
+}
+
+static struct shash_alg poly1305_alg = {
+ .digestsize = crypto_onetimeauth_poly1305_BYTES,
+ .init = poly1305_init,
+ .update = poly1305_update,
+ .final = poly1305_final,
+ .descsize = sizeof(struct poly1305_desc_ctx),
+
+ .base.cra_name = "poly1305",
+ .base.alloc_tfm = poly1305_alloc_tfm,
+};
+
+__attribute__((constructor(110)))
+static int poly1305_mod_init(void)
+{
+ return crypto_register_shash(&poly1305_alg);
+}
diff --git a/linux/crypto/sha256_generic.c b/linux/crypto/sha256_generic.c
new file mode 100644
index 00000000..9326bfe7
--- /dev/null
+++ b/linux/crypto/sha256_generic.c
@@ -0,0 +1,81 @@
+/*
+ * Cryptographic API.
+ *
+ * SHA-256, as specified in
+ * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
+ *
+ * SHA-256 code by Jean-Luc Cooke <jlcooke@certainkey.com>.
+ *
+ * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
+ * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
+ * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
+ * SHA224 Support Copyright 2007 Intel Corporation <jonathan.lynch@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <linux/bitops.h>
+#include <linux/byteorder.h>
+#include <linux/types.h>
+#include <asm/unaligned.h>
+
+#include <linux/crypto.h>
+#include <crypto/hash.h>
+
+#include <sodium/crypto_hash_sha256.h>
+
+static struct shash_alg sha256_alg;
+
+static int sha256_init(struct shash_desc *desc)
+{
+ crypto_hash_sha256_state *state = (void *) desc->ctx;
+
+ return crypto_hash_sha256_init(state);
+}
+
+static int sha256_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ crypto_hash_sha256_state *state = (void *) desc->ctx;
+
+ return crypto_hash_sha256_update(state, data, len);
+}
+
+static int sha256_final(struct shash_desc *desc, u8 *out)
+{
+ crypto_hash_sha256_state *state = (void *) desc->ctx;
+
+ return crypto_hash_sha256_final(state, out);
+}
+
+static void *sha256_alloc_tfm(void)
+{
+ struct crypto_shash *tfm = kzalloc(sizeof(*tfm), GFP_KERNEL);
+
+ if (!tfm)
+ return NULL;
+
+ tfm->base.alg = &sha256_alg.base;
+ tfm->descsize = sizeof(crypto_hash_sha256_state);
+ return tfm;
+}
+
+static struct shash_alg sha256_alg = {
+ .digestsize = crypto_hash_sha256_BYTES,
+ .init = sha256_init,
+ .update = sha256_update,
+ .final = sha256_final,
+ .descsize = sizeof(crypto_hash_sha256_state),
+ .base.cra_name = "sha256",
+ .base.alloc_tfm = sha256_alloc_tfm,
+};
+
+__attribute__((constructor(110)))
+static int __init sha256_generic_mod_init(void)
+{
+ return crypto_register_shash(&sha256_alg);
+}
diff --git a/linux/fs.c b/linux/fs.c
new file mode 100644
index 00000000..623ca266
--- /dev/null
+++ b/linux/fs.c
@@ -0,0 +1,14 @@
+#include <linux/fs.h>
+#include <linux/posix_acl.h>
+#include <linux/posix_acl_xattr.h>
+#include <linux/xattr.h>
+
+const struct xattr_handler nop_posix_acl_access = {
+ .name = XATTR_NAME_POSIX_ACL_ACCESS,
+ .flags = ACL_TYPE_ACCESS,
+};
+
+const struct xattr_handler nop_posix_acl_default = {
+ .name = XATTR_NAME_POSIX_ACL_DEFAULT,
+ .flags = ACL_TYPE_DEFAULT,
+};
diff --git a/linux/generic-radix-tree.c b/linux/generic-radix-tree.c
new file mode 100644
index 00000000..41f1bcdc
--- /dev/null
+++ b/linux/generic-radix-tree.c
@@ -0,0 +1,307 @@
+
+#include <linux/atomic.h>
+#include <linux/export.h>
+#include <linux/generic-radix-tree.h>
+#include <linux/gfp.h>
+#include <linux/kmemleak.h>
+
+#define GENRADIX_ARY (PAGE_SIZE / sizeof(struct genradix_node *))
+#define GENRADIX_ARY_SHIFT ilog2(GENRADIX_ARY)
+
+struct genradix_node {
+ union {
+ /* Interior node: */
+ struct genradix_node *children[GENRADIX_ARY];
+
+ /* Leaf: */
+ u8 data[PAGE_SIZE];
+ };
+};
+
+static inline int genradix_depth_shift(unsigned depth)
+{
+ return PAGE_SHIFT + GENRADIX_ARY_SHIFT * depth;
+}
+
+/*
+ * Returns size (of data, in bytes) that a tree of a given depth holds:
+ */
+static inline size_t genradix_depth_size(unsigned depth)
+{
+ return 1UL << genradix_depth_shift(depth);
+}
+
+/* depth that's needed for a genradix that can address up to ULONG_MAX: */
+#define GENRADIX_MAX_DEPTH \
+ DIV_ROUND_UP(BITS_PER_LONG - PAGE_SHIFT, GENRADIX_ARY_SHIFT)
+
+#define GENRADIX_DEPTH_MASK \
+ ((unsigned long) (roundup_pow_of_two(GENRADIX_MAX_DEPTH + 1) - 1))
+
+static inline unsigned genradix_root_to_depth(struct genradix_root *r)
+{
+ return (unsigned long) r & GENRADIX_DEPTH_MASK;
+}
+
+static inline struct genradix_node *genradix_root_to_node(struct genradix_root *r)
+{
+ return (void *) ((unsigned long) r & ~GENRADIX_DEPTH_MASK);
+}
+
+/*
+ * Returns pointer to the specified byte @offset within @radix, or NULL if not
+ * allocated
+ */
+void *__genradix_ptr(struct __genradix *radix, size_t offset)
+{
+ struct genradix_root *r = READ_ONCE(radix->root);
+ struct genradix_node *n = genradix_root_to_node(r);
+ unsigned level = genradix_root_to_depth(r);
+
+ if (ilog2(offset) >= genradix_depth_shift(level))
+ return NULL;
+
+ while (1) {
+ if (!n)
+ return NULL;
+ if (!level)
+ break;
+
+ level--;
+
+ n = n->children[offset >> genradix_depth_shift(level)];
+ offset &= genradix_depth_size(level) - 1;
+ }
+
+ return &n->data[offset];
+}
+EXPORT_SYMBOL(__genradix_ptr);
+
+static inline struct genradix_node *genradix_alloc_node(gfp_t gfp_mask)
+{
+ struct genradix_node *node;
+
+ node = (struct genradix_node *)__get_free_page(gfp_mask|__GFP_ZERO);
+
+ /*
+ * We're using pages (not slab allocations) directly for kernel data
+ * structures, so we need to explicitly inform kmemleak of them in order
+ * to avoid false positive memory leak reports.
+ */
+ kmemleak_alloc(node, PAGE_SIZE, 1, gfp_mask);
+ return node;
+}
+
+static inline void genradix_free_node(struct genradix_node *node)
+{
+ kmemleak_free(node);
+ free_page((unsigned long)node);
+}
+
+/*
+ * Returns pointer to the specified byte @offset within @radix, allocating it if
+ * necessary - newly allocated slots are always zeroed out:
+ */
+void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset,
+ gfp_t gfp_mask)
+{
+ struct genradix_root *v = READ_ONCE(radix->root);
+ struct genradix_node *n, *new_node = NULL;
+ unsigned level;
+
+ /* Increase tree depth if necessary: */
+ while (1) {
+ struct genradix_root *r = v, *new_root;
+
+ n = genradix_root_to_node(r);
+ level = genradix_root_to_depth(r);
+
+ if (n && ilog2(offset) < genradix_depth_shift(level))
+ break;
+
+ if (!new_node) {
+ new_node = genradix_alloc_node(gfp_mask);
+ if (!new_node)
+ return NULL;
+ }
+
+ new_node->children[0] = n;
+ new_root = ((struct genradix_root *)
+ ((unsigned long) new_node | (n ? level + 1 : 0)));
+
+ if ((v = cmpxchg_release(&radix->root, r, new_root)) == r) {
+ v = new_root;
+ new_node = NULL;
+ }
+ }
+
+ while (level--) {
+ struct genradix_node **p =
+ &n->children[offset >> genradix_depth_shift(level)];
+ offset &= genradix_depth_size(level) - 1;
+
+ n = READ_ONCE(*p);
+ if (!n) {
+ if (!new_node) {
+ new_node = genradix_alloc_node(gfp_mask);
+ if (!new_node)
+ return NULL;
+ }
+
+ if (!(n = cmpxchg_release(p, NULL, new_node)))
+ swap(n, new_node);
+ }
+ }
+
+ if (new_node)
+ genradix_free_node(new_node);
+
+ return &n->data[offset];
+}
+EXPORT_SYMBOL(__genradix_ptr_alloc);
+
+void *__genradix_iter_peek(struct genradix_iter *iter,
+ struct __genradix *radix,
+ size_t objs_per_page)
+{
+ struct genradix_root *r;
+ struct genradix_node *n;
+ unsigned level, i;
+
+ if (iter->offset == SIZE_MAX)
+ return NULL;
+
+restart:
+ r = READ_ONCE(radix->root);
+ if (!r)
+ return NULL;
+
+ n = genradix_root_to_node(r);
+ level = genradix_root_to_depth(r);
+
+ if (ilog2(iter->offset) >= genradix_depth_shift(level))
+ return NULL;
+
+ while (level) {
+ level--;
+
+ i = (iter->offset >> genradix_depth_shift(level)) &
+ (GENRADIX_ARY - 1);
+
+ while (!n->children[i]) {
+ size_t objs_per_ptr = genradix_depth_size(level);
+
+ if (iter->offset + objs_per_ptr < iter->offset) {
+ iter->offset = SIZE_MAX;
+ iter->pos = SIZE_MAX;
+ return NULL;
+ }
+
+ i++;
+ iter->offset = round_down(iter->offset + objs_per_ptr,
+ objs_per_ptr);
+ iter->pos = (iter->offset >> PAGE_SHIFT) *
+ objs_per_page;
+ if (i == GENRADIX_ARY)
+ goto restart;
+ }
+
+ n = n->children[i];
+ }
+
+ return &n->data[iter->offset & (PAGE_SIZE - 1)];
+}
+EXPORT_SYMBOL(__genradix_iter_peek);
+
+void *__genradix_iter_peek_prev(struct genradix_iter *iter,
+ struct __genradix *radix,
+ size_t objs_per_page,
+ size_t obj_size_plus_page_remainder)
+{
+ struct genradix_root *r;
+ struct genradix_node *n;
+ unsigned level, i;
+
+ if (iter->offset == SIZE_MAX)
+ return NULL;
+
+restart:
+ r = READ_ONCE(radix->root);
+ if (!r)
+ return NULL;
+
+ n = genradix_root_to_node(r);
+ level = genradix_root_to_depth(r);
+
+ if (ilog2(iter->offset) >= genradix_depth_shift(level)) {
+ iter->offset = genradix_depth_size(level);
+ iter->pos = (iter->offset >> PAGE_SHIFT) * objs_per_page;
+
+ iter->offset -= obj_size_plus_page_remainder;
+ iter->pos--;
+ }
+
+ while (level) {
+ level--;
+
+ i = (iter->offset >> genradix_depth_shift(level)) &
+ (GENRADIX_ARY - 1);
+
+ while (!n->children[i]) {
+ size_t objs_per_ptr = genradix_depth_size(level);
+
+ iter->offset = round_down(iter->offset, objs_per_ptr);
+ iter->pos = (iter->offset >> PAGE_SHIFT) * objs_per_page;
+
+ if (!iter->offset)
+ return NULL;
+
+ iter->offset -= obj_size_plus_page_remainder;
+ iter->pos--;
+
+ if (!i)
+ goto restart;
+ --i;
+ }
+
+ n = n->children[i];
+ }
+
+ return &n->data[iter->offset & (PAGE_SIZE - 1)];
+}
+EXPORT_SYMBOL(__genradix_iter_peek_prev);
+
+static void genradix_free_recurse(struct genradix_node *n, unsigned level)
+{
+ if (level) {
+ unsigned i;
+
+ for (i = 0; i < GENRADIX_ARY; i++)
+ if (n->children[i])
+ genradix_free_recurse(n->children[i], level - 1);
+ }
+
+ genradix_free_node(n);
+}
+
+int __genradix_prealloc(struct __genradix *radix, size_t size,
+ gfp_t gfp_mask)
+{
+ size_t offset;
+
+ for (offset = 0; offset < size; offset += PAGE_SIZE)
+ if (!__genradix_ptr_alloc(radix, offset, gfp_mask))
+ return -ENOMEM;
+
+ return 0;
+}
+EXPORT_SYMBOL(__genradix_prealloc);
+
+void __genradix_free(struct __genradix *radix)
+{
+ struct genradix_root *r = xchg(&radix->root, NULL);
+
+ genradix_free_recurse(genradix_root_to_node(r),
+ genradix_root_to_depth(r));
+}
+EXPORT_SYMBOL(__genradix_free);
diff --git a/linux/int_sqrt.c b/linux/int_sqrt.c
new file mode 100644
index 00000000..a8170bb9
--- /dev/null
+++ b/linux/int_sqrt.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2013 Davidlohr Bueso <davidlohr.bueso@hp.com>
+ *
+ * Based on the shift-and-subtract algorithm for computing integer
+ * square root from Guy L. Steele.
+ */
+
+#include <linux/export.h>
+#include <linux/bitops.h>
+#include <linux/limits.h>
+#include <linux/math.h>
+
+/**
+ * int_sqrt - computes the integer square root
+ * @x: integer of which to calculate the sqrt
+ *
+ * Computes: floor(sqrt(x))
+ */
+unsigned long int_sqrt(unsigned long x)
+{
+ unsigned long b, m, y = 0;
+
+ if (x <= 1)
+ return x;
+
+ m = 1UL << (__fls(x) & ~1UL);
+ while (m != 0) {
+ b = y + m;
+ y >>= 1;
+
+ if (x >= b) {
+ x -= b;
+ y += m;
+ }
+ m >>= 2;
+ }
+
+ return y;
+}
+EXPORT_SYMBOL(int_sqrt);
+
+#if BITS_PER_LONG < 64
+/**
+ * int_sqrt64 - strongly typed int_sqrt function when minimum 64 bit input
+ * is expected.
+ * @x: 64bit integer of which to calculate the sqrt
+ */
+u32 int_sqrt64(u64 x)
+{
+ u64 b, m, y = 0;
+
+ if (x <= ULONG_MAX)
+ return int_sqrt((unsigned long) x);
+
+ m = 1ULL << ((fls64(x) - 1) & ~1ULL);
+ while (m != 0) {
+ b = y + m;
+ y >>= 1;
+
+ if (x >= b) {
+ x -= b;
+ y += m;
+ }
+ m >>= 2;
+ }
+
+ return y;
+}
+EXPORT_SYMBOL(int_sqrt64);
+#endif
diff --git a/linux/kstrtox.c b/linux/kstrtox.c
new file mode 100644
index 00000000..bde55808
--- /dev/null
+++ b/linux/kstrtox.c
@@ -0,0 +1,357 @@
+/*
+ * Convert integer string representation to an integer.
+ * If an integer doesn't fit into specified type, -E is returned.
+ *
+ * Integer starts with optional sign.
+ * kstrtou*() functions do not accept sign "-".
+ *
+ * Radix 0 means autodetection: leading "0x" implies radix 16,
+ * leading "0" implies radix 8, otherwise radix is 10.
+ * Autodetection hints work after optional sign, but not before.
+ *
+ * If -E is returned, result is not touched.
+ */
+#include <errno.h>
+#include <ctype.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include "kstrtox.h"
+
+#define KSTRTOX_OVERFLOW (1U << 31)
+
+const char *_parse_integer_fixup_radix(const char *s, unsigned int *base)
+{
+ if (*base == 0) {
+ if (s[0] == '0') {
+ if (_tolower(s[1]) == 'x' && isxdigit(s[2]))
+ *base = 16;
+ else
+ *base = 8;
+ } else
+ *base = 10;
+ }
+ if (*base == 16 && s[0] == '0' && _tolower(s[1]) == 'x')
+ s += 2;
+ return s;
+}
+
+/*
+ * Convert non-negative integer string representation in explicitly given radix
+ * to an integer.
+ * Return number of characters consumed maybe or-ed with overflow bit.
+ * If overflow occurs, result integer (incorrect) is still returned.
+ *
+ * Don't you dare use this function.
+ */
+unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *p)
+{
+ unsigned long long res;
+ unsigned int rv;
+ int overflow;
+
+ res = 0;
+ rv = 0;
+ overflow = 0;
+ while (*s) {
+ unsigned int val;
+
+ if ('0' <= *s && *s <= '9')
+ val = *s - '0';
+ else if ('a' <= _tolower(*s) && _tolower(*s) <= 'f')
+ val = _tolower(*s) - 'a' + 10;
+ else
+ break;
+
+ if (val >= base)
+ break;
+ /*
+ * Check for overflow only if we are within range of
+ * it in the max base we support (16)
+ */
+ if (unlikely(res & (~0ull << 60))) {
+ if (res > ULLONG_MAX - val / base)
+ overflow = 1;
+ }
+ res = res * base + val;
+ rv++;
+ s++;
+ }
+ *p = res;
+ if (overflow)
+ rv |= KSTRTOX_OVERFLOW;
+ return rv;
+}
+
+static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res)
+{
+ unsigned long long _res;
+ unsigned int rv;
+
+ s = _parse_integer_fixup_radix(s, &base);
+ rv = _parse_integer(s, base, &_res);
+ if (rv & KSTRTOX_OVERFLOW)
+ return -ERANGE;
+ if (rv == 0)
+ return -EINVAL;
+ s += rv;
+ if (*s == '\n')
+ s++;
+ if (*s)
+ return -EINVAL;
+ *res = _res;
+ return 0;
+}
+
+/**
+ * kstrtoull - convert a string to an unsigned long long
+ * @s: The start of the string. The string must be null-terminated, and may also
+ * include a single newline before its terminating null. The first character
+ * may also be a plus sign, but not a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ * given as 0, then the base of the string is automatically detected with the
+ * conventional semantics - If it begins with 0x the number will be parsed as a
+ * hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ * parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtoull. Return code must
+ * be checked.
+ */
+int kstrtoull(const char *s, unsigned int base, unsigned long long *res)
+{
+ if (s[0] == '+')
+ s++;
+ return _kstrtoull(s, base, res);
+}
+
+/**
+ * kstrtoll - convert a string to a long long
+ * @s: The start of the string. The string must be null-terminated, and may also
+ * include a single newline before its terminating null. The first character
+ * may also be a plus sign or a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ * given as 0, then the base of the string is automatically detected with the
+ * conventional semantics - If it begins with 0x the number will be parsed as a
+ * hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ * parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtoull. Return code must
+ * be checked.
+ */
+int kstrtoll(const char *s, unsigned int base, long long *res)
+{
+ unsigned long long tmp;
+ int rv;
+
+ if (s[0] == '-') {
+ rv = _kstrtoull(s + 1, base, &tmp);
+ if (rv < 0)
+ return rv;
+ if ((long long)-tmp > 0)
+ return -ERANGE;
+ *res = -tmp;
+ } else {
+ rv = kstrtoull(s, base, &tmp);
+ if (rv < 0)
+ return rv;
+ if ((long long)tmp < 0)
+ return -ERANGE;
+ *res = tmp;
+ }
+ return 0;
+}
+
+/* Internal, do not use. */
+int _kstrtoul(const char *s, unsigned int base, unsigned long *res)
+{
+ unsigned long long tmp;
+ int rv;
+
+ rv = kstrtoull(s, base, &tmp);
+ if (rv < 0)
+ return rv;
+ if (tmp != (unsigned long long)(unsigned long)tmp)
+ return -ERANGE;
+ *res = tmp;
+ return 0;
+}
+
+/* Internal, do not use. */
+int _kstrtol(const char *s, unsigned int base, long *res)
+{
+ long long tmp;
+ int rv;
+
+ rv = kstrtoll(s, base, &tmp);
+ if (rv < 0)
+ return rv;
+ if (tmp != (long long)(long)tmp)
+ return -ERANGE;
+ *res = tmp;
+ return 0;
+}
+
+/**
+ * kstrtouint - convert a string to an unsigned int
+ * @s: The start of the string. The string must be null-terminated, and may also
+ * include a single newline before its terminating null. The first character
+ * may also be a plus sign, but not a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ * given as 0, then the base of the string is automatically detected with the
+ * conventional semantics - If it begins with 0x the number will be parsed as a
+ * hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ * parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtoull. Return code must
+ * be checked.
+ */
+int kstrtouint(const char *s, unsigned int base, unsigned int *res)
+{
+ unsigned long long tmp;
+ int rv;
+
+ rv = kstrtoull(s, base, &tmp);
+ if (rv < 0)
+ return rv;
+ if (tmp != (unsigned long long)(unsigned int)tmp)
+ return -ERANGE;
+ *res = tmp;
+ return 0;
+}
+
+/**
+ * kstrtoint - convert a string to an int
+ * @s: The start of the string. The string must be null-terminated, and may also
+ * include a single newline before its terminating null. The first character
+ * may also be a plus sign or a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ * given as 0, then the base of the string is automatically detected with the
+ * conventional semantics - If it begins with 0x the number will be parsed as a
+ * hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ * parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtoull. Return code must
+ * be checked.
+ */
+int kstrtoint(const char *s, unsigned int base, int *res)
+{
+ long long tmp;
+ int rv;
+
+ rv = kstrtoll(s, base, &tmp);
+ if (rv < 0)
+ return rv;
+ if (tmp != (long long)(int)tmp)
+ return -ERANGE;
+ *res = tmp;
+ return 0;
+}
+
+int kstrtou16(const char *s, unsigned int base, u16 *res)
+{
+ unsigned long long tmp;
+ int rv;
+
+ rv = kstrtoull(s, base, &tmp);
+ if (rv < 0)
+ return rv;
+ if (tmp != (unsigned long long)(u16)tmp)
+ return -ERANGE;
+ *res = tmp;
+ return 0;
+}
+
+int kstrtos16(const char *s, unsigned int base, s16 *res)
+{
+ long long tmp;
+ int rv;
+
+ rv = kstrtoll(s, base, &tmp);
+ if (rv < 0)
+ return rv;
+ if (tmp != (long long)(s16)tmp)
+ return -ERANGE;
+ *res = tmp;
+ return 0;
+}
+
+int kstrtou8(const char *s, unsigned int base, u8 *res)
+{
+ unsigned long long tmp;
+ int rv;
+
+ rv = kstrtoull(s, base, &tmp);
+ if (rv < 0)
+ return rv;
+ if (tmp != (unsigned long long)(u8)tmp)
+ return -ERANGE;
+ *res = tmp;
+ return 0;
+}
+
+int kstrtos8(const char *s, unsigned int base, s8 *res)
+{
+ long long tmp;
+ int rv;
+
+ rv = kstrtoll(s, base, &tmp);
+ if (rv < 0)
+ return rv;
+ if (tmp != (long long)(s8)tmp)
+ return -ERANGE;
+ *res = tmp;
+ return 0;
+}
+
+/**
+ * kstrtobool - convert common user inputs into boolean values
+ * @s: input string
+ * @res: result
+ *
+ * This routine returns 0 iff the first character is one of 'Yy1Nn0', or
+ * [oO][NnFf] for "on" and "off". Otherwise it will return -EINVAL. Value
+ * pointed to by res is updated upon finding a match.
+ */
+int kstrtobool(const char *s, bool *res)
+{
+ if (!s)
+ return -EINVAL;
+
+ switch (s[0]) {
+ case 'y':
+ case 'Y':
+ case '1':
+ *res = true;
+ return 0;
+ case 'n':
+ case 'N':
+ case '0':
+ *res = false;
+ return 0;
+ case 'o':
+ case 'O':
+ switch (s[1]) {
+ case 'n':
+ case 'N':
+ *res = true;
+ return 0;
+ case 'f':
+ case 'F':
+ *res = false;
+ return 0;
+ default:
+ break;
+ }
+ default:
+ break;
+ }
+
+ return -EINVAL;
+}
diff --git a/linux/kstrtox.h b/linux/kstrtox.h
new file mode 100644
index 00000000..910b6de8
--- /dev/null
+++ b/linux/kstrtox.h
@@ -0,0 +1,7 @@
+#ifndef _LIB_KSTRTOX_H
+#define _LIB_KSTRTOX_H
+
+const char *_parse_integer_fixup_radix(const char *s, unsigned int *base);
+unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *res);
+
+#endif
diff --git a/linux/kthread.c b/linux/kthread.c
new file mode 100644
index 00000000..17830e5f
--- /dev/null
+++ b/linux/kthread.c
@@ -0,0 +1,139 @@
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <linux/bitops.h>
+#include <linux/kthread.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+
+#include "tools-util.h"
+
+enum KTHREAD_BITS {
+ KTHREAD_IS_PER_CPU = 0,
+ KTHREAD_SHOULD_STOP,
+ KTHREAD_SHOULD_PARK,
+ KTHREAD_IS_PARKED,
+};
+
+static void *kthread_start_fn(void *data)
+{
+ rcu_register_thread();
+
+ current = data;
+ schedule();
+ current->thread_fn(current->thread_data);
+
+ complete(&current->exited);
+ put_task_struct(current);
+ rcu_unregister_thread();
+ return NULL;
+}
+
+/**
+ * kthread_create_on_node - create a kthread.
+ * @threadfn: the function to run until signal_pending(current).
+ * @data: data ptr for @threadfn.
+ * @node: task and thread structures for the thread are allocated on this node
+ * @namefmt: printf-style name for the thread.
+ *
+ * Description: This helper function creates and names a kernel
+ * thread. The thread will be stopped: use wake_up_process() to start
+ * it. See also kthread_run(). The new thread has SCHED_NORMAL policy and
+ * is affine to all CPUs.
+ *
+ * If thread is going to be bound on a particular cpu, give its node
+ * in @node, to get NUMA affinity for kthread stack, or else give NUMA_NO_NODE.
+ * When woken, the thread will run @threadfn() with @data as its
+ * argument. @threadfn() can either call do_exit() directly if it is a
+ * standalone thread for which no one will call kthread_stop(), or
+ * return when 'kthread_should_stop()' is true (which means
+ * kthread_stop() has been called). The return value should be zero
+ * or a negative error number; it will be passed to kthread_stop().
+ *
+ * Returns a task_struct or ERR_PTR(-ENOMEM) or ERR_PTR(-EINTR).
+ */
+struct task_struct *kthread_create(int (*thread_fn)(void *data),
+ void *thread_data,
+ const char namefmt[], ...)
+{
+ va_list args;
+ struct task_struct *p = malloc(sizeof(*p));
+ int ret;
+
+ memset(p, 0, sizeof(*p));
+
+ va_start(args, namefmt);
+ vsnprintf(p->comm, sizeof(p->comm), namefmt, args);
+ va_end(args);
+
+ p->flags |= PF_KTHREAD;
+ p->thread_fn = thread_fn;
+ p->thread_data = thread_data;
+ p->state = TASK_UNINTERRUPTIBLE;
+ p->signal = &p->_signal;
+ atomic_set(&p->usage, 1);
+ init_completion(&p->exited);
+ init_rwsem(&p->_signal.exec_update_lock);
+
+ pthread_attr_t attr;
+ pthread_attr_init(&attr);
+ pthread_attr_setstacksize(&attr, 32 << 10);
+
+ for (unsigned i = 0; i < 10; i++) {
+ ret = pthread_create(&p->thread, &attr, kthread_start_fn, p);
+ if (!ret)
+ break;
+
+ run_shrinkers(GFP_KERNEL, true);
+ }
+ if (ret)
+ return ERR_PTR(-ret);
+ pthread_setname_np(p->thread, p->comm);
+ return p;
+}
+
+/**
+ * kthread_should_stop - should this kthread return now?
+ *
+ * When someone calls kthread_stop() on your kthread, it will be woken
+ * and this will return true. You should then return, and your return
+ * value will be passed through to kthread_stop().
+ */
+bool kthread_should_stop(void)
+{
+ return test_bit(KTHREAD_SHOULD_STOP, &current->kthread_flags);
+}
+
+bool kthread_freezable_should_stop(bool *was_frozen)
+{
+ return test_bit(KTHREAD_SHOULD_STOP, &current->kthread_flags);
+}
+
+/**
+ * kthread_stop - stop a thread created by kthread_create().
+ * @k: thread created by kthread_create().
+ *
+ * Sets kthread_should_stop() for @k to return true, wakes it, and
+ * waits for it to exit. This can also be called after kthread_create()
+ * instead of calling wake_up_process(): the thread will exit without
+ * calling threadfn().
+ *
+ * If threadfn() may call do_exit() itself, the caller must ensure
+ * task_struct can't go away.
+ *
+ * Returns the result of threadfn(), or %-EINTR if wake_up_process()
+ * was never called.
+ */
+int kthread_stop(struct task_struct *p)
+{
+ get_task_struct(p);
+
+ set_bit(KTHREAD_SHOULD_STOP, &p->kthread_flags);
+ wake_up_process(p);
+ wait_for_completion(&p->exited);
+
+ put_task_struct(p);
+
+ return 0;
+}
diff --git a/linux/llist.c b/linux/llist.c
new file mode 100644
index 00000000..611ce488
--- /dev/null
+++ b/linux/llist.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Lock-less NULL terminated single linked list
+ *
+ * The basic atomic operation of this list is cmpxchg on long. On
+ * architectures that don't have NMI-safe cmpxchg implementation, the
+ * list can NOT be used in NMI handlers. So code that uses the list in
+ * an NMI handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG.
+ *
+ * Copyright 2010,2011 Intel Corp.
+ * Author: Huang Ying <ying.huang@intel.com>
+ */
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/llist.h>
+
+
+/**
+ * llist_add_batch - add several linked entries in batch
+ * @new_first: first entry in batch to be added
+ * @new_last: last entry in batch to be added
+ * @head: the head for your lock-less list
+ *
+ * Return whether list is empty before adding.
+ */
+bool llist_add_batch(struct llist_node *new_first, struct llist_node *new_last,
+ struct llist_head *head)
+{
+ struct llist_node *first;
+
+ do {
+ new_last->next = first = READ_ONCE(head->first);
+ } while (cmpxchg(&head->first, first, new_first) != first);
+
+ return !first;
+}
+EXPORT_SYMBOL_GPL(llist_add_batch);
+
+/**
+ * llist_del_first - delete the first entry of lock-less list
+ * @head: the head for your lock-less list
+ *
+ * If list is empty, return NULL, otherwise, return the first entry
+ * deleted, this is the newest added one.
+ *
+ * Only one llist_del_first user can be used simultaneously with
+ * multiple llist_add users without lock. Because otherwise
+ * llist_del_first, llist_add, llist_add (or llist_del_all, llist_add,
+ * llist_add) sequence in another user may change @head->first->next,
+ * but keep @head->first. If multiple consumers are needed, please
+ * use llist_del_all or use lock between consumers.
+ */
+struct llist_node *llist_del_first(struct llist_head *head)
+{
+ struct llist_node *entry, *old_entry, *next;
+
+ entry = smp_load_acquire(&head->first);
+ for (;;) {
+ if (entry == NULL)
+ return NULL;
+ old_entry = entry;
+ next = READ_ONCE(entry->next);
+ entry = cmpxchg(&head->first, old_entry, next);
+ if (entry == old_entry)
+ break;
+ }
+
+ return entry;
+}
+EXPORT_SYMBOL_GPL(llist_del_first);
+
+/**
+ * llist_reverse_order - reverse order of a llist chain
+ * @head: first item of the list to be reversed
+ *
+ * Reverse the order of a chain of llist entries and return the
+ * new first entry.
+ */
+struct llist_node *llist_reverse_order(struct llist_node *head)
+{
+ struct llist_node *new_head = NULL;
+
+ while (head) {
+ struct llist_node *tmp = head;
+ head = head->next;
+ tmp->next = new_head;
+ new_head = tmp;
+ }
+
+ return new_head;
+}
+EXPORT_SYMBOL_GPL(llist_reverse_order);
diff --git a/linux/mempool.c b/linux/mempool.c
new file mode 100644
index 00000000..74d4fbb3
--- /dev/null
+++ b/linux/mempool.c
@@ -0,0 +1,541 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * linux/mm/mempool.c
+ *
+ * memory buffer pool support. Such pools are mostly used
+ * for guaranteed, deadlock-free memory allocations during
+ * extreme VM load.
+ *
+ * started by Ingo Molnar, Copyright (C) 2001
+ * debugging by David Rientjes, Copyright (C) 2015
+ */
+
+#include <linux/slab.h>
+//#include <linux/kasan.h>
+//#include <linux/kmemleak.h>
+#include <linux/export.h>
+#include <linux/jiffies.h>
+#include <linux/mempool.h>
+#include <linux/mempool.h>
+#include <linux/sched.h>
+
+#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB_DEBUG_ON)
+static void poison_error(mempool_t *pool, void *element, size_t size,
+ size_t byte)
+{
+ const int nr = pool->curr_nr;
+ const int start = max_t(int, byte - (BITS_PER_LONG / 8), 0);
+ const int end = min_t(int, byte + (BITS_PER_LONG / 8), size);
+ int i;
+
+ pr_err("BUG: mempool element poison mismatch\n");
+ pr_err("Mempool %p size %zu\n", pool, size);
+ pr_err(" nr=%d @ %p: %s0x", nr, element, start > 0 ? "... " : "");
+ for (i = start; i < end; i++)
+ pr_cont("%x ", *(u8 *)(element + i));
+ pr_cont("%s\n", end < size ? "..." : "");
+ dump_stack();
+}
+
+static void __check_element(mempool_t *pool, void *element, size_t size)
+{
+ u8 *obj = element;
+ size_t i;
+
+ for (i = 0; i < size; i++) {
+ u8 exp = (i < size - 1) ? POISON_FREE : POISON_END;
+
+ if (obj[i] != exp) {
+ poison_error(pool, element, size, i);
+ return;
+ }
+ }
+ memset(obj, POISON_INUSE, size);
+}
+
+static void check_element(mempool_t *pool, void *element)
+{
+ /* Mempools backed by slab allocator */
+ if (pool->free == mempool_free_slab || pool->free == mempool_kfree) {
+ __check_element(pool, element, ksize(element));
+ } else if (pool->free == mempool_free_pages) {
+ /* Mempools backed by page allocator */
+ int order = (int)(long)pool->pool_data;
+ void *addr = kmap_atomic((struct page *)element);
+
+ __check_element(pool, addr, 1UL << (PAGE_SHIFT + order));
+ kunmap_atomic(addr);
+ }
+}
+
+static void __poison_element(void *element, size_t size)
+{
+ u8 *obj = element;
+
+ memset(obj, POISON_FREE, size - 1);
+ obj[size - 1] = POISON_END;
+}
+
+static void poison_element(mempool_t *pool, void *element)
+{
+ /* Mempools backed by slab allocator */
+ if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc) {
+ __poison_element(element, ksize(element));
+ } else if (pool->alloc == mempool_alloc_pages) {
+ /* Mempools backed by page allocator */
+ int order = (int)(long)pool->pool_data;
+ void *addr = kmap_atomic((struct page *)element);
+
+ __poison_element(addr, 1UL << (PAGE_SHIFT + order));
+ kunmap_atomic(addr);
+ }
+}
+#else /* CONFIG_DEBUG_SLAB || CONFIG_SLUB_DEBUG_ON */
+static inline void check_element(mempool_t *pool, void *element)
+{
+}
+static inline void poison_element(mempool_t *pool, void *element)
+{
+}
+#endif /* CONFIG_DEBUG_SLAB || CONFIG_SLUB_DEBUG_ON */
+
+static __always_inline void kasan_poison_element(mempool_t *pool, void *element)
+{
+#if 0
+ if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc)
+ kasan_poison_kfree(element, _RET_IP_);
+ else if (pool->alloc == mempool_alloc_pages)
+ kasan_free_pages(element, (unsigned long)pool->pool_data);
+#endif
+}
+
+static void kasan_unpoison_element(mempool_t *pool, void *element)
+{
+#if 0
+ if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc)
+ kasan_unpoison_slab(element);
+ else if (pool->alloc == mempool_alloc_pages)
+ kasan_alloc_pages(element, (unsigned long)pool->pool_data);
+#endif
+}
+
+static __always_inline void add_element(mempool_t *pool, void *element)
+{
+ BUG_ON(pool->curr_nr >= pool->min_nr);
+ poison_element(pool, element);
+ kasan_poison_element(pool, element);
+ pool->elements[pool->curr_nr++] = element;
+}
+
+static void *remove_element(mempool_t *pool)
+{
+ void *element = pool->elements[--pool->curr_nr];
+
+ BUG_ON(pool->curr_nr < 0);
+ kasan_unpoison_element(pool, element);
+ check_element(pool, element);
+ return element;
+}
+
+/**
+ * mempool_exit - exit a mempool initialized with mempool_init()
+ * @pool: pointer to the memory pool which was initialized with
+ * mempool_init().
+ *
+ * Free all reserved elements in @pool and @pool itself. This function
+ * only sleeps if the free_fn() function sleeps.
+ *
+ * May be called on a zeroed but uninitialized mempool (i.e. allocated with
+ * kzalloc()).
+ */
+void mempool_exit(mempool_t *pool)
+{
+ while (pool->curr_nr) {
+ void *element = remove_element(pool);
+ pool->free(element, pool->pool_data);
+ }
+ kfree(pool->elements);
+ pool->elements = NULL;
+}
+EXPORT_SYMBOL(mempool_exit);
+
+/**
+ * mempool_destroy - deallocate a memory pool
+ * @pool: pointer to the memory pool which was allocated via
+ * mempool_create().
+ *
+ * Free all reserved elements in @pool and @pool itself. This function
+ * only sleeps if the free_fn() function sleeps.
+ */
+void mempool_destroy(mempool_t *pool)
+{
+ if (unlikely(!pool))
+ return;
+
+ mempool_exit(pool);
+ kfree(pool);
+}
+EXPORT_SYMBOL(mempool_destroy);
+
+int mempool_init_node(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn,
+ mempool_free_t *free_fn, void *pool_data,
+ gfp_t gfp_mask, int node_id)
+{
+ spin_lock_init(&pool->lock);
+ pool->min_nr = min_nr;
+ pool->pool_data = pool_data;
+ pool->alloc = alloc_fn;
+ pool->free = free_fn;
+ init_waitqueue_head(&pool->wait);
+
+ pool->elements = kmalloc_array(min_nr, sizeof(void *), gfp_mask);
+ if (!pool->elements)
+ return -ENOMEM;
+
+ /*
+ * First pre-allocate the guaranteed number of buffers.
+ */
+ while (pool->curr_nr < pool->min_nr) {
+ void *element;
+
+ element = pool->alloc(gfp_mask, pool->pool_data);
+ if (unlikely(!element)) {
+ mempool_exit(pool);
+ return -ENOMEM;
+ }
+ add_element(pool, element);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(mempool_init_node);
+
+/**
+ * mempool_init - initialize a memory pool
+ * @pool: pointer to the memory pool that should be initialized
+ * @min_nr: the minimum number of elements guaranteed to be
+ * allocated for this pool.
+ * @alloc_fn: user-defined element-allocation function.
+ * @free_fn: user-defined element-freeing function.
+ * @pool_data: optional private data available to the user-defined functions.
+ *
+ * Like mempool_create(), but initializes the pool in (i.e. embedded in another
+ * structure).
+ *
+ * Return: %0 on success, negative error code otherwise.
+ */
+int mempool_init(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn,
+ mempool_free_t *free_fn, void *pool_data)
+{
+ return mempool_init_node(pool, min_nr, alloc_fn, free_fn,
+ pool_data, GFP_KERNEL, 0);
+
+}
+EXPORT_SYMBOL(mempool_init);
+
+/**
+ * mempool_create - create a memory pool
+ * @min_nr: the minimum number of elements guaranteed to be
+ * allocated for this pool.
+ * @alloc_fn: user-defined element-allocation function.
+ * @free_fn: user-defined element-freeing function.
+ * @pool_data: optional private data available to the user-defined functions.
+ *
+ * this function creates and allocates a guaranteed size, preallocated
+ * memory pool. The pool can be used from the mempool_alloc() and mempool_free()
+ * functions. This function might sleep. Both the alloc_fn() and the free_fn()
+ * functions might sleep - as long as the mempool_alloc() function is not called
+ * from IRQ contexts.
+ *
+ * Return: pointer to the created memory pool object or %NULL on error.
+ */
+mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
+ mempool_free_t *free_fn, void *pool_data)
+{
+ return mempool_create_node(min_nr,alloc_fn,free_fn, pool_data,
+ GFP_KERNEL, 0);
+}
+EXPORT_SYMBOL(mempool_create);
+
+mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn,
+ mempool_free_t *free_fn, void *pool_data,
+ gfp_t gfp_mask, int node_id)
+{
+ mempool_t *pool;
+
+ pool = kzalloc(sizeof(*pool), gfp_mask);
+ if (!pool)
+ return NULL;
+
+ if (mempool_init_node(pool, min_nr, alloc_fn, free_fn, pool_data,
+ gfp_mask, node_id)) {
+ kfree(pool);
+ return NULL;
+ }
+
+ return pool;
+}
+EXPORT_SYMBOL(mempool_create_node);
+
+/**
+ * mempool_resize - resize an existing memory pool
+ * @pool: pointer to the memory pool which was allocated via
+ * mempool_create().
+ * @new_min_nr: the new minimum number of elements guaranteed to be
+ * allocated for this pool.
+ *
+ * This function shrinks/grows the pool. In the case of growing,
+ * it cannot be guaranteed that the pool will be grown to the new
+ * size immediately, but new mempool_free() calls will refill it.
+ * This function may sleep.
+ *
+ * Note, the caller must guarantee that no mempool_destroy is called
+ * while this function is running. mempool_alloc() & mempool_free()
+ * might be called (eg. from IRQ contexts) while this function executes.
+ *
+ * Return: %0 on success, negative error code otherwise.
+ */
+int mempool_resize(mempool_t *pool, int new_min_nr)
+{
+ void *element;
+ void **new_elements;
+ unsigned long flags;
+
+ BUG_ON(new_min_nr <= 0);
+ might_sleep();
+
+ spin_lock_irqsave(&pool->lock, flags);
+ if (new_min_nr <= pool->min_nr) {
+ while (new_min_nr < pool->curr_nr) {
+ element = remove_element(pool);
+ spin_unlock_irqrestore(&pool->lock, flags);
+ pool->free(element, pool->pool_data);
+ spin_lock_irqsave(&pool->lock, flags);
+ }
+ pool->min_nr = new_min_nr;
+ goto out_unlock;
+ }
+ spin_unlock_irqrestore(&pool->lock, flags);
+
+ /* Grow the pool */
+ new_elements = kmalloc_array(new_min_nr, sizeof(*new_elements),
+ GFP_KERNEL);
+ if (!new_elements)
+ return -ENOMEM;
+
+ spin_lock_irqsave(&pool->lock, flags);
+ if (unlikely(new_min_nr <= pool->min_nr)) {
+ /* Raced, other resize will do our work */
+ spin_unlock_irqrestore(&pool->lock, flags);
+ kfree(new_elements);
+ goto out;
+ }
+ memcpy(new_elements, pool->elements,
+ pool->curr_nr * sizeof(*new_elements));
+ kfree(pool->elements);
+ pool->elements = new_elements;
+ pool->min_nr = new_min_nr;
+
+ while (pool->curr_nr < pool->min_nr) {
+ spin_unlock_irqrestore(&pool->lock, flags);
+ element = pool->alloc(GFP_KERNEL, pool->pool_data);
+ if (!element)
+ goto out;
+ spin_lock_irqsave(&pool->lock, flags);
+ if (pool->curr_nr < pool->min_nr) {
+ add_element(pool, element);
+ } else {
+ spin_unlock_irqrestore(&pool->lock, flags);
+ pool->free(element, pool->pool_data); /* Raced */
+ goto out;
+ }
+ }
+out_unlock:
+ spin_unlock_irqrestore(&pool->lock, flags);
+out:
+ return 0;
+}
+EXPORT_SYMBOL(mempool_resize);
+
+/**
+ * mempool_alloc - allocate an element from a specific memory pool
+ * @pool: pointer to the memory pool which was allocated via
+ * mempool_create().
+ * @gfp_mask: the usual allocation bitmask.
+ *
+ * this function only sleeps if the alloc_fn() function sleeps or
+ * returns NULL. Note that due to preallocation, this function
+ * *never* fails when called from process contexts. (it might
+ * fail if called from an IRQ context.)
+ * Note: using __GFP_ZERO is not supported.
+ *
+ * Return: pointer to the allocated element or %NULL on error.
+ */
+void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
+{
+ void *element;
+ unsigned long flags;
+ DEFINE_WAIT(wait);
+ gfp_t gfp_temp;
+
+ WARN_ON_ONCE(gfp_mask & __GFP_ZERO);
+
+ gfp_mask |= __GFP_NORETRY; /* don't loop in __alloc_pages */
+ gfp_mask |= __GFP_NOWARN; /* failures are OK */
+
+ gfp_temp = gfp_mask & ~(__GFP_IO);
+
+repeat_alloc:
+
+ element = pool->alloc(gfp_temp, pool->pool_data);
+ if (likely(element != NULL))
+ return element;
+
+ spin_lock_irqsave(&pool->lock, flags);
+ if (likely(pool->curr_nr)) {
+ element = remove_element(pool);
+ spin_unlock_irqrestore(&pool->lock, flags);
+ /* paired with rmb in mempool_free(), read comment there */
+ smp_wmb();
+ return element;
+ }
+
+ /*
+ * We use gfp mask w/o direct reclaim or IO for the first round. If
+ * alloc failed with that and @pool was empty, retry immediately.
+ */
+ if (gfp_temp != gfp_mask) {
+ spin_unlock_irqrestore(&pool->lock, flags);
+ gfp_temp = gfp_mask;
+ goto repeat_alloc;
+ }
+
+ /* Let's wait for someone else to return an element to @pool */
+ prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE);
+
+ spin_unlock_irqrestore(&pool->lock, flags);
+
+ /*
+ * FIXME: this should be io_schedule(). The timeout is there as a
+ * workaround for some DM problems in 2.6.18.
+ */
+ io_schedule_timeout(5*HZ);
+
+ finish_wait(&pool->wait, &wait);
+ goto repeat_alloc;
+}
+EXPORT_SYMBOL(mempool_alloc);
+
+/**
+ * mempool_free - return an element to the pool.
+ * @element: pool element pointer.
+ * @pool: pointer to the memory pool which was allocated via
+ * mempool_create().
+ *
+ * this function only sleeps if the free_fn() function sleeps.
+ */
+void mempool_free(void *element, mempool_t *pool)
+{
+ unsigned long flags;
+
+ if (unlikely(element == NULL))
+ return;
+
+ /*
+ * Paired with the wmb in mempool_alloc(). The preceding read is
+ * for @element and the following @pool->curr_nr. This ensures
+ * that the visible value of @pool->curr_nr is from after the
+ * allocation of @element. This is necessary for fringe cases
+ * where @element was passed to this task without going through
+ * barriers.
+ *
+ * For example, assume @p is %NULL at the beginning and one task
+ * performs "p = mempool_alloc(...);" while another task is doing
+ * "while (!p) cpu_relax(); mempool_free(p, ...);". This function
+ * may end up using curr_nr value which is from before allocation
+ * of @p without the following rmb.
+ */
+ smp_rmb();
+
+ /*
+ * For correctness, we need a test which is guaranteed to trigger
+ * if curr_nr + #allocated == min_nr. Testing curr_nr < min_nr
+ * without locking achieves that and refilling as soon as possible
+ * is desirable.
+ *
+ * Because curr_nr visible here is always a value after the
+ * allocation of @element, any task which decremented curr_nr below
+ * min_nr is guaranteed to see curr_nr < min_nr unless curr_nr gets
+ * incremented to min_nr afterwards. If curr_nr gets incremented
+ * to min_nr after the allocation of @element, the elements
+ * allocated after that are subject to the same guarantee.
+ *
+ * Waiters happen iff curr_nr is 0 and the above guarantee also
+ * ensures that there will be frees which return elements to the
+ * pool waking up the waiters.
+ */
+ if (unlikely(READ_ONCE(pool->curr_nr) < pool->min_nr)) {
+ spin_lock_irqsave(&pool->lock, flags);
+ if (likely(pool->curr_nr < pool->min_nr)) {
+ add_element(pool, element);
+ spin_unlock_irqrestore(&pool->lock, flags);
+ wake_up(&pool->wait);
+ return;
+ }
+ spin_unlock_irqrestore(&pool->lock, flags);
+ }
+ pool->free(element, pool->pool_data);
+}
+EXPORT_SYMBOL(mempool_free);
+
+/*
+ * A commonly used alloc and free fn.
+ */
+void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data)
+{
+ struct kmem_cache *mem = pool_data;
+ return kmem_cache_alloc(mem, gfp_mask);
+}
+EXPORT_SYMBOL(mempool_alloc_slab);
+
+void mempool_free_slab(void *element, void *pool_data)
+{
+ struct kmem_cache *mem = pool_data;
+ kmem_cache_free(mem, element);
+}
+EXPORT_SYMBOL(mempool_free_slab);
+
+/*
+ * A commonly used alloc and free fn that kmalloc/kfrees the amount of memory
+ * specified by pool_data
+ */
+void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data)
+{
+ size_t size = (size_t)pool_data;
+ return kmalloc(size, gfp_mask);
+}
+EXPORT_SYMBOL(mempool_kmalloc);
+
+void mempool_kfree(void *element, void *pool_data)
+{
+ kfree(element);
+}
+EXPORT_SYMBOL(mempool_kfree);
+
+/*
+ * A simple mempool-backed page allocator that allocates pages
+ * of the order specified by pool_data.
+ */
+void *mempool_alloc_pages(gfp_t gfp_mask, void *pool_data)
+{
+ int order = (int)(long)pool_data;
+ return alloc_pages(gfp_mask, order);
+}
+EXPORT_SYMBOL(mempool_alloc_pages);
+
+void mempool_free_pages(void *element, void *pool_data)
+{
+ int order = (int)(long)pool_data;
+ __free_pages(element, order);
+}
+EXPORT_SYMBOL(mempool_free_pages);
diff --git a/linux/preempt.c b/linux/preempt.c
new file mode 100644
index 00000000..72eceed3
--- /dev/null
+++ b/linux/preempt.c
@@ -0,0 +1,37 @@
+#include <pthread.h>
+
+#include "linux/preempt.h"
+
+/*
+ * In userspace, pthreads are preemptible and can migrate CPUs at any time.
+ *
+ * In the kernel, preempt_disable() logic essentially guarantees that a marked
+ * critical section owns its CPU for the relevant block. This is necessary for
+ * various code paths, critically including the percpu system as it allows for
+ * non-atomic reads and writes to CPU-local data structures.
+ *
+ * The high performance userspace equivalent would be to use thread local
+ * storage to replace percpu data, but that would be complicated. It should be
+ * correct to instead guarantee mutual exclusion for the critical sections.
+ */
+
+static pthread_mutex_t preempt_lock;
+
+__attribute__((constructor))
+static void preempt_init(void) {
+ pthread_mutexattr_t attr;
+ pthread_mutexattr_init(&attr);
+ pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
+ pthread_mutex_init(&preempt_lock, &attr);
+ pthread_mutexattr_destroy(&attr);
+}
+
+void preempt_disable(void)
+{
+ pthread_mutex_lock(&preempt_lock);
+}
+
+void preempt_enable(void)
+{
+ pthread_mutex_unlock(&preempt_lock);
+}
diff --git a/linux/ratelimit.c b/linux/ratelimit.c
new file mode 100644
index 00000000..21a6d6c8
--- /dev/null
+++ b/linux/ratelimit.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * ratelimit.c - Do something with rate limit.
+ *
+ * Isolated from kernel/printk.c by Dave Young <hidave.darkstar@gmail.com>
+ *
+ * 2008-05-01 rewrite the function and use a ratelimit_state data struct as
+ * parameter. Now every user can use their own standalone ratelimit_state.
+ */
+
+#include <linux/ratelimit.h>
+#include <linux/jiffies.h>
+#include <linux/export.h>
+
+/*
+ * __ratelimit - rate limiting
+ * @rs: ratelimit_state data
+ * @func: name of calling function
+ *
+ * This enforces a rate limit: not more than @rs->burst callbacks
+ * in every @rs->interval
+ *
+ * RETURNS:
+ * 0 means callbacks will be suppressed.
+ * 1 means go ahead and do it.
+ */
+int ___ratelimit(struct ratelimit_state *rs, const char *func)
+{
+ int ret;
+
+ if (!rs->interval)
+ return 1;
+
+ /*
+ * If we contend on this state's lock then almost
+ * by definition we are too busy to print a message,
+ * in addition to the one that will be printed by
+ * the entity that is holding the lock already:
+ */
+ if (!raw_spin_trylock(&rs->lock))
+ return 0;
+
+ if (!rs->begin)
+ rs->begin = jiffies;
+
+ if (time_is_before_jiffies(rs->begin + rs->interval)) {
+ if (rs->missed) {
+ if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) {
+ printk(KERN_WARNING
+ "%s: %d callbacks suppressed\n",
+ func, rs->missed);
+ rs->missed = 0;
+ }
+ }
+ rs->begin = jiffies;
+ rs->printed = 0;
+ }
+ if (rs->burst && rs->burst > rs->printed) {
+ rs->printed++;
+ ret = 1;
+ } else {
+ rs->missed++;
+ ret = 0;
+ }
+ raw_spin_unlock(&rs->lock);
+
+ return ret;
+}
+EXPORT_SYMBOL(___ratelimit);
diff --git a/linux/rhashtable.c b/linux/rhashtable.c
new file mode 100644
index 00000000..ba2196fc
--- /dev/null
+++ b/linux/rhashtable.c
@@ -0,0 +1,1237 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Resizable, Scalable, Concurrent Hash Table
+ *
+ * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au>
+ * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
+ * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
+ *
+ * Code partially derived from nft_hash
+ * Rewritten with rehash code from br_multicast plus single list
+ * pointer as suggested by Josh Triplett
+ */
+
+#include <linux/atomic.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/sched.h>
+#include <linux/rculist.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/jhash.h>
+#include <linux/overflow.h>
+#include <linux/random.h>
+#include <linux/rhashtable.h>
+#include <linux/err.h>
+#include <linux/export.h>
+
+#define HASH_DEFAULT_SIZE 64UL
+#define HASH_MIN_SIZE 4U
+
+union nested_table {
+ union nested_table __rcu *table;
+ struct rhash_lock_head __rcu *bucket;
+};
+
+static u32 head_hashfn(struct rhashtable *ht,
+ const struct bucket_table *tbl,
+ const struct rhash_head *he)
+{
+ return rht_head_hashfn(ht, tbl, he, ht->p);
+}
+
+#ifdef CONFIG_PROVE_LOCKING
+#define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT))
+
+int lockdep_rht_mutex_is_held(struct rhashtable *ht)
+{
+ return (debug_locks) ? lockdep_is_held(&ht->mutex) : 1;
+}
+EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);
+
+int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
+{
+ if (!debug_locks)
+ return 1;
+ if (unlikely(tbl->nest))
+ return 1;
+ return bit_spin_is_locked(0, (unsigned long *)&tbl->buckets[hash]);
+}
+EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
+#else
+#define ASSERT_RHT_MUTEX(HT)
+#endif
+
+static inline union nested_table *nested_table_top(
+ const struct bucket_table *tbl)
+{
+ /* The top-level bucket entry does not need RCU protection
+ * because it's set at the same time as tbl->nest.
+ */
+ return (void *)rcu_dereference_protected(tbl->buckets[0], 1);
+}
+
+static void nested_table_free(union nested_table *ntbl, unsigned int size)
+{
+ const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
+ const unsigned int len = 1 << shift;
+ unsigned int i;
+
+ ntbl = rcu_dereference_protected(ntbl->table, 1);
+ if (!ntbl)
+ return;
+
+ if (size > len) {
+ size >>= shift;
+ for (i = 0; i < len; i++)
+ nested_table_free(ntbl + i, size);
+ }
+
+ kfree(ntbl);
+}
+
+static void nested_bucket_table_free(const struct bucket_table *tbl)
+{
+ unsigned int size = tbl->size >> tbl->nest;
+ unsigned int len = 1 << tbl->nest;
+ union nested_table *ntbl;
+ unsigned int i;
+
+ ntbl = nested_table_top(tbl);
+
+ for (i = 0; i < len; i++)
+ nested_table_free(ntbl + i, size);
+
+ kfree(ntbl);
+}
+
+static void bucket_table_free(struct bucket_table *tbl)
+{
+ if (tbl->nest)
+ nested_bucket_table_free(tbl);
+
+ kvfree(tbl);
+}
+
+static void bucket_table_free_rcu(struct rcu_head *head)
+{
+ bucket_table_free(container_of(head, struct bucket_table, rcu));
+}
+
+static union nested_table *nested_table_alloc(struct rhashtable *ht,
+ union nested_table __rcu **prev,
+ bool leaf)
+{
+ union nested_table *ntbl;
+ int i;
+
+ ntbl = rcu_dereference(*prev);
+ if (ntbl)
+ return ntbl;
+
+ ntbl = kzalloc(PAGE_SIZE, GFP_ATOMIC);
+
+ if (ntbl && leaf) {
+ for (i = 0; i < PAGE_SIZE / sizeof(ntbl[0]); i++)
+ INIT_RHT_NULLS_HEAD(ntbl[i].bucket);
+ }
+
+ if (cmpxchg((union nested_table **)prev, NULL, ntbl) == NULL)
+ return ntbl;
+ /* Raced with another thread. */
+ kfree(ntbl);
+ return rcu_dereference(*prev);
+}
+
+static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht,
+ size_t nbuckets,
+ gfp_t gfp)
+{
+ const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
+ struct bucket_table *tbl;
+ size_t size;
+
+ if (nbuckets < (1 << (shift + 1)))
+ return NULL;
+
+ size = sizeof(*tbl) + sizeof(tbl->buckets[0]);
+
+ tbl = kzalloc(size, gfp);
+ if (!tbl)
+ return NULL;
+
+ if (!nested_table_alloc(ht, (union nested_table __rcu **)tbl->buckets,
+ false)) {
+ kfree(tbl);
+ return NULL;
+ }
+
+ tbl->nest = (ilog2(nbuckets) - 1) % shift + 1;
+
+ return tbl;
+}
+
+static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
+ size_t nbuckets,
+ gfp_t gfp)
+{
+ struct bucket_table *tbl = NULL;
+ size_t size;
+ int i;
+
+ tbl = kvzalloc(struct_size(tbl, buckets, nbuckets), gfp);
+
+ size = nbuckets;
+
+ if (tbl == NULL && (gfp & ~__GFP_NOFAIL) != GFP_KERNEL) {
+ tbl = nested_bucket_table_alloc(ht, nbuckets, gfp);
+ nbuckets = 0;
+ }
+
+ if (tbl == NULL)
+ return NULL;
+
+ tbl->size = size;
+
+ rcu_head_init(&tbl->rcu);
+ INIT_LIST_HEAD(&tbl->walkers);
+
+ tbl->hash_rnd = get_random_u32();
+
+ for (i = 0; i < nbuckets; i++)
+ INIT_RHT_NULLS_HEAD(tbl->buckets[i]);
+
+ return tbl;
+}
+
+static struct bucket_table *rhashtable_last_table(struct rhashtable *ht,
+ struct bucket_table *tbl)
+{
+ struct bucket_table *new_tbl;
+
+ do {
+ new_tbl = tbl;
+ tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+ } while (tbl);
+
+ return new_tbl;
+}
+
+static int rhashtable_rehash_one(struct rhashtable *ht,
+ struct rhash_lock_head __rcu **bkt,
+ unsigned int old_hash)
+{
+ struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
+ struct bucket_table *new_tbl = rhashtable_last_table(ht, old_tbl);
+ int err = -EAGAIN;
+ struct rhash_head *head, *next, *entry;
+ struct rhash_head __rcu **pprev = NULL;
+ unsigned int new_hash;
+
+ if (new_tbl->nest)
+ goto out;
+
+ err = -ENOENT;
+
+ rht_for_each_from(entry, rht_ptr(bkt, old_tbl, old_hash),
+ old_tbl, old_hash) {
+ err = 0;
+ next = rht_dereference_bucket(entry->next, old_tbl, old_hash);
+
+ if (rht_is_a_nulls(next))
+ break;
+
+ pprev = &entry->next;
+ }
+
+ if (err)
+ goto out;
+
+ new_hash = head_hashfn(ht, new_tbl, entry);
+
+ rht_lock(new_tbl, &new_tbl->buckets[new_hash]);
+
+ head = rht_ptr(new_tbl->buckets + new_hash, new_tbl, new_hash);
+
+ RCU_INIT_POINTER(entry->next, head);
+
+ rht_assign_unlock(new_tbl, &new_tbl->buckets[new_hash], entry);
+
+ if (pprev)
+ rcu_assign_pointer(*pprev, next);
+ else
+ /* Need to preserved the bit lock. */
+ rht_assign_locked(bkt, next);
+
+out:
+ return err;
+}
+
+static int rhashtable_rehash_chain(struct rhashtable *ht,
+ unsigned int old_hash)
+{
+ struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
+ struct rhash_lock_head __rcu **bkt = rht_bucket_var(old_tbl, old_hash);
+ int err;
+
+ if (!bkt)
+ return 0;
+ rht_lock(old_tbl, bkt);
+
+ while (!(err = rhashtable_rehash_one(ht, bkt, old_hash)))
+ ;
+
+ if (err == -ENOENT)
+ err = 0;
+ rht_unlock(old_tbl, bkt);
+
+ return err;
+}
+
+static int rhashtable_rehash_attach(struct rhashtable *ht,
+ struct bucket_table *old_tbl,
+ struct bucket_table *new_tbl)
+{
+ /* Make insertions go into the new, empty table right away. Deletions
+ * and lookups will be attempted in both tables until we synchronize.
+ * As cmpxchg() provides strong barriers, we do not need
+ * rcu_assign_pointer().
+ */
+
+ if (cmpxchg((struct bucket_table **)&old_tbl->future_tbl, NULL,
+ new_tbl) != NULL)
+ return -EEXIST;
+
+ return 0;
+}
+
+static int rhashtable_rehash_table(struct rhashtable *ht)
+{
+ struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
+ struct bucket_table *new_tbl;
+ struct rhashtable_walker *walker;
+ unsigned int old_hash;
+ int err;
+
+ new_tbl = rht_dereference(old_tbl->future_tbl, ht);
+ if (!new_tbl)
+ return 0;
+
+ for (old_hash = 0; old_hash < old_tbl->size; old_hash++) {
+ err = rhashtable_rehash_chain(ht, old_hash);
+ if (err)
+ return err;
+ cond_resched();
+ }
+
+ /* Publish the new table pointer. */
+ rcu_assign_pointer(ht->tbl, new_tbl);
+
+ spin_lock(&ht->lock);
+ list_for_each_entry(walker, &old_tbl->walkers, list)
+ walker->tbl = NULL;
+
+ /* Wait for readers. All new readers will see the new
+ * table, and thus no references to the old table will
+ * remain.
+ * We do this inside the locked region so that
+ * rhashtable_walk_stop() can use rcu_head_after_call_rcu()
+ * to check if it should not re-link the table.
+ */
+ call_rcu(&old_tbl->rcu, bucket_table_free_rcu);
+ spin_unlock(&ht->lock);
+
+ return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0;
+}
+
+static int rhashtable_rehash_alloc(struct rhashtable *ht,
+ struct bucket_table *old_tbl,
+ unsigned int size)
+{
+ struct bucket_table *new_tbl;
+ int err;
+
+ ASSERT_RHT_MUTEX(ht);
+
+ new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
+ if (new_tbl == NULL)
+ return -ENOMEM;
+
+ err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
+ if (err)
+ bucket_table_free(new_tbl);
+
+ return err;
+}
+
+/**
+ * rhashtable_shrink - Shrink hash table while allowing concurrent lookups
+ * @ht: the hash table to shrink
+ *
+ * This function shrinks the hash table to fit, i.e., the smallest
+ * size would not cause it to expand right away automatically.
+ *
+ * The caller must ensure that no concurrent resizing occurs by holding
+ * ht->mutex.
+ *
+ * The caller must ensure that no concurrent table mutations take place.
+ * It is however valid to have concurrent lookups if they are RCU protected.
+ *
+ * It is valid to have concurrent insertions and deletions protected by per
+ * bucket locks or concurrent RCU protected lookups and traversals.
+ */
+static int rhashtable_shrink(struct rhashtable *ht)
+{
+ struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
+ unsigned int nelems = atomic_read(&ht->nelems);
+ unsigned int size = 0;
+
+ if (nelems)
+ size = roundup_pow_of_two(nelems * 3 / 2);
+ if (size < ht->p.min_size)
+ size = ht->p.min_size;
+
+ if (old_tbl->size <= size)
+ return 0;
+
+ if (rht_dereference(old_tbl->future_tbl, ht))
+ return -EEXIST;
+
+ return rhashtable_rehash_alloc(ht, old_tbl, size);
+}
+
+static void rht_deferred_worker(struct work_struct *work)
+{
+ struct rhashtable *ht;
+ struct bucket_table *tbl;
+ int err = 0;
+
+ ht = container_of(work, struct rhashtable, run_work);
+ mutex_lock(&ht->mutex);
+
+ tbl = rht_dereference(ht->tbl, ht);
+ tbl = rhashtable_last_table(ht, tbl);
+
+ if (rht_grow_above_75(ht, tbl))
+ err = rhashtable_rehash_alloc(ht, tbl, tbl->size * 2);
+ else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl))
+ err = rhashtable_shrink(ht);
+ else if (tbl->nest)
+ err = rhashtable_rehash_alloc(ht, tbl, tbl->size);
+
+ if (!err || err == -EEXIST) {
+ int nerr;
+
+ nerr = rhashtable_rehash_table(ht);
+ err = err ?: nerr;
+ }
+
+ mutex_unlock(&ht->mutex);
+
+ if (err)
+ schedule_work(&ht->run_work);
+}
+
+static int rhashtable_insert_rehash(struct rhashtable *ht,
+ struct bucket_table *tbl)
+{
+ struct bucket_table *old_tbl;
+ struct bucket_table *new_tbl;
+ unsigned int size;
+ int err;
+
+ old_tbl = rht_dereference_rcu(ht->tbl, ht);
+
+ size = tbl->size;
+
+ err = -EBUSY;
+
+ if (rht_grow_above_75(ht, tbl))
+ size *= 2;
+ /* Do not schedule more than one rehash */
+ else if (old_tbl != tbl)
+ goto fail;
+
+ err = -ENOMEM;
+
+ new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC | __GFP_NOWARN);
+ if (new_tbl == NULL)
+ goto fail;
+
+ err = rhashtable_rehash_attach(ht, tbl, new_tbl);
+ if (err) {
+ bucket_table_free(new_tbl);
+ if (err == -EEXIST)
+ err = 0;
+ } else
+ schedule_work(&ht->run_work);
+
+ return err;
+
+fail:
+ /* Do not fail the insert if someone else did a rehash. */
+ if (likely(rcu_access_pointer(tbl->future_tbl)))
+ return 0;
+
+ /* Schedule async rehash to retry allocation in process context. */
+ if (err == -ENOMEM)
+ schedule_work(&ht->run_work);
+
+ return err;
+}
+
+static void *rhashtable_lookup_one(struct rhashtable *ht,
+ struct rhash_lock_head __rcu **bkt,
+ struct bucket_table *tbl, unsigned int hash,
+ const void *key, struct rhash_head *obj)
+{
+ struct rhashtable_compare_arg arg = {
+ .ht = ht,
+ .key = key,
+ };
+ struct rhash_head __rcu **pprev = NULL;
+ struct rhash_head *head;
+ int elasticity;
+
+ elasticity = RHT_ELASTICITY;
+ rht_for_each_from(head, rht_ptr(bkt, tbl, hash), tbl, hash) {
+ struct rhlist_head *list;
+ struct rhlist_head *plist;
+
+ elasticity--;
+ if (!key ||
+ (ht->p.obj_cmpfn ?
+ ht->p.obj_cmpfn(&arg, rht_obj(ht, head)) :
+ rhashtable_compare(&arg, rht_obj(ht, head)))) {
+ pprev = &head->next;
+ continue;
+ }
+
+ if (!ht->rhlist)
+ return rht_obj(ht, head);
+
+ list = container_of(obj, struct rhlist_head, rhead);
+ plist = container_of(head, struct rhlist_head, rhead);
+
+ RCU_INIT_POINTER(list->next, plist);
+ head = rht_dereference_bucket(head->next, tbl, hash);
+ RCU_INIT_POINTER(list->rhead.next, head);
+ if (pprev)
+ rcu_assign_pointer(*pprev, obj);
+ else
+ /* Need to preserve the bit lock */
+ rht_assign_locked(bkt, obj);
+
+ return NULL;
+ }
+
+ if (elasticity <= 0)
+ return ERR_PTR(-EAGAIN);
+
+ return ERR_PTR(-ENOENT);
+}
+
+static struct bucket_table *rhashtable_insert_one(
+ struct rhashtable *ht, struct rhash_lock_head __rcu **bkt,
+ struct bucket_table *tbl, unsigned int hash, struct rhash_head *obj,
+ void *data)
+{
+ struct bucket_table *new_tbl;
+ struct rhash_head *head;
+
+ if (!IS_ERR_OR_NULL(data))
+ return ERR_PTR(-EEXIST);
+
+ if (PTR_ERR(data) != -EAGAIN && PTR_ERR(data) != -ENOENT)
+ return ERR_CAST(data);
+
+ new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+ if (new_tbl)
+ return new_tbl;
+
+ if (PTR_ERR(data) != -ENOENT)
+ return ERR_CAST(data);
+
+ if (unlikely(rht_grow_above_max(ht, tbl)))
+ return ERR_PTR(-E2BIG);
+
+ if (unlikely(rht_grow_above_100(ht, tbl)))
+ return ERR_PTR(-EAGAIN);
+
+ head = rht_ptr(bkt, tbl, hash);
+
+ RCU_INIT_POINTER(obj->next, head);
+ if (ht->rhlist) {
+ struct rhlist_head *list;
+
+ list = container_of(obj, struct rhlist_head, rhead);
+ RCU_INIT_POINTER(list->next, NULL);
+ }
+
+ /* bkt is always the head of the list, so it holds
+ * the lock, which we need to preserve
+ */
+ rht_assign_locked(bkt, obj);
+
+ atomic_inc(&ht->nelems);
+ if (rht_grow_above_75(ht, tbl))
+ schedule_work(&ht->run_work);
+
+ return NULL;
+}
+
+static void *rhashtable_try_insert(struct rhashtable *ht, const void *key,
+ struct rhash_head *obj)
+{
+ struct bucket_table *new_tbl;
+ struct bucket_table *tbl;
+ struct rhash_lock_head __rcu **bkt;
+ unsigned int hash;
+ void *data;
+
+ new_tbl = rcu_dereference(ht->tbl);
+
+ do {
+ tbl = new_tbl;
+ hash = rht_head_hashfn(ht, tbl, obj, ht->p);
+ if (rcu_access_pointer(tbl->future_tbl))
+ /* Failure is OK */
+ bkt = rht_bucket_var(tbl, hash);
+ else
+ bkt = rht_bucket_insert(ht, tbl, hash);
+ if (bkt == NULL) {
+ new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+ data = ERR_PTR(-EAGAIN);
+ } else {
+ rht_lock(tbl, bkt);
+ data = rhashtable_lookup_one(ht, bkt, tbl,
+ hash, key, obj);
+ new_tbl = rhashtable_insert_one(ht, bkt, tbl,
+ hash, obj, data);
+ if (PTR_ERR(new_tbl) != -EEXIST)
+ data = ERR_CAST(new_tbl);
+
+ rht_unlock(tbl, bkt);
+ }
+ } while (!IS_ERR_OR_NULL(new_tbl));
+
+ if (PTR_ERR(data) == -EAGAIN)
+ data = ERR_PTR(rhashtable_insert_rehash(ht, tbl) ?:
+ -EAGAIN);
+
+ return data;
+}
+
+void *rhashtable_insert_slow(struct rhashtable *ht, const void *key,
+ struct rhash_head *obj)
+{
+ void *data;
+
+ do {
+ rcu_read_lock();
+ data = rhashtable_try_insert(ht, key, obj);
+ rcu_read_unlock();
+ } while (PTR_ERR(data) == -EAGAIN);
+
+ return data;
+}
+EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
+
+/**
+ * rhashtable_walk_enter - Initialise an iterator
+ * @ht: Table to walk over
+ * @iter: Hash table Iterator
+ *
+ * This function prepares a hash table walk.
+ *
+ * Note that if you restart a walk after rhashtable_walk_stop you
+ * may see the same object twice. Also, you may miss objects if
+ * there are removals in between rhashtable_walk_stop and the next
+ * call to rhashtable_walk_start.
+ *
+ * For a completely stable walk you should construct your own data
+ * structure outside the hash table.
+ *
+ * This function may be called from any process context, including
+ * non-preemptable context, but cannot be called from softirq or
+ * hardirq context.
+ *
+ * You must call rhashtable_walk_exit after this function returns.
+ */
+void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter)
+{
+ iter->ht = ht;
+ iter->p = NULL;
+ iter->slot = 0;
+ iter->skip = 0;
+ iter->end_of_table = 0;
+
+ spin_lock(&ht->lock);
+ iter->walker.tbl =
+ rcu_dereference_protected(ht->tbl, lockdep_is_held(&ht->lock));
+ list_add(&iter->walker.list, &iter->walker.tbl->walkers);
+ spin_unlock(&ht->lock);
+}
+EXPORT_SYMBOL_GPL(rhashtable_walk_enter);
+
+/**
+ * rhashtable_walk_exit - Free an iterator
+ * @iter: Hash table Iterator
+ *
+ * This function frees resources allocated by rhashtable_walk_enter.
+ */
+void rhashtable_walk_exit(struct rhashtable_iter *iter)
+{
+ spin_lock(&iter->ht->lock);
+ if (iter->walker.tbl)
+ list_del(&iter->walker.list);
+ spin_unlock(&iter->ht->lock);
+}
+EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
+
+/**
+ * rhashtable_walk_start_check - Start a hash table walk
+ * @iter: Hash table iterator
+ *
+ * Start a hash table walk at the current iterator position. Note that we take
+ * the RCU lock in all cases including when we return an error. So you must
+ * always call rhashtable_walk_stop to clean up.
+ *
+ * Returns zero if successful.
+ *
+ * Returns -EAGAIN if resize event occured. Note that the iterator
+ * will rewind back to the beginning and you may use it immediately
+ * by calling rhashtable_walk_next.
+ *
+ * rhashtable_walk_start is defined as an inline variant that returns
+ * void. This is preferred in cases where the caller would ignore
+ * resize events and always continue.
+ */
+int rhashtable_walk_start_check(struct rhashtable_iter *iter)
+ __acquires(RCU)
+{
+ struct rhashtable *ht = iter->ht;
+ bool rhlist = ht->rhlist;
+
+ rcu_read_lock();
+
+ spin_lock(&ht->lock);
+ if (iter->walker.tbl)
+ list_del(&iter->walker.list);
+ spin_unlock(&ht->lock);
+
+ if (iter->end_of_table)
+ return 0;
+ if (!iter->walker.tbl) {
+ iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht);
+ iter->slot = 0;
+ iter->skip = 0;
+ return -EAGAIN;
+ }
+
+ if (iter->p && !rhlist) {
+ /*
+ * We need to validate that 'p' is still in the table, and
+ * if so, update 'skip'
+ */
+ struct rhash_head *p;
+ int skip = 0;
+ rht_for_each_rcu(p, iter->walker.tbl, iter->slot) {
+ skip++;
+ if (p == iter->p) {
+ iter->skip = skip;
+ goto found;
+ }
+ }
+ iter->p = NULL;
+ } else if (iter->p && rhlist) {
+ /* Need to validate that 'list' is still in the table, and
+ * if so, update 'skip' and 'p'.
+ */
+ struct rhash_head *p;
+ struct rhlist_head *list;
+ int skip = 0;
+ rht_for_each_rcu(p, iter->walker.tbl, iter->slot) {
+ for (list = container_of(p, struct rhlist_head, rhead);
+ list;
+ list = rcu_dereference(list->next)) {
+ skip++;
+ if (list == iter->list) {
+ iter->p = p;
+ iter->skip = skip;
+ goto found;
+ }
+ }
+ }
+ iter->p = NULL;
+ }
+found:
+ return 0;
+}
+EXPORT_SYMBOL_GPL(rhashtable_walk_start_check);
+
+/**
+ * __rhashtable_walk_find_next - Find the next element in a table (or the first
+ * one in case of a new walk).
+ *
+ * @iter: Hash table iterator
+ *
+ * Returns the found object or NULL when the end of the table is reached.
+ *
+ * Returns -EAGAIN if resize event occurred.
+ */
+static void *__rhashtable_walk_find_next(struct rhashtable_iter *iter)
+{
+ struct bucket_table *tbl = iter->walker.tbl;
+ struct rhlist_head *list = iter->list;
+ struct rhashtable *ht = iter->ht;
+ struct rhash_head *p = iter->p;
+ bool rhlist = ht->rhlist;
+
+ if (!tbl)
+ return NULL;
+
+ for (; iter->slot < tbl->size; iter->slot++) {
+ int skip = iter->skip;
+
+ rht_for_each_rcu(p, tbl, iter->slot) {
+ if (rhlist) {
+ list = container_of(p, struct rhlist_head,
+ rhead);
+ do {
+ if (!skip)
+ goto next;
+ skip--;
+ list = rcu_dereference(list->next);
+ } while (list);
+
+ continue;
+ }
+ if (!skip)
+ break;
+ skip--;
+ }
+
+next:
+ if (!rht_is_a_nulls(p)) {
+ iter->skip++;
+ iter->p = p;
+ iter->list = list;
+ return rht_obj(ht, rhlist ? &list->rhead : p);
+ }
+
+ iter->skip = 0;
+ }
+
+ iter->p = NULL;
+
+ /* Ensure we see any new tables. */
+ smp_rmb();
+
+ iter->walker.tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+ if (iter->walker.tbl) {
+ iter->slot = 0;
+ iter->skip = 0;
+ return ERR_PTR(-EAGAIN);
+ } else {
+ iter->end_of_table = true;
+ }
+
+ return NULL;
+}
+
+/**
+ * rhashtable_walk_next - Return the next object and advance the iterator
+ * @iter: Hash table iterator
+ *
+ * Note that you must call rhashtable_walk_stop when you are finished
+ * with the walk.
+ *
+ * Returns the next object or NULL when the end of the table is reached.
+ *
+ * Returns -EAGAIN if resize event occurred. Note that the iterator
+ * will rewind back to the beginning and you may continue to use it.
+ */
+void *rhashtable_walk_next(struct rhashtable_iter *iter)
+{
+ struct rhlist_head *list = iter->list;
+ struct rhashtable *ht = iter->ht;
+ struct rhash_head *p = iter->p;
+ bool rhlist = ht->rhlist;
+
+ if (p) {
+ if (!rhlist || !(list = rcu_dereference(list->next))) {
+ p = rcu_dereference(p->next);
+ list = container_of(p, struct rhlist_head, rhead);
+ }
+ if (!rht_is_a_nulls(p)) {
+ iter->skip++;
+ iter->p = p;
+ iter->list = list;
+ return rht_obj(ht, rhlist ? &list->rhead : p);
+ }
+
+ /* At the end of this slot, switch to next one and then find
+ * next entry from that point.
+ */
+ iter->skip = 0;
+ iter->slot++;
+ }
+
+ return __rhashtable_walk_find_next(iter);
+}
+EXPORT_SYMBOL_GPL(rhashtable_walk_next);
+
+/**
+ * rhashtable_walk_peek - Return the next object but don't advance the iterator
+ * @iter: Hash table iterator
+ *
+ * Returns the next object or NULL when the end of the table is reached.
+ *
+ * Returns -EAGAIN if resize event occurred. Note that the iterator
+ * will rewind back to the beginning and you may continue to use it.
+ */
+void *rhashtable_walk_peek(struct rhashtable_iter *iter)
+{
+ struct rhlist_head *list = iter->list;
+ struct rhashtable *ht = iter->ht;
+ struct rhash_head *p = iter->p;
+
+ if (p)
+ return rht_obj(ht, ht->rhlist ? &list->rhead : p);
+
+ /* No object found in current iter, find next one in the table. */
+
+ if (iter->skip) {
+ /* A nonzero skip value points to the next entry in the table
+ * beyond that last one that was found. Decrement skip so
+ * we find the current value. __rhashtable_walk_find_next
+ * will restore the original value of skip assuming that
+ * the table hasn't changed.
+ */
+ iter->skip--;
+ }
+
+ return __rhashtable_walk_find_next(iter);
+}
+EXPORT_SYMBOL_GPL(rhashtable_walk_peek);
+
+/**
+ * rhashtable_walk_stop - Finish a hash table walk
+ * @iter: Hash table iterator
+ *
+ * Finish a hash table walk. Does not reset the iterator to the start of the
+ * hash table.
+ */
+void rhashtable_walk_stop(struct rhashtable_iter *iter)
+ __releases(RCU)
+{
+ struct rhashtable *ht;
+ struct bucket_table *tbl = iter->walker.tbl;
+
+ if (!tbl)
+ goto out;
+
+ ht = iter->ht;
+
+ spin_lock(&ht->lock);
+ if (rcu_head_after_call_rcu(&tbl->rcu, bucket_table_free_rcu))
+ /* This bucket table is being freed, don't re-link it. */
+ iter->walker.tbl = NULL;
+ else
+ list_add(&iter->walker.list, &tbl->walkers);
+ spin_unlock(&ht->lock);
+
+out:
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(rhashtable_walk_stop);
+
+static size_t rounded_hashtable_size(const struct rhashtable_params *params)
+{
+ size_t retsize;
+
+ if (params->nelem_hint)
+ retsize = max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
+ (unsigned long)params->min_size);
+ else
+ retsize = max(HASH_DEFAULT_SIZE,
+ (unsigned long)params->min_size);
+
+ return retsize;
+}
+
+static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
+{
+ return jhash2(key, length, seed);
+}
+
+/**
+ * rhashtable_init - initialize a new hash table
+ * @ht: hash table to be initialized
+ * @params: configuration parameters
+ *
+ * Initializes a new hash table based on the provided configuration
+ * parameters. A table can be configured either with a variable or
+ * fixed length key:
+ *
+ * Configuration Example 1: Fixed length keys
+ * struct test_obj {
+ * int key;
+ * void * my_member;
+ * struct rhash_head node;
+ * };
+ *
+ * struct rhashtable_params params = {
+ * .head_offset = offsetof(struct test_obj, node),
+ * .key_offset = offsetof(struct test_obj, key),
+ * .key_len = sizeof(int),
+ * .hashfn = jhash,
+ * };
+ *
+ * Configuration Example 2: Variable length keys
+ * struct test_obj {
+ * [...]
+ * struct rhash_head node;
+ * };
+ *
+ * u32 my_hash_fn(const void *data, u32 len, u32 seed)
+ * {
+ * struct test_obj *obj = data;
+ *
+ * return [... hash ...];
+ * }
+ *
+ * struct rhashtable_params params = {
+ * .head_offset = offsetof(struct test_obj, node),
+ * .hashfn = jhash,
+ * .obj_hashfn = my_hash_fn,
+ * };
+ */
+int rhashtable_init(struct rhashtable *ht,
+ const struct rhashtable_params *params)
+{
+ struct bucket_table *tbl;
+ size_t size;
+
+ if ((!params->key_len && !params->obj_hashfn) ||
+ (params->obj_hashfn && !params->obj_cmpfn))
+ return -EINVAL;
+
+ memset(ht, 0, sizeof(*ht));
+ mutex_init(&ht->mutex);
+ spin_lock_init(&ht->lock);
+ memcpy(&ht->p, params, sizeof(*params));
+
+ if (params->min_size)
+ ht->p.min_size = roundup_pow_of_two(params->min_size);
+
+ /* Cap total entries at 2^31 to avoid nelems overflow. */
+ ht->max_elems = 1u << 31;
+
+ if (params->max_size) {
+ ht->p.max_size = rounddown_pow_of_two(params->max_size);
+ if (ht->p.max_size < ht->max_elems / 2)
+ ht->max_elems = ht->p.max_size * 2;
+ }
+
+ ht->p.min_size = max_t(u16, ht->p.min_size, HASH_MIN_SIZE);
+
+ size = rounded_hashtable_size(&ht->p);
+
+ ht->key_len = ht->p.key_len;
+ if (!params->hashfn) {
+ ht->p.hashfn = jhash;
+
+ if (!(ht->key_len & (sizeof(u32) - 1))) {
+ ht->key_len /= sizeof(u32);
+ ht->p.hashfn = rhashtable_jhash2;
+ }
+ }
+
+ /*
+ * This is api initialization and thus we need to guarantee the
+ * initial rhashtable allocation. Upon failure, retry with the
+ * smallest possible size with __GFP_NOFAIL semantics.
+ */
+ tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
+ if (unlikely(tbl == NULL)) {
+ size = max_t(u16, ht->p.min_size, HASH_MIN_SIZE);
+ tbl = bucket_table_alloc(ht, size, GFP_KERNEL | __GFP_NOFAIL);
+ }
+
+ atomic_set(&ht->nelems, 0);
+
+ RCU_INIT_POINTER(ht->tbl, tbl);
+
+ INIT_WORK(&ht->run_work, rht_deferred_worker);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(rhashtable_init);
+
+/**
+ * rhltable_init - initialize a new hash list table
+ * @hlt: hash list table to be initialized
+ * @params: configuration parameters
+ *
+ * Initializes a new hash list table.
+ *
+ * See documentation for rhashtable_init.
+ */
+int rhltable_init(struct rhltable *hlt, const struct rhashtable_params *params)
+{
+ int err;
+
+ err = rhashtable_init(&hlt->ht, params);
+ hlt->ht.rhlist = true;
+ return err;
+}
+EXPORT_SYMBOL_GPL(rhltable_init);
+
+static void rhashtable_free_one(struct rhashtable *ht, struct rhash_head *obj,
+ void (*free_fn)(void *ptr, void *arg),
+ void *arg)
+{
+ struct rhlist_head *list;
+
+ if (!ht->rhlist) {
+ free_fn(rht_obj(ht, obj), arg);
+ return;
+ }
+
+ list = container_of(obj, struct rhlist_head, rhead);
+ do {
+ obj = &list->rhead;
+ list = rht_dereference(list->next, ht);
+ free_fn(rht_obj(ht, obj), arg);
+ } while (list);
+}
+
+/**
+ * rhashtable_free_and_destroy - free elements and destroy hash table
+ * @ht: the hash table to destroy
+ * @free_fn: callback to release resources of element
+ * @arg: pointer passed to free_fn
+ *
+ * Stops an eventual async resize. If defined, invokes free_fn for each
+ * element to releasal resources. Please note that RCU protected
+ * readers may still be accessing the elements. Releasing of resources
+ * must occur in a compatible manner. Then frees the bucket array.
+ *
+ * This function will eventually sleep to wait for an async resize
+ * to complete. The caller is responsible that no further write operations
+ * occurs in parallel.
+ */
+void rhashtable_free_and_destroy(struct rhashtable *ht,
+ void (*free_fn)(void *ptr, void *arg),
+ void *arg)
+{
+ struct bucket_table *tbl, *next_tbl;
+ unsigned int i;
+
+ cancel_work_sync(&ht->run_work);
+
+ mutex_lock(&ht->mutex);
+ tbl = rht_dereference(ht->tbl, ht);
+restart:
+ if (free_fn) {
+ for (i = 0; i < tbl->size; i++) {
+ struct rhash_head *pos, *next;
+
+ cond_resched();
+ for (pos = rht_ptr_exclusive(rht_bucket(tbl, i)),
+ next = !rht_is_a_nulls(pos) ?
+ rht_dereference(pos->next, ht) : NULL;
+ !rht_is_a_nulls(pos);
+ pos = next,
+ next = !rht_is_a_nulls(pos) ?
+ rht_dereference(pos->next, ht) : NULL)
+ rhashtable_free_one(ht, pos, free_fn, arg);
+ }
+ }
+
+ next_tbl = rht_dereference(tbl->future_tbl, ht);
+ bucket_table_free(tbl);
+ if (next_tbl) {
+ tbl = next_tbl;
+ goto restart;
+ }
+ mutex_unlock(&ht->mutex);
+}
+EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy);
+
+void rhashtable_destroy(struct rhashtable *ht)
+{
+ return rhashtable_free_and_destroy(ht, NULL, NULL);
+}
+EXPORT_SYMBOL_GPL(rhashtable_destroy);
+
+struct rhash_lock_head __rcu **__rht_bucket_nested(
+ const struct bucket_table *tbl, unsigned int hash)
+{
+ const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
+ unsigned int index = hash & ((1 << tbl->nest) - 1);
+ unsigned int size = tbl->size >> tbl->nest;
+ unsigned int subhash = hash;
+ union nested_table *ntbl;
+
+ ntbl = nested_table_top(tbl);
+ ntbl = rht_dereference_bucket_rcu(ntbl[index].table, tbl, hash);
+ subhash >>= tbl->nest;
+
+ while (ntbl && size > (1 << shift)) {
+ index = subhash & ((1 << shift) - 1);
+ ntbl = rht_dereference_bucket_rcu(ntbl[index].table,
+ tbl, hash);
+ size >>= shift;
+ subhash >>= shift;
+ }
+
+ if (!ntbl)
+ return NULL;
+
+ return &ntbl[subhash].bucket;
+
+}
+EXPORT_SYMBOL_GPL(__rht_bucket_nested);
+
+struct rhash_lock_head __rcu **rht_bucket_nested(
+ const struct bucket_table *tbl, unsigned int hash)
+{
+ static struct rhash_lock_head __rcu *rhnull;
+
+ if (!rhnull)
+ INIT_RHT_NULLS_HEAD(rhnull);
+ return __rht_bucket_nested(tbl, hash) ?: &rhnull;
+}
+EXPORT_SYMBOL_GPL(rht_bucket_nested);
+
+struct rhash_lock_head __rcu **rht_bucket_nested_insert(
+ struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash)
+{
+ const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
+ unsigned int index = hash & ((1 << tbl->nest) - 1);
+ unsigned int size = tbl->size >> tbl->nest;
+ union nested_table *ntbl;
+
+ ntbl = nested_table_top(tbl);
+ hash >>= tbl->nest;
+ ntbl = nested_table_alloc(ht, &ntbl[index].table,
+ size <= (1 << shift));
+
+ while (ntbl && size > (1 << shift)) {
+ index = hash & ((1 << shift) - 1);
+ size >>= shift;
+ hash >>= shift;
+ ntbl = nested_table_alloc(ht, &ntbl[index].table,
+ size <= (1 << shift));
+ }
+
+ if (!ntbl)
+ return NULL;
+
+ return &ntbl[hash].bucket;
+
+}
+EXPORT_SYMBOL_GPL(rht_bucket_nested_insert);
diff --git a/linux/sched.c b/linux/sched.c
new file mode 100644
index 00000000..1c7198d2
--- /dev/null
+++ b/linux/sched.c
@@ -0,0 +1,133 @@
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <linux/futex.h>
+
+/* hack for mips: */
+#define CONFIG_RCU_HAVE_FUTEX 1
+#include <urcu/futex.h>
+
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+
+__thread struct task_struct *current;
+
+void __put_task_struct(struct task_struct *t)
+{
+ pthread_join(t->thread, NULL);
+ free(t);
+}
+
+/* returns true if process was woken up, false if it was already running */
+int wake_up_process(struct task_struct *p)
+{
+ int ret = p->state != TASK_RUNNING;
+
+ p->state = TASK_RUNNING;
+ futex(&p->state, FUTEX_WAKE|FUTEX_PRIVATE_FLAG,
+ INT_MAX, NULL, NULL, 0);
+ return ret;
+}
+
+void schedule(void)
+{
+ int v;
+
+ rcu_quiescent_state();
+
+ while ((v = READ_ONCE(current->state)) != TASK_RUNNING)
+ futex(&current->state, FUTEX_WAIT|FUTEX_PRIVATE_FLAG,
+ v, NULL, NULL, 0);
+}
+
+struct process_timer {
+ struct timer_list timer;
+ struct task_struct *task;
+};
+
+static void process_timeout(struct timer_list *t)
+{
+ struct process_timer *timeout =
+ container_of(t, struct process_timer, timer);
+
+ wake_up_process(timeout->task);
+}
+
+long schedule_timeout(long timeout)
+{
+ struct process_timer timer;
+ unsigned long expire;
+
+ switch (timeout)
+ {
+ case MAX_SCHEDULE_TIMEOUT:
+ /*
+ * These two special cases are useful to be comfortable
+ * in the caller. Nothing more. We could take
+ * MAX_SCHEDULE_TIMEOUT from one of the negative value
+ * but I' d like to return a valid offset (>=0) to allow
+ * the caller to do everything it want with the retval.
+ */
+ schedule();
+ goto out;
+ default:
+ /*
+ * Another bit of PARANOID. Note that the retval will be
+ * 0 since no piece of kernel is supposed to do a check
+ * for a negative retval of schedule_timeout() (since it
+ * should never happens anyway). You just have the printk()
+ * that will tell you if something is gone wrong and where.
+ */
+ if (timeout < 0) {
+ fprintf(stderr, "schedule_timeout: wrong timeout "
+ "value %lx\n", timeout);
+ current->state = TASK_RUNNING;
+ goto out;
+ }
+ }
+
+ expire = timeout + jiffies;
+
+ timer.task = current;
+ timer_setup_on_stack(&timer.timer, process_timeout, 0);
+ mod_timer(&timer.timer, expire);
+ schedule();
+ del_timer_sync(&timer.timer);
+
+ timeout = expire - jiffies;
+out:
+ return timeout < 0 ? 0 : timeout;
+}
+
+__attribute__((constructor(101)))
+static void sched_init(void)
+{
+ struct task_struct *p = malloc(sizeof(*p));
+
+ memset(p, 0, sizeof(*p));
+
+ p->state = TASK_RUNNING;
+ atomic_set(&p->usage, 1);
+ init_completion(&p->exited);
+
+ current = p;
+
+ rcu_init();
+ rcu_register_thread();
+}
+
+#ifndef SYS_getrandom
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+int urandom_fd;
+
+__attribute__((constructor(101)))
+static void rand_init(void)
+{
+ urandom_fd = open("/dev/urandom", O_RDONLY);
+ BUG_ON(urandom_fd < 0);
+}
+#endif
diff --git a/linux/semaphore.c b/linux/semaphore.c
new file mode 100644
index 00000000..b7d4b517
--- /dev/null
+++ b/linux/semaphore.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2008 Intel Corporation
+ * Author: Matthew Wilcox <willy@linux.intel.com>
+ *
+ * This file implements counting semaphores.
+ * A counting semaphore may be acquired 'n' times before sleeping.
+ * See mutex.c for single-acquisition sleeping locks which enforce
+ * rules which allow code to be debugged more easily.
+ */
+
+/*
+ * Some notes on the implementation:
+ *
+ * The spinlock controls access to the other members of the semaphore.
+ * down_trylock() and up() can be called from interrupt context, so we
+ * have to disable interrupts when taking the lock. It turns out various
+ * parts of the kernel expect to be able to use down() on a semaphore in
+ * interrupt context when they know it will succeed, so we have to use
+ * irqsave variants for down(), down_interruptible() and down_killable()
+ * too.
+ *
+ * The ->count variable represents how many more tasks can acquire this
+ * semaphore. If it's zero, there may be tasks waiting on the wait_list.
+ */
+
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/semaphore.h>
+#include <linux/spinlock.h>
+
+static noinline void __down(struct semaphore *sem);
+static noinline int __down_timeout(struct semaphore *sem, long timeout);
+static noinline void __up(struct semaphore *sem);
+
+/**
+ * down - acquire the semaphore
+ * @sem: the semaphore to be acquired
+ *
+ * Acquires the semaphore. If no more tasks are allowed to acquire the
+ * semaphore, calling this function will put the task to sleep until the
+ * semaphore is released.
+ *
+ * Use of this function is deprecated, please use down_interruptible() or
+ * down_killable() instead.
+ */
+void down(struct semaphore *sem)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&sem->lock, flags);
+ if (likely(sem->count > 0))
+ sem->count--;
+ else
+ __down(sem);
+ raw_spin_unlock_irqrestore(&sem->lock, flags);
+}
+EXPORT_SYMBOL(down);
+
+/**
+ * down_trylock - try to acquire the semaphore, without waiting
+ * @sem: the semaphore to be acquired
+ *
+ * Try to acquire the semaphore atomically. Returns 0 if the semaphore has
+ * been acquired successfully or 1 if it it cannot be acquired.
+ *
+ * NOTE: This return value is inverted from both spin_trylock and
+ * mutex_trylock! Be careful about this when converting code.
+ *
+ * Unlike mutex_trylock, this function can be used from interrupt context,
+ * and the semaphore can be released by any task or interrupt.
+ */
+int down_trylock(struct semaphore *sem)
+{
+ unsigned long flags;
+ int count;
+
+ raw_spin_lock_irqsave(&sem->lock, flags);
+ count = sem->count - 1;
+ if (likely(count >= 0))
+ sem->count = count;
+ raw_spin_unlock_irqrestore(&sem->lock, flags);
+
+ return (count < 0);
+}
+EXPORT_SYMBOL(down_trylock);
+
+/**
+ * down_timeout - acquire the semaphore within a specified time
+ * @sem: the semaphore to be acquired
+ * @timeout: how long to wait before failing
+ *
+ * Attempts to acquire the semaphore. If no more tasks are allowed to
+ * acquire the semaphore, calling this function will put the task to sleep.
+ * If the semaphore is not released within the specified number of jiffies,
+ * this function returns -ETIME. It returns 0 if the semaphore was acquired.
+ */
+int down_timeout(struct semaphore *sem, long timeout)
+{
+ unsigned long flags;
+ int result = 0;
+
+ raw_spin_lock_irqsave(&sem->lock, flags);
+ if (likely(sem->count > 0))
+ sem->count--;
+ else
+ result = __down_timeout(sem, timeout);
+ raw_spin_unlock_irqrestore(&sem->lock, flags);
+
+ return result;
+}
+EXPORT_SYMBOL(down_timeout);
+
+/**
+ * up - release the semaphore
+ * @sem: the semaphore to release
+ *
+ * Release the semaphore. Unlike mutexes, up() may be called from any
+ * context and even by tasks which have never called down().
+ */
+void up(struct semaphore *sem)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&sem->lock, flags);
+ if (likely(list_empty(&sem->wait_list)))
+ sem->count++;
+ else
+ __up(sem);
+ raw_spin_unlock_irqrestore(&sem->lock, flags);
+}
+EXPORT_SYMBOL(up);
+
+/* Functions for the contended case */
+
+struct semaphore_waiter {
+ struct list_head list;
+ struct task_struct *task;
+ bool up;
+};
+
+/*
+ * Because this function is inlined, the 'state' parameter will be
+ * constant, and thus optimised away by the compiler. Likewise the
+ * 'timeout' parameter for the cases without timeouts.
+ */
+static inline int __sched __down_common(struct semaphore *sem, long state,
+ long timeout)
+{
+ struct semaphore_waiter waiter;
+
+ list_add_tail(&waiter.list, &sem->wait_list);
+ waiter.task = current;
+ waiter.up = false;
+
+ for (;;) {
+ if (unlikely(timeout <= 0))
+ goto timed_out;
+ __set_current_state(state);
+ raw_spin_unlock_irq(&sem->lock);
+ timeout = schedule_timeout(timeout);
+ raw_spin_lock_irq(&sem->lock);
+ if (waiter.up)
+ return 0;
+ }
+
+ timed_out:
+ list_del(&waiter.list);
+ return -ETIME;
+}
+
+static noinline void __sched __down(struct semaphore *sem)
+{
+ __down_common(sem, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
+}
+
+static noinline int __sched __down_timeout(struct semaphore *sem, long timeout)
+{
+ return __down_common(sem, TASK_UNINTERRUPTIBLE, timeout);
+}
+
+static noinline void __sched __up(struct semaphore *sem)
+{
+ struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list,
+ struct semaphore_waiter, list);
+ list_del(&waiter->list);
+ waiter->up = true;
+ wake_up_process(waiter->task);
+}
diff --git a/linux/seq_buf.c b/linux/seq_buf.c
new file mode 100644
index 00000000..cf8709ad
--- /dev/null
+++ b/linux/seq_buf.c
@@ -0,0 +1,152 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * seq_buf.c
+ *
+ * Copyright (C) 2014 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ * The seq_buf is a handy tool that allows you to pass a descriptor around
+ * to a buffer that other functions can write to. It is similar to the
+ * seq_file functionality but has some differences.
+ *
+ * To use it, the seq_buf must be initialized with seq_buf_init().
+ * This will set up the counters within the descriptor. You can call
+ * seq_buf_init() more than once to reset the seq_buf to start
+ * from scratch.
+ */
+#include <linux/seq_buf.h>
+#include <stdio.h>
+
+/**
+ * seq_buf_can_fit - can the new data fit in the current buffer?
+ * @s: the seq_buf descriptor
+ * @len: The length to see if it can fit in the current buffer
+ *
+ * Returns true if there's enough unused space in the seq_buf buffer
+ * to fit the amount of new data according to @len.
+ */
+static bool seq_buf_can_fit(struct seq_buf *s, size_t len)
+{
+ return s->len + len <= s->size;
+}
+
+/**
+ * seq_buf_vprintf - sequence printing of information.
+ * @s: seq_buf descriptor
+ * @fmt: printf format string
+ * @args: va_list of arguments from a printf() type function
+ *
+ * Writes a vnprintf() format into the sequencce buffer.
+ *
+ * Returns zero on success, -1 on overflow.
+ */
+int seq_buf_vprintf(struct seq_buf *s, const char *fmt, va_list args)
+{
+ int len;
+
+ WARN_ON(s->size == 0);
+
+ if (s->len < s->size) {
+ len = vsnprintf(s->buffer + s->len, s->size - s->len, fmt, args);
+ if (s->len + len < s->size) {
+ s->len += len;
+ return 0;
+ }
+ }
+ seq_buf_set_overflow(s);
+ return -1;
+}
+
+/**
+ * seq_buf_printf - sequence printing of information
+ * @s: seq_buf descriptor
+ * @fmt: printf format string
+ *
+ * Writes a printf() format into the sequence buffer.
+ *
+ * Returns zero on success, -1 on overflow.
+ */
+int seq_buf_printf(struct seq_buf *s, const char *fmt, ...)
+{
+ va_list ap;
+ int ret;
+
+ va_start(ap, fmt);
+ ret = seq_buf_vprintf(s, fmt, ap);
+ va_end(ap);
+
+ return ret;
+}
+
+/**
+ * seq_buf_puts - sequence printing of simple string
+ * @s: seq_buf descriptor
+ * @str: simple string to record
+ *
+ * Copy a simple string into the sequence buffer.
+ *
+ * Returns zero on success, -1 on overflow
+ */
+int seq_buf_puts(struct seq_buf *s, const char *str)
+{
+ size_t len = strlen(str);
+
+ WARN_ON(s->size == 0);
+
+ /* Add 1 to len for the trailing null byte which must be there */
+ len += 1;
+
+ if (seq_buf_can_fit(s, len)) {
+ memcpy(s->buffer + s->len, str, len);
+ /* Don't count the trailing null byte against the capacity */
+ s->len += len - 1;
+ return 0;
+ }
+ seq_buf_set_overflow(s);
+ return -1;
+}
+
+/**
+ * seq_buf_putc - sequence printing of simple character
+ * @s: seq_buf descriptor
+ * @c: simple character to record
+ *
+ * Copy a single character into the sequence buffer.
+ *
+ * Returns zero on success, -1 on overflow
+ */
+int seq_buf_putc(struct seq_buf *s, unsigned char c)
+{
+ WARN_ON(s->size == 0);
+
+ if (seq_buf_can_fit(s, 1)) {
+ s->buffer[s->len++] = c;
+ return 0;
+ }
+ seq_buf_set_overflow(s);
+ return -1;
+}
+
+/**
+ * seq_buf_putmem - write raw data into the sequenc buffer
+ * @s: seq_buf descriptor
+ * @mem: The raw memory to copy into the buffer
+ * @len: The length of the raw memory to copy (in bytes)
+ *
+ * There may be cases where raw memory needs to be written into the
+ * buffer and a strcpy() would not work. Using this function allows
+ * for such cases.
+ *
+ * Returns zero on success, -1 on overflow
+ */
+int seq_buf_putmem(struct seq_buf *s, const void *mem, unsigned int len)
+{
+ WARN_ON(s->size == 0);
+
+ if (seq_buf_can_fit(s, len)) {
+ memcpy(s->buffer + s->len, mem, len);
+ s->len += len;
+ return 0;
+ }
+ seq_buf_set_overflow(s);
+ return -1;
+}
diff --git a/linux/shrinker.c b/linux/shrinker.c
new file mode 100644
index 00000000..ca34ebc7
--- /dev/null
+++ b/linux/shrinker.c
@@ -0,0 +1,140 @@
+
+#include <stdio.h>
+#include <unistd.h>
+
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/shrinker.h>
+
+#include "tools-util.h"
+
+static LIST_HEAD(shrinker_list);
+static DEFINE_MUTEX(shrinker_lock);
+
+void shrinker_free(struct shrinker *s)
+{
+ if (s->list.next) {
+ mutex_lock(&shrinker_lock);
+ list_del(&s->list);
+ mutex_unlock(&shrinker_lock);
+ }
+ free(s);
+}
+
+struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...)
+{
+ return calloc(sizeof(struct shrinker), 1);
+}
+
+int shrinker_register(struct shrinker *shrinker)
+{
+ mutex_lock(&shrinker_lock);
+ list_add_tail(&shrinker->list, &shrinker_list);
+ mutex_unlock(&shrinker_lock);
+ return 0;
+}
+
+static void run_shrinkers_allocation_failed(gfp_t gfp_mask)
+{
+ struct shrinker *shrinker;
+
+ mutex_lock(&shrinker_lock);
+ list_for_each_entry(shrinker, &shrinker_list, list) {
+ struct shrink_control sc = { .gfp_mask = gfp_mask, };
+
+ unsigned long have = shrinker->count_objects(shrinker, &sc);
+
+ sc.nr_to_scan = have / 8;
+
+ shrinker->scan_objects(shrinker, &sc);
+ }
+ mutex_unlock(&shrinker_lock);
+}
+
+void run_shrinkers(gfp_t gfp_mask, bool allocation_failed)
+{
+ struct shrinker *shrinker;
+ struct sysinfo info;
+ s64 want_shrink;
+
+ if (!(gfp_mask & GFP_KERNEL))
+ return;
+
+ /* Fast out if there are no shrinkers to run. */
+ if (list_empty(&shrinker_list))
+ return;
+
+ if (allocation_failed) {
+ run_shrinkers_allocation_failed(gfp_mask);
+ return;
+ }
+
+ si_meminfo(&info);
+
+ /* Aim for 6% of physical RAM free without anything in swap */
+ want_shrink = (info.totalram >> 4) - info.freeram
+ + info.totalswap - info.freeswap;
+ if (want_shrink <= 0)
+ return;
+
+ mutex_lock(&shrinker_lock);
+ list_for_each_entry(shrinker, &shrinker_list, list) {
+ struct shrink_control sc = {
+ .gfp_mask = gfp_mask,
+ .nr_to_scan = want_shrink >> PAGE_SHIFT
+ };
+
+ shrinker->scan_objects(shrinker, &sc);
+ }
+ mutex_unlock(&shrinker_lock);
+}
+
+static int shrinker_thread(void *arg)
+{
+ while (!kthread_should_stop()) {
+ struct timespec to;
+ int v;
+
+ clock_gettime(CLOCK_MONOTONIC, &to);
+ to.tv_sec += 1;
+ __set_current_state(TASK_INTERRUPTIBLE);
+ errno = 0;
+ while ((v = READ_ONCE(current->state)) != TASK_RUNNING &&
+ errno != ETIMEDOUT)
+ futex(&current->state, FUTEX_WAIT_BITSET|FUTEX_PRIVATE_FLAG,
+ v, &to, NULL, (uint32_t)~0);
+ if (kthread_should_stop())
+ break;
+ if (v != TASK_RUNNING)
+ __set_current_state(TASK_RUNNING);
+ run_shrinkers(GFP_KERNEL, false);
+ }
+
+ return 0;
+}
+
+struct task_struct *shrinker_task;
+
+__attribute__((constructor(103)))
+static void shrinker_thread_init(void)
+{
+ shrinker_task = kthread_run(shrinker_thread, NULL, "shrinkers");
+ BUG_ON(IS_ERR(shrinker_task));
+}
+
+#if 0
+/*
+ * We seem to be hitting a rare segfault when shutting down the shrinker thread.
+ * Disabling this is going to cause some harmless warnings about memory leaks:
+ */
+__attribute__((destructor(103)))
+static void shrinker_thread_exit(void)
+{
+ int ret = kthread_stop(shrinker_task);
+ BUG_ON(ret);
+
+ shrinker_task = NULL;
+}
+#endif
diff --git a/linux/siphash.c b/linux/siphash.c
new file mode 100644
index 00000000..f8dbecea
--- /dev/null
+++ b/linux/siphash.c
@@ -0,0 +1,552 @@
+/* Copyright (C) 2016 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.
+ *
+ * SipHash: a fast short-input PRF
+ * https://131002.net/siphash/
+ *
+ * This implementation is specifically for SipHash2-4 for a secure PRF
+ * and HalfSipHash1-3/SipHash1-3 for an insecure PRF only suitable for
+ * hashtables.
+ */
+
+#include <linux/siphash.h>
+#include <linux/bitops.h>
+#include <asm/unaligned.h>
+
+#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
+#include <linux/dcache.h>
+#include <asm/word-at-a-time.h>
+#endif
+
+#define SIPROUND \
+ do { \
+ v0 += v1; v1 = rol64(v1, 13); v1 ^= v0; v0 = rol64(v0, 32); \
+ v2 += v3; v3 = rol64(v3, 16); v3 ^= v2; \
+ v0 += v3; v3 = rol64(v3, 21); v3 ^= v0; \
+ v2 += v1; v1 = rol64(v1, 17); v1 ^= v2; v2 = rol64(v2, 32); \
+ } while (0)
+
+#define PREAMBLE(len) \
+ u64 v0 = 0x736f6d6570736575ULL; \
+ u64 v1 = 0x646f72616e646f6dULL; \
+ u64 v2 = 0x6c7967656e657261ULL; \
+ u64 v3 = 0x7465646279746573ULL; \
+ u64 b = ((u64)(len)) << 56; \
+ v3 ^= key->key[1]; \
+ v2 ^= key->key[0]; \
+ v1 ^= key->key[1]; \
+ v0 ^= key->key[0];
+
+#define POSTAMBLE \
+ v3 ^= b; \
+ SIPROUND; \
+ SIPROUND; \
+ v0 ^= b; \
+ v2 ^= 0xff; \
+ SIPROUND; \
+ SIPROUND; \
+ SIPROUND; \
+ SIPROUND; \
+ return (v0 ^ v1) ^ (v2 ^ v3);
+
+u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key)
+{
+ const u8 *end = data + len - (len % sizeof(u64));
+ const u8 left = len & (sizeof(u64) - 1);
+ u64 m;
+ PREAMBLE(len)
+ for (; data != end; data += sizeof(u64)) {
+ m = le64_to_cpup(data);
+ v3 ^= m;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= m;
+ }
+#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
+ if (left)
+ b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) &
+ bytemask_from_count(left)));
+#else
+ switch (left) {
+ case 7: b |= ((u64)end[6]) << 48; fallthrough;
+ case 6: b |= ((u64)end[5]) << 40; fallthrough;
+ case 5: b |= ((u64)end[4]) << 32; fallthrough;
+ case 4: b |= le32_to_cpup(data); break;
+ case 3: b |= ((u64)end[2]) << 16; fallthrough;
+ case 2: b |= le16_to_cpup(data); break;
+ case 1: b |= end[0];
+ }
+#endif
+ POSTAMBLE
+}
+EXPORT_SYMBOL(__siphash_aligned);
+
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key)
+{
+ const u8 *end = data + len - (len % sizeof(u64));
+ const u8 left = len & (sizeof(u64) - 1);
+ u64 m;
+ PREAMBLE(len)
+ for (; data != end; data += sizeof(u64)) {
+ m = get_unaligned_le64(data);
+ v3 ^= m;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= m;
+ }
+#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
+ if (left)
+ b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) &
+ bytemask_from_count(left)));
+#else
+ switch (left) {
+ case 7: b |= ((u64)end[6]) << 48; fallthrough;
+ case 6: b |= ((u64)end[5]) << 40; fallthrough;
+ case 5: b |= ((u64)end[4]) << 32; fallthrough;
+ case 4: b |= get_unaligned_le32(end); break;
+ case 3: b |= ((u64)end[2]) << 16; fallthrough;
+ case 2: b |= get_unaligned_le16(end); break;
+ case 1: b |= end[0];
+ }
+#endif
+ POSTAMBLE
+}
+EXPORT_SYMBOL(__siphash_unaligned);
+#endif
+
+/**
+ * siphash_1u64 - compute 64-bit siphash PRF value of a u64
+ * @first: first u64
+ * @key: the siphash key
+ */
+u64 siphash_1u64(const u64 first, const siphash_key_t *key)
+{
+ PREAMBLE(8)
+ v3 ^= first;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= first;
+ POSTAMBLE
+}
+EXPORT_SYMBOL(siphash_1u64);
+
+/**
+ * siphash_2u64 - compute 64-bit siphash PRF value of 2 u64
+ * @first: first u64
+ * @second: second u64
+ * @key: the siphash key
+ */
+u64 siphash_2u64(const u64 first, const u64 second, const siphash_key_t *key)
+{
+ PREAMBLE(16)
+ v3 ^= first;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= first;
+ v3 ^= second;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= second;
+ POSTAMBLE
+}
+EXPORT_SYMBOL(siphash_2u64);
+
+/**
+ * siphash_3u64 - compute 64-bit siphash PRF value of 3 u64
+ * @first: first u64
+ * @second: second u64
+ * @third: third u64
+ * @key: the siphash key
+ */
+u64 siphash_3u64(const u64 first, const u64 second, const u64 third,
+ const siphash_key_t *key)
+{
+ PREAMBLE(24)
+ v3 ^= first;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= first;
+ v3 ^= second;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= second;
+ v3 ^= third;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= third;
+ POSTAMBLE
+}
+EXPORT_SYMBOL(siphash_3u64);
+
+/**
+ * siphash_4u64 - compute 64-bit siphash PRF value of 4 u64
+ * @first: first u64
+ * @second: second u64
+ * @third: third u64
+ * @forth: forth u64
+ * @key: the siphash key
+ */
+u64 siphash_4u64(const u64 first, const u64 second, const u64 third,
+ const u64 forth, const siphash_key_t *key)
+{
+ PREAMBLE(32)
+ v3 ^= first;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= first;
+ v3 ^= second;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= second;
+ v3 ^= third;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= third;
+ v3 ^= forth;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= forth;
+ POSTAMBLE
+}
+EXPORT_SYMBOL(siphash_4u64);
+
+u64 siphash_1u32(const u32 first, const siphash_key_t *key)
+{
+ PREAMBLE(4)
+ b |= first;
+ POSTAMBLE
+}
+EXPORT_SYMBOL(siphash_1u32);
+
+u64 siphash_3u32(const u32 first, const u32 second, const u32 third,
+ const siphash_key_t *key)
+{
+ u64 combined = (u64)second << 32 | first;
+ PREAMBLE(12)
+ v3 ^= combined;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= combined;
+ b |= third;
+ POSTAMBLE
+}
+EXPORT_SYMBOL(siphash_3u32);
+
+#if BITS_PER_LONG == 64
+/* Note that on 64-bit, we make HalfSipHash1-3 actually be SipHash1-3, for
+ * performance reasons. On 32-bit, below, we actually implement HalfSipHash1-3.
+ */
+
+#define HSIPROUND SIPROUND
+#define HPREAMBLE(len) PREAMBLE(len)
+#define HPOSTAMBLE \
+ v3 ^= b; \
+ HSIPROUND; \
+ v0 ^= b; \
+ v2 ^= 0xff; \
+ HSIPROUND; \
+ HSIPROUND; \
+ HSIPROUND; \
+ return (v0 ^ v1) ^ (v2 ^ v3);
+
+u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key)
+{
+ const u8 *end = data + len - (len % sizeof(u64));
+ const u8 left = len & (sizeof(u64) - 1);
+ u64 m;
+ HPREAMBLE(len)
+ for (; data != end; data += sizeof(u64)) {
+ m = le64_to_cpup(data);
+ v3 ^= m;
+ HSIPROUND;
+ v0 ^= m;
+ }
+#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
+ if (left)
+ b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) &
+ bytemask_from_count(left)));
+#else
+ switch (left) {
+ case 7: b |= ((u64)end[6]) << 48; fallthrough;
+ case 6: b |= ((u64)end[5]) << 40; fallthrough;
+ case 5: b |= ((u64)end[4]) << 32; fallthrough;
+ case 4: b |= le32_to_cpup(data); break;
+ case 3: b |= ((u64)end[2]) << 16; fallthrough;
+ case 2: b |= le16_to_cpup(data); break;
+ case 1: b |= end[0];
+ }
+#endif
+ HPOSTAMBLE
+}
+EXPORT_SYMBOL(__hsiphash_aligned);
+
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+u32 __hsiphash_unaligned(const void *data, size_t len,
+ const hsiphash_key_t *key)
+{
+ const u8 *end = data + len - (len % sizeof(u64));
+ const u8 left = len & (sizeof(u64) - 1);
+ u64 m;
+ HPREAMBLE(len)
+ for (; data != end; data += sizeof(u64)) {
+ m = get_unaligned_le64(data);
+ v3 ^= m;
+ HSIPROUND;
+ v0 ^= m;
+ }
+#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
+ if (left)
+ b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) &
+ bytemask_from_count(left)));
+#else
+ switch (left) {
+ case 7: b |= ((u64)end[6]) << 48; fallthrough;
+ case 6: b |= ((u64)end[5]) << 40; fallthrough;
+ case 5: b |= ((u64)end[4]) << 32; fallthrough;
+ case 4: b |= get_unaligned_le32(end); break;
+ case 3: b |= ((u64)end[2]) << 16; fallthrough;
+ case 2: b |= get_unaligned_le16(end); break;
+ case 1: b |= end[0];
+ }
+#endif
+ HPOSTAMBLE
+}
+EXPORT_SYMBOL(__hsiphash_unaligned);
+#endif
+
+/**
+ * hsiphash_1u32 - compute 64-bit hsiphash PRF value of a u32
+ * @first: first u32
+ * @key: the hsiphash key
+ */
+u32 hsiphash_1u32(const u32 first, const hsiphash_key_t *key)
+{
+ HPREAMBLE(4)
+ b |= first;
+ HPOSTAMBLE
+}
+EXPORT_SYMBOL(hsiphash_1u32);
+
+/**
+ * hsiphash_2u32 - compute 32-bit hsiphash PRF value of 2 u32
+ * @first: first u32
+ * @second: second u32
+ * @key: the hsiphash key
+ */
+u32 hsiphash_2u32(const u32 first, const u32 second, const hsiphash_key_t *key)
+{
+ u64 combined = (u64)second << 32 | first;
+ HPREAMBLE(8)
+ v3 ^= combined;
+ HSIPROUND;
+ v0 ^= combined;
+ HPOSTAMBLE
+}
+EXPORT_SYMBOL(hsiphash_2u32);
+
+/**
+ * hsiphash_3u32 - compute 32-bit hsiphash PRF value of 3 u32
+ * @first: first u32
+ * @second: second u32
+ * @third: third u32
+ * @key: the hsiphash key
+ */
+u32 hsiphash_3u32(const u32 first, const u32 second, const u32 third,
+ const hsiphash_key_t *key)
+{
+ u64 combined = (u64)second << 32 | first;
+ HPREAMBLE(12)
+ v3 ^= combined;
+ HSIPROUND;
+ v0 ^= combined;
+ b |= third;
+ HPOSTAMBLE
+}
+EXPORT_SYMBOL(hsiphash_3u32);
+
+/**
+ * hsiphash_4u32 - compute 32-bit hsiphash PRF value of 4 u32
+ * @first: first u32
+ * @second: second u32
+ * @third: third u32
+ * @forth: forth u32
+ * @key: the hsiphash key
+ */
+u32 hsiphash_4u32(const u32 first, const u32 second, const u32 third,
+ const u32 forth, const hsiphash_key_t *key)
+{
+ u64 combined = (u64)second << 32 | first;
+ HPREAMBLE(16)
+ v3 ^= combined;
+ HSIPROUND;
+ v0 ^= combined;
+ combined = (u64)forth << 32 | third;
+ v3 ^= combined;
+ HSIPROUND;
+ v0 ^= combined;
+ HPOSTAMBLE
+}
+EXPORT_SYMBOL(hsiphash_4u32);
+#else
+#define HSIPROUND \
+ do { \
+ v0 += v1; v1 = rol32(v1, 5); v1 ^= v0; v0 = rol32(v0, 16); \
+ v2 += v3; v3 = rol32(v3, 8); v3 ^= v2; \
+ v0 += v3; v3 = rol32(v3, 7); v3 ^= v0; \
+ v2 += v1; v1 = rol32(v1, 13); v1 ^= v2; v2 = rol32(v2, 16); \
+ } while (0)
+
+#define HPREAMBLE(len) \
+ u32 v0 = 0; \
+ u32 v1 = 0; \
+ u32 v2 = 0x6c796765U; \
+ u32 v3 = 0x74656462U; \
+ u32 b = ((u32)(len)) << 24; \
+ v3 ^= key->key[1]; \
+ v2 ^= key->key[0]; \
+ v1 ^= key->key[1]; \
+ v0 ^= key->key[0];
+
+#define HPOSTAMBLE \
+ v3 ^= b; \
+ HSIPROUND; \
+ v0 ^= b; \
+ v2 ^= 0xff; \
+ HSIPROUND; \
+ HSIPROUND; \
+ HSIPROUND; \
+ return v1 ^ v3;
+
+u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key)
+{
+ const u8 *end = data + len - (len % sizeof(u32));
+ const u8 left = len & (sizeof(u32) - 1);
+ u32 m;
+ HPREAMBLE(len)
+ for (; data != end; data += sizeof(u32)) {
+ m = le32_to_cpup(data);
+ v3 ^= m;
+ HSIPROUND;
+ v0 ^= m;
+ }
+ switch (left) {
+ case 3: b |= ((u32)end[2]) << 16; fallthrough;
+ case 2: b |= le16_to_cpup(data); break;
+ case 1: b |= end[0];
+ }
+ HPOSTAMBLE
+}
+EXPORT_SYMBOL(__hsiphash_aligned);
+
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+u32 __hsiphash_unaligned(const void *data, size_t len,
+ const hsiphash_key_t *key)
+{
+ const u8 *end = data + len - (len % sizeof(u32));
+ const u8 left = len & (sizeof(u32) - 1);
+ u32 m;
+ HPREAMBLE(len)
+ for (; data != end; data += sizeof(u32)) {
+ m = get_unaligned_le32(data);
+ v3 ^= m;
+ HSIPROUND;
+ v0 ^= m;
+ }
+ switch (left) {
+ case 3: b |= ((u32)end[2]) << 16; fallthrough;
+ case 2: b |= get_unaligned_le16(end); break;
+ case 1: b |= end[0];
+ }
+ HPOSTAMBLE
+}
+EXPORT_SYMBOL(__hsiphash_unaligned);
+#endif
+
+/**
+ * hsiphash_1u32 - compute 32-bit hsiphash PRF value of a u32
+ * @first: first u32
+ * @key: the hsiphash key
+ */
+u32 hsiphash_1u32(const u32 first, const hsiphash_key_t *key)
+{
+ HPREAMBLE(4)
+ v3 ^= first;
+ HSIPROUND;
+ v0 ^= first;
+ HPOSTAMBLE
+}
+EXPORT_SYMBOL(hsiphash_1u32);
+
+/**
+ * hsiphash_2u32 - compute 32-bit hsiphash PRF value of 2 u32
+ * @first: first u32
+ * @second: second u32
+ * @key: the hsiphash key
+ */
+u32 hsiphash_2u32(const u32 first, const u32 second, const hsiphash_key_t *key)
+{
+ HPREAMBLE(8)
+ v3 ^= first;
+ HSIPROUND;
+ v0 ^= first;
+ v3 ^= second;
+ HSIPROUND;
+ v0 ^= second;
+ HPOSTAMBLE
+}
+EXPORT_SYMBOL(hsiphash_2u32);
+
+/**
+ * hsiphash_3u32 - compute 32-bit hsiphash PRF value of 3 u32
+ * @first: first u32
+ * @second: second u32
+ * @third: third u32
+ * @key: the hsiphash key
+ */
+u32 hsiphash_3u32(const u32 first, const u32 second, const u32 third,
+ const hsiphash_key_t *key)
+{
+ HPREAMBLE(12)
+ v3 ^= first;
+ HSIPROUND;
+ v0 ^= first;
+ v3 ^= second;
+ HSIPROUND;
+ v0 ^= second;
+ v3 ^= third;
+ HSIPROUND;
+ v0 ^= third;
+ HPOSTAMBLE
+}
+EXPORT_SYMBOL(hsiphash_3u32);
+
+/**
+ * hsiphash_4u32 - compute 32-bit hsiphash PRF value of 4 u32
+ * @first: first u32
+ * @second: second u32
+ * @third: third u32
+ * @forth: forth u32
+ * @key: the hsiphash key
+ */
+u32 hsiphash_4u32(const u32 first, const u32 second, const u32 third,
+ const u32 forth, const hsiphash_key_t *key)
+{
+ HPREAMBLE(16)
+ v3 ^= first;
+ HSIPROUND;
+ v0 ^= first;
+ v3 ^= second;
+ HSIPROUND;
+ v0 ^= second;
+ v3 ^= third;
+ HSIPROUND;
+ v0 ^= third;
+ v3 ^= forth;
+ HSIPROUND;
+ v0 ^= forth;
+ HPOSTAMBLE
+}
+EXPORT_SYMBOL(hsiphash_4u32);
+#endif
diff --git a/linux/string.c b/linux/string.c
new file mode 100644
index 00000000..a32a8995
--- /dev/null
+++ b/linux/string.c
@@ -0,0 +1,112 @@
+/*
+ * linux/lib/string.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+/*
+ * stupid library routines.. The optimized versions should generally be found
+ * as inline code in <asm-xx/string.h>
+ *
+ * These are buggy as well..
+ *
+ * * Fri Jun 25 1999, Ingo Oeser <ioe@informatik.tu-chemnitz.de>
+ * - Added strsep() which will replace strtok() soon (because strsep() is
+ * reentrant and should be faster). Use only strsep() in new code, please.
+ *
+ * * Sat Feb 09 2002, Jason Thomas <jason@topic.com.au>,
+ * Matthew Hawkins <matt@mh.dropbear.id.au>
+ * - Kissed strtok() goodbye
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <string.h>
+
+#include <linux/bug.h>
+#include <linux/compiler.h>
+#include <linux/string.h>
+
+static char *skip_spaces(const char *str)
+{
+ while (isspace(*str))
+ ++str;
+ return (char *)str;
+}
+
+char *strim(char *s)
+{
+ size_t size;
+ char *end;
+
+ size = strlen(s);
+ if (!size)
+ return s;
+
+ end = s + size - 1;
+ while (end >= s && isspace(*end))
+ end--;
+ *(end + 1) = '\0';
+
+ return skip_spaces(s);
+}
+
+size_t strlcpy(char *dest, const char *src, size_t size)
+{
+ size_t ret = strlen(src);
+
+ if (size) {
+ size_t len = (ret >= size) ? size - 1 : ret;
+ memcpy(dest, src, len);
+ dest[len] = '\0';
+ }
+ return ret;
+}
+
+ssize_t strscpy(char *dest, const char *src, size_t count)
+{
+ long res = 0;
+
+ if (count == 0 || WARN_ON_ONCE(count > INT_MAX))
+ return -E2BIG;
+
+ while (count) {
+ char c;
+
+ c = src[res];
+ dest[res] = c;
+ if (!c)
+ return res;
+ res++;
+ count--;
+ }
+
+ /* Hit buffer length without finding a NUL; force NUL-termination. */
+ if (res)
+ dest[res-1] = '\0';
+
+ return -E2BIG;
+}
+
+void memzero_explicit(void *s, size_t count)
+{
+ memset(s, 0, count);
+ barrier_data(s);
+}
+
+int match_string(const char * const *array, size_t n, const char *string)
+{
+ int index;
+ const char *item;
+
+ for (index = 0; index < n; index++) {
+ item = array[index];
+ if (!item)
+ break;
+ if (!strcmp(item, string))
+ return index;
+ }
+
+ return -EINVAL;
+}
diff --git a/linux/string_helpers.c b/linux/string_helpers.c
new file mode 100644
index 00000000..0810ca13
--- /dev/null
+++ b/linux/string_helpers.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Helpers for formatting and printing strings
+ *
+ * Copyright 31 August 2008 James Bottomley
+ * Copyright (C) 2013, Intel Corporation
+ */
+#include <linux/bug.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/export.h>
+#include <linux/ctype.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/limits.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/string_helpers.h>
+
+/**
+ * string_get_size - get the size in the specified units
+ * @size: The size to be converted in blocks
+ * @blk_size: Size of the block (use 1 for size in bytes)
+ * @units: units to use (powers of 1000 or 1024)
+ * @buf: buffer to format to
+ * @len: length of buffer
+ *
+ * This function returns a string formatted to 3 significant figures
+ * giving the size in the required units. @buf should have room for
+ * at least 9 bytes and will always be zero terminated.
+ *
+ */
+int string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
+ char *buf, int len)
+{
+ static const char *const units_10[] = {
+ "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"
+ };
+ static const char *const units_2[] = {
+ "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"
+ };
+ static const char *const *const units_str[] = {
+ [STRING_UNITS_10] = units_10,
+ [STRING_UNITS_2] = units_2,
+ };
+ static const unsigned int divisor[] = {
+ [STRING_UNITS_10] = 1000,
+ [STRING_UNITS_2] = 1024,
+ };
+ static const unsigned int rounding[] = { 500, 50, 5 };
+ int i = 0, j;
+ u32 remainder = 0, sf_cap;
+ char tmp[12];
+ const char *unit;
+
+ tmp[0] = '\0';
+
+ if (blk_size == 0)
+ size = 0;
+ if (size == 0)
+ goto out;
+
+ /* This is Napier's algorithm. Reduce the original block size to
+ *
+ * coefficient * divisor[units]^i
+ *
+ * we do the reduction so both coefficients are just under 32 bits so
+ * that multiplying them together won't overflow 64 bits and we keep
+ * as much precision as possible in the numbers.
+ *
+ * Note: it's safe to throw away the remainders here because all the
+ * precision is in the coefficients.
+ */
+ while (blk_size >> 32) {
+ do_div(blk_size, divisor[units]);
+ i++;
+ }
+
+ while (size >> 32) {
+ do_div(size, divisor[units]);
+ i++;
+ }
+
+ /* now perform the actual multiplication keeping i as the sum of the
+ * two logarithms */
+ size *= blk_size;
+
+ /* and logarithmically reduce it until it's just under the divisor */
+ while (size >= divisor[units]) {
+ remainder = do_div(size, divisor[units]);
+ i++;
+ }
+
+ /* work out in j how many digits of precision we need from the
+ * remainder */
+ sf_cap = size;
+ for (j = 0; sf_cap*10 < 1000; j++)
+ sf_cap *= 10;
+
+ if (units == STRING_UNITS_2) {
+ /* express the remainder as a decimal. It's currently the
+ * numerator of a fraction whose denominator is
+ * divisor[units], which is 1 << 10 for STRING_UNITS_2 */
+ remainder *= 1000;
+ remainder >>= 10;
+ }
+
+ /* add a 5 to the digit below what will be printed to ensure
+ * an arithmetical round up and carry it through to size */
+ remainder += rounding[j];
+ if (remainder >= 1000) {
+ remainder -= 1000;
+ size += 1;
+ }
+
+ if (j) {
+ snprintf(tmp, sizeof(tmp), ".%03u", remainder);
+ tmp[j+1] = '\0';
+ }
+
+ out:
+ if (i >= ARRAY_SIZE(units_2))
+ unit = "UNK";
+ else
+ unit = units_str[units][i];
+
+ return snprintf(buf, len, "%u%s %s", (u32)size, tmp, unit);
+}
+EXPORT_SYMBOL(string_get_size);
diff --git a/linux/timer.c b/linux/timer.c
new file mode 100644
index 00000000..7d519a4d
--- /dev/null
+++ b/linux/timer.c
@@ -0,0 +1,329 @@
+
+#include <pthread.h>
+#include <signal.h>
+#include <time.h>
+
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/timer.h>
+
+/**
+ * timespec_add_ns - Adds nanoseconds to a timespec
+ * @a: pointer to timespec to be incremented
+ * @ns: unsigned nanoseconds value to be added
+ *
+ * This must always be inlined because its used from the x86-64 vdso,
+ * which cannot call other kernel functions.
+ */
+static struct timespec timespec_add_ns(struct timespec a, u64 ns)
+{
+ a.tv_nsec += ns;
+ a.tv_sec += a.tv_nsec / NSEC_PER_SEC;
+ a.tv_nsec %= NSEC_PER_SEC;
+ return a;
+}
+
+#define DECLARE_HEAP(type) \
+struct { \
+ size_t size, used; \
+ type *data; \
+}
+
+#define heap_init(heap, _size) \
+({ \
+ size_t _bytes; \
+ (heap)->used = 0; \
+ (heap)->size = (_size); \
+ _bytes = (heap)->size * sizeof(*(heap)->data); \
+ (heap)->data = malloc(_bytes); \
+ (heap)->data; \
+})
+
+#define heap_free(heap) \
+do { \
+ kvfree((heap)->data); \
+ (heap)->data = NULL; \
+} while (0)
+
+#define heap_swap(h, i, j) swap((h)->data[i], (h)->data[j])
+
+#define heap_sift(h, i, cmp) \
+do { \
+ size_t _r, _j = i; \
+ \
+ for (; _j * 2 + 1 < (h)->used; _j = _r) { \
+ _r = _j * 2 + 1; \
+ if (_r + 1 < (h)->used && \
+ cmp((h)->data[_r], (h)->data[_r + 1])) \
+ _r++; \
+ \
+ if (cmp((h)->data[_r], (h)->data[_j])) \
+ break; \
+ heap_swap(h, _r, _j); \
+ } \
+} while (0)
+
+#define heap_sift_down(h, i, cmp) \
+do { \
+ while (i) { \
+ size_t p = (i - 1) / 2; \
+ if (cmp((h)->data[i], (h)->data[p])) \
+ break; \
+ heap_swap(h, i, p); \
+ i = p; \
+ } \
+} while (0)
+
+#define heap_add(h, d, cmp) \
+({ \
+ bool _r = !heap_full(h); \
+ if (_r) { \
+ size_t _i = (h)->used++; \
+ (h)->data[_i] = d; \
+ \
+ heap_sift_down(h, _i, cmp); \
+ heap_sift(h, _i, cmp); \
+ } \
+ _r; \
+})
+
+#define heap_del(h, i, cmp) \
+do { \
+ size_t _i = (i); \
+ \
+ BUG_ON(_i >= (h)->used); \
+ (h)->used--; \
+ if ((_i) < (h)->used) { \
+ heap_swap(h, _i, (h)->used); \
+ heap_sift_down(h, _i, cmp); \
+ heap_sift(h, _i, cmp); \
+ } \
+} while (0)
+
+#define heap_pop(h, d, cmp) \
+({ \
+ bool _r = (h)->used; \
+ if (_r) { \
+ (d) = (h)->data[0]; \
+ heap_del(h, 0, cmp); \
+ } \
+ _r; \
+})
+
+#define heap_peek(h) ((h)->used ? &(h)->data[0] : NULL)
+#define heap_full(h) ((h)->used == (h)->size)
+#define heap_empty(h) ((h)->used == 0)
+
+#define heap_resort(heap, cmp) \
+do { \
+ ssize_t _i; \
+ for (_i = (ssize_t) (heap)->used / 2 - 1; _i >= 0; --_i) \
+ heap_sift(heap, _i, cmp); \
+} while (0)
+
+struct pending_timer {
+ struct timer_list *timer;
+ unsigned long expires;
+};
+
+static inline bool pending_timer_cmp(struct pending_timer a,
+ struct pending_timer b)
+{
+ return a.expires < b.expires;
+}
+
+static DECLARE_HEAP(struct pending_timer) pending_timers;
+
+static pthread_mutex_t timer_lock = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t timer_cond = PTHREAD_COND_INITIALIZER;
+static pthread_cond_t timer_running_cond = PTHREAD_COND_INITIALIZER;
+static unsigned long timer_seq;
+
+static inline bool timer_running(void)
+{
+ return timer_seq & 1;
+}
+
+static ssize_t timer_idx(struct timer_list *timer)
+{
+ size_t i;
+
+ for (i = 0; i < pending_timers.used; i++)
+ if (pending_timers.data[i].timer == timer)
+ return i;
+
+ return -1;
+}
+
+int del_timer(struct timer_list *timer)
+{
+ ssize_t idx;
+
+ pthread_mutex_lock(&timer_lock);
+ idx = timer_idx(timer);
+ if (idx >= 0)
+ heap_del(&pending_timers, idx, pending_timer_cmp);
+
+ timer->pending = false;
+ pthread_mutex_unlock(&timer_lock);
+
+ return idx >= 0;
+}
+
+void flush_timers(void)
+{
+ unsigned long seq;
+
+ pthread_mutex_lock(&timer_lock);
+ seq = timer_seq;
+ while (timer_running() && seq == timer_seq)
+ pthread_cond_wait(&timer_running_cond, &timer_lock);
+
+ pthread_mutex_unlock(&timer_lock);
+}
+
+int del_timer_sync(struct timer_list *timer)
+{
+ unsigned long seq;
+ ssize_t idx;
+
+ pthread_mutex_lock(&timer_lock);
+ idx = timer_idx(timer);
+ if (idx >= 0)
+ heap_del(&pending_timers, idx, pending_timer_cmp);
+
+ timer->pending = false;
+
+ seq = timer_seq;
+ while (timer_running() && seq == timer_seq)
+ pthread_cond_wait(&timer_running_cond, &timer_lock);
+ pthread_mutex_unlock(&timer_lock);
+
+ return idx >= 0;
+}
+
+int mod_timer(struct timer_list *timer, unsigned long expires)
+{
+ ssize_t idx;
+
+ pthread_mutex_lock(&timer_lock);
+ timer->expires = expires;
+ timer->pending = true;
+ idx = timer_idx(timer);
+
+ if (idx >= 0 &&
+ pending_timers.data[idx].expires == expires)
+ goto out;
+
+ if (idx >= 0) {
+ pending_timers.data[idx].expires = expires;
+
+ heap_sift_down(&pending_timers, idx, pending_timer_cmp);
+ heap_sift(&pending_timers, idx, pending_timer_cmp);
+ } else {
+ if (heap_full(&pending_timers)) {
+ pending_timers.size *= 2;
+ pending_timers.data =
+ realloc(pending_timers.data,
+ pending_timers.size *
+ sizeof(struct pending_timer));
+
+ BUG_ON(!pending_timers.data);
+ }
+
+ heap_add(&pending_timers,
+ ((struct pending_timer) {
+ .timer = timer,
+ .expires = expires,
+ }),
+ pending_timer_cmp);
+ }
+
+ pthread_cond_signal(&timer_cond);
+out:
+ pthread_mutex_unlock(&timer_lock);
+
+ return idx >= 0;
+}
+
+static bool timer_thread_stop = false;
+
+static int timer_thread(void *arg)
+{
+ struct pending_timer *p;
+ struct timespec ts;
+ unsigned long now;
+ int ret;
+
+ pthread_mutex_lock(&timer_lock);
+
+ while (!timer_thread_stop) {
+ now = jiffies;
+ p = heap_peek(&pending_timers);
+
+ if (!p) {
+ pthread_cond_wait(&timer_cond, &timer_lock);
+ continue;
+ }
+
+ if (time_after_eq(now, p->expires)) {
+ struct timer_list *timer = p->timer;
+
+ heap_del(&pending_timers, 0, pending_timer_cmp);
+ BUG_ON(!timer_pending(timer));
+ timer->pending = false;
+
+ timer_seq++;
+ BUG_ON(!timer_running());
+
+ pthread_mutex_unlock(&timer_lock);
+ timer->function(timer);
+ pthread_mutex_lock(&timer_lock);
+
+ timer_seq++;
+ pthread_cond_broadcast(&timer_running_cond);
+ continue;
+ }
+
+
+ ret = clock_gettime(CLOCK_REALTIME, &ts);
+ BUG_ON(ret);
+
+ ts = timespec_add_ns(ts, jiffies_to_nsecs(p->expires - now));
+
+ pthread_cond_timedwait(&timer_cond, &timer_lock, &ts);
+ }
+
+ pthread_mutex_unlock(&timer_lock);
+
+ return 0;
+}
+
+struct task_struct *timer_task;
+
+__attribute__((constructor(103)))
+static void timers_init(void)
+{
+ heap_init(&pending_timers, 64);
+ BUG_ON(!pending_timers.data);
+
+ timer_task = kthread_run(timer_thread, NULL, "timers");
+ BUG_ON(IS_ERR(timer_task));
+}
+
+__attribute__((destructor(103)))
+static void timers_cleanup(void)
+{
+ get_task_struct(timer_task);
+
+ pthread_mutex_lock(&timer_lock);
+ timer_thread_stop = true;
+ pthread_cond_signal(&timer_cond);
+ pthread_mutex_unlock(&timer_lock);
+
+ int ret = kthread_stop(timer_task);
+ BUG_ON(ret);
+
+ put_task_struct(timer_task);
+ timer_task = NULL;
+}
diff --git a/linux/wait.c b/linux/wait.c
new file mode 100644
index 00000000..b1f002b9
--- /dev/null
+++ b/linux/wait.c
@@ -0,0 +1,250 @@
+/*
+ * Generic waiting primitives.
+ *
+ * (C) 2004 Nadia Yvette Chambers, Oracle
+ */
+
+#include <linux/completion.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+
+static inline int waitqueue_active(wait_queue_head_t *q)
+{
+ return !list_empty(&q->task_list);
+}
+
+static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new)
+{
+ list_add(&new->task_list, &head->task_list);
+}
+
+static inline void __add_wait_queue_tail(wait_queue_head_t *head,
+ wait_queue_t *new)
+{
+ list_add_tail(&new->task_list, &head->task_list);
+}
+
+static inline void
+__add_wait_queue_tail_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
+{
+ wait->flags |= WQ_FLAG_EXCLUSIVE;
+ __add_wait_queue_tail(q, wait);
+}
+
+static inline void
+__remove_wait_queue(wait_queue_head_t *head, wait_queue_t *old)
+{
+ list_del(&old->task_list);
+}
+
+static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
+ int nr_exclusive, int wake_flags, void *key)
+{
+ wait_queue_t *curr, *next;
+
+ list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
+ unsigned flags = curr->flags;
+
+ if (curr->func(curr, mode, wake_flags, key) &&
+ (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
+ break;
+ }
+}
+
+static void __wake_up(wait_queue_head_t *q, unsigned int mode,
+ int nr_exclusive, void *key)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&q->lock, flags);
+ __wake_up_common(q, mode, nr_exclusive, 0, key);
+ spin_unlock_irqrestore(&q->lock, flags);
+}
+
+void wake_up(wait_queue_head_t *q)
+{
+ __wake_up(q, TASK_NORMAL, 1, NULL);
+}
+
+void wake_up_all(wait_queue_head_t *q)
+{
+ __wake_up(q, TASK_NORMAL, 0, NULL);
+}
+
+static void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr)
+{
+ __wake_up_common(q, mode, nr, 0, NULL);
+}
+
+void
+prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
+{
+ unsigned long flags;
+
+ wait->flags &= ~WQ_FLAG_EXCLUSIVE;
+ spin_lock_irqsave(&q->lock, flags);
+ if (list_empty(&wait->task_list))
+ __add_wait_queue(q, wait);
+ set_current_state(state);
+ spin_unlock_irqrestore(&q->lock, flags);
+}
+
+static void
+prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
+{
+ unsigned long flags;
+
+ wait->flags |= WQ_FLAG_EXCLUSIVE;
+ spin_lock_irqsave(&q->lock, flags);
+ if (list_empty(&wait->task_list))
+ __add_wait_queue_tail(q, wait);
+ set_current_state(state);
+ spin_unlock_irqrestore(&q->lock, flags);
+}
+
+void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
+{
+ unsigned long flags;
+
+ __set_current_state(TASK_RUNNING);
+ /*
+ * We can check for list emptiness outside the lock
+ * IFF:
+ * - we use the "careful" check that verifies both
+ * the next and prev pointers, so that there cannot
+ * be any half-pending updates in progress on other
+ * CPU's that we haven't seen yet (and that might
+ * still change the stack area.
+ * and
+ * - all other users take the lock (ie we can only
+ * have _one_ other CPU that looks at or modifies
+ * the list).
+ */
+ if (!list_empty_careful(&wait->task_list)) {
+ spin_lock_irqsave(&q->lock, flags);
+ list_del_init(&wait->task_list);
+ spin_unlock_irqrestore(&q->lock, flags);
+ }
+}
+
+int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags,
+ void *key)
+{
+ return wake_up_process(curr->private);
+}
+
+int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
+{
+ int ret = default_wake_function(wait, mode, sync, key);
+
+ if (ret)
+ list_del_init(&wait->task_list);
+ return ret;
+}
+
+struct wait_bit_key {
+ void *flags;
+ int bit_nr;
+ unsigned long timeout;
+};
+
+struct wait_bit_queue {
+ struct wait_bit_key key;
+ wait_queue_t wait;
+};
+
+static int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *arg)
+{
+ struct wait_bit_key *key = arg;
+ struct wait_bit_queue *wait_bit =
+ container_of(wait, struct wait_bit_queue, wait);
+
+ return (wait_bit->key.flags == key->flags &&
+ wait_bit->key.bit_nr == key->bit_nr &&
+ !test_bit(key->bit_nr, key->flags))
+ ? autoremove_wake_function(wait, mode, sync, key) : 0;
+}
+
+static DECLARE_WAIT_QUEUE_HEAD(bit_wq);
+
+#define __WAIT_BIT_KEY_INITIALIZER(word, bit) \
+ { .flags = word, .bit_nr = bit, }
+
+#define DEFINE_WAIT_BIT(name, word, bit) \
+ struct wait_bit_queue name = { \
+ .key = __WAIT_BIT_KEY_INITIALIZER(word, bit), \
+ .wait = { \
+ .private = current, \
+ .func = wake_bit_function, \
+ .task_list = \
+ LIST_HEAD_INIT((name).wait.task_list), \
+ }, \
+ }
+
+void wake_up_bit(void *word, int bit)
+{
+ struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit);
+
+ if (waitqueue_active(&bit_wq))
+ __wake_up(&bit_wq, TASK_NORMAL, 1, &key);
+}
+
+void __wait_on_bit(void *word, int bit, unsigned mode)
+{
+ DEFINE_WAIT_BIT(wait, word, bit);
+
+ do {
+ prepare_to_wait(&bit_wq, &wait.wait, mode);
+ if (test_bit(wait.key.bit_nr, wait.key.flags))
+ schedule();
+ } while (test_bit(wait.key.bit_nr, wait.key.flags));
+
+ finish_wait(&bit_wq, &wait.wait);
+}
+
+void __wait_on_bit_lock(void *word, int bit, unsigned mode)
+{
+ DEFINE_WAIT_BIT(wait, word, bit);
+
+ do {
+ prepare_to_wait_exclusive(&bit_wq, &wait.wait, mode);
+ if (!test_bit(wait.key.bit_nr, wait.key.flags))
+ continue;
+ schedule();
+ } while (test_and_set_bit(wait.key.bit_nr, wait.key.flags));
+ finish_wait(&bit_wq, &wait.wait);
+}
+
+void complete(struct completion *x)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&x->wait.lock, flags);
+ x->done++;
+ __wake_up_locked(&x->wait, TASK_NORMAL, 1);
+ spin_unlock_irqrestore(&x->wait.lock, flags);
+}
+
+void wait_for_completion(struct completion *x)
+{
+ spin_lock_irq(&x->wait.lock);
+
+ if (!x->done) {
+ DECLARE_WAITQUEUE(wait, current);
+
+ __add_wait_queue_tail_exclusive(&x->wait, &wait);
+ do {
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+ spin_unlock_irq(&x->wait.lock);
+
+ schedule();
+ spin_lock_irq(&x->wait.lock);
+ } while (!x->done);
+ __remove_wait_queue(&x->wait, &wait);
+ if (!x->done)
+ goto out;
+ }
+ x->done--;
+out:
+ spin_unlock_irq(&x->wait.lock);
+}
diff --git a/linux/workqueue.c b/linux/workqueue.c
new file mode 100644
index 00000000..0d5af3fb
--- /dev/null
+++ b/linux/workqueue.c
@@ -0,0 +1,346 @@
+#include <pthread.h>
+
+#include <linux/kthread.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+
+static pthread_mutex_t wq_lock = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t work_finished = PTHREAD_COND_INITIALIZER;
+static LIST_HEAD(wq_list);
+
+struct workqueue_struct {
+ struct list_head list;
+
+ struct work_struct *current_work;
+ struct list_head pending_work;
+
+ struct task_struct *worker;
+ char name[24];
+};
+
+enum {
+ WORK_PENDING_BIT,
+};
+
+static bool work_pending(struct work_struct *work)
+{
+ return test_bit(WORK_PENDING_BIT, work_data_bits(work));
+}
+
+static void clear_work_pending(struct work_struct *work)
+{
+ clear_bit(WORK_PENDING_BIT, work_data_bits(work));
+}
+
+static bool set_work_pending(struct work_struct *work)
+{
+ return !test_and_set_bit(WORK_PENDING_BIT, work_data_bits(work));
+}
+
+static void __queue_work(struct workqueue_struct *wq,
+ struct work_struct *work)
+{
+ BUG_ON(!work_pending(work));
+ BUG_ON(!list_empty(&work->entry));
+
+ list_add_tail(&work->entry, &wq->pending_work);
+ wake_up_process(wq->worker);
+}
+
+bool queue_work(struct workqueue_struct *wq, struct work_struct *work)
+{
+ bool ret;
+
+ pthread_mutex_lock(&wq_lock);
+ if ((ret = set_work_pending(work)))
+ __queue_work(wq, work);
+ pthread_mutex_unlock(&wq_lock);
+
+ return ret;
+}
+
+void delayed_work_timer_fn(struct timer_list *timer)
+{
+ struct delayed_work *dwork =
+ container_of(timer, struct delayed_work, timer);
+
+ pthread_mutex_lock(&wq_lock);
+ __queue_work(dwork->wq, &dwork->work);
+ pthread_mutex_unlock(&wq_lock);
+}
+
+static void __queue_delayed_work(struct workqueue_struct *wq,
+ struct delayed_work *dwork,
+ unsigned long delay)
+{
+ struct timer_list *timer = &dwork->timer;
+ struct work_struct *work = &dwork->work;
+
+ BUG_ON(timer->function != delayed_work_timer_fn);
+ BUG_ON(timer_pending(timer));
+ BUG_ON(!list_empty(&work->entry));
+
+ if (!delay) {
+ __queue_work(wq, &dwork->work);
+ } else {
+ dwork->wq = wq;
+ timer->expires = jiffies + delay;
+ add_timer(timer);
+ }
+}
+
+bool queue_delayed_work(struct workqueue_struct *wq,
+ struct delayed_work *dwork,
+ unsigned long delay)
+{
+ struct work_struct *work = &dwork->work;
+ bool ret;
+
+ pthread_mutex_lock(&wq_lock);
+ if ((ret = set_work_pending(work)))
+ __queue_delayed_work(wq, dwork, delay);
+ pthread_mutex_unlock(&wq_lock);
+
+ return ret;
+}
+
+static bool grab_pending(struct work_struct *work, bool is_dwork)
+{
+retry:
+ if (set_work_pending(work)) {
+ BUG_ON(!list_empty(&work->entry));
+ return false;
+ }
+
+ if (is_dwork) {
+ struct delayed_work *dwork = to_delayed_work(work);
+
+ if (likely(del_timer(&dwork->timer))) {
+ BUG_ON(!list_empty(&work->entry));
+ return true;
+ }
+ }
+
+ if (!list_empty(&work->entry)) {
+ list_del_init(&work->entry);
+ return true;
+ }
+
+ BUG_ON(!is_dwork);
+
+ pthread_mutex_unlock(&wq_lock);
+ flush_timers();
+ pthread_mutex_lock(&wq_lock);
+ goto retry;
+}
+
+static bool work_running(struct work_struct *work)
+{
+ struct workqueue_struct *wq;
+
+ list_for_each_entry(wq, &wq_list, list)
+ if (wq->current_work == work)
+ return true;
+
+ return false;
+}
+
+bool flush_work(struct work_struct *work)
+{
+ bool ret = false;
+
+ pthread_mutex_lock(&wq_lock);
+ while (work_pending(work) || work_running(work)) {
+ pthread_cond_wait(&work_finished, &wq_lock);
+ ret = true;
+ }
+ pthread_mutex_unlock(&wq_lock);
+
+ return ret;
+}
+
+static bool __flush_work(struct work_struct *work)
+{
+ bool ret = false;
+
+ while (work_running(work)) {
+ pthread_cond_wait(&work_finished, &wq_lock);
+ ret = true;
+ }
+
+ return ret;
+}
+
+bool cancel_work_sync(struct work_struct *work)
+{
+ bool ret;
+
+ pthread_mutex_lock(&wq_lock);
+ ret = grab_pending(work, false);
+
+ __flush_work(work);
+ clear_work_pending(work);
+ pthread_mutex_unlock(&wq_lock);
+
+ return ret;
+}
+
+bool mod_delayed_work(struct workqueue_struct *wq,
+ struct delayed_work *dwork,
+ unsigned long delay)
+{
+ struct work_struct *work = &dwork->work;
+ bool ret;
+
+ pthread_mutex_lock(&wq_lock);
+ ret = grab_pending(work, true);
+
+ __queue_delayed_work(wq, dwork, delay);
+ pthread_mutex_unlock(&wq_lock);
+
+ return ret;
+}
+
+bool cancel_delayed_work(struct delayed_work *dwork)
+{
+ struct work_struct *work = &dwork->work;
+ bool ret;
+
+ pthread_mutex_lock(&wq_lock);
+ ret = grab_pending(work, true);
+
+ clear_work_pending(&dwork->work);
+ pthread_mutex_unlock(&wq_lock);
+
+ return ret;
+}
+
+bool cancel_delayed_work_sync(struct delayed_work *dwork)
+{
+ struct work_struct *work = &dwork->work;
+ bool ret;
+
+ pthread_mutex_lock(&wq_lock);
+ ret = grab_pending(work, true);
+
+ __flush_work(work);
+ clear_work_pending(work);
+ pthread_mutex_unlock(&wq_lock);
+
+ return ret;
+}
+
+static int worker_thread(void *arg)
+{
+ struct workqueue_struct *wq = arg;
+ struct work_struct *work;
+
+ pthread_mutex_lock(&wq_lock);
+ while (1) {
+ __set_current_state(TASK_INTERRUPTIBLE);
+ work = list_first_entry_or_null(&wq->pending_work,
+ struct work_struct, entry);
+ wq->current_work = work;
+
+ if (kthread_should_stop()) {
+ BUG_ON(wq->current_work);
+ break;
+ }
+
+ if (!work) {
+ pthread_mutex_unlock(&wq_lock);
+ schedule();
+ pthread_mutex_lock(&wq_lock);
+ continue;
+ }
+
+ BUG_ON(!work_pending(work));
+ list_del_init(&work->entry);
+ clear_work_pending(work);
+
+ pthread_mutex_unlock(&wq_lock);
+ work->func(work);
+ pthread_mutex_lock(&wq_lock);
+
+ pthread_cond_broadcast(&work_finished);
+ }
+ pthread_mutex_unlock(&wq_lock);
+
+ return 0;
+}
+
+void destroy_workqueue(struct workqueue_struct *wq)
+{
+ kthread_stop(wq->worker);
+
+ pthread_mutex_lock(&wq_lock);
+ list_del(&wq->list);
+ pthread_mutex_unlock(&wq_lock);
+
+ kfree(wq);
+}
+
+struct workqueue_struct *alloc_workqueue(const char *fmt,
+ unsigned flags,
+ int max_active,
+ ...)
+{
+ va_list args;
+ struct workqueue_struct *wq;
+
+ wq = kzalloc(sizeof(*wq), GFP_KERNEL);
+ if (!wq)
+ return NULL;
+
+ INIT_LIST_HEAD(&wq->list);
+ INIT_LIST_HEAD(&wq->pending_work);
+
+ va_start(args, max_active);
+ vsnprintf(wq->name, sizeof(wq->name), fmt, args);
+ va_end(args);
+
+ wq->worker = kthread_run(worker_thread, wq, "%s", wq->name);
+ if (IS_ERR(wq->worker)) {
+ kfree(wq);
+ return NULL;
+ }
+
+ pthread_mutex_lock(&wq_lock);
+ list_add(&wq->list, &wq_list);
+ pthread_mutex_unlock(&wq_lock);
+
+ return wq;
+}
+
+struct workqueue_struct *system_wq;
+struct workqueue_struct *system_highpri_wq;
+struct workqueue_struct *system_long_wq;
+struct workqueue_struct *system_unbound_wq;
+struct workqueue_struct *system_freezable_wq;
+
+__attribute__((constructor(102)))
+static void wq_init(void)
+{
+ system_wq = alloc_workqueue("events", 0, 0);
+ system_highpri_wq = alloc_workqueue("events_highpri", WQ_HIGHPRI, 0);
+ system_long_wq = alloc_workqueue("events_long", 0, 0);
+ system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND,
+ WQ_UNBOUND_MAX_ACTIVE);
+ system_freezable_wq = alloc_workqueue("events_freezable",
+ WQ_FREEZABLE, 0);
+ BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq ||
+ !system_unbound_wq || !system_freezable_wq);
+}
+
+__attribute__((destructor(102)))
+static void wq_cleanup(void)
+{
+ destroy_workqueue(system_freezable_wq);
+ destroy_workqueue(system_unbound_wq);
+ destroy_workqueue(system_long_wq);
+ destroy_workqueue(system_highpri_wq);
+ destroy_workqueue(system_wq);
+
+ system_wq = system_highpri_wq = system_long_wq = system_unbound_wq =
+ system_freezable_wq = NULL;
+}
diff --git a/linux/xxhash.c b/linux/xxhash.c
new file mode 100644
index 00000000..d5bb9ff1
--- /dev/null
+++ b/linux/xxhash.c
@@ -0,0 +1,500 @@
+/*
+ * xxHash - Extremely Fast Hash algorithm
+ * Copyright (C) 2012-2016, Yann Collet.
+ *
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation. This program is dual-licensed; you may select
+ * either version 2 of the GNU General Public License ("GPL") or BSD license
+ * ("BSD").
+ *
+ * You can contact the author at:
+ * - xxHash homepage: https://cyan4973.github.io/xxHash/
+ * - xxHash source repository: https://github.com/Cyan4973/xxHash
+ */
+
+#include <asm/unaligned.h>
+#include <linux/errno.h>
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/xxhash.h>
+
+/*-*************************************
+ * Macros
+ **************************************/
+#define xxh_rotl32(x, r) ((x << r) | (x >> (32 - r)))
+#define xxh_rotl64(x, r) ((x << r) | (x >> (64 - r)))
+
+#ifdef __LITTLE_ENDIAN
+# define XXH_CPU_LITTLE_ENDIAN 1
+#else
+# define XXH_CPU_LITTLE_ENDIAN 0
+#endif
+
+/*-*************************************
+ * Constants
+ **************************************/
+static const uint32_t PRIME32_1 = 2654435761U;
+static const uint32_t PRIME32_2 = 2246822519U;
+static const uint32_t PRIME32_3 = 3266489917U;
+static const uint32_t PRIME32_4 = 668265263U;
+static const uint32_t PRIME32_5 = 374761393U;
+
+static const uint64_t PRIME64_1 = 11400714785074694791ULL;
+static const uint64_t PRIME64_2 = 14029467366897019727ULL;
+static const uint64_t PRIME64_3 = 1609587929392839161ULL;
+static const uint64_t PRIME64_4 = 9650029242287828579ULL;
+static const uint64_t PRIME64_5 = 2870177450012600261ULL;
+
+/*-**************************
+ * Utils
+ ***************************/
+void xxh32_copy_state(struct xxh32_state *dst, const struct xxh32_state *src)
+{
+ memcpy(dst, src, sizeof(*dst));
+}
+EXPORT_SYMBOL(xxh32_copy_state);
+
+void xxh64_copy_state(struct xxh64_state *dst, const struct xxh64_state *src)
+{
+ memcpy(dst, src, sizeof(*dst));
+}
+EXPORT_SYMBOL(xxh64_copy_state);
+
+/*-***************************
+ * Simple Hash Functions
+ ****************************/
+static uint32_t xxh32_round(uint32_t seed, const uint32_t input)
+{
+ seed += input * PRIME32_2;
+ seed = xxh_rotl32(seed, 13);
+ seed *= PRIME32_1;
+ return seed;
+}
+
+uint32_t xxh32(const void *input, const size_t len, const uint32_t seed)
+{
+ const uint8_t *p = (const uint8_t *)input;
+ const uint8_t *b_end = p + len;
+ uint32_t h32;
+
+ if (len >= 16) {
+ const uint8_t *const limit = b_end - 16;
+ uint32_t v1 = seed + PRIME32_1 + PRIME32_2;
+ uint32_t v2 = seed + PRIME32_2;
+ uint32_t v3 = seed + 0;
+ uint32_t v4 = seed - PRIME32_1;
+
+ do {
+ v1 = xxh32_round(v1, get_unaligned_le32(p));
+ p += 4;
+ v2 = xxh32_round(v2, get_unaligned_le32(p));
+ p += 4;
+ v3 = xxh32_round(v3, get_unaligned_le32(p));
+ p += 4;
+ v4 = xxh32_round(v4, get_unaligned_le32(p));
+ p += 4;
+ } while (p <= limit);
+
+ h32 = xxh_rotl32(v1, 1) + xxh_rotl32(v2, 7) +
+ xxh_rotl32(v3, 12) + xxh_rotl32(v4, 18);
+ } else {
+ h32 = seed + PRIME32_5;
+ }
+
+ h32 += (uint32_t)len;
+
+ while (p + 4 <= b_end) {
+ h32 += get_unaligned_le32(p) * PRIME32_3;
+ h32 = xxh_rotl32(h32, 17) * PRIME32_4;
+ p += 4;
+ }
+
+ while (p < b_end) {
+ h32 += (*p) * PRIME32_5;
+ h32 = xxh_rotl32(h32, 11) * PRIME32_1;
+ p++;
+ }
+
+ h32 ^= h32 >> 15;
+ h32 *= PRIME32_2;
+ h32 ^= h32 >> 13;
+ h32 *= PRIME32_3;
+ h32 ^= h32 >> 16;
+
+ return h32;
+}
+EXPORT_SYMBOL(xxh32);
+
+static uint64_t xxh64_round(uint64_t acc, const uint64_t input)
+{
+ acc += input * PRIME64_2;
+ acc = xxh_rotl64(acc, 31);
+ acc *= PRIME64_1;
+ return acc;
+}
+
+static uint64_t xxh64_merge_round(uint64_t acc, uint64_t val)
+{
+ val = xxh64_round(0, val);
+ acc ^= val;
+ acc = acc * PRIME64_1 + PRIME64_4;
+ return acc;
+}
+
+uint64_t xxh64(const void *input, const size_t len, const uint64_t seed)
+{
+ const uint8_t *p = (const uint8_t *)input;
+ const uint8_t *const b_end = p + len;
+ uint64_t h64;
+
+ if (len >= 32) {
+ const uint8_t *const limit = b_end - 32;
+ uint64_t v1 = seed + PRIME64_1 + PRIME64_2;
+ uint64_t v2 = seed + PRIME64_2;
+ uint64_t v3 = seed + 0;
+ uint64_t v4 = seed - PRIME64_1;
+
+ do {
+ v1 = xxh64_round(v1, get_unaligned_le64(p));
+ p += 8;
+ v2 = xxh64_round(v2, get_unaligned_le64(p));
+ p += 8;
+ v3 = xxh64_round(v3, get_unaligned_le64(p));
+ p += 8;
+ v4 = xxh64_round(v4, get_unaligned_le64(p));
+ p += 8;
+ } while (p <= limit);
+
+ h64 = xxh_rotl64(v1, 1) + xxh_rotl64(v2, 7) +
+ xxh_rotl64(v3, 12) + xxh_rotl64(v4, 18);
+ h64 = xxh64_merge_round(h64, v1);
+ h64 = xxh64_merge_round(h64, v2);
+ h64 = xxh64_merge_round(h64, v3);
+ h64 = xxh64_merge_round(h64, v4);
+
+ } else {
+ h64 = seed + PRIME64_5;
+ }
+
+ h64 += (uint64_t)len;
+
+ while (p + 8 <= b_end) {
+ const uint64_t k1 = xxh64_round(0, get_unaligned_le64(p));
+
+ h64 ^= k1;
+ h64 = xxh_rotl64(h64, 27) * PRIME64_1 + PRIME64_4;
+ p += 8;
+ }
+
+ if (p + 4 <= b_end) {
+ h64 ^= (uint64_t)(get_unaligned_le32(p)) * PRIME64_1;
+ h64 = xxh_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+ p += 4;
+ }
+
+ while (p < b_end) {
+ h64 ^= (*p) * PRIME64_5;
+ h64 = xxh_rotl64(h64, 11) * PRIME64_1;
+ p++;
+ }
+
+ h64 ^= h64 >> 33;
+ h64 *= PRIME64_2;
+ h64 ^= h64 >> 29;
+ h64 *= PRIME64_3;
+ h64 ^= h64 >> 32;
+
+ return h64;
+}
+EXPORT_SYMBOL(xxh64);
+
+/*-**************************************************
+ * Advanced Hash Functions
+ ***************************************************/
+void xxh32_reset(struct xxh32_state *statePtr, const uint32_t seed)
+{
+ /* use a local state for memcpy() to avoid strict-aliasing warnings */
+ struct xxh32_state state;
+
+ memset(&state, 0, sizeof(state));
+ state.v1 = seed + PRIME32_1 + PRIME32_2;
+ state.v2 = seed + PRIME32_2;
+ state.v3 = seed + 0;
+ state.v4 = seed - PRIME32_1;
+ memcpy(statePtr, &state, sizeof(state));
+}
+EXPORT_SYMBOL(xxh32_reset);
+
+void xxh64_reset(struct xxh64_state *statePtr, const uint64_t seed)
+{
+ /* use a local state for memcpy() to avoid strict-aliasing warnings */
+ struct xxh64_state state;
+
+ memset(&state, 0, sizeof(state));
+ state.v1 = seed + PRIME64_1 + PRIME64_2;
+ state.v2 = seed + PRIME64_2;
+ state.v3 = seed + 0;
+ state.v4 = seed - PRIME64_1;
+ memcpy(statePtr, &state, sizeof(state));
+}
+EXPORT_SYMBOL(xxh64_reset);
+
+int xxh32_update(struct xxh32_state *state, const void *input, const size_t len)
+{
+ const uint8_t *p = (const uint8_t *)input;
+ const uint8_t *const b_end = p + len;
+
+ if (input == NULL)
+ return -EINVAL;
+
+ state->total_len_32 += (uint32_t)len;
+ state->large_len |= (len >= 16) | (state->total_len_32 >= 16);
+
+ if (state->memsize + len < 16) { /* fill in tmp buffer */
+ memcpy((uint8_t *)(state->mem32) + state->memsize, input, len);
+ state->memsize += (uint32_t)len;
+ return 0;
+ }
+
+ if (state->memsize) { /* some data left from previous update */
+ const uint32_t *p32 = state->mem32;
+
+ memcpy((uint8_t *)(state->mem32) + state->memsize, input,
+ 16 - state->memsize);
+
+ state->v1 = xxh32_round(state->v1, get_unaligned_le32(p32));
+ p32++;
+ state->v2 = xxh32_round(state->v2, get_unaligned_le32(p32));
+ p32++;
+ state->v3 = xxh32_round(state->v3, get_unaligned_le32(p32));
+ p32++;
+ state->v4 = xxh32_round(state->v4, get_unaligned_le32(p32));
+ p32++;
+
+ p += 16-state->memsize;
+ state->memsize = 0;
+ }
+
+ if (p <= b_end - 16) {
+ const uint8_t *const limit = b_end - 16;
+ uint32_t v1 = state->v1;
+ uint32_t v2 = state->v2;
+ uint32_t v3 = state->v3;
+ uint32_t v4 = state->v4;
+
+ do {
+ v1 = xxh32_round(v1, get_unaligned_le32(p));
+ p += 4;
+ v2 = xxh32_round(v2, get_unaligned_le32(p));
+ p += 4;
+ v3 = xxh32_round(v3, get_unaligned_le32(p));
+ p += 4;
+ v4 = xxh32_round(v4, get_unaligned_le32(p));
+ p += 4;
+ } while (p <= limit);
+
+ state->v1 = v1;
+ state->v2 = v2;
+ state->v3 = v3;
+ state->v4 = v4;
+ }
+
+ if (p < b_end) {
+ memcpy(state->mem32, p, (size_t)(b_end-p));
+ state->memsize = (uint32_t)(b_end-p);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(xxh32_update);
+
+uint32_t xxh32_digest(const struct xxh32_state *state)
+{
+ const uint8_t *p = (const uint8_t *)state->mem32;
+ const uint8_t *const b_end = (const uint8_t *)(state->mem32) +
+ state->memsize;
+ uint32_t h32;
+
+ if (state->large_len) {
+ h32 = xxh_rotl32(state->v1, 1) + xxh_rotl32(state->v2, 7) +
+ xxh_rotl32(state->v3, 12) + xxh_rotl32(state->v4, 18);
+ } else {
+ h32 = state->v3 /* == seed */ + PRIME32_5;
+ }
+
+ h32 += state->total_len_32;
+
+ while (p + 4 <= b_end) {
+ h32 += get_unaligned_le32(p) * PRIME32_3;
+ h32 = xxh_rotl32(h32, 17) * PRIME32_4;
+ p += 4;
+ }
+
+ while (p < b_end) {
+ h32 += (*p) * PRIME32_5;
+ h32 = xxh_rotl32(h32, 11) * PRIME32_1;
+ p++;
+ }
+
+ h32 ^= h32 >> 15;
+ h32 *= PRIME32_2;
+ h32 ^= h32 >> 13;
+ h32 *= PRIME32_3;
+ h32 ^= h32 >> 16;
+
+ return h32;
+}
+EXPORT_SYMBOL(xxh32_digest);
+
+int xxh64_update(struct xxh64_state *state, const void *input, const size_t len)
+{
+ const uint8_t *p = (const uint8_t *)input;
+ const uint8_t *const b_end = p + len;
+
+ if (input == NULL)
+ return -EINVAL;
+
+ state->total_len += len;
+
+ if (state->memsize + len < 32) { /* fill in tmp buffer */
+ memcpy(((uint8_t *)state->mem64) + state->memsize, input, len);
+ state->memsize += (uint32_t)len;
+ return 0;
+ }
+
+ if (state->memsize) { /* tmp buffer is full */
+ uint64_t *p64 = state->mem64;
+
+ memcpy(((uint8_t *)p64) + state->memsize, input,
+ 32 - state->memsize);
+
+ state->v1 = xxh64_round(state->v1, get_unaligned_le64(p64));
+ p64++;
+ state->v2 = xxh64_round(state->v2, get_unaligned_le64(p64));
+ p64++;
+ state->v3 = xxh64_round(state->v3, get_unaligned_le64(p64));
+ p64++;
+ state->v4 = xxh64_round(state->v4, get_unaligned_le64(p64));
+
+ p += 32 - state->memsize;
+ state->memsize = 0;
+ }
+
+ if (p + 32 <= b_end) {
+ const uint8_t *const limit = b_end - 32;
+ uint64_t v1 = state->v1;
+ uint64_t v2 = state->v2;
+ uint64_t v3 = state->v3;
+ uint64_t v4 = state->v4;
+
+ do {
+ v1 = xxh64_round(v1, get_unaligned_le64(p));
+ p += 8;
+ v2 = xxh64_round(v2, get_unaligned_le64(p));
+ p += 8;
+ v3 = xxh64_round(v3, get_unaligned_le64(p));
+ p += 8;
+ v4 = xxh64_round(v4, get_unaligned_le64(p));
+ p += 8;
+ } while (p <= limit);
+
+ state->v1 = v1;
+ state->v2 = v2;
+ state->v3 = v3;
+ state->v4 = v4;
+ }
+
+ if (p < b_end) {
+ memcpy(state->mem64, p, (size_t)(b_end-p));
+ state->memsize = (uint32_t)(b_end - p);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(xxh64_update);
+
+uint64_t xxh64_digest(const struct xxh64_state *state)
+{
+ const uint8_t *p = (const uint8_t *)state->mem64;
+ const uint8_t *const b_end = (const uint8_t *)state->mem64 +
+ state->memsize;
+ uint64_t h64;
+
+ if (state->total_len >= 32) {
+ const uint64_t v1 = state->v1;
+ const uint64_t v2 = state->v2;
+ const uint64_t v3 = state->v3;
+ const uint64_t v4 = state->v4;
+
+ h64 = xxh_rotl64(v1, 1) + xxh_rotl64(v2, 7) +
+ xxh_rotl64(v3, 12) + xxh_rotl64(v4, 18);
+ h64 = xxh64_merge_round(h64, v1);
+ h64 = xxh64_merge_round(h64, v2);
+ h64 = xxh64_merge_round(h64, v3);
+ h64 = xxh64_merge_round(h64, v4);
+ } else {
+ h64 = state->v3 + PRIME64_5;
+ }
+
+ h64 += (uint64_t)state->total_len;
+
+ while (p + 8 <= b_end) {
+ const uint64_t k1 = xxh64_round(0, get_unaligned_le64(p));
+
+ h64 ^= k1;
+ h64 = xxh_rotl64(h64, 27) * PRIME64_1 + PRIME64_4;
+ p += 8;
+ }
+
+ if (p + 4 <= b_end) {
+ h64 ^= (uint64_t)(get_unaligned_le32(p)) * PRIME64_1;
+ h64 = xxh_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+ p += 4;
+ }
+
+ while (p < b_end) {
+ h64 ^= (*p) * PRIME64_5;
+ h64 = xxh_rotl64(h64, 11) * PRIME64_1;
+ p++;
+ }
+
+ h64 ^= h64 >> 33;
+ h64 *= PRIME64_2;
+ h64 ^= h64 >> 29;
+ h64 *= PRIME64_3;
+ h64 ^= h64 >> 32;
+
+ return h64;
+}
+EXPORT_SYMBOL(xxh64_digest);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("xxHash");
diff --git a/linux/zstd_compress_module.c b/linux/zstd_compress_module.c
new file mode 100644
index 00000000..35cc5cba
--- /dev/null
+++ b/linux/zstd_compress_module.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/zstd.h>
+
+#define ZSTD_FORWARD_IF_ERR(ret) \
+ do { \
+ size_t const __ret = (ret); \
+ if (ZSTD_isError(__ret)) \
+ return __ret; \
+ } while (0)
+
+static size_t zstd_cctx_init(zstd_cctx *cctx, const zstd_parameters *parameters,
+ unsigned long long pledged_src_size)
+{
+ ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_reset(
+ cctx, ZSTD_reset_session_and_parameters));
+ ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setPledgedSrcSize(
+ cctx, pledged_src_size));
+ ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+ cctx, ZSTD_c_windowLog, parameters->cParams.windowLog));
+ ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+ cctx, ZSTD_c_hashLog, parameters->cParams.hashLog));
+ ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+ cctx, ZSTD_c_chainLog, parameters->cParams.chainLog));
+ ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+ cctx, ZSTD_c_searchLog, parameters->cParams.searchLog));
+ ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+ cctx, ZSTD_c_minMatch, parameters->cParams.minMatch));
+ ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+ cctx, ZSTD_c_targetLength, parameters->cParams.targetLength));
+ ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+ cctx, ZSTD_c_strategy, parameters->cParams.strategy));
+ ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+ cctx, ZSTD_c_contentSizeFlag, parameters->fParams.contentSizeFlag));
+ ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+ cctx, ZSTD_c_checksumFlag, parameters->fParams.checksumFlag));
+ ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+ cctx, ZSTD_c_dictIDFlag, !parameters->fParams.noDictIDFlag));
+ return 0;
+}
+
+int zstd_min_clevel(void)
+{
+ return ZSTD_minCLevel();
+}
+EXPORT_SYMBOL(zstd_min_clevel);
+
+int zstd_max_clevel(void)
+{
+ return ZSTD_maxCLevel();
+}
+EXPORT_SYMBOL(zstd_max_clevel);
+
+size_t zstd_compress_bound(size_t src_size)
+{
+ return ZSTD_compressBound(src_size);
+}
+EXPORT_SYMBOL(zstd_compress_bound);
+
+zstd_parameters zstd_get_params(int level,
+ unsigned long long estimated_src_size)
+{
+ return ZSTD_getParams(level, estimated_src_size, 0);
+}
+EXPORT_SYMBOL(zstd_get_params);
+
+size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *cparams)
+{
+ return ZSTD_estimateCCtxSize_usingCParams(*cparams);
+}
+EXPORT_SYMBOL(zstd_cctx_workspace_bound);
+
+zstd_cctx *zstd_init_cctx(void *workspace, size_t workspace_size)
+{
+ if (workspace == NULL)
+ return NULL;
+ return ZSTD_initStaticCCtx(workspace, workspace_size);
+}
+EXPORT_SYMBOL(zstd_init_cctx);
+
+size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity,
+ const void *src, size_t src_size, const zstd_parameters *parameters)
+{
+ ZSTD_FORWARD_IF_ERR(zstd_cctx_init(cctx, parameters, src_size));
+ return ZSTD_compress2(cctx, dst, dst_capacity, src, src_size);
+}
+EXPORT_SYMBOL(zstd_compress_cctx);
+
+size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams)
+{
+ return ZSTD_estimateCStreamSize_usingCParams(*cparams);
+}
+EXPORT_SYMBOL(zstd_cstream_workspace_bound);
+
+zstd_cstream *zstd_init_cstream(const zstd_parameters *parameters,
+ unsigned long long pledged_src_size, void *workspace, size_t workspace_size)
+{
+ zstd_cstream *cstream;
+
+ if (workspace == NULL)
+ return NULL;
+
+ cstream = ZSTD_initStaticCStream(workspace, workspace_size);
+ if (cstream == NULL)
+ return NULL;
+
+ /* 0 means unknown in linux zstd API but means 0 in new zstd API */
+ if (pledged_src_size == 0)
+ pledged_src_size = ZSTD_CONTENTSIZE_UNKNOWN;
+
+ if (ZSTD_isError(zstd_cctx_init(cstream, parameters, pledged_src_size)))
+ return NULL;
+
+ return cstream;
+}
+EXPORT_SYMBOL(zstd_init_cstream);
+
+size_t zstd_reset_cstream(zstd_cstream *cstream,
+ unsigned long long pledged_src_size)
+{
+ return ZSTD_resetCStream(cstream, pledged_src_size);
+}
+EXPORT_SYMBOL(zstd_reset_cstream);
+
+size_t zstd_compress_stream(zstd_cstream *cstream, zstd_out_buffer *output,
+ zstd_in_buffer *input)
+{
+ return ZSTD_compressStream(cstream, output, input);
+}
+EXPORT_SYMBOL(zstd_compress_stream);
+
+size_t zstd_flush_stream(zstd_cstream *cstream, zstd_out_buffer *output)
+{
+ return ZSTD_flushStream(cstream, output);
+}
+EXPORT_SYMBOL(zstd_flush_stream);
+
+size_t zstd_end_stream(zstd_cstream *cstream, zstd_out_buffer *output)
+{
+ return ZSTD_endStream(cstream, output);
+}
+EXPORT_SYMBOL(zstd_end_stream);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("Zstd Compressor");
diff --git a/linux/zstd_decompress_module.c b/linux/zstd_decompress_module.c
new file mode 100644
index 00000000..7e8cd446
--- /dev/null
+++ b/linux/zstd_decompress_module.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/zstd.h>
+
+/* Common symbols. zstd_compress must depend on zstd_decompress. */
+
+unsigned int zstd_is_error(size_t code)
+{
+ return ZSTD_isError(code);
+}
+EXPORT_SYMBOL(zstd_is_error);
+
+zstd_error_code zstd_get_error_code(size_t code)
+{
+ return ZSTD_getErrorCode(code);
+}
+EXPORT_SYMBOL(zstd_get_error_code);
+
+const char *zstd_get_error_name(size_t code)
+{
+ return ZSTD_getErrorName(code);
+}
+EXPORT_SYMBOL(zstd_get_error_name);
+
+/* Decompression symbols. */
+
+size_t zstd_dctx_workspace_bound(void)
+{
+ return ZSTD_estimateDCtxSize();
+}
+EXPORT_SYMBOL(zstd_dctx_workspace_bound);
+
+zstd_dctx *zstd_init_dctx(void *workspace, size_t workspace_size)
+{
+ if (workspace == NULL)
+ return NULL;
+ return ZSTD_initStaticDCtx(workspace, workspace_size);
+}
+EXPORT_SYMBOL(zstd_init_dctx);
+
+size_t zstd_decompress_dctx(zstd_dctx *dctx, void *dst, size_t dst_capacity,
+ const void *src, size_t src_size)
+{
+ return ZSTD_decompressDCtx(dctx, dst, dst_capacity, src, src_size);
+}
+EXPORT_SYMBOL(zstd_decompress_dctx);
+
+size_t zstd_dstream_workspace_bound(size_t max_window_size)
+{
+ return ZSTD_estimateDStreamSize(max_window_size);
+}
+EXPORT_SYMBOL(zstd_dstream_workspace_bound);
+
+zstd_dstream *zstd_init_dstream(size_t max_window_size, void *workspace,
+ size_t workspace_size)
+{
+ if (workspace == NULL)
+ return NULL;
+ (void)max_window_size;
+ return ZSTD_initStaticDStream(workspace, workspace_size);
+}
+EXPORT_SYMBOL(zstd_init_dstream);
+
+size_t zstd_reset_dstream(zstd_dstream *dstream)
+{
+ return ZSTD_resetDStream(dstream);
+}
+EXPORT_SYMBOL(zstd_reset_dstream);
+
+size_t zstd_decompress_stream(zstd_dstream *dstream, zstd_out_buffer *output,
+ zstd_in_buffer *input)
+{
+ return ZSTD_decompressStream(dstream, output, input);
+}
+EXPORT_SYMBOL(zstd_decompress_stream);
+
+size_t zstd_find_frame_compressed_size(const void *src, size_t src_size)
+{
+ return ZSTD_findFrameCompressedSize(src, src_size);
+}
+EXPORT_SYMBOL(zstd_find_frame_compressed_size);
+
+size_t zstd_get_frame_header(zstd_frame_header *header, const void *src,
+ size_t src_size)
+{
+ return ZSTD_getFrameHeader(header, src, src_size);
+}
+EXPORT_SYMBOL(zstd_get_frame_header);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("Zstd Decompressor");