Diffstat (limited to 'c_src/include')
135 files changed, 13286 insertions, 0 deletions
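The headers added below are minimal userspace shims of the corresponding kernel interfaces. As a hedged illustration (not part of this commit) of how the crypto/hash.h shim below might be driven, here is a minimal sketch; the "sha256" algorithm name and the omission of error handling are assumptions made only for brevity:

/* Hypothetical sketch: hashing a buffer via the shash interface in
 * crypto/hash.h below. SHASH_DESC_ON_STACK() allocates descsize bytes of
 * hash state on the stack; the macro does not set desc->tfm, so we do. */
#include <crypto/hash.h>
#include <crypto/sha2.h>

static void sha256_digest_example(const u8 *data, unsigned len,
				  u8 out[SHA256_DIGEST_SIZE])
{
	/* "sha256" as the algorithm name is an assumption here */
	struct crypto_shash *tfm = crypto_alloc_shash("sha256", 0, 0);
	SHASH_DESC_ON_STACK(desc, tfm);

	desc->tfm = tfm;
	crypto_shash_init(desc);
	crypto_shash_update(desc, data, len);
	crypto_shash_final(desc, out);
	crypto_free_shash(tfm);
}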
diff --git a/c_src/include/asm/page.h b/c_src/include/asm/page.h new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/c_src/include/asm/page.h diff --git a/c_src/include/asm/unaligned.h b/c_src/include/asm/unaligned.h new file mode 100644 index 00000000..e695bede --- /dev/null +++ b/c_src/include/asm/unaligned.h @@ -0,0 +1,20 @@ +#ifndef _ASM_UNALIGNED_H +#define _ASM_UNALIGNED_H + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +# include <linux/unaligned/le_struct.h> +# include <linux/unaligned/be_byteshift.h> +# include <linux/unaligned/generic.h> +# define get_unaligned __get_unaligned_le +# define put_unaligned __put_unaligned_le +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +# include <linux/unaligned/be_struct.h> +# include <linux/unaligned/le_byteshift.h> +# include <linux/unaligned/generic.h> +# define get_unaligned __get_unaligned_be +# define put_unaligned __put_unaligned_be +#else +# error need to define endianess +#endif + +#endif /* _ASM_UNALIGNED_H */ diff --git a/c_src/include/crypto/algapi.h b/c_src/include/crypto/algapi.h new file mode 100644 index 00000000..5fd3524a --- /dev/null +++ b/c_src/include/crypto/algapi.h @@ -0,0 +1,7 @@ +#ifndef _CRYPTO_ALGAPI_H +#define _CRYPTO_ALGAPI_H + +#include <linux/crypto.h> +#include <crypto/skcipher.h> + +#endif /* _CRYPTO_ALGAPI_H */ diff --git a/c_src/include/crypto/chacha.h b/c_src/include/crypto/chacha.h new file mode 100644 index 00000000..f004cfb5 --- /dev/null +++ b/c_src/include/crypto/chacha.h @@ -0,0 +1,15 @@ +/* + * Common values for the ChaCha20 algorithm + */ + +#ifndef _CRYPTO_CHACHA20_H +#define _CRYPTO_CHACHA20_H + +#include <linux/types.h> +#include <linux/crypto.h> + +#define CHACHA_IV_SIZE 16 +#define CHACHA_KEY_SIZE 32 +#define CHACHA_BLOCK_SIZE 64 + +#endif diff --git a/c_src/include/crypto/hash.h b/c_src/include/crypto/hash.h new file mode 100644 index 00000000..a74f3618 --- /dev/null +++ b/c_src/include/crypto/hash.h @@ -0,0 +1,104 @@ +/* + * Hash: Hash algorithms under the crypto API + * + * Copyright (c) 2008 Herbert Xu <herbert@gondor.apana.org.au> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. 
+ * + */ + +#ifndef _CRYPTO_HASH_H +#define _CRYPTO_HASH_H + +#include <linux/crypto.h> + +struct shash_desc; + +struct shash_alg { + int (*init)(struct shash_desc *desc); + int (*update)(struct shash_desc *desc, const u8 *data, unsigned len); + int (*final)(struct shash_desc *desc, u8 *out); + int (*finup)(struct shash_desc *desc, const u8 *data, + unsigned len, u8 *out); + int (*digest)(struct shash_desc *desc, const u8 *data, + unsigned len, u8 *out); + + unsigned descsize; + unsigned digestsize; + struct crypto_alg base; +}; + +int crypto_register_shash(struct shash_alg *alg); + +struct crypto_shash { + unsigned descsize; + struct crypto_tfm base; +}; + +struct crypto_shash *crypto_alloc_shash(const char *alg_name, u32 type, + u32 mask); + +static inline void crypto_free_shash(struct crypto_shash *tfm) +{ + kfree(tfm); +} + +static inline struct shash_alg *crypto_shash_alg(struct crypto_shash *tfm) +{ + return container_of(tfm->base.alg, struct shash_alg, base); +} + +static inline unsigned crypto_shash_digestsize(struct crypto_shash *tfm) +{ + return crypto_shash_alg(tfm)->digestsize; +} + +static inline unsigned crypto_shash_descsize(struct crypto_shash *tfm) +{ + return tfm->descsize; +} + +struct shash_desc { + struct crypto_shash *tfm; + u32 flags; + + void *ctx[] CRYPTO_MINALIGN_ATTR; +}; + +#define SHASH_DESC_ON_STACK(shash, tfm) \ + char __##shash##_desc[sizeof(struct shash_desc) + \ + crypto_shash_descsize(tfm)] CRYPTO_MINALIGN_ATTR; \ + struct shash_desc *shash = (struct shash_desc *)__##shash##_desc + +static inline int crypto_shash_init(struct shash_desc *desc) +{ + return crypto_shash_alg(desc->tfm)->init(desc); +} + +static inline int crypto_shash_update(struct shash_desc *desc, + const u8 *data, unsigned len) +{ + return crypto_shash_alg(desc->tfm)->update(desc, data, len); +} + +static inline int crypto_shash_final(struct shash_desc *desc, u8 *out) +{ + return crypto_shash_alg(desc->tfm)->final(desc, out); +} + +static inline int crypto_shash_finup(struct shash_desc *desc, const u8 *data, + unsigned len, u8 *out) +{ + return crypto_shash_alg(desc->tfm)->finup(desc, data, len, out); +} + +static inline int crypto_shash_digest(struct shash_desc *desc, const u8 *data, + unsigned len, u8 *out) +{ + return crypto_shash_alg(desc->tfm)->digest(desc, data, len, out); +} + +#endif /* _CRYPTO_HASH_H */ diff --git a/c_src/include/crypto/poly1305.h b/c_src/include/crypto/poly1305.h new file mode 100644 index 00000000..9fcfbfeb --- /dev/null +++ b/c_src/include/crypto/poly1305.h @@ -0,0 +1,13 @@ +/* + * Common values for the Poly1305 algorithm + */ + +#ifndef _CRYPTO_POLY1305_H +#define _CRYPTO_POLY1305_H + +#include <sodium/crypto_onetimeauth_poly1305.h> + +#define POLY1305_KEY_SIZE crypto_onetimeauth_poly1305_KEYBYTES +#define POLY1305_DIGEST_SIZE crypto_onetimeauth_poly1305_BYTES + +#endif diff --git a/c_src/include/crypto/sha2.h b/c_src/include/crypto/sha2.h new file mode 100644 index 00000000..8a46202b --- /dev/null +++ b/c_src/include/crypto/sha2.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Common values for SHA algorithms + */ + +#ifndef _CRYPTO_SHA_H +#define _CRYPTO_SHA_H + +#include <linux/types.h> + +#define SHA1_DIGEST_SIZE 20 +#define SHA1_BLOCK_SIZE 64 + +#define SHA224_DIGEST_SIZE 28 +#define SHA224_BLOCK_SIZE 64 + +#define SHA256_DIGEST_SIZE 32 +#define SHA256_BLOCK_SIZE 64 + +#define SHA384_DIGEST_SIZE 48 +#define SHA384_BLOCK_SIZE 128 + +#define SHA512_DIGEST_SIZE 64 +#define SHA512_BLOCK_SIZE 128 + +#define SHA1_H0 0x67452301UL +#define 
SHA1_H1 0xefcdab89UL +#define SHA1_H2 0x98badcfeUL +#define SHA1_H3 0x10325476UL +#define SHA1_H4 0xc3d2e1f0UL + +#define SHA224_H0 0xc1059ed8UL +#define SHA224_H1 0x367cd507UL +#define SHA224_H2 0x3070dd17UL +#define SHA224_H3 0xf70e5939UL +#define SHA224_H4 0xffc00b31UL +#define SHA224_H5 0x68581511UL +#define SHA224_H6 0x64f98fa7UL +#define SHA224_H7 0xbefa4fa4UL + +#define SHA256_H0 0x6a09e667UL +#define SHA256_H1 0xbb67ae85UL +#define SHA256_H2 0x3c6ef372UL +#define SHA256_H3 0xa54ff53aUL +#define SHA256_H4 0x510e527fUL +#define SHA256_H5 0x9b05688cUL +#define SHA256_H6 0x1f83d9abUL +#define SHA256_H7 0x5be0cd19UL + +#define SHA384_H0 0xcbbb9d5dc1059ed8ULL +#define SHA384_H1 0x629a292a367cd507ULL +#define SHA384_H2 0x9159015a3070dd17ULL +#define SHA384_H3 0x152fecd8f70e5939ULL +#define SHA384_H4 0x67332667ffc00b31ULL +#define SHA384_H5 0x8eb44a8768581511ULL +#define SHA384_H6 0xdb0c2e0d64f98fa7ULL +#define SHA384_H7 0x47b5481dbefa4fa4ULL + +#define SHA512_H0 0x6a09e667f3bcc908ULL +#define SHA512_H1 0xbb67ae8584caa73bULL +#define SHA512_H2 0x3c6ef372fe94f82bULL +#define SHA512_H3 0xa54ff53a5f1d36f1ULL +#define SHA512_H4 0x510e527fade682d1ULL +#define SHA512_H5 0x9b05688c2b3e6c1fULL +#define SHA512_H6 0x1f83d9abfb41bd6bULL +#define SHA512_H7 0x5be0cd19137e2179ULL + +extern const u8 sha1_zero_message_hash[SHA1_DIGEST_SIZE]; + +extern const u8 sha224_zero_message_hash[SHA224_DIGEST_SIZE]; + +extern const u8 sha256_zero_message_hash[SHA256_DIGEST_SIZE]; + +extern const u8 sha384_zero_message_hash[SHA384_DIGEST_SIZE]; + +extern const u8 sha512_zero_message_hash[SHA512_DIGEST_SIZE]; + +struct sha1_state { + u32 state[SHA1_DIGEST_SIZE / 4]; + u64 count; + u8 buffer[SHA1_BLOCK_SIZE]; +}; + +struct sha256_state { + u32 state[SHA256_DIGEST_SIZE / 4]; + u64 count; + u8 buf[SHA256_BLOCK_SIZE]; +}; + +struct sha512_state { + u64 state[SHA512_DIGEST_SIZE / 8]; + u64 count[2]; + u8 buf[SHA512_BLOCK_SIZE]; +}; + +struct shash_desc; + +extern int crypto_sha1_update(struct shash_desc *desc, const u8 *data, + unsigned int len); + +extern int crypto_sha1_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *hash); + +extern int crypto_sha256_update(struct shash_desc *desc, const u8 *data, + unsigned int len); + +extern int crypto_sha256_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *hash); + +extern int crypto_sha512_update(struct shash_desc *desc, const u8 *data, + unsigned int len); + +extern int crypto_sha512_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *hash); +#endif diff --git a/c_src/include/crypto/skcipher.h b/c_src/include/crypto/skcipher.h new file mode 100644 index 00000000..5989855d --- /dev/null +++ b/c_src/include/crypto/skcipher.h @@ -0,0 +1,126 @@ +/* + * Symmetric key ciphers. + * + * Copyright (c) 2007-2015 Herbert Xu <herbert@gondor.apana.org.au> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. 
+ * + */ + +#ifndef _CRYPTO_SKCIPHER_H +#define _CRYPTO_SKCIPHER_H + +#include <linux/crypto.h> + +struct crypto_skcipher; +struct skcipher_request; + +struct skcipher_alg { + struct crypto_alg base; +}; + +int crypto_register_skcipher(struct skcipher_alg *alg); + +struct crypto_skcipher { + int (*setkey)(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen); + int (*encrypt)(struct skcipher_request *req); + int (*decrypt)(struct skcipher_request *req); + + unsigned ivsize; + unsigned keysize; + + struct crypto_tfm base; +}; + +struct crypto_sync_skcipher { + struct crypto_skcipher base; +}; + +struct crypto_skcipher *crypto_alloc_skcipher(const char *alg_name, + u32 type, u32 mask); + +static inline struct crypto_sync_skcipher * +crypto_alloc_sync_skcipher(const char *alg_name, u32 type, u32 mask) +{ + return (void *) crypto_alloc_skcipher(alg_name, type, mask); +} + +static inline void crypto_free_skcipher(struct crypto_skcipher *tfm) +{ + kfree(tfm); +} + +static inline void crypto_free_sync_skcipher(struct crypto_sync_skcipher *tfm) +{ + crypto_free_skcipher(&tfm->base); +} + +struct skcipher_request { + unsigned cryptlen; + u8 *iv; + + struct scatterlist *src; + struct scatterlist *dst; + + struct crypto_tfm *tfm; +}; + +#define MAX_SYNC_SKCIPHER_REQSIZE 384 +#define SYNC_SKCIPHER_REQUEST_ON_STACK(name, tfm) \ + char __##name##_desc[sizeof(struct skcipher_request) + \ + MAX_SYNC_SKCIPHER_REQSIZE + \ + (!(sizeof((struct crypto_sync_skcipher *)1 == \ + (typeof(tfm))1))) \ + ] CRYPTO_MINALIGN_ATTR; \ + struct skcipher_request *name = (void *)__##name##_desc + +static inline int crypto_skcipher_setkey(struct crypto_skcipher *tfm, + const u8 *key, unsigned int keylen) +{ + return tfm->setkey(tfm, key, keylen); +} + +static inline struct crypto_skcipher *crypto_skcipher_reqtfm( + struct skcipher_request *req) +{ + return container_of(req->tfm, struct crypto_skcipher, base); +} + +static inline int crypto_skcipher_encrypt(struct skcipher_request *req) +{ + return crypto_skcipher_reqtfm(req)->encrypt(req); +} + +static inline int crypto_skcipher_decrypt(struct skcipher_request *req) +{ + return crypto_skcipher_reqtfm(req)->decrypt(req); +} + +static inline void skcipher_request_set_tfm(struct skcipher_request *req, + struct crypto_skcipher *tfm) +{ + req->tfm = &tfm->base; +} + +static inline void skcipher_request_set_sync_tfm(struct skcipher_request *req, + struct crypto_sync_skcipher *tfm) +{ + skcipher_request_set_tfm(req, &tfm->base); +} + +static inline void skcipher_request_set_crypt( + struct skcipher_request *req, + struct scatterlist *src, struct scatterlist *dst, + unsigned int cryptlen, void *iv) +{ + req->src = src; + req->dst = dst; + req->cryptlen = cryptlen; + req->iv = iv; +} + +#endif /* _CRYPTO_SKCIPHER_H */ diff --git a/c_src/include/keys/user-type.h b/c_src/include/keys/user-type.h new file mode 100644 index 00000000..a7a2ee45 --- /dev/null +++ b/c_src/include/keys/user-type.h @@ -0,0 +1,6 @@ +#ifndef _KEYS_USER_TYPE_H +#define _KEYS_USER_TYPE_H + +#include <linux/key.h> + +#endif /* _KEYS_USER_TYPE_H */ diff --git a/c_src/include/linux/atomic.h b/c_src/include/linux/atomic.h new file mode 100644 index 00000000..5313f850 --- /dev/null +++ b/c_src/include/linux/atomic.h @@ -0,0 +1,349 @@ +#ifndef __TOOLS_LINUX_ATOMIC_H +#define __TOOLS_LINUX_ATOMIC_H + +#include <linux/compiler.h> +#include <linux/types.h> + +typedef struct { + int counter; +} atomic_t; + +typedef struct { + long counter; +} atomic_long_t; + +typedef struct { + u64 counter; +} atomic64_t; + 
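As a hedged aside (not part of this commit), the DEF_ATOMIC_OPS() macro later in this header expands to the usual wrappers around these counter types (atomic_set, atomic_inc, atomic_dec_and_test, ...); a minimal refcounting sketch using them, with the object/free logic purely illustrative, might look like:

/* Hypothetical sketch: a reference count built on the atomic_t shim in
 * linux/atomic.h; struct obj and free_fn are illustrative assumptions. */
#include <linux/atomic.h>

struct obj {
	atomic_t ref;
};

static inline void obj_init(struct obj *o)
{
	atomic_set(&o->ref, 1);		/* caller holds the initial reference */
}

static inline void obj_get(struct obj *o)
{
	atomic_inc(&o->ref);
}

static inline void obj_put(struct obj *o, void (*free_fn)(struct obj *))
{
	if (atomic_dec_and_test(&o->ref))	/* true when the count hits zero */
		free_fn(o);
}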
+#ifndef C11_ATOMICS + +#include <urcu/uatomic.h> + +#if (CAA_BITS_PER_LONG != 64) +#define ATOMIC64_SPINLOCK +#endif + +#define __ATOMIC_READ(p) uatomic_read(p) +#define __ATOMIC_SET(p, v) uatomic_set(p, v) +#define __ATOMIC_ADD_RETURN(v, p) uatomic_add_return(p, v) +#define __ATOMIC_SUB_RETURN(v, p) uatomic_sub_return(p, v) +#define __ATOMIC_ADD(v, p) uatomic_add(p, v) +#define __ATOMIC_SUB(v, p) uatomic_sub(p, v) +#define __ATOMIC_INC(p) uatomic_inc(p) +#define __ATOMIC_DEC(p) uatomic_dec(p) +#define __ATOMIC_AND(v, p) uatomic_and(p, v) +#define __ATOMIC_OR(v, p) uatomic_or(p, v) + +#define xchg(p, v) uatomic_xchg(p, v) +#define xchg_acquire(p, v) uatomic_xchg(p, v) +#define cmpxchg(p, old, new) uatomic_cmpxchg(p, old, new) +#define cmpxchg_acquire(p, old, new) uatomic_cmpxchg(p, old, new) +#define cmpxchg_release(p, old, new) uatomic_cmpxchg(p, old, new) + +#define smp_mb__before_atomic() cmm_smp_mb__before_uatomic_add() +#define smp_mb__after_atomic() cmm_smp_mb__after_uatomic_add() +#define smp_wmb() cmm_smp_wmb() +#define smp_rmb() cmm_smp_rmb() +#define smp_mb() cmm_smp_mb() +#define smp_read_barrier_depends() cmm_smp_read_barrier_depends() +#define smp_acquire__after_ctrl_dep() cmm_smp_mb() + +#else /* C11_ATOMICS */ + +#define __ATOMIC_READ(p) __atomic_load_n(p, __ATOMIC_RELAXED) +#define __ATOMIC_SET(p, v) __atomic_store_n(p, v, __ATOMIC_RELAXED) +#define __ATOMIC_ADD_RETURN(v, p) __atomic_add_fetch(p, v, __ATOMIC_RELAXED) +#define __ATOMIC_ADD_RETURN_RELEASE(v, p) \ + __atomic_add_fetch(p, v, __ATOMIC_RELEASE) +#define __ATOMIC_SUB_RETURN(v, p) __atomic_sub_fetch(p, v, __ATOMIC_RELAXED) +#define __ATOMIC_SUB_RETURN_RELEASE(v, p) \ + __atomic_sub_fetch(p, v, __ATOMIC_RELEASE) +#define __ATOMIC_AND(p) __atomic_and_fetch(p, v, __ATOMIC_RELAXED) +#define __ATOMIC_OR(p) __atomic_or_fetch(p, v, __ATOMIC_RELAXED) + +#define xchg(p, v) __atomic_exchange_n(p, v, __ATOMIC_SEQ_CST) +#define xchg_acquire(p, v) __atomic_exchange_n(p, v, __ATOMIC_ACQUIRE) + +#define cmpxchg(p, old, new) \ +({ \ + typeof(*(p)) __old = (old); \ + \ + __atomic_compare_exchange_n((p), &__old, new, false, \ + __ATOMIC_SEQ_CST, \ + __ATOMIC_SEQ_CST); \ + __old; \ +}) + +#define cmpxchg_acquire(p, old, new) \ +({ \ + typeof(*(p)) __old = (old); \ + \ + __atomic_compare_exchange_n((p), &__old, new, false, \ + __ATOMIC_ACQUIRE, \ + __ATOMIC_ACQUIRE); \ + __old; \ +}) + +#define cmpxchg_release(p, old, new) \ +({ \ + typeof(*(p)) __old = (old); \ + \ + __atomic_compare_exchange_n((p), &__old, new, false, \ + __ATOMIC_RELEASE, \ + __ATOMIC_RELEASE); \ + __old; \ +}) + +#define smp_mb__before_atomic() __atomic_thread_fence(__ATOMIC_SEQ_CST) +#define smp_mb__after_atomic() __atomic_thread_fence(__ATOMIC_SEQ_CST) +#define smp_wmb() __atomic_thread_fence(__ATOMIC_SEQ_CST) +#define smp_rmb() __atomic_thread_fence(__ATOMIC_SEQ_CST) +#define smp_mb() __atomic_thread_fence(__ATOMIC_SEQ_CST) +#define smp_read_barrier_depends() + +#endif + +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = READ_ONCE(*p); \ + smp_mb(); \ + ___p1; \ +}) + +#define smp_store_release(p, v) \ +do { \ + smp_mb(); \ + WRITE_ONCE(*p, v); \ +} while (0) + +/* atomic interface: */ + +#ifndef __ATOMIC_ADD +#define __ATOMIC_ADD(i, v) __ATOMIC_ADD_RETURN(i, v) +#endif + +#ifndef __ATOMIC_ADD_RETURN_RELEASE +#define __ATOMIC_ADD_RETURN_RELEASE(i, v) \ + ({ smp_mb__before_atomic(); __ATOMIC_ADD_RETURN(i, v); }) +#endif + +#ifndef __ATOMIC_SUB_RETURN_RELEASE +#define 
__ATOMIC_SUB_RETURN_RELEASE(i, v) \ + ({ smp_mb__before_atomic(); __ATOMIC_SUB_RETURN(i, v); }) +#endif + +#ifndef __ATOMIC_SUB +#define __ATOMIC_SUB(i, v) __ATOMIC_SUB_RETURN(i, v) +#endif + +#ifndef __ATOMIC_INC_RETURN +#define __ATOMIC_INC_RETURN(v) __ATOMIC_ADD_RETURN(1, v) +#endif + +#ifndef __ATOMIC_DEC_RETURN +#define __ATOMIC_DEC_RETURN(v) __ATOMIC_SUB_RETURN(1, v) +#endif + +#ifndef __ATOMIC_INC +#define __ATOMIC_INC(v) __ATOMIC_ADD(1, v) +#endif + +#ifndef __ATOMIC_DEC +#define __ATOMIC_DEC(v) __ATOMIC_SUB(1, v) +#endif + +#define DEF_ATOMIC_OPS(a_type, i_type) \ +static inline i_type a_type##_read(const a_type##_t *v) \ +{ \ + return __ATOMIC_READ(&v->counter); \ +} \ + \ +static inline i_type a_type##_read_acquire(const a_type##_t *v) \ +{ \ + i_type ret = __ATOMIC_READ(&v->counter); \ + smp_mb__after_atomic(); \ + return ret; \ +} \ + \ +static inline void a_type##_set(a_type##_t *v, i_type i) \ +{ \ + return __ATOMIC_SET(&v->counter, i); \ +} \ + \ +static inline i_type a_type##_add_return(i_type i, a_type##_t *v) \ +{ \ + return __ATOMIC_ADD_RETURN(i, &v->counter); \ +} \ + \ +static inline i_type a_type##_add_return_release(i_type i, a_type##_t *v)\ +{ \ + return __ATOMIC_ADD_RETURN_RELEASE(i, &v->counter); \ +} \ + \ +static inline i_type a_type##_sub_return_release(i_type i, a_type##_t *v)\ +{ \ + return __ATOMIC_SUB_RETURN_RELEASE(i, &v->counter); \ +} \ + \ +static inline i_type a_type##_sub_return(i_type i, a_type##_t *v) \ +{ \ + return __ATOMIC_SUB_RETURN(i, &v->counter); \ +} \ + \ +static inline void a_type##_add(i_type i, a_type##_t *v) \ +{ \ + __ATOMIC_ADD(i, &v->counter); \ +} \ + \ +static inline void a_type##_sub(i_type i, a_type##_t *v) \ +{ \ + __ATOMIC_SUB(i, &v->counter); \ +} \ + \ +static inline i_type a_type##_inc_return(a_type##_t *v) \ +{ \ + return __ATOMIC_INC_RETURN(&v->counter); \ +} \ + \ +static inline i_type a_type##_dec_return(a_type##_t *v) \ +{ \ + return __ATOMIC_DEC_RETURN(&v->counter); \ +} \ + \ +static inline i_type a_type##_dec_return_release(a_type##_t *v) \ +{ \ + return __ATOMIC_SUB_RETURN_RELEASE(1, &v->counter); \ +} \ + \ +static inline void a_type##_inc(a_type##_t *v) \ +{ \ + __ATOMIC_INC(&v->counter); \ +} \ + \ +static inline void a_type##_dec(a_type##_t *v) \ +{ \ + __ATOMIC_DEC(&v->counter); \ +} \ + \ +static inline bool a_type##_add_negative(i_type i, a_type##_t *v) \ +{ \ + return __ATOMIC_ADD_RETURN(i, &v->counter) < 0; \ +} \ + \ +static inline bool a_type##_sub_and_test(i_type i, a_type##_t *v) \ +{ \ + return __ATOMIC_SUB_RETURN(i, &v->counter) == 0; \ +} \ + \ +static inline bool a_type##_inc_and_test(a_type##_t *v) \ +{ \ + return __ATOMIC_INC_RETURN(&v->counter) == 0; \ +} \ + \ +static inline bool a_type##_dec_and_test(a_type##_t *v) \ +{ \ + return __ATOMIC_DEC_RETURN(&v->counter) == 0; \ +} \ + \ +static inline i_type a_type##_add_unless(a_type##_t *v, i_type a, i_type u)\ +{ \ + i_type old, c = __ATOMIC_READ(&v->counter); \ + while (c != u && (old = cmpxchg(&v->counter, c, c + a)) != c) \ + c = old; \ + return c; \ +} \ + \ +static inline bool a_type##_inc_not_zero(a_type##_t *v) \ +{ \ + return a_type##_add_unless(v, 1, 0); \ +} \ + \ +static inline void a_type##_and(i_type a, a_type##_t *v) \ +{ \ + __ATOMIC_AND(a, v); \ +} \ + \ +static inline void a_type##_or(i_type a, a_type##_t *v) \ +{ \ + __ATOMIC_OR(a, v); \ +} \ + \ +static inline i_type a_type##_xchg(a_type##_t *v, i_type i) \ +{ \ + return xchg(&v->counter, i); \ +} \ + \ +static inline i_type a_type##_cmpxchg(a_type##_t *v, i_type old, i_type 
new)\ +{ \ + return cmpxchg(&v->counter, old, new); \ +} \ + \ +static inline i_type a_type##_cmpxchg_acquire(a_type##_t *v, i_type old, i_type new)\ +{ \ + return cmpxchg_acquire(&v->counter, old, new); \ +} \ + \ +static inline bool a_type##_try_cmpxchg_acquire(a_type##_t *v, i_type *old, i_type new)\ +{ \ + i_type prev = *old; \ + *old = cmpxchg_acquire(&v->counter, *old, new); \ + return prev == *old; \ +} + +DEF_ATOMIC_OPS(atomic, int) +DEF_ATOMIC_OPS(atomic_long, long) + +#ifndef ATOMIC64_SPINLOCK +DEF_ATOMIC_OPS(atomic64, s64) +#else +s64 atomic64_read(const atomic64_t *v); +static inline s64 atomic64_read_acquire(const atomic64_t *v) +{ + s64 ret = atomic64_read(v); + smp_mb__after_atomic(); + return ret; +} + +void atomic64_set(atomic64_t *v, s64); + +s64 atomic64_add_return(s64, atomic64_t *); +s64 atomic64_sub_return(s64, atomic64_t *); +void atomic64_add(s64, atomic64_t *); +void atomic64_sub(s64, atomic64_t *); + +s64 atomic64_xchg(atomic64_t *, s64); +s64 atomic64_cmpxchg(atomic64_t *, s64, s64); + +#define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0) +#define atomic64_inc(v) atomic64_add(1LL, (v)) +#define atomic64_inc_return(v) atomic64_add_return(1LL, (v)) +#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0) +#define atomic64_sub_and_test(a, v) (atomic64_sub_return((a), (v)) == 0) +#define atomic64_dec(v) atomic64_sub(1LL, (v)) +#define atomic64_dec_return(v) atomic64_sub_return(1LL, (v)) +#define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0) +#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1LL, 0LL) + +static inline s64 atomic64_add_return_release(s64 i, atomic64_t *v) +{ + smp_mb__before_atomic(); + return atomic64_add_return(i, v); +} + +static inline s64 atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new) +{ + return atomic64_cmpxchg(v, old, new); +} + +static inline s64 atomic64_sub_return_release(s64 i, atomic64_t *v) +{ + smp_mb__before_atomic(); + return atomic64_sub_return(i, v); +} + +#endif + +#endif /* __TOOLS_LINUX_ATOMIC_H */ diff --git a/c_src/include/linux/backing-dev-defs.h b/c_src/include/linux/backing-dev-defs.h new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/c_src/include/linux/backing-dev-defs.h diff --git a/c_src/include/linux/backing-dev.h b/c_src/include/linux/backing-dev.h new file mode 100644 index 00000000..d8a86b45 --- /dev/null +++ b/c_src/include/linux/backing-dev.h @@ -0,0 +1,45 @@ +#ifndef _LINUX_BACKING_DEV_H +#define _LINUX_BACKING_DEV_H + +#include <linux/list.h> + +typedef int (congested_fn)(void *, int); + +enum wb_congested_state { + WB_async_congested, /* The async (write) queue is getting full */ + WB_sync_congested, /* The sync queue is getting full */ +}; + +struct backing_dev_info { + struct list_head bdi_list; + unsigned ra_pages; + unsigned capabilities; + + congested_fn *congested_fn; + void *congested_data; +}; + +#define BDI_CAP_NO_ACCT_DIRTY 0x00000001 +#define BDI_CAP_NO_WRITEBACK 0x00000002 +#define BDI_CAP_NO_ACCT_WB 0x00000004 +#define BDI_CAP_STABLE_WRITES 0x00000008 +#define BDI_CAP_STRICTLIMIT 0x00000010 +#define BDI_CAP_CGROUP_WRITEBACK 0x00000020 + +static inline int bdi_congested(struct backing_dev_info *bdi, int cong_bits) +{ + return 0; +} + +static inline int __must_check bdi_setup_and_register(struct backing_dev_info *bdi, + char *name) +{ + bdi->capabilities = 0; + return 0; +} + +static inline void bdi_destroy(struct backing_dev_info *bdi) {} + +#define VM_MAX_READAHEAD 128 /* kbytes */ + +#endif /* _LINUX_BACKING_DEV_H */ diff --git 
a/c_src/include/linux/bio.h b/c_src/include/linux/bio.h new file mode 100644 index 00000000..1f8acca2 --- /dev/null +++ b/c_src/include/linux/bio.h @@ -0,0 +1,434 @@ +/* + * 2.5 block I/O model + * + * Copyright (C) 2001 Jens Axboe <axboe@suse.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public Licens + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- + */ +#ifndef __LINUX_BIO_H +#define __LINUX_BIO_H + +#include <linux/mempool.h> +#include <linux/bug.h> +#include <linux/err.h> + +#include <linux/blkdev.h> +#include <linux/blk_types.h> +#include <linux/workqueue.h> + +#define bio_prio(bio) (bio)->bi_ioprio +#define bio_set_prio(bio, prio) ((bio)->bi_ioprio = prio) + +#define bio_iter_iovec(bio, iter) \ + bvec_iter_bvec((bio)->bi_io_vec, (iter)) + +#define bio_iter_page(bio, iter) \ + bvec_iter_page((bio)->bi_io_vec, (iter)) +#define bio_iter_len(bio, iter) \ + bvec_iter_len((bio)->bi_io_vec, (iter)) +#define bio_iter_offset(bio, iter) \ + bvec_iter_offset((bio)->bi_io_vec, (iter)) + +#define bio_page(bio) bio_iter_page((bio), (bio)->bi_iter) +#define bio_offset(bio) bio_iter_offset((bio), (bio)->bi_iter) +#define bio_iovec(bio) bio_iter_iovec((bio), (bio)->bi_iter) + +#define bio_multiple_segments(bio) \ + ((bio)->bi_iter.bi_size != bio_iovec(bio).bv_len) + +#define bvec_iter_sectors(iter) ((iter).bi_size >> 9) +#define bvec_iter_end_sector(iter) ((iter).bi_sector + bvec_iter_sectors((iter))) + +#define bio_sectors(bio) bvec_iter_sectors((bio)->bi_iter) +#define bio_end_sector(bio) bvec_iter_end_sector((bio)->bi_iter) + +static inline bool bio_has_data(struct bio *bio) +{ + if (bio && + bio->bi_iter.bi_size && + bio_op(bio) != REQ_OP_DISCARD && + bio_op(bio) != REQ_OP_SECURE_ERASE) + return true; + + return false; +} + +static inline bool bio_no_advance_iter(struct bio *bio) +{ + return bio_op(bio) == REQ_OP_DISCARD || + bio_op(bio) == REQ_OP_SECURE_ERASE || + bio_op(bio) == REQ_OP_WRITE_SAME; +} + +static inline bool bio_is_rw(struct bio *bio) +{ + if (!bio_has_data(bio)) + return false; + + if (bio_no_advance_iter(bio)) + return false; + + return true; +} + +static inline bool bio_mergeable(struct bio *bio) +{ + if (bio->bi_opf & REQ_NOMERGE_FLAGS) + return false; + + return true; +} + +static inline unsigned int bio_cur_bytes(struct bio *bio) +{ + if (bio_has_data(bio)) + return bio_iovec(bio).bv_len; + else /* dataless requests such as discard */ + return bio->bi_iter.bi_size; +} + +static inline void *bio_data(struct bio *bio) +{ + if (bio_has_data(bio)) + return page_address(bio_page(bio)) + bio_offset(bio); + + return NULL; +} + +#define __bio_kmap_atomic(bio, iter) \ + (kmap_atomic(bio_iter_iovec((bio), (iter)).bv_page) + \ + bio_iter_iovec((bio), (iter)).bv_offset) + +#define __bio_kunmap_atomic(addr) kunmap_atomic(addr) + +static inline struct bio_vec *bio_next_segment(const struct bio *bio, + struct bvec_iter_all *iter) +{ + if (iter->idx >= bio->bi_vcnt) + return NULL; + + return &bio->bi_io_vec[iter->idx]; +} + +#define 
bio_for_each_segment_all(bvl, bio, iter) \ + for ((iter).idx = 0; (bvl = bio_next_segment((bio), &(iter))); (iter).idx++) + +static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter, + unsigned bytes) +{ + iter->bi_sector += bytes >> 9; + + if (bio_no_advance_iter(bio)) + iter->bi_size -= bytes; + else + bvec_iter_advance(bio->bi_io_vec, iter, bytes); +} + +#define __bio_for_each_segment(bvl, bio, iter, start) \ + for (iter = (start); \ + (iter).bi_size && \ + ((bvl = bio_iter_iovec((bio), (iter))), 1); \ + bio_advance_iter((bio), &(iter), (bvl).bv_len)) + +#define bio_for_each_segment(bvl, bio, iter) \ + __bio_for_each_segment(bvl, bio, iter, (bio)->bi_iter) + +#define __bio_for_each_bvec(bvl, bio, iter, start) \ + __bio_for_each_segment(bvl, bio, iter, start) + +#define bio_iter_last(bvec, iter) ((iter).bi_size == (bvec).bv_len) + +static inline unsigned bio_segments(struct bio *bio) +{ + unsigned segs = 0; + struct bio_vec bv; + struct bvec_iter iter; + + /* + * We special case discard/write same, because they interpret bi_size + * differently: + */ + + if (bio_op(bio) == REQ_OP_DISCARD) + return 1; + + if (bio_op(bio) == REQ_OP_SECURE_ERASE) + return 1; + + if (bio_op(bio) == REQ_OP_WRITE_SAME) + return 1; + + bio_for_each_segment(bv, bio, iter) + segs++; + + return segs; +} + +static inline void bio_get(struct bio *bio) +{ + bio->bi_flags |= (1 << BIO_REFFED); + smp_mb__before_atomic(); + atomic_inc(&bio->__bi_cnt); +} + +static inline bool bio_flagged(struct bio *bio, unsigned int bit) +{ + return (bio->bi_flags & (1U << bit)) != 0; +} + +static inline void bio_set_flag(struct bio *bio, unsigned int bit) +{ + bio->bi_flags |= (1U << bit); +} + +static inline void bio_clear_flag(struct bio *bio, unsigned int bit) +{ + bio->bi_flags &= ~(1U << bit); +} + +extern struct bio *bio_split(struct bio *bio, int sectors, + gfp_t gfp, struct bio_set *bs); + +static inline struct bio *bio_next_split(struct bio *bio, int sectors, + gfp_t gfp, struct bio_set *bs) +{ + if (sectors >= bio_sectors(bio)) + return bio; + + return bio_split(bio, sectors, gfp, bs); +} + +struct bio_set { + unsigned int front_pad; + unsigned int back_pad; + mempool_t bio_pool; + mempool_t bvec_pool; +}; + + +static inline void bioset_free(struct bio_set *bs) +{ + kfree(bs); +} + +void bioset_exit(struct bio_set *); +int bioset_init(struct bio_set *, unsigned, unsigned, int); + +extern struct bio_set *bioset_create(unsigned int, unsigned int); +extern struct bio_set *bioset_create_nobvec(unsigned int, unsigned int); +enum { + BIOSET_NEED_BVECS = 1 << 0, + BIOSET_NEED_RESCUER = 1 << 1, +}; + +struct bio *bio_alloc_bioset(struct block_device *, unsigned, + blk_opf_t, gfp_t, struct bio_set *); +extern void bio_put(struct bio *); + +int bio_add_page(struct bio *, struct page *, unsigned, unsigned); + +struct bio *bio_alloc_clone(struct block_device *, struct bio *, + gfp_t, struct bio_set *); + +struct bio *bio_kmalloc(unsigned int, gfp_t); + +extern void bio_endio(struct bio *); + +extern void bio_advance(struct bio *, unsigned); + +extern void bio_reset(struct bio *, struct block_device *, unsigned); +void bio_chain(struct bio *, struct bio *); + +extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, + struct bio *src, struct bvec_iter *src_iter); +extern void bio_copy_data(struct bio *dst, struct bio *src); + +void bio_free_pages(struct bio *bio); + +void zero_fill_bio_iter(struct bio *bio, struct bvec_iter iter); + +static inline void zero_fill_bio(struct bio *bio) +{ + 
zero_fill_bio_iter(bio, bio->bi_iter); +} + +#define bio_set_dev(bio, bdev) \ +do { \ + (bio)->bi_bdev = (bdev); \ +} while (0) + +#define bio_copy_dev(dst, src) \ +do { \ + (dst)->bi_bdev = (src)->bi_bdev; \ +} while (0) + +static inline char *bvec_kmap_irq(struct bio_vec *bvec, unsigned long *flags) +{ + return page_address(bvec->bv_page) + bvec->bv_offset; +} + +static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags) +{ + *flags = 0; +} + +static inline char *__bio_kmap_irq(struct bio *bio, struct bvec_iter iter, + unsigned long *flags) +{ + return bvec_kmap_irq(&bio_iter_iovec(bio, iter), flags); +} +#define __bio_kunmap_irq(buf, flags) bvec_kunmap_irq(buf, flags) + +#define bio_kmap_irq(bio, flags) \ + __bio_kmap_irq((bio), (bio)->bi_iter, (flags)) +#define bio_kunmap_irq(buf,flags) __bio_kunmap_irq(buf, flags) + +struct bio_list { + struct bio *head; + struct bio *tail; +}; + +static inline int bio_list_empty(const struct bio_list *bl) +{ + return bl->head == NULL; +} + +static inline void bio_list_init(struct bio_list *bl) +{ + bl->head = bl->tail = NULL; +} + +#define BIO_EMPTY_LIST { NULL, NULL } + +#define bio_list_for_each(bio, bl) \ + for (bio = (bl)->head; bio; bio = bio->bi_next) + +static inline unsigned bio_list_size(const struct bio_list *bl) +{ + unsigned sz = 0; + struct bio *bio; + + bio_list_for_each(bio, bl) + sz++; + + return sz; +} + +static inline void bio_list_add(struct bio_list *bl, struct bio *bio) +{ + bio->bi_next = NULL; + + if (bl->tail) + bl->tail->bi_next = bio; + else + bl->head = bio; + + bl->tail = bio; +} + +static inline void bio_list_add_head(struct bio_list *bl, struct bio *bio) +{ + bio->bi_next = bl->head; + + bl->head = bio; + + if (!bl->tail) + bl->tail = bio; +} + +static inline void bio_list_merge(struct bio_list *bl, struct bio_list *bl2) +{ + if (!bl2->head) + return; + + if (bl->tail) + bl->tail->bi_next = bl2->head; + else + bl->head = bl2->head; + + bl->tail = bl2->tail; +} + +static inline void bio_list_merge_head(struct bio_list *bl, + struct bio_list *bl2) +{ + if (!bl2->head) + return; + + if (bl->head) + bl2->tail->bi_next = bl->head; + else + bl->tail = bl2->tail; + + bl->head = bl2->head; +} + +static inline struct bio *bio_list_peek(struct bio_list *bl) +{ + return bl->head; +} + +static inline struct bio *bio_list_pop(struct bio_list *bl) +{ + struct bio *bio = bl->head; + + if (bio) { + bl->head = bl->head->bi_next; + if (!bl->head) + bl->tail = NULL; + + bio->bi_next = NULL; + } + + return bio; +} + +static inline struct bio *bio_list_get(struct bio_list *bl) +{ + struct bio *bio = bl->head; + + bl->head = bl->tail = NULL; + + return bio; +} + +/* + * Increment chain count for the bio. Make sure the CHAIN flag update + * is visible before the raised count. 
+ */ +static inline void bio_inc_remaining(struct bio *bio) +{ + bio_set_flag(bio, BIO_CHAIN); + smp_mb__before_atomic(); + atomic_inc(&bio->__bi_remaining); +} + +static inline void bio_init(struct bio *bio, + struct block_device *bdev, + struct bio_vec *table, + unsigned short max_vecs, + unsigned int opf) +{ + memset(bio, 0, sizeof(*bio)); + bio->bi_bdev = bdev; + bio->bi_opf = opf; + atomic_set(&bio->__bi_remaining, 1); + atomic_set(&bio->__bi_cnt, 1); + + bio->bi_io_vec = table; + bio->bi_max_vecs = max_vecs; +} + +#endif /* __LINUX_BIO_H */ diff --git a/c_src/include/linux/bit_spinlock.h b/c_src/include/linux/bit_spinlock.h new file mode 100644 index 00000000..873f08c2 --- /dev/null +++ b/c_src/include/linux/bit_spinlock.h @@ -0,0 +1,84 @@ +#ifndef __LINUX_BIT_SPINLOCK_H +#define __LINUX_BIT_SPINLOCK_H + +#include <linux/kernel.h> +#include <linux/preempt.h> +#include <linux/futex.h> +#include <urcu/futex.h> + +/* + * The futex wait op wants an explicit 32-bit address and value. If the bitmap + * used for the spinlock is 64-bit, cast down and pass the right 32-bit region + * for the in-kernel checks. The value is the copy that has already been read + * from the atomic op. + * + * The futex wake op interprets the value as the number of waiters to wake (up + * to INT_MAX), so pass that along directly. + */ +static inline void do_futex(int nr, unsigned long *addr, unsigned long v, int futex_flags) +{ + u32 *addr32 = (u32 *) addr; + u32 *v32 = (u32 *) &v; + int shift = 0; + + futex_flags |= FUTEX_PRIVATE_FLAG; + +#if BITS_PER_LONG == 64 +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + shift = (nr >= 32) ? 1 : 0; +#else + shift = (nr < 32) ? 1 : 0; +#endif +#endif + if (shift) { + addr32 += shift; + v32 += shift; + } + /* + * The shift to determine the futex address may have cast away a + * literal wake count value. The value is capped to INT_MAX and thus + * always in the low bytes of v regardless of bit nr. Copy in the wake + * count to whatever 32-bit range was selected. 
+ */ + if (futex_flags == FUTEX_WAKE_PRIVATE) + *v32 = (u32) v; + futex(addr32, futex_flags, *v32, NULL, NULL, 0); +} + +static inline void bit_spin_lock(int nr, unsigned long *_addr) +{ + unsigned long mask; + unsigned long *addr = _addr + (nr / BITS_PER_LONG); + unsigned long v; + + nr &= BITS_PER_LONG - 1; + mask = 1UL << nr; + + while (1) { + v = __atomic_fetch_or(addr, mask, __ATOMIC_ACQUIRE); + if (!(v & mask)) + break; + + do_futex(nr, addr, v, FUTEX_WAIT); + } +} + +static inline void bit_spin_wake(int nr, unsigned long *_addr) +{ + do_futex(nr, _addr, INT_MAX, FUTEX_WAKE); +} + +static inline void bit_spin_unlock(int nr, unsigned long *_addr) +{ + unsigned long mask; + unsigned long *addr = _addr + (nr / BITS_PER_LONG); + + nr &= BITS_PER_LONG - 1; + mask = 1UL << nr; + + __atomic_and_fetch(addr, ~mask, __ATOMIC_RELEASE); + do_futex(nr, addr, INT_MAX, FUTEX_WAKE); +} + +#endif /* __LINUX_BIT_SPINLOCK_H */ + diff --git a/c_src/include/linux/bitmap.h b/c_src/include/linux/bitmap.h new file mode 100644 index 00000000..db2dfdb2 --- /dev/null +++ b/c_src/include/linux/bitmap.h @@ -0,0 +1,146 @@ +#ifndef _PERF_BITOPS_H +#define _PERF_BITOPS_H + +#include <string.h> +#include <linux/bitops.h> +#include <stdlib.h> + +#define DECLARE_BITMAP(name,bits) \ + unsigned long name[BITS_TO_LONGS(bits)] + +void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits); + +#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1))) + +#define BITMAP_LAST_WORD_MASK(nbits) \ +( \ + ((nbits) % BITS_PER_LONG) ? \ + (1UL<<((nbits) % BITS_PER_LONG))-1 : ~0UL \ +) + +#define small_const_nbits(nbits) \ + (__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG) + +static inline int __bitmap_weight(const unsigned long *bitmap, int bits) +{ + int k, w = 0, lim = bits/BITS_PER_LONG; + + for (k = 0; k < lim; k++) + w += hweight_long(bitmap[k]); + + if (bits % BITS_PER_LONG) + w += hweight_long(bitmap[k] & BITMAP_LAST_WORD_MASK(bits)); + + return w; +} + +static inline int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int bits) +{ + unsigned int k; + unsigned int lim = bits/BITS_PER_LONG; + unsigned long result = 0; + + for (k = 0; k < lim; k++) + result |= (dst[k] = bitmap1[k] & bitmap2[k]); + if (bits % BITS_PER_LONG) + result |= (dst[k] = bitmap1[k] & bitmap2[k] & + BITMAP_LAST_WORD_MASK(bits)); + return result != 0; +} + +static inline void bitmap_complement(unsigned long *dst, const unsigned long *src, + unsigned int bits) +{ + unsigned int k, lim = bits/BITS_PER_LONG; + for (k = 0; k < lim; ++k) + dst[k] = ~src[k]; + + if (bits % BITS_PER_LONG) + dst[k] = ~src[k]; +} + +static inline void bitmap_zero(unsigned long *dst, int nbits) +{ + memset(dst, 0, BITS_TO_LONGS(nbits) * sizeof(unsigned long)); +} + +static inline int bitmap_weight(const unsigned long *src, int nbits) +{ + if (small_const_nbits(nbits)) + return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits)); + return __bitmap_weight(src, nbits); +} + +static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, + const unsigned long *src2, int nbits) +{ + if (small_const_nbits(nbits)) + *dst = *src1 | *src2; + else + __bitmap_or(dst, src1, src2, nbits); +} + +static inline unsigned long *bitmap_alloc(int nbits) +{ + return calloc(1, BITS_TO_LONGS(nbits) * sizeof(unsigned long)); +} + +static inline int bitmap_and(unsigned long *dst, const unsigned long *src1, + const unsigned long *src2, unsigned int nbits) +{ + if 
(small_const_nbits(nbits)) + return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0; + return __bitmap_and(dst, src1, src2, nbits); +} + +static inline unsigned long _find_next_bit(const unsigned long *addr, + unsigned long nbits, unsigned long start, unsigned long invert) +{ + unsigned long tmp; + + if (!nbits || start >= nbits) + return nbits; + + tmp = addr[start / BITS_PER_LONG] ^ invert; + + /* Handle 1st word. */ + tmp &= BITMAP_FIRST_WORD_MASK(start); + start = round_down(start, BITS_PER_LONG); + + while (!tmp) { + start += BITS_PER_LONG; + if (start >= nbits) + return nbits; + + tmp = addr[start / BITS_PER_LONG] ^ invert; + } + + return min(start + __ffs(tmp), nbits); +} + +static inline unsigned long find_next_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + return _find_next_bit(addr, size, offset, 0UL); +} + +static inline unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + return _find_next_bit(addr, size, offset, ~0UL); +} + +#define find_first_bit(addr, size) find_next_bit((addr), (size), 0) +#define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0) + +static inline bool bitmap_empty(const unsigned long *src, unsigned nbits) +{ + if (small_const_nbits(nbits)) + return ! (*src & BITMAP_LAST_WORD_MASK(nbits)); + + return find_first_bit(src, nbits) == nbits; +} + +#endif /* _PERF_BITOPS_H */ diff --git a/c_src/include/linux/bitops.h b/c_src/include/linux/bitops.h new file mode 100644 index 00000000..758476b1 --- /dev/null +++ b/c_src/include/linux/bitops.h @@ -0,0 +1,286 @@ +#ifndef _TOOLS_LINUX_BITOPS_H_ +#define _TOOLS_LINUX_BITOPS_H_ + +#include <asm/types.h> +#include <linux/kernel.h> +#include <linux/compiler.h> +#include <linux/page.h> + +#ifndef __WORDSIZE +#define __WORDSIZE (__SIZEOF_LONG__ * 8) +#endif + +#ifndef BITS_PER_LONG +# define BITS_PER_LONG __WORDSIZE +#endif + +#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) +#define BIT_WORD(nr) ((nr) / BITS_PER_LONG) +#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) +#define BITS_PER_BYTE 8 +#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) +#define BITS_TO_U64(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64)) +#define BITS_TO_U32(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u32)) +#define BITS_TO_BYTES(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE) + +static inline void __set_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + + *p |= mask; +} + +static inline void set_bit(long nr, volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + + __atomic_or_fetch(p, mask, __ATOMIC_RELAXED); +} + +static inline void __clear_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + + *p &= ~mask; +} + +static inline void clear_bit(long nr, volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + + __atomic_and_fetch(p, ~mask, __ATOMIC_RELAXED); +} + +static inline int test_bit(long nr, const volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *) addr) + BIT_WORD(nr); + + return (*p & mask) != 0; +} + +static inline int __test_and_set_bit(int nr, unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + 
unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + unsigned long old; + + old = *p; + *p = old | mask; + + return (old & mask) != 0; +} + +static inline bool test_and_set_bit(long nr, volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *) addr) + BIT_WORD(nr); + unsigned long old; + + old = __atomic_fetch_or(p, mask, __ATOMIC_RELAXED); + + return (old & mask) != 0; +} + +static inline bool test_and_clear_bit(long nr, volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *) addr) + BIT_WORD(nr); + unsigned long old; + + old = __atomic_fetch_and(p, ~mask, __ATOMIC_RELAXED); + + return (old & mask) != 0; +} + +static inline void clear_bit_unlock(long nr, volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + + __atomic_and_fetch(p, ~mask, __ATOMIC_RELEASE); +} + +static inline bool test_and_set_bit_lock(long nr, volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *) addr) + BIT_WORD(nr); + unsigned long old; + + old = __atomic_fetch_or(p, mask, __ATOMIC_ACQUIRE); + + return (old & mask) != 0; +} + +#define for_each_set_bit(bit, addr, size) \ + for ((bit) = find_first_bit((addr), (size)); \ + (bit) < (size); \ + (bit) = find_next_bit((addr), (size), (bit) + 1)) + +/* same as for_each_set_bit() but use bit as value to start with */ +#define for_each_set_bit_from(bit, addr, size) \ + for ((bit) = find_next_bit((addr), (size), (bit)); \ + (bit) < (size); \ + (bit) = find_next_bit((addr), (size), (bit) + 1)) + +static inline unsigned long hweight_long(unsigned long w) +{ + return __builtin_popcountl(w); +} + +static inline unsigned long hweight64(u64 w) +{ + return __builtin_popcount((u32) w) + + __builtin_popcount(w >> 32); +} + +static inline unsigned long hweight32(u32 w) +{ + return __builtin_popcount(w); +} + +static inline unsigned long hweight8(unsigned long w) +{ + return __builtin_popcountl(w); +} + +/** + * rol64 - rotate a 64-bit value left + * @word: value to rotate + * @shift: bits to roll + */ +static inline __u64 rol64(__u64 word, unsigned int shift) +{ + return (word << shift) | (word >> (64 - shift)); +} + +/** + * ror64 - rotate a 64-bit value right + * @word: value to rotate + * @shift: bits to roll + */ +static inline __u64 ror64(__u64 word, unsigned int shift) +{ + return (word >> shift) | (word << (64 - shift)); +} + +/** + * rol32 - rotate a 32-bit value left + * @word: value to rotate + * @shift: bits to roll + */ +static inline __u32 rol32(__u32 word, unsigned int shift) +{ + return (word << shift) | (word >> ((-shift) & 31)); +} + +/** + * ror32 - rotate a 32-bit value right + * @word: value to rotate + * @shift: bits to roll + */ +static inline __u32 ror32(__u32 word, unsigned int shift) +{ + return (word >> shift) | (word << (32 - shift)); +} + +/** + * rol16 - rotate a 16-bit value left + * @word: value to rotate + * @shift: bits to roll + */ +static inline __u16 rol16(__u16 word, unsigned int shift) +{ + return (word << shift) | (word >> (16 - shift)); +} + +/** + * ror16 - rotate a 16-bit value right + * @word: value to rotate + * @shift: bits to roll + */ +static inline __u16 ror16(__u16 word, unsigned int shift) +{ + return (word >> shift) | (word << (16 - shift)); +} + +/** + * rol8 - rotate an 8-bit value left + * @word: value to rotate + * @shift: bits to roll + */ +static inline __u8 rol8(__u8 word, unsigned int 
shift) +{ + return (word << shift) | (word >> (8 - shift)); +} + +/** + * ror8 - rotate an 8-bit value right + * @word: value to rotate + * @shift: bits to roll + */ +static inline __u8 ror8(__u8 word, unsigned int shift) +{ + return (word >> shift) | (word << (8 - shift)); +} + +static inline unsigned long __fls(unsigned long word) +{ + return (sizeof(word) * 8) - 1 - __builtin_clzl(word); +} + +static inline int fls(int x) +{ + return x ? sizeof(x) * 8 - __builtin_clz(x) : 0; +} + +static inline int fls64(__u64 x) +{ +#if BITS_PER_LONG == 32 + __u32 h = x >> 32; + if (h) + return fls(h) + 32; + return fls(x); +#elif BITS_PER_LONG == 64 + if (x == 0) + return 0; + return __fls(x) + 1; +#endif +} + +static inline unsigned fls_long(unsigned long l) +{ + if (sizeof(l) == 4) + return fls(l); + return fls64(l); +} + +static inline unsigned long __ffs(unsigned long word) +{ + return __builtin_ctzl(word); +} + +static inline unsigned long __ffs64(u64 word) +{ +#if BITS_PER_LONG == 32 + if (((u32)word) == 0UL) + return __ffs((u32)(word >> 32)) + 32; +#elif BITS_PER_LONG != 64 +#error BITS_PER_LONG not 32 or 64 +#endif + return __ffs((unsigned long)word); +} + +#define ffz(x) __ffs(~(x)) + +static inline __attribute__((const)) +unsigned long rounddown_pow_of_two(unsigned long n) +{ + return 1UL << (fls_long(n) - 1); +} + +#endif diff --git a/c_src/include/linux/blk_types.h b/c_src/include/linux/blk_types.h new file mode 100644 index 00000000..80560ab6 --- /dev/null +++ b/c_src/include/linux/blk_types.h @@ -0,0 +1,247 @@ +/* + * Block data types and constants. Directly include this file only to + * break include dependency loop. + */ +#ifndef __LINUX_BLK_TYPES_H +#define __LINUX_BLK_TYPES_H + +#include <linux/atomic.h> +#include <linux/types.h> +#include <linux/bvec.h> +#include <linux/kobject.h> + +struct bio_set; +struct bio; +typedef void (bio_end_io_t) (struct bio *); + +#define BDEVNAME_SIZE 32 + +struct request_queue { + struct backing_dev_info *backing_dev_info; +}; + +struct gendisk { + struct backing_dev_info *bdi; + struct backing_dev_info __bdi; +}; + +struct hd_struct { + struct kobject kobj; +}; + +struct block_device { + struct kobject kobj; + dev_t bd_dev; + char name[BDEVNAME_SIZE]; + struct inode *bd_inode; + struct request_queue queue; + void *bd_holder; + struct gendisk * bd_disk; + struct gendisk __bd_disk; + int bd_fd; +}; + +#define bdev_kobj(_bdev) (&((_bdev)->kobj)) + +/* + * Block error status values. See block/blk-core:blk_errors for the details. 
+ */ +typedef u8 __bitwise blk_status_t; +#define BLK_STS_OK 0 +#define BLK_STS_NOTSUPP ((__force blk_status_t)1) +#define BLK_STS_TIMEOUT ((__force blk_status_t)2) +#define BLK_STS_NOSPC ((__force blk_status_t)3) +#define BLK_STS_TRANSPORT ((__force blk_status_t)4) +#define BLK_STS_TARGET ((__force blk_status_t)5) +#define BLK_STS_NEXUS ((__force blk_status_t)6) +#define BLK_STS_MEDIUM ((__force blk_status_t)7) +#define BLK_STS_PROTECTION ((__force blk_status_t)8) +#define BLK_STS_RESOURCE ((__force blk_status_t)9) +#define BLK_STS_IOERR ((__force blk_status_t)10) + +/* hack for device mapper, don't use elsewhere: */ +#define BLK_STS_DM_REQUEUE ((__force blk_status_t)11) + +#define BLK_STS_AGAIN ((__force blk_status_t)12) + +#define BIO_INLINE_VECS 4 + +/* + * main unit of I/O for the block layer and lower layers (ie drivers and + * stacking drivers) + */ +struct bio { + struct bio *bi_next; /* request queue link */ + struct block_device *bi_bdev; + blk_status_t bi_status; + unsigned int bi_opf; /* bottom bits req flags, + * top bits REQ_OP. Use + * accessors. + */ + unsigned short bi_flags; /* status, command, etc */ + unsigned short bi_ioprio; + + struct bvec_iter bi_iter; + + atomic_t __bi_remaining; + + bio_end_io_t *bi_end_io; + void *bi_private; + + unsigned short bi_vcnt; /* how many bio_vec's */ + + /* + * Everything starting with bi_max_vecs will be preserved by bio_reset() + */ + + unsigned short bi_max_vecs; /* max bvl_vecs we can hold */ + + atomic_t __bi_cnt; /* pin count */ + + struct bio_vec *bi_io_vec; /* the actual vec list */ + + struct bio_set *bi_pool; + + /* + * We can inline a number of vecs at the end of the bio, to avoid + * double allocations for a small number of bio_vecs. This member + * MUST obviously be kept at the very end of the bio. + */ + struct bio_vec bi_inline_vecs[0]; +}; + +#define BIO_RESET_BYTES offsetof(struct bio, bi_max_vecs) + +/* + * bio flags + */ +#define BIO_SEG_VALID 1 /* bi_phys_segments valid */ +#define BIO_CLONED 2 /* doesn't own data */ +#define BIO_BOUNCED 3 /* bio is a bounce bio */ +#define BIO_USER_MAPPED 4 /* contains user pages */ +#define BIO_NULL_MAPPED 5 /* contains invalid user pages */ +#define BIO_QUIET 6 /* Make BIO Quiet */ +#define BIO_CHAIN 7 /* chained bio, ->bi_remaining in effect */ +#define BIO_REFFED 8 /* bio has elevated ->bi_cnt */ + +/* + * Flags starting here get preserved by bio_reset() - this includes + * BVEC_POOL_IDX() + */ +#define BIO_RESET_BITS 10 + +/* + * We support 6 different bvec pools, the last one is magic in that it + * is backed by a mempool. + */ +#define BVEC_POOL_NR 6 +#define BVEC_POOL_MAX (BVEC_POOL_NR - 1) + +/* + * Top 4 bits of bio flags indicate the pool the bvecs came from. We add + * 1 to the actual index so that 0 indicates that there are no bvecs to be + * freed. + */ +#define BVEC_POOL_BITS (4) +#define BVEC_POOL_OFFSET (16 - BVEC_POOL_BITS) +#define BVEC_POOL_IDX(bio) ((bio)->bi_flags >> BVEC_POOL_OFFSET) + +/* + * Operations and flags common to the bio and request structures. + * We use 8 bits for encoding the operation, and the remaining 24 for flags. + * + * The least significant bit of the operation number indicates the data + * transfer direction: + * + * - if the least significant bit is set transfers are TO the device + * - if the least significant bit is not set transfers are FROM the device + * + * If a operation does not transfer data the least significant bit has no + * meaning. 
+ */ +#define REQ_OP_BITS 8 +#define REQ_OP_MASK ((1 << REQ_OP_BITS) - 1) +#define REQ_FLAG_BITS 24 + +enum req_opf { + /* read sectors from the device */ + REQ_OP_READ = 0, + /* write sectors to the device */ + REQ_OP_WRITE = 1, + /* flush the volatile write cache */ + REQ_OP_FLUSH = 2, + /* discard sectors */ + REQ_OP_DISCARD = 3, + /* get zone information */ + REQ_OP_ZONE_REPORT = 4, + /* securely erase sectors */ + REQ_OP_SECURE_ERASE = 5, + /* seset a zone write pointer */ + REQ_OP_ZONE_RESET = 6, + /* write the same sector many times */ + REQ_OP_WRITE_SAME = 7, + /* write the zero filled sector many times */ + REQ_OP_WRITE_ZEROES = 8, + + /* SCSI passthrough using struct scsi_request */ + REQ_OP_SCSI_IN = 32, + REQ_OP_SCSI_OUT = 33, + /* Driver private requests */ + REQ_OP_DRV_IN = 34, + REQ_OP_DRV_OUT = 35, + + REQ_OP_LAST, +}; + +enum req_flag_bits { + __REQ_FAILFAST_DEV = /* no driver retries of device errors */ + REQ_OP_BITS, + __REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */ + __REQ_FAILFAST_DRIVER, /* no driver retries of driver errors */ + __REQ_SYNC, /* request is sync (sync write or read) */ + __REQ_META, /* metadata io request */ + __REQ_PRIO, /* boost priority in cfq */ + __REQ_NOMERGE, /* don't touch this for merging */ + __REQ_IDLE, /* anticipate more IO after this one */ + __REQ_INTEGRITY, /* I/O includes block integrity payload */ + __REQ_FUA, /* forced unit access */ + __REQ_PREFLUSH, /* request for cache flush */ + __REQ_RAHEAD, /* read ahead, can fail anytime */ + __REQ_BACKGROUND, /* background IO */ + __REQ_NR_BITS, /* stops here */ +}; + +#define REQ_SYNC (1ULL << __REQ_SYNC) +#define REQ_META (1ULL << __REQ_META) +#define REQ_PRIO (1ULL << __REQ_PRIO) + +#define REQ_NOMERGE_FLAGS (REQ_PREFLUSH | REQ_FUA) + +#define bio_op(bio) \ + ((bio)->bi_opf & REQ_OP_MASK) + +static inline void bio_set_op_attrs(struct bio *bio, unsigned op, + unsigned op_flags) +{ + bio->bi_opf = op | op_flags; +} + +#define REQ_RAHEAD (1ULL << __REQ_RAHEAD) +#define REQ_THROTTLED (1ULL << __REQ_THROTTLED) + +#define REQ_FUA (1ULL << __REQ_FUA) +#define REQ_PREFLUSH (1ULL << __REQ_PREFLUSH) + +#define RW_MASK REQ_OP_WRITE + +#define READ REQ_OP_READ +#define WRITE REQ_OP_WRITE + +#define READ_SYNC REQ_SYNC +#define WRITE_SYNC (REQ_SYNC) +#define WRITE_ODIRECT REQ_SYNC +#define WRITE_FLUSH (REQ_SYNC | REQ_PREFLUSH) +#define WRITE_FUA (REQ_SYNC | REQ_FUA) +#define WRITE_FLUSH_FUA (REQ_SYNC | REQ_PREFLUSH | REQ_FUA) + +#endif /* __LINUX_BLK_TYPES_H */ diff --git a/c_src/include/linux/blkdev.h b/c_src/include/linux/blkdev.h new file mode 100644 index 00000000..39143117 --- /dev/null +++ b/c_src/include/linux/blkdev.h @@ -0,0 +1,190 @@ +#ifndef __TOOLS_LINUX_BLKDEV_H +#define __TOOLS_LINUX_BLKDEV_H + +#include <linux/backing-dev.h> +#include <linux/blk_types.h> +#include <linux/kobject.h> +#include <linux/types.h> + +#define MAX_LFS_FILESIZE ((loff_t)LLONG_MAX) + +#define BIO_MAX_VECS 256U + +typedef unsigned fmode_t; +typedef __u32 __bitwise blk_opf_t; + +struct bio; +struct user_namespace; + +#define MINORBITS 20 +#define MINORMASK ((1U << MINORBITS) - 1) + +#define MAJOR(dev) ((unsigned int) ((dev) >> MINORBITS)) +#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK)) +#define MKDEV(ma,mi) (((ma) << MINORBITS) | (mi)) + +typedef unsigned int __bitwise blk_mode_t; + +/* open for reading */ +#define BLK_OPEN_READ ((__force blk_mode_t)(1 << 0)) +/* open for writing */ +#define BLK_OPEN_WRITE ((__force blk_mode_t)(1 << 1)) +/* open exclusively (vs other exclusive openers */ 
+#define BLK_OPEN_EXCL ((__force blk_mode_t)(1 << 2)) +/* opened with O_NDELAY */ +#define BLK_OPEN_NDELAY ((__force blk_mode_t)(1 << 3)) +/* open for "writes" only for ioctls (specialy hack for floppy.c) */ +#define BLK_OPEN_WRITE_IOCTL ((__force blk_mode_t)(1 << 4)) + +#define BLK_OPEN_BUFFERED ((__force blk_mode_t)(1 << 5)) + +struct inode { + unsigned long i_ino; + loff_t i_size; + struct super_block *i_sb; +}; + +struct file { + struct inode *f_inode; +}; + +static inline struct inode *file_inode(const struct file *f) +{ + return f->f_inode; +} + +#define part_to_dev(part) (part) + +void generic_make_request(struct bio *); +int submit_bio_wait(struct bio *); + +static inline void submit_bio(struct bio *bio) +{ + generic_make_request(bio); +} + +int blkdev_issue_discard(struct block_device *, sector_t, sector_t, gfp_t); +int blkdev_issue_zeroout(struct block_device *, sector_t, sector_t, gfp_t, unsigned); + +#define bdev_get_queue(bdev) (&((bdev)->queue)) + +#ifndef SECTOR_SHIFT +#define SECTOR_SHIFT 9 +#endif +#ifndef SECTOR_SIZE +#define SECTOR_SIZE (1 << SECTOR_SHIFT) +#endif + +#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) +#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT) +#define SECTOR_MASK (PAGE_SECTORS - 1) + +#define bdev_max_discard_sectors(bdev) ((void) (bdev), 0) +#define blk_queue_nonrot(q) ((void) (q), 0) + +unsigned bdev_logical_block_size(struct block_device *bdev); +sector_t get_capacity(struct gendisk *disk); + +struct blk_holder_ops { + void (*mark_dead)(struct block_device *bdev); +}; + +void blkdev_put(struct block_device *bdev, void *holder); +void bdput(struct block_device *bdev); +struct block_device *blkdev_get_by_path(const char *path, blk_mode_t mode, + void *holder, const struct blk_holder_ops *hop); +int lookup_bdev(const char *path, dev_t *); + +struct super_block { + void *s_fs_info; +}; + +/* + * File types + * + * NOTE! These match bits 12..15 of stat.st_mode + * (ie "(i_mode >> 12) & 15"). + */ +#ifndef DT_UNKNOWN +#define DT_UNKNOWN 0 +#define DT_FIFO 1 +#define DT_CHR 2 +#define DT_DIR 4 +#define DT_BLK 6 +#define DT_REG 8 +#define DT_LNK 10 +#define DT_SOCK 12 +#define DT_WHT 14 +#define DT_MAX 16 +#endif + +/* + * This is the "filldir" function type, used by readdir() to let + * the kernel specify what kind of dirent layout it wants to have. + * This allows the kernel to read directories into kernel space or + * to have different dirent layouts depending on the binary type. + */ +struct dir_context; +typedef int (*filldir_t)(struct dir_context *, const char *, int, loff_t, u64, + unsigned); + +struct dir_context { + const filldir_t actor; + u64 pos; +}; + +/* /sys/fs */ +extern struct kobject *fs_kobj; + +struct file_operations { +}; + +static inline int register_chrdev(unsigned int major, const char *name, + const struct file_operations *fops) +{ + return 1; +} + +static inline void unregister_chrdev(unsigned int major, const char *name) +{ +} + +static inline const char *bdevname(struct block_device *bdev, char *buf) +{ + snprintf(buf, BDEVNAME_SIZE, "%s", bdev->name); + return buf; +} + +static inline bool op_is_write(unsigned int op) +{ + return op == REQ_OP_READ ? false : true; +} + +/* + * return data direction, READ or WRITE + */ +static inline int bio_data_dir(struct bio *bio) +{ + return op_is_write(bio_op(bio)) ? 
WRITE : READ; +} + +static inline bool dir_emit(struct dir_context *ctx, + const char *name, int namelen, + u64 ino, unsigned type) +{ + return ctx->actor(ctx, name, namelen, ctx->pos, ino, type) == 0; +} + +static inline bool dir_emit_dots(struct file *file, struct dir_context *ctx) +{ + return true; +} + +#define capable(cap) true + +int blk_status_to_errno(blk_status_t status); +blk_status_t errno_to_blk_status(int errno); +const char *blk_status_to_str(blk_status_t status); + +#endif /* __TOOLS_LINUX_BLKDEV_H */ + diff --git a/c_src/include/linux/bsearch.h b/c_src/include/linux/bsearch.h new file mode 100644 index 00000000..e66b711d --- /dev/null +++ b/c_src/include/linux/bsearch.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_BSEARCH_H +#define _LINUX_BSEARCH_H + +#include <linux/types.h> + +static __always_inline +void *__inline_bsearch(const void *key, const void *base, size_t num, size_t size, cmp_func_t cmp) +{ + const char *pivot; + int result; + + while (num > 0) { + pivot = base + (num >> 1) * size; + result = cmp(key, pivot); + + if (result == 0) + return (void *)pivot; + + if (result > 0) { + base = pivot + size; + num--; + } + num >>= 1; + } + + return NULL; +} + +extern void *bsearch(const void *key, const void *base, size_t num, size_t size, cmp_func_t cmp); + +#endif /* _LINUX_BSEARCH_H */ diff --git a/c_src/include/linux/bug.h b/c_src/include/linux/bug.h new file mode 100644 index 00000000..1a10f7e6 --- /dev/null +++ b/c_src/include/linux/bug.h @@ -0,0 +1,66 @@ +#ifndef __TOOLS_LINUX_BUG_H +#define __TOOLS_LINUX_BUG_H + +#include <assert.h> +#include <stdio.h> +#include <linux/compiler.h> + +#ifdef CONFIG_VALGRIND +#include <valgrind/memcheck.h> + +#define DEBUG_MEMORY_FREED(p, len) VALGRIND_MAKE_MEM_UNDEFINED(p, len) +#endif + +#define BUILD_BUG_ON_NOT_POWER_OF_2(n) \ + BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0)) +#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); })) +#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); })) + +#define BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2*!!(cond)])) + +#define BUG() do { fflush(stdout); assert(0); unreachable(); } while (0) +#define BUG_ON(cond) assert(!(cond)) + +#define WARN(cond, fmt, ...) \ +({ \ + int __ret_warn_on = unlikely(!!(cond)); \ + if (__ret_warn_on) \ + fprintf(stderr, "WARNING at " __FILE__ ":%d: " fmt "\n",\ + __LINE__, ##__VA_ARGS__); \ + __ret_warn_on; \ +}) + +#define __WARN() \ +do { \ + fprintf(stderr, "WARNING at " __FILE__ ":%d\n", __LINE__); \ +} while (0) + +#define WARN_ON(cond) ({ \ + int __ret_warn_on = unlikely(!!(cond)); \ + if (__ret_warn_on) \ + __WARN(); \ + __ret_warn_on; \ +}) + +#define WARN_ONCE(cond, fmt, ...) 
\ +({ \ + static bool __warned; \ + int __ret_warn_on = unlikely(!!(cond)); \ + if (__ret_warn_on && !__warned) { \ + __warned = true; \ + __WARN(); \ + } \ + __ret_warn_on; \ +}) + +#define WARN_ON_ONCE(cond) ({ \ + static bool __warned; \ + int __ret_warn_on = unlikely(!!(cond)); \ + if (__ret_warn_on && !__warned) { \ + __warned = true; \ + __WARN(); \ + } \ + __ret_warn_on; \ +}) + +#endif /* __TOOLS_LINUX_BUG_H */ diff --git a/c_src/include/linux/bvec.h b/c_src/include/linux/bvec.h new file mode 100644 index 00000000..5bc68b42 --- /dev/null +++ b/c_src/include/linux/bvec.h @@ -0,0 +1,101 @@ +/* + * bvec iterator + * + * Copyright (C) 2001 Ming Lei <ming.lei@canonical.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public Licens + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- + */ +#ifndef __LINUX_BVEC_ITER_H +#define __LINUX_BVEC_ITER_H + +#include <linux/kernel.h> +#include <linux/bug.h> + +/* + * was unsigned short, but we might as well be ready for > 64kB I/O pages + */ +struct bio_vec { + struct page *bv_page; + unsigned int bv_len; + unsigned int bv_offset; +}; + +struct bvec_iter { + sector_t bi_sector; /* device address in 512 byte + sectors */ + unsigned int bi_size; /* residual I/O count */ + + unsigned int bi_idx; /* current index into bvl_vec */ + + unsigned int bi_bvec_done; /* number of bytes completed in + current bvec */ +}; + +struct bvec_iter_all { + int idx; +}; + +/* + * various member access, note that bio_data should of course not be used + * on highmem page vectors + */ +#define __bvec_iter_bvec(bvec, iter) (&(bvec)[(iter).bi_idx]) + +#define bvec_iter_page(bvec, iter) \ + (__bvec_iter_bvec((bvec), (iter))->bv_page) + +#define bvec_iter_len(bvec, iter) \ + min((iter).bi_size, \ + __bvec_iter_bvec((bvec), (iter))->bv_len - (iter).bi_bvec_done) + +#define bvec_iter_offset(bvec, iter) \ + (__bvec_iter_bvec((bvec), (iter))->bv_offset + (iter).bi_bvec_done) + +#define bvec_iter_bvec(bvec, iter) \ +((struct bio_vec) { \ + .bv_page = bvec_iter_page((bvec), (iter)), \ + .bv_len = bvec_iter_len((bvec), (iter)), \ + .bv_offset = bvec_iter_offset((bvec), (iter)), \ +}) + +static inline void bvec_iter_advance(const struct bio_vec *bv, + struct bvec_iter *iter, + unsigned bytes) +{ + WARN_ONCE(bytes > iter->bi_size, + "Attempted to advance past end of bvec iter\n"); + + while (bytes) { + unsigned iter_len = bvec_iter_len(bv, *iter); + unsigned len = min(bytes, iter_len); + + bytes -= len; + iter->bi_size -= len; + iter->bi_bvec_done += len; + + if (iter->bi_bvec_done == __bvec_iter_bvec(bv, *iter)->bv_len) { + iter->bi_bvec_done = 0; + iter->bi_idx++; + } + } +} + +#define for_each_bvec(bvl, bio_vec, iter, start) \ + for (iter = (start); \ + (iter).bi_size && \ + ((bvl = bvec_iter_bvec((bio_vec), (iter))), 1); \ + bvec_iter_advance((bio_vec), &(iter), (bvl).bv_len)) + +#endif /* __LINUX_BVEC_ITER_H */ diff --git a/c_src/include/linux/byteorder.h b/c_src/include/linux/byteorder.h new file mode 100644 index 00000000..7b04f5bc --- 
/dev/null +++ b/c_src/include/linux/byteorder.h @@ -0,0 +1,75 @@ +#ifndef __LINUX_BYTEORDER_H +#define __LINUX_BYTEORDER_H + +#include <linux/compiler.h> +#include <asm/byteorder.h> + +#define swab16 __swab16 +#define swab32 __swab32 +#define swab64 __swab64 +#define swahw32 __swahw32 +#define swahb32 __swahb32 +#define swab16p __swab16p +#define swab32p __swab32p +#define swab64p __swab64p +#define swahw32p __swahw32p +#define swahb32p __swahb32p +#define swab16s __swab16s +#define swab32s __swab32s +#define swab64s __swab64s +#define swahw32s __swahw32s +#define swahb32s __swahb32s + +#define cpu_to_le64 __cpu_to_le64 +#define le64_to_cpu __le64_to_cpu +#define cpu_to_le32 __cpu_to_le32 +#define le32_to_cpu __le32_to_cpu +#define cpu_to_le16 __cpu_to_le16 +#define le16_to_cpu __le16_to_cpu +#define cpu_to_be64 __cpu_to_be64 +#define be64_to_cpu __be64_to_cpu +#define cpu_to_be32 __cpu_to_be32 +#define be32_to_cpu __be32_to_cpu +#define cpu_to_be16 __cpu_to_be16 +#define be16_to_cpu __be16_to_cpu +#define cpu_to_le64p __cpu_to_le64p +#define le64_to_cpup __le64_to_cpup +#define cpu_to_le32p __cpu_to_le32p +#define le32_to_cpup __le32_to_cpup +#define cpu_to_le16p __cpu_to_le16p +#define le16_to_cpup __le16_to_cpup +#define cpu_to_be64p __cpu_to_be64p +#define be64_to_cpup __be64_to_cpup +#define cpu_to_be32p __cpu_to_be32p +#define be32_to_cpup __be32_to_cpup +#define cpu_to_be16p __cpu_to_be16p +#define be16_to_cpup __be16_to_cpup +#define cpu_to_le64s __cpu_to_le64s +#define le64_to_cpus __le64_to_cpus +#define cpu_to_le32s __cpu_to_le32s +#define le32_to_cpus __le32_to_cpus +#define cpu_to_le16s __cpu_to_le16s +#define le16_to_cpus __le16_to_cpus +#define cpu_to_be64s __cpu_to_be64s +#define be64_to_cpus __be64_to_cpus +#define cpu_to_be32s __cpu_to_be32s +#define be32_to_cpus __be32_to_cpus +#define cpu_to_be16s __cpu_to_be16s +#define be16_to_cpus __be16_to_cpus + +static inline void le16_add_cpu(__le16 *var, u16 val) +{ + *var = cpu_to_le16(le16_to_cpu(*var) + val); +} + +static inline void le32_add_cpu(__le32 *var, u32 val) +{ + *var = cpu_to_le32(le32_to_cpu(*var) + val); +} + +static inline void le64_add_cpu(__le64 *var, u64 val) +{ + *var = cpu_to_le64(le64_to_cpu(*var) + val); +} + +#endif /* __LINUX_BYTEORDER_H */ diff --git a/c_src/include/linux/cache.h b/c_src/include/linux/cache.h new file mode 100644 index 00000000..c61167ca --- /dev/null +++ b/c_src/include/linux/cache.h @@ -0,0 +1,17 @@ +#ifndef __TOOLS_LINUX_CACHE_H +#define __TOOLS_LINUX_CACHE_H + +#define L1_CACHE_SHIFT 6 +#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) +#define SMP_CACHE_BYTES L1_CACHE_BYTES + +#define L1_CACHE_ALIGN(x) __ALIGN_KERNEL(x, L1_CACHE_BYTES) + +#define __read_mostly +#define __ro_after_init + +#define ____cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES))) +#define ____cacheline_aligned_in_smp ____cacheline_aligned + +#endif /* __TOOLS_LINUX_CACHE_H */ + diff --git a/c_src/include/linux/closure.h b/c_src/include/linux/closure.h new file mode 100644 index 00000000..c554c6a0 --- /dev/null +++ b/c_src/include/linux/closure.h @@ -0,0 +1,415 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_CLOSURE_H +#define _LINUX_CLOSURE_H + +#include <linux/llist.h> +#include <linux/sched.h> +#include <linux/sched/task_stack.h> +#include <linux/workqueue.h> + +/* + * Closure is perhaps the most overused and abused term in computer science, but + * since I've been unable to come up with anything better you're stuck with it + * again. + * + * What are closures? 
+ * + * They embed a refcount. The basic idea is they count "things that are in + * progress" - in flight bios, some other thread that's doing something else - + * anything you might want to wait on. + * + * The refcount may be manipulated with closure_get() and closure_put(). + * closure_put() is where many of the interesting things happen, when it causes + * the refcount to go to 0. + * + * Closures can be used to wait on things both synchronously and asynchronously, + * and synchronous and asynchronous use can be mixed without restriction. To + * wait synchronously, use closure_sync() - you will sleep until your closure's + * refcount hits 1. + * + * To wait asynchronously, use + * continue_at(cl, next_function, workqueue); + * + * passing it, as you might expect, the function to run when nothing is pending + * and the workqueue to run that function out of. + * + * continue_at() also, critically, requires a 'return' immediately following the + * location where this macro is referenced, to return to the calling function. + * There's good reason for this. + * + * To use safely closures asynchronously, they must always have a refcount while + * they are running owned by the thread that is running them. Otherwise, suppose + * you submit some bios and wish to have a function run when they all complete: + * + * foo_endio(struct bio *bio) + * { + * closure_put(cl); + * } + * + * closure_init(cl); + * + * do_stuff(); + * closure_get(cl); + * bio1->bi_endio = foo_endio; + * bio_submit(bio1); + * + * do_more_stuff(); + * closure_get(cl); + * bio2->bi_endio = foo_endio; + * bio_submit(bio2); + * + * continue_at(cl, complete_some_read, system_wq); + * + * If closure's refcount started at 0, complete_some_read() could run before the + * second bio was submitted - which is almost always not what you want! More + * importantly, it wouldn't be possible to say whether the original thread or + * complete_some_read()'s thread owned the closure - and whatever state it was + * associated with! + * + * So, closure_init() initializes a closure's refcount to 1 - and when a + * closure_fn is run, the refcount will be reset to 1 first. + * + * Then, the rule is - if you got the refcount with closure_get(), release it + * with closure_put() (i.e, in a bio->bi_endio function). If you have a refcount + * on a closure because you called closure_init() or you were run out of a + * closure - _always_ use continue_at(). Doing so consistently will help + * eliminate an entire class of particularly pernicious races. + * + * Lastly, you might have a wait list dedicated to a specific event, and have no + * need for specifying the condition - you just want to wait until someone runs + * closure_wake_up() on the appropriate wait list. In that case, just use + * closure_wait(). It will return either true or false, depending on whether the + * closure was already on a wait list or not - a closure can only be on one wait + * list at a time. + * + * Parents: + * + * closure_init() takes two arguments - it takes the closure to initialize, and + * a (possibly null) parent. + * + * If parent is non null, the new closure will have a refcount for its lifetime; + * a closure is considered to be "finished" when its refcount hits 0 and the + * function to run is null. Hence + * + * continue_at(cl, NULL, NULL); + * + * returns up the (spaghetti) stack of closures, precisely like normal return + * returns up the C stack. continue_at() with non null fn is better thought of + * as doing a tail call. 
+ * + * All this implies that a closure should typically be embedded in a particular + * struct (which its refcount will normally control the lifetime of), and that + * struct can very much be thought of as a stack frame. + */ + +struct closure; +struct closure_syncer; +typedef void (closure_fn) (struct work_struct *); +extern struct dentry *bcache_debug; + +struct closure_waitlist { + struct llist_head list; +}; + +enum closure_state { + /* + * CLOSURE_WAITING: Set iff the closure is on a waitlist. Must be set by + * the thread that owns the closure, and cleared by the thread that's + * waking up the closure. + * + * The rest are for debugging and don't affect behaviour: + * + * CLOSURE_RUNNING: Set when a closure is running (i.e. by + * closure_init() and when closure_put() runs the next function), and + * must be cleared before remaining hits 0. Primarily to help guard + * against incorrect usage and accidentally transferring references. + * continue_at() and closure_return() clear it for you, if you're doing + * something unusual you can use closure_set_dead() which also helps + * annotate where references are being transferred. + */ + + CLOSURE_BITS_START = (1U << 26), + CLOSURE_DESTRUCTOR = (1U << 26), + CLOSURE_WAITING = (1U << 28), + CLOSURE_RUNNING = (1U << 30), +}; + +#define CLOSURE_GUARD_MASK \ + ((CLOSURE_DESTRUCTOR|CLOSURE_WAITING|CLOSURE_RUNNING) << 1) + +#define CLOSURE_REMAINING_MASK (CLOSURE_BITS_START - 1) +#define CLOSURE_REMAINING_INITIALIZER (1|CLOSURE_RUNNING) + +struct closure { + union { + struct { + struct workqueue_struct *wq; + struct closure_syncer *s; + struct llist_node list; + closure_fn *fn; + }; + struct work_struct work; + }; + + struct closure *parent; + + atomic_t remaining; + bool closure_get_happened; + +#ifdef CONFIG_DEBUG_CLOSURES +#define CLOSURE_MAGIC_DEAD 0xc054dead +#define CLOSURE_MAGIC_ALIVE 0xc054a11e + + unsigned int magic; + struct list_head all; + unsigned long ip; + unsigned long waiting_on; +#endif +}; + +void closure_sub(struct closure *cl, int v); +void closure_put(struct closure *cl); +void __closure_wake_up(struct closure_waitlist *list); +bool closure_wait(struct closure_waitlist *list, struct closure *cl); +void __closure_sync(struct closure *cl); + +static inline unsigned closure_nr_remaining(struct closure *cl) +{ + return atomic_read(&cl->remaining) & CLOSURE_REMAINING_MASK; +} + +/** + * closure_sync - sleep until a closure has nothing left to wait on + * + * Sleeps until the refcount hits 1 - the thread that's running the closure owns + * the last refcount. 
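+ *
+ * For illustration only, here is a synchronous variant of the example in the
+ * header comment above (foo_endio and bio1 are the same hypothetical names
+ * used there; nothing below is defined by this header):
+ *
+ *     struct closure cl;
+ *
+ *     closure_init_stack(&cl);
+ *     closure_get(&cl);
+ *     bio1->bi_endio = foo_endio;
+ *     bio_submit(bio1);
+ *     closure_sync(&cl);
+ *
+ * foo_endio() drops the ref taken by closure_get() with closure_put(), and
+ * closure_sync() sleeps until only the running thread's ref remains.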
+ */ +static inline void closure_sync(struct closure *cl) +{ +#ifdef CONFIG_DEBUG_CLOSURES + BUG_ON(closure_nr_remaining(cl) != 1 && !cl->closure_get_happened); +#endif + + if (cl->closure_get_happened) + __closure_sync(cl); +} + +#ifdef CONFIG_DEBUG_CLOSURES + +void closure_debug_create(struct closure *cl); +void closure_debug_destroy(struct closure *cl); + +#else + +static inline void closure_debug_create(struct closure *cl) {} +static inline void closure_debug_destroy(struct closure *cl) {} + +#endif + +static inline void closure_set_ip(struct closure *cl) +{ +#ifdef CONFIG_DEBUG_CLOSURES + cl->ip = _THIS_IP_; +#endif +} + +static inline void closure_set_ret_ip(struct closure *cl) +{ +#ifdef CONFIG_DEBUG_CLOSURES + cl->ip = _RET_IP_; +#endif +} + +static inline void closure_set_waiting(struct closure *cl, unsigned long f) +{ +#ifdef CONFIG_DEBUG_CLOSURES + cl->waiting_on = f; +#endif +} + +static inline void closure_set_stopped(struct closure *cl) +{ + atomic_sub(CLOSURE_RUNNING, &cl->remaining); +} + +static inline void set_closure_fn(struct closure *cl, closure_fn *fn, + struct workqueue_struct *wq) +{ + closure_set_ip(cl); + cl->fn = fn; + cl->wq = wq; +} + +static inline void closure_queue(struct closure *cl) +{ + struct workqueue_struct *wq = cl->wq; + /** + * Changes made to closure, work_struct, or a couple of other structs + * may cause work.func not pointing to the right location. + */ + BUILD_BUG_ON(offsetof(struct closure, fn) + != offsetof(struct work_struct, func)); + + if (wq) { + INIT_WORK(&cl->work, cl->work.func); + BUG_ON(!queue_work(wq, &cl->work)); + } else + cl->fn(&cl->work); +} + +/** + * closure_get - increment a closure's refcount + */ +static inline void closure_get(struct closure *cl) +{ + cl->closure_get_happened = true; + +#ifdef CONFIG_DEBUG_CLOSURES + BUG_ON((atomic_inc_return(&cl->remaining) & + CLOSURE_REMAINING_MASK) <= 1); +#else + atomic_inc(&cl->remaining); +#endif +} + +/** + * closure_init - Initialize a closure, setting the refcount to 1 + * @cl: closure to initialize + * @parent: parent of the new closure. cl will take a refcount on it for its + * lifetime; may be NULL. + */ +static inline void closure_init(struct closure *cl, struct closure *parent) +{ + cl->fn = NULL; + cl->parent = parent; + if (parent) + closure_get(parent); + + atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); + cl->closure_get_happened = false; + + closure_debug_create(cl); + closure_set_ip(cl); +} + +static inline void closure_init_stack(struct closure *cl) +{ + memset(cl, 0, sizeof(struct closure)); + atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); +} + +/** + * closure_wake_up - wake up all closures on a wait list, + * with memory barrier + */ +static inline void closure_wake_up(struct closure_waitlist *list) +{ + /* Memory barrier for the wait list */ + smp_mb(); + __closure_wake_up(list); +} + +#define CLOSURE_CALLBACK(name) void name(struct work_struct *ws) +#define closure_type(name, type, member) \ + struct closure *cl = container_of(ws, struct closure, work); \ + type *name = container_of(cl, type, member) + +/** + * continue_at - jump to another function with barrier + * + * After @cl is no longer waiting on anything (i.e. all outstanding refs have + * been dropped with closure_put()), it will resume execution at @fn running out + * of @wq (or, if @wq is NULL, @fn will be called by closure_put() directly). 
+ * + * This is because after calling continue_at() you no longer have a ref on @cl, + * and whatever @cl owns may be freed out from under you - a running closure fn + * has a ref on its own closure which continue_at() drops. + * + * Note you are expected to immediately return after using this macro. + */ +#define continue_at(_cl, _fn, _wq) \ +do { \ + set_closure_fn(_cl, _fn, _wq); \ + closure_sub(_cl, CLOSURE_RUNNING + 1); \ +} while (0) + +/** + * closure_return - finish execution of a closure + * + * This is used to indicate that @cl is finished: when all outstanding refs on + * @cl have been dropped @cl's ref on its parent closure (as passed to + * closure_init()) will be dropped, if one was specified - thus this can be + * thought of as returning to the parent closure. + */ +#define closure_return(_cl) continue_at((_cl), NULL, NULL) + +/** + * continue_at_nobarrier - jump to another function without barrier + * + * Causes @fn to be executed out of @cl, in @wq context (or called directly if + * @wq is NULL). + * + * The ref the caller of continue_at_nobarrier() had on @cl is now owned by @fn, + * thus it's not safe to touch anything protected by @cl after a + * continue_at_nobarrier(). + */ +#define continue_at_nobarrier(_cl, _fn, _wq) \ +do { \ + set_closure_fn(_cl, _fn, _wq); \ + closure_queue(_cl); \ +} while (0) + +/** + * closure_return_with_destructor - finish execution of a closure, + * with destructor + * + * Works like closure_return(), except @destructor will be called when all + * outstanding refs on @cl have been dropped; @destructor may be used to safely + * free the memory occupied by @cl, and it is called with the ref on the parent + * closure still held - so @destructor could safely return an item to a + * freelist protected by @cl's parent. + */ +#define closure_return_with_destructor(_cl, _destructor) \ +do { \ + set_closure_fn(_cl, _destructor, NULL); \ + closure_sub(_cl, CLOSURE_RUNNING - CLOSURE_DESTRUCTOR + 1); \ +} while (0) + +/** + * closure_call - execute @fn out of a new, uninitialized closure + * + * Typically used when running out of one closure, and we want to run @fn + * asynchronously out of a new closure - @parent will then wait for @cl to + * finish. 
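+ *
+ * A rough sketch, with hypothetical names (struct my_op, my_async_fn and
+ * my_wq are not part of this header):
+ *
+ *     struct my_op { struct closure cl; struct closure child; };
+ *
+ *     closure_call(&op->child, my_async_fn, my_wq, &op->cl);
+ *
+ * Here &op->cl will not finish until my_async_fn() has run out of my_wq and
+ * dropped the last ref on &op->child (e.g. via closure_return()).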
+ */ +static inline void closure_call(struct closure *cl, closure_fn fn, + struct workqueue_struct *wq, + struct closure *parent) +{ + closure_init(cl, parent); + continue_at_nobarrier(cl, fn, wq); +} + +#define __closure_wait_event(waitlist, _cond) \ +do { \ + struct closure cl; \ + \ + closure_init_stack(&cl); \ + \ + while (1) { \ + closure_wait(waitlist, &cl); \ + if (_cond) \ + break; \ + closure_sync(&cl); \ + } \ + closure_wake_up(waitlist); \ + closure_sync(&cl); \ +} while (0) + +#define closure_wait_event(waitlist, _cond) \ +do { \ + if (!(_cond)) \ + __closure_wait_event(waitlist, _cond); \ +} while (0) + +#endif /* _LINUX_CLOSURE_H */ diff --git a/c_src/include/linux/compiler.h b/c_src/include/linux/compiler.h new file mode 100644 index 00000000..3ecc3dd1 --- /dev/null +++ b/c_src/include/linux/compiler.h @@ -0,0 +1,190 @@ +#ifndef _TOOLS_LINUX_COMPILER_H_ +#define _TOOLS_LINUX_COMPILER_H_ + +/* Optimization barrier */ +/* The "volatile" is due to gcc bugs */ +#define barrier() __asm__ __volatile__("": : :"memory") +#define barrier_data(ptr) __asm__ __volatile__("": :"r"(ptr) :"memory") + +#ifndef __always_inline +# define __always_inline inline __attribute__((always_inline)) +#endif + +#ifndef __attribute_const__ +#define __attribute_const__ __attribute__((__const__)) +#endif + +#ifdef __ANDROID__ +/* + * FIXME: Big hammer to get rid of tons of: + * "warning: always_inline function might not be inlinable" + * + * At least on android-ndk-r12/platforms/android-24/arch-arm + */ +#undef __always_inline +#define __always_inline inline +#endif + +#define noinline +#define noinline_for_stack noinline + +#define __user +#define __kernel + +#define __pure __attribute__((pure)) +#define __aligned(x) __attribute__((aligned(x))) +#define __printf(a, b) __attribute__((format(printf, a, b))) +#define __used __attribute__((__used__)) +#define __maybe_unused __attribute__((unused)) +#define __always_unused __attribute__((unused)) +#define __packed __attribute__((__packed__)) +#define __flatten __attribute__((flatten)) +#define __force +#define __nocast +#define __iomem +#define __chk_user_ptr(x) (void)0 +#define __chk_io_ptr(x) (void)0 +#define __builtin_warning(x, y...) (1) +#define __must_hold(x) +#define __acquires(x) +#define __cond_acquires(x) +#define __releases(x) +#define __acquire(x) (void)0 +#define __release(x) (void)0 +#define __cond_lock(x,c) (c) +#define __percpu +#define __rcu +#define __sched +#define __init +#define __exit +#define __private +#define __must_check +#define __malloc +#define __weak __attribute__((weak)) +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) +#define unreachable() __builtin_unreachable() +#define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) +#define fallthrough __attribute__((__fallthrough__)) +#define __noreturn __attribute__((__noreturn__)) + +#ifndef __counted_by +#define __counted_by(nr) +#endif + +#define ___PASTE(a,b) a##b +#define __PASTE(a,b) ___PASTE(a,b) +#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__) + +#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) + +#define __initcall(x) /* unimplemented */ +#define __exitcall(x) /* unimplemented */ + +#include <linux/types.h> + +/* + * Following functions are taken from kernel sources and + * break aliasing rules in their original form. + * + * While kernel is compiled with -fno-strict-aliasing, + * perf uses -Wstrict-aliasing=3 which makes build fail + * under gcc 4.4. 
+ * + * Using extra __may_alias__ type to allow aliasing + * in this case. + */ +typedef __u8 __attribute__((__may_alias__)) __u8_alias_t; +typedef __u16 __attribute__((__may_alias__)) __u16_alias_t; +typedef __u32 __attribute__((__may_alias__)) __u32_alias_t; +typedef __u64 __attribute__((__may_alias__)) __u64_alias_t; + +static __always_inline void __read_once_size(const volatile void *p, void *res, int size) +{ + switch (size) { + case 1: *(__u8_alias_t *) res = *(volatile __u8_alias_t *) p; break; + case 2: *(__u16_alias_t *) res = *(volatile __u16_alias_t *) p; break; + case 4: *(__u32_alias_t *) res = *(volatile __u32_alias_t *) p; break; + case 8: *(__u64_alias_t *) res = *(volatile __u64_alias_t *) p; break; + default: + barrier(); + __builtin_memcpy((void *)res, (const void *)p, size); + barrier(); + } +} + +static __always_inline void __write_once_size(volatile void *p, void *res, int size) +{ + switch (size) { + case 1: *(volatile __u8_alias_t *) p = *(__u8_alias_t *) res; break; + case 2: *(volatile __u16_alias_t *) p = *(__u16_alias_t *) res; break; + case 4: *(volatile __u32_alias_t *) p = *(__u32_alias_t *) res; break; + case 8: *(volatile __u64_alias_t *) p = *(__u64_alias_t *) res; break; + default: + barrier(); + __builtin_memcpy((void *)p, (const void *)res, size); + barrier(); + } +} + +/* + * Prevent the compiler from merging or refetching reads or writes. The + * compiler is also forbidden from reordering successive instances of + * READ_ONCE, WRITE_ONCE and ACCESS_ONCE (see below), but only when the + * compiler is aware of some particular ordering. One way to make the + * compiler aware of ordering is to put the two invocations of READ_ONCE, + * WRITE_ONCE or ACCESS_ONCE() in different C statements. + * + * In contrast to ACCESS_ONCE these two macros will also work on aggregate + * data types like structs or unions. If the size of the accessed data + * type exceeds the word size of the machine (e.g., 32 bits or 64 bits) + * READ_ONCE() and WRITE_ONCE() will fall back to memcpy and print a + * compile-time warning. + * + * Their two major use cases are: (1) Mediating communication between + * process-level code and irq/NMI handlers, all running on the same CPU, + * and (2) Ensuring that the compiler does not fold, spindle, or otherwise + * mutilate accesses that either do not require ordering or that interact + * with an explicit memory barrier or atomic instruction that provides the + * required ordering. + */ + +#define READ_ONCE(x) \ + ({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; }) + +#define WRITE_ONCE(x, val) \ + ({ union { typeof(x) __val; char __c[1]; } __u = { .__val = (val) }; __write_once_size(&(x), __u.__c, sizeof(x)); __u.__val; }) + +#define lockless_dereference(p) \ +({ \ + typeof(p) _________p1 = READ_ONCE(p); \ + typeof(*(p)) *___typecheck_p __maybe_unused; \ + smp_read_barrier_depends(); /* Dependency order vs. p above. 
*/ \ + (_________p1); \ +}) + +#define flush_cache_all() do { } while (0) +#define flush_cache_mm(mm) do { } while (0) +#define flush_cache_dup_mm(mm) do { } while (0) +#define flush_cache_range(vma, start, end) do { } while (0) +#define flush_cache_page(vma, vmaddr, pfn) do { } while (0) +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 +#define flush_dcache_page(page) do { } while (0) +#define flush_dcache_mmap_lock(mapping) do { } while (0) +#define flush_dcache_mmap_unlock(mapping) do { } while (0) +#define flush_icache_range(start, end) do { } while (0) +#define flush_icache_page(vma,pg) do { } while (0) +#define flush_icache_user_range(vma,pg,adr,len) do { } while (0) +#define flush_cache_vmap(start, end) do { } while (0) +#define flush_cache_vunmap(start, end) do { } while (0) + +#ifdef __x86_64 +#define CONFIG_X86_64 y +#endif + +#define __is_constexpr(x) \ + (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8))) +#define is_signed_type(type) (((type)(-1)) < (__force type)1) +#define is_unsigned_type(type) (!is_signed_type(type)) + +#endif /* _TOOLS_LINUX_COMPILER_H */ diff --git a/c_src/include/linux/completion.h b/c_src/include/linux/completion.h new file mode 100644 index 00000000..d11a8dd0 --- /dev/null +++ b/c_src/include/linux/completion.h @@ -0,0 +1,42 @@ +#ifndef __LINUX_COMPLETION_H +#define __LINUX_COMPLETION_H + +/* + * (C) Copyright 2001 Linus Torvalds + * + * Atomic wait-for-completion handler data structures. + * See kernel/sched/completion.c for details. + */ + +#include <linux/wait.h> + +struct completion { + unsigned int done; + wait_queue_head_t wait; +}; + +#define DECLARE_COMPLETION(work) \ + struct completion work = { \ + .done = 0, \ + .wait = __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) \ + } + +#define DECLARE_COMPLETION_ONSTACK(work) DECLARE_COMPLETION(work) + +static inline void init_completion(struct completion *x) +{ + x->done = 0; + init_waitqueue_head(&x->wait); +} + +static inline void reinit_completion(struct completion *x) +{ + x->done = 0; +} + +void complete(struct completion *); +void wait_for_completion(struct completion *); + +#define wait_for_completion_interruptible(x) (wait_for_completion(x), 0) + +#endif diff --git a/c_src/include/linux/console.h b/c_src/include/linux/console.h new file mode 100644 index 00000000..d01aa9a2 --- /dev/null +++ b/c_src/include/linux/console.h @@ -0,0 +1,7 @@ +#ifndef _LINUX_CONSOLE_H_ +#define _LINUX_CONSOLE_H_ + +#define console_lock() +#define console_unlock() + +#endif /* _LINUX_CONSOLE_H */ diff --git a/c_src/include/linux/cpumask.h b/c_src/include/linux/cpumask.h new file mode 100644 index 00000000..bfab7ea7 --- /dev/null +++ b/c_src/include/linux/cpumask.h @@ -0,0 +1,26 @@ +#ifndef __LINUX_CPUMASK_H +#define __LINUX_CPUMASK_H + +#define num_online_cpus() 1U +#define num_possible_cpus() 1U +#define num_present_cpus() 1U +#define num_active_cpus() 1U +#define cpu_online(cpu) ((cpu) == 0) +#define cpu_possible(cpu) ((cpu) == 0) +#define cpu_present(cpu) ((cpu) == 0) +#define cpu_active(cpu) ((cpu) == 0) + +#define raw_smp_processor_id() 0U + +#define for_each_cpu(cpu, mask) \ + for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) +#define for_each_cpu_not(cpu, mask) \ + for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) +#define for_each_cpu_and(cpu, mask, and) \ + for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)and) + +#define for_each_possible_cpu(cpu) for_each_cpu((cpu), 1) +#define for_each_online_cpu(cpu) for_each_cpu((cpu), 1) +#define for_each_present_cpu(cpu) for_each_cpu((cpu), 1) + +#endif /* 
__LINUX_CPUMASK_H */ diff --git a/c_src/include/linux/crc32c.h b/c_src/include/linux/crc32c.h new file mode 100644 index 00000000..1ac74f7d --- /dev/null +++ b/c_src/include/linux/crc32c.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_CRC32C_H +#define _LINUX_CRC32C_H + +#include "tools-util.h" + +#endif /* _LINUX_CRC32C_H */ diff --git a/c_src/include/linux/crc64.h b/c_src/include/linux/crc64.h new file mode 100644 index 00000000..c756e65a --- /dev/null +++ b/c_src/include/linux/crc64.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * See lib/crc64.c for the related specification and polynomial arithmetic. + */ +#ifndef _LINUX_CRC64_H +#define _LINUX_CRC64_H + +#include <linux/types.h> + +u64 __pure crc64_be(u64 crc, const void *p, size_t len); +#endif /* _LINUX_CRC64_H */ diff --git a/c_src/include/linux/crypto.h b/c_src/include/linux/crypto.h new file mode 100644 index 00000000..866b4c5a --- /dev/null +++ b/c_src/include/linux/crypto.h @@ -0,0 +1,45 @@ +/* + * Scatterlist Cryptographic API. + * + * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> + * Copyright (c) 2002 David S. Miller (davem@redhat.com) + * Copyright (c) 2005 Herbert Xu <herbert@gondor.apana.org.au> + * + * Portions derived from Cryptoapi, by Alexander Kjeldaas <astor@fast.no> + * and Nettle, by Niels Möller. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ +#ifndef _LINUX_CRYPTO_H +#define _LINUX_CRYPTO_H + +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/slab.h> + +#define CRYPTO_MINALIGN ARCH_KMALLOC_MINALIGN +#define CRYPTO_MINALIGN_ATTR __attribute__ ((__aligned__(CRYPTO_MINALIGN))) + +struct crypto_type; + +struct crypto_alg { + struct list_head cra_list; + + const char *cra_name; + const struct crypto_type *cra_type; + + void * (*alloc_tfm)(void); +} CRYPTO_MINALIGN_ATTR; + +int crypto_register_alg(struct crypto_alg *alg); + +struct crypto_tfm { + struct crypto_alg *alg; +}; + +#endif /* _LINUX_CRYPTO_H */ + diff --git a/c_src/include/linux/ctype.h b/c_src/include/linux/ctype.h new file mode 100644 index 00000000..26b7de5a --- /dev/null +++ b/c_src/include/linux/ctype.h @@ -0,0 +1,2 @@ + +#include <ctype.h> diff --git a/c_src/include/linux/dcache.h b/c_src/include/linux/dcache.h new file mode 100644 index 00000000..7637854d --- /dev/null +++ b/c_src/include/linux/dcache.h @@ -0,0 +1,12 @@ +#ifndef __LINUX_DCACHE_H +#define __LINUX_DCACHE_H + +struct super_block; +struct inode; + +struct dentry { + struct super_block *d_sb; + struct inode *d_inode; +}; + +#endif /* __LINUX_DCACHE_H */ diff --git a/c_src/include/linux/debugfs.h b/c_src/include/linux/debugfs.h new file mode 100644 index 00000000..9a78cb16 --- /dev/null +++ b/c_src/include/linux/debugfs.h @@ -0,0 +1,46 @@ +/* + * debugfs.h - a tiny little debug file system + * + * Copyright (C) 2004 Greg Kroah-Hartman <greg@kroah.com> + * Copyright (C) 2004 IBM Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * debugfs is for people to use instead of /proc or /sys. + * See Documentation/DocBook/filesystems for more details. 
+ */ + +#ifndef _DEBUGFS_H_ +#define _DEBUGFS_H_ + +#include <linux/fs.h> +#include <linux/seq_file.h> +#include <linux/types.h> +#include <linux/compiler.h> + +struct file_operations; + +#include <linux/err.h> + +static inline struct dentry *debugfs_create_file(const char *name, umode_t mode, + struct dentry *parent, void *data, + const struct file_operations *fops) +{ + return ERR_PTR(-ENODEV); +} + +static inline struct dentry *debugfs_create_dir(const char *name, + struct dentry *parent) +{ + return ERR_PTR(-ENODEV); +} + +static inline void debugfs_remove(struct dentry *dentry) +{ } + +static inline void debugfs_remove_recursive(struct dentry *dentry) +{ } + +#endif diff --git a/c_src/include/linux/device.h b/c_src/include/linux/device.h new file mode 100644 index 00000000..2b2b8494 --- /dev/null +++ b/c_src/include/linux/device.h @@ -0,0 +1,40 @@ +#ifndef _DEVICE_H_ +#define _DEVICE_H_ + +#include <linux/slab.h> +#include <linux/types.h> + +struct module; + +struct class { +}; + +static inline void class_destroy(struct class *class) +{ + kfree(class); +} + +static inline struct class * __must_check class_create(struct module *owner, + const char *name) +{ + return kzalloc(sizeof(struct class), GFP_KERNEL); +} + +struct device { +}; + +static inline void device_unregister(struct device *dev) +{ + kfree(dev); +} + +static inline void device_destroy(struct class *cls, dev_t devt) {} + +static inline struct device *device_create(struct class *cls, struct device *parent, + dev_t devt, void *drvdata, + const char *fmt, ...) +{ + return kzalloc(sizeof(struct device), GFP_KERNEL); +} + +#endif /* _DEVICE_H_ */ diff --git a/c_src/include/linux/dynamic_fault.h b/c_src/include/linux/dynamic_fault.h new file mode 100644 index 00000000..dd215dcb --- /dev/null +++ b/c_src/include/linux/dynamic_fault.h @@ -0,0 +1,7 @@ +#ifndef __TOOLS_LINUX_DYNAMIC_FAULT_H +#define __TOOLS_LINUX_DYNAMIC_FAULT_H + +#define dynamic_fault(_class) 0 +#define race_fault() 0 + +#endif /* __TOOLS_LINUX_DYNAMIC_FAULT_H */ diff --git a/c_src/include/linux/err.h b/c_src/include/linux/err.h new file mode 100644 index 00000000..e94bdff5 --- /dev/null +++ b/c_src/include/linux/err.h @@ -0,0 +1,68 @@ +#ifndef __TOOLS_LINUX_ERR_H +#define __TOOLS_LINUX_ERR_H + +#include <linux/compiler.h> +#include <linux/types.h> + +#include <asm/errno.h> + +/* + * Original kernel header comment: + * + * Kernel pointers have redundant information, so we can use a + * scheme where we can return either an error code or a normal + * pointer with the same return value. + * + * This should be a per-architecture thing, to allow different + * error and pointer decisions. + * + * Userspace note: + * The same principle works for userspace, because 'error' pointers + * fall down to the unused hole far from user space, as described + * in Documentation/x86/x86_64/mm.txt for x86_64 arch: + * + * 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm hole caused by [48:63] sign extension + * ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole + * + * It should be the same case for other architectures, because + * this code is used in generic kernel code. 
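+ *
+ * Usage sketch, for illustration only (foo_alloc() is a hypothetical function
+ * returning either a valid pointer or something like ERR_PTR(-ENOMEM)):
+ *
+ *     struct foo *f = foo_alloc();
+ *
+ *     if (IS_ERR(f))
+ *             return PTR_ERR(f);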
+ */ +#define MAX_ERRNO 4095 + +#define IS_ERR_VALUE(x) unlikely((x) >= (unsigned long)-MAX_ERRNO) + +static inline void * __must_check ERR_PTR(long error_) +{ + return (void *) error_; +} + +static inline long __must_check PTR_ERR(__force const void *ptr) +{ + return (long) ptr; +} + +static inline bool __must_check IS_ERR(__force const void *ptr) +{ + return IS_ERR_VALUE((unsigned long)ptr); +} + +static inline bool __must_check IS_ERR_OR_NULL(__force const void *ptr) +{ + return unlikely(!ptr) || IS_ERR_VALUE((unsigned long)ptr); +} + +static inline void * __must_check ERR_CAST(__force const void *ptr) +{ + /* cast away the const */ + return (void *) ptr; +} + +static inline int __must_check PTR_ERR_OR_ZERO(__force const void *ptr) +{ + if (IS_ERR(ptr)) + return PTR_ERR(ptr); + else + return 0; +} + +#endif /* _LINUX_ERR_H */ diff --git a/c_src/include/linux/errname.h b/c_src/include/linux/errname.h new file mode 100644 index 00000000..443d5040 --- /dev/null +++ b/c_src/include/linux/errname.h @@ -0,0 +1,11 @@ +#ifndef _LINUX_ERRNAME_H +#define _LINUX_ERRNAME_H + +#include <string.h> + +static inline const char *errname(int err) +{ + return strerror(abs(err)); +} + +#endif /* _LINUX_ERRNAME_H */ diff --git a/c_src/include/linux/export.h b/c_src/include/linux/export.h new file mode 100644 index 00000000..af9da968 --- /dev/null +++ b/c_src/include/linux/export.h @@ -0,0 +1,13 @@ +#ifndef _TOOLS_LINUX_EXPORT_H_ +#define _TOOLS_LINUX_EXPORT_H_ + +#define EXPORT_SYMBOL(sym) +#define EXPORT_SYMBOL_GPL(sym) +#define EXPORT_SYMBOL_GPL_FUTURE(sym) +#define EXPORT_UNUSED_SYMBOL(sym) +#define EXPORT_UNUSED_SYMBOL_GPL(sym) + +#define THIS_MODULE ((struct module *)0) +#define KBUILD_MODNAME + +#endif diff --git a/c_src/include/linux/freezer.h b/c_src/include/linux/freezer.h new file mode 100644 index 00000000..d90373f3 --- /dev/null +++ b/c_src/include/linux/freezer.h @@ -0,0 +1,12 @@ +#ifndef __TOOLS_LINUX_FREEZER_H +#define __TOOLS_LINUX_FREEZER_H + +#define try_to_freeze() +#define set_freezable() +#define freezing(task) false +#define freezable_schedule() schedule() +#define freezable_schedule_timeout(_t) schedule_timeout(_t) + +static inline void __refrigerator(bool f) {} + +#endif /* __TOOLS_LINUX_FREEZER_H */ diff --git a/c_src/include/linux/generic-radix-tree.h b/c_src/include/linux/generic-radix-tree.h new file mode 100644 index 00000000..84741316 --- /dev/null +++ b/c_src/include/linux/generic-radix-tree.h @@ -0,0 +1,298 @@ +#ifndef _LINUX_GENERIC_RADIX_TREE_H +#define _LINUX_GENERIC_RADIX_TREE_H + +/** + * DOC: Generic radix trees/sparse arrays + * + * Very simple and minimalistic, supporting arbitrary size entries up to + * PAGE_SIZE. + * + * A genradix is defined with the type it will store, like so: + * + * static GENRADIX(struct foo) foo_genradix; + * + * The main operations are: + * + * - genradix_init(radix) - initialize an empty genradix + * + * - genradix_free(radix) - free all memory owned by the genradix and + * reinitialize it + * + * - genradix_ptr(radix, idx) - gets a pointer to the entry at idx, returning + * NULL if that entry does not exist + * + * - genradix_ptr_alloc(radix, idx, gfp) - gets a pointer to an entry, + * allocating it if necessary + * + * - genradix_for_each(radix, iter, p) - iterate over each entry in a genradix + * + * The radix tree allocates one page of entries at a time, so entries may exist + * that were never explicitly allocated - they will be initialized to all + * zeroes. 
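+ *
+ * A short usage sketch, reusing the illustrative GENRADIX(struct foo)
+ * declaration shown above (the index and error handling are hypothetical):
+ *
+ *     static GENRADIX(struct foo) foo_genradix;
+ *
+ *     struct foo *f = genradix_ptr_alloc(&foo_genradix, idx, GFP_KERNEL);
+ *     if (!f)
+ *             return -ENOMEM;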
+ * + * Internally, a genradix is just a radix tree of pages, and indexing works in + * terms of byte offsets. The wrappers in this header file use sizeof on the + * type the radix contains to calculate a byte offset from the index - see + * __idx_to_offset. + */ + +#include <asm/page.h> +#include <linux/bug.h> +#include <linux/limits.h> +#include <linux/log2.h> +#include <linux/math.h> +#include <linux/types.h> + +struct genradix_root; + +struct __genradix { + struct genradix_root *root; +}; + +/* + * NOTE: currently, sizeof(_type) must not be larger than PAGE_SIZE: + */ + +#define __GENRADIX_INITIALIZER \ + { \ + .tree = { \ + .root = NULL, \ + } \ + } + +/* + * We use a 0 size array to stash the type we're storing without taking any + * space at runtime - then the various accessor macros can use typeof() to get + * to it for casts/sizeof - we also force the alignment so that storing a type + * with a ridiculous alignment doesn't blow up the alignment or size of the + * genradix. + */ + +#define GENRADIX(_type) \ +struct { \ + struct __genradix tree; \ + _type type[0] __aligned(1); \ +} + +#define DEFINE_GENRADIX(_name, _type) \ + GENRADIX(_type) _name = __GENRADIX_INITIALIZER + +/** + * genradix_init - initialize a genradix + * @_radix: genradix to initialize + * + * Does not fail + */ +#define genradix_init(_radix) \ +do { \ + *(_radix) = (typeof(*_radix)) __GENRADIX_INITIALIZER; \ +} while (0) + +void __genradix_free(struct __genradix *); + +/** + * genradix_free: free all memory owned by a genradix + * @_radix: the genradix to free + * + * After freeing, @_radix will be reinitialized and empty + */ +#define genradix_free(_radix) __genradix_free(&(_radix)->tree) + +static inline size_t __idx_to_offset(size_t idx, size_t obj_size) +{ + if (__builtin_constant_p(obj_size)) + BUILD_BUG_ON(obj_size > PAGE_SIZE); + else + BUG_ON(obj_size > PAGE_SIZE); + + if (!is_power_of_2(obj_size)) { + size_t objs_per_page = PAGE_SIZE / obj_size; + + return (idx / objs_per_page) * PAGE_SIZE + + (idx % objs_per_page) * obj_size; + } else { + return idx * obj_size; + } +} + +#define __genradix_cast(_radix) (typeof((_radix)->type[0]) *) +#define __genradix_obj_size(_radix) sizeof((_radix)->type[0]) +#define __genradix_objs_per_page(_radix) \ + (PAGE_SIZE / sizeof((_radix)->type[0])) +#define __genradix_page_remainder(_radix) \ + (PAGE_SIZE % sizeof((_radix)->type[0])) + +#define __genradix_idx_to_offset(_radix, _idx) \ + __idx_to_offset(_idx, __genradix_obj_size(_radix)) + +void *__genradix_ptr(struct __genradix *, size_t); + +/** + * genradix_ptr - get a pointer to a genradix entry + * @_radix: genradix to access + * @_idx: index to fetch + * + * Returns a pointer to entry at @_idx, or NULL if that entry does not exist. 
+ */ +#define genradix_ptr(_radix, _idx) \ + (__genradix_cast(_radix) \ + __genradix_ptr(&(_radix)->tree, \ + __genradix_idx_to_offset(_radix, _idx))) + +void *__genradix_ptr_alloc(struct __genradix *, size_t, gfp_t); + +/** + * genradix_ptr_alloc - get a pointer to a genradix entry, allocating it + * if necessary + * @_radix: genradix to access + * @_idx: index to fetch + * @_gfp: gfp mask + * + * Returns a pointer to entry at @_idx, or NULL on allocation failure + */ +#define genradix_ptr_alloc(_radix, _idx, _gfp) \ + (__genradix_cast(_radix) \ + __genradix_ptr_alloc(&(_radix)->tree, \ + __genradix_idx_to_offset(_radix, _idx), \ + _gfp)) + +struct genradix_iter { + size_t offset; + size_t pos; +}; + +/** + * genradix_iter_init - initialize a genradix_iter + * @_radix: genradix that will be iterated over + * @_idx: index to start iterating from + */ +#define genradix_iter_init(_radix, _idx) \ + ((struct genradix_iter) { \ + .pos = (_idx), \ + .offset = __genradix_idx_to_offset((_radix), (_idx)),\ + }) + +void *__genradix_iter_peek(struct genradix_iter *, struct __genradix *, size_t); + +/** + * genradix_iter_peek - get first entry at or above iterator's current + * position + * @_iter: a genradix_iter + * @_radix: genradix being iterated over + * + * If no more entries exist at or above @_iter's current position, returns NULL + */ +#define genradix_iter_peek(_iter, _radix) \ + (__genradix_cast(_radix) \ + __genradix_iter_peek(_iter, &(_radix)->tree, \ + __genradix_objs_per_page(_radix))) + +void *__genradix_iter_peek_prev(struct genradix_iter *, struct __genradix *, + size_t, size_t); + +/** + * genradix_iter_peek_prev - get first entry at or below iterator's current + * position + * @_iter: a genradix_iter + * @_radix: genradix being iterated over + * + * If no more entries exist at or below @_iter's current position, returns NULL + */ +#define genradix_iter_peek_prev(_iter, _radix) \ + (__genradix_cast(_radix) \ + __genradix_iter_peek_prev(_iter, &(_radix)->tree, \ + __genradix_objs_per_page(_radix), \ + __genradix_obj_size(_radix) + \ + __genradix_page_remainder(_radix))) + +static inline void __genradix_iter_advance(struct genradix_iter *iter, + size_t obj_size) +{ + if (iter->offset + obj_size < iter->offset) { + iter->offset = SIZE_MAX; + iter->pos = SIZE_MAX; + return; + } + + iter->offset += obj_size; + + if (!is_power_of_2(obj_size) && + (iter->offset & (PAGE_SIZE - 1)) + obj_size > PAGE_SIZE) + iter->offset = round_up(iter->offset, PAGE_SIZE); + + iter->pos++; +} + +#define genradix_iter_advance(_iter, _radix) \ + __genradix_iter_advance(_iter, __genradix_obj_size(_radix)) + +static inline void __genradix_iter_rewind(struct genradix_iter *iter, + size_t obj_size) +{ + if (iter->offset == 0 || + iter->offset == SIZE_MAX) { + iter->offset = SIZE_MAX; + return; + } + + if ((iter->offset & (PAGE_SIZE - 1)) == 0) + iter->offset -= PAGE_SIZE % obj_size; + + iter->offset -= obj_size; + iter->pos--; +} + +#define genradix_iter_rewind(_iter, _radix) \ + __genradix_iter_rewind(_iter, __genradix_obj_size(_radix)) + +#define genradix_for_each_from(_radix, _iter, _p, _start) \ + for (_iter = genradix_iter_init(_radix, _start); \ + (_p = genradix_iter_peek(&_iter, _radix)) != NULL; \ + genradix_iter_advance(&_iter, _radix)) + +/** + * genradix_for_each - iterate over entry in a genradix + * @_radix: genradix to iterate over + * @_iter: a genradix_iter to track current position + * @_p: pointer to genradix entry type + * + * On every iteration, @_p will point to the current entry, and 
@_iter.pos + * will be the current entry's index. + */ +#define genradix_for_each(_radix, _iter, _p) \ + genradix_for_each_from(_radix, _iter, _p, 0) + +#define genradix_last_pos(_radix) \ + (SIZE_MAX / PAGE_SIZE * __genradix_objs_per_page(_radix) - 1) + +/** + * genradix_for_each_reverse - iterate over entry in a genradix, reverse order + * @_radix: genradix to iterate over + * @_iter: a genradix_iter to track current position + * @_p: pointer to genradix entry type + * + * On every iteration, @_p will point to the current entry, and @_iter.pos + * will be the current entry's index. + */ +#define genradix_for_each_reverse(_radix, _iter, _p) \ + for (_iter = genradix_iter_init(_radix, genradix_last_pos(_radix));\ + (_p = genradix_iter_peek_prev(&_iter, _radix)) != NULL;\ + genradix_iter_rewind(&_iter, _radix)) + +int __genradix_prealloc(struct __genradix *, size_t, gfp_t); + +/** + * genradix_prealloc - preallocate entries in a generic radix tree + * @_radix: genradix to preallocate + * @_nr: number of entries to preallocate + * @_gfp: gfp mask + * + * Returns 0 on success, -ENOMEM on failure + */ +#define genradix_prealloc(_radix, _nr, _gfp) \ + __genradix_prealloc(&(_radix)->tree, \ + __genradix_idx_to_offset(_radix, _nr + 1),\ + _gfp) + + +#endif /* _LINUX_GENERIC_RADIX_TREE_H */ diff --git a/c_src/include/linux/genhd.h b/c_src/include/linux/genhd.h new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/c_src/include/linux/genhd.h diff --git a/c_src/include/linux/gfp.h b/c_src/include/linux/gfp.h new file mode 100644 index 00000000..3830bc2f --- /dev/null +++ b/c_src/include/linux/gfp.h @@ -0,0 +1 @@ +#include <linux/slab.h> diff --git a/c_src/include/linux/hash.h b/c_src/include/linux/hash.h new file mode 100644 index 00000000..ad6fa21d --- /dev/null +++ b/c_src/include/linux/hash.h @@ -0,0 +1,104 @@ +#ifndef _LINUX_HASH_H +#define _LINUX_HASH_H +/* Fast hashing routine for ints, longs and pointers. + (C) 2002 Nadia Yvette Chambers, IBM */ + +#include <asm/types.h> +#include <linux/compiler.h> + +/* + * The "GOLDEN_RATIO_PRIME" is used in fs/btrfs/btrfs_inode.h and + * fs/inode.c. It's not actually prime any more (the previous primes + * were actively bad for hashing), but the name remains. + */ +#if BITS_PER_LONG == 32 +#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_32 +#define hash_long(val, bits) hash_32(val, bits) +#elif BITS_PER_LONG == 64 +#define hash_long(val, bits) hash_64(val, bits) +#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_64 +#else +#error Wordsize not 32 or 64 +#endif + +/* + * This hash multiplies the input by a large odd number and takes the + * high bits. Since multiplication propagates changes to the most + * significant end only, it is essential that the high bits of the + * product be used for the hash value. + * + * Chuck Lever verified the effectiveness of this technique: + * http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf + * + * Although a random odd number will do, it turns out that the golden + * ratio phi = (sqrt(5)-1)/2, or its negative, has particularly nice + * properties. (See Knuth vol 3, section 6.4, exercise 9.) + * + * These are the negative, (1 - phi) = phi**2 = (3 - sqrt(5))/2, + * which is very slightly easier to multiply by and makes no + * difference to the hash distribution. 
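+ *
+ * As a concrete illustration (not part of the interface): hash_32(val, 10)
+ * below evaluates to (val * GOLDEN_RATIO_32) >> (32 - 10), i.e. the top 10
+ * bits of the 32-bit product, giving a table index in the range [0, 1023].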
+ */ +#define GOLDEN_RATIO_32 0x61C88647 +#define GOLDEN_RATIO_64 0x61C8864680B583EBull + +#ifdef CONFIG_HAVE_ARCH_HASH +/* This header may use the GOLDEN_RATIO_xx constants */ +#include <asm/hash.h> +#endif + +/* + * The _generic versions exist only so lib/test_hash.c can compare + * the arch-optimized versions with the generic. + * + * Note that if you change these, any <asm/hash.h> that aren't updated + * to match need to have their HAVE_ARCH_* define values updated so the + * self-test will not false-positive. + */ +#ifndef HAVE_ARCH__HASH_32 +#define __hash_32 __hash_32_generic +#endif +static inline u32 __hash_32_generic(u32 val) +{ + return val * GOLDEN_RATIO_32; +} + +#ifndef HAVE_ARCH_HASH_32 +#define hash_32 hash_32_generic +#endif +static inline u32 hash_32_generic(u32 val, unsigned int bits) +{ + /* High bits are more random, so use them. */ + return __hash_32(val) >> (32 - bits); +} + +#ifndef HAVE_ARCH_HASH_64 +#define hash_64 hash_64_generic +#endif +static __always_inline u32 hash_64_generic(u64 val, unsigned int bits) +{ +#if BITS_PER_LONG == 64 + /* 64x64-bit multiply is efficient on all 64-bit processors */ + return val * GOLDEN_RATIO_64 >> (64 - bits); +#else + /* Hash 64 bits using only 32x32-bit multiply. */ + return hash_32((u32)val ^ __hash_32(val >> 32), bits); +#endif +} + +static inline u32 hash_ptr(const void *ptr, unsigned int bits) +{ + return hash_long((unsigned long)ptr, bits); +} + +/* This really should be called fold32_ptr; it does no hashing to speak of. */ +static inline u32 hash32_ptr(const void *ptr) +{ + unsigned long val = (unsigned long)ptr; + +#if BITS_PER_LONG == 64 + val ^= (val >> 32); +#endif + return (u32)val; +} + +#endif /* _LINUX_HASH_H */ diff --git a/c_src/include/linux/idr.h b/c_src/include/linux/idr.h new file mode 100644 index 00000000..6f928254 --- /dev/null +++ b/c_src/include/linux/idr.h @@ -0,0 +1,208 @@ +/* + * include/linux/idr.h + * + * 2002-10-18 written by Jim Houston jim.houston@ccur.com + * Copyright (C) 2002 by Concurrent Computer Corporation + * Distributed under the GNU GPL license version 2. + * + * Small id to pointer translation service avoiding fixed sized + * tables. + */ + +#ifndef __IDR_H__ +#define __IDR_H__ + +#include <linux/types.h> +#include <linux/bitmap.h> +#include <linux/bitops.h> +#include <linux/preempt.h> +#include <linux/rcupdate.h> +#include <linux/spinlock.h> + +/* + * We want shallower trees and thus more bits covered at each layer. 8 + * bits gives us large enough first layer for most use cases and maximum + * tree depth of 4. Each idr_layer is slightly larger than 2k on 64bit and + * 1k on 32bit. 
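+ *
+ * (That size estimate follows from the definitions below: ary[] holds
+ * 1 << IDR_BITS = 256 child pointers, i.e. 256 * 8 = 2048 bytes on 64bit or
+ * 256 * 4 = 1024 bytes on 32bit, plus the prefix/layer/count fields and the
+ * inline bitmap.)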
+ */ +#define IDR_BITS 8 +#define IDR_SIZE (1 << IDR_BITS) +#define IDR_MASK ((1 << IDR_BITS)-1) + +struct idr_layer { + int prefix; /* the ID prefix of this idr_layer */ + int layer; /* distance from leaf */ + struct idr_layer __rcu *ary[1<<IDR_BITS]; + int count; /* When zero, we can release it */ + union { + /* A zero bit means "space here" */ + DECLARE_BITMAP(bitmap, IDR_SIZE); + struct rcu_head rcu_head; + }; +}; + +struct idr { + struct idr_layer __rcu *hint; /* the last layer allocated from */ + struct idr_layer __rcu *top; + int layers; /* only valid w/o concurrent changes */ + int cur; /* current pos for cyclic allocation */ + spinlock_t lock; + int id_free_cnt; + struct idr_layer *id_free; +}; + +#define IDR_INIT(name) \ +{ \ + .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ +} +#define DEFINE_IDR(name) struct idr name = IDR_INIT(name) + +/** + * DOC: idr sync + * idr synchronization (stolen from radix-tree.h) + * + * idr_find() is able to be called locklessly, using RCU. The caller must + * ensure calls to this function are made within rcu_read_lock() regions. + * Other readers (lock-free or otherwise) and modifications may be running + * concurrently. + * + * It is still required that the caller manage the synchronization and + * lifetimes of the items. So if RCU lock-free lookups are used, typically + * this would mean that the items have their own locks, or are amenable to + * lock-free access; and that the items are freed by RCU (or only freed after + * having been deleted from the idr tree *and* a synchronize_rcu() grace + * period). + */ + +/* + * This is what we export. + */ + +void *idr_find_slowpath(struct idr *idp, int id); +void idr_preload(gfp_t gfp_mask); + +static inline int idr_alloc(struct idr *idp, void *ptr, int start, int end, gfp_t gfp_mask) +{ + return 0; +} + +static inline void idr_remove(struct idr *idp, int id) {} + +int idr_alloc_cyclic(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask); +int idr_for_each(struct idr *idp, + int (*fn)(int id, void *p, void *data), void *data); +void *idr_get_next(struct idr *idp, int *nextid); +void *idr_replace(struct idr *idp, void *ptr, int id); +void idr_destroy(struct idr *idp); +void idr_init(struct idr *idp); +bool idr_is_empty(struct idr *idp); + +/** + * idr_preload_end - end preload section started with idr_preload() + * + * Each idr_preload() should be matched with an invocation of this + * function. See idr_preload() for details. + */ +static inline void idr_preload_end(void) +{ + preempt_enable(); +} + +/** + * idr_find - return pointer for given id + * @idr: idr handle + * @id: lookup key + * + * Return the pointer given the id it has been registered with. A %NULL + * return indicates that @id is not valid or you passed %NULL in + * idr_get_new(). + * + * This function can be called under rcu_read_lock(), given that the leaf + * pointers lifetimes are correctly managed. + */ +static inline void *idr_find(struct idr *idr, int id) +{ + struct idr_layer *hint = rcu_dereference_raw(idr->hint); + + if (hint && (id & ~IDR_MASK) == hint->prefix) + return rcu_dereference_raw(hint->ary[id & IDR_MASK]); + + return idr_find_slowpath(idr, id); +} + +/** + * idr_for_each_entry - iterate over an idr's elements of a given type + * @idp: idr handle + * @entry: the type * to use as cursor + * @id: id entry's key + * + * @entry and @id do not need to be initialized before the loop, and + * after normal terminatinon @entry is left with the value NULL. This + * is convenient for a "not found" value. 
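+ *
+ * For illustration (my_idr, struct foo and do_something() are hypothetical):
+ *
+ *     struct foo *entry;
+ *     int id;
+ *
+ *     idr_for_each_entry(&my_idr, entry, id)
+ *             do_something(entry, id);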
+ */ +#define idr_for_each_entry(idp, entry, id) \ + for (id = 0; ((entry) = idr_get_next(idp, &(id))) != NULL; ++id) + +/** + * idr_for_each_entry - continue iteration over an idr's elements of a given type + * @idp: idr handle + * @entry: the type * to use as cursor + * @id: id entry's key + * + * Continue to iterate over list of given type, continuing after + * the current position. + */ +#define idr_for_each_entry_continue(idp, entry, id) \ + for ((entry) = idr_get_next((idp), &(id)); \ + entry; \ + ++id, (entry) = idr_get_next((idp), &(id))) + +/* + * IDA - IDR based id allocator, use when translation from id to + * pointer isn't necessary. + * + * IDA_BITMAP_LONGS is calculated to be one less to accommodate + * ida_bitmap->nr_busy so that the whole struct fits in 128 bytes. + */ +#define IDA_CHUNK_SIZE 128 /* 128 bytes per chunk */ +#define IDA_BITMAP_LONGS (IDA_CHUNK_SIZE / sizeof(long) - 1) +#define IDA_BITMAP_BITS (IDA_BITMAP_LONGS * sizeof(long) * 8) + +struct ida_bitmap { + long nr_busy; + unsigned long bitmap[IDA_BITMAP_LONGS]; +}; + +struct ida { + struct idr idr; + struct ida_bitmap *free_bitmap; +}; + +#define IDA_INIT(name) { .idr = IDR_INIT((name).idr), .free_bitmap = NULL, } +#define DEFINE_IDA(name) struct ida name = IDA_INIT(name) + +int ida_pre_get(struct ida *ida, gfp_t gfp_mask); +int ida_get_new_above(struct ida *ida, int starting_id, int *p_id); +void ida_remove(struct ida *ida, int id); +void ida_destroy(struct ida *ida); +void ida_init(struct ida *ida); + +int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end, + gfp_t gfp_mask); +void ida_simple_remove(struct ida *ida, unsigned int id); + +/** + * ida_get_new - allocate new ID + * @ida: idr handle + * @p_id: pointer to the allocated handle + * + * Simple wrapper around ida_get_new_above() w/ @starting_id of zero. + */ +static inline int ida_get_new(struct ida *ida, int *p_id) +{ + return ida_get_new_above(ida, 0, p_id); +} + +void __init idr_init_cache(void); + +#endif /* __IDR_H__ */ diff --git a/c_src/include/linux/ioprio.h b/c_src/include/linux/ioprio.h new file mode 100644 index 00000000..822c64a2 --- /dev/null +++ b/c_src/include/linux/ioprio.h @@ -0,0 +1,46 @@ +#ifndef IOPRIO_H +#define IOPRIO_H + +/* + * Gives us 8 prio classes with 13-bits of data for each class + */ +#define IOPRIO_BITS (16) +#define IOPRIO_CLASS_SHIFT (13) +#define IOPRIO_PRIO_MASK ((1UL << IOPRIO_CLASS_SHIFT) - 1) + +#define IOPRIO_PRIO_CLASS(mask) ((mask) >> IOPRIO_CLASS_SHIFT) +#define IOPRIO_PRIO_DATA(mask) ((mask) & IOPRIO_PRIO_MASK) +#define IOPRIO_PRIO_VALUE(class, data) (((class) << IOPRIO_CLASS_SHIFT) | data) + +#define ioprio_valid(mask) (IOPRIO_PRIO_CLASS((mask)) != IOPRIO_CLASS_NONE) + +/* + * These are the io priority groups as implemented by CFQ. RT is the realtime + * class, it always gets premium service. BE is the best-effort scheduling + * class, the default for any process. IDLE is the idle scheduling class, it + * is only served when no one else is using the disk. 
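[Editorial aside, not part of the patch: an illustrative round trip through the ioprio macros above, using the class constants from the enum just below. A priority value is a 3-bit class in the top bits and 13 bits of per-class data below it.]

#include <linux/ioprio.h>

static int example_ioprio(void)
{
        /* Best-effort class, level 2 (0 is the highest of the 8 BE levels). */
        int prio  = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 2);

        /* Take the packed value apart again. */
        int class = IOPRIO_PRIO_CLASS(prio);    /* IOPRIO_CLASS_BE */
        int data  = IOPRIO_PRIO_DATA(prio);     /* 2 */

        return ioprio_valid(prio) ? class + data : -1;
}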
+ */ +enum { + IOPRIO_CLASS_NONE, + IOPRIO_CLASS_RT, + IOPRIO_CLASS_BE, + IOPRIO_CLASS_IDLE, +}; + +/* + * 8 best effort priority levels are supported + */ +#define IOPRIO_BE_NR (8) + +enum { + IOPRIO_WHO_PROCESS = 1, + IOPRIO_WHO_PGRP, + IOPRIO_WHO_USER, +}; + +/* + * Fallback BE priority + */ +#define IOPRIO_NORM (4) + +#endif diff --git a/c_src/include/linux/jhash.h b/c_src/include/linux/jhash.h new file mode 100644 index 00000000..348c6f47 --- /dev/null +++ b/c_src/include/linux/jhash.h @@ -0,0 +1,175 @@ +#ifndef _LINUX_JHASH_H +#define _LINUX_JHASH_H + +/* jhash.h: Jenkins hash support. + * + * Copyright (C) 2006. Bob Jenkins (bob_jenkins@burtleburtle.net) + * + * http://burtleburtle.net/bob/hash/ + * + * These are the credits from Bob's sources: + * + * lookup3.c, by Bob Jenkins, May 2006, Public Domain. + * + * These are functions for producing 32-bit hashes for hash table lookup. + * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final() + * are externally useful functions. Routines to test the hash are included + * if SELF_TEST is defined. You can use this free for any purpose. It's in + * the public domain. It has no warranty. + * + * Copyright (C) 2009-2010 Jozsef Kadlecsik (kadlec@blackhole.kfki.hu) + * + * I've modified Bob's hash to be useful in the Linux kernel, and + * any bugs present are my fault. + * Jozsef + */ +#include <linux/bitops.h> +#include <linux/unaligned/packed_struct.h> + +/* Best hash sizes are of power of two */ +#define jhash_size(n) ((u32)1<<(n)) +/* Mask the hash value, i.e (value & jhash_mask(n)) instead of (value % n) */ +#define jhash_mask(n) (jhash_size(n)-1) + +/* __jhash_mix -- mix 3 32-bit values reversibly. */ +#define __jhash_mix(a, b, c) \ +{ \ + a -= c; a ^= rol32(c, 4); c += b; \ + b -= a; b ^= rol32(a, 6); a += c; \ + c -= b; c ^= rol32(b, 8); b += a; \ + a -= c; a ^= rol32(c, 16); c += b; \ + b -= a; b ^= rol32(a, 19); a += c; \ + c -= b; c ^= rol32(b, 4); b += a; \ +} + +/* __jhash_final - final mixing of 3 32-bit values (a,b,c) into c */ +#define __jhash_final(a, b, c) \ +{ \ + c ^= b; c -= rol32(b, 14); \ + a ^= c; a -= rol32(c, 11); \ + b ^= a; b -= rol32(a, 25); \ + c ^= b; c -= rol32(b, 16); \ + a ^= c; a -= rol32(c, 4); \ + b ^= a; b -= rol32(a, 14); \ + c ^= b; c -= rol32(b, 24); \ +} + +/* An arbitrary initial parameter */ +#define JHASH_INITVAL 0xdeadbeef + +/* jhash - hash an arbitrary key + * @k: sequence of bytes as key + * @length: the length of the key + * @initval: the previous hash, or an arbitray value + * + * The generic version, hashes an arbitrary sequence of bytes. + * No alignment or length assumptions are made about the input key. + * + * Returns the hash value of the key. The result depends on endianness. 
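[Editorial aside, not part of the patch: a sketch of typical jhash() use (the implementation follows below) — hash an arbitrary key structure, then mask the result into a power-of-two table with jhash_mask(). The key layout and table size are invented for the example.]

#include <linux/jhash.h>

struct flow_key {
        u32 saddr, daddr;
        u16 sport, dport;
};

#define FLOW_HASH_BITS 10       /* jhash_size(10) == 1024 buckets */

static u32 flow_hash(const struct flow_key *key, u32 seed)
{
        /* jhash() makes no alignment or length assumptions about @key. */
        return jhash(key, sizeof(*key), seed) & jhash_mask(FLOW_HASH_BITS);
}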
+ */ +static inline u32 jhash(const void *key, u32 length, u32 initval) +{ + u32 a, b, c; + const u8 *k = key; + + /* Set up the internal state */ + a = b = c = JHASH_INITVAL + length + initval; + + /* All but the last block: affect some 32 bits of (a,b,c) */ + while (length > 12) { + a += __get_unaligned_cpu32(k); + b += __get_unaligned_cpu32(k + 4); + c += __get_unaligned_cpu32(k + 8); + __jhash_mix(a, b, c); + length -= 12; + k += 12; + } + /* Last block: affect all 32 bits of (c) */ + /* All the case statements fall through */ + switch (length) { + case 12: c += (u32)k[11]<<24; + case 11: c += (u32)k[10]<<16; + case 10: c += (u32)k[9]<<8; + case 9: c += k[8]; + case 8: b += (u32)k[7]<<24; + case 7: b += (u32)k[6]<<16; + case 6: b += (u32)k[5]<<8; + case 5: b += k[4]; + case 4: a += (u32)k[3]<<24; + case 3: a += (u32)k[2]<<16; + case 2: a += (u32)k[1]<<8; + case 1: a += k[0]; + __jhash_final(a, b, c); + case 0: /* Nothing left to add */ + break; + } + + return c; +} + +/* jhash2 - hash an array of u32's + * @k: the key which must be an array of u32's + * @length: the number of u32's in the key + * @initval: the previous hash, or an arbitray value + * + * Returns the hash value of the key. + */ +static inline u32 jhash2(const u32 *k, u32 length, u32 initval) +{ + u32 a, b, c; + + /* Set up the internal state */ + a = b = c = JHASH_INITVAL + (length<<2) + initval; + + /* Handle most of the key */ + while (length > 3) { + a += k[0]; + b += k[1]; + c += k[2]; + __jhash_mix(a, b, c); + length -= 3; + k += 3; + } + + /* Handle the last 3 u32's: all the case statements fall through */ + switch (length) { + case 3: c += k[2]; + case 2: b += k[1]; + case 1: a += k[0]; + __jhash_final(a, b, c); + case 0: /* Nothing left to add */ + break; + } + + return c; +} + + +/* __jhash_nwords - hash exactly 3, 2 or 1 word(s) */ +static inline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) +{ + a += initval; + b += initval; + c += initval; + + __jhash_final(a, b, c); + + return c; +} + +static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval) +{ + return __jhash_nwords(a, b, c, initval + JHASH_INITVAL + (3 << 2)); +} + +static inline u32 jhash_2words(u32 a, u32 b, u32 initval) +{ + return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); +} + +static inline u32 jhash_1word(u32 a, u32 initval) +{ + return __jhash_nwords(a, 0, 0, initval + JHASH_INITVAL + (1 << 2)); +} + +#endif /* _LINUX_JHASH_H */ diff --git a/c_src/include/linux/jiffies.h b/c_src/include/linux/jiffies.h new file mode 100644 index 00000000..d16ea76f --- /dev/null +++ b/c_src/include/linux/jiffies.h @@ -0,0 +1,91 @@ +#ifndef _LINUX_JIFFIES_H +#define _LINUX_JIFFIES_H + +#include <time.h> +#include <linux/kernel.h> +#include <linux/time64.h> +#include <linux/typecheck.h> +#include <linux/types.h> + +#define time_after(a,b) \ + (typecheck(unsigned long, a) && \ + typecheck(unsigned long, b) && \ + ((long)((b) - (a)) < 0)) +#define time_before(a,b) time_after(b,a) + +#define time_after_eq(a,b) \ + (typecheck(unsigned long, a) && \ + typecheck(unsigned long, b) && \ + ((long)((a) - (b)) >= 0)) +#define time_before_eq(a,b) time_after_eq(b,a) + +#define time_in_range(a,b,c) \ + (time_after_eq(a,b) && \ + time_before_eq(a,c)) + +#define time_in_range_open(a,b,c) \ + (time_after_eq(a,b) && \ + time_before(a,c)) + +#define time_after64(a,b) \ + (typecheck(__u64, a) && \ + typecheck(__u64, b) && \ + ((__s64)((b) - (a)) < 0)) +#define time_before64(a,b) time_after64(b,a) + +#define time_after_eq64(a,b) \ + (typecheck(__u64, a) 
&& \ + typecheck(__u64, b) && \ + ((__s64)((a) - (b)) >= 0)) +#define time_before_eq64(a,b) time_after_eq64(b,a) + +#define time_in_range64(a, b, c) \ + (time_after_eq64(a, b) && \ + time_before_eq64(a, c)) + +#define time_is_before_jiffies(a) time_after(jiffies, a) + +#define HZ 1000 + +static inline u64 jiffies_to_nsecs(const unsigned long j) +{ + return (u64)j * NSEC_PER_MSEC; +} + +static inline unsigned jiffies_to_msecs(const unsigned long j) +{ + return j; +} + +static inline unsigned long msecs_to_jiffies(const unsigned int m) +{ + return m; +} + +static inline unsigned long nsecs_to_jiffies(u64 n) +{ + return n / NSEC_PER_MSEC; +} + +static inline u64 sched_clock(void) +{ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC_COARSE, &ts); + + return ((s64) ts.tv_sec * NSEC_PER_SEC) + ts.tv_nsec; +} + +static inline u64 local_clock(void) +{ + return sched_clock(); +} + +static inline u64 ktime_get_ns(void) +{ + return sched_clock(); +} + +#define jiffies nsecs_to_jiffies(sched_clock()) + +#endif diff --git a/c_src/include/linux/kernel.h b/c_src/include/linux/kernel.h new file mode 100644 index 00000000..ef0b1a7d --- /dev/null +++ b/c_src/include/linux/kernel.h @@ -0,0 +1,267 @@ +#ifndef __TOOLS_LINUX_KERNEL_H +#define __TOOLS_LINUX_KERNEL_H + +#include <assert.h> +#include <errno.h> +#include <stdarg.h> +#include <stddef.h> +#include <stdio.h> +#include <limits.h> + +#include <linux/bug.h> +#include <linux/byteorder.h> +#include <linux/compiler.h> +#include <linux/math.h> +#include <linux/minmax.h> + +#define BIT(nr) (1UL << (nr)) +#define BIT_ULL(nr) (1ULL << (nr)) + +#define __ARG_PLACEHOLDER_1 0, +#define __take_second_arg(__ignored, val, ...) val + +#define __and(x, y) ___and(x, y) +#define ___and(x, y) ____and(__ARG_PLACEHOLDER_##x, y) +#define ____and(arg1_or_junk, y) __take_second_arg(arg1_or_junk y, 0) + +#define __or(x, y) ___or(x, y) +#define ___or(x, y) ____or(__ARG_PLACEHOLDER_##x, y) +#define ____or(arg1_or_junk, y) __take_second_arg(arg1_or_junk 1, y) + +#define __is_defined(x) ___is_defined(x) +#define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val) +#define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0) + +/* + * IS_BUILTIN(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'y', 0 + * otherwise. For boolean options, this is equivalent to + * IS_ENABLED(CONFIG_FOO). + */ +#define IS_BUILTIN(option) __is_defined(option) + +/* + * IS_MODULE(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'm', 0 + * otherwise. + */ +#define IS_MODULE(option) __is_defined(option##_MODULE) + +/* + * IS_REACHABLE(CONFIG_FOO) evaluates to 1 if the currently compiled + * code can call a function defined in code compiled based on CONFIG_FOO. + * This is similar to IS_ENABLED(), but returns false when invoked from + * built-in code when CONFIG_FOO is set to 'm'. + */ +#define IS_REACHABLE(option) __or(IS_BUILTIN(option), \ + __and(IS_MODULE(option), __is_defined(MODULE))) + +/* + * IS_ENABLED(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'y' or 'm', + * 0 otherwise. 
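[Editorial aside, not part of the patch: the placeholder machinery above works purely in the preprocessor. When CONFIG_FOO expands to 1, __ARG_PLACEHOLDER_##val becomes the token sequence "0," which shifts the second argument of __take_second_arg() into place, so IS_BUILTIN()/IS_MODULE() and the IS_ENABLED() macro defined just below collapse to a constant 1 or 0 usable in ordinary C conditions. A minimal sketch; CONFIG_EXAMPLE_FEATURE is a made-up option standing in for one the build system would define.]

#include <linux/kernel.h>

#define CONFIG_EXAMPLE_FEATURE 1        /* normally supplied by the build system */

static int example_init(void)
{
        /* Expands to a compile-time constant, so the dead branch is folded away. */
        if (!IS_ENABLED(CONFIG_EXAMPLE_FEATURE))
                return 0;

        /* ... feature-specific setup ... */
        return 1;
}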
+ */ +#define IS_ENABLED(option) __or(IS_BUILTIN(option), IS_MODULE(option)) +#define EXPORT_SYMBOL(sym) + +#define U8_MAX ((u8)~0U) +#define S8_MAX ((s8)(U8_MAX>>1)) +#define S8_MIN ((s8)(-S8_MAX - 1)) +#define U16_MAX ((u16)~0U) +#define S16_MAX ((s16)(U16_MAX>>1)) +#define S16_MIN ((s16)(-S16_MAX - 1)) +#define U32_MAX ((u32)~0U) +#define S32_MAX ((s32)(U32_MAX>>1)) +#define S32_MIN ((s32)(-S32_MAX - 1)) +#define U64_MAX ((u64)~0ULL) +#define S64_MAX ((s64)(U64_MAX>>1)) +#define S64_MIN ((s64)(-S64_MAX - 1)) + +#define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1) +#define __ALIGN_MASK(x, mask) (((x)+(mask))&~(mask)) + +#define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a))) +#define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0) + +#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) + +#ifndef offsetof +#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) +#endif + +#ifndef container_of +/** + * container_of - cast a member of a structure out to the containing structure + * @ptr: the pointer to the member. + * @type: the type of the container struct this is embedded in. + * @member: the name of the member within the struct. + * + */ +#define container_of(ptr, type, member) ({ \ + const typeof(((type *)0)->member) * __mptr = (ptr); \ + (type *)((char *)__mptr - offsetof(type, member)); }) +#endif + +#ifndef __struct_group +#define __struct_group(TAG, NAME, ATTRS, MEMBERS...) \ + union { \ + struct { MEMBERS } ATTRS; \ + struct TAG { MEMBERS } ATTRS NAME; \ + } +#endif + +#define struct_group(NAME, MEMBERS...) \ + __struct_group(/* no tag */, NAME, /* no attrs */, MEMBERS) + +#define swap(a, b) \ + do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) + +/* This counts to 12. Any more, it will return 13th argument. */ +#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n +#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) + +#define _RET_IP_ (unsigned long)__builtin_return_address(0) +#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; }) + +#define might_sleep() + +#define cpu_relax() barrier() +#define cpu_relax_lowlatency() barrier() + +#define panic(fmt, ...) \ +do { \ + printf(fmt, ##__VA_ARGS__); \ + BUG(); \ +} while (0) + +int __must_check _kstrtoul(const char *s, unsigned int base, unsigned long *res); +int __must_check _kstrtol(const char *s, unsigned int base, long *res); + +int __must_check kstrtoull(const char *s, unsigned int base, unsigned long long *res); +int __must_check kstrtoll(const char *s, unsigned int base, long long *res); + +/** + * kstrtoul - convert a string to an unsigned long + * @s: The start of the string. The string must be null-terminated, and may also + * include a single newline before its terminating null. The first character + * may also be a plus sign, but not a minus sign. + * @base: The number base to use. The maximum supported base is 16. If base is + * given as 0, then the base of the string is automatically detected with the + * conventional semantics - If it begins with 0x the number will be parsed as a + * hexadecimal (case insensitive), if it otherwise begins with 0, it will be + * parsed as an octal number. Otherwise it will be parsed as a decimal. + * @res: Where to write the result of the conversion on success. + * + * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. 
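[Editorial aside, not part of the patch: as the comment above describes, the kstrto*() helpers accept an optional leading sign, auto-detect 0x/0 prefixes when @base is 0, and tolerate a single trailing newline. A minimal error-handling sketch; the function and buffer names are illustrative.]

#include <linux/kernel.h>

static int parse_count(const char *buf, unsigned long *out)
{
        unsigned long val;
        int ret;

        ret = kstrtoul(buf, 0, &val);   /* base 0: accepts "123", "0x7b", "0173" */
        if (ret)
                return ret;             /* -EINVAL or -ERANGE */

        *out = val;
        return 0;
}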
+ * Used as a replacement for the obsolete simple_strtoull. Return code must + * be checked. +*/ +static inline int __must_check kstrtoul(const char *s, unsigned int base, unsigned long *res) +{ + /* + * We want to shortcut function call, but + * __builtin_types_compatible_p(unsigned long, unsigned long long) = 0. + */ + if (sizeof(unsigned long) == sizeof(unsigned long long) && + __alignof__(unsigned long) == __alignof__(unsigned long long)) + return kstrtoull(s, base, (unsigned long long *)res); + else + return _kstrtoul(s, base, res); +} + +/** + * kstrtol - convert a string to a long + * @s: The start of the string. The string must be null-terminated, and may also + * include a single newline before its terminating null. The first character + * may also be a plus sign or a minus sign. + * @base: The number base to use. The maximum supported base is 16. If base is + * given as 0, then the base of the string is automatically detected with the + * conventional semantics - If it begins with 0x the number will be parsed as a + * hexadecimal (case insensitive), if it otherwise begins with 0, it will be + * parsed as an octal number. Otherwise it will be parsed as a decimal. + * @res: Where to write the result of the conversion on success. + * + * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. + * Used as a replacement for the obsolete simple_strtoull. Return code must + * be checked. + */ +static inline int __must_check kstrtol(const char *s, unsigned int base, long *res) +{ + /* + * We want to shortcut function call, but + * __builtin_types_compatible_p(long, long long) = 0. + */ + if (sizeof(long) == sizeof(long long) && + __alignof__(long) == __alignof__(long long)) + return kstrtoll(s, base, (long long *)res); + else + return _kstrtol(s, base, res); +} + +int __must_check kstrtouint(const char *s, unsigned int base, unsigned int *res); +int __must_check kstrtoint(const char *s, unsigned int base, int *res); + +static inline int __must_check kstrtou64(const char *s, unsigned int base, u64 *res) +{ + return kstrtoull(s, base, res); +} + +static inline int __must_check kstrtos64(const char *s, unsigned int base, s64 *res) +{ + return kstrtoll(s, base, res); +} + +static inline int __must_check kstrtou32(const char *s, unsigned int base, u32 *res) +{ + return kstrtouint(s, base, res); +} + +static inline int __must_check kstrtos32(const char *s, unsigned int base, s32 *res) +{ + return kstrtoint(s, base, res); +} + +struct printbuf; +extern void prt_u64(struct printbuf *out, u64 num); + +extern __printf(2, 0) void prt_vprintf(struct printbuf *out, const char *fmt, va_list args); +extern __printf(2, 3) void prt_printf(struct printbuf *out, const char *fmt, ...); + +static const char hex_asc[] = "0123456789abcdef"; +#define hex_asc_lo(x) hex_asc[((x) & 0x0f)] +#define hex_asc_hi(x) hex_asc[((x) & 0xf0) >> 4] +static const char hex_asc_upper[] = "0123456789ABCDEF"; +#define hex_asc_upper_lo(x) hex_asc_upper[((x) & 0x0f)] +#define hex_asc_upper_hi(x) hex_asc_upper[((x) & 0xf0) >> 4] + +/* The hash is always the low bits of hash_len */ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + #define HASH_LEN_DECLARE u32 hash; u32 len +#else + #define HASH_LEN_DECLARE u32 len; u32 hash +#endif + +struct qstr { + union { + struct { + HASH_LEN_DECLARE; + }; + u64 hash_len; + }; + const unsigned char *name; +}; + +#define QSTR_INIT(n,l) { { { .len = l } }, .name = n } + +#define POISON_FREE 0x6b + +static inline void dump_stack(void) {} + +#define unsafe_memcpy(dst, src, bytes, 
justification) \ + memcpy(dst, src, bytes) + +#ifdef __DECLARE_FLEX_ARRAY +#define DECLARE_FLEX_ARRAY(TYPE, NAME) __DECLARE_FLEX_ARRAY(TYPE, NAME) +#else +#define DECLARE_FLEX_ARRAY(T, member) T member[0] +#endif + +#endif diff --git a/c_src/include/linux/key.h b/c_src/include/linux/key.h new file mode 100644 index 00000000..cc6859a9 --- /dev/null +++ b/c_src/include/linux/key.h @@ -0,0 +1,42 @@ +#ifndef _LINUX_KEY_H +#define _LINUX_KEY_H + +#include <linux/types.h> +#include <linux/atomic.h> +#include <keyutils.h> + +struct user_key_payload { + size_t datalen; /* length of this data */ + char data[0]; /* actual data */ +}; + +struct key { + atomic_t usage; /* number of references */ + key_serial_t serial; /* key serial number */ + struct rw_semaphore sem; /* change vs change sem */ + struct user_key_payload payload; +}; + +static inline const struct user_key_payload *user_key_payload(const struct key *key) +{ + return &key->payload; +} + +static inline void key_put(struct key *key) +{ + if (atomic_dec_and_test(&key->usage)) + free(key); +} + +static inline struct key *__key_get(struct key *key) +{ + atomic_inc(&key->usage); + return key; +} + +static inline struct key *key_get(struct key *key) +{ + return key ? __key_get(key) : key; +} + +#endif /* _LINUX_KEY_H */ diff --git a/c_src/include/linux/kmemleak.h b/c_src/include/linux/kmemleak.h new file mode 100644 index 00000000..6a3cd1bf --- /dev/null +++ b/c_src/include/linux/kmemleak.h @@ -0,0 +1,121 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * include/linux/kmemleak.h + * + * Copyright (C) 2008 ARM Limited + * Written by Catalin Marinas <catalin.marinas@arm.com> + */ + +#ifndef __KMEMLEAK_H +#define __KMEMLEAK_H + +#include <linux/slab.h> +#include <linux/vmalloc.h> + +#ifdef CONFIG_DEBUG_KMEMLEAK + +extern void kmemleak_init(void) __init; +extern void kmemleak_alloc(const void *ptr, size_t size, int min_count, + gfp_t gfp) __ref; +extern void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size, + gfp_t gfp) __ref; +extern void kmemleak_vmalloc(const struct vm_struct *area, size_t size, + gfp_t gfp) __ref; +extern void kmemleak_free(const void *ptr) __ref; +extern void kmemleak_free_part(const void *ptr, size_t size) __ref; +extern void kmemleak_free_percpu(const void __percpu *ptr) __ref; +extern void kmemleak_update_trace(const void *ptr) __ref; +extern void kmemleak_not_leak(const void *ptr) __ref; +extern void kmemleak_ignore(const void *ptr) __ref; +extern void kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp) __ref; +extern void kmemleak_no_scan(const void *ptr) __ref; +extern void kmemleak_alloc_phys(phys_addr_t phys, size_t size, + gfp_t gfp) __ref; +extern void kmemleak_free_part_phys(phys_addr_t phys, size_t size) __ref; +extern void kmemleak_ignore_phys(phys_addr_t phys) __ref; + +static inline void kmemleak_alloc_recursive(const void *ptr, size_t size, + int min_count, slab_flags_t flags, + gfp_t gfp) +{ + if (!(flags & SLAB_NOLEAKTRACE)) + kmemleak_alloc(ptr, size, min_count, gfp); +} + +static inline void kmemleak_free_recursive(const void *ptr, slab_flags_t flags) +{ + if (!(flags & SLAB_NOLEAKTRACE)) + kmemleak_free(ptr); +} + +static inline void kmemleak_erase(void **ptr) +{ + *ptr = NULL; +} + +#else + +static inline void kmemleak_init(void) +{ +} +static inline void kmemleak_alloc(const void *ptr, size_t size, int min_count, + gfp_t gfp) +{ +} +static inline void kmemleak_alloc_recursive(const void *ptr, size_t size, + int min_count, slab_flags_t flags, + gfp_t gfp) +{ +} +static inline void 
kmemleak_alloc_percpu(const void __percpu *ptr, size_t size, + gfp_t gfp) +{ +} +static inline void kmemleak_vmalloc(const struct vm_struct *area, size_t size, + gfp_t gfp) +{ +} +static inline void kmemleak_free(const void *ptr) +{ +} +static inline void kmemleak_free_part(const void *ptr, size_t size) +{ +} +static inline void kmemleak_free_recursive(const void *ptr, slab_flags_t flags) +{ +} +static inline void kmemleak_free_percpu(const void __percpu *ptr) +{ +} +static inline void kmemleak_update_trace(const void *ptr) +{ +} +static inline void kmemleak_not_leak(const void *ptr) +{ +} +static inline void kmemleak_ignore(const void *ptr) +{ +} +static inline void kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp) +{ +} +static inline void kmemleak_erase(void **ptr) +{ +} +static inline void kmemleak_no_scan(const void *ptr) +{ +} +static inline void kmemleak_alloc_phys(phys_addr_t phys, size_t size, + gfp_t gfp) +{ +} +static inline void kmemleak_free_part_phys(phys_addr_t phys, size_t size) +{ +} +static inline void kmemleak_ignore_phys(phys_addr_t phys) +{ +} + +#endif /* CONFIG_DEBUG_KMEMLEAK */ + +#endif /* __KMEMLEAK_H */ diff --git a/c_src/include/linux/kobject.h b/c_src/include/linux/kobject.h new file mode 100644 index 00000000..c33b2126 --- /dev/null +++ b/c_src/include/linux/kobject.h @@ -0,0 +1,129 @@ +/* + * kobject.h - generic kernel object infrastructure. + * + * Copyright (c) 2002-2003 Patrick Mochel + * Copyright (c) 2002-2003 Open Source Development Labs + * Copyright (c) 2006-2008 Greg Kroah-Hartman <greg@kroah.com> + * Copyright (c) 2006-2008 Novell Inc. + * + * This file is released under the GPLv2. + * + * Please read Documentation/kobject.txt before using the kobject + * interface, ESPECIALLY the parts about reference counts and object + * destructors. + */ + +#ifndef _KOBJECT_H_ +#define _KOBJECT_H_ + +#include <linux/atomic.h> +#include <linux/bug.h> +#include <linux/compiler.h> +#include <linux/kernel.h> +#include <linux/sysfs.h> +#include <linux/types.h> +#include <linux/workqueue.h> + +struct kset; + +struct kobj_type { + void (*release)(struct kobject *kobj); + const struct sysfs_ops *sysfs_ops; + const struct attribute_group **default_groups; + const struct kobj_ns_type_operations *(*child_ns_type)(struct kobject *kobj); + const void *(*namespace)(struct kobject *kobj); +}; + +struct kobj_uevent_env { +}; + +struct kobj_attribute { + struct attribute attr; + ssize_t (*show)(struct kobject *kobj, struct kobj_attribute *attr, + char *buf); + ssize_t (*store)(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count); +}; + +struct kobject { + struct kobject *parent; + struct kset *kset; + const struct kobj_type *ktype; + struct kernfs_node *sd; /* sysfs directory entry */ + atomic_t ref; + unsigned int state_initialized:1; + unsigned int state_in_sysfs:1; + unsigned int state_add_uevent_sent:1; + unsigned int state_remove_uevent_sent:1; + unsigned int uevent_suppress:1; +}; + +struct kset { + struct kobject kobj; +}; + +#define kobject_add(...) 
0 + +static inline void kobject_init(struct kobject *kobj, const struct kobj_type *ktype) +{ + memset(kobj, 0, sizeof(*kobj)); + + atomic_set(&kobj->ref, 1); + kobj->ktype = ktype; + kobj->state_initialized = 1; +} + +static inline void kobject_del(struct kobject *kobj); + +static inline void kobject_cleanup(struct kobject *kobj) +{ + const struct kobj_type *t = kobj->ktype; + + /* remove from sysfs if the caller did not do it */ + if (kobj->state_in_sysfs) + kobject_del(kobj); + + if (t && t->release) + t->release(kobj); +} + +static inline void kobject_put(struct kobject *kobj) +{ + BUG_ON(!kobj); + BUG_ON(!kobj->state_initialized); + + if (atomic_dec_and_test(&kobj->ref)) + kobject_cleanup(kobj); +} + +static inline void kobject_del(struct kobject *kobj) +{ + if (!kobj) + return; + + kobj->state_in_sysfs = 0; +#if 0 + kobj_kset_leave(kobj); +#endif + kobject_put(kobj->parent); + kobj->parent = NULL; +} + +static inline struct kobject *kobject_get(struct kobject *kobj) +{ + BUG_ON(!kobj); + BUG_ON(!kobj->state_initialized); + + atomic_inc(&kobj->ref); + return kobj; +} + +static inline void kset_unregister(struct kset *kset) +{ + kfree(kset); +} + +#define kset_create_and_add(_name, _u, _parent) \ + ((struct kset *) kzalloc(sizeof(struct kset), GFP_KERNEL)) + +#endif /* _KOBJECT_H_ */ diff --git a/c_src/include/linux/kthread.h b/c_src/include/linux/kthread.h new file mode 100644 index 00000000..3a8cf108 --- /dev/null +++ b/c_src/include/linux/kthread.h @@ -0,0 +1,118 @@ +#ifndef _LINUX_KTHREAD_H +#define _LINUX_KTHREAD_H + +/* Simple interface for creating and stopping kernel threads without mess. */ +#include <linux/err.h> +#include <linux/lockdep.h> +#include <linux/sched.h> +#include <linux/spinlock.h> + +__printf(3, 4) +struct task_struct *kthread_create(int (*threadfn)(void *data), + void *data, + const char namefmt[], ...); + + +struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data), + void *data, + unsigned int cpu, + const char *namefmt); + +/** + * kthread_run - create and wake a thread. + * @threadfn: the function to run until signal_pending(current). + * @data: data ptr for @threadfn. + * @namefmt: printf-style name for the thread. + * + * Description: Convenient wrapper for kthread_create() followed by + * wake_up_process(). Returns the kthread or ERR_PTR(-ENOMEM). + */ +#define kthread_run(threadfn, data, namefmt, ...) \ +({ \ + struct task_struct *__k \ + = kthread_create(threadfn, data, namefmt, ## __VA_ARGS__); \ + if (!IS_ERR(__k)) \ + wake_up_process(__k); \ + __k; \ +}) + +int kthread_stop(struct task_struct *k); +bool kthread_should_stop(void); +bool kthread_should_park(void); +bool kthread_freezable_should_stop(bool *was_frozen); +void *kthread_data(struct task_struct *k); +void *probe_kthread_data(struct task_struct *k); +int kthread_park(struct task_struct *k); +void kthread_unpark(struct task_struct *k); +void kthread_parkme(void); + +int kthreadd(void *unused); +extern struct task_struct *kthreadd_task; +extern int tsk_fork_get_node(struct task_struct *tsk); + +/* + * Simple work processor based on kthread. + * + * This provides easier way to make use of kthreads. A kthread_work + * can be queued and flushed using queue/flush_kthread_work() + * respectively. Queued kthread_works are processed by a kthread + * running kthread_worker_fn(). 
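[Editorial aside, not part of the patch: a hedged sketch of the pattern described above — a thread created with kthread_run() that simply runs kthread_worker_fn(), with work queued and flushed through the worker pieces declared just below. All names here are illustrative.]

#include <linux/kthread.h>

static void example_work_fn(struct kthread_work *work)
{
        /* Runs in the worker thread's context. */
}

static DEFINE_KTHREAD_WORKER(example_worker);
static DEFINE_KTHREAD_WORK(example_work, example_work_fn);

static int run_example_worker(void)
{
        struct task_struct *t;

        /* The thread's whole job is to process example_worker's work list. */
        t = kthread_run(kthread_worker_fn, &example_worker, "example_worker");
        if (IS_ERR(t))
                return PTR_ERR(t);

        queue_kthread_work(&example_worker, &example_work);
        flush_kthread_work(&example_work);      /* wait for it to complete */

        return kthread_stop(t);
}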
+ */ +struct kthread_work; +typedef void (*kthread_work_func_t)(struct kthread_work *work); + +struct kthread_worker { + spinlock_t lock; + struct list_head work_list; + struct task_struct *task; + struct kthread_work *current_work; +}; + +struct kthread_work { + struct list_head node; + kthread_work_func_t func; + struct kthread_worker *worker; +}; + +#define KTHREAD_WORKER_INIT(worker) { \ + .lock = __SPIN_LOCK_UNLOCKED((worker).lock), \ + .work_list = LIST_HEAD_INIT((worker).work_list), \ + } + +#define KTHREAD_WORK_INIT(work, fn) { \ + .node = LIST_HEAD_INIT((work).node), \ + .func = (fn), \ + } + +#define DEFINE_KTHREAD_WORKER(worker) \ + struct kthread_worker worker = KTHREAD_WORKER_INIT(worker) + +#define DEFINE_KTHREAD_WORK(work, fn) \ + struct kthread_work work = KTHREAD_WORK_INIT(work, fn) + +#define DEFINE_KTHREAD_WORKER_ONSTACK(worker) DEFINE_KTHREAD_WORKER(worker) + +extern void __init_kthread_worker(struct kthread_worker *worker, + const char *name, struct lock_class_key *key); + +#define init_kthread_worker(worker) \ + do { \ + static struct lock_class_key __key; \ + __init_kthread_worker((worker), "("#worker")->lock", &__key); \ + } while (0) + +#define init_kthread_work(work, fn) \ + do { \ + memset((work), 0, sizeof(struct kthread_work)); \ + INIT_LIST_HEAD(&(work)->node); \ + (work)->func = (fn); \ + } while (0) + +int kthread_worker_fn(void *worker_ptr); + +bool queue_kthread_work(struct kthread_worker *worker, + struct kthread_work *work); +void flush_kthread_work(struct kthread_work *work); +void flush_kthread_worker(struct kthread_worker *worker); + +#endif /* _LINUX_KTHREAD_H */ diff --git a/c_src/include/linux/list.h b/c_src/include/linux/list.h new file mode 100644 index 00000000..d176d0d3 --- /dev/null +++ b/c_src/include/linux/list.h @@ -0,0 +1,112 @@ +#ifndef _LINUX_LIST_H +#define _LINUX_LIST_H + +#include <urcu/list.h> + +#define list_head cds_list_head +#define LIST_HEAD_INIT(l) CDS_LIST_HEAD_INIT(l) +#define LIST_HEAD(l) CDS_LIST_HEAD(l) +#define INIT_LIST_HEAD(l) CDS_INIT_LIST_HEAD(l) +#define list_add(n, h) cds_list_add(n, h) +#define list_add_tail(n, h) cds_list_add_tail(n, h) +#define __list_del_entry(l) cds_list_del(l) +#define __list_del(p, n) __cds_list_del(p, n) +#define list_del(l) cds_list_del(l) +#define list_del_init(l) cds_list_del_init(l) +#define list_replace(o, n) cds_list_replace(o, n) +#define list_replace_init(o, n) cds_list_replace_init(o, n) +#define list_move(l, h) cds_list_move(l, h) +#define list_empty(l) cds_list_empty(l) +#define list_splice(l, h) cds_list_splice(l, h) +#define list_entry(p, t, m) cds_list_entry(p, t, m) +#define list_first_entry(p, t, m) cds_list_first_entry(p, t, m) +#define list_for_each(p, h) cds_list_for_each(p, h) +#define list_for_each_prev(p, h) cds_list_for_each_prev(p, h) +#define list_for_each_safe(p, n, h) cds_list_for_each_safe(p, n, h) +#define list_for_each_prev_safe(p, n, h) cds_list_for_each_prev_safe(p, n, h) +#define list_for_each_entry(p, h, m) cds_list_for_each_entry(p, h, m) +#define list_for_each_entry_reverse(p, h, m) cds_list_for_each_entry_reverse(p, h, m) +#define list_for_each_entry_safe(p, n, h, m) cds_list_for_each_entry_safe(p, n, h, m) + +static inline int list_empty_careful(const struct list_head *head) +{ + struct list_head *next = head->next; + return (next == head) && (next == head->prev); +} + +static inline void list_move_tail(struct list_head *list, + struct list_head *head) +{ + list_del(list); + list_add_tail(list, head); +} + +static inline void list_splice_init(struct 
list_head *list, + struct list_head *head) +{ + list_splice(list, head); + INIT_LIST_HEAD(list); +} + +#define list_last_entry(ptr, type, member) \ + list_entry((ptr)->prev, type, member) + +#define list_first_entry_or_null(ptr, type, member) \ + (!list_empty(ptr) ? list_first_entry(ptr, type, member) : NULL) + +#define list_prev_entry(pos, member) \ + list_entry((pos)->member.prev, typeof(*(pos)), member) + +#define list_for_each_entry_safe_reverse(pos, n, head, member) \ + for (pos = list_last_entry(head, typeof(*pos), member), \ + n = list_prev_entry(pos, member); \ + &pos->member != (head); \ + pos = n, n = list_prev_entry(n, member)) + +/* hlists: */ + +#include <urcu/hlist.h> + +#define hlist_head cds_hlist_head +#define hlist_node cds_hlist_node + +#define hlist_add_head(n, h) cds_hlist_add_head(n, h) +#define hlist_del(n) cds_hlist_del(n) +#define hlist_del_init(n) cds_hlist_del_init(n) + +static inline int hlist_unhashed(const struct hlist_node *h) +{ + return !h->prev; +} + +static inline void hlist_del_init(struct hlist_node *n) +{ + hlist_del(n); + n->prev = NULL; + n->next = NULL; +} + +#define hlist_entry(ptr, type, member) container_of(ptr,type,member) + +#define hlist_entry_safe(ptr, type, member) \ + ({ typeof(ptr) ____ptr = (ptr); \ + ____ptr ? hlist_entry(____ptr, type, member) : NULL; \ + }) + +#define hlist_for_each_entry(pos, head, member) \ + for (pos = hlist_entry_safe((head)->next, typeof(*(pos)), member);\ + pos; \ + pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member)) + +static inline size_t list_count_nodes(struct list_head *head) +{ + struct list_head *pos; + size_t count = 0; + + list_for_each(pos, head) + count++; + + return count; +} + +#endif /* _LIST_LIST_H */ diff --git a/c_src/include/linux/list_nulls.h b/c_src/include/linux/list_nulls.h new file mode 100644 index 00000000..fa6e8471 --- /dev/null +++ b/c_src/include/linux/list_nulls.h @@ -0,0 +1,145 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_LIST_NULLS_H +#define _LINUX_LIST_NULLS_H + +#include <linux/poison.h> +#include <linux/const.h> + +/* + * Special version of lists, where end of list is not a NULL pointer, + * but a 'nulls' marker, which can have many different values. + * (up to 2^31 different values guaranteed on all platforms) + * + * In the standard hlist, termination of a list is the NULL pointer. + * In this special 'nulls' variant, we use the fact that objects stored in + * a list are aligned on a word (4 or 8 bytes alignment). + * We therefore use the last significant bit of 'ptr' : + * Set to 1 : This is a 'nulls' end-of-list marker (ptr >> 1) + * Set to 0 : This is a pointer to some object (ptr) + */ + +struct hlist_nulls_head { + struct hlist_nulls_node *first; +}; + +struct hlist_nulls_node { + struct hlist_nulls_node *next, **pprev; +}; +#define NULLS_MARKER(value) (1UL | (((long)value) << 1)) +#define INIT_HLIST_NULLS_HEAD(ptr, nulls) \ + ((ptr)->first = (struct hlist_nulls_node *) NULLS_MARKER(nulls)) + +#define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member) + +#define hlist_nulls_entry_safe(ptr, type, member) \ + ({ typeof(ptr) ____ptr = (ptr); \ + !is_a_nulls(____ptr) ? 
hlist_nulls_entry(____ptr, type, member) : NULL; \ + }) +/** + * ptr_is_a_nulls - Test if a ptr is a nulls + * @ptr: ptr to be tested + * + */ +static inline int is_a_nulls(const struct hlist_nulls_node *ptr) +{ + return ((unsigned long)ptr & 1); +} + +/** + * get_nulls_value - Get the 'nulls' value of the end of chain + * @ptr: end of chain + * + * Should be called only if is_a_nulls(ptr); + */ +static inline unsigned long get_nulls_value(const struct hlist_nulls_node *ptr) +{ + return ((unsigned long)ptr) >> 1; +} + +/** + * hlist_nulls_unhashed - Has node been removed and reinitialized? + * @h: Node to be checked + * + * Not that not all removal functions will leave a node in unhashed state. + * For example, hlist_del_init_rcu() leaves the node in unhashed state, + * but hlist_nulls_del() does not. + */ +static inline int hlist_nulls_unhashed(const struct hlist_nulls_node *h) +{ + return !h->pprev; +} + +/** + * hlist_nulls_unhashed_lockless - Has node been removed and reinitialized? + * @h: Node to be checked + * + * Not that not all removal functions will leave a node in unhashed state. + * For example, hlist_del_init_rcu() leaves the node in unhashed state, + * but hlist_nulls_del() does not. Unlike hlist_nulls_unhashed(), this + * function may be used locklessly. + */ +static inline int hlist_nulls_unhashed_lockless(const struct hlist_nulls_node *h) +{ + return !READ_ONCE(h->pprev); +} + +static inline int hlist_nulls_empty(const struct hlist_nulls_head *h) +{ + return is_a_nulls(READ_ONCE(h->first)); +} + +static inline void hlist_nulls_add_head(struct hlist_nulls_node *n, + struct hlist_nulls_head *h) +{ + struct hlist_nulls_node *first = h->first; + + n->next = first; + WRITE_ONCE(n->pprev, &h->first); + h->first = n; + if (!is_a_nulls(first)) + WRITE_ONCE(first->pprev, &n->next); +} + +static inline void __hlist_nulls_del(struct hlist_nulls_node *n) +{ + struct hlist_nulls_node *next = n->next; + struct hlist_nulls_node **pprev = n->pprev; + + WRITE_ONCE(*pprev, next); + if (!is_a_nulls(next)) + WRITE_ONCE(next->pprev, pprev); +} + +static inline void hlist_nulls_del(struct hlist_nulls_node *n) +{ + __hlist_nulls_del(n); + WRITE_ONCE(n->pprev, LIST_POISON2); +} + +/** + * hlist_nulls_for_each_entry - iterate over list of given type + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_node to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the hlist_node within the struct. + * + */ +#define hlist_nulls_for_each_entry(tpos, pos, head, member) \ + for (pos = (head)->first; \ + (!is_a_nulls(pos)) && \ + ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1;}); \ + pos = pos->next) + +/** + * hlist_nulls_for_each_entry_from - iterate over a hlist continuing from current point + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_node to use as a loop cursor. + * @member: the name of the hlist_node within the struct. 
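[Editorial aside, not part of the patch: a minimal sketch of the 'nulls' encoding described at the top of this header — the chain ends in a marker with the low bit set, and get_nulls_value() recovers whatever value (for instance a bucket index) was stored at init time. struct obj and bucket7 are illustrative.]

#include <linux/list_nulls.h>

struct obj {
        struct hlist_nulls_node node;
        int val;
};

static struct hlist_nulls_head bucket7;

static void nulls_sketch(struct obj *o)
{
        struct hlist_nulls_node *pos;
        struct obj *tpos;

        INIT_HLIST_NULLS_HEAD(&bucket7, 7);     /* end-of-list marker encodes 7 */
        hlist_nulls_add_head(&o->node, &bucket7);

        hlist_nulls_for_each_entry(tpos, pos, &bucket7, node)
                tpos->val++;                    /* visit each entry */

        /* The loop stops on the marker; get_nulls_value(pos) == 7 here. */
}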
+ * + */ +#define hlist_nulls_for_each_entry_from(tpos, pos, member) \ + for (; (!is_a_nulls(pos)) && \ + ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1;}); \ + pos = pos->next) + +#endif diff --git a/c_src/include/linux/llist.h b/c_src/include/linux/llist.h new file mode 100644 index 00000000..2e9c7215 --- /dev/null +++ b/c_src/include/linux/llist.h @@ -0,0 +1,229 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef LLIST_H +#define LLIST_H +/* + * Lock-less NULL terminated single linked list + * + * Cases where locking is not needed: + * If there are multiple producers and multiple consumers, llist_add can be + * used in producers and llist_del_all can be used in consumers simultaneously + * without locking. Also a single consumer can use llist_del_first while + * multiple producers simultaneously use llist_add, without any locking. + * + * Cases where locking is needed: + * If we have multiple consumers with llist_del_first used in one consumer, and + * llist_del_first or llist_del_all used in other consumers, then a lock is + * needed. This is because llist_del_first depends on list->first->next not + * changing, but without lock protection, there's no way to be sure about that + * if a preemption happens in the middle of the delete operation and on being + * preempted back, the list->first is the same as before causing the cmpxchg in + * llist_del_first to succeed. For example, while a llist_del_first operation + * is in progress in one consumer, then a llist_del_first, llist_add, + * llist_add (or llist_del_all, llist_add, llist_add) sequence in another + * consumer may cause violations. + * + * This can be summarized as follows: + * + * | add | del_first | del_all + * add | - | - | - + * del_first | | L | L + * del_all | | | - + * + * Where, a particular row's operation can happen concurrently with a column's + * operation, with "-" being no lock needed, while "L" being lock is needed. + * + * The list entries deleted via llist_del_all can be traversed with + * traversing function such as llist_for_each etc. But the list + * entries can not be traversed safely before deleted from the list. + * The order of deleted entries is from the newest to the oldest added + * one. If you want to traverse from the oldest to the newest, you + * must reverse the order by yourself before traversing. + * + * The basic atomic operation of this list is cmpxchg on long. On + * architectures that don't have NMI-safe cmpxchg implementation, the + * list can NOT be used in NMI handlers. So code that uses the list in + * an NMI handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. + * + * Copyright 2010,2011 Intel Corp. + * Author: Huang Ying <ying.huang@intel.com> + */ + +#include <linux/atomic.h> +#include <linux/kernel.h> + +struct llist_head { + struct llist_node *first; +}; + +struct llist_node { + struct llist_node *next; +}; + +#define LLIST_HEAD_INIT(name) { NULL } +#define LLIST_HEAD(name) struct llist_head name = LLIST_HEAD_INIT(name) + +/** + * init_llist_head - initialize lock-less list head + * @head: the head for your lock-less list + */ +static inline void init_llist_head(struct llist_head *list) +{ + list->first = NULL; +} + +/** + * llist_entry - get the struct of this entry + * @ptr: the &struct llist_node pointer. + * @type: the type of the struct this is embedded in. + * @member: the name of the llist_node within the struct. 
+ */ +#define llist_entry(ptr, type, member) \ + container_of(ptr, type, member) + +/** + * member_address_is_nonnull - check whether the member address is not NULL + * @ptr: the object pointer (struct type * that contains the llist_node) + * @member: the name of the llist_node within the struct. + * + * This macro is conceptually the same as + * &ptr->member != NULL + * but it works around the fact that compilers can decide that taking a member + * address is never a NULL pointer. + * + * Real objects that start at a high address and have a member at NULL are + * unlikely to exist, but such pointers may be returned e.g. by the + * container_of() macro. + */ +#define member_address_is_nonnull(ptr, member) \ + ((uintptr_t)(ptr) + offsetof(typeof(*(ptr)), member) != 0) + +/** + * llist_for_each - iterate over some deleted entries of a lock-less list + * @pos: the &struct llist_node to use as a loop cursor + * @node: the first entry of deleted list entries + * + * In general, some entries of the lock-less list can be traversed + * safely only after being deleted from list, so start with an entry + * instead of list head. + * + * If being used on entries deleted from lock-less list directly, the + * traverse order is from the newest to the oldest added entry. If + * you want to traverse from the oldest to the newest, you must + * reverse the order by yourself before traversing. + */ +#define llist_for_each(pos, node) \ + for ((pos) = (node); pos; (pos) = (pos)->next) + +/** + * llist_for_each_safe - iterate over some deleted entries of a lock-less list + * safe against removal of list entry + * @pos: the &struct llist_node to use as a loop cursor + * @n: another &struct llist_node to use as temporary storage + * @node: the first entry of deleted list entries + * + * In general, some entries of the lock-less list can be traversed + * safely only after being deleted from list, so start with an entry + * instead of list head. + * + * If being used on entries deleted from lock-less list directly, the + * traverse order is from the newest to the oldest added entry. If + * you want to traverse from the oldest to the newest, you must + * reverse the order by yourself before traversing. + */ +#define llist_for_each_safe(pos, n, node) \ + for ((pos) = (node); (pos) && ((n) = (pos)->next, true); (pos) = (n)) + +/** + * llist_for_each_entry - iterate over some deleted entries of lock-less list of given type + * @pos: the type * to use as a loop cursor. + * @node: the fist entry of deleted list entries. + * @member: the name of the llist_node with the struct. + * + * In general, some entries of the lock-less list can be traversed + * safely only after being removed from list, so start with an entry + * instead of list head. + * + * If being used on entries deleted from lock-less list directly, the + * traverse order is from the newest to the oldest added entry. If + * you want to traverse from the oldest to the newest, you must + * reverse the order by yourself before traversing. + */ +#define llist_for_each_entry(pos, node, member) \ + for ((pos) = llist_entry((node), typeof(*(pos)), member); \ + member_address_is_nonnull(pos, member); \ + (pos) = llist_entry((pos)->member.next, typeof(*(pos)), member)) + +/** + * llist_for_each_entry_safe - iterate over some deleted entries of lock-less list of given type + * safe against removal of list entry + * @pos: the type * to use as a loop cursor. + * @n: another type * to use as temporary storage + * @node: the first entry of deleted list entries. 
+ * @member: the name of the llist_node with the struct. + * + * In general, some entries of the lock-less list can be traversed + * safely only after being removed from list, so start with an entry + * instead of list head. + * + * If being used on entries deleted from lock-less list directly, the + * traverse order is from the newest to the oldest added entry. If + * you want to traverse from the oldest to the newest, you must + * reverse the order by yourself before traversing. + */ +#define llist_for_each_entry_safe(pos, n, node, member) \ + for (pos = llist_entry((node), typeof(*pos), member); \ + member_address_is_nonnull(pos, member) && \ + (n = llist_entry(pos->member.next, typeof(*n), member), true); \ + pos = n) + +/** + * llist_empty - tests whether a lock-less list is empty + * @head: the list to test + * + * Not guaranteed to be accurate or up to date. Just a quick way to + * test whether the list is empty without deleting something from the + * list. + */ +static inline bool llist_empty(const struct llist_head *head) +{ + return READ_ONCE(head->first) == NULL; +} + +static inline struct llist_node *llist_next(struct llist_node *node) +{ + return node->next; +} + +extern bool llist_add_batch(struct llist_node *new_first, + struct llist_node *new_last, + struct llist_head *head); +/** + * llist_add - add a new entry + * @new: new entry to be added + * @head: the head for your lock-less list + * + * Returns true if the list was empty prior to adding this entry. + */ +static inline bool llist_add(struct llist_node *new, struct llist_head *head) +{ + return llist_add_batch(new, new, head); +} + +/** + * llist_del_all - delete all entries from lock-less list + * @head: the head of lock-less list to delete all entries + * + * If list is empty, return NULL, otherwise, delete all entries and + * return the pointer to the first entry. The order of entries + * deleted is from the newest to the oldest added one. 
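[Editorial aside, not part of the patch: per the locking table above, llist_add() in producers and llist_del_all() in a consumer need no lock against each other. A consumer typically grabs the whole list at once, restores oldest-first order with llist_reverse_order() (declared below), and walks it. struct job and the function names are illustrative.]

#include <linux/llist.h>

struct job {
        struct llist_node node;
        void (*fn)(struct job *);
};

static LLIST_HEAD(pending);

/* Producer side: lock-free, callable from any context. */
static void job_submit(struct job *j)
{
        llist_add(&j->node, &pending);
}

/* Single consumer: detach everything, reverse to FIFO order, then run it. */
static void job_run_all(void)
{
        struct llist_node *list = llist_del_all(&pending);
        struct job *j, *tmp;

        list = llist_reverse_order(list);
        llist_for_each_entry_safe(j, tmp, list, node)
                j->fn(j);
}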
+ */ +static inline struct llist_node *llist_del_all(struct llist_head *head) +{ + return xchg(&head->first, NULL); +} + +extern struct llist_node *llist_del_first(struct llist_head *head); + +struct llist_node *llist_reverse_order(struct llist_node *head); + +#endif /* LLIST_H */ diff --git a/c_src/include/linux/lockdep.h b/c_src/include/linux/lockdep.h new file mode 100644 index 00000000..3831ef2d --- /dev/null +++ b/c_src/include/linux/lockdep.h @@ -0,0 +1,60 @@ +#ifndef __TOOLS_LINUX_LOCKDEP_H +#define __TOOLS_LINUX_LOCKDEP_H + +struct lock_class_key {}; +struct task_struct; + +# define lock_acquire(l, s, t, r, c, n, i) do { } while (0) +# define lock_release(l, i) do { } while (0) +# define lock_set_class(l, n, k, s, i) do { } while (0) +# define lock_set_subclass(l, s, i) do { } while (0) +# define lockdep_set_current_reclaim_state(g) do { } while (0) +# define lockdep_clear_current_reclaim_state() do { } while (0) +# define lockdep_trace_alloc(g) do { } while (0) +# define lockdep_info() do { } while (0) +# define lockdep_init_map(lock, name, key, sub) \ + do { (void)(name); (void)(key); } while (0) +# define lockdep_set_class(lock, key) do { (void)(key); } while (0) +# define lockdep_set_class_and_name(lock, key, name) \ + do { (void)(key); (void)(name); } while (0) +#define lockdep_set_class_and_subclass(lock, key, sub) \ + do { (void)(key); } while (0) +#define lockdep_set_subclass(lock, sub) do { } while (0) + +#define lockdep_set_novalidate_class(lock) do { } while (0) + +#define lockdep_assert_held(l) do { (void)(l); } while (0) +#define lockdep_assert_held_once(l) do { (void)(l); } while (0) + +#define lock_acquire_shared(l, s, t, n, i) + +#define lockdep_acquire_shared(lock) + +#define lock_contended(lockdep_map, ip) do {} while (0) +#define lock_acquired(lockdep_map, ip) do {} while (0) + +static inline void debug_show_all_locks(void) +{ +} + +static inline void debug_show_held_locks(struct task_struct *task) +{ +} + +static inline void +debug_check_no_locks_freed(const void *from, unsigned long len) +{ +} + +static inline void +debug_check_no_locks_held(void) +{ +} + +static inline int lock_class_is_held(struct lock_class_key *k) +{ + return 0; +} + +#endif /* __TOOLS_LINUX_LOCKDEP_H */ + diff --git a/c_src/include/linux/log2.h b/c_src/include/linux/log2.h new file mode 100644 index 00000000..f031ea12 --- /dev/null +++ b/c_src/include/linux/log2.h @@ -0,0 +1,298 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Integer base 2 logarithm calculation + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#ifndef _LINUX_LOG2_H +#define _LINUX_LOG2_H + +#include <linux/types.h> +#include <linux/bitops.h> + +/* + * non-constant log of base 2 calculators + * - the arch may override these in asm/bitops.h if they can be implemented + * more efficiently than using fls() and fls64() + * - the arch is not required to handle n==0 if implementing the fallback + */ +#ifndef CONFIG_ARCH_HAS_ILOG2_U32 +static inline __attribute__((const)) +int __ilog2_u32(u32 n) +{ + return fls(n) - 1; +} +#endif + +#ifndef CONFIG_ARCH_HAS_ILOG2_U64 +static inline __attribute__((const)) +int __ilog2_u64(u64 n) +{ + return fls64(n) - 1; +} +#endif + +/** + * is_power_of_2() - check if a value is a power of two + * @n: the value to check + * + * Determine whether some value is a power of two, where zero is + * *not* considered a power of two. + * Return: true if @n is a power of 2, otherwise false. 
+ */ +static inline __attribute__((const)) +bool is_power_of_2(unsigned long n) +{ + return (n != 0 && ((n & (n - 1)) == 0)); +} + +/** + * __roundup_pow_of_two() - round up to nearest power of two + * @n: value to round up + */ +static inline __attribute__((const)) +unsigned long __roundup_pow_of_two(unsigned long n) +{ + return 1UL << fls_long(n - 1); +} + +/** + * __rounddown_pow_of_two() - round down to nearest power of two + * @n: value to round down + */ +static inline __attribute__((const)) +unsigned long __rounddown_pow_of_two(unsigned long n) +{ + return 1UL << (fls_long(n) - 1); +} + +/** + * const_ilog2 - log base 2 of 32-bit or a 64-bit constant unsigned value + * @n: parameter + * + * Use this where sparse expects a true constant expression, e.g. for array + * indices. + */ +#define const_ilog2(n) \ +( \ + __builtin_constant_p(n) ? ( \ + (n) < 2 ? 0 : \ + (n) & (1ULL << 63) ? 63 : \ + (n) & (1ULL << 62) ? 62 : \ + (n) & (1ULL << 61) ? 61 : \ + (n) & (1ULL << 60) ? 60 : \ + (n) & (1ULL << 59) ? 59 : \ + (n) & (1ULL << 58) ? 58 : \ + (n) & (1ULL << 57) ? 57 : \ + (n) & (1ULL << 56) ? 56 : \ + (n) & (1ULL << 55) ? 55 : \ + (n) & (1ULL << 54) ? 54 : \ + (n) & (1ULL << 53) ? 53 : \ + (n) & (1ULL << 52) ? 52 : \ + (n) & (1ULL << 51) ? 51 : \ + (n) & (1ULL << 50) ? 50 : \ + (n) & (1ULL << 49) ? 49 : \ + (n) & (1ULL << 48) ? 48 : \ + (n) & (1ULL << 47) ? 47 : \ + (n) & (1ULL << 46) ? 46 : \ + (n) & (1ULL << 45) ? 45 : \ + (n) & (1ULL << 44) ? 44 : \ + (n) & (1ULL << 43) ? 43 : \ + (n) & (1ULL << 42) ? 42 : \ + (n) & (1ULL << 41) ? 41 : \ + (n) & (1ULL << 40) ? 40 : \ + (n) & (1ULL << 39) ? 39 : \ + (n) & (1ULL << 38) ? 38 : \ + (n) & (1ULL << 37) ? 37 : \ + (n) & (1ULL << 36) ? 36 : \ + (n) & (1ULL << 35) ? 35 : \ + (n) & (1ULL << 34) ? 34 : \ + (n) & (1ULL << 33) ? 33 : \ + (n) & (1ULL << 32) ? 32 : \ + (n) & (1ULL << 31) ? 31 : \ + (n) & (1ULL << 30) ? 30 : \ + (n) & (1ULL << 29) ? 29 : \ + (n) & (1ULL << 28) ? 28 : \ + (n) & (1ULL << 27) ? 27 : \ + (n) & (1ULL << 26) ? 26 : \ + (n) & (1ULL << 25) ? 25 : \ + (n) & (1ULL << 24) ? 24 : \ + (n) & (1ULL << 23) ? 23 : \ + (n) & (1ULL << 22) ? 22 : \ + (n) & (1ULL << 21) ? 21 : \ + (n) & (1ULL << 20) ? 20 : \ + (n) & (1ULL << 19) ? 19 : \ + (n) & (1ULL << 18) ? 18 : \ + (n) & (1ULL << 17) ? 17 : \ + (n) & (1ULL << 16) ? 16 : \ + (n) & (1ULL << 15) ? 15 : \ + (n) & (1ULL << 14) ? 14 : \ + (n) & (1ULL << 13) ? 13 : \ + (n) & (1ULL << 12) ? 12 : \ + (n) & (1ULL << 11) ? 11 : \ + (n) & (1ULL << 10) ? 10 : \ + (n) & (1ULL << 9) ? 9 : \ + (n) & (1ULL << 8) ? 8 : \ + (n) & (1ULL << 7) ? 7 : \ + (n) & (1ULL << 6) ? 6 : \ + (n) & (1ULL << 5) ? 5 : \ + (n) & (1ULL << 4) ? 4 : \ + (n) & (1ULL << 3) ? 3 : \ + (n) & (1ULL << 2) ? 2 : \ + 1) : \ + -1) + +/** + * ilog2 - log base 2 of 32-bit or a 64-bit unsigned value + * @n: parameter + * + * constant-capable log of base 2 calculation + * - this can be used to initialise global variables from constant data, hence + * the massive ternary operator construction + * + * selects the appropriately-sized optimised version depending on sizeof(n) + */ +#define ilog2(n) \ +( \ + __builtin_constant_p(n) ? \ + const_ilog2(n) : \ + (sizeof(n) <= 4) ? 
\ + __ilog2_u32(n) : \ + __ilog2_u64(n) \ + ) + +/** + * roundup_pow_of_two - round the given value up to nearest power of two + * @n: parameter + * + * round the given value up to the nearest power of two + * - the result is undefined when n == 0 + * - this can be used to initialise global variables from constant data + */ +#define roundup_pow_of_two(n) \ +( \ + __builtin_constant_p(n) ? ( \ + (n == 1) ? 1 : \ + (1UL << (ilog2((n) - 1) + 1)) \ + ) : \ + __roundup_pow_of_two(n) \ + ) + +/** + * rounddown_pow_of_two - round the given value down to nearest power of two + * @n: parameter + * + * round the given value down to the nearest power of two + * - the result is undefined when n == 0 + * - this can be used to initialise global variables from constant data + */ +#define rounddown_pow_of_two(n) \ +( \ + __builtin_constant_p(n) ? ( \ + (1UL << ilog2(n))) : \ + __rounddown_pow_of_two(n) \ + ) + +static inline __attribute_const__ +int __order_base_2(unsigned long n) +{ + return n > 1 ? ilog2(n - 1) + 1 : 0; +} + +/** + * order_base_2 - calculate the (rounded up) base 2 order of the argument + * @n: parameter + * + * The first few values calculated by this routine: + * ob2(0) = 0 + * ob2(1) = 0 + * ob2(2) = 1 + * ob2(3) = 2 + * ob2(4) = 2 + * ob2(5) = 3 + * ... and so on. + */ +#define order_base_2(n) \ +( \ + __builtin_constant_p(n) ? ( \ + ((n) == 0 || (n) == 1) ? 0 : \ + ilog2((n) - 1) + 1) : \ + __order_base_2(n) \ +) + +static inline __attribute__((const)) +int __bits_per(unsigned long n) +{ + if (n < 2) + return 1; + if (is_power_of_2(n)) + return order_base_2(n) + 1; + return order_base_2(n); +} + +/** + * bits_per - calculate the number of bits required for the argument + * @n: parameter + * + * This is constant-capable and can be used for compile time + * initializations, e.g bitfields. + * + * The first few values calculated by this routine: + * bf(0) = 1 + * bf(1) = 1 + * bf(2) = 2 + * bf(3) = 2 + * bf(4) = 3 + * ... and so on. + */ +#define bits_per(n) \ +( \ + __builtin_constant_p(n) ? ( \ + ((n) == 0 || (n) == 1) \ + ? 1 : ilog2(n) + 1 \ + ) : \ + __bits_per(n) \ +) + +/** + * get_order - Determine the allocation order of a memory size + * @size: The size for which to get the order + * + * Determine the allocation order of a particular sized block of memory. This + * is on a logarithmic scale, where: + * + * 0 -> 2^0 * PAGE_SIZE and below + * 1 -> 2^1 * PAGE_SIZE to 2^0 * PAGE_SIZE + 1 + * 2 -> 2^2 * PAGE_SIZE to 2^1 * PAGE_SIZE + 1 + * 3 -> 2^3 * PAGE_SIZE to 2^2 * PAGE_SIZE + 1 + * 4 -> 2^4 * PAGE_SIZE to 2^3 * PAGE_SIZE + 1 + * ... + * + * The order returned is used to find the smallest allocation granule required + * to hold an object of the specified size. + * + * The result is undefined if the size is 0. 
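[Editorial aside, not part of the patch: a few concrete values for the constant-capable helpers above; the function is an illustrative sketch only.]

#include <linux/log2.h>

static void log2_examples(void)
{
        unsigned int a = ilog2(4096);                   /* 12 */
        unsigned int b = roundup_pow_of_two(100);       /* 128 */
        unsigned int c = rounddown_pow_of_two(100);     /* 64 */
        unsigned int d = order_base_2(100);             /* 7, since 2^7 = 128 >= 100 */

        (void)a; (void)b; (void)c; (void)d;
}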
+ */ +static inline __attribute_const__ int get_order(unsigned long size) +{ + if (__builtin_constant_p(size)) { + if (!size) + return BITS_PER_LONG - PAGE_SHIFT; + + if (size < (1UL << PAGE_SHIFT)) + return 0; + + return ilog2((size) - 1) - PAGE_SHIFT + 1; + } + + size--; + size >>= PAGE_SHIFT; +#if BITS_PER_LONG == 32 + return fls(size); +#else + return fls64(size); +#endif +} + +#endif /* _LINUX_LOG2_H */ diff --git a/c_src/include/linux/lz4.h b/c_src/include/linux/lz4.h new file mode 100644 index 00000000..f574964a --- /dev/null +++ b/c_src/include/linux/lz4.h @@ -0,0 +1,10 @@ +#include <lz4.h> + +#define LZ4_compress_destSize(src, dst, srclen, dstlen, workspace) \ + LZ4_compress_destSize(src, dst, srclen, dstlen) + +#define LZ4_compress_HC(src, dst, srclen, dstlen, level, workspace) -1 + +#define LZ4_MEM_COMPRESS 0 +#define LZ4HC_MEM_COMPRESS 0 +#define LZ4HC_MIN_CLEVEL 0 diff --git a/c_src/include/linux/math.h b/c_src/include/linux/math.h new file mode 100644 index 00000000..85c8c8aa --- /dev/null +++ b/c_src/include/linux/math.h @@ -0,0 +1,171 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_MATH_H +#define _LINUX_MATH_H + +#include <linux/kernel.h> + +/* abs() */ +#include <stdlib.h> + +/* + * This looks more complex than it should be. But we need to + * get the type for the ~ right in round_down (it needs to be + * as wide as the result!), and we want to evaluate the macro + * arguments just once each. + */ +#define __round_mask(x, y) ((__typeof__(x))((y)-1)) + +/** + * round_up - round up to next specified power of 2 + * @x: the value to round + * @y: multiple to round up to (must be a power of 2) + * + * Rounds @x up to next multiple of @y (which must be a power of 2). + * To perform arbitrary rounding up, use roundup() below. + */ +#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) + +/** + * round_down - round down to next specified power of 2 + * @x: the value to round + * @y: multiple to round down to (must be a power of 2) + * + * Rounds @x down to next multiple of @y (which must be a power of 2). + * To perform arbitrary rounding down, use rounddown() below. + */ +#define round_down(x, y) ((x) & ~__round_mask(x, y)) + +#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) + +#define DIV_ROUND_DOWN_ULL(ll, d) \ + ({ unsigned long long _tmp = (ll); do_div(_tmp, d); _tmp; }) + +#define DIV_ROUND_UP_ULL(ll, d) \ + DIV_ROUND_DOWN_ULL((unsigned long long)(ll) + (d) - 1, (d)) + +#if BITS_PER_LONG == 32 +# define DIV_ROUND_UP_SECTOR_T(ll,d) DIV_ROUND_UP_ULL(ll, d) +#else +# define DIV_ROUND_UP_SECTOR_T(ll,d) DIV_ROUND_UP(ll,d) +#endif + +/** + * roundup - round up to the next specified multiple + * @x: the value to up + * @y: multiple to round up to + * + * Rounds @x up to next multiple of @y. If @y will always be a power + * of 2, consider using the faster round_up(). + */ +#define roundup(x, y) ( \ +{ \ + typeof(y) __y = y; \ + (((x) + (__y - 1)) / __y) * __y; \ +} \ +) +/** + * rounddown - round down to next specified multiple + * @x: the value to round + * @y: multiple to round down to + * + * Rounds @x down to next multiple of @y. If @y will always be a power + * of 2, consider using the faster round_down(). + */ +#define rounddown(x, y) ( \ +{ \ + typeof(x) __x = (x); \ + __x - (__x % (y)); \ +} \ +) + +/* + * Divide positive or negative dividend by positive or negative divisor + * and round to closest integer. 
Result is undefined for negative + * divisors if the dividend variable type is unsigned and for negative + * dividends if the divisor variable type is unsigned. + */ +#define DIV_ROUND_CLOSEST(x, divisor)( \ +{ \ + typeof(x) __x = x; \ + typeof(divisor) __d = divisor; \ + (((typeof(x))-1) > 0 || \ + ((typeof(divisor))-1) > 0 || \ + (((__x) > 0) == ((__d) > 0))) ? \ + (((__x) + ((__d) / 2)) / (__d)) : \ + (((__x) - ((__d) / 2)) / (__d)); \ +} \ +) +/* + * Same as above but for u64 dividends. divisor must be a 32-bit + * number. + */ +#define DIV_ROUND_CLOSEST_ULL(x, divisor)( \ +{ \ + typeof(divisor) __d = divisor; \ + unsigned long long _tmp = (x) + (__d) / 2; \ + do_div(_tmp, __d); \ + _tmp; \ +} \ +) + +/* + * Multiplies an integer by a fraction, while avoiding unnecessary + * overflow or loss of precision. + */ +#define mult_frac(x, numer, denom)( \ +{ \ + typeof(x) quot = (x) / (denom); \ + typeof(x) rem = (x) % (denom); \ + (quot * (numer)) + ((rem * (numer)) / (denom)); \ +} \ +) + +#define sector_div(a, b) do_div(a, b) + +/** + * reciprocal_scale - "scale" a value into range [0, ep_ro) + * @val: value + * @ep_ro: right open interval endpoint + * + * Perform a "reciprocal multiplication" in order to "scale" a value into + * range [0, @ep_ro), where the upper interval endpoint is right-open. + * This is useful, e.g. for accessing a index of an array containing + * @ep_ro elements, for example. Think of it as sort of modulus, only that + * the result isn't that of modulo. ;) Note that if initial input is a + * small value, then result will return 0. + * + * Return: a result based on @val in interval [0, @ep_ro). + */ +static inline u32 reciprocal_scale(u32 val, u32 ep_ro) +{ + return (u32)(((u64) val * ep_ro) >> 32); +} + +u64 int_pow(u64 base, unsigned int exp); +unsigned long int_sqrt(unsigned long); + +#if BITS_PER_LONG < 64 +u32 int_sqrt64(u64 x); +#else +static inline u32 int_sqrt64(u64 x) +{ + return (u32)int_sqrt(x); +} +#endif + +#define abs(x) __abs_choose_expr(x, long long, \ + __abs_choose_expr(x, long, \ + __abs_choose_expr(x, int, \ + __abs_choose_expr(x, short, \ + __abs_choose_expr(x, char, \ + __builtin_choose_expr( \ + __builtin_types_compatible_p(typeof(x), char), \ + (char)({ signed char __x = (x); __x<0?-__x:__x; }), \ + ((void)0))))))) + +#define __abs_choose_expr(x, type, other) __builtin_choose_expr( \ + __builtin_types_compatible_p(typeof(x), signed type) || \ + __builtin_types_compatible_p(typeof(x), unsigned type), \ + ({ signed type __x = (x); __x < 0 ? -__x : __x; }), other) + +#endif /* _LINUX_MATH_H */ diff --git a/c_src/include/linux/math64.h b/c_src/include/linux/math64.h new file mode 100644 index 00000000..5eb6f064 --- /dev/null +++ b/c_src/include/linux/math64.h @@ -0,0 +1,85 @@ +#ifndef _LINUX_MATH64_H +#define _LINUX_MATH64_H + +#include <linux/types.h> + +#define do_div(n,base) ({ \ + u32 __base = (base); \ + u32 __rem; \ + __rem = ((u64)(n)) % __base; \ + (n) = ((u64)(n)) / __base; \ + __rem; \ + }) + +#define div64_long(x, y) div64_s64((x), (y)) +#define div64_ul(x, y) div64_u64((x), (y)) + +/** + * div_u64_rem - unsigned 64bit divide with 32bit divisor with remainder + * + * This is commonly provided by 32bit archs to provide an optimized 64bit + * divide. 
+ */ +static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder) +{ + *remainder = dividend % divisor; + return dividend / divisor; +} + +/** + * div_s64_rem - signed 64bit divide with 32bit divisor with remainder + */ +static inline s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder) +{ + *remainder = dividend % divisor; + return dividend / divisor; +} + +/** + * div64_u64_rem - unsigned 64bit divide with 64bit divisor and remainder + */ +static inline u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder) +{ + *remainder = dividend % divisor; + return dividend / divisor; +} + +/** + * div64_u64 - unsigned 64bit divide with 64bit divisor + */ +static inline u64 div64_u64(u64 dividend, u64 divisor) +{ + return dividend / divisor; +} + +/** + * div64_s64 - signed 64bit divide with 64bit divisor + */ +static inline s64 div64_s64(s64 dividend, s64 divisor) +{ + return dividend / divisor; +} + +/** + * div_u64 - unsigned 64bit divide with 32bit divisor + * + * This is the most common 64bit divide and should be used if possible, + * as many 32bit archs can optimize this variant better than a full 64bit + * divide. + */ +static inline u64 div_u64(u64 dividend, u32 divisor) +{ + u32 remainder; + return div_u64_rem(dividend, divisor, &remainder); +} + +/** + * div_s64 - signed 64bit divide with 32bit divisor + */ +static inline s64 div_s64(s64 dividend, s32 divisor) +{ + s32 remainder; + return div_s64_rem(dividend, divisor, &remainder); +} + +#endif /* _LINUX_MATH64_H */ diff --git a/c_src/include/linux/mempool.h b/c_src/include/linux/mempool.h new file mode 100644 index 00000000..506da24d --- /dev/null +++ b/c_src/include/linux/mempool.h @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * memory buffer pool support + */ +#ifndef _LINUX_MEMPOOL_H +#define _LINUX_MEMPOOL_H + +#include <linux/wait.h> +#include <linux/compiler.h> +#include <linux/slab.h> + +struct kmem_cache; + +typedef void * (mempool_alloc_t)(gfp_t gfp_mask, void *pool_data); +typedef void (mempool_free_t)(void *element, void *pool_data); + +typedef struct mempool_s { + spinlock_t lock; + int min_nr; /* nr of elements at *elements */ + int curr_nr; /* Current nr of elements at *elements */ + void **elements; + + void *pool_data; + mempool_alloc_t *alloc; + mempool_free_t *free; + wait_queue_head_t wait; +} mempool_t; + +static inline bool mempool_initialized(mempool_t *pool) +{ + return pool->elements != NULL; +} + +void mempool_exit(mempool_t *pool); +int mempool_init_node(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn, + mempool_free_t *free_fn, void *pool_data, + gfp_t gfp_mask, int node_id); +int mempool_init(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn, + mempool_free_t *free_fn, void *pool_data); + +extern mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn, + mempool_free_t *free_fn, void *pool_data); +extern mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn, + mempool_free_t *free_fn, void *pool_data, + gfp_t gfp_mask, int nid); + +extern int mempool_resize(mempool_t *pool, int new_min_nr); +extern void mempool_destroy(mempool_t *pool); +extern void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask) __malloc; +extern void mempool_free(void *element, mempool_t *pool); + +/* + * A mempool_alloc_t and mempool_free_t that get the memory from + * a slab cache that is passed in through pool_data. + * Note: the slab cache may not have a ctor function. 
+ */ +void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data); +void mempool_free_slab(void *element, void *pool_data); + +static inline int +mempool_init_slab_pool(mempool_t *pool, int min_nr, struct kmem_cache *kc) +{ + return mempool_init(pool, min_nr, mempool_alloc_slab, + mempool_free_slab, (void *) kc); +} + +static inline mempool_t * +mempool_create_slab_pool(int min_nr, struct kmem_cache *kc) +{ + return mempool_create(min_nr, mempool_alloc_slab, mempool_free_slab, + (void *) kc); +} + +/* + * a mempool_alloc_t and a mempool_free_t to kmalloc and kfree the + * amount of memory specified by pool_data + */ +void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data); +void mempool_kfree(void *element, void *pool_data); + +static inline int mempool_init_kmalloc_pool(mempool_t *pool, int min_nr, size_t size) +{ + return mempool_init(pool, min_nr, mempool_kmalloc, + mempool_kfree, (void *) size); +} + +static inline mempool_t *mempool_create_kmalloc_pool(int min_nr, size_t size) +{ + return mempool_create(min_nr, mempool_kmalloc, mempool_kfree, + (void *) size); +} + +/* + * A mempool_alloc_t and mempool_free_t for a simple page allocator that + * allocates pages of the order specified by pool_data + */ +void *mempool_alloc_pages(gfp_t gfp_mask, void *pool_data); +void mempool_free_pages(void *element, void *pool_data); + +static inline int mempool_init_page_pool(mempool_t *pool, int min_nr, int order) +{ + return mempool_init(pool, min_nr, mempool_alloc_pages, + mempool_free_pages, (void *)(long)order); +} + +static inline mempool_t *mempool_create_page_pool(int min_nr, int order) +{ + return mempool_create(min_nr, mempool_alloc_pages, mempool_free_pages, + (void *)(long)order); +} + +#endif /* _LINUX_MEMPOOL_H */ diff --git a/c_src/include/linux/minmax.h b/c_src/include/linux/minmax.h new file mode 100644 index 00000000..ddc15bf7 --- /dev/null +++ b/c_src/include/linux/minmax.h @@ -0,0 +1,272 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_MINMAX_H +#define _LINUX_MINMAX_H + +#include <linux/compiler.h> +#include <linux/const.h> +#include <linux/types.h> + +/* + * min()/max()/clamp() macros must accomplish three things: + * + * - Avoid multiple evaluations of the arguments (so side-effects like + * "x++" happen only once) when non-constant. + * - Retain result as a constant expressions when called with only + * constant expressions (to avoid tripping VLA warnings in stack + * allocation usage). + * - Perform signed v unsigned type-checking (to generate compile + * errors instead of nasty runtime surprises). + * - Unsigned char/short are always promoted to signed int and can be + * compared against signed or unsigned arguments. + * - Unsigned arguments can be compared against non-negative signed constants. + * - Comparison of a signed argument against an unsigned constant fails + * even if the constant is below __INT_MAX__ and could be cast to int. 
+ */ +#define __typecheck(x, y) \ + (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1))) + +/* is_signed_type() isn't a constexpr for pointer types */ +#define __is_signed(x) \ + __builtin_choose_expr(__is_constexpr(is_signed_type(typeof(x))), \ + is_signed_type(typeof(x)), 0) + +/* True for a non-negative signed int constant */ +#define __is_noneg_int(x) \ + (__builtin_choose_expr(__is_constexpr(x) && __is_signed(x), x, -1) >= 0) + +#define __types_ok(x, y) \ + (__is_signed(x) == __is_signed(y) || \ + __is_signed((x) + 0) == __is_signed((y) + 0) || \ + __is_noneg_int(x) || __is_noneg_int(y)) + +#define __cmp_op_min < +#define __cmp_op_max > + +#define __cmp(op, x, y) ((x) __cmp_op_##op (y) ? (x) : (y)) + +#define __cmp_once(op, x, y, unique_x, unique_y) ({ \ + typeof(x) unique_x = (x); \ + typeof(y) unique_y = (y); \ + static_assert(__types_ok(x, y), \ + #op "(" #x ", " #y ") signedness error, fix types or consider u" #op "() before " #op "_t()"); \ + __cmp(op, unique_x, unique_y); }) + +#define __careful_cmp(op, x, y) \ + __builtin_choose_expr(__is_constexpr((x) - (y)), \ + __cmp(op, x, y), \ + __cmp_once(op, x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y))) + +#define __clamp(val, lo, hi) \ + ((val) >= (hi) ? (hi) : ((val) <= (lo) ? (lo) : (val))) + +#define __clamp_once(val, lo, hi, unique_val, unique_lo, unique_hi) ({ \ + typeof(val) unique_val = (val); \ + typeof(lo) unique_lo = (lo); \ + typeof(hi) unique_hi = (hi); \ + static_assert(__builtin_choose_expr(__is_constexpr((lo) > (hi)), \ + (lo) <= (hi), true), \ + "clamp() low limit " #lo " greater than high limit " #hi); \ + static_assert(__types_ok(val, lo), "clamp() 'lo' signedness error"); \ + static_assert(__types_ok(val, hi), "clamp() 'hi' signedness error"); \ + __clamp(unique_val, unique_lo, unique_hi); }) + +#define __careful_clamp(val, lo, hi) ({ \ + __builtin_choose_expr(__is_constexpr((val) - (lo) + (hi)), \ + __clamp(val, lo, hi), \ + __clamp_once(val, lo, hi, __UNIQUE_ID(__val), \ + __UNIQUE_ID(__lo), __UNIQUE_ID(__hi))); }) + +/** + * min - return minimum of two values of the same or compatible types + * @x: first value + * @y: second value + */ +#define min(x, y) __careful_cmp(min, x, y) + +/** + * max - return maximum of two values of the same or compatible types + * @x: first value + * @y: second value + */ +#define max(x, y) __careful_cmp(max, x, y) + +/** + * umin - return minimum of two non-negative values + * Signed types are zero extended to match a larger unsigned type. + * @x: first value + * @y: second value + */ +#define umin(x, y) \ + __careful_cmp(min, (x) + 0u + 0ul + 0ull, (y) + 0u + 0ul + 0ull) + +/** + * umax - return maximum of two non-negative values + * @x: first value + * @y: second value + */ +#define umax(x, y) \ + __careful_cmp(max, (x) + 0u + 0ul + 0ull, (y) + 0u + 0ul + 0ull) + +/** + * min3 - return minimum of three values + * @x: first value + * @y: second value + * @z: third value + */ +#define min3(x, y, z) min((typeof(x))min(x, y), z) + +/** + * max3 - return maximum of three values + * @x: first value + * @y: second value + * @z: third value + */ +#define max3(x, y, z) max((typeof(x))max(x, y), z) + +/** + * min_not_zero - return the minimum that is _not_ zero, unless both are zero + * @x: value1 + * @y: value2 + */ +#define min_not_zero(x, y) ({ \ + typeof(x) __x = (x); \ + typeof(y) __y = (y); \ + __x == 0 ? __y : ((__y == 0) ? 
__x : min(__x, __y)); }) + +/** + * clamp - return a value clamped to a given range with strict typechecking + * @val: current value + * @lo: lowest allowable value + * @hi: highest allowable value + * + * This macro does strict typechecking of @lo/@hi to make sure they are of the + * same type as @val. See the unnecessary pointer comparisons. + */ +#define clamp(val, lo, hi) __careful_clamp(val, lo, hi) + +/* + * ..and if you can't take the strict + * types, you can specify one yourself. + * + * Or not use min/max/clamp at all, of course. + */ + +/** + * min_t - return minimum of two values, using the specified type + * @type: data type to use + * @x: first value + * @y: second value + */ +#define min_t(type, x, y) __careful_cmp(min, (type)(x), (type)(y)) + +/** + * max_t - return maximum of two values, using the specified type + * @type: data type to use + * @x: first value + * @y: second value + */ +#define max_t(type, x, y) __careful_cmp(max, (type)(x), (type)(y)) + +/* + * Do not check the array parameter using __must_be_array(). + * In the following legit use-case where the "array" passed is a simple pointer, + * __must_be_array() will return a failure. + * --- 8< --- + * int *buff + * ... + * min = min_array(buff, nb_items); + * --- 8< --- + * + * The first typeof(&(array)[0]) is needed in order to support arrays of both + * 'int *buff' and 'int buff[N]' types. + * + * The array can be an array of const items. + * typeof() keeps the const qualifier. Use __unqual_scalar_typeof() in order + * to discard the const qualifier for the __element variable. + */ +#define __minmax_array(op, array, len) ({ \ + typeof(&(array)[0]) __array = (array); \ + typeof(len) __len = (len); \ + __unqual_scalar_typeof(__array[0]) __element = __array[--__len];\ + while (__len--) \ + __element = op(__element, __array[__len]); \ + __element; }) + +/** + * min_array - return minimum of values present in an array + * @array: array + * @len: array length + * + * Note that @len must not be zero (empty array). + */ +#define min_array(array, len) __minmax_array(min, array, len) + +/** + * max_array - return maximum of values present in an array + * @array: array + * @len: array length + * + * Note that @len must not be zero (empty array). + */ +#define max_array(array, len) __minmax_array(max, array, len) + +/** + * clamp_t - return a value clamped to a given range using a given type + * @type: the type of variable to use + * @val: current value + * @lo: minimum allowable value + * @hi: maximum allowable value + * + * This macro does no typechecking and uses temporary variables of type + * @type to make all the comparisons. + */ +#define clamp_t(type, val, lo, hi) __careful_clamp((type)(val), (type)(lo), (type)(hi)) + +/** + * clamp_val - return a value clamped to a given range using val's type + * @val: current value + * @lo: minimum allowable value + * @hi: maximum allowable value + * + * This macro does no typechecking and uses temporary variables of whatever + * type the input argument @val is. This is useful when @val is an unsigned + * type and @lo and @hi are literals that will otherwise be assigned a signed + * integer type. + */ +#define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi) + +static inline bool in_range64(u64 val, u64 start, u64 len) +{ + return (val - start) < len; +} + +static inline bool in_range32(u32 val, u32 start, u32 len) +{ + return (val - start) < len; +} + +/** + * in_range - Determine if a value lies within a range. + * @val: Value to test. + * @start: First value in range. 
+ * @len: Number of values in range. + * + * This is more efficient than "if (start <= val && val < (start + len))". + * It also gives a different answer if @start + @len overflows the size of + * the type by a sufficient amount to encompass @val. Decide for yourself + * which behaviour you want, or prove that start + len never overflow. + * Do not blindly replace one form with the other. + */ +#define in_range(val, start, len) \ + ((sizeof(start) | sizeof(len) | sizeof(val)) <= sizeof(u32) ? \ + in_range32(val, start, len) : in_range64(val, start, len)) + +/** + * swap - swap values of @a and @b + * @a: first value + * @b: second value + */ +#define swap(a, b) \ + do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) + +#endif /* _LINUX_MINMAX_H */ diff --git a/c_src/include/linux/mm.h b/c_src/include/linux/mm.h new file mode 100644 index 00000000..744a14ce --- /dev/null +++ b/c_src/include/linux/mm.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TOOLS_LINUX_MM_H +#define _TOOLS_LINUX_MM_H + +#include <sys/syscall.h> +#include <linux/types.h> + +struct sysinfo { + long uptime; /* Seconds since boot */ + unsigned long loads[3]; /* 1, 5, and 15 minute load averages */ + unsigned long totalram; /* Total usable main memory size */ + unsigned long freeram; /* Available memory size */ + unsigned long sharedram; /* Amount of shared memory */ + unsigned long bufferram; /* Memory used by buffers */ + unsigned long totalswap; /* Total swap space size */ + unsigned long freeswap; /* swap space still available */ + __u16 procs; /* Number of current processes */ + __u16 pad; /* Explicit padding for m68k */ + unsigned long totalhigh; /* Total high memory size */ + unsigned long freehigh; /* Available high memory size */ + __u32 mem_unit; /* Memory unit size in bytes */ +}; + + + +static inline void si_meminfo(struct sysinfo *val) +{ + BUG_ON(syscall(SYS_sysinfo, val)); +} + +#endif /* _TOOLS_LINUX_MM_H */ diff --git a/c_src/include/linux/module.h b/c_src/include/linux/module.h new file mode 100644 index 00000000..42d4e18a --- /dev/null +++ b/c_src/include/linux/module.h @@ -0,0 +1,48 @@ +#ifndef _LINUX_MODULE_H +#define _LINUX_MODULE_H + +#include <linux/stat.h> +#include <linux/compiler.h> +#include <linux/export.h> + +struct module; + +#define module_init(initfn) \ + __attribute__((constructor(120))) \ + static void __call_##initfn(void) { BUG_ON(initfn()); } + +#if 0 +#define module_exit(exitfn) \ + __attribute__((destructor(109))) \ + static void __call_##exitfn(void) { exitfn(); } +#endif + +#define module_exit(exitfn) \ + __attribute__((unused)) \ + static void __call_##exitfn(void) { exitfn(); } + +#define MODULE_INFO(tag, info) +#define MODULE_ALIAS(_alias) +#define MODULE_SOFTDEP(_softdep) +#define MODULE_LICENSE(_license) +#define MODULE_AUTHOR(_author) +#define MODULE_DESCRIPTION(_description) +#define MODULE_VERSION(_version) + +static inline void __module_get(struct module *module) +{ +} + +static inline int try_module_get(struct module *module) +{ + return 1; +} + +static inline void module_put(struct module *module) +{ +} + +#define module_param_named(name, value, type, perm) +#define MODULE_PARM_DESC(_parm, desc) + +#endif /* _LINUX_MODULE_H */ diff --git a/c_src/include/linux/mutex.h b/c_src/include/linux/mutex.h new file mode 100644 index 00000000..801f06e1 --- /dev/null +++ b/c_src/include/linux/mutex.h @@ -0,0 +1,18 @@ +#ifndef __TOOLS_LINUX_MUTEX_H +#define __TOOLS_LINUX_MUTEX_H + +#include <pthread.h> + +struct mutex { + pthread_mutex_t lock; +}; + 
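(A minimal, self-contained sketch of the unsigned wrap-around trick used by the in_range32()/in_range64() helpers above; the function name and test values are illustrative only and not part of this tree.)

    #include <assert.h>
    #include <stdint.h>

    /* Same idea as in_range32() above: subtracting start wraps around to a
     * huge unsigned value whenever val < start, so a single comparison
     * against len checks both the lower and the upper bound without the
     * overflow-prone start + len. */
    static int in_range32_sketch(uint32_t val, uint32_t start, uint32_t len)
    {
            return (uint32_t)(val - start) < len;
    }

    int main(void)
    {
            assert( in_range32_sketch(5, 3, 4));    /* 3 <= 5 < 7 */
            assert(!in_range32_sketch(2, 3, 4));    /* below start: 2 - 3 wraps */
            assert(!in_range32_sketch(7, 3, 4));    /* one past the end */
            return 0;
    }
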
+#define DEFINE_MUTEX(mutexname) \ + struct mutex mutexname = { .lock = PTHREAD_MUTEX_INITIALIZER } + +#define mutex_init(l) pthread_mutex_init(&(l)->lock, NULL) +#define mutex_lock(l) pthread_mutex_lock(&(l)->lock) +#define mutex_trylock(l) (!pthread_mutex_trylock(&(l)->lock)) +#define mutex_unlock(l) pthread_mutex_unlock(&(l)->lock) + +#endif /* __TOOLS_LINUX_MUTEX_H */ diff --git a/c_src/include/linux/osq_lock.h b/c_src/include/linux/osq_lock.h new file mode 100644 index 00000000..bde9f0d2 --- /dev/null +++ b/c_src/include/linux/osq_lock.h @@ -0,0 +1,44 @@ +#ifndef __LINUX_OSQ_LOCK_H +#define __LINUX_OSQ_LOCK_H + +/* + * An MCS like lock especially tailored for optimistic spinning for sleeping + * lock implementations (mutex, rwsem, etc). + */ +struct optimistic_spin_node { + struct optimistic_spin_node *next, *prev; + int locked; /* 1 if lock acquired */ + int cpu; /* encoded CPU # + 1 value */ +}; + +struct optimistic_spin_queue { + /* + * Stores an encoded value of the CPU # of the tail node in the queue. + * If the queue is empty, then it's set to OSQ_UNLOCKED_VAL. + */ + atomic_t tail; +}; + +#define OSQ_UNLOCKED_VAL (0) + +/* Init macro and function. */ +#define OSQ_LOCK_UNLOCKED { ATOMIC_INIT(OSQ_UNLOCKED_VAL) } + +static inline void osq_lock_init(struct optimistic_spin_queue *lock) +{ + atomic_set(&lock->tail, OSQ_UNLOCKED_VAL); +} + +static inline bool osq_lock(struct optimistic_spin_queue *lock) +{ + return false; +} + +static inline void osq_unlock(struct optimistic_spin_queue *lock) {} + +static inline bool osq_is_locked(struct optimistic_spin_queue *lock) +{ + return atomic_read(&lock->tail) != OSQ_UNLOCKED_VAL; +} + +#endif diff --git a/c_src/include/linux/overflow.h b/c_src/include/linux/overflow.h new file mode 100644 index 00000000..ba30f77e --- /dev/null +++ b/c_src/include/linux/overflow.h @@ -0,0 +1,345 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +#ifndef __LINUX_OVERFLOW_H +#define __LINUX_OVERFLOW_H + +#include <linux/compiler.h> +#include <linux/limits.h> + +/* + * In the fallback code below, we need to compute the minimum and + * maximum values representable in a given type. These macros may also + * be useful elsewhere, so we provide them outside the + * COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW block. + * + * It would seem more obvious to do something like + * + * #define type_min(T) (T)(is_signed_type(T) ? (T)1 << (8*sizeof(T)-1) : 0) + * #define type_max(T) (T)(is_signed_type(T) ? ((T)1 << (8*sizeof(T)-1)) - 1 : ~(T)0) + * + * Unfortunately, the middle expressions, strictly speaking, have + * undefined behaviour, and at least some versions of gcc warn about + * the type_max expression (but not if -fsanitize=undefined is in + * effect; in that case, the warning is deferred to runtime...). + * + * The slightly excessive casting in type_min is to make sure the + * macros also produce sensible values for the exotic type _Bool. [The + * overflow checkers only almost work for _Bool, but that's + * a-feature-not-a-bug, since people shouldn't be doing arithmetic on + * _Bools. Besides, the gcc builtins don't allow _Bool* as third + * argument.] + * + * Idea stolen from + * https://mail-index.netbsd.org/tech-misc/2007/02/05/0000.html - + * credit to Christian Biere. 
+ */ +#define __type_half_max(type) ((type)1 << (8*sizeof(type) - 1 - is_signed_type(type))) +#define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T))) +#define type_min(T) ((T)((T)-type_max(T)-(T)1)) + +/* + * Avoids triggering -Wtype-limits compilation warning, + * while using unsigned data types to check a < 0. + */ +#define is_non_negative(a) ((a) > 0 || (a) == 0) +#define is_negative(a) (!(is_non_negative(a))) + +/* + * Allows for effectively applying __must_check to a macro so we can have + * both the type-agnostic benefits of the macros while also being able to + * enforce that the return value is, in fact, checked. + */ +static inline bool __must_check __must_check_overflow(bool overflow) +{ + return unlikely(overflow); +} + +#ifdef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW +/* + * For simplicity and code hygiene, the fallback code below insists on + * a, b and *d having the same type (similar to the min() and max() + * macros), whereas gcc's type-generic overflow checkers accept + * different types. Hence we don't just make check_add_overflow an + * alias for __builtin_add_overflow, but add type checks similar to + * below. + */ +#define check_add_overflow(a, b, d) __must_check_overflow(({ \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + typeof(d) __d = (d); \ + (void) (&__a == &__b); \ + (void) (&__a == __d); \ + __builtin_add_overflow(__a, __b, __d); \ +})) + +#define check_sub_overflow(a, b, d) __must_check_overflow(({ \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + typeof(d) __d = (d); \ + (void) (&__a == &__b); \ + (void) (&__a == __d); \ + __builtin_sub_overflow(__a, __b, __d); \ +})) + +#define check_mul_overflow(a, b, d) __must_check_overflow(({ \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + typeof(d) __d = (d); \ + (void) (&__a == &__b); \ + (void) (&__a == __d); \ + __builtin_mul_overflow(__a, __b, __d); \ +})) + +#else + + +/* Checking for unsigned overflow is relatively easy without causing UB. */ +#define __unsigned_add_overflow(a, b, d) ({ \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + typeof(d) __d = (d); \ + (void) (&__a == &__b); \ + (void) (&__a == __d); \ + *__d = __a + __b; \ + *__d < __a; \ +}) +#define __unsigned_sub_overflow(a, b, d) ({ \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + typeof(d) __d = (d); \ + (void) (&__a == &__b); \ + (void) (&__a == __d); \ + *__d = __a - __b; \ + __a < __b; \ +}) +/* + * If one of a or b is a compile-time constant, this avoids a division. + */ +#define __unsigned_mul_overflow(a, b, d) ({ \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + typeof(d) __d = (d); \ + (void) (&__a == &__b); \ + (void) (&__a == __d); \ + *__d = __a * __b; \ + __builtin_constant_p(__b) ? \ + __b > 0 && __a > type_max(typeof(__a)) / __b : \ + __a > 0 && __b > type_max(typeof(__b)) / __a; \ +}) + +/* + * For signed types, detecting overflow is much harder, especially if + * we want to avoid UB. But the interface of these macros is such that + * we must provide a result in *d, and in fact we must produce the + * result promised by gcc's builtins, which is simply the possibly + * wrapped-around value. Fortunately, we can just formally do the + * operations in the widest relevant unsigned type (u64) and then + * truncate the result - gcc is smart enough to generate the same code + * with and without the (u64) casts. + */ + +/* + * Adding two signed integers can overflow only if they have the same + * sign, and overflow has happened iff the result has the opposite + * sign. 
+ */ +#define __signed_add_overflow(a, b, d) ({ \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + typeof(d) __d = (d); \ + (void) (&__a == &__b); \ + (void) (&__a == __d); \ + *__d = (u64)__a + (u64)__b; \ + (((~(__a ^ __b)) & (*__d ^ __a)) \ + & type_min(typeof(__a))) != 0; \ +}) + +/* + * Subtraction is similar, except that overflow can now happen only + * when the signs are opposite. In this case, overflow has happened if + * the result has the opposite sign of a. + */ +#define __signed_sub_overflow(a, b, d) ({ \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + typeof(d) __d = (d); \ + (void) (&__a == &__b); \ + (void) (&__a == __d); \ + *__d = (u64)__a - (u64)__b; \ + ((((__a ^ __b)) & (*__d ^ __a)) \ + & type_min(typeof(__a))) != 0; \ +}) + +/* + * Signed multiplication is rather hard. gcc always follows C99, so + * division is truncated towards 0. This means that we can write the + * overflow check like this: + * + * (a > 0 && (b > MAX/a || b < MIN/a)) || + * (a < -1 && (b > MIN/a || b < MAX/a) || + * (a == -1 && b == MIN) + * + * The redundant casts of -1 are to silence an annoying -Wtype-limits + * (included in -Wextra) warning: When the type is u8 or u16, the + * __b_c_e in check_mul_overflow obviously selects + * __unsigned_mul_overflow, but unfortunately gcc still parses this + * code and warns about the limited range of __b. + */ + +#define __signed_mul_overflow(a, b, d) ({ \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + typeof(d) __d = (d); \ + typeof(a) __tmax = type_max(typeof(a)); \ + typeof(a) __tmin = type_min(typeof(a)); \ + (void) (&__a == &__b); \ + (void) (&__a == __d); \ + *__d = (u64)__a * (u64)__b; \ + (__b > 0 && (__a > __tmax/__b || __a < __tmin/__b)) || \ + (__b < (typeof(__b))-1 && (__a > __tmin/__b || __a < __tmax/__b)) || \ + (__b == (typeof(__b))-1 && __a == __tmin); \ +}) + + +#define check_add_overflow(a, b, d) __must_check_overflow( \ + __builtin_choose_expr(is_signed_type(typeof(a)), \ + __signed_add_overflow(a, b, d), \ + __unsigned_add_overflow(a, b, d))) + +#define check_sub_overflow(a, b, d) __must_check_overflow( \ + __builtin_choose_expr(is_signed_type(typeof(a)), \ + __signed_sub_overflow(a, b, d), \ + __unsigned_sub_overflow(a, b, d))) + +#define check_mul_overflow(a, b, d) __must_check_overflow( \ + __builtin_choose_expr(is_signed_type(typeof(a)), \ + __signed_mul_overflow(a, b, d), \ + __unsigned_mul_overflow(a, b, d))) + +#endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */ + +/** check_shl_overflow() - Calculate a left-shifted value and check overflow + * + * @a: Value to be shifted + * @s: How many bits left to shift + * @d: Pointer to where to store the result + * + * Computes *@d = (@a << @s) + * + * Returns true if '*d' cannot hold the result or when 'a << s' doesn't + * make sense. Example conditions: + * - 'a << s' causes bits to be lost when stored in *d. + * - 's' is garbage (e.g. negative) or so large that the result of + * 'a << s' is guaranteed to be 0. + * - 'a' is negative. + * - 'a << s' sets the sign bit, if any, in '*d'. + * + * '*d' will hold the results of the attempted shift, but is not + * considered "safe for use" if false is returned. + */ +#define check_shl_overflow(a, s, d) __must_check_overflow(({ \ + typeof(a) _a = a; \ + typeof(s) _s = s; \ + typeof(d) _d = d; \ + u64 _a_full = _a; \ + unsigned int _to_shift = \ + is_non_negative(_s) && _s < 8 * sizeof(*d) ? 
_s : 0; \ + *_d = (_a_full << _to_shift); \ + (_to_shift != _s || is_negative(*_d) || is_negative(_a) || \ + (*_d >> _to_shift) != _a); \ +})) + +/** + * array_size() - Calculate size of 2-dimensional array. + * + * @a: dimension one + * @b: dimension two + * + * Calculates size of 2-dimensional array: @a * @b. + * + * Returns: number of bytes needed to represent the array or SIZE_MAX on + * overflow. + */ +static inline __must_check size_t array_size(size_t a, size_t b) +{ + size_t bytes; + + if (check_mul_overflow(a, b, &bytes)) + return SIZE_MAX; + + return bytes; +} + +/** + * array3_size() - Calculate size of 3-dimensional array. + * + * @a: dimension one + * @b: dimension two + * @c: dimension three + * + * Calculates size of 3-dimensional array: @a * @b * @c. + * + * Returns: number of bytes needed to represent the array or SIZE_MAX on + * overflow. + */ +static inline __must_check size_t array3_size(size_t a, size_t b, size_t c) +{ + size_t bytes; + + if (check_mul_overflow(a, b, &bytes)) + return SIZE_MAX; + if (check_mul_overflow(bytes, c, &bytes)) + return SIZE_MAX; + + return bytes; +} + +/* + * Compute a*b+c, returning SIZE_MAX on overflow. Internal helper for + * struct_size() below. + */ +static inline __must_check size_t __ab_c_size(size_t a, size_t b, size_t c) +{ + size_t bytes; + + if (check_mul_overflow(a, b, &bytes)) + return SIZE_MAX; + if (check_add_overflow(bytes, c, &bytes)) + return SIZE_MAX; + + return bytes; +} + +/** + * struct_size() - Calculate size of structure with trailing array. + * @p: Pointer to the structure. + * @member: Name of the array member. + * @count: Number of elements in the array. + * + * Calculates size of memory needed for structure @p followed by an + * array of @count number of @member elements. + * + * Return: number of bytes needed or SIZE_MAX on overflow. + */ +#define struct_size(p, member, count) \ + __ab_c_size(count, \ + sizeof(*(p)->member) + __must_be_array((p)->member),\ + sizeof(*(p))) + +/** + * flex_array_size() - Calculate size of a flexible array member + * within an enclosing structure. + * + * @p: Pointer to the structure. + * @member: Name of the flexible array member. + * @count: Number of elements in the array. + * + * Calculates size of a flexible array of @count number of @member + * elements, at the end of structure @p. + * + * Return: number of bytes needed or SIZE_MAX on overflow. 
+ */ +#define flex_array_size(p, member, count) \ + array_size(count, \ + sizeof(*(p)->member) + __must_be_array((p)->member)) + +#endif /* __LINUX_OVERFLOW_H */ diff --git a/c_src/include/linux/page.h b/c_src/include/linux/page.h new file mode 100644 index 00000000..111e5e68 --- /dev/null +++ b/c_src/include/linux/page.h @@ -0,0 +1,38 @@ +#ifndef _LINUX_PAGE_H +#define _LINUX_PAGE_H + +#include <sys/user.h> + +struct page; + +#ifndef PAGE_SIZE + +#define PAGE_SIZE 4096UL +#define PAGE_MASK (~(PAGE_SIZE - 1)) + +#endif + +#ifndef PAGE_SHIFT +#define PAGE_SHIFT 12 +#endif + + +#define virt_to_page(p) \ + ((struct page *) (((unsigned long) (p)) & PAGE_MASK)) +#define offset_in_page(p) ((unsigned long) (p) & ~PAGE_MASK) + +#define page_address(p) ((void *) (p)) + +#define kmap_atomic(page) page_address(page) +#define kunmap_atomic(addr) do {} while (0) + +#define kmap_local_page(page) page_address(page) +#define kunmap_local(addr) do {} while (0) + +#define PageHighMem(page) false + +static const char zero_page[PAGE_SIZE]; + +#define ZERO_PAGE(o) ((struct page *) &zero_page[0]) + +#endif /* _LINUX_PAGE_H */ diff --git a/c_src/include/linux/percpu-refcount.h b/c_src/include/linux/percpu-refcount.h new file mode 100644 index 00000000..06550564 --- /dev/null +++ b/c_src/include/linux/percpu-refcount.h @@ -0,0 +1,180 @@ +#ifndef __TOOLS_LINUX_PERCPU_REFCOUNT_H +#define __TOOLS_LINUX_PERCPU_REFCOUNT_H + +#include <linux/atomic.h> +#include <linux/kernel.h> +#include <linux/percpu.h> + +struct percpu_ref; +typedef void (percpu_ref_func_t)(struct percpu_ref *); + +/* flags set in the lower bits of percpu_ref->percpu_count_ptr */ +enum { + __PERCPU_REF_ATOMIC = 1LU << 0, /* operating in atomic mode */ + __PERCPU_REF_DEAD = 1LU << 1, /* (being) killed */ + __PERCPU_REF_ATOMIC_DEAD = __PERCPU_REF_ATOMIC | __PERCPU_REF_DEAD, + + __PERCPU_REF_FLAG_BITS = 2, +}; + +/* @flags for percpu_ref_init() */ +enum { + PERCPU_REF_INIT_ATOMIC = 1 << 0, + + /* + * Start dead w/ ref == 0 in atomic mode. Must be revived with + * percpu_ref_reinit() before used. Implies INIT_ATOMIC. + */ + PERCPU_REF_INIT_DEAD = 1 << 1, +}; + +struct percpu_ref { + atomic_long_t count; + percpu_ref_func_t *release; + percpu_ref_func_t *confirm_switch; +}; + +static inline void percpu_ref_exit(struct percpu_ref *ref) {} + +static inline int __must_check percpu_ref_init(struct percpu_ref *ref, + percpu_ref_func_t *release, unsigned int flags, + gfp_t gfp) +{ + unsigned long start_count = 0; + + if (!(flags & PERCPU_REF_INIT_DEAD)) + start_count++; + + atomic_long_set(&ref->count, start_count); + + ref->release = release; + return 0; +} + +/** + * percpu_ref_get_many - increment a percpu refcount + * @ref: percpu_ref to get + * @nr: number of references to get + * + * Analogous to atomic_long_add(). + * + * This function is safe to call as long as @ref is between init and exit. + */ +static inline void percpu_ref_get_many(struct percpu_ref *ref, unsigned long nr) +{ + atomic_long_add(nr, &ref->count); +} + +/** + * percpu_ref_get - increment a percpu refcount + * @ref: percpu_ref to get + * + * Analagous to atomic_long_inc(). + * + * This function is safe to call as long as @ref is between init and exit. + */ +static inline void percpu_ref_get(struct percpu_ref *ref) +{ + percpu_ref_get_many(ref, 1); +} + +/** + * percpu_ref_tryget - try to increment a percpu refcount + * @ref: percpu_ref to try-get + * + * Increment a percpu refcount unless its count already reached zero. + * Returns %true on success; %false on failure. 
+ * + * This function is safe to call as long as @ref is between init and exit. + */ +static inline bool percpu_ref_tryget(struct percpu_ref *ref) +{ + return atomic_long_inc_not_zero(&ref->count); +} + +/** + * percpu_ref_tryget_live - try to increment a live percpu refcount + * @ref: percpu_ref to try-get + * + * Increment a percpu refcount unless it has already been killed. Returns + * %true on success; %false on failure. + * + * Completion of percpu_ref_kill() in itself doesn't guarantee that this + * function will fail. For such guarantee, percpu_ref_kill_and_confirm() + * should be used. After the confirm_kill callback is invoked, it's + * guaranteed that no new reference will be given out by + * percpu_ref_tryget_live(). + * + * This function is safe to call as long as @ref is between init and exit. + */ +static inline bool percpu_ref_tryget_live(struct percpu_ref *ref) +{ + return atomic_long_inc_not_zero(&ref->count); +} + +/** + * percpu_ref_put_many - decrement a percpu refcount + * @ref: percpu_ref to put + * @nr: number of references to put + * + * Decrement the refcount, and if 0, call the release function (which was passed + * to percpu_ref_init()) + * + * This function is safe to call as long as @ref is between init and exit. + */ +static inline void percpu_ref_put_many(struct percpu_ref *ref, unsigned long nr) +{ + if (unlikely(atomic_long_sub_and_test(nr, &ref->count))) + ref->release(ref); +} + +/** + * percpu_ref_put - decrement a percpu refcount + * @ref: percpu_ref to put + * + * Decrement the refcount, and if 0, call the release function (which was passed + * to percpu_ref_init()) + * + * This function is safe to call as long as @ref is between init and exit. + */ +static inline void percpu_ref_put(struct percpu_ref *ref) +{ + percpu_ref_put_many(ref, 1); +} + +static inline void percpu_ref_reinit(struct percpu_ref *ref) +{ + percpu_ref_get(ref); +} + +/** + * percpu_ref_kill - drop the initial ref + * @ref: percpu_ref to kill + * + * Must be used to drop the initial ref on a percpu refcount; must be called + * precisely once before shutdown. + */ +static inline void percpu_ref_kill(struct percpu_ref *ref) +{ + percpu_ref_put(ref); +} + +/** + * percpu_ref_is_zero - test whether a percpu refcount reached zero + * @ref: percpu_ref to test + * + * Returns %true if @ref reached zero. + * + * This function is safe to call as long as @ref is between init and exit. 
+ */ +static inline bool percpu_ref_is_zero(struct percpu_ref *ref) +{ + return !atomic_long_read(&ref->count); +} + +static inline bool percpu_ref_is_dying(struct percpu_ref *ref) +{ + return percpu_ref_is_zero(ref); +} + +#endif /* __TOOLS_LINUX_PERCPU_REFCOUNT_H */ diff --git a/c_src/include/linux/percpu-rwsem.h b/c_src/include/linux/percpu-rwsem.h new file mode 100644 index 00000000..153251c0 --- /dev/null +++ b/c_src/include/linux/percpu-rwsem.h @@ -0,0 +1,58 @@ + +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_PERCPU_RWSEM_H +#define _LINUX_PERCPU_RWSEM_H + +#include <pthread.h> +#include <linux/preempt.h> + +struct percpu_rw_semaphore { + pthread_mutex_t lock; +}; + +static inline void percpu_down_read_preempt_disable(struct percpu_rw_semaphore *sem) +{ + pthread_mutex_lock(&sem->lock); +} + +static inline void percpu_down_read(struct percpu_rw_semaphore *sem) +{ + pthread_mutex_lock(&sem->lock); +} + +static inline int percpu_down_read_trylock(struct percpu_rw_semaphore *sem) +{ + return !pthread_mutex_trylock(&sem->lock); +} + +static inline void percpu_up_read_preempt_enable(struct percpu_rw_semaphore *sem) +{ + pthread_mutex_unlock(&sem->lock); +} + +static inline void percpu_up_read(struct percpu_rw_semaphore *sem) +{ + pthread_mutex_unlock(&sem->lock); +} + +static inline void percpu_down_write(struct percpu_rw_semaphore *sem) +{ + pthread_mutex_lock(&sem->lock); +} + +static inline void percpu_up_write(struct percpu_rw_semaphore *sem) +{ + pthread_mutex_unlock(&sem->lock); +} + +static inline void percpu_free_rwsem(struct percpu_rw_semaphore *sem) {} + +static inline int percpu_init_rwsem(struct percpu_rw_semaphore *sem) +{ + pthread_mutex_init(&sem->lock, NULL); + return 0; +} + +#define percpu_rwsem_assert_held(sem) do {} while (0) + +#endif diff --git a/c_src/include/linux/percpu.h b/c_src/include/linux/percpu.h new file mode 100644 index 00000000..740d8332 --- /dev/null +++ b/c_src/include/linux/percpu.h @@ -0,0 +1,191 @@ +#ifndef __TOOLS_LINUX_PERCPU_H +#define __TOOLS_LINUX_PERCPU_H + +#include <linux/cpumask.h> + +#define __percpu + +#define free_percpu(percpu) free(percpu) + +#define __alloc_percpu_gfp(size, align, gfp) calloc(1, size) +#define __alloc_percpu(size, align) calloc(1, size) + +#define alloc_percpu_gfp(type, gfp) \ + (typeof(type) __percpu *)__alloc_percpu_gfp(sizeof(type), \ + __alignof__(type), gfp) +#define alloc_percpu(type) \ + (typeof(type) __percpu *)__alloc_percpu(sizeof(type), \ + __alignof__(type)) + +#define __verify_pcpu_ptr(ptr) + +#define per_cpu_ptr(ptr, cpu) (ptr) +#define raw_cpu_ptr(ptr) (ptr) +#define this_cpu_ptr(ptr) raw_cpu_ptr(ptr) + +#define __pcpu_size_call_return(stem, variable) \ +({ \ + typeof(variable) pscr_ret__; \ + __verify_pcpu_ptr(&(variable)); \ + switch(sizeof(variable)) { \ + case 1: pscr_ret__ = stem##1(variable); break; \ + case 2: pscr_ret__ = stem##2(variable); break; \ + case 4: pscr_ret__ = stem##4(variable); break; \ + case 8: pscr_ret__ = stem##8(variable); break; \ + default: \ + __bad_size_call_parameter(); break; \ + } \ + pscr_ret__; \ +}) + +#define __pcpu_size_call_return2(stem, variable, ...) 
\ +({ \ + typeof(variable) pscr2_ret__; \ + __verify_pcpu_ptr(&(variable)); \ + switch(sizeof(variable)) { \ + case 1: pscr2_ret__ = stem##1(variable, __VA_ARGS__); break; \ + case 2: pscr2_ret__ = stem##2(variable, __VA_ARGS__); break; \ + case 4: pscr2_ret__ = stem##4(variable, __VA_ARGS__); break; \ + case 8: pscr2_ret__ = stem##8(variable, __VA_ARGS__); break; \ + default: \ + __bad_size_call_parameter(); break; \ + } \ + pscr2_ret__; \ +}) + +/* + * Special handling for cmpxchg_double. cmpxchg_double is passed two + * percpu variables. The first has to be aligned to a double word + * boundary and the second has to follow directly thereafter. + * We enforce this on all architectures even if they don't support + * a double cmpxchg instruction, since it's a cheap requirement, and it + * avoids breaking the requirement for architectures with the instruction. + */ +#define __pcpu_double_call_return_bool(stem, pcp1, pcp2, ...) \ +({ \ + bool pdcrb_ret__; \ + __verify_pcpu_ptr(&(pcp1)); \ + BUILD_BUG_ON(sizeof(pcp1) != sizeof(pcp2)); \ + VM_BUG_ON((unsigned long)(&(pcp1)) % (2 * sizeof(pcp1))); \ + VM_BUG_ON((unsigned long)(&(pcp2)) != \ + (unsigned long)(&(pcp1)) + sizeof(pcp1)); \ + switch(sizeof(pcp1)) { \ + case 1: pdcrb_ret__ = stem##1(pcp1, pcp2, __VA_ARGS__); break; \ + case 2: pdcrb_ret__ = stem##2(pcp1, pcp2, __VA_ARGS__); break; \ + case 4: pdcrb_ret__ = stem##4(pcp1, pcp2, __VA_ARGS__); break; \ + case 8: pdcrb_ret__ = stem##8(pcp1, pcp2, __VA_ARGS__); break; \ + default: \ + __bad_size_call_parameter(); break; \ + } \ + pdcrb_ret__; \ +}) + +#define __pcpu_size_call(stem, variable, ...) \ +do { \ + __verify_pcpu_ptr(&(variable)); \ + switch(sizeof(variable)) { \ + case 1: stem##1(variable, __VA_ARGS__);break; \ + case 2: stem##2(variable, __VA_ARGS__);break; \ + case 4: stem##4(variable, __VA_ARGS__);break; \ + case 8: stem##8(variable, __VA_ARGS__);break; \ + default: \ + __bad_size_call_parameter();break; \ + } \ +} while (0) + +#define raw_cpu_read(pcp) __pcpu_size_call_return(raw_cpu_read_, pcp) +#define raw_cpu_write(pcp, val) __pcpu_size_call(raw_cpu_write_, pcp, val) +#define raw_cpu_add(pcp, val) __pcpu_size_call(raw_cpu_add_, pcp, val) +#define raw_cpu_and(pcp, val) __pcpu_size_call(raw_cpu_and_, pcp, val) +#define raw_cpu_or(pcp, val) __pcpu_size_call(raw_cpu_or_, pcp, val) +#define raw_cpu_add_return(pcp, val) __pcpu_size_call_return2(raw_cpu_add_return_, pcp, val) +#define raw_cpu_xchg(pcp, nval) __pcpu_size_call_return2(raw_cpu_xchg_, pcp, nval) +#define raw_cpu_cmpxchg(pcp, oval, nval) \ + __pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval) +#define raw_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + __pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, pcp1, pcp2, oval1, oval2, nval1, nval2) + +#define raw_cpu_sub(pcp, val) raw_cpu_add(pcp, -(val)) +#define raw_cpu_inc(pcp) raw_cpu_add(pcp, 1) +#define raw_cpu_dec(pcp) raw_cpu_sub(pcp, 1) +#define raw_cpu_sub_return(pcp, val) raw_cpu_add_return(pcp, -(typeof(pcp))(val)) +#define raw_cpu_inc_return(pcp) raw_cpu_add_return(pcp, 1) +#define raw_cpu_dec_return(pcp) raw_cpu_add_return(pcp, -1) + +#define __this_cpu_read(pcp) \ +({ \ + raw_cpu_read(pcp); \ +}) + +#define __this_cpu_write(pcp, val) \ +({ \ + raw_cpu_write(pcp, val); \ +}) + +#define __this_cpu_add(pcp, val) \ +({ \ + raw_cpu_add(pcp, val); \ +}) + +#define __this_cpu_and(pcp, val) \ +({ \ + raw_cpu_and(pcp, val); \ +}) + +#define __this_cpu_or(pcp, val) \ +({ \ + raw_cpu_or(pcp, val); \ +}) + +#define __this_cpu_add_return(pcp, 
val) \ +({ \ + raw_cpu_add_return(pcp, val); \ +}) + +#define __this_cpu_xchg(pcp, nval) \ +({ \ + raw_cpu_xchg(pcp, nval); \ +}) + +#define __this_cpu_cmpxchg(pcp, oval, nval) \ +({ \ + raw_cpu_cmpxchg(pcp, oval, nval); \ +}) + +#define __this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + raw_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2); \ +}) + +#define __this_cpu_sub(pcp, val) __this_cpu_add(pcp, -(typeof(pcp))(val)) +#define __this_cpu_inc(pcp) __this_cpu_add(pcp, 1) +#define __this_cpu_dec(pcp) __this_cpu_sub(pcp, 1) +#define __this_cpu_sub_return(pcp, val) __this_cpu_add_return(pcp, -(typeof(pcp))(val)) +#define __this_cpu_inc_return(pcp) __this_cpu_add_return(pcp, 1) +#define __this_cpu_dec_return(pcp) __this_cpu_add_return(pcp, -1) + +#define this_cpu_read(pcp) ((pcp)) +#define this_cpu_write(pcp, val) ((pcp) = val) +#define this_cpu_add(pcp, val) ((pcp) += val) +#define this_cpu_and(pcp, val) ((pcp) &= val) +#define this_cpu_or(pcp, val) ((pcp) |= val) +#define this_cpu_add_return(pcp, val) ((pcp) += val) +#define this_cpu_xchg(pcp, nval) \ +({ \ + typeof(pcp) _r = (pcp); \ + (pcp) = (nval); \ + _r; \ +}) + +#define this_cpu_cmpxchg(pcp, oval, nval) \ + __pcpu_size_call_return2(this_cpu_cmpxchg_, pcp, oval, nval) +#define this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + __pcpu_double_call_return_bool(this_cpu_cmpxchg_double_, pcp1, pcp2, oval1, oval2, nval1, nval2) + +#define this_cpu_sub(pcp, val) this_cpu_add(pcp, -(typeof(pcp))(val)) +#define this_cpu_inc(pcp) this_cpu_add(pcp, 1) +#define this_cpu_dec(pcp) this_cpu_sub(pcp, 1) +#define this_cpu_sub_return(pcp, val) this_cpu_add_return(pcp, -(typeof(pcp))(val)) +#define this_cpu_inc_return(pcp) this_cpu_add_return(pcp, 1) +#define this_cpu_dec_return(pcp) this_cpu_add_return(pcp, -1) + +#endif /* __TOOLS_LINUX_PERCPU_H */ diff --git a/c_src/include/linux/poison.h b/c_src/include/linux/poison.h new file mode 100644 index 00000000..851a855d --- /dev/null +++ b/c_src/include/linux/poison.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_POISON_H +#define _LINUX_POISON_H + +/********** include/linux/list.h **********/ + +/* + * Architectures might want to move the poison pointer offset + * into some well-recognized area such as 0xdead000000000000, + * that is also not mappable by user-space exploits: + */ +#ifdef CONFIG_ILLEGAL_POINTER_VALUE +# define POISON_POINTER_DELTA _AC(CONFIG_ILLEGAL_POINTER_VALUE, UL) +#else +# define POISON_POINTER_DELTA 0 +#endif + +/* + * These are non-NULL pointers that will result in page faults + * under normal circumstances, used to verify that nobody uses + * non-initialized list entries. + */ +#define LIST_POISON1 ((void *) 0x100 + POISON_POINTER_DELTA) +#define LIST_POISON2 ((void *) 0x122 + POISON_POINTER_DELTA) + +/********** include/linux/timer.h **********/ +#define TIMER_ENTRY_STATIC ((void *) 0x300 + POISON_POINTER_DELTA) + +/********** mm/page_poison.c **********/ +#define PAGE_POISON 0xaa + +/********** mm/page_alloc.c ************/ + +#define TAIL_MAPPING ((void *) 0x400 + POISON_POINTER_DELTA) + +/********** mm/slab.c **********/ +/* + * Magic nums for obj red zoning. + * Placed in the first word before and the first word after an obj. 
+ */ +#define RED_INACTIVE 0x09F911029D74E35BULL /* when obj is inactive */ +#define RED_ACTIVE 0xD84156C5635688C0ULL /* when obj is active */ + +#define SLUB_RED_INACTIVE 0xbb +#define SLUB_RED_ACTIVE 0xcc + +/* ...and for poisoning */ +#define POISON_INUSE 0x5a /* for use-uninitialised poisoning */ +#define POISON_FREE 0x6b /* for use-after-free poisoning */ +#define POISON_END 0xa5 /* end-byte of poisoning */ + +/********** arch/$ARCH/mm/init.c **********/ +#define POISON_FREE_INITMEM 0xcc + +/********** arch/ia64/hp/common/sba_iommu.c **********/ +/* + * arch/ia64/hp/common/sba_iommu.c uses a 16-byte poison string with a + * value of "SBAIOMMU POISON\0" for spill-over poisoning. + */ + +/********** fs/jbd/journal.c **********/ +#define JBD_POISON_FREE 0x5b +#define JBD2_POISON_FREE 0x5c + +/********** drivers/base/dmapool.c **********/ +#define POOL_POISON_FREED 0xa7 /* !inuse */ +#define POOL_POISON_ALLOCATED 0xa9 /* !initted */ + +/********** drivers/atm/ **********/ +#define ATM_POISON_FREE 0x12 +#define ATM_POISON 0xdeadbeef + +/********** kernel/mutexes **********/ +#define MUTEX_DEBUG_INIT 0x11 +#define MUTEX_DEBUG_FREE 0x22 +#define MUTEX_POISON_WW_CTX ((void *) 0x500 + POISON_POINTER_DELTA) + +/********** security/ **********/ +#define KEY_DESTROY 0xbd + +/********** net/core/page_pool.c **********/ +#define PP_SIGNATURE (0x40 + POISON_POINTER_DELTA) + +/********** net/core/skbuff.c **********/ +#define SKB_LIST_POISON_NEXT ((void *)(0x800 + POISON_POINTER_DELTA)) + +/********** kernel/bpf/ **********/ +#define BPF_PTR_POISON ((void *)(0xeB9FUL + POISON_POINTER_DELTA)) + +/********** VFS **********/ +#define VFS_PTR_POISON ((void *)(0xF5 + POISON_POINTER_DELTA)) + +#endif diff --git a/c_src/include/linux/posix_acl.h b/c_src/include/linux/posix_acl.h new file mode 100644 index 00000000..1d21bfee --- /dev/null +++ b/c_src/include/linux/posix_acl.h @@ -0,0 +1,49 @@ +/* + File: linux/posix_acl.h + + (C) 2002 Andreas Gruenbacher, <a.gruenbacher@computer.org> +*/ + + +#ifndef __LINUX_POSIX_ACL_H +#define __LINUX_POSIX_ACL_H + +#include <linux/bug.h> +#include <linux/slab.h> +#include <linux/rcupdate.h> + +#define ACL_UNDEFINED_ID (-1) + +/* a_type field in acl_user_posix_entry_t */ +#define ACL_TYPE_ACCESS (0x8000) +#define ACL_TYPE_DEFAULT (0x4000) + +/* e_tag entry in struct posix_acl_entry */ +#define ACL_USER_OBJ (0x01) +#define ACL_USER (0x02) +#define ACL_GROUP_OBJ (0x04) +#define ACL_GROUP (0x08) +#define ACL_MASK (0x10) +#define ACL_OTHER (0x20) + +/* permissions in the e_perm field */ +#define ACL_READ (0x04) +#define ACL_WRITE (0x02) +#define ACL_EXECUTE (0x01) + +struct posix_acl_entry { + short e_tag; + unsigned short e_perm; + union { + uid_t e_uid; + gid_t e_gid; + }; +}; + +struct posix_acl { + struct rcu_head a_rcu; + unsigned int a_count; + struct posix_acl_entry a_entries[0]; +}; + +#endif /* __LINUX_POSIX_ACL_H */ diff --git a/c_src/include/linux/posix_acl_xattr.h b/c_src/include/linux/posix_acl_xattr.h new file mode 100644 index 00000000..a8dad160 --- /dev/null +++ b/c_src/include/linux/posix_acl_xattr.h @@ -0,0 +1,34 @@ +/* + File: linux/posix_acl_xattr.h + + Extended attribute system call representation of Access Control Lists. 
+ + Copyright (C) 2000 by Andreas Gruenbacher <a.gruenbacher@computer.org> + Copyright (C) 2002 SGI - Silicon Graphics, Inc <linux-xfs@oss.sgi.com> + */ +#ifndef _POSIX_ACL_XATTR_H +#define _POSIX_ACL_XATTR_H + +#include <uapi/linux/xattr.h> + +/* Supported ACL a_version fields */ +#define POSIX_ACL_XATTR_VERSION 0x0002 + +/* An undefined entry e_id value */ +#define ACL_UNDEFINED_ID (-1) + +typedef struct { + __le16 e_tag; + __le16 e_perm; + __le32 e_id; +} posix_acl_xattr_entry; + +typedef struct { + __le32 a_version; + posix_acl_xattr_entry a_entries[0]; +} posix_acl_xattr_header; + +extern const struct xattr_handler nop_posix_acl_access; +extern const struct xattr_handler nop_posix_acl_default; + +#endif /* _POSIX_ACL_XATTR_H */ diff --git a/c_src/include/linux/prandom.h b/c_src/include/linux/prandom.h new file mode 100644 index 00000000..9aea22dc --- /dev/null +++ b/c_src/include/linux/prandom.h @@ -0,0 +1,33 @@ +#ifndef _LINUX_PRANDOM_H +#define _LINUX_PRANDOM_H + +#include <linux/random.h> + +static inline void prandom_bytes(void *buf, int nbytes) +{ + return get_random_bytes(buf, nbytes); +} + +#define prandom_type(type) \ +static inline type prandom_##type(void) \ +{ \ + type v; \ + \ + prandom_bytes(&v, sizeof(v)); \ + return v; \ +} + +prandom_type(int); +prandom_type(long); +prandom_type(u32); +prandom_type(u64); +#undef prandom_type + +static inline u32 prandom_u32_max(u32 max) +{ + return prandom_u32() % max; + +} + +#endif /* _LINUX_PRANDOM_H */ + diff --git a/c_src/include/linux/preempt.h b/c_src/include/linux/preempt.h new file mode 100644 index 00000000..dbc7c24d --- /dev/null +++ b/c_src/include/linux/preempt.h @@ -0,0 +1,16 @@ +#ifndef __LINUX_PREEMPT_H +#define __LINUX_PREEMPT_H + +extern void preempt_disable(void); +extern void preempt_enable(void); + +#define sched_preempt_enable_no_resched() preempt_enable() +#define preempt_enable_no_resched() preempt_enable() +#define preempt_check_resched() do { } while (0) + +#define preempt_disable_notrace() preempt_disable() +#define preempt_enable_no_resched_notrace() preempt_enable() +#define preempt_enable_notrace() preempt_enable() +#define preemptible() 0 + +#endif /* __LINUX_PREEMPT_H */ diff --git a/c_src/include/linux/prefetch.h b/c_src/include/linux/prefetch.h new file mode 100644 index 00000000..b14fbe93 --- /dev/null +++ b/c_src/include/linux/prefetch.h @@ -0,0 +1,10 @@ +#ifndef _LINUX_PREFETCH_H +#define _LINUX_PREFETCH_H + +#define prefetch(p) \ + ({ __maybe_unused typeof(p) __var = (p); }) + +#define prefetchw(p) \ + ({ __maybe_unused typeof(p) __var = (p); }) + +#endif /* _LINUX_PREFETCH_H */ diff --git a/c_src/include/linux/pretty-printers.h b/c_src/include/linux/pretty-printers.h new file mode 100644 index 00000000..f39d8edf --- /dev/null +++ b/c_src/include/linux/pretty-printers.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +/* Copyright (C) 2022 Kent Overstreet */ + +#ifndef _LINUX_PRETTY_PRINTERS_H +#define _LINUX_PRETTY_PRINTERS_H + +void prt_string_option(struct printbuf *, const char * const[], size_t); +void prt_bitflags(struct printbuf *, const char * const[], u64); + +#endif /* _LINUX_PRETTY_PRINTERS_H */ diff --git a/c_src/include/linux/printk.h b/c_src/include/linux/printk.h new file mode 100644 index 00000000..cdafb9af --- /dev/null +++ b/c_src/include/linux/printk.h @@ -0,0 +1,207 @@ +#ifndef __TOOLS_LINUX_PRINTK_H +#define __TOOLS_LINUX_PRINTK_H + +#ifndef pr_fmt +#define pr_fmt(fmt) fmt +#endif + +#include <linux/compiler.h> +#include <stdarg.h> +#include <stdio.h> + +#define 
KERN_EMERG "" +#define KERN_ALERT "" +#define KERN_CRIT "" +#define KERN_ERR "" +#define KERN_WARNING "" +#define KERN_NOTICE "" +#define KERN_INFO "" +#define KERN_DEBUG "" +#define KERN_DEFAULT "" +#define KERN_CONT "" +#define KERN_SOH "\001" + +static inline int vscnprintf(char *buf, size_t size, const char *fmt, va_list args) +{ + int i; + + i = vsnprintf(buf, size, fmt, args); + + if (likely(i < size)) + return i; + if (size != 0) + return size - 1; + return 0; +} + +static inline int scnprintf(char * buf, size_t size, const char * fmt, ...) +{ + va_list args; + int i; + + va_start(args, fmt); + i = vscnprintf(buf, size, fmt, args); + va_end(args); + + return i; +} + +#define printk(...) printf(__VA_ARGS__) +#define vprintk(...) vprintf(__VA_ARGS__) + +#define no_printk(fmt, ...) \ +({ \ + do { \ + if (0) \ + printk(fmt, ##__VA_ARGS__); \ + } while (0); \ + 0; \ +}) + +#define pr_emerg(fmt, ...) \ + printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) +#define pr_alert(fmt, ...) \ + printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__) +#define pr_crit(fmt, ...) \ + printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) +#define pr_err(fmt, ...) \ + printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) +#define pr_warning(fmt, ...) \ + printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) +#define pr_warn pr_warning +#define pr_notice(fmt, ...) \ + printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) +#define pr_info(fmt, ...) \ + printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) +/* + * Like KERN_CONT, pr_cont() should only be used when continuing + * a line with no newline ('\n') enclosed. Otherwise it defaults + * back to KERN_DEFAULT. + */ +#define pr_cont(fmt, ...) \ + printk(KERN_CONT fmt, ##__VA_ARGS__) + +/* pr_devel() should produce zero code unless DEBUG is defined */ +#ifdef DEBUG +#define pr_devel(fmt, ...) \ + printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) +#else +#define pr_devel(fmt, ...) \ + no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) +#endif + + +/* If you are writing a driver, please use dev_dbg instead */ +#if defined(CONFIG_DYNAMIC_DEBUG) +#include <linux/dynamic_debug.h> + +/* dynamic_pr_debug() uses pr_fmt() internally so we don't need it here */ +#define pr_debug(fmt, ...) \ + dynamic_pr_debug(fmt, ##__VA_ARGS__) +#elif defined(DEBUG) +#define pr_debug(fmt, ...) \ + printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) +#else +#define pr_debug(fmt, ...) \ + no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) +#endif + +/* + * Print a one-time message (analogous to WARN_ONCE() et al): + */ + +#define printk_once(fmt, ...) \ +({ \ + static bool __print_once __read_mostly; \ + bool __ret_print_once = !__print_once; \ + \ + if (!__print_once) { \ + __print_once = true; \ + printk(fmt, ##__VA_ARGS__); \ + } \ + unlikely(__ret_print_once); \ +}) +#define printk_deferred_once(fmt, ...) \ +({ \ + static bool __print_once __read_mostly; \ + bool __ret_print_once = !__print_once; \ + \ + if (!__print_once) { \ + __print_once = true; \ + printk_deferred(fmt, ##__VA_ARGS__); \ + } \ + unlikely(__ret_print_once); \ +}) + +#define pr_emerg_once(fmt, ...) \ + printk_once(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) +#define pr_alert_once(fmt, ...) \ + printk_once(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__) +#define pr_crit_once(fmt, ...) \ + printk_once(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) +#define pr_err_once(fmt, ...) \ + printk_once(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) +#define pr_warn_once(fmt, ...) \ + printk_once(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) +#define pr_notice_once(fmt, ...) 
\ + printk_once(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) +#define pr_info_once(fmt, ...) \ + printk_once(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) +#define pr_cont_once(fmt, ...) \ + printk_once(KERN_CONT pr_fmt(fmt), ##__VA_ARGS__) + +#if defined(DEBUG) +#define pr_devel_once(fmt, ...) \ + printk_once(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) +#else +#define pr_devel_once(fmt, ...) \ + no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) +#endif + +/* If you are writing a driver, please use dev_dbg instead */ +#if defined(DEBUG) +#define pr_debug_once(fmt, ...) \ + printk_once(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) +#else +#define pr_debug_once(fmt, ...) \ + no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) +#endif + +/* + * ratelimited messages with local ratelimit_state, + * no local ratelimit_state used in the !PRINTK case + */ +#define printk_ratelimited(fmt, ...) \ +({ \ + static DEFINE_RATELIMIT_STATE(_rs, \ + DEFAULT_RATELIMIT_INTERVAL, \ + DEFAULT_RATELIMIT_BURST); \ + \ + if (__ratelimit(&_rs)) \ + printk(fmt, ##__VA_ARGS__); \ +}) + +#define pr_emerg_ratelimited(fmt, ...) \ + printk_ratelimited(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) +#define pr_alert_ratelimited(fmt, ...) \ + printk_ratelimited(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__) +#define pr_crit_ratelimited(fmt, ...) \ + printk_ratelimited(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) +#define pr_err_ratelimited(fmt, ...) \ + printk_ratelimited(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) +#define pr_warn_ratelimited(fmt, ...) \ + printk_ratelimited(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) +#define pr_notice_ratelimited(fmt, ...) \ + printk_ratelimited(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) +#define pr_info_ratelimited(fmt, ...) \ + printk_ratelimited(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) +/* no pr_cont_ratelimited, don't do that... */ + +#if defined(DEBUG) +#define pr_devel_ratelimited(fmt, ...) \ + printk_ratelimited(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) +#else +#define pr_devel_ratelimited(fmt, ...) \ + no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) +#endif +#endif /* __TOOLS_LINUX_PRINTK_H */ diff --git a/c_src/include/linux/random.h b/c_src/include/linux/random.h new file mode 100644 index 00000000..3203d13c --- /dev/null +++ b/c_src/include/linux/random.h @@ -0,0 +1,70 @@ +/* + * include/linux/random.h + * + * Include file for the random number generator. 
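+ *
+ * Illustrative use of the shim below; get_random_u32_below() returns a
+ * value uniform in [0, ceil).  The variable names are made up for the
+ * example:
+ *
+ *	u64 seed = get_random_u64();
+ *	u32 idx  = get_random_u32_below(nr_slots);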
+ */ +#ifndef _LINUX_RANDOM_H +#define _LINUX_RANDOM_H + +#include <unistd.h> +#include <sys/syscall.h> +#include <linux/bug.h> +#include <linux/log2.h> + +#ifdef SYS_getrandom +static inline int getrandom(void *buf, size_t buflen, unsigned int flags) +{ + return syscall(SYS_getrandom, buf, buflen, flags); +} +#else +extern int urandom_fd; + +static inline int getrandom(void *buf, size_t buflen, unsigned int flags) +{ + return read(urandom_fd, buf, buflen); +} +#endif + +static inline void get_random_bytes(void *buf, int nbytes) +{ + BUG_ON(getrandom(buf, nbytes, 0) != nbytes); +} + +#define get_random_type(type) \ +static inline type get_random_##type(void) \ +{ \ + type v; \ + \ + get_random_bytes(&v, sizeof(v)); \ + return v; \ +} + +get_random_type(int); +get_random_type(long); +get_random_type(u8); +get_random_type(u16); +get_random_type(u32); +get_random_type(u64); + +static inline u32 get_random_u32_below(u32 ceil) +{ + if (ceil <= 1) + return 0; + for (;;) { + if (ceil <= 1U << 8) { + u32 mult = ceil * get_random_u8(); + if (likely(is_power_of_2(ceil) || (u8)mult >= (1U << 8) % ceil)) + return mult >> 8; + } else if (ceil <= 1U << 16) { + u32 mult = ceil * get_random_u16(); + if (likely(is_power_of_2(ceil) || (u16)mult >= (1U << 16) % ceil)) + return mult >> 16; + } else { + u64 mult = (u64)ceil * get_random_u32(); + if (likely(is_power_of_2(ceil) || (u32)mult >= -ceil % ceil)) + return mult >> 32; + } + } +} + +#endif /* _LINUX_RANDOM_H */ diff --git a/c_src/include/linux/ratelimit.h b/c_src/include/linux/ratelimit.h new file mode 100644 index 00000000..680181d2 --- /dev/null +++ b/c_src/include/linux/ratelimit.h @@ -0,0 +1,109 @@ +#ifndef _LINUX_RATELIMIT_H +#define _LINUX_RATELIMIT_H + +#include <linux/printk.h> +#include <linux/sched.h> +#include <linux/spinlock.h> + +#define DEFAULT_RATELIMIT_INTERVAL (5 * HZ) +#define DEFAULT_RATELIMIT_BURST 10 + +/* issue num suppressed message on exit */ +#define RATELIMIT_MSG_ON_RELEASE 1 + +struct ratelimit_state { + raw_spinlock_t lock; /* protect the state */ + + int interval; + int burst; + int printed; + int missed; + unsigned long begin; + unsigned long flags; +}; + +#define RATELIMIT_STATE_INIT(name, interval_init, burst_init) { \ + .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ + .interval = interval_init, \ + .burst = burst_init, \ + } + +#define RATELIMIT_STATE_INIT_DISABLED \ + RATELIMIT_STATE_INIT(ratelimit_state, 0, DEFAULT_RATELIMIT_BURST) + +#define DEFINE_RATELIMIT_STATE(name, interval_init, burst_init) \ + \ + struct ratelimit_state name = \ + RATELIMIT_STATE_INIT(name, interval_init, burst_init) \ + +static inline void ratelimit_state_init(struct ratelimit_state *rs, + int interval, int burst) +{ + memset(rs, 0, sizeof(*rs)); + + raw_spin_lock_init(&rs->lock); + rs->interval = interval; + rs->burst = burst; +} + +static inline void ratelimit_default_init(struct ratelimit_state *rs) +{ + return ratelimit_state_init(rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); +} + +static inline void ratelimit_state_exit(struct ratelimit_state *rs) +{ + if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) + return; + + if (rs->missed) { + pr_warn("%s: %d output lines suppressed due to ratelimiting\n", + current->comm, rs->missed); + rs->missed = 0; + } +} + +static inline void +ratelimit_set_flags(struct ratelimit_state *rs, unsigned long flags) +{ + rs->flags = flags; +} + +extern struct ratelimit_state printk_ratelimit_state; + +extern int ___ratelimit(struct ratelimit_state *rs, const char *func); +#define __ratelimit(state) 
___ratelimit(state, __func__) + +#ifdef CONFIG_PRINTK + +#define WARN_ON_RATELIMIT(condition, state) \ + WARN_ON((condition) && __ratelimit(state)) + +#define WARN_RATELIMIT(condition, format, ...) \ +({ \ + static DEFINE_RATELIMIT_STATE(_rs, \ + DEFAULT_RATELIMIT_INTERVAL, \ + DEFAULT_RATELIMIT_BURST); \ + int rtn = !!(condition); \ + \ + if (unlikely(rtn && __ratelimit(&_rs))) \ + WARN(rtn, format, ##__VA_ARGS__); \ + \ + rtn; \ +}) + +#else + +#define WARN_ON_RATELIMIT(condition, state) \ + WARN_ON(condition) + +#define WARN_RATELIMIT(condition, format, ...) \ +({ \ + int rtn = WARN(condition, format, ##__VA_ARGS__); \ + rtn; \ +}) + +#endif + +#endif /* _LINUX_RATELIMIT_H */ diff --git a/c_src/include/linux/rculist.h b/c_src/include/linux/rculist.h new file mode 100644 index 00000000..81df4e13 --- /dev/null +++ b/c_src/include/linux/rculist.h @@ -0,0 +1,16 @@ +#ifndef _LINUX_RCULIST_H +#define _LINUX_RCULIST_H + +#include <urcu/rculist.h> + + +#include <urcu/rcuhlist.h> + +#define hlist_add_head_rcu cds_hlist_add_head_rcu +#define hlist_del_rcu cds_hlist_del_rcu + +#define hlist_for_each_rcu cds_hlist_for_each_rcu +#define hlist_for_each_entry_rcu cds_hlist_for_each_entry_rcu_2 + + +#endif diff --git a/c_src/include/linux/rcupdate.h b/c_src/include/linux/rcupdate.h new file mode 100644 index 00000000..f5260270 --- /dev/null +++ b/c_src/include/linux/rcupdate.h @@ -0,0 +1,48 @@ +#ifndef __TOOLS_LINUX_RCUPDATE_H +#define __TOOLS_LINUX_RCUPDATE_H + +#include <urcu.h> +#include <linux/compiler.h> + +#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) + +#define rcu_dereference_check(p, c) rcu_dereference(p) +#define rcu_dereference_raw(p) rcu_dereference(p) +#define rcu_dereference_protected(p, c) rcu_dereference(p) +#define rcu_access_pointer(p) READ_ONCE(p) + +#define kfree_rcu(ptr, rcu_head) kfree(ptr) /* XXX */ +#define kfree_rcu_mightsleep(ptr) kfree(ptr) /* XXX */ +#define kvfree_rcu_mightsleep(ptr) kfree(ptr) /* XXX */ + +#define RCU_INIT_POINTER(p, v) WRITE_ONCE(p, v) + +/* Has the specified rcu_head structure been handed to call_rcu()? */ + +/** + * rcu_head_init - Initialize rcu_head for rcu_head_after_call_rcu() + * @rhp: The rcu_head structure to initialize. + * + * If you intend to invoke rcu_head_after_call_rcu() to test whether a + * given rcu_head structure has already been passed to call_rcu(), then + * you must also invoke this rcu_head_init() function on it just after + * allocating that structure. Calls to this function must not race with + * calls to call_rcu(), rcu_head_after_call_rcu(), or callback invocation. + */ +static inline void rcu_head_init(struct rcu_head *rhp) +{ + rhp->func = (void *)~0L; +} + +static inline bool +rcu_head_after_call_rcu(struct rcu_head *rhp, + void (*f)(struct rcu_head *head)) +{ + void (*func)(struct rcu_head *head) = READ_ONCE(rhp->func); + + if (func == f) + return true; + return false; +} + +#endif /* __TOOLS_LINUX_RCUPDATE_H */ diff --git a/c_src/include/linux/refcount.h b/c_src/include/linux/refcount.h new file mode 100644 index 00000000..ddeec986 --- /dev/null +++ b/c_src/include/linux/refcount.h @@ -0,0 +1,352 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Variant of atomic_t specialized for reference counts. + * + * The interface matches the atomic_t interface (to aid in porting) but only + * provides the few functions one should use for reference counting. 
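+ *
+ * Typical lifecycle, as a rough sketch (obj and its ref member are
+ * hypothetical, not declared here):
+ *
+ *	refcount_set(&obj->ref, 1);
+ *	...
+ *	refcount_inc(&obj->ref);
+ *	...
+ *	if (refcount_dec_and_test(&obj->ref))
+ *		kfree(obj);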
+ * + * Saturation semantics + * ==================== + * + * refcount_t differs from atomic_t in that the counter saturates at + * REFCOUNT_SATURATED and will not move once there. This avoids wrapping the + * counter and causing 'spurious' use-after-free issues. In order to avoid the + * cost associated with introducing cmpxchg() loops into all of the saturating + * operations, we temporarily allow the counter to take on an unchecked value + * and then explicitly set it to REFCOUNT_SATURATED on detecting that underflow + * or overflow has occurred. Although this is racy when multiple threads + * access the refcount concurrently, by placing REFCOUNT_SATURATED roughly + * equidistant from 0 and INT_MAX we minimise the scope for error: + * + * INT_MAX REFCOUNT_SATURATED UINT_MAX + * 0 (0x7fff_ffff) (0xc000_0000) (0xffff_ffff) + * +--------------------------------+----------------+----------------+ + * <---------- bad value! ----------> + * + * (in a signed view of the world, the "bad value" range corresponds to + * a negative counter value). + * + * As an example, consider a refcount_inc() operation that causes the counter + * to overflow: + * + * int old = atomic_fetch_add_relaxed(r); + * // old is INT_MAX, refcount now INT_MIN (0x8000_0000) + * if (old < 0) + * atomic_set(r, REFCOUNT_SATURATED); + * + * If another thread also performs a refcount_inc() operation between the two + * atomic operations, then the count will continue to edge closer to 0. If it + * reaches a value of 1 before /any/ of the threads reset it to the saturated + * value, then a concurrent refcount_dec_and_test() may erroneously free the + * underlying object. + * Linux limits the maximum number of tasks to PID_MAX_LIMIT, which is currently + * 0x400000 (and can't easily be raised in the future beyond FUTEX_TID_MASK). + * With the current PID limit, if no batched refcounting operations are used and + * the attacker can't repeatedly trigger kernel oopses in the middle of refcount + * operations, this makes it impossible for a saturated refcount to leave the + * saturation range, even if it is possible for multiple uses of the same + * refcount to nest in the context of a single task: + * + * (UINT_MAX+1-REFCOUNT_SATURATED) / PID_MAX_LIMIT = + * 0x40000000 / 0x400000 = 0x100 = 256 + * + * If hundreds of references are added/removed with a single refcounting + * operation, it may potentially be possible to leave the saturation range; but + * given the precise timing details involved with the round-robin scheduling of + * each thread manipulating the refcount and the need to hit the race multiple + * times in succession, there doesn't appear to be a practical avenue of attack + * even if using refcount_add() operations with larger increments. + * + * Memory ordering + * =============== + * + * Memory ordering rules are slightly relaxed wrt regular atomic_t functions + * and provide only what is strictly required for refcounts. + * + * The increments are fully relaxed; these will not provide ordering. The + * rationale is that whatever is used to obtain the object we're increasing the + * reference count on will provide the ordering. For locked data structures, + * its the lock acquire, for RCU/lockless data structures its the dependent + * load. + * + * Do note that inc_not_zero() provides a control dependency which will order + * future stores against the inc, this ensures we'll never modify the object + * if we did not in fact acquire a reference. 
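+ *
+ * A concrete shape of such a lookup, purely as an illustration (slot and
+ * the ref member are made-up names):
+ *
+ *	rcu_read_lock();
+ *	obj = rcu_dereference(slot);
+ *	if (obj && !refcount_inc_not_zero(&obj->ref))
+ *		obj = NULL;
+ *	rcu_read_unlock();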
+ * + * The decrements will provide release order, such that all the prior loads and + * stores will be issued before, it also provides a control dependency, which + * will order us against the subsequent free(). + * + * The control dependency is against the load of the cmpxchg (ll/sc) that + * succeeded. This means the stores aren't fully ordered, but this is fine + * because the 1->0 transition indicates no concurrency. + * + * Note that the allocator is responsible for ordering things between free() + * and alloc(). + * + * The decrements dec_and_test() and sub_and_test() also provide acquire + * ordering on success. + * + */ + +#ifndef _LINUX_REFCOUNT_H +#define _LINUX_REFCOUNT_H + +#include <linux/atomic.h> +#include <linux/bug.h> +#include <linux/compiler.h> +#include <linux/limits.h> + +struct mutex; + +/** + * typedef refcount_t - variant of atomic_t specialized for reference counts + * @refs: atomic_t counter field + * + * The counter saturates at REFCOUNT_SATURATED and will not move once + * there. This avoids wrapping the counter and causing 'spurious' + * use-after-free bugs. + */ +typedef struct refcount_struct { + atomic_t refs; +} refcount_t; + +#define REFCOUNT_INIT(n) { .refs = ATOMIC_INIT(n), } +#define REFCOUNT_MAX INT_MAX +#define REFCOUNT_SATURATED (INT_MIN / 2) + +enum refcount_saturation_type { + REFCOUNT_ADD_NOT_ZERO_OVF, + REFCOUNT_ADD_OVF, + REFCOUNT_ADD_UAF, + REFCOUNT_SUB_UAF, + REFCOUNT_DEC_LEAK, +}; + +/** + * refcount_set - set a refcount's value + * @r: the refcount + * @n: value to which the refcount will be set + */ +static inline void refcount_set(refcount_t *r, int n) +{ + atomic_set(&r->refs, n); +} + +/** + * refcount_read - get a refcount's value + * @r: the refcount + * + * Return: the refcount's value + */ +static inline unsigned int refcount_read(const refcount_t *r) +{ + return atomic_read(&r->refs); +} + +static inline __must_check bool __refcount_add_not_zero(int i, refcount_t *r, int *oldp) +{ + int old = refcount_read(r); + + do { + if (!old) + break; + } while (!atomic_try_cmpxchg_acquire(&r->refs, &old, old + i)); + + if (oldp) + *oldp = old; + + return old; +} + +/** + * refcount_add_not_zero - add a value to a refcount unless it is 0 + * @i: the value to add to the refcount + * @r: the refcount + * + * Will saturate at REFCOUNT_SATURATED and WARN. + * + * Provides no memory ordering, it is assumed the caller has guaranteed the + * object memory to be stable (RCU, etc.). It does provide a control dependency + * and thereby orders future stores. See the comment on top. + * + * Use of this function is not recommended for the normal reference counting + * use case in which references are taken and released one at a time. In these + * cases, refcount_inc(), or one of its variants, should instead be used to + * increment a reference count. + * + * Return: false if the passed refcount is 0, true otherwise + */ +static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r) +{ + return __refcount_add_not_zero(i, r, NULL); +} + +static inline void __refcount_add(int i, refcount_t *r, int *oldp) +{ + int old = atomic_add_return(i, &r->refs); + + if (oldp) + *oldp = old; +} + +/** + * refcount_add - add a value to a refcount + * @i: the value to add to the refcount + * @r: the refcount + * + * Similar to atomic_add(), but will saturate at REFCOUNT_SATURATED and WARN. + * + * Provides no memory ordering, it is assumed the caller has guaranteed the + * object memory to be stable (RCU, etc.). 
It does provide a control dependency + * and thereby orders future stores. See the comment on top. + * + * Use of this function is not recommended for the normal reference counting + * use case in which references are taken and released one at a time. In these + * cases, refcount_inc(), or one of its variants, should instead be used to + * increment a reference count. + */ +static inline void refcount_add(int i, refcount_t *r) +{ + __refcount_add(i, r, NULL); +} + +static inline __must_check bool __refcount_inc_not_zero(refcount_t *r, int *oldp) +{ + return __refcount_add_not_zero(1, r, oldp); +} + +/** + * refcount_inc_not_zero - increment a refcount unless it is 0 + * @r: the refcount to increment + * + * Similar to atomic_inc_not_zero(), but will saturate at REFCOUNT_SATURATED + * and WARN. + * + * Provides no memory ordering, it is assumed the caller has guaranteed the + * object memory to be stable (RCU, etc.). It does provide a control dependency + * and thereby orders future stores. See the comment on top. + * + * Return: true if the increment was successful, false otherwise + */ +static inline __must_check bool refcount_inc_not_zero(refcount_t *r) +{ + return __refcount_inc_not_zero(r, NULL); +} + +static inline void __refcount_inc(refcount_t *r, int *oldp) +{ + __refcount_add(1, r, oldp); +} + +/** + * refcount_inc - increment a refcount + * @r: the refcount to increment + * + * Similar to atomic_inc(), but will saturate at REFCOUNT_SATURATED and WARN. + * + * Provides no memory ordering, it is assumed the caller already has a + * reference on the object. + * + * Will WARN if the refcount is 0, as this represents a possible use-after-free + * condition. + */ +static inline void refcount_inc(refcount_t *r) +{ + __refcount_inc(r, NULL); +} + +static inline __must_check bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp) +{ + int old = atomic_sub_return_release(i, &r->refs); + + if (oldp) + *oldp = old; + + if (old == i) { + smp_acquire__after_ctrl_dep(); + return true; + } + + return false; +} + +/** + * refcount_sub_and_test - subtract from a refcount and test if it is 0 + * @i: amount to subtract from the refcount + * @r: the refcount + * + * Similar to atomic_dec_and_test(), but it will WARN, return false and + * ultimately leak on underflow and will fail to decrement when saturated + * at REFCOUNT_SATURATED. + * + * Provides release memory ordering, such that prior loads and stores are done + * before, and provides an acquire ordering on success such that free() + * must come after. + * + * Use of this function is not recommended for the normal reference counting + * use case in which references are taken and released one at a time. In these + * cases, refcount_dec(), or one of its variants, should instead be used to + * decrement a reference count. + * + * Return: true if the resulting refcount is 0, false otherwise + */ +static inline __must_check bool refcount_sub_and_test(int i, refcount_t *r) +{ + return __refcount_sub_and_test(i, r, NULL); +} + +static inline __must_check bool __refcount_dec_and_test(refcount_t *r, int *oldp) +{ + return __refcount_sub_and_test(1, r, oldp); +} + +/** + * refcount_dec_and_test - decrement a refcount and test if it is 0 + * @r: the refcount + * + * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to + * decrement when saturated at REFCOUNT_SATURATED. 
+ * + * Provides release memory ordering, such that prior loads and stores are done + * before, and provides an acquire ordering on success such that free() + * must come after. + * + * Return: true if the resulting refcount is 0, false otherwise + */ +static inline __must_check bool refcount_dec_and_test(refcount_t *r) +{ + return __refcount_dec_and_test(r, NULL); +} + +static inline void __refcount_dec(refcount_t *r, int *oldp) +{ + int old = atomic_sub_return_release(1, &r->refs); + + if (oldp) + *oldp = old; +} + +/** + * refcount_dec - decrement a refcount + * @r: the refcount + * + * Similar to atomic_dec(), it will WARN on underflow and fail to decrement + * when saturated at REFCOUNT_SATURATED. + * + * Provides release memory ordering, such that prior loads and stores are done + * before. + */ +static inline void refcount_dec(refcount_t *r) +{ + __refcount_dec(r, NULL); +} + +extern __must_check bool refcount_dec_if_one(refcount_t *r); +extern __must_check bool refcount_dec_not_one(refcount_t *r); +extern __must_check bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock) __cond_acquires(lock); +extern __must_check bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock) __cond_acquires(lock); +extern __must_check bool refcount_dec_and_lock_irqsave(refcount_t *r, + spinlock_t *lock, + unsigned long *flags) __cond_acquires(lock); +#endif /* _LINUX_REFCOUNT_H */ diff --git a/c_src/include/linux/rhashtable-types.h b/c_src/include/linux/rhashtable-types.h new file mode 100644 index 00000000..57467cbf --- /dev/null +++ b/c_src/include/linux/rhashtable-types.h @@ -0,0 +1,135 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Resizable, Scalable, Concurrent Hash Table + * + * Simple structures that might be needed in include + * files. 
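+ *
+ * A minimal, purely illustrative parameter block for an object with an
+ * embedded rhash_head (struct foo and foo_params are made-up names):
+ *
+ *	struct foo {
+ *		u32			key;
+ *		struct rhash_head	node;
+ *	};
+ *
+ *	static const struct rhashtable_params foo_params = {
+ *		.key_len	= sizeof(u32),
+ *		.key_offset	= offsetof(struct foo, key),
+ *		.head_offset	= offsetof(struct foo, node),
+ *	};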
+ */ + +#ifndef _LINUX_RHASHTABLE_TYPES_H +#define _LINUX_RHASHTABLE_TYPES_H + +#include <linux/atomic.h> +#include <linux/compiler.h> +#include <linux/mutex.h> +#include <linux/workqueue.h> + +struct rhash_head { + struct rhash_head __rcu *next; +}; + +struct rhlist_head { + struct rhash_head rhead; + struct rhlist_head __rcu *next; +}; + +struct bucket_table; + +/** + * struct rhashtable_compare_arg - Key for the function rhashtable_compare + * @ht: Hash table + * @key: Key to compare against + */ +struct rhashtable_compare_arg { + struct rhashtable *ht; + const void *key; +}; + +typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed); +typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 len, u32 seed); +typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg, + const void *obj); + +/** + * struct rhashtable_params - Hash table construction parameters + * @nelem_hint: Hint on number of elements, should be 75% of desired size + * @key_len: Length of key + * @key_offset: Offset of key in struct to be hashed + * @head_offset: Offset of rhash_head in struct to be hashed + * @max_size: Maximum size while expanding + * @min_size: Minimum size while shrinking + * @automatic_shrinking: Enable automatic shrinking of tables + * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash) + * @obj_hashfn: Function to hash object + * @obj_cmpfn: Function to compare key with object + */ +struct rhashtable_params { + u16 nelem_hint; + u16 key_len; + u16 key_offset; + u16 head_offset; + unsigned int max_size; + u16 min_size; + bool automatic_shrinking; + rht_hashfn_t hashfn; + rht_obj_hashfn_t obj_hashfn; + rht_obj_cmpfn_t obj_cmpfn; +}; + +/** + * struct rhashtable - Hash table handle + * @tbl: Bucket table + * @key_len: Key length for hashfn + * @max_elems: Maximum number of elements in table + * @p: Configuration parameters + * @rhlist: True if this is an rhltable + * @run_work: Deferred worker to expand/shrink asynchronously + * @mutex: Mutex to protect current/future table swapping + * @lock: Spin lock to protect walker list + * @nelems: Number of elements in table + */ +struct rhashtable { + struct bucket_table __rcu *tbl; + unsigned int key_len; + unsigned int max_elems; + struct rhashtable_params p; + bool rhlist; + struct work_struct run_work; + struct mutex mutex; + spinlock_t lock; + atomic_t nelems; +}; + +/** + * struct rhltable - Hash table with duplicate objects in a list + * @ht: Underlying rhtable + */ +struct rhltable { + struct rhashtable ht; +}; + +/** + * struct rhashtable_walker - Hash table walker + * @list: List entry on list of walkers + * @tbl: The table that we were walking over + */ +struct rhashtable_walker { + struct list_head list; + struct bucket_table *tbl; +}; + +/** + * struct rhashtable_iter - Hash table iterator + * @ht: Table to iterate through + * @p: Current pointer + * @list: Current hash list pointer + * @walker: Associated rhashtable walker + * @slot: Current slot + * @skip: Number of entries to skip in slot + */ +struct rhashtable_iter { + struct rhashtable *ht; + struct rhash_head *p; + struct rhlist_head *list; + struct rhashtable_walker walker; + unsigned int slot; + unsigned int skip; + bool end_of_table; +}; + +int rhashtable_init(struct rhashtable *ht, + const struct rhashtable_params *params); +int rhltable_init(struct rhltable *hlt, + const struct rhashtable_params *params); + +#endif /* _LINUX_RHASHTABLE_TYPES_H */ diff --git a/c_src/include/linux/rhashtable.h b/c_src/include/linux/rhashtable.h new file mode 100644 index 
00000000..1c6dbdc8 --- /dev/null +++ b/c_src/include/linux/rhashtable.h @@ -0,0 +1,1267 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Resizable, Scalable, Concurrent Hash Table + * + * Copyright (c) 2015-2016 Herbert Xu <herbert@gondor.apana.org.au> + * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch> + * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net> + * + * Code partially derived from nft_hash + * Rewritten with rehash code from br_multicast plus single list + * pointer as suggested by Josh Triplett + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _LINUX_RHASHTABLE_H +#define _LINUX_RHASHTABLE_H + +#include <linux/err.h> +#include <linux/errno.h> +#include <linux/jhash.h> +#include <linux/list_nulls.h> +#include <linux/rcupdate.h> +#include <linux/workqueue.h> +#include <linux/rculist.h> +#include <linux/bit_spinlock.h> + +#include <linux/rhashtable-types.h> +/* + * Objects in an rhashtable have an embedded struct rhash_head + * which is linked into as hash chain from the hash table - or one + * of two or more hash tables when the rhashtable is being resized. + * The end of the chain is marked with a special nulls marks which has + * the least significant bit set but otherwise stores the address of + * the hash bucket. This allows us to be sure we've found the end + * of the right list. + * The value stored in the hash bucket has BIT(0) used as a lock bit. + * This bit must be atomically set before any changes are made to + * the chain. To avoid dereferencing this pointer without clearing + * the bit first, we use an opaque 'struct rhash_lock_head *' for the + * pointer stored in the bucket. This struct needs to be defined so + * that rcu_dereference() works on it, but it has no content so a + * cast is needed for it to be useful. This ensures it isn't + * used by mistake with clearing the lock bit first. + */ +struct rhash_lock_head {}; + +/* Maximum chain length before rehash + * + * The maximum (not average) chain length grows with the size of the hash + * table, at a rate of (log N)/(log log N). + * + * The value of 16 is selected so that even if the hash table grew to + * 2^32 you would not expect the maximum chain length to exceed it + * unless we are under attack (or extremely unlucky). + * + * As this limit is only to detect attacks, we don't need to set it to a + * lower value as you'd need the chain length to vastly exceed 16 to have + * any real effect on the system. + */ +#define RHT_ELASTICITY 16u + +/** + * struct bucket_table - Table of hash buckets + * @size: Number of hash buckets + * @nest: Number of bits of first-level nested table. + * @rehash: Current bucket being rehashed + * @hash_rnd: Random seed to fold into hash + * @walkers: List of active walkers + * @rcu: RCU structure for freeing the table + * @future_tbl: Table under construction during rehashing + * @ntbl: Nested table used when out of memory. + * @buckets: size * hash buckets + */ +struct bucket_table { + unsigned int size; + unsigned int nest; + u32 hash_rnd; + struct list_head walkers; + struct rcu_head rcu; + + struct bucket_table __rcu *future_tbl; + + struct rhash_lock_head __rcu *buckets[] ____cacheline_aligned_in_smp; +}; + +/* + * NULLS_MARKER() expects a hash value with the low + * bits mostly likely to be significant, and it discards + * the msb. 
+ * We give it an address, in which the bottom bit is + * always 0, and the msb might be significant. + * So we shift the address down one bit to align with + * expectations and avoid losing a significant bit. + * + * We never store the NULLS_MARKER in the hash table + * itself as we need the lsb for locking. + * Instead we store a NULL + */ +#define RHT_NULLS_MARKER(ptr) \ + ((void *)NULLS_MARKER(((unsigned long) (ptr)) >> 1)) +#define INIT_RHT_NULLS_HEAD(ptr) \ + ((ptr) = NULL) + +static inline bool rht_is_a_nulls(const struct rhash_head *ptr) +{ + return ((unsigned long) ptr & 1); +} + +static inline void *rht_obj(const struct rhashtable *ht, + const struct rhash_head *he) +{ + return (char *)he - ht->p.head_offset; +} + +static inline unsigned int rht_bucket_index(const struct bucket_table *tbl, + unsigned int hash) +{ + return hash & (tbl->size - 1); +} + +static inline unsigned int rht_key_get_hash(struct rhashtable *ht, + const void *key, const struct rhashtable_params params, + unsigned int hash_rnd) +{ + unsigned int hash; + + /* params must be equal to ht->p if it isn't constant. */ + if (!__builtin_constant_p(params.key_len)) + hash = ht->p.hashfn(key, ht->key_len, hash_rnd); + else if (params.key_len) { + unsigned int key_len = params.key_len; + + if (params.hashfn) + hash = params.hashfn(key, key_len, hash_rnd); + else if (key_len & (sizeof(u32) - 1)) + hash = jhash(key, key_len, hash_rnd); + else + hash = jhash2(key, key_len / sizeof(u32), hash_rnd); + } else { + unsigned int key_len = ht->p.key_len; + + if (params.hashfn) + hash = params.hashfn(key, key_len, hash_rnd); + else + hash = jhash(key, key_len, hash_rnd); + } + + return hash; +} + +static inline unsigned int rht_key_hashfn( + struct rhashtable *ht, const struct bucket_table *tbl, + const void *key, const struct rhashtable_params params) +{ + unsigned int hash = rht_key_get_hash(ht, key, params, tbl->hash_rnd); + + return rht_bucket_index(tbl, hash); +} + +static inline unsigned int rht_head_hashfn( + struct rhashtable *ht, const struct bucket_table *tbl, + const struct rhash_head *he, const struct rhashtable_params params) +{ + const char *ptr = rht_obj(ht, he); + + return likely(params.obj_hashfn) ? 
+ rht_bucket_index(tbl, params.obj_hashfn(ptr, params.key_len ?: + ht->p.key_len, + tbl->hash_rnd)) : + rht_key_hashfn(ht, tbl, ptr + params.key_offset, params); +} + +/** + * rht_grow_above_75 - returns true if nelems > 0.75 * table-size + * @ht: hash table + * @tbl: current table + */ +static inline bool rht_grow_above_75(const struct rhashtable *ht, + const struct bucket_table *tbl) +{ + /* Expand table when exceeding 75% load */ + return atomic_read(&ht->nelems) > (tbl->size / 4 * 3) && + (!ht->p.max_size || tbl->size < ht->p.max_size); +} + +/** + * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size + * @ht: hash table + * @tbl: current table + */ +static inline bool rht_shrink_below_30(const struct rhashtable *ht, + const struct bucket_table *tbl) +{ + /* Shrink table beneath 30% load */ + return atomic_read(&ht->nelems) < (tbl->size * 3 / 10) && + tbl->size > ht->p.min_size; +} + +/** + * rht_grow_above_100 - returns true if nelems > table-size + * @ht: hash table + * @tbl: current table + */ +static inline bool rht_grow_above_100(const struct rhashtable *ht, + const struct bucket_table *tbl) +{ + return atomic_read(&ht->nelems) > tbl->size && + (!ht->p.max_size || tbl->size < ht->p.max_size); +} + +/** + * rht_grow_above_max - returns true if table is above maximum + * @ht: hash table + * @tbl: current table + */ +static inline bool rht_grow_above_max(const struct rhashtable *ht, + const struct bucket_table *tbl) +{ + return atomic_read(&ht->nelems) >= ht->max_elems; +} + +#ifdef CONFIG_PROVE_LOCKING +int lockdep_rht_mutex_is_held(struct rhashtable *ht); +int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash); +#else +static inline int lockdep_rht_mutex_is_held(struct rhashtable *ht) +{ + return 1; +} + +static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, + u32 hash) +{ + return 1; +} +#endif /* CONFIG_PROVE_LOCKING */ + +void *rhashtable_insert_slow(struct rhashtable *ht, const void *key, + struct rhash_head *obj); + +void rhashtable_walk_enter(struct rhashtable *ht, + struct rhashtable_iter *iter); +void rhashtable_walk_exit(struct rhashtable_iter *iter); +int rhashtable_walk_start_check(struct rhashtable_iter *iter) __acquires(RCU); + +static inline void rhashtable_walk_start(struct rhashtable_iter *iter) +{ + (void)rhashtable_walk_start_check(iter); +} + +void *rhashtable_walk_next(struct rhashtable_iter *iter); +void *rhashtable_walk_peek(struct rhashtable_iter *iter); +void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU); + +void rhashtable_free_and_destroy(struct rhashtable *ht, + void (*free_fn)(void *ptr, void *arg), + void *arg); +void rhashtable_destroy(struct rhashtable *ht); + +struct rhash_lock_head __rcu **rht_bucket_nested( + const struct bucket_table *tbl, unsigned int hash); +struct rhash_lock_head __rcu **__rht_bucket_nested( + const struct bucket_table *tbl, unsigned int hash); +struct rhash_lock_head __rcu **rht_bucket_nested_insert( + struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash); + +#define rht_dereference(p, ht) \ + rcu_dereference(p) + +#define rht_dereference_rcu(p, ht) \ + rcu_dereference(p) + +#define rht_dereference_bucket(p, tbl, hash) \ + rcu_dereference(p) + +#define rht_dereference_bucket_rcu(p, tbl, hash) \ + rcu_dereference(p) + +#define rht_entry(tpos, pos, member) \ + ({ tpos = container_of(pos, typeof(*tpos), member); 1; }) + +static inline struct rhash_lock_head __rcu *const *rht_bucket( + const struct bucket_table *tbl, unsigned int hash) +{ + 
return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) : + &tbl->buckets[hash]; +} + +static inline struct rhash_lock_head __rcu **rht_bucket_var( + struct bucket_table *tbl, unsigned int hash) +{ + return unlikely(tbl->nest) ? __rht_bucket_nested(tbl, hash) : + &tbl->buckets[hash]; +} + +static inline struct rhash_lock_head __rcu **rht_bucket_insert( + struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash) +{ + return unlikely(tbl->nest) ? rht_bucket_nested_insert(ht, tbl, hash) : + &tbl->buckets[hash]; +} + +/* + * We lock a bucket by setting BIT(0) in the pointer - this is always + * zero in real pointers. The NULLS mark is never stored in the bucket, + * rather we store NULL if the bucket is empty. + * bit_spin_locks do not handle contention well, but the whole point + * of the hashtable design is to achieve minimum per-bucket contention. + * A nested hash table might not have a bucket pointer. In that case + * we cannot get a lock. For remove and replace the bucket cannot be + * interesting and doesn't need locking. + * For insert we allocate the bucket if this is the last bucket_table, + * and then take the lock. + * Sometimes we unlock a bucket by writing a new pointer there. In that + * case we don't need to unlock, but we do need to reset state such as + * local_bh. For that we have rht_assign_unlock(). As rcu_assign_pointer() + * provides the same release semantics that bit_spin_unlock() provides, + * this is safe. + * When we write to a bucket without unlocking, we use rht_assign_locked(). + */ + +static inline void rht_lock(struct bucket_table *tbl, + struct rhash_lock_head __rcu **bkt) +{ + bit_spin_lock(0, (unsigned long *)bkt); +} + +static inline void rht_lock_nested(struct bucket_table *tbl, + struct rhash_lock_head __rcu **bucket, + unsigned int subclass) +{ + bit_spin_lock(0, (unsigned long *)bucket); +} + +static inline void rht_unlock(struct bucket_table *tbl, + struct rhash_lock_head __rcu **bkt) +{ + bit_spin_unlock(0, (unsigned long *)bkt); +} + +static inline struct rhash_head *__rht_ptr( + struct rhash_lock_head *p, struct rhash_lock_head __rcu *const *bkt) +{ + return (struct rhash_head *) + ((unsigned long)p & ~BIT(0) ?: + (unsigned long)RHT_NULLS_MARKER(bkt)); +} + +/* + * Where 'bkt' is a bucket and might be locked: + * rht_ptr_rcu() dereferences that pointer and clears the lock bit. + * rht_ptr() dereferences in a context where the bucket is locked. + * rht_ptr_exclusive() dereferences in a context where exclusive + * access is guaranteed, such as when destroying the table. 
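+ *
+ * A typical RCU reader boils down to (as rht_for_each_rcu() below does):
+ *
+ *	pos = rht_ptr_rcu(rht_bucket(tbl, hash));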
+ */ +static inline struct rhash_head *rht_ptr_rcu( + struct rhash_lock_head __rcu *const *bkt) +{ + return __rht_ptr(rcu_dereference(*bkt), bkt); +} + +static inline struct rhash_head *rht_ptr( + struct rhash_lock_head __rcu *const *bkt, + struct bucket_table *tbl, + unsigned int hash) +{ + return __rht_ptr(rht_dereference_bucket(*bkt, tbl, hash), bkt); +} + +static inline struct rhash_head *rht_ptr_exclusive( + struct rhash_lock_head __rcu *const *bkt) +{ + return __rht_ptr(rcu_dereference(*bkt), bkt); +} + +static inline void rht_assign_locked(struct rhash_lock_head __rcu **bkt, + struct rhash_head *obj) +{ + if (rht_is_a_nulls(obj)) + obj = NULL; + rcu_assign_pointer(*bkt, (void *)((unsigned long)obj | BIT(0))); +} + +static inline void rht_assign_unlock(struct bucket_table *tbl, + struct rhash_lock_head __rcu **bkt, + struct rhash_head *obj) +{ + if (rht_is_a_nulls(obj)) + obj = NULL; + rcu_assign_pointer(*bkt, (void *)obj); + preempt_enable(); + __release(bitlock); + bit_spin_wake(0, (unsigned long *) bkt); +} + +/** + * rht_for_each_from - iterate over hash chain from given head + * @pos: the &struct rhash_head to use as a loop cursor. + * @head: the &struct rhash_head to start from + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index + */ +#define rht_for_each_from(pos, head, tbl, hash) \ + for (pos = head; \ + !rht_is_a_nulls(pos); \ + pos = rht_dereference_bucket((pos)->next, tbl, hash)) + +/** + * rht_for_each - iterate over hash chain + * @pos: the &struct rhash_head to use as a loop cursor. + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index + */ +#define rht_for_each(pos, tbl, hash) \ + rht_for_each_from(pos, rht_ptr(rht_bucket(tbl, hash), tbl, hash), \ + tbl, hash) + +/** + * rht_for_each_entry_from - iterate over hash chain from given head + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct rhash_head to use as a loop cursor. + * @head: the &struct rhash_head to start from + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index + * @member: name of the &struct rhash_head within the hashable struct. + */ +#define rht_for_each_entry_from(tpos, pos, head, tbl, hash, member) \ + for (pos = head; \ + (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ + pos = rht_dereference_bucket((pos)->next, tbl, hash)) + +/** + * rht_for_each_entry - iterate over hash chain of given type + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct rhash_head to use as a loop cursor. + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index + * @member: name of the &struct rhash_head within the hashable struct. + */ +#define rht_for_each_entry(tpos, pos, tbl, hash, member) \ + rht_for_each_entry_from(tpos, pos, \ + rht_ptr(rht_bucket(tbl, hash), tbl, hash), \ + tbl, hash, member) + +/** + * rht_for_each_entry_safe - safely iterate over hash chain of given type + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct rhash_head to use as a loop cursor. + * @next: the &struct rhash_head to use as next in loop cursor. + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index + * @member: name of the &struct rhash_head within the hashable struct. + * + * This hash chain list-traversal primitive allows for the looped code to + * remove the loop cursor from the list. + */ +#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member) \ + for (pos = rht_ptr(rht_bucket(tbl, hash), tbl, hash), \ + next = !rht_is_a_nulls(pos) ? 
\ + rht_dereference_bucket(pos->next, tbl, hash) : NULL; \ + (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ + pos = next, \ + next = !rht_is_a_nulls(pos) ? \ + rht_dereference_bucket(pos->next, tbl, hash) : NULL) + +/** + * rht_for_each_rcu_from - iterate over rcu hash chain from given head + * @pos: the &struct rhash_head to use as a loop cursor. + * @head: the &struct rhash_head to start from + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index + * + * This hash chain list-traversal primitive may safely run concurrently with + * the _rcu mutation primitives such as rhashtable_insert() as long as the + * traversal is guarded by rcu_read_lock(). + */ +#define rht_for_each_rcu_from(pos, head, tbl, hash) \ + for (({barrier(); }), \ + pos = head; \ + !rht_is_a_nulls(pos); \ + pos = rcu_dereference_raw(pos->next)) + +/** + * rht_for_each_rcu - iterate over rcu hash chain + * @pos: the &struct rhash_head to use as a loop cursor. + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index + * + * This hash chain list-traversal primitive may safely run concurrently with + * the _rcu mutation primitives such as rhashtable_insert() as long as the + * traversal is guarded by rcu_read_lock(). + */ +#define rht_for_each_rcu(pos, tbl, hash) \ + for (({barrier(); }), \ + pos = rht_ptr_rcu(rht_bucket(tbl, hash)); \ + !rht_is_a_nulls(pos); \ + pos = rcu_dereference_raw(pos->next)) + +/** + * rht_for_each_entry_rcu_from - iterated over rcu hash chain from given head + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct rhash_head to use as a loop cursor. + * @head: the &struct rhash_head to start from + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index + * @member: name of the &struct rhash_head within the hashable struct. + * + * This hash chain list-traversal primitive may safely run concurrently with + * the _rcu mutation primitives such as rhashtable_insert() as long as the + * traversal is guarded by rcu_read_lock(). + */ +#define rht_for_each_entry_rcu_from(tpos, pos, head, tbl, hash, member) \ + for (({barrier(); }), \ + pos = head; \ + (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ + pos = rht_dereference_bucket_rcu(pos->next, tbl, hash)) + +/** + * rht_for_each_entry_rcu - iterate over rcu hash chain of given type + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct rhash_head to use as a loop cursor. + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index + * @member: name of the &struct rhash_head within the hashable struct. + * + * This hash chain list-traversal primitive may safely run concurrently with + * the _rcu mutation primitives such as rhashtable_insert() as long as the + * traversal is guarded by rcu_read_lock(). + */ +#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \ + rht_for_each_entry_rcu_from(tpos, pos, \ + rht_ptr_rcu(rht_bucket(tbl, hash)), \ + tbl, hash, member) + +/** + * rhl_for_each_rcu - iterate over rcu hash table list + * @pos: the &struct rlist_head to use as a loop cursor. + * @list: the head of the list + * + * This hash chain list-traversal primitive should be used on the + * list returned by rhltable_lookup. + */ +#define rhl_for_each_rcu(pos, list) \ + for (pos = list; pos; pos = rcu_dereference_raw(pos->next)) + +/** + * rhl_for_each_entry_rcu - iterate over rcu hash table list of given type + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct rlist_head to use as a loop cursor. 
+ * @list: the head of the list + * @member: name of the &struct rlist_head within the hashable struct. + * + * This hash chain list-traversal primitive should be used on the + * list returned by rhltable_lookup. + */ +#define rhl_for_each_entry_rcu(tpos, pos, list, member) \ + for (pos = list; pos && rht_entry(tpos, pos, member); \ + pos = rcu_dereference_raw(pos->next)) + +static inline int rhashtable_compare(struct rhashtable_compare_arg *arg, + const void *obj) +{ + struct rhashtable *ht = arg->ht; + const char *ptr = obj; + + return memcmp(ptr + ht->p.key_offset, arg->key, ht->p.key_len); +} + +/* Internal function, do not use. */ +static inline struct rhash_head *__rhashtable_lookup( + struct rhashtable *ht, const void *key, + const struct rhashtable_params params) +{ + struct rhashtable_compare_arg arg = { + .ht = ht, + .key = key, + }; + struct rhash_lock_head __rcu *const *bkt; + struct bucket_table *tbl; + struct rhash_head *he; + unsigned int hash; + + tbl = rht_dereference_rcu(ht->tbl, ht); +restart: + hash = rht_key_hashfn(ht, tbl, key, params); + bkt = rht_bucket(tbl, hash); + do { + rht_for_each_rcu_from(he, rht_ptr_rcu(bkt), tbl, hash) { + if (params.obj_cmpfn ? + params.obj_cmpfn(&arg, rht_obj(ht, he)) : + rhashtable_compare(&arg, rht_obj(ht, he))) + continue; + return he; + } + /* An object might have been moved to a different hash chain, + * while we walk along it - better check and retry. + */ + } while (he != RHT_NULLS_MARKER(bkt)); + + /* Ensure we see any new tables. */ + smp_rmb(); + + tbl = rht_dereference_rcu(tbl->future_tbl, ht); + if (unlikely(tbl)) + goto restart; + + return NULL; +} + +/** + * rhashtable_lookup - search hash table + * @ht: hash table + * @key: the pointer to the key + * @params: hash table parameters + * + * Computes the hash value for the key and traverses the bucket chain looking + * for a entry with an identical key. The first matching entry is returned. + * + * This must only be called under the RCU read lock. + * + * Returns the first entry on which the compare function returned true. + */ +static inline void *rhashtable_lookup( + struct rhashtable *ht, const void *key, + const struct rhashtable_params params) +{ + struct rhash_head *he = __rhashtable_lookup(ht, key, params); + + return he ? rht_obj(ht, he) : NULL; +} + +/** + * rhashtable_lookup_fast - search hash table, without RCU read lock + * @ht: hash table + * @key: the pointer to the key + * @params: hash table parameters + * + * Computes the hash value for the key and traverses the bucket chain looking + * for a entry with an identical key. The first matching entry is returned. + * + * Only use this function when you have other mechanisms guaranteeing + * that the object won't go away after the RCU read lock is released. + * + * Returns the first entry on which the compare function returned true. + */ +static inline void *rhashtable_lookup_fast( + struct rhashtable *ht, const void *key, + const struct rhashtable_params params) +{ + void *obj; + + rcu_read_lock(); + obj = rhashtable_lookup(ht, key, params); + rcu_read_unlock(); + + return obj; +} + +/** + * rhltable_lookup - search hash list table + * @hlt: hash table + * @key: the pointer to the key + * @params: hash table parameters + * + * Computes the hash value for the key and traverses the bucket chain looking + * for a entry with an identical key. All matching entries are returned + * in a list. + * + * This must only be called under the RCU read lock. + * + * Returns the list of entries that match the given key. 
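+ *
+ * Illustrative traversal of the returned list, under rcu_read_lock()
+ * (struct foo, its rhlist_head member node, and handle() are made-up
+ * names):
+ *
+ *	struct rhlist_head *list, *pos;
+ *	struct foo *p;
+ *
+ *	list = rhltable_lookup(&hlt, &key, foo_params);
+ *	rhl_for_each_entry_rcu(p, pos, list, node)
+ *		handle(p);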
+ */ +static inline struct rhlist_head *rhltable_lookup( + struct rhltable *hlt, const void *key, + const struct rhashtable_params params) +{ + struct rhash_head *he = __rhashtable_lookup(&hlt->ht, key, params); + + return he ? container_of(he, struct rhlist_head, rhead) : NULL; +} + +/* Internal function, please use rhashtable_insert_fast() instead. This + * function returns the existing element already in hashes in there is a clash, + * otherwise it returns an error via ERR_PTR(). + */ +static inline void *__rhashtable_insert_fast( + struct rhashtable *ht, const void *key, struct rhash_head *obj, + const struct rhashtable_params params, bool rhlist) +{ + struct rhashtable_compare_arg arg = { + .ht = ht, + .key = key, + }; + struct rhash_lock_head __rcu **bkt; + struct rhash_head __rcu **pprev; + struct bucket_table *tbl; + struct rhash_head *head; + unsigned int hash; + int elasticity; + void *data; + + rcu_read_lock(); + + tbl = rht_dereference_rcu(ht->tbl, ht); + hash = rht_head_hashfn(ht, tbl, obj, params); + elasticity = RHT_ELASTICITY; + bkt = rht_bucket_insert(ht, tbl, hash); + data = ERR_PTR(-ENOMEM); + if (!bkt) + goto out; + pprev = NULL; + rht_lock(tbl, bkt); + + if (unlikely(rcu_access_pointer(tbl->future_tbl))) { +slow_path: + rht_unlock(tbl, bkt); + rcu_read_unlock(); + return rhashtable_insert_slow(ht, key, obj); + } + + rht_for_each_from(head, rht_ptr(bkt, tbl, hash), tbl, hash) { + struct rhlist_head *plist; + struct rhlist_head *list; + + elasticity--; + if (!key || + (params.obj_cmpfn ? + params.obj_cmpfn(&arg, rht_obj(ht, head)) : + rhashtable_compare(&arg, rht_obj(ht, head)))) { + pprev = &head->next; + continue; + } + + data = rht_obj(ht, head); + + if (!rhlist) + goto out_unlock; + + + list = container_of(obj, struct rhlist_head, rhead); + plist = container_of(head, struct rhlist_head, rhead); + + RCU_INIT_POINTER(list->next, plist); + head = rht_dereference_bucket(head->next, tbl, hash); + RCU_INIT_POINTER(list->rhead.next, head); + if (pprev) { + rcu_assign_pointer(*pprev, obj); + rht_unlock(tbl, bkt); + } else + rht_assign_unlock(tbl, bkt, obj); + data = NULL; + goto out; + } + + if (elasticity <= 0) + goto slow_path; + + data = ERR_PTR(-E2BIG); + if (unlikely(rht_grow_above_max(ht, tbl))) + goto out_unlock; + + if (unlikely(rht_grow_above_100(ht, tbl))) + goto slow_path; + + /* Inserting at head of list makes unlocking free. */ + head = rht_ptr(bkt, tbl, hash); + + RCU_INIT_POINTER(obj->next, head); + if (rhlist) { + struct rhlist_head *list; + + list = container_of(obj, struct rhlist_head, rhead); + RCU_INIT_POINTER(list->next, NULL); + } + + atomic_inc(&ht->nelems); + rht_assign_unlock(tbl, bkt, obj); + + if (rht_grow_above_75(ht, tbl)) + schedule_work(&ht->run_work); + + data = NULL; +out: + rcu_read_unlock(); + + return data; + +out_unlock: + rht_unlock(tbl, bkt); + goto out; +} + +/** + * rhashtable_insert_fast - insert object into hash table + * @ht: hash table + * @obj: pointer to hash head inside object + * @params: hash table parameters + * + * Will take the per bucket bitlock to protect against mutual mutations + * on the same bucket. Multiple insertions may occur in parallel unless + * they map to the same bucket. + * + * It is safe to call this function from atomic context. + * + * Will trigger an automatic deferred table resizing if residency in the + * table grows beyond 70%. 
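+ *
+ * A minimal call, reusing the hypothetical foo/foo_params sketched in
+ * rhashtable-types.h, returns 0 or a negative errno:
+ *
+ *	err = rhashtable_insert_fast(&ht, &new_foo->node, foo_params);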
+ */ +static inline int rhashtable_insert_fast( + struct rhashtable *ht, struct rhash_head *obj, + const struct rhashtable_params params) +{ + void *ret; + + ret = __rhashtable_insert_fast(ht, NULL, obj, params, false); + if (IS_ERR(ret)) + return PTR_ERR(ret); + + return ret == NULL ? 0 : -EEXIST; +} + +/** + * rhltable_insert_key - insert object into hash list table + * @hlt: hash list table + * @key: the pointer to the key + * @list: pointer to hash list head inside object + * @params: hash table parameters + * + * Will take the per bucket bitlock to protect against mutual mutations + * on the same bucket. Multiple insertions may occur in parallel unless + * they map to the same bucket. + * + * It is safe to call this function from atomic context. + * + * Will trigger an automatic deferred table resizing if residency in the + * table grows beyond 70%. + */ +static inline int rhltable_insert_key( + struct rhltable *hlt, const void *key, struct rhlist_head *list, + const struct rhashtable_params params) +{ + return PTR_ERR(__rhashtable_insert_fast(&hlt->ht, key, &list->rhead, + params, true)); +} + +/** + * rhltable_insert - insert object into hash list table + * @hlt: hash list table + * @list: pointer to hash list head inside object + * @params: hash table parameters + * + * Will take the per bucket bitlock to protect against mutual mutations + * on the same bucket. Multiple insertions may occur in parallel unless + * they map to the same bucket. + * + * It is safe to call this function from atomic context. + * + * Will trigger an automatic deferred table resizing if residency in the + * table grows beyond 70%. + */ +static inline int rhltable_insert( + struct rhltable *hlt, struct rhlist_head *list, + const struct rhashtable_params params) +{ + const char *key = rht_obj(&hlt->ht, &list->rhead); + + key += params.key_offset; + + return rhltable_insert_key(hlt, key, list, params); +} + +/** + * rhashtable_lookup_insert_fast - lookup and insert object into hash table + * @ht: hash table + * @obj: pointer to hash head inside object + * @params: hash table parameters + * + * This lookup function may only be used for fixed key hash table (key_len + * parameter set). It will BUG() if used inappropriately. + * + * It is safe to call this function from atomic context. + * + * Will trigger an automatic deferred table resizing if residency in the + * table grows beyond 70%. + */ +static inline int rhashtable_lookup_insert_fast( + struct rhashtable *ht, struct rhash_head *obj, + const struct rhashtable_params params) +{ + const char *key = rht_obj(ht, obj); + void *ret; + + BUG_ON(ht->p.obj_hashfn); + + ret = __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params, + false); + if (IS_ERR(ret)) + return PTR_ERR(ret); + + return ret == NULL ? 0 : -EEXIST; +} + +/** + * rhashtable_lookup_get_insert_fast - lookup and insert object into hash table + * @ht: hash table + * @obj: pointer to hash head inside object + * @params: hash table parameters + * + * Just like rhashtable_lookup_insert_fast(), but this function returns the + * object if it exists, NULL if it did not and the insertion was successful, + * and an ERR_PTR otherwise. 
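+ *
+ * Illustrative use (new_foo is the same hypothetical object as above):
+ *
+ *	struct foo *old = rhashtable_lookup_get_insert_fast(&ht,
+ *					&new_foo->node, foo_params);
+ *
+ * where IS_ERR(old) signals an error, old == NULL a successful insert,
+ * and any other value the entry already present for that key.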
+ */ +static inline void *rhashtable_lookup_get_insert_fast( + struct rhashtable *ht, struct rhash_head *obj, + const struct rhashtable_params params) +{ + const char *key = rht_obj(ht, obj); + + BUG_ON(ht->p.obj_hashfn); + + return __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params, + false); +} + +/** + * rhashtable_lookup_insert_key - search and insert object to hash table + * with explicit key + * @ht: hash table + * @key: key + * @obj: pointer to hash head inside object + * @params: hash table parameters + * + * Lookups may occur in parallel with hashtable mutations and resizing. + * + * Will trigger an automatic deferred table resizing if residency in the + * table grows beyond 70%. + * + * Returns zero on success. + */ +static inline int rhashtable_lookup_insert_key( + struct rhashtable *ht, const void *key, struct rhash_head *obj, + const struct rhashtable_params params) +{ + void *ret; + + BUG_ON(!ht->p.obj_hashfn || !key); + + ret = __rhashtable_insert_fast(ht, key, obj, params, false); + if (IS_ERR(ret)) + return PTR_ERR(ret); + + return ret == NULL ? 0 : -EEXIST; +} + +/** + * rhashtable_lookup_get_insert_key - lookup and insert object into hash table + * @ht: hash table + * @key: key + * @obj: pointer to hash head inside object + * @params: hash table parameters + * + * Just like rhashtable_lookup_insert_key(), but this function returns the + * object if it exists, NULL if it does not and the insertion was successful, + * and an ERR_PTR otherwise. + */ +static inline void *rhashtable_lookup_get_insert_key( + struct rhashtable *ht, const void *key, struct rhash_head *obj, + const struct rhashtable_params params) +{ + BUG_ON(!ht->p.obj_hashfn || !key); + + return __rhashtable_insert_fast(ht, key, obj, params, false); +} + +/* Internal function, please use rhashtable_remove_fast() instead */ +static inline int __rhashtable_remove_fast_one( + struct rhashtable *ht, struct bucket_table *tbl, + struct rhash_head *obj, const struct rhashtable_params params, + bool rhlist) +{ + struct rhash_lock_head __rcu **bkt; + struct rhash_head __rcu **pprev; + struct rhash_head *he; + unsigned int hash; + int err = -ENOENT; + + hash = rht_head_hashfn(ht, tbl, obj, params); + bkt = rht_bucket_var(tbl, hash); + if (!bkt) + return -ENOENT; + pprev = NULL; + rht_lock(tbl, bkt); + + rht_for_each_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) { + struct rhlist_head *list; + + list = container_of(he, struct rhlist_head, rhead); + + if (he != obj) { + struct rhlist_head __rcu **lpprev; + + pprev = &he->next; + + if (!rhlist) + continue; + + do { + lpprev = &list->next; + list = rht_dereference_bucket(list->next, + tbl, hash); + } while (list && obj != &list->rhead); + + if (!list) + continue; + + list = rht_dereference_bucket(list->next, tbl, hash); + RCU_INIT_POINTER(*lpprev, list); + err = 0; + break; + } + + obj = rht_dereference_bucket(obj->next, tbl, hash); + err = 1; + + if (rhlist) { + list = rht_dereference_bucket(list->next, tbl, hash); + if (list) { + RCU_INIT_POINTER(list->rhead.next, obj); + obj = &list->rhead; + err = 0; + } + } + + if (pprev) { + rcu_assign_pointer(*pprev, obj); + rht_unlock(tbl, bkt); + } else { + rht_assign_unlock(tbl, bkt, obj); + } + goto unlocked; + } + + rht_unlock(tbl, bkt); +unlocked: + if (err > 0) { + atomic_dec(&ht->nelems); + if (unlikely(ht->p.automatic_shrinking && + rht_shrink_below_30(ht, tbl))) + schedule_work(&ht->run_work); + err = 0; + } + + return err; +} + +/* Internal function, please use rhashtable_remove_fast() instead */ +static inline 
int __rhashtable_remove_fast( + struct rhashtable *ht, struct rhash_head *obj, + const struct rhashtable_params params, bool rhlist) +{ + struct bucket_table *tbl; + int err; + + rcu_read_lock(); + + tbl = rht_dereference_rcu(ht->tbl, ht); + + /* Because we have already taken (and released) the bucket + * lock in old_tbl, if we find that future_tbl is not yet + * visible then that guarantees the entry to still be in + * the old tbl if it exists. + */ + while ((err = __rhashtable_remove_fast_one(ht, tbl, obj, params, + rhlist)) && + (tbl = rht_dereference_rcu(tbl->future_tbl, ht))) + ; + + rcu_read_unlock(); + + return err; +} + +/** + * rhashtable_remove_fast - remove object from hash table + * @ht: hash table + * @obj: pointer to hash head inside object + * @params: hash table parameters + * + * Since the hash chain is singly linked, the removal operation needs to + * walk the bucket chain upon removal. The removal operation is thus + * considerably slow if the hash table is not correctly sized. + * + * Will automatically shrink the table if permitted when residency drops + * below 30%. + * + * Returns zero on success, -ENOENT if the entry could not be found. + */ +static inline int rhashtable_remove_fast( + struct rhashtable *ht, struct rhash_head *obj, + const struct rhashtable_params params) +{ + return __rhashtable_remove_fast(ht, obj, params, false); +} + +/** + * rhltable_remove - remove object from hash list table + * @hlt: hash list table + * @list: pointer to hash list head inside object + * @params: hash table parameters + * + * Since the hash chain is singly linked, the removal operation needs to + * walk the bucket chain upon removal. The removal operation is thus + * considerably slow if the hash table is not correctly sized. + * + * Will automatically shrink the table if permitted when residency drops + * below 30%. + * + * Returns zero on success, -ENOENT if the entry could not be found. + */ +static inline int rhltable_remove( + struct rhltable *hlt, struct rhlist_head *list, + const struct rhashtable_params params) +{ + return __rhashtable_remove_fast(&hlt->ht, &list->rhead, params, true); +} + +/* Internal function, please use rhashtable_replace_fast() instead */ +static inline int __rhashtable_replace_fast( + struct rhashtable *ht, struct bucket_table *tbl, + struct rhash_head *obj_old, struct rhash_head *obj_new, + const struct rhashtable_params params) +{ + struct rhash_lock_head __rcu **bkt; + struct rhash_head __rcu **pprev; + struct rhash_head *he; + unsigned int hash; + int err = -ENOENT; + + /* Minimally, the old and new objects must have the same hash + * (which should mean identifiers are the same).
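+ * In the common fixed-key case that simply means both objects carry the
+ * same key bytes; the check below compares the two computed bucket hashes
+ * and fails with -EINVAL if they differ.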
+ */ + hash = rht_head_hashfn(ht, tbl, obj_old, params); + if (hash != rht_head_hashfn(ht, tbl, obj_new, params)) + return -EINVAL; + + bkt = rht_bucket_var(tbl, hash); + if (!bkt) + return -ENOENT; + + pprev = NULL; + rht_lock(tbl, bkt); + + rht_for_each_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) { + if (he != obj_old) { + pprev = &he->next; + continue; + } + + rcu_assign_pointer(obj_new->next, obj_old->next); + if (pprev) { + rcu_assign_pointer(*pprev, obj_new); + rht_unlock(tbl, bkt); + } else { + rht_assign_unlock(tbl, bkt, obj_new); + } + err = 0; + goto unlocked; + } + + rht_unlock(tbl, bkt); + +unlocked: + return err; +} + +/** + * rhashtable_replace_fast - replace an object in hash table + * @ht: hash table + * @obj_old: pointer to hash head inside object being replaced + * @obj_new: pointer to hash head inside object which is new + * @params: hash table parameters + * + * Replacing an object doesn't affect the number of elements in the hash table + * or bucket, so we don't need to worry about shrinking or expanding the + * table here. + * + * Returns zero on success, -ENOENT if the entry could not be found, + * -EINVAL if hash is not the same for the old and new objects. + */ +static inline int rhashtable_replace_fast( + struct rhashtable *ht, struct rhash_head *obj_old, + struct rhash_head *obj_new, + const struct rhashtable_params params) +{ + struct bucket_table *tbl; + int err; + + rcu_read_lock(); + + tbl = rht_dereference_rcu(ht->tbl, ht); + + /* Because we have already taken (and released) the bucket + * lock in old_tbl, if we find that future_tbl is not yet + * visible then that guarantees the entry to still be in + * the old tbl if it exists. + */ + while ((err = __rhashtable_replace_fast(ht, tbl, obj_old, + obj_new, params)) && + (tbl = rht_dereference_rcu(tbl->future_tbl, ht))) + ; + + rcu_read_unlock(); + + return err; +} + +/** + * rhltable_walk_enter - Initialise an iterator + * @hlt: Table to walk over + * @iter: Hash table Iterator + * + * This function prepares a hash table walk. + * + * Note that if you restart a walk after rhashtable_walk_stop you + * may see the same object twice. Also, you may miss objects if + * there are removals in between rhashtable_walk_stop and the next + * call to rhashtable_walk_start. + * + * For a completely stable walk you should construct your own data + * structure outside the hash table. + * + * This function may be called from any process context, including + * non-preemptable context, but cannot be called from softirq or + * hardirq context. + * + * You must call rhashtable_walk_exit after this function returns. + */ +static inline void rhltable_walk_enter(struct rhltable *hlt, + struct rhashtable_iter *iter) +{ + return rhashtable_walk_enter(&hlt->ht, iter); +} + +/** + * rhltable_free_and_destroy - free elements and destroy hash list table + * @hlt: the hash list table to destroy + * @free_fn: callback to release resources of element + * @arg: pointer passed to free_fn + * + * See documentation for rhashtable_free_and_destroy. 
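+ *
+ * Illustrative callback shape (obj_free() is a placeholder, not part of
+ * this header):
+ *
+ *	static void obj_free(void *ptr, void *arg)
+ *	{
+ *		kfree(ptr);
+ *	}
+ *
+ *	rhltable_free_and_destroy(&hlt, obj_free, NULL);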
+ */ +static inline void rhltable_free_and_destroy(struct rhltable *hlt, + void (*free_fn)(void *ptr, + void *arg), + void *arg) +{ + return rhashtable_free_and_destroy(&hlt->ht, free_fn, arg); +} + +static inline void rhltable_destroy(struct rhltable *hlt) +{ + return rhltable_free_and_destroy(hlt, NULL, NULL); +} + +#endif /* _LINUX_RHASHTABLE_H */ diff --git a/c_src/include/linux/rwsem.h b/c_src/include/linux/rwsem.h new file mode 100644 index 00000000..f851d6a2 --- /dev/null +++ b/c_src/include/linux/rwsem.h @@ -0,0 +1,29 @@ +#ifndef __TOOLS_LINUX_RWSEM_H +#define __TOOLS_LINUX_RWSEM_H + +#include <pthread.h> + +struct rw_semaphore { + pthread_rwlock_t lock; +}; + +#define __RWSEM_INITIALIZER(name) \ + { .lock = PTHREAD_RWLOCK_INITIALIZER } + +#define DECLARE_RWSEM(name) \ + struct rw_semaphore name = __RWSEM_INITIALIZER(name) + +static inline void init_rwsem(struct rw_semaphore *lock) +{ + pthread_rwlock_init(&lock->lock, NULL); +} + +#define down_read(l) pthread_rwlock_rdlock(&(l)->lock) +#define down_read_killable(l) (pthread_rwlock_rdlock(&(l)->lock), 0) +#define down_read_trylock(l) (!pthread_rwlock_tryrdlock(&(l)->lock)) +#define up_read(l) pthread_rwlock_unlock(&(l)->lock) + +#define down_write(l) pthread_rwlock_wrlock(&(l)->lock) +#define up_write(l) pthread_rwlock_unlock(&(l)->lock) + +#endif /* __TOOLS_LINUX_RWSEM_H */ diff --git a/c_src/include/linux/scatterlist.h b/c_src/include/linux/scatterlist.h new file mode 100644 index 00000000..1e4395c5 --- /dev/null +++ b/c_src/include/linux/scatterlist.h @@ -0,0 +1,109 @@ +#ifndef _LINUX_SCATTERLIST_H +#define _LINUX_SCATTERLIST_H + +#include <linux/bug.h> +#include <linux/slab.h> + +struct scatterlist { + unsigned long page_link; + unsigned int offset; + unsigned int length; +}; + +#define sg_is_chain(sg) ((sg)->page_link & 0x01) +#define sg_is_last(sg) ((sg)->page_link & 0x02) +#define sg_chain_ptr(sg) \ + ((struct scatterlist *) ((sg)->page_link & ~0x03)) + +static inline void sg_assign_page(struct scatterlist *sg, struct page *page) +{ + unsigned long page_link = sg->page_link & 0x3; + + /* + * In order for the low bit stealing approach to work, pages + * must be aligned at a 32-bit boundary as a minimum. + */ + BUG_ON((unsigned long) page & 0x03); + sg->page_link = page_link | (unsigned long) page; +} + +static inline void sg_set_page(struct scatterlist *sg, struct page *page, + unsigned int len, unsigned int offset) +{ + sg_assign_page(sg, page); + sg->offset = offset; + sg->length = len; +} + +static inline struct page *sg_page(struct scatterlist *sg) +{ + return (struct page *)((sg)->page_link & ~0x3); +} + +static inline void sg_set_buf(struct scatterlist *sg, const void *buf, + unsigned int buflen) +{ + sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf)); +} + +static inline struct scatterlist *sg_next(struct scatterlist *sg) +{ + if (sg_is_last(sg)) + return NULL; + + sg++; + if (unlikely(sg_is_chain(sg))) + sg = sg_chain_ptr(sg); + + return sg; +} + +#define for_each_sg(sglist, sg, nr, __i) \ + for (__i = 0, sg = (sglist); __i < (nr); __i++, sg = sg_next(sg)) + +static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents, + struct scatterlist *sgl) +{ + /* + * offset and length are unused for chain entry. Clear them. + */ + prv[prv_nents - 1].offset = 0; + prv[prv_nents - 1].length = 0; + + /* + * Set lowest bit to indicate a link pointer, and make sure to clear + * the termination bit if it happens to be set. 
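+ *
+ * Schematically (CHAIN and END name the 0x01 and 0x02 bits used by
+ * sg_is_chain()/sg_is_last() above), the assignment below is:
+ *
+ *	page_link = ((unsigned long) next_sgl | CHAIN) & ~END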
+ */ + prv[prv_nents - 1].page_link = ((unsigned long) sgl | 0x01) & ~0x02; +} + +static inline void sg_mark_end(struct scatterlist *sg) +{ + sg->page_link |= 0x02; + sg->page_link &= ~0x01; +} + +static inline void sg_unmark_end(struct scatterlist *sg) +{ + sg->page_link &= ~0x02; +} + +static inline void *sg_virt(struct scatterlist *sg) +{ + return page_address(sg_page(sg)) + sg->offset; +} + +static inline void sg_init_table(struct scatterlist *sgl, unsigned int nents) +{ + memset(sgl, 0, sizeof(*sgl) * nents); + sg_mark_end(&sgl[nents - 1]); +} + +static inline void sg_init_one(struct scatterlist *sg, const void *buf, + unsigned int buflen) +{ + sg_init_table(sg, 1); + sg_set_buf(sg, buf, buflen); +} + +#endif /* _LINUX_SCATTERLIST_H */ diff --git a/c_src/include/linux/sched.h b/c_src/include/linux/sched.h new file mode 100644 index 00000000..7afb6d54 --- /dev/null +++ b/c_src/include/linux/sched.h @@ -0,0 +1,186 @@ +#ifndef __TOOLS_LINUX_SCHED_H +#define __TOOLS_LINUX_SCHED_H + +#include <pthread.h> +#include <time.h> +#include <linux/atomic.h> +#include <linux/bug.h> +#include <linux/completion.h> +#include <linux/jiffies.h> +#include <linux/rwsem.h> +#include <linux/time64.h> + +#define TASK_RUNNING 0 +#define TASK_INTERRUPTIBLE 1 +#define TASK_UNINTERRUPTIBLE 2 +#define __TASK_STOPPED 4 +#define __TASK_TRACED 8 +/* in tsk->exit_state */ +#define EXIT_DEAD 16 +#define EXIT_ZOMBIE 32 +#define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD) +/* in tsk->state again */ +#define TASK_DEAD 64 +#define TASK_WAKEKILL 128 +#define TASK_WAKING 256 +#define TASK_PARKED 512 +#define TASK_NOLOAD 1024 +#define TASK_NEW 2048 +#define TASK_IDLE_WORKER 4096 +#define TASK_STATE_MAX 8192 +#define TASK_FREEZABLE (1U << 14) + +/* Convenience macros for the sake of set_task_state */ +#define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) +#define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) +#define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED) + +#define TASK_IDLE (TASK_UNINTERRUPTIBLE | TASK_NOLOAD) + +/* Convenience macros for the sake of wake_up */ +#define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE) +#define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED) + +#define TASK_COMM_LEN 16 + +#define PF_EXITING 0x00000004 /* getting shut down */ +#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ +#define PF_VCPU 0x00000010 /* I'm a virtual CPU */ +#define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */ +#define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ +#define PF_MCE_PROCESS 0x00000080 /* process policy on mce errors */ +#define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ +#define PF_DUMPCORE 0x00000200 /* dumped core */ +#define PF_SIGNALED 0x00000400 /* killed by a signal */ +#define PF_MEMALLOC 0x00000800 /* Allocating memory */ +#define PF_NPROC_EXCEEDED 0x00001000 /* set_user noticed that RLIMIT_NPROC was exceeded */ +#define PF_USED_MATH 0x00002000 /* if unset the fpu must be initialized before use */ +#define PF_USED_ASYNC 0x00004000 /* used async_schedule*(), used by module init */ +#define PF_NOFREEZE 0x00008000 /* this thread should not be frozen */ +#define PF_FROZEN 0x00010000 /* frozen for system suspend */ +#define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */ +#define PF_KSWAPD 0x00040000 /* I am kswapd */ +#define PF_MEMALLOC_NOIO 0x00080000 /* Allocating memory without IO involved */ +#define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ +#define PF_KTHREAD 0x00200000 /* I am a kernel thread */ 
+#define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */ +#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ +#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */ +#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ +#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ +#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ + +struct task_struct { + pthread_t thread; + + int (*thread_fn)(void *); + void *thread_data; + + atomic_t usage; + int state; + + /* kthread: */ + unsigned long kthread_flags; + struct completion exited; + + unsigned flags; + + bool on_cpu; + char comm[TASK_COMM_LEN]; + pid_t pid; + + struct bio_list *bio_list; + + struct signal_struct { + struct rw_semaphore exec_update_lock; + } *signal, _signal; +}; + +extern __thread struct task_struct *current; + +#define __set_task_state(tsk, state_value) \ + do { (tsk)->state = (state_value); } while (0) +#define set_task_state(tsk, state_value) \ + smp_store_mb((tsk)->state, (state_value)) +#define __set_current_state(state_value) \ + do { current->state = (state_value); } while (0) +#define set_current_state(state_value) \ + smp_store_mb(current->state, (state_value)) + +static inline struct task_struct *get_task_struct(struct task_struct *task) +{ + atomic_inc(&task->usage); + return task; + +} + +extern void __put_task_struct(struct task_struct *t); + +static inline void put_task_struct(struct task_struct *t) +{ + if (atomic_dec_and_test(&t->usage)) + __put_task_struct(t); +} + +static inline void cond_resched(void) {} +#define need_resched() 0 + +void schedule(void); + +#define MAX_SCHEDULE_TIMEOUT LONG_MAX +long schedule_timeout(long timeout); + +static inline void io_schedule(void) +{ + schedule(); +} + +static inline long io_schedule_timeout(long timeout) +{ + return schedule_timeout(timeout); +} + +int wake_up_process(struct task_struct *); + +static inline u64 ktime_get_seconds(void) +{ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC, &ts); + + return ts.tv_sec; +} + +static inline u64 ktime_get_real_ns(void) +{ + struct timespec ts; + + clock_gettime(CLOCK_REALTIME, &ts); + return timespec_to_ns(&ts); +} + +static inline u64 ktime_get_real_seconds(void) +{ + struct timespec ts; + + clock_gettime(CLOCK_REALTIME, &ts); + + return ts.tv_sec; +} + +static inline void ktime_get_coarse_real_ts64(struct timespec64 *ts) +{ + clock_gettime(CLOCK_REALTIME_COARSE, ts); +} + +#define current_kernel_time64() current_kernel_time() +#define CURRENT_TIME (current_kernel_time()) + +static inline unsigned int stack_trace_save_tsk(struct task_struct *task, + unsigned long *store, unsigned int size, + unsigned int skipnr) +{ + return 0; +} + +#endif /* __TOOLS_LINUX_SCHED_H */ diff --git a/c_src/include/linux/sched/clock.h b/c_src/include/linux/sched/clock.h new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/c_src/include/linux/sched/clock.h diff --git a/c_src/include/linux/sched/cputime.h b/c_src/include/linux/sched/cputime.h new file mode 100644 index 00000000..a89c626f --- /dev/null +++ b/c_src/include/linux/sched/cputime.h @@ -0,0 +1,6 @@ + +static inline void task_cputime_adjusted(struct task_struct *p, u64 *utime, u64 *stime) +{ + *utime = 0; + *stime = 0; +} diff --git a/c_src/include/linux/sched/debug.h b/c_src/include/linux/sched/debug.h new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/c_src/include/linux/sched/debug.h diff --git 
a/c_src/include/linux/sched/mm.h b/c_src/include/linux/sched/mm.h new file mode 100644 index 00000000..03feda7a --- /dev/null +++ b/c_src/include/linux/sched/mm.h @@ -0,0 +1,31 @@ +#ifndef _LINUX_SCHED_MM_H +#define _LINUX_SCHED_MM_H + +#define PF_MEMALLOC 0x00000800 /* Allocating memory */ +#define PF_MEMALLOC_NOFS 0x00040000 /* All allocation requests will inherit GFP_NOFS */ + +static inline unsigned int memalloc_nofs_save(void) +{ + unsigned int flags = current->flags & PF_MEMALLOC_NOFS; + current->flags |= PF_MEMALLOC_NOFS; + return flags; +} + +static inline void memalloc_nofs_restore(unsigned int flags) +{ + current->flags = (current->flags & ~PF_MEMALLOC_NOFS) | flags; +} + +static inline unsigned int memalloc_noreclaim_save(void) +{ + unsigned int flags = current->flags & PF_MEMALLOC; + current->flags |= PF_MEMALLOC; + return flags; +} + +static inline void memalloc_noreclaim_restore(unsigned int flags) +{ + current->flags = (current->flags & ~PF_MEMALLOC) | flags; +} + +#endif /* _LINUX_SCHED_MM_H */ diff --git a/c_src/include/linux/sched/rt.h b/c_src/include/linux/sched/rt.h new file mode 100644 index 00000000..ef3040e4 --- /dev/null +++ b/c_src/include/linux/sched/rt.h @@ -0,0 +1,9 @@ +#ifndef _SCHED_RT_H +#define _SCHED_RT_H + +static inline int rt_task(struct task_struct *p) +{ + return 0; +} + +#endif /* _SCHED_RT_H */ diff --git a/c_src/include/linux/sched/signal.h b/c_src/include/linux/sched/signal.h new file mode 100644 index 00000000..20bdc050 --- /dev/null +++ b/c_src/include/linux/sched/signal.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_SCHED_SIGNAL_H +#define _LINUX_SCHED_SIGNAL_H + +static inline int fatal_signal_pending(struct task_struct *p) +{ + return 0; +} + +#endif /* _LINUX_SCHED_SIGNAL_H */ + diff --git a/c_src/include/linux/sched/task.h b/c_src/include/linux/sched/task.h new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/c_src/include/linux/sched/task.h diff --git a/c_src/include/linux/sched/task_stack.h b/c_src/include/linux/sched/task_stack.h new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/c_src/include/linux/sched/task_stack.h diff --git a/c_src/include/linux/semaphore.h b/c_src/include/linux/semaphore.h new file mode 100644 index 00000000..498e717a --- /dev/null +++ b/c_src/include/linux/semaphore.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2008 Intel Corporation + * Author: Matthew Wilcox <willy@linux.intel.com> + * + * Please see kernel/locking/semaphore.c for documentation of these functions + */ +#ifndef __LINUX_SEMAPHORE_H +#define __LINUX_SEMAPHORE_H + +#include <linux/list.h> +#include <linux/spinlock.h> + +/* Please don't access any members of this structure directly */ +struct semaphore { + raw_spinlock_t lock; + unsigned int count; + struct list_head wait_list; +}; + +#define __SEMAPHORE_INITIALIZER(name, n) \ +{ \ + .lock = __RAW_SPIN_LOCK_UNLOCKED((name).lock), \ + .count = n, \ + .wait_list = LIST_HEAD_INIT((name).wait_list), \ +} + +#define DEFINE_SEMAPHORE(name) \ + struct semaphore name = __SEMAPHORE_INITIALIZER(name, 1) + +static inline void sema_init(struct semaphore *sem, int val) +{ + *sem = (struct semaphore) __SEMAPHORE_INITIALIZER(*sem, val); +} + +extern void down(struct semaphore *sem); +extern int __must_check down_interruptible(struct semaphore *sem); +extern int __must_check down_killable(struct semaphore *sem); +extern int __must_check down_trylock(struct semaphore *sem); +extern int __must_check down_timeout(struct 
semaphore *sem, long); +extern void up(struct semaphore *sem); + +#endif /* __LINUX_SEMAPHORE_H */ diff --git a/c_src/include/linux/seq_buf.h b/c_src/include/linux/seq_buf.h new file mode 100644 index 00000000..8c9c0dd7 --- /dev/null +++ b/c_src/include/linux/seq_buf.h @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_SEQ_BUF_H +#define _LINUX_SEQ_BUF_H + +#include <linux/kernel.h> +#include <stdarg.h> +#include <string.h> + +/* + * Trace sequences are used to allow a function to call several other functions + * to create a string of data to use. + */ + +/** + * seq_buf - seq buffer structure + * @buffer: pointer to the buffer + * @size: size of the buffer + * @len: the amount of data inside the buffer + * @readpos: The next position to read in the buffer. + */ +struct seq_buf { + char *buffer; + size_t size; + size_t len; + loff_t readpos; +}; + +static inline void seq_buf_clear(struct seq_buf *s) +{ + s->len = 0; + s->readpos = 0; +} + +static inline void +seq_buf_init(struct seq_buf *s, char *buf, unsigned int size) +{ + s->buffer = buf; + s->size = size; + seq_buf_clear(s); +} + +/* + * A seq_buf has a buffer that might overflow. When this happens + * the len and size are set to be equal. + */ +static inline bool +seq_buf_has_overflowed(struct seq_buf *s) +{ + return s->len > s->size; +} + +static inline void +seq_buf_set_overflow(struct seq_buf *s) +{ + s->len = s->size + 1; +} + +/* + * How much buffer is left on the seq_buf? + */ +static inline unsigned int +seq_buf_buffer_left(struct seq_buf *s) +{ + if (seq_buf_has_overflowed(s)) + return 0; + + return s->size - s->len; +} + +/* How much buffer was written? */ +static inline unsigned int seq_buf_used(struct seq_buf *s) +{ + return min(s->len, s->size); +} + +/** + * seq_buf_terminate - Make sure buffer is nul terminated + * @s: the seq_buf descriptor to terminate. + * + * This makes sure that the buffer in @s is nul terminated and + * safe to read as a string. + * + * Note, if this is called when the buffer has overflowed, then + * the last byte of the buffer is zeroed, and the len will still + * point past it. + * + * After this function is called, s->buffer is safe to use + * in string operations. + */ +static inline void seq_buf_terminate(struct seq_buf *s) +{ + if (WARN_ON(s->size == 0)) + return; + + if (seq_buf_buffer_left(s)) + s->buffer[s->len] = 0; + else + s->buffer[s->size - 1] = 0; +} + +/** + * seq_buf_get_buf - get buffer to write arbitrary data to + * @s: the seq_buf handle + * @bufp: the beginning of the buffer is stored here + * + * Return the number of bytes available in the buffer, or zero if + * there's no space. + */ +static inline size_t seq_buf_get_buf(struct seq_buf *s, char **bufp) +{ + WARN_ON(s->len > s->size + 1); + + if (s->len < s->size) { + *bufp = s->buffer + s->len; + return s->size - s->len; + } + + *bufp = NULL; + return 0; +} + +/** + * seq_buf_commit - commit data to the buffer + * @s: the seq_buf handle + * @num: the number of bytes to commit + * + * Commit @num bytes of data written to a buffer previously acquired + * by seq_buf_get_buf(). To signal an error condition, or that the data + * didn't fit in the available space, pass a negative @num value.
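+ *
+ * Typical pairing with seq_buf_get_buf() (sketch; s and val are
+ * illustrative variables):
+ *
+ *	char *p;
+ *	size_t avail = seq_buf_get_buf(&s, &p);
+ *	int n = snprintf(p, avail, "%u", val);
+ *	seq_buf_commit(&s, n < (int) avail ? n : -1);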
+ */ +static inline void seq_buf_commit(struct seq_buf *s, int num) +{ + if (num < 0) { + seq_buf_set_overflow(s); + } else { + /* num must be negative on overflow */ + BUG_ON(s->len + num > s->size); + s->len += num; + } +} + +extern __printf(2, 3) +int seq_buf_printf(struct seq_buf *s, const char *fmt, ...); +extern __printf(2, 0) +int seq_buf_vprintf(struct seq_buf *s, const char *fmt, va_list args); +extern int seq_buf_to_user(struct seq_buf *s, char __user *ubuf, + int cnt); +extern int seq_buf_puts(struct seq_buf *s, const char *str); +extern int seq_buf_putc(struct seq_buf *s, unsigned char c); + +void seq_buf_human_readable_u64(struct seq_buf *, u64); + +#endif /* _LINUX_SEQ_BUF_H */ diff --git a/c_src/include/linux/seq_file.h b/c_src/include/linux/seq_file.h new file mode 100644 index 00000000..b455ebca --- /dev/null +++ b/c_src/include/linux/seq_file.h @@ -0,0 +1,21 @@ +#ifndef _LINUX_SEQ_FILE_H +#define _LINUX_SEQ_FILE_H + +#include <linux/types.h> +#include <linux/fs.h> + +struct seq_file { + char *buf; + size_t size; + size_t from; + size_t count; + size_t pad_until; + loff_t index; + loff_t read_pos; + u64 version; + int poll_event; + const struct file *file; + void *private; +}; + +#endif diff --git a/c_src/include/linux/seqlock.h b/c_src/include/linux/seqlock.h new file mode 100644 index 00000000..435420fe --- /dev/null +++ b/c_src/include/linux/seqlock.h @@ -0,0 +1,47 @@ +#ifndef __LINUX_SEQLOCK_H +#define __LINUX_SEQLOCK_H + +#include <linux/compiler.h> + +typedef struct seqcount { + unsigned sequence; +} seqcount_t; + +static inline void seqcount_init(seqcount_t *s) +{ + s->sequence = 0; +} + +static inline unsigned read_seqcount_begin(const seqcount_t *s) +{ + unsigned ret; + +repeat: + ret = READ_ONCE(s->sequence); + if (unlikely(ret & 1)) { + cpu_relax(); + goto repeat; + } + smp_rmb(); + return ret; +} + +static inline int read_seqcount_retry(const seqcount_t *s, unsigned start) +{ + smp_rmb(); + return unlikely(s->sequence != start); +} + +static inline void write_seqcount_begin(seqcount_t *s) +{ + s->sequence++; + smp_wmb(); +} + +static inline void write_seqcount_end(seqcount_t *s) +{ + smp_wmb(); + s->sequence++; +} + +#endif /* __LINUX_SEQLOCK_H */ diff --git a/c_src/include/linux/shrinker.h b/c_src/include/linux/shrinker.h new file mode 100644 index 00000000..d0a84794 --- /dev/null +++ b/c_src/include/linux/shrinker.h @@ -0,0 +1,35 @@ +#ifndef __TOOLS_LINUX_SHRINKER_H +#define __TOOLS_LINUX_SHRINKER_H + +#include <linux/list.h> +#include <linux/types.h> + +struct shrink_control { + gfp_t gfp_mask; + unsigned long nr_to_scan; +}; + +#define SHRINK_STOP (~0UL) + +struct seq_buf; +struct shrinker { + unsigned long (*count_objects)(struct shrinker *, + struct shrink_control *sc); + unsigned long (*scan_objects)(struct shrinker *, + struct shrink_control *sc); + void (*to_text)(struct seq_buf *, struct shrinker *); + + int seeks; /* seeks to recreate an obj */ + long batch; /* reclaim batch size, 0 = default */ + struct list_head list; + void *private_data; +}; + +void shrinker_free(struct shrinker *); +struct shrinker *shrinker_alloc(unsigned int, const char *, ...); + +int shrinker_register(struct shrinker *); + +void run_shrinkers(gfp_t gfp_mask, bool); + +#endif /* __TOOLS_LINUX_SHRINKER_H */ diff --git a/c_src/include/linux/siphash.h b/c_src/include/linux/siphash.h new file mode 100644 index 00000000..bf21591a --- /dev/null +++ b/c_src/include/linux/siphash.h @@ -0,0 +1,145 @@ +/* Copyright (C) 2016 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 
+ * + * This file is provided under a dual BSD/GPLv2 license. + * + * SipHash: a fast short-input PRF + * https://131002.net/siphash/ + * + * This implementation is specifically for SipHash2-4 for a secure PRF + * and HalfSipHash1-3/SipHash1-3 for an insecure PRF only suitable for + * hashtables. + */ + +#ifndef _LINUX_SIPHASH_H +#define _LINUX_SIPHASH_H + +#include <linux/types.h> +#include <linux/kernel.h> + +#define SIPHASH_ALIGNMENT __alignof__(u64) +typedef struct { + u64 key[2]; +} siphash_key_t; + +static inline bool siphash_key_is_zero(const siphash_key_t *key) +{ + return !(key->key[0] | key->key[1]); +} + +u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key); +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS +u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key); +#endif + +u64 siphash_1u64(const u64 a, const siphash_key_t *key); +u64 siphash_2u64(const u64 a, const u64 b, const siphash_key_t *key); +u64 siphash_3u64(const u64 a, const u64 b, const u64 c, + const siphash_key_t *key); +u64 siphash_4u64(const u64 a, const u64 b, const u64 c, const u64 d, + const siphash_key_t *key); +u64 siphash_1u32(const u32 a, const siphash_key_t *key); +u64 siphash_3u32(const u32 a, const u32 b, const u32 c, + const siphash_key_t *key); + +static inline u64 siphash_2u32(const u32 a, const u32 b, + const siphash_key_t *key) +{ + return siphash_1u64((u64)b << 32 | a, key); +} +static inline u64 siphash_4u32(const u32 a, const u32 b, const u32 c, + const u32 d, const siphash_key_t *key) +{ + return siphash_2u64((u64)b << 32 | a, (u64)d << 32 | c, key); +} + + +static inline u64 ___siphash_aligned(const __le64 *data, size_t len, + const siphash_key_t *key) +{ + if (__builtin_constant_p(len) && len == 4) + return siphash_1u32(le32_to_cpup((const __le32 *)data), key); + if (__builtin_constant_p(len) && len == 8) + return siphash_1u64(le64_to_cpu(data[0]), key); + if (__builtin_constant_p(len) && len == 16) + return siphash_2u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]), + key); + if (__builtin_constant_p(len) && len == 24) + return siphash_3u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]), + le64_to_cpu(data[2]), key); + if (__builtin_constant_p(len) && len == 32) + return siphash_4u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]), + le64_to_cpu(data[2]), le64_to_cpu(data[3]), + key); + return __siphash_aligned(data, len, key); +} + +/** + * siphash - compute 64-bit siphash PRF value + * @data: buffer to hash + * @size: size of @data + * @key: the siphash key + */ +static inline u64 siphash(const void *data, size_t len, + const siphash_key_t *key) +{ +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + if (!IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT)) + return __siphash_unaligned(data, len, key); +#endif + return ___siphash_aligned(data, len, key); +} + +#define HSIPHASH_ALIGNMENT __alignof__(unsigned long) +typedef struct { + unsigned long key[2]; +} hsiphash_key_t; + +u32 __hsiphash_aligned(const void *data, size_t len, + const hsiphash_key_t *key); +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS +u32 __hsiphash_unaligned(const void *data, size_t len, + const hsiphash_key_t *key); +#endif + +u32 hsiphash_1u32(const u32 a, const hsiphash_key_t *key); +u32 hsiphash_2u32(const u32 a, const u32 b, const hsiphash_key_t *key); +u32 hsiphash_3u32(const u32 a, const u32 b, const u32 c, + const hsiphash_key_t *key); +u32 hsiphash_4u32(const u32 a, const u32 b, const u32 c, const u32 d, + const hsiphash_key_t *key); + +static inline u32 
___hsiphash_aligned(const __le32 *data, size_t len, + const hsiphash_key_t *key) +{ + if (__builtin_constant_p(len) && len == 4) + return hsiphash_1u32(le32_to_cpu(data[0]), key); + if (__builtin_constant_p(len) && len == 8) + return hsiphash_2u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]), + key); + if (__builtin_constant_p(len) && len == 12) + return hsiphash_3u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]), + le32_to_cpu(data[2]), key); + if (__builtin_constant_p(len) && len == 16) + return hsiphash_4u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]), + le32_to_cpu(data[2]), le32_to_cpu(data[3]), + key); + return __hsiphash_aligned(data, len, key); +} + +/** + * hsiphash - compute 32-bit hsiphash PRF value + * @data: buffer to hash + * @size: size of @data + * @key: the hsiphash key + */ +static inline u32 hsiphash(const void *data, size_t len, + const hsiphash_key_t *key) +{ +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + if (!IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT)) + return __hsiphash_unaligned(data, len, key); +#endif + return ___hsiphash_aligned(data, len, key); +} + +#endif /* _LINUX_SIPHASH_H */ diff --git a/c_src/include/linux/slab.h b/c_src/include/linux/slab.h new file mode 100644 index 00000000..ca0c7934 --- /dev/null +++ b/c_src/include/linux/slab.h @@ -0,0 +1,272 @@ +#ifndef __TOOLS_LINUX_SLAB_H +#define __TOOLS_LINUX_SLAB_H + +#include <malloc.h> +#include <stdlib.h> +#include <string.h> + +#include <linux/kernel.h> +#include <linux/log2.h> +#include <linux/overflow.h> +#include <linux/page.h> +#include <linux/shrinker.h> +#include <linux/types.h> + +#include <stdlib.h> +#include <sys/mman.h> + +#define alloc_hooks(_do, ...) _do + +#define ARCH_KMALLOC_MINALIGN 16 +#define KMALLOC_MAX_SIZE SIZE_MAX + +static inline void *kmalloc_noprof(size_t size, gfp_t flags) +{ + unsigned i; + void *p; + + for (i = 0; i < 10; i++) { + if (size) { + size_t alignment = min_t(size_t, PAGE_SIZE, + rounddown_pow_of_two(size)); + alignment = max(sizeof(void *), alignment); + if (posix_memalign(&p, alignment, size)) + p = NULL; + } else { + p = malloc(0); + } + + if (p) { + if (flags & __GFP_ZERO) + memset(p, 0, size); + break; + } + + run_shrinkers(flags, true); + } + + return p; +} +#define kmalloc kmalloc_noprof + +static inline void *krealloc(void *old, size_t size, gfp_t flags) +{ + void *new; + + new = kmalloc(size, flags); + if (!new) + return NULL; + + if (flags & __GFP_ZERO) + memset(new, 0, size); + + if (old) { + memcpy(new, old, + min(malloc_usable_size(old), + malloc_usable_size(new))); + free(old); + } + + return new; +} + +static inline void *krealloc_array(void *p, size_t new_n, size_t new_size, gfp_t flags) +{ + size_t bytes; + + if (unlikely(check_mul_overflow(new_n, new_size, &bytes))) + return NULL; + + return krealloc(p, bytes, flags); +} + +#define kzalloc(size, flags) kmalloc(size, flags|__GFP_ZERO) + +static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags) +{ + size_t bytes; + + if (unlikely(check_mul_overflow(n, size, &bytes))) + return NULL; + return kmalloc(bytes, flags); +} + +#define kvmalloc_array(n, size, flags) \ + ((size) != 0 && (n) > SIZE_MAX / (size) \ + ? 
NULL : kmalloc((n) * (size), flags)) + +#define kcalloc(n, size, flags) kmalloc_array(n, size, flags|__GFP_ZERO) + +#define kfree(p) free(p) +#define kzfree(p) free(p) + +#define kvmalloc(size, flags) kmalloc(size, flags) +#define kvzalloc(size, flags) kzalloc(size, flags) +#define kvfree(p) kfree(p) + +static inline struct page *alloc_pages_noprof(gfp_t flags, unsigned int order) +{ + size_t size = PAGE_SIZE << order; + unsigned i; + void *p; + + for (i = 0; i < 10; i++) { + p = aligned_alloc(PAGE_SIZE, size); + + if (p) { + if (flags & __GFP_ZERO) + memset(p, 0, size); + break; + } + + run_shrinkers(flags, true); + } + + return p; +} +#define alloc_pages alloc_pages_noprof + +#define alloc_page(gfp) alloc_pages(gfp, 0) + +#define _get_free_pages(gfp, order) ((unsigned long) alloc_pages(gfp, order)) +#define __get_free_pages(gfp, order) ((unsigned long) alloc_pages(gfp, order)) +#define get_free_pages_noprof(gfp, order) \ + ((unsigned long) alloc_pages(gfp, order)) +#define __get_free_page(gfp) __get_free_pages(gfp, 0) + +#define __free_pages(page, order) \ +do { \ + (void) order; \ + free(page); \ +} while (0) + +#define free_pages(addr, order) \ +do { \ + (void) order; \ + free((void *) (addr)); \ +} while (0) + +#define __free_page(page) __free_pages((page), 0) +#define free_page(addr) free_pages((addr), 0) + +#define VM_IOREMAP 0x00000001 /* ioremap() and friends */ +#define VM_ALLOC 0x00000002 /* vmalloc() */ +#define VM_MAP 0x00000004 /* vmap()ed pages */ +#define VM_USERMAP 0x00000008 /* suitable for remap_vmalloc_range */ +#define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */ +#define VM_NO_GUARD 0x00000040 /* don't add guard page */ +#define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */ + +static inline void vunmap(const void *addr) {} + +static inline void *vmap(struct page **pages, unsigned int count, + unsigned long flags, unsigned prot) +{ + return NULL; +} + +#define is_vmalloc_addr(page) 0 + +#define vmalloc_to_page(addr) ((struct page *) (addr)) + +static inline void *kmemdup(const void *src, size_t len, gfp_t gfp) +{ + void *p; + + p = kmalloc(len, gfp); + if (p) + memcpy(p, src, len); + return p; +} + +struct kmem_cache { + size_t obj_size; +}; + +static inline void *kmem_cache_alloc(struct kmem_cache *c, gfp_t gfp) +{ + return kmalloc(c->obj_size, gfp); +} + +static inline void *kmem_cache_zalloc(struct kmem_cache *c, gfp_t gfp) +{ + return kzalloc(c->obj_size, gfp); +} + +static inline void kmem_cache_free(struct kmem_cache *c, void *p) +{ + kfree(p); +} + +static inline void kmem_cache_destroy(struct kmem_cache *p) +{ + kfree(p); +} + +static inline struct kmem_cache *kmem_cache_create(size_t obj_size) +{ + struct kmem_cache *p = kmalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return NULL; + + p->obj_size = obj_size; + return p; +} + +#define KMEM_CACHE(_struct, _flags) kmem_cache_create(sizeof(struct _struct)) + +#define PAGE_KERNEL 0 +#define PAGE_KERNEL_EXEC 1 + +#define vfree(p) free(p) + +static inline void *__vmalloc_noprof(unsigned long size, gfp_t flags) +{ + unsigned i; + void *p; + + size = round_up(size, PAGE_SIZE); + + for (i = 0; i < 10; i++) { + p = aligned_alloc(PAGE_SIZE, size); + + if (p) { + if (flags & __GFP_ZERO) + memset(p, 0, size); + break; + } + + run_shrinkers(flags, true); + } + + return p; +} +#define __vmalloc __vmalloc_noprof + +static inline void *vmalloc_exec(unsigned long size, gfp_t gfp_mask) +{ + void *p; + + p = __vmalloc(size, gfp_mask); + if (!p) + return NULL; + + if (mprotect(p, size, 
PROT_READ|PROT_WRITE|PROT_EXEC)) { + vfree(p); + return NULL; + } + + return p; +} + +static inline void *vmalloc(unsigned long size) +{ + return __vmalloc(size, GFP_KERNEL); +} + +static inline void *vzalloc(unsigned long size) +{ + return __vmalloc(size, GFP_KERNEL|__GFP_ZERO); +} + +#endif /* __TOOLS_LINUX_SLAB_H */ diff --git a/c_src/include/linux/sort.h b/c_src/include/linux/sort.h new file mode 100644 index 00000000..afea0445 --- /dev/null +++ b/c_src/include/linux/sort.h @@ -0,0 +1,13 @@ +#ifndef _LINUX_SORT_H +#define _LINUX_SORT_H + +#include <stdlib.h> + +static inline void sort(void *base, size_t num, size_t size, + int (*cmp_func)(const void *, const void *), + void (*swap_func)(void *, void *, int size)) +{ + return qsort(base, num, size, cmp_func); +} + +#endif diff --git a/c_src/include/linux/spinlock.h b/c_src/include/linux/spinlock.h new file mode 100644 index 00000000..6c4a623c --- /dev/null +++ b/c_src/include/linux/spinlock.h @@ -0,0 +1,65 @@ +#ifndef __TOOLS_LINUX_SPINLOCK_H +#define __TOOLS_LINUX_SPINLOCK_H + +#include <linux/atomic.h> +#include <pthread.h> + +typedef struct { + pthread_mutex_t lock; +} raw_spinlock_t; + +#define __RAW_SPIN_LOCK_UNLOCKED(name) (raw_spinlock_t) { .lock = PTHREAD_MUTEX_INITIALIZER } + +static inline void raw_spin_lock_init(raw_spinlock_t *lock) +{ + pthread_mutex_init(&lock->lock, NULL); +} + +static inline bool raw_spin_trylock(raw_spinlock_t *lock) +{ + return !pthread_mutex_trylock(&lock->lock); +} + +static inline void raw_spin_lock(raw_spinlock_t *lock) +{ + pthread_mutex_lock(&lock->lock); +} + +static inline void raw_spin_unlock(raw_spinlock_t *lock) +{ + pthread_mutex_unlock(&lock->lock); +} + +#define raw_spin_lock_irq(lock) raw_spin_lock(lock) +#define raw_spin_unlock_irq(lock) raw_spin_unlock(lock) + +#define raw_spin_lock_irqsave(lock, flags) \ +do { \ + flags = 0; \ + raw_spin_lock(lock); \ +} while (0) + +#define raw_spin_unlock_irqrestore(lock, flags) raw_spin_unlock(lock) + +typedef raw_spinlock_t spinlock_t; + +#define __SPIN_LOCK_UNLOCKED(name) __RAW_SPIN_LOCK_UNLOCKED(name) + +#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x) + +#define spin_lock_init(lock) raw_spin_lock_init(lock) +#define spin_lock(lock) raw_spin_lock(lock) +#define spin_unlock(lock) raw_spin_unlock(lock) + +#define spin_lock_nested(lock, n) spin_lock(lock) + +#define spin_lock_bh(lock) raw_spin_lock(lock) +#define spin_unlock_bh(lock) raw_spin_unlock(lock) + +#define spin_lock_irq(lock) raw_spin_lock(lock) +#define spin_unlock_irq(lock) raw_spin_unlock(lock) + +#define spin_lock_irqsave(lock, flags) raw_spin_lock_irqsave(lock, flags) +#define spin_unlock_irqrestore(lock, flags) raw_spin_unlock_irqrestore(lock, flags) + +#endif /* __TOOLS_LINUX_SPINLOCK_H */ diff --git a/c_src/include/linux/srcu.h b/c_src/include/linux/srcu.h new file mode 100644 index 00000000..75823cf2 --- /dev/null +++ b/c_src/include/linux/srcu.h @@ -0,0 +1,31 @@ +#ifndef __TOOLS_LINUX_SRCU_H +#define __TOOLS_LINUX_SRCU_H + +struct srcu_struct { +}; + +static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx) {} + +static inline int srcu_read_lock(struct srcu_struct *ssp) +{ + return 0; +} + +static inline bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie) +{ + return false; +} + +static inline unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp) +{ + return 0; +} + +static inline void cleanup_srcu_struct(struct srcu_struct *ssp) {} + +static inline int init_srcu_struct(struct srcu_struct *ssp) +{ + return 0; 
+} + +#endif /* __TOOLS_LINUX_SRCU_H */ diff --git a/c_src/include/linux/stat.h b/c_src/include/linux/stat.h new file mode 100644 index 00000000..1a30957b --- /dev/null +++ b/c_src/include/linux/stat.h @@ -0,0 +1,12 @@ +#ifndef _LINUX_STAT_H +#define _LINUX_STAT_H + +#include <sys/stat.h> + +#define S_IRWXUGO (S_IRWXU|S_IRWXG|S_IRWXO) +#define S_IALLUGO (S_ISUID|S_ISGID|S_ISVTX|S_IRWXUGO) +#define S_IRUGO (S_IRUSR|S_IRGRP|S_IROTH) +#define S_IWUGO (S_IWUSR|S_IWGRP|S_IWOTH) +#define S_IXUGO (S_IXUSR|S_IXGRP|S_IXOTH) + +#endif diff --git a/c_src/include/linux/string.h b/c_src/include/linux/string.h new file mode 100644 index 00000000..3ceda3a3 --- /dev/null +++ b/c_src/include/linux/string.h @@ -0,0 +1,17 @@ +#ifndef _TOOLS_LINUX_STRING_H_ +#define _TOOLS_LINUX_STRING_H_ + +#include <stdlib.h> +#include <string.h> +#include <linux/types.h> /* for size_t */ + +extern size_t strlcpy(char *dest, const char *src, size_t size); +extern ssize_t strscpy(char *dest, const char *src, size_t count); +extern char *strim(char *); +extern void memzero_explicit(void *, size_t); +int match_string(const char * const *, size_t, const char *); + +#define kstrndup(s, n, gfp) strndup(s, n) +#define kstrdup(s, gfp) strdup(s) + +#endif /* _LINUX_STRING_H_ */ diff --git a/c_src/include/linux/string_helpers.h b/c_src/include/linux/string_helpers.h new file mode 100644 index 00000000..af587706 --- /dev/null +++ b/c_src/include/linux/string_helpers.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_STRING_HELPERS_H_ +#define _LINUX_STRING_HELPERS_H_ + +#include <linux/ctype.h> +#include <linux/string.h> +#include <linux/types.h> + + +/* Descriptions of the types of units to + * print in */ +enum string_size_units { + STRING_UNITS_10, /* use powers of 10^3 (standard SI) */ + STRING_UNITS_2, /* use binary powers of 2^10 */ +}; + +int string_get_size(u64 size, u64 blk_size, enum string_size_units units, + char *buf, int len); + +#endif diff --git a/c_src/include/linux/sysfs.h b/c_src/include/linux/sysfs.h new file mode 100644 index 00000000..cb75d88b --- /dev/null +++ b/c_src/include/linux/sysfs.h @@ -0,0 +1,38 @@ +#ifndef _SYSFS_H_ +#define _SYSFS_H_ + +#include <linux/compiler.h> + +struct kobject; + +struct attribute { + const char *name; + umode_t mode; +}; + +struct attribute_group { + struct attribute **attrs; +}; + +struct sysfs_ops { + ssize_t (*show)(struct kobject *, struct attribute *, char *); + ssize_t (*store)(struct kobject *, struct attribute *, const char *, size_t); +}; + +static inline int sysfs_create_files(struct kobject *kobj, + const struct attribute **attr) +{ + return 0; +} + +static inline int sysfs_create_link(struct kobject *kobj, + struct kobject *target, const char *name) +{ + return 0; +} + +static inline void sysfs_remove_link(struct kobject *kobj, const char *name) +{ +} + +#endif /* _SYSFS_H_ */ diff --git a/c_src/include/linux/time64.h b/c_src/include/linux/time64.h new file mode 100644 index 00000000..cd6cc1c1 --- /dev/null +++ b/c_src/include/linux/time64.h @@ -0,0 +1,51 @@ +#ifndef _LINUX_TIME64_H +#define _LINUX_TIME64_H + +#include <linux/types.h> + +#define timespec64 timespec + +typedef __s64 time64_t; + +/* Parameters used to convert the timespec values: */ +#define MSEC_PER_SEC 1000L +#define USEC_PER_MSEC 1000L +#define NSEC_PER_USEC 1000L +#define NSEC_PER_MSEC 1000000L +#define USEC_PER_SEC 1000000L +#define NSEC_PER_SEC 1000000000L +#define FSEC_PER_SEC 1000000000000000LL + +static inline struct timespec ns_to_timespec(const u64 nsec) +{ + return 
(struct timespec) { + .tv_sec = nsec / NSEC_PER_SEC, + .tv_nsec = nsec % NSEC_PER_SEC, + }; +} + +static inline s64 timespec_to_ns(const struct timespec *ts) +{ + return ((s64) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec; +} + +static inline struct timespec timespec_trunc(struct timespec t, unsigned gran) +{ + /* Avoid division in the common cases 1 ns and 1 s. */ + if (gran == 1) { + /* nothing */ + } else if (gran == NSEC_PER_SEC) { + t.tv_nsec = 0; + } else if (gran > 1 && gran < NSEC_PER_SEC) { + t.tv_nsec -= t.tv_nsec % gran; + } else { + WARN(1, "illegal file time granularity: %u", gran); + } + return t; +} + +#define ns_to_timespec64 ns_to_timespec +#define timespec64_to_ns timespec_to_ns +#define timespec64_trunc timespec_trunc + +#endif /* _LINUX_TIME64_H */ diff --git a/c_src/include/linux/timer.h b/c_src/include/linux/timer.h new file mode 100644 index 00000000..9667acf9 --- /dev/null +++ b/c_src/include/linux/timer.h @@ -0,0 +1,46 @@ +#ifndef __TOOLS_LINUX_TIMER_H +#define __TOOLS_LINUX_TIMER_H + +#include <string.h> +#include <linux/types.h> + +struct timer_list { + unsigned long expires; + void (*function)(struct timer_list *timer); + bool pending; +}; + +static inline void timer_setup(struct timer_list *timer, + void (*func)(struct timer_list *), + unsigned int flags) +{ + memset(timer, 0, sizeof(*timer)); + timer->function = func; +} + +#define timer_setup_on_stack(timer, callback, flags) \ + timer_setup(timer, callback, flags) + +#define destroy_timer_on_stack(timer) do {} while (0) + +static inline int timer_pending(const struct timer_list *timer) +{ + return timer->pending; +} + +int del_timer(struct timer_list * timer); +int del_timer_sync(struct timer_list *timer); + +#define del_singleshot_timer_sync(timer) del_timer_sync(timer) + +int mod_timer(struct timer_list *timer, unsigned long expires); + +static inline void add_timer(struct timer_list *timer) +{ + BUG_ON(timer_pending(timer)); + mod_timer(timer, timer->expires); +} + +void flush_timers(void); + +#endif /* __TOOLS_LINUX_TIMER_H */ diff --git a/c_src/include/linux/tracepoint.h b/c_src/include/linux/tracepoint.h new file mode 100644 index 00000000..1686cb90 --- /dev/null +++ b/c_src/include/linux/tracepoint.h @@ -0,0 +1,62 @@ +#ifndef __TOOLS_LINUX_TRACEPOINT_H +#define __TOOLS_LINUX_TRACEPOINT_H + +#define PARAMS(args...) args + +#define TP_PROTO(args...) args +#define TP_ARGS(args...) args +#define TP_CONDITION(args...) 
args + +#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ + static inline void trace_##name(proto) \ + { } \ + static inline void trace_##name##_rcuidle(proto) \ + { } \ + static inline int \ + register_trace_##name(void (*probe)(data_proto), \ + void *data) \ + { \ + return -ENOSYS; \ + } \ + static inline int \ + unregister_trace_##name(void (*probe)(data_proto), \ + void *data) \ + { \ + return -ENOSYS; \ + } \ + static inline void check_trace_callback_type_##name(void (*cb)(data_proto)) \ + { \ + } \ + static inline bool \ + trace_##name##_enabled(void) \ + { \ + return false; \ + } + +#define DEFINE_TRACE_FN(name, reg, unreg) +#define DEFINE_TRACE(name) +#define EXPORT_TRACEPOINT_SYMBOL_GPL(name) +#define EXPORT_TRACEPOINT_SYMBOL(name) + +#define DECLARE_TRACE_NOARGS(name) \ + __DECLARE_TRACE(name, void, , \ + cpu_online(raw_smp_processor_id()), \ + void *__data, __data) + +#define DECLARE_TRACE(name, proto, args) \ + __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), \ + cpu_online(raw_smp_processor_id()), \ + PARAMS(void *__data, proto), \ + PARAMS(__data, args)) + +#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print) +#define DEFINE_EVENT(template, name, proto, args) \ + DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) +#define DEFINE_EVENT_FN(template, name, proto, args, reg, unreg)\ + DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) +#define TRACE_EVENT(name, proto, args, struct, assign, print) \ + DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) + +#endif /* __TOOLS_LINUX_TRACEPOINT_H */ diff --git a/c_src/include/linux/typecheck.h b/c_src/include/linux/typecheck.h new file mode 100644 index 00000000..eb5b74a5 --- /dev/null +++ b/c_src/include/linux/typecheck.h @@ -0,0 +1,24 @@ +#ifndef TYPECHECK_H_INCLUDED +#define TYPECHECK_H_INCLUDED + +/* + * Check at compile time that something is of a particular type. + * Always evaluates to 1 so you may use it easily in comparisons. + */ +#define typecheck(type,x) \ +({ type __dummy; \ + typeof(x) __dummy2; \ + (void)(&__dummy == &__dummy2); \ + 1; \ +}) + +/* + * Check at compile time that 'function' is a certain type, or is a pointer + * to that type (needs to use typedef for the function type.) 
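+ *
+ * e.g. (cmp_fn_t and my_cmp are illustrative):
+ *
+ *	typedef int (*cmp_fn_t)(const void *, const void *);
+ *	typecheck_fn(cmp_fn_t, my_cmp);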
+ */ +#define typecheck_fn(type,function) \ +({ typeof(type) __tmp = function; \ + (void)__tmp; \ +}) + +#endif /* TYPECHECK_H_INCLUDED */ diff --git a/c_src/include/linux/types.h b/c_src/include/linux/types.h new file mode 100644 index 00000000..ce454e26 --- /dev/null +++ b/c_src/include/linux/types.h @@ -0,0 +1,87 @@ +#ifndef _TOOLS_LINUX_TYPES_H_ +#define _TOOLS_LINUX_TYPES_H_ + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/types.h> + +#define __SANE_USERSPACE_TYPES__ /* For PPC64, to get LL64 types */ +#include <asm/types.h> + +#include <linux/cache.h> + +#define BITS_PER_LONG __BITS_PER_LONG + +struct page; +struct kmem_cache; + +typedef unsigned long pgoff_t; + +typedef unsigned short umode_t; + +typedef unsigned gfp_t; + +#define GFP_ATOMIC 0 +#define GFP_NOFS 0 +#define GFP_NOIO 0 +#define GFP_NOWAIT 0 +#define __GFP_FS 0 +#define __GFP_IO 0 +#define __GFP_NOWARN 0 +#define __GFP_NORETRY 0 +#define __GFP_NOFAIL 0 +#define __GFP_ZERO 1 +#define GFP_KERNEL 2 + +#define PAGE_ALLOC_COSTLY_ORDER 6 + +typedef __u64 u64; +typedef __s64 s64; +typedef __u32 u32; +typedef __s32 s32; +typedef __u16 u16; +typedef __s16 s16; +typedef __u8 u8; +typedef __s8 s8; + +#ifdef __CHECKER__ +#define __bitwise__ __attribute__((bitwise)) +#else +#define __bitwise__ +#endif +#ifdef __CHECK_ENDIAN__ +#define __bitwise __bitwise__ +#else +#define __bitwise +#endif + +#define __force +#define __user +#define __must_check +#define __cold + +typedef __u16 __bitwise __le16; +typedef __u16 __bitwise __be16; +typedef __u32 __bitwise __le32; +typedef __u32 __bitwise __be32; +typedef __u64 __bitwise __le64; +typedef __u64 __bitwise __be64; + +#ifndef __aligned_u64 +# define __aligned_u64 __u64 __attribute__((aligned(8))) +#endif + +typedef u64 sector_t; + +typedef int (*cmp_func_t)(const void *a, const void *b); + +typedef unsigned int __bitwise slab_flags_t; +typedef u64 phys_addr_t; +struct vm_struct; +struct mnt_idmap; + +#endif /* _TOOLS_LINUX_TYPES_H_ */ diff --git a/c_src/include/linux/unaligned/be_byteshift.h b/c_src/include/linux/unaligned/be_byteshift.h new file mode 100644 index 00000000..9356b242 --- /dev/null +++ b/c_src/include/linux/unaligned/be_byteshift.h @@ -0,0 +1,70 @@ +#ifndef _LINUX_UNALIGNED_BE_BYTESHIFT_H +#define _LINUX_UNALIGNED_BE_BYTESHIFT_H + +#include <linux/types.h> + +static inline u16 __get_unaligned_be16(const u8 *p) +{ + return p[0] << 8 | p[1]; +} + +static inline u32 __get_unaligned_be32(const u8 *p) +{ + return p[0] << 24 | p[1] << 16 | p[2] << 8 | p[3]; +} + +static inline u64 __get_unaligned_be64(const u8 *p) +{ + return (u64)__get_unaligned_be32(p) << 32 | + __get_unaligned_be32(p + 4); +} + +static inline void __put_unaligned_be16(u16 val, u8 *p) +{ + *p++ = val >> 8; + *p++ = val; +} + +static inline void __put_unaligned_be32(u32 val, u8 *p) +{ + __put_unaligned_be16(val >> 16, p); + __put_unaligned_be16(val, p + 2); +} + +static inline void __put_unaligned_be64(u64 val, u8 *p) +{ + __put_unaligned_be32(val >> 32, p); + __put_unaligned_be32(val, p + 4); +} + +static inline u16 get_unaligned_be16(const void *p) +{ + return __get_unaligned_be16((const u8 *)p); +} + +static inline u32 get_unaligned_be32(const void *p) +{ + return __get_unaligned_be32((const u8 *)p); +} + +static inline u64 get_unaligned_be64(const void *p) +{ + return __get_unaligned_be64((const u8 *)p); +} + +static inline void put_unaligned_be16(u16 val, void *p) +{ + __put_unaligned_be16(val, p); +} + +static inline void 
put_unaligned_be32(u32 val, void *p) +{ + __put_unaligned_be32(val, p); +} + +static inline void put_unaligned_be64(u64 val, void *p) +{ + __put_unaligned_be64(val, p); +} + +#endif /* _LINUX_UNALIGNED_BE_BYTESHIFT_H */ diff --git a/c_src/include/linux/unaligned/be_struct.h b/c_src/include/linux/unaligned/be_struct.h new file mode 100644 index 00000000..13241583 --- /dev/null +++ b/c_src/include/linux/unaligned/be_struct.h @@ -0,0 +1,36 @@ +#ifndef _LINUX_UNALIGNED_BE_STRUCT_H +#define _LINUX_UNALIGNED_BE_STRUCT_H + +#include <linux/unaligned/packed_struct.h> + +static inline u16 get_unaligned_be16(const void *p) +{ + return __get_unaligned_cpu16((const u8 *)p); +} + +static inline u32 get_unaligned_be32(const void *p) +{ + return __get_unaligned_cpu32((const u8 *)p); +} + +static inline u64 get_unaligned_be64(const void *p) +{ + return __get_unaligned_cpu64((const u8 *)p); +} + +static inline void put_unaligned_be16(u16 val, void *p) +{ + __put_unaligned_cpu16(val, p); +} + +static inline void put_unaligned_be32(u32 val, void *p) +{ + __put_unaligned_cpu32(val, p); +} + +static inline void put_unaligned_be64(u64 val, void *p) +{ + __put_unaligned_cpu64(val, p); +} + +#endif /* _LINUX_UNALIGNED_BE_STRUCT_H */ diff --git a/c_src/include/linux/unaligned/generic.h b/c_src/include/linux/unaligned/generic.h new file mode 100644 index 00000000..02d97ff3 --- /dev/null +++ b/c_src/include/linux/unaligned/generic.h @@ -0,0 +1,68 @@ +#ifndef _LINUX_UNALIGNED_GENERIC_H +#define _LINUX_UNALIGNED_GENERIC_H + +/* + * Cause a link-time error if we try an unaligned access other than + * 1,2,4 or 8 bytes long + */ +extern void __bad_unaligned_access_size(void); + +#define __get_unaligned_le(ptr) ((__force typeof(*(ptr)))({ \ + __builtin_choose_expr(sizeof(*(ptr)) == 1, *(ptr), \ + __builtin_choose_expr(sizeof(*(ptr)) == 2, get_unaligned_le16((ptr)), \ + __builtin_choose_expr(sizeof(*(ptr)) == 4, get_unaligned_le32((ptr)), \ + __builtin_choose_expr(sizeof(*(ptr)) == 8, get_unaligned_le64((ptr)), \ + __bad_unaligned_access_size())))); \ + })) + +#define __get_unaligned_be(ptr) ((__force typeof(*(ptr)))({ \ + __builtin_choose_expr(sizeof(*(ptr)) == 1, *(ptr), \ + __builtin_choose_expr(sizeof(*(ptr)) == 2, get_unaligned_be16((ptr)), \ + __builtin_choose_expr(sizeof(*(ptr)) == 4, get_unaligned_be32((ptr)), \ + __builtin_choose_expr(sizeof(*(ptr)) == 8, get_unaligned_be64((ptr)), \ + __bad_unaligned_access_size())))); \ + })) + +#define __put_unaligned_le(val, ptr) ({ \ + void *__gu_p = (ptr); \ + switch (sizeof(*(ptr))) { \ + case 1: \ + *(u8 *)__gu_p = (__force u8)(val); \ + break; \ + case 2: \ + put_unaligned_le16((__force u16)(val), __gu_p); \ + break; \ + case 4: \ + put_unaligned_le32((__force u32)(val), __gu_p); \ + break; \ + case 8: \ + put_unaligned_le64((__force u64)(val), __gu_p); \ + break; \ + default: \ + __bad_unaligned_access_size(); \ + break; \ + } \ + (void)0; }) + +#define __put_unaligned_be(val, ptr) ({ \ + void *__gu_p = (ptr); \ + switch (sizeof(*(ptr))) { \ + case 1: \ + *(u8 *)__gu_p = (__force u8)(val); \ + break; \ + case 2: \ + put_unaligned_be16((__force u16)(val), __gu_p); \ + break; \ + case 4: \ + put_unaligned_be32((__force u32)(val), __gu_p); \ + break; \ + case 8: \ + put_unaligned_be64((__force u64)(val), __gu_p); \ + break; \ + default: \ + __bad_unaligned_access_size(); \ + break; \ + } \ + (void)0; }) + +#endif /* _LINUX_UNALIGNED_GENERIC_H */ diff --git a/c_src/include/linux/unaligned/le_byteshift.h b/c_src/include/linux/unaligned/le_byteshift.h new file mode 100644 
index 00000000..be376fb7 --- /dev/null +++ b/c_src/include/linux/unaligned/le_byteshift.h @@ -0,0 +1,70 @@ +#ifndef _LINUX_UNALIGNED_LE_BYTESHIFT_H +#define _LINUX_UNALIGNED_LE_BYTESHIFT_H + +#include <linux/types.h> + +static inline u16 __get_unaligned_le16(const u8 *p) +{ + return p[0] | p[1] << 8; +} + +static inline u32 __get_unaligned_le32(const u8 *p) +{ + return p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24; +} + +static inline u64 __get_unaligned_le64(const u8 *p) +{ + return (u64)__get_unaligned_le32(p + 4) << 32 | + __get_unaligned_le32(p); +} + +static inline void __put_unaligned_le16(u16 val, u8 *p) +{ + *p++ = val; + *p++ = val >> 8; +} + +static inline void __put_unaligned_le32(u32 val, u8 *p) +{ + __put_unaligned_le16(val >> 16, p + 2); + __put_unaligned_le16(val, p); +} + +static inline void __put_unaligned_le64(u64 val, u8 *p) +{ + __put_unaligned_le32(val >> 32, p + 4); + __put_unaligned_le32(val, p); +} + +static inline u16 get_unaligned_le16(const void *p) +{ + return __get_unaligned_le16((const u8 *)p); +} + +static inline u32 get_unaligned_le32(const void *p) +{ + return __get_unaligned_le32((const u8 *)p); +} + +static inline u64 get_unaligned_le64(const void *p) +{ + return __get_unaligned_le64((const u8 *)p); +} + +static inline void put_unaligned_le16(u16 val, void *p) +{ + __put_unaligned_le16(val, p); +} + +static inline void put_unaligned_le32(u32 val, void *p) +{ + __put_unaligned_le32(val, p); +} + +static inline void put_unaligned_le64(u64 val, void *p) +{ + __put_unaligned_le64(val, p); +} + +#endif /* _LINUX_UNALIGNED_LE_BYTESHIFT_H */ diff --git a/c_src/include/linux/unaligned/le_struct.h b/c_src/include/linux/unaligned/le_struct.h new file mode 100644 index 00000000..088c4572 --- /dev/null +++ b/c_src/include/linux/unaligned/le_struct.h @@ -0,0 +1,36 @@ +#ifndef _LINUX_UNALIGNED_LE_STRUCT_H +#define _LINUX_UNALIGNED_LE_STRUCT_H + +#include <linux/unaligned/packed_struct.h> + +static inline u16 get_unaligned_le16(const void *p) +{ + return __get_unaligned_cpu16((const u8 *)p); +} + +static inline u32 get_unaligned_le32(const void *p) +{ + return __get_unaligned_cpu32((const u8 *)p); +} + +static inline u64 get_unaligned_le64(const void *p) +{ + return __get_unaligned_cpu64((const u8 *)p); +} + +static inline void put_unaligned_le16(u16 val, void *p) +{ + __put_unaligned_cpu16(val, p); +} + +static inline void put_unaligned_le32(u32 val, void *p) +{ + __put_unaligned_cpu32(val, p); +} + +static inline void put_unaligned_le64(u64 val, void *p) +{ + __put_unaligned_cpu64(val, p); +} + +#endif /* _LINUX_UNALIGNED_LE_STRUCT_H */ diff --git a/c_src/include/linux/unaligned/packed_struct.h b/c_src/include/linux/unaligned/packed_struct.h new file mode 100644 index 00000000..c0d817de --- /dev/null +++ b/c_src/include/linux/unaligned/packed_struct.h @@ -0,0 +1,46 @@ +#ifndef _LINUX_UNALIGNED_PACKED_STRUCT_H +#define _LINUX_UNALIGNED_PACKED_STRUCT_H + +#include <linux/kernel.h> + +struct __una_u16 { u16 x; } __packed; +struct __una_u32 { u32 x; } __packed; +struct __una_u64 { u64 x; } __packed; + +static inline u16 __get_unaligned_cpu16(const void *p) +{ + const struct __una_u16 *ptr = (const struct __una_u16 *)p; + return ptr->x; +} + +static inline u32 __get_unaligned_cpu32(const void *p) +{ + const struct __una_u32 *ptr = (const struct __una_u32 *)p; + return ptr->x; +} + +static inline u64 __get_unaligned_cpu64(const void *p) +{ + const struct __una_u64 *ptr = (const struct __una_u64 *)p; + return ptr->x; +} + +static inline void __put_unaligned_cpu16(u16 val, void 
*p) +{ + struct __una_u16 *ptr = (struct __una_u16 *)p; + ptr->x = val; +} + +static inline void __put_unaligned_cpu32(u32 val, void *p) +{ + struct __una_u32 *ptr = (struct __una_u32 *)p; + ptr->x = val; +} + +static inline void __put_unaligned_cpu64(u64 val, void *p) +{ + struct __una_u64 *ptr = (struct __una_u64 *)p; + ptr->x = val; +} + +#endif /* _LINUX_UNALIGNED_PACKED_STRUCT_H */ diff --git a/c_src/include/linux/uuid.h b/c_src/include/linux/uuid.h new file mode 100644 index 00000000..a9990902 --- /dev/null +++ b/c_src/include/linux/uuid.h @@ -0,0 +1,41 @@ +/* + * UUID/GUID definition + * + * Copyright (C) 2010, 2016 Intel Corp. + * Huang Ying <ying.huang@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation; + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#ifndef _LINUX_UUID_H_ +#define _LINUX_UUID_H_ + +#include <string.h> +#include <asm/types.h> +#include <stdbool.h> + +#define UUID_SIZE 16 + +typedef struct { + __u8 b[UUID_SIZE]; +} __uuid_t; + +#define UUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ +((__uuid_t) \ +{{ ((a) >> 24) & 0xff, ((a) >> 16) & 0xff, ((a) >> 8) & 0xff, (a) & 0xff, \ + ((b) >> 8) & 0xff, (b) & 0xff, \ + ((c) >> 8) & 0xff, (c) & 0xff, \ + (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }}) + +static inline bool uuid_equal(const __uuid_t *u1, const __uuid_t *u2) +{ + return memcmp(u1, u2, sizeof(__uuid_t)) == 0; +} + +#endif diff --git a/c_src/include/linux/vmalloc.h b/c_src/include/linux/vmalloc.h new file mode 100644 index 00000000..55fffb59 --- /dev/null +++ b/c_src/include/linux/vmalloc.h @@ -0,0 +1,6 @@ +#ifndef __TOOLS_LINUX_VMALLOC_H +#define __TOOLS_LINUX_VMALLOC_H + +#include "linux/slab.h" + +#endif /* __TOOLS_LINUX_VMALLOC_H */ diff --git a/c_src/include/linux/wait.h b/c_src/include/linux/wait.h new file mode 100644 index 00000000..4b9cbf38 --- /dev/null +++ b/c_src/include/linux/wait.h @@ -0,0 +1,138 @@ +#ifndef _LINUX_WAIT_H +#define _LINUX_WAIT_H + +#include <pthread.h> +#include <linux/bitmap.h> +#include <linux/list.h> +#include <linux/spinlock.h> + +typedef struct __wait_queue wait_queue_t; +typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, void *key); + +#define WQ_FLAG_EXCLUSIVE 0x01 + +struct __wait_queue { + unsigned int flags; + void *private; + wait_queue_func_t func; + struct list_head task_list; +}; + +struct wait_queue_head { + spinlock_t lock; + struct list_head task_list; +}; + +typedef struct wait_queue_head wait_queue_head_t; + +void wake_up(wait_queue_head_t *); +void wake_up_all(wait_queue_head_t *); +void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state); +void finish_wait(wait_queue_head_t *q, wait_queue_t *wait); +int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key); +int default_wake_function(wait_queue_t *wait, unsigned mode, int flags, void *key); + +#define DECLARE_WAITQUEUE(name, tsk) \ + wait_queue_t name = { \ + .private = tsk, \ + .func = default_wake_function, \ + .task_list = { NULL, NULL } \ + } + +#define __WAIT_QUEUE_HEAD_INITIALIZER(name) { \ + .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ + .task_list = { &(name).task_list, &(name).task_list } } + +#define 
DECLARE_WAIT_QUEUE_HEAD(name) \ + struct wait_queue_head name = __WAIT_QUEUE_HEAD_INITIALIZER(name) + +static inline void init_waitqueue_head(wait_queue_head_t *q) +{ + spin_lock_init(&q->lock); + INIT_LIST_HEAD(&q->task_list); +} + +#define DEFINE_WAIT(name) \ + wait_queue_t name = { \ + .private = current, \ + .func = autoremove_wake_function, \ + .task_list = LIST_HEAD_INIT((name).task_list), \ + } + +#define ___wait_cond_timeout(condition) \ +({ \ + bool __cond = (condition); \ + if (__cond && !__ret) \ + __ret = 1; \ + __cond || !__ret; \ +}) + +#define ___wait_event(wq, condition, state, exclusive, ret, cmd) \ +({ \ + DEFINE_WAIT(__wait); \ + long __ret = ret; \ + \ + for (;;) { \ + prepare_to_wait(&wq, &__wait, state); \ + if (condition) \ + break; \ + cmd; \ + } \ + finish_wait(&wq, &__wait); \ + __ret; \ +}) + +#define __wait_event(wq, condition) \ + (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ + schedule()) + +#define wait_event(wq, condition) \ +do { \ + if (condition) \ + break; \ + __wait_event(wq, condition); \ +} while (0) + +#define wait_event_freezable(wq, condition) ({wait_event(wq, condition); 0; }) +#define wait_event_killable(wq, condition) ({wait_event(wq, condition); 0; }) +#define wait_event_interruptible(wq, condition) ({wait_event(wq, condition); 0; }) + +#define __wait_event_timeout(wq, condition, timeout) \ + ___wait_event(wq, ___wait_cond_timeout(condition), \ + TASK_UNINTERRUPTIBLE, 0, timeout, \ + __ret = schedule_timeout(__ret)) + +#define wait_event_timeout(wq, condition, timeout) \ +({ \ + long __ret = timeout; \ + if (!___wait_cond_timeout(condition)) \ + __ret = __wait_event_timeout(wq, condition, timeout); \ + __ret; \ +}) + +void wake_up_bit(void *, int); +void __wait_on_bit(void *, int, unsigned); +void __wait_on_bit_lock(void *, int, unsigned); + +static inline int +wait_on_bit(unsigned long *word, int bit, unsigned mode) +{ + if (!test_bit(bit, word)) + return 0; + __wait_on_bit(word, bit, mode); + return 0; +} + +static inline int +wait_on_bit_lock(unsigned long *word, int bit, unsigned mode) +{ + if (!test_and_set_bit(bit, word)) + return 0; + __wait_on_bit_lock(word, bit, mode); + return 0; +} + +#define wait_on_bit_io(w, b, m) wait_on_bit(w, b, m) +#define wait_on_bit_lock_io(w, b, m) wait_on_bit_lock(w, b, m) + +#endif /* _LINUX_WAIT_H */ diff --git a/c_src/include/linux/workqueue.h b/c_src/include/linux/workqueue.h new file mode 100644 index 00000000..1406c958 --- /dev/null +++ b/c_src/include/linux/workqueue.h @@ -0,0 +1,185 @@ +#ifndef __TOOLS_LINUX_WORKQUEUE_H +#define __TOOLS_LINUX_WORKQUEUE_H + +#include <linux/list.h> +#include <linux/timer.h> + +struct task_struct; +struct workqueue_struct; +struct work_struct; +typedef void (*work_func_t)(struct work_struct *work); +void delayed_work_timer_fn(struct timer_list *); + +#define work_data_bits(work) ((unsigned long *)(&(work)->data)) + +#if 0 +enum { + //WORK_STRUCT_PENDING_BIT = 0, /* work item is pending execution */ + //WORK_STRUCT_DELAYED_BIT = 1, /* work item is delayed */ + // + //WORK_STRUCT_PENDING = 1 << WORK_STRUCT_PENDING_BIT, + //WORK_STRUCT_DELAYED = 1 << WORK_STRUCT_DELAYED_BIT, +}; +#endif + +struct work_struct { + atomic_long_t data; + struct list_head entry; + work_func_t func; +}; + +#define INIT_WORK(_work, _func) \ +do { \ + (_work)->data.counter = 0; \ + INIT_LIST_HEAD(&(_work)->entry); \ + (_work)->func = (_func); \ +} while (0) + +struct delayed_work { + struct work_struct work; + struct timer_list timer; + struct workqueue_struct *wq; +}; + 
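The work-item shim above mirrors the kernel API; as a minimal usage sketch (illustrative only: example_work, example_fn and example_usage are hypothetical names, not part of this header), a caller might do:

static struct work_struct example_work;

/* work_func_t callback: receives the work item that was queued */
static void example_fn(struct work_struct *work)
{
	/* deferred processing goes here */
}

static void example_usage(void)
{
	INIT_WORK(&example_work, example_fn);	/* bind the callback, clear pending state */
	schedule_work(&example_work);		/* queue it on system_wq */
	flush_scheduled_work();			/* wait for system_wq to drain */
}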
+#define INIT_DELAYED_WORK(_work, _func) \ + do { \ + INIT_WORK(&(_work)->work, (_func)); \ + timer_setup(&(_work)->timer, delayed_work_timer_fn, 0); \ + } while (0) + +static inline struct delayed_work *to_delayed_work(struct work_struct *work) +{ + return container_of(work, struct delayed_work, work); +} + +enum { + WQ_UNBOUND = 1 << 1, /* not bound to any cpu */ + WQ_FREEZABLE = 1 << 2, /* freeze during suspend */ + WQ_MEM_RECLAIM = 1 << 3, /* may be used for memory reclaim */ + WQ_HIGHPRI = 1 << 4, /* high priority */ + WQ_CPU_INTENSIVE = 1 << 5, /* cpu intensive workqueue */ + WQ_SYSFS = 1 << 6, /* visible in sysfs, see wq_sysfs_register() */ + + /* + * Per-cpu workqueues are generally preferred because they tend to + * show better performance thanks to cache locality. Per-cpu + * workqueues exclude the scheduler from choosing the CPU to + * execute the worker threads, which has an unfortunate side effect + * of increasing power consumption. + * + * The scheduler considers a CPU idle if it doesn't have any task + * to execute and tries to keep idle cores idle to conserve power; + * however, for example, a per-cpu work item scheduled from an + * interrupt handler on an idle CPU will force the scheduler to + * excute the work item on that CPU breaking the idleness, which in + * turn may lead to more scheduling choices which are sub-optimal + * in terms of power consumption. + * + * Workqueues marked with WQ_POWER_EFFICIENT are per-cpu by default + * but become unbound if workqueue.power_efficient kernel param is + * specified. Per-cpu workqueues which are identified to + * contribute significantly to power-consumption are identified and + * marked with this flag and enabling the power_efficient mode + * leads to noticeable power saving at the cost of small + * performance disadvantage. + * + * http://thread.gmane.org/gmane.linux.kernel/1480396 + */ + WQ_POWER_EFFICIENT = 1 << 7, + + __WQ_DRAINING = 1 << 16, /* internal: workqueue is draining */ + __WQ_ORDERED = 1 << 17, /* internal: workqueue is ordered */ + __WQ_LEGACY = 1 << 18, /* internal: create*_workqueue() */ + + WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ + WQ_MAX_UNBOUND_PER_CPU = 4, /* 4 * #cpus for unbound wq */ + WQ_DFL_ACTIVE = WQ_MAX_ACTIVE / 2, +}; + +/* unbound wq's aren't per-cpu, scale max_active according to #cpus */ +#define WQ_UNBOUND_MAX_ACTIVE WQ_MAX_ACTIVE + +extern struct workqueue_struct *system_wq; +extern struct workqueue_struct *system_highpri_wq; +extern struct workqueue_struct *system_long_wq; +extern struct workqueue_struct *system_unbound_wq; +extern struct workqueue_struct *system_freezable_wq; + +extern struct workqueue_struct * +alloc_workqueue(const char *fmt, unsigned int flags, + int max_active, ...) __printf(1, 4); + +#define alloc_ordered_workqueue(fmt, flags, args...) 
\ + alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args) + +#define create_workqueue(name) \ + alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1, (name)) +#define create_freezable_workqueue(name) \ + alloc_workqueue("%s", __WQ_LEGACY | WQ_FREEZABLE | WQ_UNBOUND | \ + WQ_MEM_RECLAIM, 1, (name)) +#define create_singlethread_workqueue(name) \ + alloc_ordered_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, name) + +extern void destroy_workqueue(struct workqueue_struct *wq); + +struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask); +void free_workqueue_attrs(struct workqueue_attrs *attrs); +int apply_workqueue_attrs(struct workqueue_struct *wq, + const struct workqueue_attrs *attrs); + +extern bool queue_work(struct workqueue_struct *wq, + struct work_struct *work); +extern bool queue_delayed_work(struct workqueue_struct *wq, + struct delayed_work *work, unsigned long delay); +extern bool mod_delayed_work(struct workqueue_struct *wq, + struct delayed_work *dwork, unsigned long delay); + +extern void flush_workqueue(struct workqueue_struct *wq); +extern void drain_workqueue(struct workqueue_struct *wq); + +extern int schedule_on_each_cpu(work_func_t func); + +extern bool flush_work(struct work_struct *work); +extern bool cancel_work_sync(struct work_struct *work); + +extern bool flush_delayed_work(struct delayed_work *dwork); +extern bool cancel_delayed_work(struct delayed_work *dwork); +extern bool cancel_delayed_work_sync(struct delayed_work *dwork); + +extern void workqueue_set_max_active(struct workqueue_struct *wq, + int max_active); +extern bool current_is_workqueue_rescuer(void); +extern bool workqueue_congested(int cpu, struct workqueue_struct *wq); +extern unsigned int work_busy(struct work_struct *work); +extern __printf(1, 2) void set_worker_desc(const char *fmt, ...); +extern void print_worker_info(const char *log_lvl, struct task_struct *task); +extern void show_workqueue_state(void); + +static inline bool schedule_work_on(int cpu, struct work_struct *work) +{ + return queue_work(system_wq, work); +} + +static inline bool schedule_work(struct work_struct *work) +{ + return queue_work(system_wq, work); +} + +static inline void flush_scheduled_work(void) +{ + flush_workqueue(system_wq); +} + +static inline bool schedule_delayed_work_on(int cpu, struct delayed_work *dwork, + unsigned long delay) +{ + return queue_delayed_work(system_wq, dwork, delay); +} + +static inline bool schedule_delayed_work(struct delayed_work *dwork, + unsigned long delay) +{ + return queue_delayed_work(system_wq, dwork, delay); +} + +#endif /* __TOOLS_LINUX_WORKQUEUE_H */ diff --git a/c_src/include/linux/xattr.h b/c_src/include/linux/xattr.h new file mode 100644 index 00000000..dcdff6e8 --- /dev/null +++ b/c_src/include/linux/xattr.h @@ -0,0 +1,78 @@ +/* + File: linux/xattr.h + + Extended attributes handling. + + Copyright (C) 2001 by Andreas Gruenbacher <a.gruenbacher@computer.org> + Copyright (c) 2001-2002 Silicon Graphics, Inc. All Rights Reserved. + Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com> +*/ +#ifndef _LINUX_XATTR_H +#define _LINUX_XATTR_H + + +#include <linux/slab.h> +#include <linux/types.h> +#include <linux/spinlock.h> +#include <uapi/linux/xattr.h> + +#ifndef XATTR_CREATE +#define XATTR_CREATE 0x1 +#endif + +#ifndef XATTR_REPLACE +#define XATTR_REPLACE 0x2 +#endif + +struct inode; +struct dentry; +struct user_namespace; + +/* + * struct xattr_handler: When @name is set, match attributes with exactly that + * name. 
When @prefix is set instead, match attributes with that prefix and + * with a non-empty suffix. + */ +struct xattr_handler { + const char *name; + const char *prefix; + int flags; /* fs private flags */ + bool (*list)(struct dentry *dentry); + int (*get)(const struct xattr_handler *, struct dentry *dentry, + struct inode *inode, const char *name, void *buffer, + size_t size); + int (*set)(const struct xattr_handler *, + struct mnt_idmap *idmap, struct dentry *dentry, + struct inode *inode, const char *name, const void *buffer, + size_t size, int flags); +}; + +const char *xattr_full_name(const struct xattr_handler *, const char *); + +struct xattr { + const char *name; + void *value; + size_t value_len; +}; + +ssize_t xattr_getsecurity(struct inode *, const char *, void *, size_t); +ssize_t vfs_getxattr(struct dentry *, const char *, void *, size_t); +ssize_t vfs_listxattr(struct dentry *d, char *list, size_t size); +int __vfs_setxattr_noperm(struct dentry *, const char *, const void *, size_t, int); +int vfs_setxattr(struct dentry *, const char *, const void *, size_t, int); +int vfs_removexattr(struct dentry *, const char *); + +ssize_t generic_getxattr(struct dentry *dentry, struct inode *inode, const char *name, void *buffer, size_t size); +ssize_t generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size); +int generic_setxattr(struct dentry *dentry, struct inode *inode, + const char *name, const void *value, size_t size, int flags); +int generic_removexattr(struct dentry *dentry, const char *name); +ssize_t vfs_getxattr_alloc(struct dentry *dentry, const char *name, + char **xattr_value, size_t size, gfp_t flags); + +static inline const char *xattr_prefix(const struct xattr_handler *handler) +{ + return handler->prefix ?: handler->name; +} + +#endif /* _LINUX_XATTR_H */ diff --git a/c_src/include/linux/xxhash.h b/c_src/include/linux/xxhash.h new file mode 100644 index 00000000..df425114 --- /dev/null +++ b/c_src/include/linux/xxhash.h @@ -0,0 +1,259 @@ +/* + * xxHash - Extremely Fast Hash algorithm + * Copyright (C) 2012-2016, Yann Collet. + * + * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License version 2 as published by the + * Free Software Foundation. This program is dual-licensed; you may select + * either version 2 of the GNU General Public License ("GPL") or BSD license + * ("BSD"). + * + * You can contact the author at: + * - xxHash homepage: https://cyan4973.github.io/xxHash/ + * - xxHash source repository: https://github.com/Cyan4973/xxHash + */ + +/* + * Notice extracted from xxHash homepage: + * + * xxHash is an extremely fast Hash algorithm, running at RAM speed limits. + * It also successfully passes all tests from the SMHasher suite. + * + * Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 + * Duo @3GHz) + * + * Name Speed Q.Score Author + * xxHash 5.4 GB/s 10 + * CrapWow 3.2 GB/s 2 Andrew + * MumurHash 3a 2.7 GB/s 10 Austin Appleby + * SpookyHash 2.0 GB/s 10 Bob Jenkins + * SBox 1.4 GB/s 9 Bret Mulvey + * Lookup3 1.2 GB/s 9 Bob Jenkins + * SuperFastHash 1.2 GB/s 1 Paul Hsieh + * CityHash64 1.05 GB/s 10 Pike & Alakuijala + * FNV 0.55 GB/s 5 Fowler, Noll, Vo + * CRC32 0.43 GB/s 9 + * MD5-32 0.33 GB/s 10 Ronald L. Rivest + * SHA1-32 0.28 GB/s 10 + * + * Q.Score is a measure of quality of the hash function. + * It depends on successfully passing SMHasher test set. + * 10 is a perfect score. + * + * A 64-bits version, named xxh64 offers much better speed, + * but for 64-bits applications only. + * Name Speed on 64 bits Speed on 32 bits + * xxh64 13.8 GB/s 1.9 GB/s + * xxh32 6.8 GB/s 6.0 GB/s + */ + +#ifndef XXHASH_H +#define XXHASH_H + +#include <linux/types.h> + +/*-**************************** + * Simple Hash Functions + *****************************/ + +/** + * xxh32() - calculate the 32-bit hash of the input with a given seed. + * + * @input: The data to hash. + * @length: The length of the data to hash. + * @seed: The seed can be used to alter the result predictably. + * + * Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s + * + * Return: The 32-bit hash of the data. + */ +uint32_t xxh32(const void *input, size_t length, uint32_t seed); + +/** + * xxh64() - calculate the 64-bit hash of the input with a given seed. + * + * @input: The data to hash. + * @length: The length of the data to hash. + * @seed: The seed can be used to alter the result predictably. + * + * This function runs 2x faster on 64-bit systems, but slower on 32-bit systems. + * + * Return: The 64-bit hash of the data. + */ +uint64_t xxh64(const void *input, size_t length, uint64_t seed); + +/** + * xxhash() - calculate wordsize hash of the input with a given seed + * @input: The data to hash. + * @length: The length of the data to hash. + * @seed: The seed can be used to alter the result predictably. + * + * If the hash does not need to be comparable between machines with + * different word sizes, this function will call whichever of xxh32() + * or xxh64() is faster. + * + * Return: wordsize hash of the data. + */ + +static inline unsigned long xxhash(const void *input, size_t length, + uint64_t seed) +{ +#if BITS_PER_LONG == 64 + return xxh64(input, length, seed); +#else + return xxh32(input, length, seed); +#endif +} + +/*-**************************** + * Streaming Hash Functions + *****************************/ + +/* + * These definitions are only meant to allow allocation of XXH state + * statically, on stack, or in a struct for example. + * Do not use members directly. 
+ */ + +/** + * struct xxh32_state - private xxh32 state, do not use members directly + */ +struct xxh32_state { + uint32_t total_len_32; + uint32_t large_len; + uint32_t v1; + uint32_t v2; + uint32_t v3; + uint32_t v4; + uint32_t mem32[4]; + uint32_t memsize; +}; + +/** + * struct xxh64_state - private xxh64 state, do not use members directly + */ +struct xxh64_state { + uint64_t total_len; + uint64_t v1; + uint64_t v2; + uint64_t v3; + uint64_t v4; + uint64_t mem64[4]; + uint32_t memsize; +}; + +/** + * xxh32_reset() - reset the xxh32 state to start a new hashing operation + * + * @state: The xxh32 state to reset. + * @seed: Initialize the hash state with this seed. + * + * Call this function on any xxh32_state to prepare for a new hashing operation. + */ +void xxh32_reset(struct xxh32_state *state, uint32_t seed); + +/** + * xxh32_update() - hash the data given and update the xxh32 state + * + * @state: The xxh32 state to update. + * @input: The data to hash. + * @length: The length of the data to hash. + * + * After calling xxh32_reset() call xxh32_update() as many times as necessary. + * + * Return: Zero on success, otherwise an error code. + */ +int xxh32_update(struct xxh32_state *state, const void *input, size_t length); + +/** + * xxh32_digest() - produce the current xxh32 hash + * + * @state: Produce the current xxh32 hash of this state. + * + * A hash value can be produced at any time. It is still possible to continue + * inserting input into the hash state after a call to xxh32_digest(), and + * generate new hashes later on, by calling xxh32_digest() again. + * + * Return: The xxh32 hash stored in the state. + */ +uint32_t xxh32_digest(const struct xxh32_state *state); + +/** + * xxh64_reset() - reset the xxh64 state to start a new hashing operation + * + * @state: The xxh64 state to reset. + * @seed: Initialize the hash state with this seed. + */ +void xxh64_reset(struct xxh64_state *state, uint64_t seed); + +/** + * xxh64_update() - hash the data given and update the xxh64 state + * @state: The xxh64 state to update. + * @input: The data to hash. + * @length: The length of the data to hash. + * + * After calling xxh64_reset() call xxh64_update() as many times as necessary. + * + * Return: Zero on success, otherwise an error code. + */ +int xxh64_update(struct xxh64_state *state, const void *input, size_t length); + +/** + * xxh64_digest() - produce the current xxh64 hash + * + * @state: Produce the current xxh64 hash of this state. + * + * A hash value can be produced at any time. It is still possible to continue + * inserting input into the hash state after a call to xxh64_digest(), and + * generate new hashes later on, by calling xxh64_digest() again. + * + * Return: The xxh64 hash stored in the state. + */ +uint64_t xxh64_digest(const struct xxh64_state *state); + +/*-************************** + * Utils + ***************************/ + +/** + * xxh32_copy_state() - copy the source state into the destination state + * + * @src: The source xxh32 state. + * @dst: The destination xxh32 state. + */ +void xxh32_copy_state(struct xxh32_state *dst, const struct xxh32_state *src); + +/** + * xxh64_copy_state() - copy the source state into the destination state + * + * @src: The source xxh64 state. + * @dst: The destination xxh64 state. 
+ */ +void xxh64_copy_state(struct xxh64_state *dst, const struct xxh64_state *src); + +#endif /* XXHASH_H */ diff --git a/c_src/include/linux/zlib.h b/c_src/include/linux/zlib.h new file mode 100644 index 00000000..45cfbd87 --- /dev/null +++ b/c_src/include/linux/zlib.h @@ -0,0 +1,18 @@ +#ifndef _ZLIB_H +#define _ZLIB_H + +#include <zlib.h> + +#define zlib_inflate_workspacesize() 0 +#define zlib_deflate_workspacesize(windowBits, memLevel) 0 + +#define zlib_inflateInit2 inflateInit2 +#define zlib_inflate inflate + +#define zlib_deflateInit2 deflateInit2 +#define zlib_deflate deflate +#define zlib_deflateEnd deflateEnd + +#define DEF_MEM_LEVEL 8 + +#endif /* _ZLIB_H */ diff --git a/c_src/include/linux/zstd.h b/c_src/include/linux/zstd.h new file mode 100644 index 00000000..b0fa1eda --- /dev/null +++ b/c_src/include/linux/zstd.h @@ -0,0 +1,447 @@ +/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of https://github.com/facebook/zstd) and + * the GPLv2 (found in the COPYING file in the root directory of + * https://github.com/facebook/zstd). You may select, at your option, one of the + * above-listed licenses. + */ + +#ifndef LINUX_ZSTD_H +#define LINUX_ZSTD_H + +/** + * This is a kernel-style API that wraps the upstream zstd API, which cannot be + * used directly because the symbols aren't exported. It exposes the minimal + * functionality which is currently required by users of zstd in the kernel. + * Expose extra functions from lib/zstd/zstd.h as needed. + */ + +/* ====== Dependency ====== */ +#include <linux/types.h> +#include <zstd.h> +#include <linux/zstd_errors.h> + +/* ====== Helper Functions ====== */ +/** + * zstd_compress_bound() - maximum compressed size in worst case scenario + * @src_size: The size of the data to compress. + * + * Return: The maximum compressed size in the worst case scenario. + */ +size_t zstd_compress_bound(size_t src_size); + +/** + * zstd_is_error() - tells if a size_t function result is an error code + * @code: The function result to check for error. + * + * Return: Non-zero iff the code is an error. + */ +unsigned int zstd_is_error(size_t code); + +/** + * enum zstd_error_code - zstd error codes + */ +typedef ZSTD_ErrorCode zstd_error_code; + +/** + * zstd_get_error_code() - translates an error function result to an error code + * @code: The function result for which zstd_is_error(code) is true. + * + * Return: A unique error code for this error. + */ +zstd_error_code zstd_get_error_code(size_t code); + +/** + * zstd_get_error_name() - translates an error function result to a string + * @code: The function result for which zstd_is_error(code) is true. + * + * Return: An error string corresponding to the error code. + */ +const char *zstd_get_error_name(size_t code); + +/** + * zstd_min_clevel() - minimum allowed compression level + * + * Return: The minimum allowed compression level. + */ +int zstd_min_clevel(void); + +/** + * zstd_max_clevel() - maximum allowed compression level + * + * Return: The maximum allowed compression level. + */ +int zstd_max_clevel(void); + +/* ====== Parameter Selection ====== */ + +/** + * enum zstd_strategy - zstd compression search strategy + * + * From faster to stronger. See zstd_lib.h. 
+ */ +typedef ZSTD_strategy zstd_strategy; + +/** + * struct zstd_compression_parameters - zstd compression parameters + * @windowLog: Log of the largest match distance. Larger means more + * compression, and more memory needed during decompression. + * @chainLog: Fully searched segment. Larger means more compression, + * slower, and more memory (useless for fast). + * @hashLog: Dispatch table. Larger means more compression, + * slower, and more memory. + * @searchLog: Number of searches. Larger means more compression and slower. + * @searchLength: Match length searched. Larger means faster decompression, + * sometimes less compression. + * @targetLength: Acceptable match size for optimal parser (only). Larger means + * more compression, and slower. + * @strategy: The zstd compression strategy. + * + * See zstd_lib.h. + */ +typedef ZSTD_compressionParameters zstd_compression_parameters; + +/** + * struct zstd_frame_parameters - zstd frame parameters + * @contentSizeFlag: Controls whether content size will be present in the + * frame header (when known). + * @checksumFlag: Controls whether a 32-bit checksum is generated at the + * end of the frame for error detection. + * @noDictIDFlag: Controls whether dictID will be saved into the frame + * header when using dictionary compression. + * + * The default value is all fields set to 0. See zstd_lib.h. + */ +typedef ZSTD_frameParameters zstd_frame_parameters; + +/** + * struct zstd_parameters - zstd parameters + * @cParams: The compression parameters. + * @fParams: The frame parameters. + */ +typedef ZSTD_parameters zstd_parameters; + +/** + * zstd_get_params() - returns zstd_parameters for selected level + * @level: The compression level + * @estimated_src_size: The estimated source size to compress or 0 + * if unknown. + * + * Return: The selected zstd_parameters. + */ +zstd_parameters zstd_get_params(int level, + unsigned long long estimated_src_size); + +/* ====== Single-pass Compression ====== */ + +typedef ZSTD_CCtx zstd_cctx; + +/** + * zstd_cctx_workspace_bound() - max memory needed to initialize a zstd_cctx + * @parameters: The compression parameters to be used. + * + * If multiple compression parameters might be used, the caller must call + * zstd_cctx_workspace_bound() for each set of parameters and use the maximum + * size. + * + * Return: A lower bound on the size of the workspace that is passed to + * zstd_init_cctx(). + */ +size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *parameters); + +/** + * zstd_init_cctx() - initialize a zstd compression context + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspace_size: The size of workspace. Use zstd_cctx_workspace_bound() to + * determine how large the workspace must be. + * + * Return: A zstd compression context or NULL on error. + */ +zstd_cctx *zstd_init_cctx(void *workspace, size_t workspace_size); + +/** + * zstd_compress_cctx() - compress src into dst with the initialized parameters + * @cctx: The context. Must have been initialized with zstd_init_cctx(). + * @dst: The buffer to compress src into. + * @dst_capacity: The size of the destination buffer. May be any size, but + * ZSTD_compressBound(srcSize) is guaranteed to be large enough. + * @src: The data to compress. + * @src_size: The size of the data to compress. + * @parameters: The compression parameters to be used. + * + * Return: The compressed size or an error, which can be checked using + * zstd_is_error(). 
+ */ +size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity, + const void *src, size_t src_size, const zstd_parameters *parameters); + +/* ====== Single-pass Decompression ====== */ + +typedef ZSTD_DCtx zstd_dctx; + +/** + * zstd_dctx_workspace_bound() - max memory needed to initialize a zstd_dctx + * + * Return: A lower bound on the size of the workspace that is passed to + * zstd_init_dctx(). + */ +size_t zstd_dctx_workspace_bound(void); + +/** + * zstd_init_dctx() - initialize a zstd decompression context + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspace_size: The size of workspace. Use zstd_dctx_workspace_bound() to + * determine how large the workspace must be. + * + * Return: A zstd decompression context or NULL on error. + */ +zstd_dctx *zstd_init_dctx(void *workspace, size_t workspace_size); + +/** + * zstd_decompress_dctx() - decompress zstd compressed src into dst + * @dctx: The decompression context. + * @dst: The buffer to decompress src into. + * @dst_capacity: The size of the destination buffer. Must be at least as large + * as the decompressed size. If the caller cannot upper bound the + * decompressed size, then it's better to use the streaming API. + * @src: The zstd compressed data to decompress. Multiple concatenated + * frames and skippable frames are allowed. + * @src_size: The exact size of the data to decompress. + * + * Return: The decompressed size or an error, which can be checked using + * zstd_is_error(). + */ +size_t zstd_decompress_dctx(zstd_dctx *dctx, void *dst, size_t dst_capacity, + const void *src, size_t src_size); + +/* ====== Streaming Buffers ====== */ + +/** + * struct zstd_in_buffer - input buffer for streaming + * @src: Start of the input buffer. + * @size: Size of the input buffer. + * @pos: Position where reading stopped. Will be updated. + * Necessarily 0 <= pos <= size. + * + * See zstd_lib.h. + */ +typedef ZSTD_inBuffer zstd_in_buffer; + +/** + * struct zstd_out_buffer - output buffer for streaming + * @dst: Start of the output buffer. + * @size: Size of the output buffer. + * @pos: Position where writing stopped. Will be updated. + * Necessarily 0 <= pos <= size. + * + * See zstd_lib.h. + */ +typedef ZSTD_outBuffer zstd_out_buffer; + +/* ====== Streaming Compression ====== */ + +typedef ZSTD_CStream zstd_cstream; + +/** + * zstd_cstream_workspace_bound() - memory needed to initialize a zstd_cstream + * @cparams: The compression parameters to be used for compression. + * + * Return: A lower bound on the size of the workspace that is passed to + * zstd_init_cstream(). + */ +size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams); + +/** + * zstd_init_cstream() - initialize a zstd streaming compression context + * @parameters The zstd parameters to use for compression. + * @pledged_src_size: If params.fParams.contentSizeFlag == 1 then the caller + * must pass the source size (zero means empty source). + * Otherwise, the caller may optionally pass the source + * size, or zero if unknown. + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspace_size: The size of workspace. + * Use zstd_cstream_workspace_bound(params->cparams) to + * determine how large the workspace must be. + * + * Return: The zstd streaming compression context or NULL on error. 
+ */ +zstd_cstream *zstd_init_cstream(const zstd_parameters *parameters, + unsigned long long pledged_src_size, void *workspace, size_t workspace_size); + +/** + * zstd_reset_cstream() - reset the context using parameters from creation + * @cstream: The zstd streaming compression context to reset. + * @pledged_src_size: Optionally the source size, or zero if unknown. + * + * Resets the context using the parameters from creation. Skips dictionary + * loading, since it can be reused. If `pledged_src_size` is non-zero the frame + * content size is always written into the frame header. + * + * Return: Zero or an error, which can be checked using + * zstd_is_error(). + */ +size_t zstd_reset_cstream(zstd_cstream *cstream, + unsigned long long pledged_src_size); + +/** + * zstd_compress_stream() - streaming compress some of input into output + * @cstream: The zstd streaming compression context. + * @output: Destination buffer. `output->pos` is updated to indicate how much + * compressed data was written. + * @input: Source buffer. `input->pos` is updated to indicate how much data + * was read. Note that it may not consume the entire input, in which + * case `input->pos < input->size`, and it's up to the caller to + * present remaining data again. + * + * The `input` and `output` buffers may be any size. Guaranteed to make some + * forward progress if `input` and `output` are not empty. + * + * Return: A hint for the number of bytes to use as the input for the next + * function call or an error, which can be checked using + * zstd_is_error(). + */ +size_t zstd_compress_stream(zstd_cstream *cstream, zstd_out_buffer *output, + zstd_in_buffer *input); + +/** + * zstd_flush_stream() - flush internal buffers into output + * @cstream: The zstd streaming compression context. + * @output: Destination buffer. `output->pos` is updated to indicate how much + * compressed data was written. + * + * zstd_flush_stream() must be called until it returns 0, meaning all the data + * has been flushed. Since zstd_flush_stream() causes a block to be ended, + * calling it too often will degrade the compression ratio. + * + * Return: The number of bytes still present within internal buffers or an + * error, which can be checked using zstd_is_error(). + */ +size_t zstd_flush_stream(zstd_cstream *cstream, zstd_out_buffer *output); + +/** + * zstd_end_stream() - flush internal buffers into output and end the frame + * @cstream: The zstd streaming compression context. + * @output: Destination buffer. `output->pos` is updated to indicate how much + * compressed data was written. + * + * zstd_end_stream() must be called until it returns 0, meaning all the data has + * been flushed and the frame epilogue has been written. + * + * Return: The number of bytes still present within internal buffers or an + * error, which can be checked using zstd_is_error(). + */ +size_t zstd_end_stream(zstd_cstream *cstream, zstd_out_buffer *output); + +/* ====== Streaming Decompression ====== */ + +typedef ZSTD_DStream zstd_dstream; + +/** + * zstd_dstream_workspace_bound() - memory needed to initialize a zstd_dstream + * @max_window_size: The maximum window size allowed for compressed frames. + * + * Return: A lower bound on the size of the workspace that is passed + * to zstd_init_dstream(). + */ +size_t zstd_dstream_workspace_bound(size_t max_window_size); + +/** + * zstd_init_dstream() - initialize a zstd streaming decompression context + * @max_window_size: The maximum window size allowed for compressed frames. 
+ * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspace_size: The size of workspace. + * Use zstd_dstream_workspace_bound(max_window_size) to + * determine how large the workspace must be. + * + * Return: The zstd streaming decompression context. + */ +zstd_dstream *zstd_init_dstream(size_t max_window_size, void *workspace, + size_t workspace_size); + +/** + * zstd_reset_dstream() - reset the context using parameters from creation + * @dstream: The zstd streaming decompression context to reset. + * + * Resets the context using the parameters from creation. Skips dictionary + * loading, since it can be reused. + * + * Return: Zero or an error, which can be checked using zstd_is_error(). + */ +size_t zstd_reset_dstream(zstd_dstream *dstream); + +/** + * zstd_decompress_stream() - streaming decompress some of input into output + * @dstream: The zstd streaming decompression context. + * @output: Destination buffer. `output.pos` is updated to indicate how much + * decompressed data was written. + * @input: Source buffer. `input.pos` is updated to indicate how much data was + * read. Note that it may not consume the entire input, in which case + * `input.pos < input.size`, and it's up to the caller to present + * remaining data again. + * + * The `input` and `output` buffers may be any size. Guaranteed to make some + * forward progress if `input` and `output` are not empty. + * zstd_decompress_stream() will not consume the last byte of the frame until + * the entire frame is flushed. + * + * Return: Returns 0 iff a frame is completely decoded and fully flushed. + * Otherwise returns a hint for the number of bytes to use as the + * input for the next function call or an error, which can be checked + * using zstd_is_error(). The size hint will never load more than the + * frame. + */ +size_t zstd_decompress_stream(zstd_dstream *dstream, zstd_out_buffer *output, + zstd_in_buffer *input); + +/* ====== Frame Inspection Functions ====== */ + +/** + * zstd_find_frame_compressed_size() - returns the size of a compressed frame + * @src: Source buffer. It should point to the start of a zstd encoded + * frame or a skippable frame. + * @src_size: The size of the source buffer. It must be at least as large as the + * size of the frame. + * + * Return: The compressed size of the frame pointed to by `src` or an error, + * which can be checked with zstd_is_error(). + * Suitable to pass to ZSTD_decompress() or similar functions. + */ +size_t zstd_find_frame_compressed_size(const void *src, size_t src_size); + +/** + * struct zstd_frame_params - zstd frame parameters stored in the frame header + * @frameContentSize: The frame content size, or ZSTD_CONTENTSIZE_UNKNOWN if not + * present. + * @windowSize: The window size, or 0 if the frame is a skippable frame. + * @blockSizeMax: The maximum block size. + * @frameType: The frame type (zstd or skippable) + * @headerSize: The size of the frame header. + * @dictID: The dictionary id, or 0 if not present. + * @checksumFlag: Whether a checksum was used. + * + * See zstd_lib.h. + */ +typedef ZSTD_frameHeader zstd_frame_header; + +/** + * zstd_get_frame_header() - extracts parameters from a zstd or skippable frame + * @params: On success the frame parameters are written here. + * @src: The source buffer. It must point to a zstd or skippable frame. + * @src_size: The size of the source buffer. + * + * Return: 0 on success. 
If more data is required it returns how many bytes + * must be provided to make forward progress. Otherwise it returns + * an error, which can be checked using zstd_is_error(). + */ +size_t zstd_get_frame_header(zstd_frame_header *params, const void *src, + size_t src_size); + +#endif /* LINUX_ZSTD_H */ diff --git a/c_src/include/linux/zstd_errors.h b/c_src/include/linux/zstd_errors.h new file mode 100644 index 00000000..58b6dd45 --- /dev/null +++ b/c_src/include/linux/zstd_errors.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_ERRORS_H_398273423 +#define ZSTD_ERRORS_H_398273423 + + +/*===== dependency =====*/ +#include <linux/types.h> /* size_t */ + + +/* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */ +#define ZSTDERRORLIB_VISIBILITY +#define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY + +/*-********************************************* + * Error codes list + *-********************************************* + * Error codes _values_ are pinned down since v1.3.1 only. + * Therefore, don't rely on values if you may link to any version < v1.3.1. + * + * Only values < 100 are considered stable. + * + * note 1 : this API shall be used with static linking only. + * dynamic linking is not yet officially supported. + * note 2 : Prefer relying on the enum than on its value whenever possible + * This is the only supported way to use the error list < v1.3.1 + * note 3 : ZSTD_isError() is always correct, whatever the library version. + **********************************************/ +typedef enum { + ZSTD_error_no_error = 0, + ZSTD_error_GENERIC = 1, + ZSTD_error_prefix_unknown = 10, + ZSTD_error_version_unsupported = 12, + ZSTD_error_frameParameter_unsupported = 14, + ZSTD_error_frameParameter_windowTooLarge = 16, + ZSTD_error_corruption_detected = 20, + ZSTD_error_checksum_wrong = 22, + ZSTD_error_dictionary_corrupted = 30, + ZSTD_error_dictionary_wrong = 32, + ZSTD_error_dictionaryCreation_failed = 34, + ZSTD_error_parameter_unsupported = 40, + ZSTD_error_parameter_outOfBound = 42, + ZSTD_error_tableLog_tooLarge = 44, + ZSTD_error_maxSymbolValue_tooLarge = 46, + ZSTD_error_maxSymbolValue_tooSmall = 48, + ZSTD_error_stage_wrong = 60, + ZSTD_error_init_missing = 62, + ZSTD_error_memory_allocation = 64, + ZSTD_error_workSpace_tooSmall= 66, + ZSTD_error_dstSize_tooSmall = 70, + ZSTD_error_srcSize_wrong = 72, + ZSTD_error_dstBuffer_null = 74, + /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */ + ZSTD_error_frameIndex_tooLarge = 100, + ZSTD_error_seekableIO = 102, + ZSTD_error_dstBuffer_wrong = 104, + ZSTD_error_srcBuffer_wrong = 105, + ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */ +} ZSTD_ErrorCode; + +/*! 
ZSTD_getErrorCode() : + convert a `size_t` function result into a `ZSTD_ErrorCode` enum type, + which can be used to compare with enum list published above */ +ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult); +ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code); /*< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */ + + + +#endif /* ZSTD_ERRORS_H_398273423 */ diff --git a/c_src/include/trace/define_trace.h b/c_src/include/trace/define_trace.h new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/c_src/include/trace/define_trace.h diff --git a/c_src/include/trace/events/lock.h b/c_src/include/trace/events/lock.h new file mode 100644 index 00000000..9ebd081e --- /dev/null +++ b/c_src/include/trace/events/lock.h @@ -0,0 +1,144 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM lock + +#if !defined(_TRACE_LOCK_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_LOCK_H + +#include <linux/sched.h> +#include <linux/tracepoint.h> + +/* flags for lock:contention_begin */ +#define LCB_F_SPIN (1U << 0) +#define LCB_F_READ (1U << 1) +#define LCB_F_WRITE (1U << 2) +#define LCB_F_RT (1U << 3) +#define LCB_F_PERCPU (1U << 4) +#define LCB_F_MUTEX (1U << 5) + + +#ifdef CONFIG_LOCKDEP + +#include <linux/lockdep.h> + +TRACE_EVENT(lock_acquire, + + TP_PROTO(struct lockdep_map *lock, unsigned int subclass, + int trylock, int read, int check, + struct lockdep_map *next_lock, unsigned long ip), + + TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip), + + TP_STRUCT__entry( + __field(unsigned int, flags) + __string(name, lock->name) + __field(void *, lockdep_addr) + ), + + TP_fast_assign( + __entry->flags = (trylock ? 1 : 0) | (read ? 2 : 0); + __assign_str(name, lock->name); + __entry->lockdep_addr = lock; + ), + + TP_printk("%p %s%s%s", __entry->lockdep_addr, + (__entry->flags & 1) ? "try " : "", + (__entry->flags & 2) ? 
"read " : "", + __get_str(name)) +); + +DECLARE_EVENT_CLASS(lock, + + TP_PROTO(struct lockdep_map *lock, unsigned long ip), + + TP_ARGS(lock, ip), + + TP_STRUCT__entry( + __string( name, lock->name ) + __field( void *, lockdep_addr ) + ), + + TP_fast_assign( + __assign_str(name, lock->name); + __entry->lockdep_addr = lock; + ), + + TP_printk("%p %s", __entry->lockdep_addr, __get_str(name)) +); + +DEFINE_EVENT(lock, lock_release, + + TP_PROTO(struct lockdep_map *lock, unsigned long ip), + + TP_ARGS(lock, ip) +); + +#ifdef CONFIG_LOCK_STAT + +DEFINE_EVENT(lock, lock_contended, + + TP_PROTO(struct lockdep_map *lock, unsigned long ip), + + TP_ARGS(lock, ip) +); + +DEFINE_EVENT(lock, lock_acquired, + + TP_PROTO(struct lockdep_map *lock, unsigned long ip), + + TP_ARGS(lock, ip) +); + +#endif /* CONFIG_LOCK_STAT */ +#endif /* CONFIG_LOCKDEP */ + +TRACE_EVENT(contention_begin, + + TP_PROTO(void *lock, unsigned int flags), + + TP_ARGS(lock, flags), + + TP_STRUCT__entry( + __field(void *, lock_addr) + __field(unsigned int, flags) + ), + + TP_fast_assign( + __entry->lock_addr = lock; + __entry->flags = flags; + ), + + TP_printk("%p (flags=%s)", __entry->lock_addr, + __print_flags(__entry->flags, "|", + { LCB_F_SPIN, "SPIN" }, + { LCB_F_READ, "READ" }, + { LCB_F_WRITE, "WRITE" }, + { LCB_F_RT, "RT" }, + { LCB_F_PERCPU, "PERCPU" }, + { LCB_F_MUTEX, "MUTEX" } + )) +); + +TRACE_EVENT(contention_end, + + TP_PROTO(void *lock, int ret), + + TP_ARGS(lock, ret), + + TP_STRUCT__entry( + __field(void *, lock_addr) + __field(int, ret) + ), + + TP_fast_assign( + __entry->lock_addr = lock; + __entry->ret = ret; + ), + + TP_printk("%p (ret=%d)", __entry->lock_addr, __entry->ret) +); + +#endif /* _TRACE_LOCK_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/c_src/include/uapi/linux/xattr.h b/c_src/include/uapi/linux/xattr.h new file mode 100644 index 00000000..1590c49c --- /dev/null +++ b/c_src/include/uapi/linux/xattr.h @@ -0,0 +1,77 @@ +/* + File: linux/xattr.h + + Extended attributes handling. + + Copyright (C) 2001 by Andreas Gruenbacher <a.gruenbacher@computer.org> + Copyright (c) 2001-2002 Silicon Graphics, Inc. All Rights Reserved. + Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com> +*/ + +#include <linux/libc-compat.h> + +#ifndef _UAPI_LINUX_XATTR_H +#define _UAPI_LINUX_XATTR_H + +#if __UAPI_DEF_XATTR +#define __USE_KERNEL_XATTR_DEFS + +#define XATTR_CREATE 0x1 /* set value, fail if attr already exists */ +#define XATTR_REPLACE 0x2 /* set value, fail if attr does not exist */ +#endif + +/* Namespaces */ +#define XATTR_OS2_PREFIX "os2." +#define XATTR_OS2_PREFIX_LEN (sizeof(XATTR_OS2_PREFIX) - 1) + +#define XATTR_MAC_OSX_PREFIX "osx." +#define XATTR_MAC_OSX_PREFIX_LEN (sizeof(XATTR_MAC_OSX_PREFIX) - 1) + +#define XATTR_BTRFS_PREFIX "btrfs." +#define XATTR_BTRFS_PREFIX_LEN (sizeof(XATTR_BTRFS_PREFIX) - 1) + +#define XATTR_SECURITY_PREFIX "security." +#define XATTR_SECURITY_PREFIX_LEN (sizeof(XATTR_SECURITY_PREFIX) - 1) + +#define XATTR_SYSTEM_PREFIX "system." +#define XATTR_SYSTEM_PREFIX_LEN (sizeof(XATTR_SYSTEM_PREFIX) - 1) + +#define XATTR_TRUSTED_PREFIX "trusted." +#define XATTR_TRUSTED_PREFIX_LEN (sizeof(XATTR_TRUSTED_PREFIX) - 1) + +#define XATTR_USER_PREFIX "user." 
+#define XATTR_USER_PREFIX_LEN (sizeof(XATTR_USER_PREFIX) - 1) + +/* Security namespace */ +#define XATTR_EVM_SUFFIX "evm" +#define XATTR_NAME_EVM XATTR_SECURITY_PREFIX XATTR_EVM_SUFFIX + +#define XATTR_IMA_SUFFIX "ima" +#define XATTR_NAME_IMA XATTR_SECURITY_PREFIX XATTR_IMA_SUFFIX + +#define XATTR_SELINUX_SUFFIX "selinux" +#define XATTR_NAME_SELINUX XATTR_SECURITY_PREFIX XATTR_SELINUX_SUFFIX + +#define XATTR_SMACK_SUFFIX "SMACK64" +#define XATTR_SMACK_IPIN "SMACK64IPIN" +#define XATTR_SMACK_IPOUT "SMACK64IPOUT" +#define XATTR_SMACK_EXEC "SMACK64EXEC" +#define XATTR_SMACK_TRANSMUTE "SMACK64TRANSMUTE" +#define XATTR_SMACK_MMAP "SMACK64MMAP" +#define XATTR_NAME_SMACK XATTR_SECURITY_PREFIX XATTR_SMACK_SUFFIX +#define XATTR_NAME_SMACKIPIN XATTR_SECURITY_PREFIX XATTR_SMACK_IPIN +#define XATTR_NAME_SMACKIPOUT XATTR_SECURITY_PREFIX XATTR_SMACK_IPOUT +#define XATTR_NAME_SMACKEXEC XATTR_SECURITY_PREFIX XATTR_SMACK_EXEC +#define XATTR_NAME_SMACKTRANSMUTE XATTR_SECURITY_PREFIX XATTR_SMACK_TRANSMUTE +#define XATTR_NAME_SMACKMMAP XATTR_SECURITY_PREFIX XATTR_SMACK_MMAP + +#define XATTR_CAPS_SUFFIX "capability" +#define XATTR_NAME_CAPS XATTR_SECURITY_PREFIX XATTR_CAPS_SUFFIX + +#define XATTR_POSIX_ACL_ACCESS "posix_acl_access" +#define XATTR_NAME_POSIX_ACL_ACCESS XATTR_SYSTEM_PREFIX XATTR_POSIX_ACL_ACCESS +#define XATTR_POSIX_ACL_DEFAULT "posix_acl_default" +#define XATTR_NAME_POSIX_ACL_DEFAULT XATTR_SYSTEM_PREFIX XATTR_POSIX_ACL_DEFAULT + + +#endif /* _UAPI_LINUX_XATTR_H */ |
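As an illustration of how the zstd wrappers declared in c_src/include/linux/zstd.h above are meant to compose, here is a minimal single-pass compression sketch. It is an assumed example only: example_zstd_compress is a hypothetical helper, error handling is simplified, and malloc() stands in for whatever allocator a real caller would use.

#include <stdlib.h>
#include <linux/zstd.h>

/* Compress src into a freshly allocated buffer; returns the compressed size, or 0 on error. */
static size_t example_zstd_compress(void **out, const void *src, size_t src_size, int level)
{
	zstd_parameters params = zstd_get_params(level, src_size);
	size_t workspace_size = zstd_cctx_workspace_bound(&params.cParams);
	void *workspace = malloc(workspace_size);
	size_t dst_capacity = zstd_compress_bound(src_size);	/* worst-case output size */
	void *dst = malloc(dst_capacity);
	size_t ret = 0;

	if (workspace && dst) {
		zstd_cctx *cctx = zstd_init_cctx(workspace, workspace_size);

		if (cctx) {
			ret = zstd_compress_cctx(cctx, dst, dst_capacity,
						 src, src_size, &params);
			if (zstd_is_error(ret))
				ret = 0;
		}
	}

	if (ret) {
		*out = dst;
	} else {
		free(dst);
		*out = NULL;
	}
	free(workspace);	/* the cctx lives inside the workspace, so nothing else to free */
	return ret;
}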