author     Kent Overstreet <kent.overstreet@gmail.com>   2017-03-19 15:56:34 -0800
committer  Kent Overstreet <kent.overstreet@gmail.com>   2017-03-19 17:31:47 -0800
commit     5ec39af8eaba49aee7bafa44c661da39e2f40dc3 (patch)
tree       1fb1a981602cbf22c7d2b2dba1168c715d7cecb5 /include
parent     bb1941de5378a7b8122d3575dcbc7d0aeb6326f0 (diff)
Rename from bcache-tools to bcachefs-tools
Diffstat (limited to 'include')
-rw-r--r--  include/linux/bcache-ioctl.h                                          104
-rw-r--r--  include/linux/bcache.h                                               1449
-rw-r--r--  include/linux/blkdev.h                                                  6
-rw-r--r--  include/linux/closure.h                                               385
-rw-r--r--  include/trace/events/bcachefs.h (renamed from include/trace/events/bcache.h)  672
5 files changed, 518 insertions, 2098 deletions
diff --git a/include/linux/bcache-ioctl.h b/include/linux/bcache-ioctl.h
deleted file mode 100644
index ca769369..00000000
--- a/include/linux/bcache-ioctl.h
+++ /dev/null
@@ -1,104 +0,0 @@
-#ifndef _LINUX_BCACHE_IOCTL_H
-#define _LINUX_BCACHE_IOCTL_H
-
-#include <linux/bcache.h>
-#include <linux/uuid.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define BCH_FORCE_IF_DATA_LOST (1 << 0)
-#define BCH_FORCE_IF_METADATA_LOST (1 << 1)
-#define BCH_FORCE_IF_DATA_DEGRADED (1 << 2)
-#define BCH_FORCE_IF_METADATA_DEGRADED (1 << 3)
-
-#define BCH_FORCE_IF_DEGRADED \
- (BCH_FORCE_IF_DATA_DEGRADED| \
- BCH_FORCE_IF_METADATA_DEGRADED)
-
-#define BCH_BY_UUID (1 << 4)
-
-/* global control dev: */
-
-#define BCH_IOCTL_ASSEMBLE _IOW(0xbc, 1, struct bch_ioctl_assemble)
-#define BCH_IOCTL_INCREMENTAL _IOW(0xbc, 2, struct bch_ioctl_incremental)
-
-struct bch_ioctl_assemble {
- __u32 flags;
- __u32 nr_devs;
- __u64 pad;
- __u64 devs[];
-};
-
-struct bch_ioctl_incremental {
- __u32 flags;
- __u64 pad;
- __u64 dev;
-};
-
-/* filesystem ioctls: */
-
-#define BCH_IOCTL_QUERY_UUID _IOR(0xbc, 1, struct bch_ioctl_query_uuid)
-#define BCH_IOCTL_START _IOW(0xbc, 2, struct bch_ioctl_start)
-#define BCH_IOCTL_STOP _IO(0xbc, 3)
-#define BCH_IOCTL_DISK_ADD _IOW(0xbc, 4, struct bch_ioctl_disk)
-#define BCH_IOCTL_DISK_REMOVE _IOW(0xbc, 5, struct bch_ioctl_disk)
-#define BCH_IOCTL_DISK_ONLINE _IOW(0xbc, 6, struct bch_ioctl_disk)
-#define BCH_IOCTL_DISK_OFFLINE _IOW(0xbc, 7, struct bch_ioctl_disk)
-#define BCH_IOCTL_DISK_SET_STATE _IOW(0xbc, 8, struct bch_ioctl_disk_set_state)
-#define BCH_IOCTL_DISK_EVACUATE _IOW(0xbc, 9, struct bch_ioctl_disk)
-#define BCH_IOCTL_DATA _IOW(0xbc, 10, struct bch_ioctl_data)
-
-struct bch_ioctl_query_uuid {
- uuid_le uuid;
-};
-
-struct bch_ioctl_start {
- __u32 flags;
- __u32 pad;
-};
-
-struct bch_ioctl_disk {
- __u32 flags;
- __u32 pad;
- __u64 dev;
-};
-
-struct bch_ioctl_disk_set_state {
- __u32 flags;
- __u8 new_state;
- __u8 pad[3];
- __u64 dev;
-};
-
-#define BCH_REWRITE_INCREASE_REPLICAS (1 << 0)
-#define BCH_REWRITE_DECREASE_REPLICAS (1 << 1)
-
-#define BCH_REWRITE_RECOMPRESS (1 << 0)
-#define BCH_REWRITE_DECREASE_REPLICAS (1 << 1)
-
-enum bch_data_ops {
- BCH_DATA_SCRUB,
-};
-
-struct bch_data_op {
- __u8 type;
-};
-
-struct bch_ioctl_data {
- __u32 flags;
- __u32 pad;
-
- __u64 start_inode;
- __u64 start_offset;
-
- __u64 end_inode;
- __u64 end_offset;
-};
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _LINUX_BCACHE_IOCTL_H */
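As a rough illustration of how userspace would drive the interface being removed above (ioctl type 0xbc, one struct per request), here is a minimal sketch that queries the filesystem UUID via BCH_IOCTL_QUERY_UUID. Opening the mount point and flattening uuid_le to a plain 16-byte array are assumptions made for the sketch, not details taken from the header.

/*
 * Minimal sketch, not from the tree: query the filesystem UUID through the
 * ioctl interface defined above. Opening the mount point and using a plain
 * 16-byte array in place of uuid_le are assumptions for the example.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/types.h>

struct example_bch_ioctl_query_uuid {
	__u8		uuid[16];	/* uuid_le is a 16-byte LE UUID */
};

#define EXAMPLE_BCH_IOCTL_QUERY_UUID \
	_IOR(0xbc, 1, struct example_bch_ioctl_query_uuid)

int main(int argc, char **argv)
{
	struct example_bch_ioctl_query_uuid qu;
	unsigned i;
	int fd;

	if (argc < 2)
		return 1;

	fd = open(argv[1], O_RDONLY);		/* e.g. the mount point */
	if (fd < 0 || ioctl(fd, EXAMPLE_BCH_IOCTL_QUERY_UUID, &qu) < 0) {
		perror("BCH_IOCTL_QUERY_UUID");
		return 1;
	}

	for (i = 0; i < sizeof(qu.uuid); i++)
		printf("%02x", qu.uuid[i]);
	printf("\n");
	close(fd);
	return 0;
}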
diff --git a/include/linux/bcache.h b/include/linux/bcache.h
deleted file mode 100644
index c221747b..00000000
--- a/include/linux/bcache.h
+++ /dev/null
@@ -1,1449 +0,0 @@
-#ifndef _LINUX_BCACHE_H
-#define _LINUX_BCACHE_H
-
-/*
- * Bcache on disk data structures
- */
-
-#ifdef __cplusplus
-typedef bool _Bool;
-extern "C" {
-#endif
-
-#include <asm/types.h>
-#include <asm/byteorder.h>
-#include <linux/uuid.h>
-
-#define LE32_BITMASK(name, type, field, offset, end) \
-static const unsigned name##_OFFSET = offset; \
-static const unsigned name##_BITS = (end - offset); \
-static const __u64 name##_MAX = (1ULL << (end - offset)) - 1; \
- \
-static inline __u64 name(const type *k) \
-{ \
- return (__le32_to_cpu(k->field) >> offset) & \
- ~(~0ULL << (end - offset)); \
-} \
- \
-static inline void SET_##name(type *k, __u64 v) \
-{ \
- __u64 new = __le32_to_cpu(k->field); \
- \
- new &= ~(~(~0ULL << (end - offset)) << offset); \
- new |= (v & ~(~0ULL << (end - offset))) << offset; \
- k->field = __cpu_to_le32(new); \
-}
-
-#define LE64_BITMASK(name, type, field, offset, end) \
-static const unsigned name##_OFFSET = offset; \
-static const unsigned name##_BITS = (end - offset); \
-static const __u64 name##_MAX = (1ULL << (end - offset)) - 1; \
- \
-static inline __u64 name(const type *k) \
-{ \
- return (__le64_to_cpu(k->field) >> offset) & \
- ~(~0ULL << (end - offset)); \
-} \
- \
-static inline void SET_##name(type *k, __u64 v) \
-{ \
- __u64 new = __le64_to_cpu(k->field); \
- \
- new &= ~(~(~0ULL << (end - offset)) << offset); \
- new |= (v & ~(~0ULL << (end - offset))) << offset; \
- k->field = __cpu_to_le64(new); \
-}
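The two macros above generate, for a bit range [offset, end) packed inside a little-endian field, a getter, a SET_ setter and NAME_OFFSET/NAME_BITS/NAME_MAX constants, doing the byte swap on every access. A toy expansion follows; the struct and field names are invented, and it assumes the same __le32/byteorder helpers are in scope.

/* Hypothetical example, not part of the on-disk format: a 4-bit "state"
 * value packed into bits 0..3 of a little-endian flags word. */
struct example_hdr {
	__le32		flags;
};

LE32_BITMASK(EXAMPLE_STATE, struct example_hdr, flags, 0, 4)

static inline void example_state_usage(void)
{
	struct example_hdr h = { .flags = __cpu_to_le32(0) };

	SET_EXAMPLE_STATE(&h, 7);	/* rewrites bits 0..3 of h.flags */
	/* EXAMPLE_STATE(&h) now returns 7; EXAMPLE_STATE_MAX is 15 */
}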
-
-struct bkey_format {
- __u8 key_u64s;
- __u8 nr_fields;
- /* One unused slot for now: */
- __u8 bits_per_field[6];
- __le64 field_offset[6];
-};
-
-/* Btree keys - all units are in sectors */
-
-struct bpos {
- /* Word order matches machine byte order */
-#if defined(__LITTLE_ENDIAN)
- __u32 snapshot;
- __u64 offset;
- __u64 inode;
-#elif defined(__BIG_ENDIAN)
- __u64 inode;
- __u64 offset; /* Points to end of extent - sectors */
- __u32 snapshot;
-#else
-#error edit for your odd byteorder.
-#endif
-} __attribute__((packed, aligned(4)));
-
-#define KEY_INODE_MAX ((__u64)~0ULL)
-#define KEY_OFFSET_MAX ((__u64)~0ULL)
-#define KEY_SNAPSHOT_MAX ((__u32)~0U)
-
-static inline struct bpos POS(__u64 inode, __u64 offset)
-{
- struct bpos ret;
-
- ret.inode = inode;
- ret.offset = offset;
- ret.snapshot = 0;
-
- return ret;
-}
-
-#define POS_MIN POS(0, 0)
-#define POS_MAX POS(KEY_INODE_MAX, KEY_OFFSET_MAX)
-
-/* Empty placeholder struct, for container_of() */
-struct bch_val {
- __u64 __nothing[0];
-};
-
-struct bversion {
-#if defined(__LITTLE_ENDIAN)
- __u64 lo;
- __u32 hi;
-#elif defined(__BIG_ENDIAN)
- __u32 hi;
- __u64 lo;
-#endif
-} __attribute__((packed, aligned(4)));
-
-struct bkey {
- /* Size of combined key and value, in u64s */
- __u8 u64s;
-
- /* Format of key (0 for format local to btree node) */
-#if defined(__LITTLE_ENDIAN_BITFIELD)
- __u8 format:7,
- needs_whiteout:1;
-#elif defined (__BIG_ENDIAN_BITFIELD)
- __u8 needs_whiteout:1,
- format:7;
-#else
-#error edit for your odd byteorder.
-#endif
-
- /* Type of the value */
- __u8 type;
-
-#if defined(__LITTLE_ENDIAN)
- __u8 pad[1];
-
- struct bversion version;
- __u32 size; /* extent size, in sectors */
- struct bpos p;
-#elif defined(__BIG_ENDIAN)
- struct bpos p;
- __u32 size; /* extent size, in sectors */
- struct bversion version;
-
- __u8 pad[1];
-#endif
-} __attribute__((packed, aligned(8)));
-
-struct bkey_packed {
- __u64 _data[0];
-
- /* Size of combined key and value, in u64s */
- __u8 u64s;
-
- /* Format of key (0 for format local to btree node) */
-
- /*
- * XXX: next incompat on disk format change, switch format and
- * needs_whiteout - bkey_packed() will be cheaper if format is the high
- * bits of the bitfield
- */
-#if defined(__LITTLE_ENDIAN_BITFIELD)
- __u8 format:7,
- needs_whiteout:1;
-#elif defined (__BIG_ENDIAN_BITFIELD)
- __u8 needs_whiteout:1,
- format:7;
-#endif
-
- /* Type of the value */
- __u8 type;
- __u8 key_start[0];
-
- /*
- * We copy bkeys with struct assignment in various places, and while
- * that shouldn't be done with packed bkeys we can't disallow it in C,
- * and it's legal to cast a bkey to a bkey_packed - so padding it out
- * to the same size as struct bkey should hopefully be safest.
- */
- __u8 pad[sizeof(struct bkey) - 3];
-} __attribute__((packed, aligned(8)));
-
-#define BKEY_U64s (sizeof(struct bkey) / sizeof(__u64))
-#define KEY_PACKED_BITS_START 24
-
-#define KEY_SIZE_MAX ((__u32)~0U)
-
-#define KEY_FORMAT_LOCAL_BTREE 0
-#define KEY_FORMAT_CURRENT 1
-
-enum bch_bkey_fields {
- BKEY_FIELD_INODE,
- BKEY_FIELD_OFFSET,
- BKEY_FIELD_SNAPSHOT,
- BKEY_FIELD_SIZE,
- BKEY_FIELD_VERSION_HI,
- BKEY_FIELD_VERSION_LO,
- BKEY_NR_FIELDS,
-};
-
-#define bkey_format_field(name, field) \
- [BKEY_FIELD_##name] = (sizeof(((struct bkey *) NULL)->field) * 8)
-
-#define BKEY_FORMAT_CURRENT \
-((struct bkey_format) { \
- .key_u64s = BKEY_U64s, \
- .nr_fields = BKEY_NR_FIELDS, \
- .bits_per_field = { \
- bkey_format_field(INODE, p.inode), \
- bkey_format_field(OFFSET, p.offset), \
- bkey_format_field(SNAPSHOT, p.snapshot), \
- bkey_format_field(SIZE, size), \
- bkey_format_field(VERSION_HI, version.hi), \
- bkey_format_field(VERSION_LO, version.lo), \
- }, \
-})
-
-/* bkey with inline value */
-struct bkey_i {
- __u64 _data[0];
-
- union {
- struct {
- /* Size of combined key and value, in u64s */
- __u8 u64s;
- };
- struct {
- struct bkey k;
- struct bch_val v;
- };
- };
-};
-
-#ifndef __cplusplus
-
-#define KEY(_inode, _offset, _size) \
-((struct bkey) { \
- .u64s = BKEY_U64s, \
- .format = KEY_FORMAT_CURRENT, \
- .p = POS(_inode, _offset), \
- .size = _size, \
-})
-
-#else
-
-static inline struct bkey KEY(__u64 inode, __u64 offset, __u64 size)
-{
- struct bkey ret;
-
- memset(&ret, 0, sizeof(ret));
- ret.u64s = BKEY_U64s;
- ret.format = KEY_FORMAT_CURRENT;
- ret.p.inode = inode;
- ret.p.offset = offset;
- ret.size = size;
-
- return ret;
-}
-
-#endif
-
-static inline void bkey_init(struct bkey *k)
-{
- *k = KEY(0, 0, 0);
-}
-
-#define bkey_bytes(_k) ((_k)->u64s * sizeof(__u64))
-
-#define __BKEY_PADDED(key, pad) \
- struct { struct bkey_i key; __u64 key ## _pad[pad]; }
-
-#define BKEY_VAL_TYPE(name, nr) \
-struct bkey_i_##name { \
- union { \
- struct bkey k; \
- struct bkey_i k_i; \
- }; \
- struct bch_##name v; \
-}
-
-/*
- * - DELETED keys are used internally to mark keys that should be ignored but
- * override keys in composition order. Their version number is ignored.
- *
- * - DISCARDED keys indicate that the data is all 0s because it has been
- * discarded. DISCARDs may have a version; if the version is nonzero the key
- * will be persistent, otherwise the key will be dropped whenever the btree
- * node is rewritten (like DELETED keys).
- *
- * - ERROR: any read of the data returns a read error, as the data was lost due
- * to a failing device. Like DISCARDED keys, they can be removed (overridden)
- * by new writes or cluster-wide GC. Node repair can also overwrite them with
- * the same or a more recent version number, but not with an older version
- * number.
-*/
-#define KEY_TYPE_DELETED 0
-#define KEY_TYPE_DISCARD 1
-#define KEY_TYPE_ERROR 2
-#define KEY_TYPE_COOKIE 3
-#define KEY_TYPE_PERSISTENT_DISCARD 4
-#define KEY_TYPE_GENERIC_NR 128
-
-struct bch_cookie {
- struct bch_val v;
- __le64 cookie;
-};
-BKEY_VAL_TYPE(cookie, KEY_TYPE_COOKIE);
-
-/* Extents */
-
-/*
- * In extent bkeys, the value is a list of pointers (bch_extent_ptr), optionally
- * preceded by checksum/compression information (bch_extent_crc32 or
- * bch_extent_crc64).
- *
- * One major determining factor in the format of extents is how we handle and
- * represent extents that have been partially overwritten and thus trimmed:
- *
- * If an extent is not checksummed or compressed, when the extent is trimmed we
- * don't have to remember the extent we originally allocated and wrote: we can
- * merely adjust ptr->offset to point to the start of the data that
- * is currently live. The size field in struct bkey records the current (live)
- * size of the extent, and is also used to mean "size of region on disk that we
- * point to" in this case.
- *
- * Thus an extent that is not checksummed or compressed will consist only of a
- * list of bch_extent_ptrs, with none of the fields in
- * bch_extent_crc32/bch_extent_crc64.
- *
- * When an extent is checksummed or compressed, it's not possible to read only
- * the data that is currently live: we have to read the entire extent that was
- * originally written, and then return only the part of the extent that is
- * currently live.
- *
- * Thus, in addition to the current size of the extent in struct bkey, we need
- * to store the size of the originally allocated space - this is the
- * compressed_size and uncompressed_size fields in bch_extent_crc32/64. Also,
- * when the extent is trimmed, instead of modifying the offset field of the
- * pointer, we keep a second smaller offset field - "offset into the original
- * extent of the currently live region".
- *
- * The other major determining factor is replication and data migration:
- *
- * Each pointer may have its own bch_extent_crc32/64. When doing a replicated
- * write, we will initially write all the replicas in the same format, with the
- * same checksum type and compression format - however, when copygc runs later (or
- * tiering/cache promotion, anything that moves data), it is not in general
- * going to rewrite all the pointers at once - one of the replicas may be in a
- * bucket on one device that has very little fragmentation while another lives
- * in a bucket that has become heavily fragmented, and thus is being rewritten
- * sooner than the rest.
- *
- * Thus it will only move a subset of the pointers (or in the case of
- * tiering/cache promotion perhaps add a single pointer without dropping any
- * current pointers), and if the extent has been partially overwritten it must
- * write only the currently live portion (or copygc would not be able to reduce
- * fragmentation!) - which necessitates a different bch_extent_crc format for
- * the new pointer.
- *
- * But in the interests of space efficiency, we don't want to store one
- * bch_extent_crc for each pointer if we don't have to.
- *
- * Thus, a bch_extent consists of bch_extent_crc32s, bch_extent_crc64s, and
- * bch_extent_ptrs appended arbitrarily one after the other. We determine the
- * type of a given entry with a scheme similar to utf8 (except we're encoding a
- * type, not a size), encoding the type in the position of the first set bit:
- *
- * bch_extent_ptr	- 0b1
- * bch_extent_crc32	- 0b10
- * bch_extent_crc64	- 0b100
- * bch_extent_crc128	- 0b1000
- *
- * We do it this way because bch_extent_crc32 is _very_ constrained on bits (and
- * bch_extent_crc64 is the least constrained).
- *
- * Then, each bch_extent_crc32/64 applies to the pointers that follow after it,
- * until the next bch_extent_crc32/64.
- *
- * If there are no bch_extent_crcs preceding a bch_extent_ptr, then that pointer
- * is neither checksummed nor compressed.
- */
-
-/* 128 bits, sufficient for cryptographic MACs: */
-struct bch_csum {
- __le64 lo;
- __le64 hi;
-} __attribute__((packed, aligned(8)));
-
-#define BCH_CSUM_NONE 0U
-#define BCH_CSUM_CRC32C 1U
-#define BCH_CSUM_CRC64 2U
-#define BCH_CSUM_CHACHA20_POLY1305_80 3U
-#define BCH_CSUM_CHACHA20_POLY1305_128 4U
-#define BCH_CSUM_NR 5U
-
-static inline _Bool bch_csum_type_is_encryption(unsigned type)
-{
- switch (type) {
- case BCH_CSUM_CHACHA20_POLY1305_80:
- case BCH_CSUM_CHACHA20_POLY1305_128:
- return true;
- default:
- return false;
- }
-}
-
-enum bch_extent_entry_type {
- BCH_EXTENT_ENTRY_ptr = 0,
- BCH_EXTENT_ENTRY_crc32 = 1,
- BCH_EXTENT_ENTRY_crc64 = 2,
- BCH_EXTENT_ENTRY_crc128 = 3,
-};
-
-#define BCH_EXTENT_ENTRY_MAX 4
-
-/* Compressed/uncompressed size are stored biased by 1: */
-struct bch_extent_crc32 {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
- __u32 type:2,
- _compressed_size:7,
- _uncompressed_size:7,
- offset:7,
- _unused:1,
- csum_type:4,
- compression_type:4;
- __u32 csum;
-#elif defined (__BIG_ENDIAN_BITFIELD)
- __u32 csum;
- __u32 compression_type:4,
- csum_type:4,
- _unused:1,
- offset:7,
- _uncompressed_size:7,
- _compressed_size:7,
- type:2;
-#endif
-} __attribute__((packed, aligned(8)));
-
-#define CRC32_SIZE_MAX (1U << 7)
-#define CRC32_NONCE_MAX 0
-
-struct bch_extent_crc64 {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
- __u64 type:3,
- _compressed_size:9,
- _uncompressed_size:9,
- offset:9,
- nonce:10,
- csum_type:4,
- compression_type:4,
- csum_hi:16;
-#elif defined (__BIG_ENDIAN_BITFIELD)
- __u64 csum_hi:16,
- compression_type:4,
- csum_type:4,
- nonce:10,
- offset:9,
- _uncompressed_size:9,
- _compressed_size:9,
- type:3;
-#endif
- __u64 csum_lo;
-} __attribute__((packed, aligned(8)));
-
-#define CRC64_SIZE_MAX (1U << 9)
-#define CRC64_NONCE_MAX ((1U << 10) - 1)
-
-struct bch_extent_crc128 {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
- __u64 type:4,
- _compressed_size:13,
- _uncompressed_size:13,
- offset:13,
- nonce:13,
- csum_type:4,
- compression_type:4;
-#elif defined (__BIG_ENDIAN_BITFIELD)
- __u64 compression_type:4,
- csum_type:4,
- nonce:14,
- offset:13,
- _uncompressed_size:13,
- _compressed_size:13,
- type:3;
-#endif
- struct bch_csum csum;
-} __attribute__((packed, aligned(8)));
-
-#define CRC128_SIZE_MAX (1U << 13)
-#define CRC128_NONCE_MAX ((1U << 13) - 1)
-
-/*
- * Max size of an extent that may require bouncing to read or write
- * (checksummed, compressed): 64k
- */
-#define BCH_ENCODED_EXTENT_MAX 128U
-
-/*
- * @reservation - pointer hasn't been written to, just reserved
- */
-struct bch_extent_ptr {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
- __u64 type:1,
- cached:1,
- erasure_coded:1,
- reservation:1,
- offset:44, /* 8 petabytes */
- dev:8,
- gen:8;
-#elif defined (__BIG_ENDIAN_BITFIELD)
- __u64 gen:8,
- dev:8,
- offset:44,
- reservation:1,
- erasure_coded:1,
- cached:1,
- type:1;
-#endif
-} __attribute__((packed, aligned(8)));
-
-struct bch_extent_reservation {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
- __u64 type:5,
- unused:23,
- replicas:4,
- generation:32;
-#elif defined (__BIG_ENDIAN_BITFIELD)
- __u64 generation:32,
- replicas:4,
- unused:23,
- type:5;
-#endif
-};
-
-union bch_extent_entry {
-#if defined(__LITTLE_ENDIAN) || __BITS_PER_LONG == 64
- unsigned long type;
-#elif __BITS_PER_LONG == 32
- struct {
- unsigned long pad;
- unsigned long type;
- };
-#else
-#error edit for your odd byteorder.
-#endif
- struct bch_extent_crc32 crc32;
- struct bch_extent_crc64 crc64;
- struct bch_extent_crc128 crc128;
- struct bch_extent_ptr ptr;
-};
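Tying the comment block above to the definitions that follow it: the entry type is the position of the lowest set bit of the entry's first word, which is why bch_extent_ptr has a 1-bit type field, bch_extent_crc32 a 2-bit one, and so on. A sketch of a decoder consistent with those widths and with enum bch_extent_entry_type; the helper name and the use of a GCC builtin are illustrative, not the in-tree implementation.

/* Illustrative only: classify an entry by its lowest set bit.
 *   type word ...0001 -> BCH_EXTENT_ENTRY_ptr
 *   type word ...0010 -> BCH_EXTENT_ENTRY_crc32
 *   type word ...0100 -> BCH_EXTENT_ENTRY_crc64
 *   type word ...1000 -> BCH_EXTENT_ENTRY_crc128
 */
static inline enum bch_extent_entry_type
example_extent_entry_type(const union bch_extent_entry *e)
{
	return (enum bch_extent_entry_type) __builtin_ctzl(e->type);
}

A walker then advances by the size of whatever it just decoded, applying each crc entry to the pointers that follow it until the next crc entry, as the comment above describes.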
-
-enum {
- BCH_EXTENT = 128,
-
- /*
- * This is kind of a hack, we're overloading the type for a boolean that
- * really should be part of the value - BCH_EXTENT and BCH_EXTENT_CACHED
- * have the same value type:
- */
- BCH_EXTENT_CACHED = 129,
-
- /*
- * Persistent reservation:
- */
- BCH_RESERVATION = 130,
-};
-
-struct bch_extent {
- struct bch_val v;
-
- union bch_extent_entry start[0];
- __u64 _data[0];
-} __attribute__((packed, aligned(8)));
-BKEY_VAL_TYPE(extent, BCH_EXTENT);
-
-struct bch_reservation {
- struct bch_val v;
-
- __le32 generation;
- __u8 nr_replicas;
- __u8 pad[3];
-} __attribute__((packed, aligned(8)));
-BKEY_VAL_TYPE(reservation, BCH_RESERVATION);
-
-/* Maximum size (in u64s) a single pointer could be: */
-#define BKEY_EXTENT_PTR_U64s_MAX\
- ((sizeof(struct bch_extent_crc128) + \
- sizeof(struct bch_extent_ptr)) / sizeof(u64))
-
-/* Maximum possible size of an entire extent value: */
-/* There's a hack in the keylist code that needs to be fixed.. */
-#define BKEY_EXTENT_VAL_U64s_MAX \
- (BKEY_EXTENT_PTR_U64s_MAX * BCH_REPLICAS_MAX)
-
-/* Maximum possible size of an entire extent, key + value: */
-#define BKEY_EXTENT_U64s_MAX (BKEY_U64s + BKEY_EXTENT_VAL_U64s_MAX)
-
-/* Btree pointers don't carry around checksums: */
-#define BKEY_BTREE_PTR_VAL_U64s_MAX \
- ((sizeof(struct bch_extent_ptr)) / sizeof(u64) * BCH_REPLICAS_MAX)
-#define BKEY_BTREE_PTR_U64s_MAX \
- (BKEY_U64s + BKEY_BTREE_PTR_VAL_U64s_MAX)
-
-/* Inodes */
-
-#define BLOCKDEV_INODE_MAX 4096
-
-#define BCACHE_ROOT_INO 4096
-
-enum bch_inode_types {
- BCH_INODE_FS = 128,
- BCH_INODE_BLOCKDEV = 129,
-};
-
-struct bch_inode {
- struct bch_val v;
-
- __le64 i_hash_seed;
- __le32 i_flags;
- __le16 i_mode;
- __u8 fields[0];
-} __attribute__((packed));
-BKEY_VAL_TYPE(inode, BCH_INODE_FS);
-
-#define BCH_INODE_FIELDS() \
- BCH_INODE_FIELD(i_atime, 64) \
- BCH_INODE_FIELD(i_ctime, 64) \
- BCH_INODE_FIELD(i_mtime, 64) \
- BCH_INODE_FIELD(i_otime, 64) \
- BCH_INODE_FIELD(i_size, 64) \
- BCH_INODE_FIELD(i_sectors, 64) \
- BCH_INODE_FIELD(i_uid, 32) \
- BCH_INODE_FIELD(i_gid, 32) \
- BCH_INODE_FIELD(i_nlink, 32) \
- BCH_INODE_FIELD(i_generation, 32) \
- BCH_INODE_FIELD(i_dev, 32)
-
-enum {
- /*
- * User flags (get/settable with FS_IOC_*FLAGS, correspond to FS_*_FL
- * flags)
- */
- __BCH_INODE_SYNC = 0,
- __BCH_INODE_IMMUTABLE = 1,
- __BCH_INODE_APPEND = 2,
- __BCH_INODE_NODUMP = 3,
- __BCH_INODE_NOATIME = 4,
-
- __BCH_INODE_I_SIZE_DIRTY= 5,
- __BCH_INODE_I_SECTORS_DIRTY= 6,
-
- /* not implemented yet: */
- __BCH_INODE_HAS_XATTRS = 7, /* has xattrs in xattr btree */
-
- /* bits 20+ reserved for packed fields below: */
-};
-
-#define BCH_INODE_SYNC (1 << __BCH_INODE_SYNC)
-#define BCH_INODE_IMMUTABLE (1 << __BCH_INODE_IMMUTABLE)
-#define BCH_INODE_APPEND (1 << __BCH_INODE_APPEND)
-#define BCH_INODE_NODUMP (1 << __BCH_INODE_NODUMP)
-#define BCH_INODE_NOATIME (1 << __BCH_INODE_NOATIME)
-#define BCH_INODE_I_SIZE_DIRTY (1 << __BCH_INODE_I_SIZE_DIRTY)
-#define BCH_INODE_I_SECTORS_DIRTY (1 << __BCH_INODE_I_SECTORS_DIRTY)
-#define BCH_INODE_HAS_XATTRS (1 << __BCH_INODE_HAS_XATTRS)
-
-LE32_BITMASK(INODE_STR_HASH, struct bch_inode, i_flags, 20, 24);
-LE32_BITMASK(INODE_NR_FIELDS, struct bch_inode, i_flags, 24, 32);
-
-struct bch_inode_blockdev {
- struct bch_val v;
-
- __le64 i_size;
- __le64 i_flags;
-
- /* Seconds: */
- __le64 i_ctime;
- __le64 i_mtime;
-
- uuid_le i_uuid;
- __u8 i_label[32];
-} __attribute__((packed, aligned(8)));
-BKEY_VAL_TYPE(inode_blockdev, BCH_INODE_BLOCKDEV);
-
-/* Thin provisioned volume, or cache for another block device? */
-LE64_BITMASK(CACHED_DEV, struct bch_inode_blockdev, i_flags, 0, 1)
-
-/* Dirents */
-
-/*
- * Dirents (and xattrs) have to implement string lookups; since our b-tree
- * doesn't support arbitrary length strings for the key, we instead index by a
- * 64 bit hash (currently truncated sha1) of the string, stored in the offset
- * field of the key - using linear probing to resolve hash collisions. This also
- * provides us with the readdir cookie posix requires.
- *
- * Linear probing requires us to use whiteouts for deletions, in the event of a
- * collision:
- */
-
-enum {
- BCH_DIRENT = 128,
- BCH_DIRENT_WHITEOUT = 129,
-};
-
-struct bch_dirent {
- struct bch_val v;
-
- /* Target inode number: */
- __le64 d_inum;
-
- /*
- * Copy of mode bits 12-15 from the target inode - so userspace can get
- * the filetype without having to do a stat()
- */
- __u8 d_type;
-
- __u8 d_name[];
-} __attribute__((packed));
-BKEY_VAL_TYPE(dirent, BCH_DIRENT);
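The lookup scheme described in the comment above (hash of the name stored in the key's offset field, linear probing on collision, whiteouts on deletion) can be pictured with a small sketch. Every example_* name below is invented; real lookups go through the btree iterator and string-hash code, and compare names by their stored length rather than with strcmp().

/* Conceptual sketch only - the example_* helpers stand in for the real
 * btree iterator machinery. */
struct example_btree_iter;

__u64 example_dirent_hash(const char *name);	/* 64-bit hash of the name */
const struct bch_dirent *example_peek_slot(struct example_btree_iter *,
					   __u64 dir_inum, __u64 offset,
					   __u8 *type);

static const struct bch_dirent *
example_dirent_lookup(struct example_btree_iter *iter, __u64 dir_inum,
		      const char *name)
{
	__u64 offset = example_dirent_hash(name);
	const struct bch_dirent *d;
	__u8 type;

	/* Linear probing: scan forward from the slot the hash points at */
	while ((d = example_peek_slot(iter, dir_inum, offset++, &type))) {
		/* Whiteouts mark deleted entries; keep probing past them */
		if (type == BCH_DIRENT_WHITEOUT)
			continue;
		if (!strcmp((const char *) d->d_name, name))
			return d;
		/* hash collision with a different name: keep probing */
	}

	/* hit an empty slot: the name does not exist */
	return NULL;
}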
-
-/* Xattrs */
-
-enum {
- BCH_XATTR = 128,
- BCH_XATTR_WHITEOUT = 129,
-};
-
-#define BCH_XATTR_INDEX_USER 0
-#define BCH_XATTR_INDEX_POSIX_ACL_ACCESS 1
-#define BCH_XATTR_INDEX_POSIX_ACL_DEFAULT 2
-#define BCH_XATTR_INDEX_TRUSTED 3
-#define BCH_XATTR_INDEX_SECURITY 4
-
-struct bch_xattr {
- struct bch_val v;
- __u8 x_type;
- __u8 x_name_len;
- __le16 x_val_len;
- __u8 x_name[];
-} __attribute__((packed));
-BKEY_VAL_TYPE(xattr, BCH_XATTR);
-
-/* Superblock */
-
-/* Version 0: Cache device
- * Version 1: Backing device
- * Version 2: Seed pointer into btree node checksum
- * Version 3: Cache device with new UUID format
- * Version 4: Backing device with data offset
- * Version 5: All the incompat changes
- * Version 6: Cache device UUIDs all in superblock, another incompat bset change
- * Version 7: Encryption (expanded checksum fields), other random things
- */
-#define BCACHE_SB_VERSION_CDEV_V0 0
-#define BCACHE_SB_VERSION_BDEV 1
-#define BCACHE_SB_VERSION_CDEV_WITH_UUID 3
-#define BCACHE_SB_VERSION_BDEV_WITH_OFFSET 4
-#define BCACHE_SB_VERSION_CDEV_V2 5
-#define BCACHE_SB_VERSION_CDEV_V3 6
-#define BCACHE_SB_VERSION_CDEV_V4 7
-#define BCACHE_SB_VERSION_CDEV 7
-#define BCACHE_SB_MAX_VERSION 7
-
-#define BCH_SB_SECTOR 8
-#define BCH_SB_LABEL_SIZE 32
-#define BCH_SB_MEMBERS_MAX 64 /* XXX kill */
-
-struct bch_member {
- uuid_le uuid;
- __le64 nbuckets; /* device size */
- __le16 first_bucket; /* index of first bucket used */
- __le16 bucket_size; /* sectors */
- __le32 pad;
- __le64 last_mount; /* time_t */
-
- __le64 flags[2];
-};
-
-LE64_BITMASK(BCH_MEMBER_STATE, struct bch_member, flags[0], 0, 4)
-LE64_BITMASK(BCH_MEMBER_TIER, struct bch_member, flags[0], 4, 8)
-LE64_BITMASK(BCH_MEMBER_HAS_METADATA, struct bch_member, flags[0], 8, 9)
-LE64_BITMASK(BCH_MEMBER_HAS_DATA, struct bch_member, flags[0], 9, 10)
-LE64_BITMASK(BCH_MEMBER_REPLACEMENT, struct bch_member, flags[0], 10, 14)
-LE64_BITMASK(BCH_MEMBER_DISCARD, struct bch_member, flags[0], 14, 15);
-
-#if 0
-LE64_BITMASK(BCH_MEMBER_NR_READ_ERRORS, struct bch_member, flags[1], 0, 20);
-LE64_BITMASK(BCH_MEMBER_NR_WRITE_ERRORS,struct bch_member, flags[1], 20, 40);
-#endif
-
-enum bch_member_state {
- BCH_MEMBER_STATE_RW = 0,
- BCH_MEMBER_STATE_RO = 1,
- BCH_MEMBER_STATE_FAILED = 2,
- BCH_MEMBER_STATE_SPARE = 3,
- BCH_MEMBER_STATE_NR = 4,
-};
-
-#define BCH_TIER_MAX 4U
-
-enum cache_replacement {
- CACHE_REPLACEMENT_LRU = 0,
- CACHE_REPLACEMENT_FIFO = 1,
- CACHE_REPLACEMENT_RANDOM = 2,
- CACHE_REPLACEMENT_NR = 3,
-};
-
-struct bch_sb_layout {
- uuid_le magic; /* bcache superblock UUID */
- __u8 layout_type;
- __u8 sb_max_size_bits; /* base 2 of 512 byte sectors */
- __u8 nr_superblocks;
- __u8 pad[5];
- __u64 sb_offset[61];
-} __attribute__((packed));
-
-#define BCH_SB_LAYOUT_SECTOR 7
-
-struct bch_sb_field {
- __u64 _data[0];
- __le32 u64s;
- __le32 type;
-};
-
-enum bch_sb_field_type {
- BCH_SB_FIELD_journal = 0,
- BCH_SB_FIELD_members = 1,
- BCH_SB_FIELD_crypt = 2,
- BCH_SB_FIELD_NR = 3,
-};
-
-struct bch_sb_field_journal {
- struct bch_sb_field field;
- __le64 buckets[0];
-};
-
-struct bch_sb_field_members {
- struct bch_sb_field field;
- struct bch_member members[0];
-};
-
-/* Crypto: */
-
-struct nonce {
- __le32 d[4];
-};
-
-struct bch_key {
- __le64 key[4];
-};
-
-#define BCH_KEY_MAGIC \
- (((u64) 'b' << 0)|((u64) 'c' << 8)| \
- ((u64) 'h' << 16)|((u64) '*' << 24)| \
- ((u64) '*' << 32)|((u64) 'k' << 40)| \
- ((u64) 'e' << 48)|((u64) 'y' << 56))
-
-struct bch_encrypted_key {
- __le64 magic;
- struct bch_key key;
-};
-
-/*
- * If this field is present in the superblock, it stores an encryption key which
- * is used to encrypt all other data/metadata. The key will normally be encrypted
- * with the key userspace provides, but if encryption has been turned off we'll
- * just store the master key unencrypted in the superblock so we can access the
- * previously encrypted data.
- */
-struct bch_sb_field_crypt {
- struct bch_sb_field field;
-
- __le64 flags;
- __le64 kdf_flags;
- struct bch_encrypted_key key;
-};
-
-LE64_BITMASK(BCH_CRYPT_KDF_TYPE, struct bch_sb_field_crypt, flags, 0, 4);
-
-enum bch_kdf_types {
- BCH_KDF_SCRYPT = 0,
- BCH_KDF_NR = 1,
-};
-
-/* stored as base 2 log of scrypt params: */
-LE64_BITMASK(BCH_KDF_SCRYPT_N, struct bch_sb_field_crypt, kdf_flags, 0, 16);
-LE64_BITMASK(BCH_KDF_SCRYPT_R, struct bch_sb_field_crypt, kdf_flags, 16, 32);
-LE64_BITMASK(BCH_KDF_SCRYPT_P, struct bch_sb_field_crypt, kdf_flags, 32, 48);
-
-struct bch_sb_field_replication {
- struct bch_sb_field field;
-};
-
-/*
- * @offset - sector where this sb was written
- * @version - on disk format version
- * @magic - identifies as a bcache superblock (BCACHE_MAGIC)
- * @seq - incremented each time superblock is written
- * @uuid - used for generating various magic numbers and identifying
- * member devices, never changes
- * @user_uuid - user visible UUID, may be changed
- * @label - filesystem label
- * @seq - identifies most recent superblock, incremented each time
- * superblock is written
- * @features - enabled incompatible features
- */
-struct bch_sb {
- struct bch_csum csum;
- __le64 version;
- uuid_le magic;
- uuid_le uuid;
- uuid_le user_uuid;
- __u8 label[BCH_SB_LABEL_SIZE];
- __le64 offset;
- __le64 seq;
-
- __le16 block_size;
- __u8 dev_idx;
- __u8 nr_devices;
- __le32 u64s;
-
- __le64 time_base_lo;
- __le32 time_base_hi;
- __le32 time_precision;
-
- __le64 flags[8];
- __le64 features[2];
- __le64 compat[2];
-
- struct bch_sb_layout layout;
-
- union {
- struct bch_sb_field start[0];
- __le64 _data[0];
- };
-} __attribute__((packed, aligned(8)));
-
-/*
- * Flags:
- * BCH_SB_INITIALIZED - set on first mount
- * BCH_SB_CLEAN - did we shut down cleanly? Just a hint, doesn't affect
- * behaviour of mount/recovery path:
- * BCH_SB_INODE_32BIT - limit inode numbers to 32 bits
- * BCH_SB_128_BIT_MACS - 128 bit macs instead of 80
- * BCH_SB_ENCRYPTION_TYPE - if nonzero encryption is enabled; overrides
- * DATA/META_CSUM_TYPE. Also indicates encryption
- * algorithm in use, if/when we get more than one
- */
-
-LE64_BITMASK(BCH_SB_INITIALIZED, struct bch_sb, flags[0], 0, 1);
-LE64_BITMASK(BCH_SB_CLEAN, struct bch_sb, flags[0], 1, 2);
-LE64_BITMASK(BCH_SB_CSUM_TYPE, struct bch_sb, flags[0], 2, 8);
-LE64_BITMASK(BCH_SB_ERROR_ACTION, struct bch_sb, flags[0], 8, 12);
-
-LE64_BITMASK(BCH_SB_BTREE_NODE_SIZE, struct bch_sb, flags[0], 12, 28);
-
-LE64_BITMASK(BCH_SB_GC_RESERVE, struct bch_sb, flags[0], 28, 33);
-LE64_BITMASK(BCH_SB_ROOT_RESERVE, struct bch_sb, flags[0], 33, 40);
-
-LE64_BITMASK(BCH_SB_META_CSUM_TYPE, struct bch_sb, flags[0], 40, 44);
-LE64_BITMASK(BCH_SB_DATA_CSUM_TYPE, struct bch_sb, flags[0], 44, 48);
-
-LE64_BITMASK(BCH_SB_META_REPLICAS_WANT, struct bch_sb, flags[0], 48, 52);
-LE64_BITMASK(BCH_SB_DATA_REPLICAS_WANT, struct bch_sb, flags[0], 52, 56);
-
-LE64_BITMASK(BCH_SB_META_REPLICAS_HAVE, struct bch_sb, flags[0], 56, 60);
-LE64_BITMASK(BCH_SB_DATA_REPLICAS_HAVE, struct bch_sb, flags[0], 60, 64);
-
-LE64_BITMASK(BCH_SB_STR_HASH_TYPE, struct bch_sb, flags[1], 0, 4);
-LE64_BITMASK(BCH_SB_COMPRESSION_TYPE, struct bch_sb, flags[1], 4, 8);
-LE64_BITMASK(BCH_SB_INODE_32BIT, struct bch_sb, flags[1], 8, 9);
-
-LE64_BITMASK(BCH_SB_128_BIT_MACS, struct bch_sb, flags[1], 9, 10);
-LE64_BITMASK(BCH_SB_ENCRYPTION_TYPE, struct bch_sb, flags[1], 10, 14);
-LE64_BITMASK(BCH_SB_JOURNAL_ENTRY_SIZE, struct bch_sb, flags[1], 14, 20);
-
-LE64_BITMASK(BCH_SB_META_REPLICAS_REQ, struct bch_sb, flags[1], 20, 24);
-LE64_BITMASK(BCH_SB_DATA_REPLICAS_REQ, struct bch_sb, flags[1], 24, 28);
-
-/* Features: */
-enum bch_sb_features {
- BCH_FEATURE_LZ4 = 0,
- BCH_FEATURE_GZIP = 1,
-};
-
-/* options: */
-
-#define BCH_REPLICAS_MAX 4U
-
-#if 0
-#define BCH_ERROR_ACTIONS() \
- x(BCH_ON_ERROR_CONTINUE, 0, "continue") \
- x(BCH_ON_ERROR_RO, 1, "remount-ro") \
- x(BCH_ON_ERROR_PANIC, 2, "panic") \
- x(BCH_NR_ERROR_ACTIONS, 3, NULL)
-
-enum bch_error_actions {
-#define x(_opt, _nr, _str) _opt = _nr,
- BCH_ERROR_ACTIONS()
-#undef x
-};
-#endif
-
-enum bch_error_actions {
- BCH_ON_ERROR_CONTINUE = 0,
- BCH_ON_ERROR_RO = 1,
- BCH_ON_ERROR_PANIC = 2,
- BCH_NR_ERROR_ACTIONS = 3,
-};
-
-enum bch_csum_opts {
- BCH_CSUM_OPT_NONE = 0,
- BCH_CSUM_OPT_CRC32C = 1,
- BCH_CSUM_OPT_CRC64 = 2,
- BCH_CSUM_OPT_NR = 3,
-};
-
-enum bch_str_hash_opts {
- BCH_STR_HASH_CRC32C = 0,
- BCH_STR_HASH_CRC64 = 1,
- BCH_STR_HASH_SIPHASH = 2,
- BCH_STR_HASH_NR = 3,
-};
-
-enum bch_compression_opts {
- BCH_COMPRESSION_NONE = 0,
- BCH_COMPRESSION_LZ4 = 1,
- BCH_COMPRESSION_GZIP = 2,
- BCH_COMPRESSION_NR = 3,
-};
-
-/* backing device specific stuff: */
-
-struct backingdev_sb {
- __le64 csum;
- __le64 offset; /* sector where this sb was written */
- __le64 version; /* of on disk format */
-
- uuid_le magic; /* bcache superblock UUID */
-
- uuid_le disk_uuid;
-
- /*
- * Internal cache set UUID - xored with various magic numbers and thus
- * must never change:
- */
- union {
- uuid_le set_uuid;
- __le64 set_magic;
- };
- __u8 label[BCH_SB_LABEL_SIZE];
-
- __le64 flags;
-
- /* Incremented each time superblock is written: */
- __le64 seq;
-
- /*
- * User visible UUID for identifying the cache set, which the user is allowed
- * to change:
- *
- * XXX hooked up?
- */
- uuid_le user_uuid;
- __le64 pad1[6];
-
- __le64 data_offset;
- __le16 block_size; /* sectors */
- __le16 pad2[3];
-
- __le32 last_mount; /* time_t */
- __le16 pad3;
- /* size of variable length portion - always 0 for backingdev superblock */
- __le16 u64s;
- __u64 _data[0];
-};
-
-LE64_BITMASK(BDEV_CACHE_MODE, struct backingdev_sb, flags, 0, 4);
-#define CACHE_MODE_WRITETHROUGH 0U
-#define CACHE_MODE_WRITEBACK 1U
-#define CACHE_MODE_WRITEAROUND 2U
-#define CACHE_MODE_NONE 3U
-
-LE64_BITMASK(BDEV_STATE, struct backingdev_sb, flags, 61, 63);
-#define BDEV_STATE_NONE 0U
-#define BDEV_STATE_CLEAN 1U
-#define BDEV_STATE_DIRTY 2U
-#define BDEV_STATE_STALE 3U
-
-#define BDEV_DATA_START_DEFAULT 16 /* sectors */
-
-static inline _Bool __SB_IS_BDEV(__u64 version)
-{
- return version == BCACHE_SB_VERSION_BDEV
- || version == BCACHE_SB_VERSION_BDEV_WITH_OFFSET;
-}
-
-static inline _Bool SB_IS_BDEV(const struct bch_sb *sb)
-{
- return __SB_IS_BDEV(sb->version);
-}
-
-/*
- * Magic numbers
- *
- * The various other data structures have their own magic numbers, which are
- * xored with the first part of the cache set's UUID
- */
-
-#define BCACHE_MAGIC \
- UUID_LE(0xf67385c6, 0x1a4e, 0xca45, \
- 0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81)
-
-#define BCACHE_STATFS_MAGIC 0xca451a4e
-
-#define JSET_MAGIC __cpu_to_le64(0x245235c1a3625032ULL)
-#define PSET_MAGIC __cpu_to_le64(0x6750e15f87337f91ULL)
-#define BSET_MAGIC __cpu_to_le64(0x90135c78b99e07f5ULL)
-
-static inline __le64 __bch_sb_magic(struct bch_sb *sb)
-{
- __le64 ret;
- memcpy(&ret, &sb->uuid, sizeof(ret));
- return ret;
-}
-
-static inline __u64 __jset_magic(struct bch_sb *sb)
-{
- return __le64_to_cpu(__bch_sb_magic(sb) ^ JSET_MAGIC);
-}
-
-static inline __u64 __pset_magic(struct bch_sb *sb)
-{
- return __le64_to_cpu(__bch_sb_magic(sb) ^ PSET_MAGIC);
-}
-
-static inline __u64 __bset_magic(struct bch_sb *sb)
-{
- return __le64_to_cpu(__bch_sb_magic(sb) ^ BSET_MAGIC);
-}
-
-/* Journal */
-
-#define BCACHE_JSET_VERSION_UUIDv1 1
-#define BCACHE_JSET_VERSION_UUID 1 /* Always latest UUID format */
-#define BCACHE_JSET_VERSION_JKEYS 2
-#define BCACHE_JSET_VERSION 2
-
-struct jset_entry {
- __le16 u64s;
- __u8 btree_id;
- __u8 level;
- __le32 flags; /* designates what this jset holds */
-
- union {
- struct bkey_i start[0];
- __u64 _data[0];
- };
-};
-
-#define JSET_KEYS_U64s (sizeof(struct jset_entry) / sizeof(__u64))
-
-LE32_BITMASK(JOURNAL_ENTRY_TYPE, struct jset_entry, flags, 0, 8);
-enum {
- JOURNAL_ENTRY_BTREE_KEYS = 0,
- JOURNAL_ENTRY_BTREE_ROOT = 1,
- JOURNAL_ENTRY_PRIO_PTRS = 2,
-
- /*
- * Journal sequence numbers can be blacklisted: bsets record the max
- * sequence number of all the journal entries they contain updates for,
- * so that on recovery we can ignore those bsets that contain index
- * updates newer than what made it into the journal.
- *
- * This means that we can't reuse that journal_seq - we have to skip it,
- * and then record that we skipped it so that the next time we crash and
- * recover we don't think there was a missing journal entry.
- */
- JOURNAL_ENTRY_JOURNAL_SEQ_BLACKLISTED = 3,
-};
-
-/*
- * On disk format for a journal entry:
- * seq is monotonically increasing; every journal entry has its own unique
- * sequence number.
- *
- * last_seq is the oldest journal entry that still has keys the btree hasn't
- * flushed to disk yet.
- *
- * version is for on disk format changes.
- */
-struct jset {
- struct bch_csum csum;
-
- __le64 magic;
- __le64 seq;
- __le32 version;
- __le32 flags;
-
- __le32 u64s; /* size of d[] in u64s */
-
- __u8 encrypted_start[0];
-
- __le16 read_clock;
- __le16 write_clock;
-
- /* Sequence number of oldest dirty journal entry */
- __le64 last_seq;
-
-
- union {
- struct jset_entry start[0];
- __u64 _data[0];
- };
-} __attribute__((packed));
-
-LE32_BITMASK(JSET_CSUM_TYPE, struct jset, flags, 0, 4);
-LE32_BITMASK(JSET_BIG_ENDIAN, struct jset, flags, 4, 5);
-
-#define BCH_JOURNAL_BUCKETS_MIN 20
-
-/* Bucket prios/gens */
-
-struct prio_set {
- struct bch_csum csum;
-
- __le64 magic;
- __le32 nonce[3];
- __le16 version;
- __le16 flags;
-
- __u8 encrypted_start[0];
-
- __le64 next_bucket;
-
- struct bucket_disk {
- __le16 read_prio;
- __le16 write_prio;
- __u8 gen;
- } __attribute__((packed)) data[];
-} __attribute__((packed));
-
-LE32_BITMASK(PSET_CSUM_TYPE, struct prio_set, flags, 0, 4);
-
-/* Btree: */
-
-#define DEFINE_BCH_BTREE_IDS() \
- DEF_BTREE_ID(EXTENTS, 0, "extents") \
- DEF_BTREE_ID(INODES, 1, "inodes") \
- DEF_BTREE_ID(DIRENTS, 2, "dirents") \
- DEF_BTREE_ID(XATTRS, 3, "xattrs")
-
-#define DEF_BTREE_ID(kwd, val, name) BTREE_ID_##kwd = val,
-
-enum btree_id {
- DEFINE_BCH_BTREE_IDS()
- BTREE_ID_NR
-};
-
-#undef DEF_BTREE_ID
-
-#define BTREE_MAX_DEPTH 4U
-
-/* Btree nodes */
-
-/* Version 1: Seed pointer into btree node checksum
- */
-#define BCACHE_BSET_CSUM 1
-#define BCACHE_BSET_KEY_v1 2
-#define BCACHE_BSET_JOURNAL_SEQ 3
-#define BCACHE_BSET_VERSION 3
-
-/*
- * Btree nodes
- *
- * On disk a btree node is a list/log of these; within each set the keys are
- * sorted
- */
-struct bset {
- __le64 seq;
-
- /*
- * Highest journal entry this bset contains keys for.
- * If on recovery we don't see that journal entry, this bset is ignored:
- * this allows us to preserve the order of all index updates after a
- * crash, since the journal records a total order of all index updates
- * and anything that didn't make it to the journal doesn't get used.
- */
- __le64 journal_seq;
-
- __le32 flags;
- __le16 version;
- __le16 u64s; /* count of d[] in u64s */
-
- union {
- struct bkey_packed start[0];
- __u64 _data[0];
- };
-} __attribute__((packed));
-
-LE32_BITMASK(BSET_CSUM_TYPE, struct bset, flags, 0, 4);
-
-LE32_BITMASK(BSET_BIG_ENDIAN, struct bset, flags, 4, 5);
-LE32_BITMASK(BSET_SEPARATE_WHITEOUTS,
- struct bset, flags, 5, 6);
-
-struct btree_node {
- struct bch_csum csum;
- __le64 magic;
-
- /* this flags field is encrypted, unlike bset->flags: */
- __le64 flags;
-
- /* Closed interval: */
- struct bpos min_key;
- struct bpos max_key;
- struct bch_extent_ptr ptr;
- struct bkey_format format;
-
- union {
- struct bset keys;
- struct {
- __u8 pad[22];
- __le16 u64s;
- __u64 _data[0];
-
- };
- };
-} __attribute__((packed));
-
-LE64_BITMASK(BTREE_NODE_ID, struct btree_node, flags, 0, 4);
-LE64_BITMASK(BTREE_NODE_LEVEL, struct btree_node, flags, 4, 8);
-
-struct btree_node_entry {
- struct bch_csum csum;
-
- union {
- struct bset keys;
- struct {
- __u8 pad[22];
- __le16 u64s;
- __u64 _data[0];
-
- };
- };
-} __attribute__((packed));
-
-/* OBSOLETE */
-
-#define BITMASK(name, type, field, offset, end) \
-static const unsigned name##_OFFSET = offset; \
-static const unsigned name##_BITS = (end - offset); \
-static const __u64 name##_MAX = (1ULL << (end - offset)) - 1; \
- \
-static inline __u64 name(const type *k) \
-{ return (k->field >> offset) & ~(~0ULL << (end - offset)); } \
- \
-static inline void SET_##name(type *k, __u64 v) \
-{ \
- k->field &= ~(~(~0ULL << (end - offset)) << offset); \
- k->field |= (v & ~(~0ULL << (end - offset))) << offset; \
-}
-
-struct bkey_v0 {
- __u64 high;
- __u64 low;
- __u64 ptr[];
-};
-
-#define KEY0_FIELD(name, field, offset, size) \
- BITMASK(name, struct bkey_v0, field, offset, size)
-
-KEY0_FIELD(KEY0_PTRS, high, 60, 63)
-KEY0_FIELD(KEY0_CSUM, high, 56, 58)
-KEY0_FIELD(KEY0_DIRTY, high, 36, 37)
-
-KEY0_FIELD(KEY0_SIZE, high, 20, 36)
-KEY0_FIELD(KEY0_INODE, high, 0, 20)
-
-static inline unsigned long bkey_v0_u64s(const struct bkey_v0 *k)
-{
- return (sizeof(struct bkey_v0) / sizeof(__u64)) + KEY0_PTRS(k);
-}
-
-static inline struct bkey_v0 *bkey_v0_next(const struct bkey_v0 *k)
-{
- __u64 *d = (__u64 *) k;
-
- return (struct bkey_v0 *) (d + bkey_v0_u64s(k));
-}
-
-struct jset_v0 {
- __u64 csum;
- __u64 magic;
- __u64 seq;
- __u32 version;
- __u32 keys;
-
- __u64 last_seq;
-
- __BKEY_PADDED(uuid_bucket, 4);
- __BKEY_PADDED(btree_root, 4);
- __u16 btree_level;
- __u16 pad[3];
-
- __u64 prio_bucket[64];
-
- union {
- struct bkey start[0];
- __u64 d[0];
- };
-};
-
-/* UUIDS - per backing device/flash only volume metadata */
-
-struct uuid_entry_v0 {
- uuid_le uuid;
- __u8 label[32];
- __u32 first_reg;
- __u32 last_reg;
- __u32 invalidated;
- __u32 pad;
-};
-
-struct uuid_entry {
- union {
- struct {
- uuid_le uuid;
- __u8 label[32];
- __u32 first_reg;
- __u32 last_reg;
- __u32 invalidated;
-
- __u32 flags;
- /* Size of flash only volumes */
- __u64 sectors;
- };
-
- __u8 pad[128];
- };
-};
-
-BITMASK(UUID_FLASH_ONLY, struct uuid_entry, flags, 0, 1);
-
-#ifdef __cplusplus
-}
-#endif
-#endif /* _LINUX_BCACHE_H */
-
-/* vim: set foldnestmax=2: */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 37a04a32..1c793b51 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -85,6 +85,12 @@ struct block_device {
void generic_make_request(struct bio *);
int submit_bio_wait(struct bio *);
+
+static inline void submit_bio(struct bio *bio)
+{
+ generic_make_request(bio);
+}
+
int blkdev_issue_discard(struct block_device *, sector_t,
sector_t, gfp_t, unsigned long);
diff --git a/include/linux/closure.h b/include/linux/closure.h
new file mode 100644
index 00000000..33280d30
--- /dev/null
+++ b/include/linux/closure.h
@@ -0,0 +1,385 @@
+#ifndef _LINUX_CLOSURE_H
+#define _LINUX_CLOSURE_H
+
+#include <linux/llist.h>
+#include <linux/sched.h>
+#include <linux/workqueue.h>
+
+/*
+ * Closure is perhaps the most overused and abused term in computer science, but
+ * since I've been unable to come up with anything better you're stuck with it
+ * again.
+ *
+ * What are closures?
+ *
+ * They embed a refcount. The basic idea is they count "things that are in
+ * progress" - in flight bios, some other thread that's doing something else -
+ * anything you might want to wait on.
+ *
+ * The refcount may be manipulated with closure_get() and closure_put().
+ * closure_put() is where many of the interesting things happen, when it causes
+ * the refcount to go to 0.
+ *
+ * Closures can be used to wait on things both synchronously and asynchronously,
+ * and synchronous and asynchronous use can be mixed without restriction. To
+ * wait synchronously, use closure_sync() - you will sleep until your closure's
+ * refcount hits 1.
+ *
+ * To wait asynchronously, use
+ * continue_at(cl, next_function, workqueue);
+ *
+ * passing it, as you might expect, the function to run when nothing is pending
+ * and the workqueue to run that function out of.
+ *
+ * continue_at() also, critically, requires a 'return' immediately following the
+ * location where this macro is referenced, to return to the calling function.
+ * There's good reason for this.
+ *
+ * To safely use closures asynchronously, they must always have a refcount while
+ * they are running, owned by the thread that is running them. Otherwise, suppose
+ * you submit some bios and wish to have a function run when they all complete:
+ *
+ * foo_endio(struct bio *bio)
+ * {
+ * closure_put(cl);
+ * }
+ *
+ * closure_init(cl);
+ *
+ * do_stuff();
+ * closure_get(cl);
+ * bio1->bi_endio = foo_endio;
+ * bio_submit(bio1);
+ *
+ * do_more_stuff();
+ * closure_get(cl);
+ * bio2->bi_endio = foo_endio;
+ * bio_submit(bio2);
+ *
+ * continue_at(cl, complete_some_read, system_wq);
+ *
+ * If closure's refcount started at 0, complete_some_read() could run before the
+ * second bio was submitted - which is almost always not what you want! More
+ * importantly, it wouldn't be possible to say whether the original thread or
+ * complete_some_read()'s thread owned the closure - and whatever state it was
+ * associated with!
+ *
+ * So, closure_init() initializes a closure's refcount to 1 - and when a
+ * closure_fn is run, the refcount will be reset to 1 first.
+ *
+ * Then, the rule is - if you got the refcount with closure_get(), release it
+ * with closure_put() (i.e. in a bio->bi_endio function). If you have a refcount
+ * on a closure because you called closure_init() or you were run out of a
+ * closure - _always_ use continue_at(). Doing so consistently will help
+ * eliminate an entire class of particularly pernicious races.
+ *
+ * Lastly, you might have a wait list dedicated to a specific event, and have no
+ * need for specifying the condition - you just want to wait until someone runs
+ * closure_wake_up() on the appropriate wait list. In that case, just use
+ * closure_wait(). It will return either true or false, depending on whether the
+ * closure was already on a wait list or not - a closure can only be on one wait
+ * list at a time.
+ *
+ * Parents:
+ *
+ * closure_init() takes two arguments - it takes the closure to initialize, and
+ * a (possibly null) parent.
+ *
+ * If parent is non null, the new closure will have a refcount for its lifetime;
+ * a closure is considered to be "finished" when its refcount hits 0 and the
+ * function to run is null. Hence
+ *
+ * continue_at(cl, NULL, NULL);
+ *
+ * returns up the (spaghetti) stack of closures, precisely like normal return
+ * returns up the C stack. continue_at() with non null fn is better thought of
+ * as doing a tail call.
+ *
+ * All this implies that a closure should typically be embedded in a particular
+ * struct (which its refcount will normally control the lifetime of), and that
+ * struct can very much be thought of as a stack frame.
+ */
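A concrete shape for the pattern the comment describes, with every example_* name invented for illustration: an operation embeds a closure in its own struct, takes one ref per in-flight bio, and continues onto a workqueue once the last ref is dropped.

/* Hypothetical usage sketch - the example_* names are not from the tree. */
struct example_op {
	struct closure		cl;
	/* ...operation state... */
};

void example_complete(struct example_op *);		/* hypothetical */
struct bio *example_next_bio(struct example_op *);	/* hypothetical */

static void example_endio(struct bio *bio)
{
	struct example_op *op = bio->bi_private;	/* set at submit time */

	closure_put(&op->cl);
}

static void example_op_finish(struct closure *cl)
{
	struct example_op *op = container_of(cl, struct example_op, cl);

	/* every submitted bio has completed by the time this runs */
	example_complete(op);
	closure_return(cl);		/* drops the ref on the parent, if any */
}

static void example_op_start(struct example_op *op, struct closure *parent)
{
	struct bio *bio;

	closure_init(&op->cl, parent);		/* refcount starts at 1 */

	while ((bio = example_next_bio(op))) {
		closure_get(&op->cl);		/* one ref per in-flight bio */
		bio->bi_private	= op;
		bio->bi_end_io	= example_endio;
		submit_bio(bio);
	}

	/* drop our original ref; example_op_finish() runs out of system_wq
	 * after the last example_endio() -> closure_put() */
	continue_at(&op->cl, example_op_finish, system_wq);
}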
+
+struct closure;
+struct closure_syncer;
+typedef void (closure_fn) (struct closure *);
+
+struct closure_waitlist {
+ struct llist_head list;
+};
+
+enum closure_state {
+ /*
+ * CLOSURE_WAITING: Set iff the closure is on a waitlist. Must be set by
+ * the thread that owns the closure, and cleared by the thread that's
+ * waking up the closure.
+ *
+ * The rest are for debugging and don't affect behaviour:
+ *
+ * CLOSURE_RUNNING: Set when a closure is running (i.e. by
+ * closure_init() and when closure_put() runs the next function), and
+ * must be cleared before remaining hits 0. Primarily to help guard
+ * against incorrect usage and accidentally transferring references.
+ * continue_at() and closure_return() clear it for you, if you're doing
+ * something unusual you can use closure_set_dead() which also helps
+ * annotate where references are being transferred.
+ */
+
+ CLOSURE_BITS_START = (1U << 27),
+ CLOSURE_DESTRUCTOR = (1U << 27),
+ CLOSURE_WAITING = (1U << 29),
+ CLOSURE_RUNNING = (1U << 31),
+};
+
+#define CLOSURE_GUARD_MASK \
+ ((CLOSURE_DESTRUCTOR|CLOSURE_WAITING|CLOSURE_RUNNING) << 1)
+
+#define CLOSURE_REMAINING_MASK (CLOSURE_BITS_START - 1)
+#define CLOSURE_REMAINING_INITIALIZER (1|CLOSURE_RUNNING)
+
+struct closure {
+ union {
+ struct {
+ struct workqueue_struct *wq;
+ struct closure_syncer *s;
+ struct llist_node list;
+ closure_fn *fn;
+ };
+ struct work_struct work;
+ };
+
+ struct closure *parent;
+
+ atomic_t remaining;
+
+#ifdef CONFIG_DEBUG_CLOSURES
+#define CLOSURE_MAGIC_DEAD 0xc054dead
+#define CLOSURE_MAGIC_ALIVE 0xc054a11e
+
+ unsigned magic;
+ struct list_head all;
+ unsigned long ip;
+ unsigned long waiting_on;
+#endif
+};
+
+void closure_sub(struct closure *cl, int v);
+void closure_put(struct closure *cl);
+void __closure_wake_up(struct closure_waitlist *list);
+bool closure_wait(struct closure_waitlist *list, struct closure *cl);
+void __closure_sync(struct closure *cl);
+
+/**
+ * closure_sync - sleep until a closure has nothing left to wait on
+ *
+ * Sleeps until the refcount hits 1 - the thread that's running the closure owns
+ * the last refcount.
+ */
+static inline void closure_sync(struct closure *cl)
+{
+ if ((atomic_read(&cl->remaining) & CLOSURE_REMAINING_MASK) != 1)
+ __closure_sync(cl);
+}
+
+#ifdef CONFIG_DEBUG_CLOSURES
+
+void closure_debug_create(struct closure *cl);
+void closure_debug_destroy(struct closure *cl);
+
+#else
+
+static inline void closure_debug_create(struct closure *cl) {}
+static inline void closure_debug_destroy(struct closure *cl) {}
+
+#endif
+
+static inline void closure_set_ip(struct closure *cl)
+{
+#ifdef CONFIG_DEBUG_CLOSURES
+ cl->ip = _THIS_IP_;
+#endif
+}
+
+static inline void closure_set_ret_ip(struct closure *cl)
+{
+#ifdef CONFIG_DEBUG_CLOSURES
+ cl->ip = _RET_IP_;
+#endif
+}
+
+static inline void closure_set_waiting(struct closure *cl, unsigned long f)
+{
+#ifdef CONFIG_DEBUG_CLOSURES
+ cl->waiting_on = f;
+#endif
+}
+
+static inline void closure_set_stopped(struct closure *cl)
+{
+ atomic_sub(CLOSURE_RUNNING, &cl->remaining);
+}
+
+static inline void set_closure_fn(struct closure *cl, closure_fn *fn,
+ struct workqueue_struct *wq)
+{
+ closure_set_ip(cl);
+ cl->fn = fn;
+ cl->wq = wq;
+ /* between atomic_dec() in closure_put() */
+ smp_mb__before_atomic();
+}
+
+static inline void closure_queue(struct closure *cl)
+{
+ struct workqueue_struct *wq = cl->wq;
+
+ if (wq) {
+ INIT_WORK(&cl->work, cl->work.func);
+ queue_work(wq, &cl->work);
+ } else
+ cl->fn(cl);
+}
+
+/**
+ * closure_get - increment a closure's refcount
+ */
+static inline void closure_get(struct closure *cl)
+{
+#ifdef CONFIG_DEBUG_CLOSURES
+ BUG_ON((atomic_inc_return(&cl->remaining) &
+ CLOSURE_REMAINING_MASK) <= 1);
+#else
+ atomic_inc(&cl->remaining);
+#endif
+}
+
+/**
+ * closure_init - Initialize a closure, setting the refcount to 1
+ * @cl: closure to initialize
+ * @parent: parent of the new closure. cl will take a refcount on it for its
+ * lifetime; may be NULL.
+ */
+static inline void closure_init(struct closure *cl, struct closure *parent)
+{
+ cl->fn = NULL;
+ cl->parent = parent;
+ if (parent)
+ closure_get(parent);
+
+ atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
+
+ closure_debug_create(cl);
+ closure_set_ip(cl);
+}
+
+static inline void closure_init_stack(struct closure *cl)
+{
+ memset(cl, 0, sizeof(struct closure));
+ atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
+}
+
+/**
+ * closure_wake_up - wake up all closures on a wait list.
+ */
+static inline void closure_wake_up(struct closure_waitlist *list)
+{
+ smp_mb();
+ __closure_wake_up(list);
+}
+
+#define continue_at_noreturn(_cl, _fn, _wq) \
+do { \
+ set_closure_fn(_cl, _fn, _wq); \
+ closure_sub(_cl, CLOSURE_RUNNING + 1); \
+} while (0)
+
+/**
+ * continue_at - jump to another function with barrier
+ *
+ * After @cl is no longer waiting on anything (i.e. all outstanding refs have
+ * been dropped with closure_put()), it will resume execution at @fn running out
+ * of @wq (or, if @wq is NULL, @fn will be called by closure_put() directly).
+ *
+ * NOTE: This macro expands to a return in the calling function!
+ *
+ * This is because after calling continue_at() you no longer have a ref on @cl,
+ * and whatever @cl owns may be freed out from under you - a running closure fn
+ * has a ref on its own closure which continue_at() drops.
+ */
+#define continue_at(_cl, _fn, _wq) \
+do { \
+ continue_at_noreturn(_cl, _fn, _wq); \
+ return; \
+} while (0)
+
+/**
+ * closure_return - finish execution of a closure
+ *
+ * This is used to indicate that @cl is finished: when all outstanding refs on
+ * @cl have been dropped @cl's ref on its parent closure (as passed to
+ * closure_init()) will be dropped, if one was specified - thus this can be
+ * thought of as returning to the parent closure.
+ */
+#define closure_return(_cl) continue_at((_cl), NULL, NULL)
+
+/**
+ * continue_at_nobarrier - jump to another function without barrier
+ *
+ * Causes @fn to be executed out of @cl, in @wq context (or called directly if
+ * @wq is NULL).
+ *
+ * NOTE: like continue_at(), this macro expands to a return in the caller!
+ *
+ * The ref the caller of continue_at_nobarrier() had on @cl is now owned by @fn,
+ * thus it's not safe to touch anything protected by @cl after a
+ * continue_at_nobarrier().
+ */
+#define continue_at_nobarrier(_cl, _fn, _wq) \
+do { \
+ closure_set_ip(_cl); \
+ if (_wq) { \
+ INIT_WORK(&(_cl)->work, (void *) _fn); \
+ queue_work((_wq), &(_cl)->work); \
+ } else { \
+ (_fn)(_cl); \
+ } \
+ return; \
+} while (0)
+
+#define closure_return_with_destructor_noreturn(_cl, _destructor) \
+do { \
+ set_closure_fn(_cl, _destructor, NULL); \
+ closure_sub(_cl, CLOSURE_RUNNING - CLOSURE_DESTRUCTOR + 1); \
+} while (0)
+
+/**
+ * closure_return_with_destructor - finish execution of a closure, with destructor
+ *
+ * Works like closure_return(), except @destructor will be called when all
+ * outstanding refs on @cl have been dropped; @destructor may be used to safely
+ * free the memory occupied by @cl, and it is called with the ref on the parent
+ * closure still held - so @destructor could safely return an item to a
+ * freelist protected by @cl's parent.
+ */
+#define closure_return_with_destructor(_cl, _destructor) \
+do { \
+ closure_return_with_destructor_noreturn(_cl, _destructor); \
+ return; \
+} while (0)
+
+/**
+ * closure_call - execute @fn out of a new, uninitialized closure
+ *
+ * Typically used when running out of one closure, and we want to run @fn
+ * asynchronously out of a new closure - @parent will then wait for @cl to
+ * finish.
+ */
+static inline void closure_call(struct closure *cl, closure_fn fn,
+ struct workqueue_struct *wq,
+ struct closure *parent)
+{
+ closure_init(cl, parent);
+ continue_at_nobarrier(cl, fn, wq);
+}
+
+#endif /* _LINUX_CLOSURE_H */
diff --git a/include/trace/events/bcache.h b/include/trace/events/bcachefs.h
index b39fdde7..7dea9d63 100644
--- a/include/trace/events/bcache.h
+++ b/include/trace/events/bcachefs.h
@@ -1,52 +1,11 @@
#undef TRACE_SYSTEM
-#define TRACE_SYSTEM bcache
+#define TRACE_SYSTEM bcachefs
#if !defined(_TRACE_BCACHE_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_BCACHE_H
#include <linux/tracepoint.h>
-struct bcache_device;
-struct bio;
-struct bkey;
-struct btree;
-struct bch_dev;
-struct bch_fs;
-struct keylist;
-struct moving_queue;
-
-DECLARE_EVENT_CLASS(bcache_request,
- TP_PROTO(struct bcache_device *d, struct bio *bio),
- TP_ARGS(d, bio),
-
- TP_STRUCT__entry(
- __field(dev_t, dev )
- __field(unsigned int, orig_major )
- __field(unsigned int, orig_minor )
- __field(sector_t, sector )
- __field(sector_t, orig_sector )
- __field(unsigned int, nr_sector )
- __array(char, rwbs, 6 )
- ),
-
- TP_fast_assign(
- __entry->dev = bio->bi_bdev->bd_dev;
- __entry->orig_major = d->disk->major;
- __entry->orig_minor = d->disk->first_minor;
- __entry->sector = bio->bi_iter.bi_sector;
- __entry->orig_sector = bio->bi_iter.bi_sector - 16;
- __entry->nr_sector = bio->bi_iter.bi_size >> 9;
- blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf,
- bio->bi_iter.bi_size);
- ),
-
- TP_printk("%d,%d %s %llu + %u (from %d,%d @ %llu)",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->rwbs, (unsigned long long)__entry->sector,
- __entry->nr_sector, __entry->orig_major, __entry->orig_minor,
- (unsigned long long)__entry->orig_sector)
-);
-
DECLARE_EVENT_CLASS(bpos,
TP_PROTO(struct bpos p),
TP_ARGS(p),
@@ -84,73 +43,47 @@ DECLARE_EVENT_CLASS(bkey,
__entry->offset, __entry->size)
);
-/* request.c */
-
-DEFINE_EVENT(bcache_request, bcache_request_start,
- TP_PROTO(struct bcache_device *d, struct bio *bio),
- TP_ARGS(d, bio)
-);
-
-DEFINE_EVENT(bcache_request, bcache_request_end,
- TP_PROTO(struct bcache_device *d, struct bio *bio),
- TP_ARGS(d, bio)
-);
-
-DECLARE_EVENT_CLASS(bcache_bio,
- TP_PROTO(struct bio *bio),
- TP_ARGS(bio),
+DECLARE_EVENT_CLASS(bch_dev,
+ TP_PROTO(struct bch_dev *ca),
+ TP_ARGS(ca),
TP_STRUCT__entry(
- __field(dev_t, dev )
- __field(sector_t, sector )
- __field(unsigned int, nr_sector )
- __array(char, rwbs, 6 )
+ __array(char, uuid, 16 )
+ __field(unsigned, tier )
),
TP_fast_assign(
- __entry->dev = bio->bi_bdev->bd_dev;
- __entry->sector = bio->bi_iter.bi_sector;
- __entry->nr_sector = bio->bi_iter.bi_size >> 9;
- blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf,
- bio->bi_iter.bi_size);
+ memcpy(__entry->uuid, ca->uuid.b, 16);
+ __entry->tier = ca->mi.tier;
),
- TP_printk("%d,%d %s %llu + %u",
- MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
- (unsigned long long)__entry->sector, __entry->nr_sector)
+ TP_printk("%pU tier %u", __entry->uuid, __entry->tier)
);
-DEFINE_EVENT(bcache_bio, bcache_bypass_sequential,
- TP_PROTO(struct bio *bio),
- TP_ARGS(bio)
-);
+DECLARE_EVENT_CLASS(bch_fs,
+ TP_PROTO(struct bch_fs *c),
+ TP_ARGS(c),
-DEFINE_EVENT(bcache_bio, bcache_bypass_congested,
- TP_PROTO(struct bio *bio),
- TP_ARGS(bio)
-);
+ TP_STRUCT__entry(
+ __array(char, uuid, 16 )
+ ),
-DEFINE_EVENT(bcache_bio, bcache_promote,
- TP_PROTO(struct bio *bio),
- TP_ARGS(bio)
-);
+ TP_fast_assign(
+ memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
+ ),
-DEFINE_EVENT(bkey, bcache_promote_collision,
- TP_PROTO(const struct bkey *k),
- TP_ARGS(k)
+ TP_printk("%pU", __entry->uuid)
);
-TRACE_EVENT(bcache_read,
- TP_PROTO(struct bio *bio, bool hit, bool bypass),
- TP_ARGS(bio, hit, bypass),
+DECLARE_EVENT_CLASS(bio,
+ TP_PROTO(struct bio *bio),
+ TP_ARGS(bio),
TP_STRUCT__entry(
__field(dev_t, dev )
__field(sector_t, sector )
__field(unsigned int, nr_sector )
__array(char, rwbs, 6 )
- __field(bool, cache_hit )
- __field(bool, bypass )
),
TP_fast_assign(
@@ -159,49 +92,53 @@ TRACE_EVENT(bcache_read,
__entry->nr_sector = bio->bi_iter.bi_size >> 9;
blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf,
bio->bi_iter.bi_size);
- __entry->cache_hit = hit;
- __entry->bypass = bypass;
),
- TP_printk("%d,%d %s %llu + %u hit %u bypass %u",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->rwbs, (unsigned long long)__entry->sector,
- __entry->nr_sector, __entry->cache_hit, __entry->bypass)
+ TP_printk("%d,%d %s %llu + %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+ (unsigned long long)__entry->sector, __entry->nr_sector)
);
-TRACE_EVENT(bcache_write,
- TP_PROTO(struct bch_fs *c, u64 inode, struct bio *bio,
- bool writeback, bool bypass),
- TP_ARGS(c, inode, bio, writeback, bypass),
+DECLARE_EVENT_CLASS(page_alloc_fail,
+ TP_PROTO(struct bch_fs *c, u64 size),
+ TP_ARGS(c, size),
TP_STRUCT__entry(
- __array(char, uuid, 16 )
- __field(u64, inode )
- __field(sector_t, sector )
- __field(unsigned int, nr_sector )
- __array(char, rwbs, 6 )
- __field(bool, writeback )
- __field(bool, bypass )
+ __array(char, uuid, 16 )
+ __field(u64, size )
),
TP_fast_assign(
memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
- __entry->inode = inode;
- __entry->sector = bio->bi_iter.bi_sector;
- __entry->nr_sector = bio->bi_iter.bi_size >> 9;
- blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf,
- bio->bi_iter.bi_size);
- __entry->writeback = writeback;
- __entry->bypass = bypass;
+ __entry->size = size;
),
- TP_printk("%pU inode %llu %s %llu + %u hit %u bypass %u",
- __entry->uuid, __entry->inode,
- __entry->rwbs, (unsigned long long)__entry->sector,
- __entry->nr_sector, __entry->writeback, __entry->bypass)
+ TP_printk("%pU size %llu", __entry->uuid, __entry->size)
+);
+
+/* io.c: */
+
+DEFINE_EVENT(bio, read_split,
+ TP_PROTO(struct bio *bio),
+ TP_ARGS(bio)
);
-TRACE_EVENT(bcache_write_throttle,
+DEFINE_EVENT(bio, read_bounce,
+ TP_PROTO(struct bio *bio),
+ TP_ARGS(bio)
+);
+
+DEFINE_EVENT(bio, read_retry,
+ TP_PROTO(struct bio *bio),
+ TP_ARGS(bio)
+);
+
+DEFINE_EVENT(bio, promote,
+ TP_PROTO(struct bio *bio),
+ TP_ARGS(bio)
+);
+
+TRACE_EVENT(write_throttle,
TP_PROTO(struct bch_fs *c, u64 inode, struct bio *bio, u64 delay),
TP_ARGS(c, inode, bio, delay),
@@ -230,172 +167,24 @@ TRACE_EVENT(bcache_write_throttle,
__entry->nr_sector, __entry->delay)
);
-DEFINE_EVENT(bcache_bio, bcache_read_retry,
- TP_PROTO(struct bio *bio),
- TP_ARGS(bio)
-);
-
-DECLARE_EVENT_CLASS(page_alloc_fail,
- TP_PROTO(struct bch_fs *c, u64 size),
- TP_ARGS(c, size),
-
- TP_STRUCT__entry(
- __array(char, uuid, 16 )
- __field(u64, size )
- ),
-
- TP_fast_assign(
- memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
- __entry->size = size;
- ),
-
- TP_printk("%pU size %llu", __entry->uuid, __entry->size)
-);
-
/* Journal */
-DECLARE_EVENT_CLASS(cache_set,
- TP_PROTO(struct bch_fs *c),
- TP_ARGS(c),
-
- TP_STRUCT__entry(
- __array(char, uuid, 16 )
- ),
-
- TP_fast_assign(
- memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
- ),
-
- TP_printk("%pU", __entry->uuid)
-);
-
-DEFINE_EVENT(bkey, bcache_journal_replay_key,
- TP_PROTO(const struct bkey *k),
- TP_ARGS(k)
-);
-
-TRACE_EVENT(bcache_journal_next_bucket,
- TP_PROTO(struct bch_dev *ca, unsigned cur_idx, unsigned last_idx),
- TP_ARGS(ca, cur_idx, last_idx),
-
- TP_STRUCT__entry(
- __array(char, uuid, 16 )
- __field(unsigned, cur_idx )
- __field(unsigned, last_idx )
- ),
-
- TP_fast_assign(
- memcpy(__entry->uuid, ca->uuid.b, 16);
- __entry->cur_idx = cur_idx;
- __entry->last_idx = last_idx;
- ),
-
- TP_printk("%pU cur %u last %u", __entry->uuid,
- __entry->cur_idx, __entry->last_idx)
-);
-
-TRACE_EVENT(bcache_journal_write_oldest,
- TP_PROTO(struct bch_fs *c, u64 seq),
- TP_ARGS(c, seq),
-
- TP_STRUCT__entry(
- __array(char, uuid, 16 )
- __field(u64, seq )
- ),
-
- TP_fast_assign(
- memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
- __entry->seq = seq;
- ),
-
- TP_printk("%pU seq %llu", __entry->uuid, __entry->seq)
-);
-
-TRACE_EVENT(bcache_journal_write_oldest_done,
- TP_PROTO(struct bch_fs *c, u64 seq, unsigned written),
- TP_ARGS(c, seq, written),
-
- TP_STRUCT__entry(
- __array(char, uuid, 16 )
- __field(u64, seq )
- __field(unsigned, written )
- ),
-
- TP_fast_assign(
- memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
- __entry->seq = seq;
- __entry->written = written;
- ),
-
- TP_printk("%pU seq %llu written %u", __entry->uuid, __entry->seq,
- __entry->written)
-);
-
-DEFINE_EVENT(cache_set, bcache_journal_full,
+DEFINE_EVENT(bch_fs, journal_full,
TP_PROTO(struct bch_fs *c),
TP_ARGS(c)
);
-DEFINE_EVENT(cache_set, bcache_journal_entry_full,
+DEFINE_EVENT(bch_fs, journal_entry_full,
TP_PROTO(struct bch_fs *c),
TP_ARGS(c)
);
-DEFINE_EVENT(bcache_bio, bcache_journal_write,
+DEFINE_EVENT(bio, journal_write,
TP_PROTO(struct bio *bio),
TP_ARGS(bio)
);
-/* Device state changes */
-
-DEFINE_EVENT(cache_set, fs_read_only,
- TP_PROTO(struct bch_fs *c),
- TP_ARGS(c)
-);
-
-DEFINE_EVENT(cache_set, fs_read_only_done,
- TP_PROTO(struct bch_fs *c),
- TP_ARGS(c)
-);
-
-DECLARE_EVENT_CLASS(cache,
- TP_PROTO(struct bch_dev *ca),
- TP_ARGS(ca),
-
- TP_STRUCT__entry(
- __array(char, uuid, 16 )
- __field(unsigned, tier )
- ),
-
- TP_fast_assign(
- memcpy(__entry->uuid, ca->uuid.b, 16);
- __entry->tier = ca->mi.tier;
- ),
-
- TP_printk("%pU tier %u", __entry->uuid, __entry->tier)
-);
-
-DEFINE_EVENT(cache, bcache_cache_read_only,
- TP_PROTO(struct bch_dev *ca),
- TP_ARGS(ca)
-);
-
-DEFINE_EVENT(cache, bcache_cache_read_only_done,
- TP_PROTO(struct bch_dev *ca),
- TP_ARGS(ca)
-);
-
-DEFINE_EVENT(cache, bcache_cache_read_write,
- TP_PROTO(struct bch_dev *ca),
- TP_ARGS(ca)
-);
-
-DEFINE_EVENT(cache, bcache_cache_read_write_done,
- TP_PROTO(struct bch_dev *ca),
- TP_ARGS(ca)
-);
-
-/* Searching */
+/* bset.c: */
DEFINE_EVENT(bpos, bkey_pack_pos_fail,
TP_PROTO(struct bpos p),
@@ -431,12 +220,12 @@ DECLARE_EVENT_CLASS(btree_node,
__entry->inode, __entry->offset)
);
-DEFINE_EVENT(btree_node, bcache_btree_read,
+DEFINE_EVENT(btree_node, btree_read,
TP_PROTO(struct bch_fs *c, struct btree *b),
TP_ARGS(c, b)
);
-TRACE_EVENT(bcache_btree_write,
+TRACE_EVENT(btree_write,
TP_PROTO(struct btree *b, unsigned bytes, unsigned sectors),
TP_ARGS(b, bytes, sectors),
@@ -456,34 +245,17 @@ TRACE_EVENT(bcache_btree_write,
__entry->type, __entry->bytes, __entry->sectors)
);
-DEFINE_EVENT(btree_node, bcache_btree_node_alloc,
+DEFINE_EVENT(btree_node, btree_node_alloc,
TP_PROTO(struct bch_fs *c, struct btree *b),
TP_ARGS(c, b)
);
-TRACE_EVENT(bcache_btree_node_alloc_fail,
- TP_PROTO(struct bch_fs *c, enum btree_id id),
- TP_ARGS(c, id),
-
- TP_STRUCT__entry(
- __array(char, uuid, 16 )
- __field(enum btree_id, id )
- ),
-
- TP_fast_assign(
- memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
- __entry->id = id;
- ),
-
- TP_printk("%pU id %u", __entry->uuid, __entry->id)
-);
-
-DEFINE_EVENT(btree_node, bcache_btree_node_free,
+DEFINE_EVENT(btree_node, btree_node_free,
TP_PROTO(struct bch_fs *c, struct btree *b),
TP_ARGS(c, b)
);
-TRACE_EVENT(bcache_mca_reap,
+TRACE_EVENT(btree_node_reap,
TP_PROTO(struct bch_fs *c, struct btree *b, int ret),
TP_ARGS(c, b, ret),
@@ -500,33 +272,7 @@ TRACE_EVENT(bcache_mca_reap,
TP_printk("bucket %llu ret %d", __entry->bucket, __entry->ret)
);
-TRACE_EVENT(bcache_mca_scan,
- TP_PROTO(struct bch_fs *c, unsigned touched, unsigned freed,
- unsigned can_free, unsigned long nr),
- TP_ARGS(c, touched, freed, can_free, nr),
-
- TP_STRUCT__entry(
- __array(char, uuid, 16 )
- __field(unsigned long, touched )
- __field(unsigned long, freed )
- __field(unsigned long, can_free )
- __field(unsigned long, nr )
- ),
-
- TP_fast_assign(
- memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
- __entry->touched = touched;
- __entry->freed = freed;
- __entry->can_free = can_free;
- __entry->nr = nr;
- ),
-
- TP_printk("%pU touched %lu freed %lu can_free %lu nr %lu",
- __entry->uuid, __entry->touched, __entry->freed,
- __entry->can_free, __entry->nr)
-);
-
-DECLARE_EVENT_CLASS(mca_cannibalize_lock,
+DECLARE_EVENT_CLASS(btree_node_cannibalize_lock,
TP_PROTO(struct bch_fs *c),
TP_ARGS(c),
@@ -541,27 +287,47 @@ DECLARE_EVENT_CLASS(mca_cannibalize_lock,
TP_printk("%pU", __entry->uuid)
);
-DEFINE_EVENT(mca_cannibalize_lock, bcache_mca_cannibalize_lock_fail,
+DEFINE_EVENT(btree_node_cannibalize_lock, btree_node_cannibalize_lock_fail,
TP_PROTO(struct bch_fs *c),
TP_ARGS(c)
);
-DEFINE_EVENT(mca_cannibalize_lock, bcache_mca_cannibalize_lock,
+DEFINE_EVENT(btree_node_cannibalize_lock, btree_node_cannibalize_lock,
TP_PROTO(struct bch_fs *c),
TP_ARGS(c)
);
-DEFINE_EVENT(mca_cannibalize_lock, bcache_mca_cannibalize,
+DEFINE_EVENT(btree_node_cannibalize_lock, btree_node_cannibalize,
TP_PROTO(struct bch_fs *c),
TP_ARGS(c)
);
-DEFINE_EVENT(cache_set, bcache_mca_cannibalize_unlock,
+DEFINE_EVENT(bch_fs, btree_node_cannibalize_unlock,
TP_PROTO(struct bch_fs *c),
TP_ARGS(c)
);
-TRACE_EVENT(bcache_btree_insert_key,
+TRACE_EVENT(btree_reserve_get_fail,
+ TP_PROTO(struct bch_fs *c, size_t required, struct closure *cl),
+ TP_ARGS(c, required, cl),
+
+ TP_STRUCT__entry(
+ __array(char, uuid, 16 )
+ __field(size_t, required )
+ __field(struct closure *, cl )
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
+ __entry->required = required;
+ __entry->cl = cl;
+ ),
+
+ TP_printk("%pU required %zu by %p", __entry->uuid,
+ __entry->required, __entry->cl)
+);
+
+TRACE_EVENT(btree_insert_key,
TP_PROTO(struct bch_fs *c, struct btree *b, struct bkey_i *k),
TP_ARGS(c, b, k),
@@ -620,24 +386,24 @@ DECLARE_EVENT_CLASS(btree_split,
__entry->inode, __entry->offset, __entry->keys)
);
-DEFINE_EVENT(btree_split, bcache_btree_node_split,
+DEFINE_EVENT(btree_split, btree_node_split,
TP_PROTO(struct bch_fs *c, struct btree *b, unsigned keys),
TP_ARGS(c, b, keys)
);
-DEFINE_EVENT(btree_split, bcache_btree_node_compact,
+DEFINE_EVENT(btree_split, btree_node_compact,
TP_PROTO(struct bch_fs *c, struct btree *b, unsigned keys),
TP_ARGS(c, b, keys)
);
-DEFINE_EVENT(btree_node, bcache_btree_set_root,
+DEFINE_EVENT(btree_node, btree_set_root,
TP_PROTO(struct bch_fs *c, struct btree *b),
TP_ARGS(c, b)
);
/* Garbage collection */
-TRACE_EVENT(bcache_btree_gc_coalesce,
+TRACE_EVENT(btree_gc_coalesce,
TP_PROTO(struct bch_fs *c, struct btree *b, unsigned nodes),
TP_ARGS(c, b, nodes),
@@ -664,7 +430,7 @@ TRACE_EVENT(bcache_btree_gc_coalesce,
__entry->inode, __entry->offset, __entry->nodes)
);
-TRACE_EVENT(bcache_btree_gc_coalesce_fail,
+TRACE_EVENT(btree_gc_coalesce_fail,
TP_PROTO(struct bch_fs *c, int reason),
TP_ARGS(c, reason),
@@ -681,119 +447,54 @@ TRACE_EVENT(bcache_btree_gc_coalesce_fail,
TP_printk("%pU: %u", __entry->uuid, __entry->reason)
);
-TRACE_EVENT(bcache_btree_node_alloc_replacement,
- TP_PROTO(struct bch_fs *c, struct btree *old, struct btree *b),
- TP_ARGS(c, old, b),
-
- TP_STRUCT__entry(
- __array(char, uuid, 16 )
- __field(u64, bucket )
- __field(u64, old_bucket )
- __field(u8, level )
- __field(u8, id )
- __field(u32, inode )
- __field(u64, offset )
- ),
-
- TP_fast_assign(
- memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
- __entry->old_bucket = PTR_BUCKET_NR_TRACE(c,
- &old->key, 0);
- __entry->bucket = PTR_BUCKET_NR_TRACE(c, &b->key, 0);
- __entry->level = b->level;
- __entry->id = b->btree_id;
- __entry->inode = b->key.k.p.inode;
- __entry->offset = b->key.k.p.offset;
- ),
-
- TP_printk("%pU for %llu bucket %llu(%u) id %u: %u:%llu",
- __entry->uuid, __entry->old_bucket, __entry->bucket,
- __entry->level, __entry->id,
- __entry->inode, __entry->offset)
-);
-
-DEFINE_EVENT(btree_node, bcache_btree_gc_rewrite_node,
+DEFINE_EVENT(btree_node, btree_gc_rewrite_node,
TP_PROTO(struct bch_fs *c, struct btree *b),
TP_ARGS(c, b)
);
-DEFINE_EVENT(btree_node, bcache_btree_gc_rewrite_node_fail,
+DEFINE_EVENT(btree_node, btree_gc_rewrite_node_fail,
TP_PROTO(struct bch_fs *c, struct btree *b),
TP_ARGS(c, b)
);
-DEFINE_EVENT(cache_set, bcache_gc_start,
+DEFINE_EVENT(bch_fs, gc_start,
TP_PROTO(struct bch_fs *c),
TP_ARGS(c)
);
-DEFINE_EVENT(cache_set, bcache_gc_end,
+DEFINE_EVENT(bch_fs, gc_end,
TP_PROTO(struct bch_fs *c),
TP_ARGS(c)
);
-DEFINE_EVENT(cache_set, bcache_gc_coalesce_start,
+DEFINE_EVENT(bch_fs, gc_coalesce_start,
TP_PROTO(struct bch_fs *c),
TP_ARGS(c)
);
-DEFINE_EVENT(cache_set, bcache_gc_coalesce_end,
+DEFINE_EVENT(bch_fs, gc_coalesce_end,
TP_PROTO(struct bch_fs *c),
TP_ARGS(c)
);
-DEFINE_EVENT(cache, bcache_sectors_saturated,
+DEFINE_EVENT(bch_dev, sectors_saturated,
TP_PROTO(struct bch_dev *ca),
TP_ARGS(ca)
);
-DEFINE_EVENT(cache_set, bcache_gc_sectors_saturated,
- TP_PROTO(struct bch_fs *c),
- TP_ARGS(c)
-);
-
-DEFINE_EVENT(cache_set, bcache_gc_cannot_inc_gens,
+DEFINE_EVENT(bch_fs, gc_sectors_saturated,
TP_PROTO(struct bch_fs *c),
TP_ARGS(c)
);
-DEFINE_EVENT(cache_set, bcache_gc_periodic,
+DEFINE_EVENT(bch_fs, gc_cannot_inc_gens,
TP_PROTO(struct bch_fs *c),
TP_ARGS(c)
);
-TRACE_EVENT(bcache_mark_bucket,
- TP_PROTO(struct bch_dev *ca, const struct bkey *k,
- const struct bch_extent_ptr *ptr,
- int sectors, bool dirty),
- TP_ARGS(ca, k, ptr, sectors, dirty),
-
- TP_STRUCT__entry(
- __array(char, uuid, 16 )
- __field(u32, inode )
- __field(u64, offset )
- __field(u32, sectors )
- __field(u64, bucket )
- __field(bool, dirty )
- ),
-
- TP_fast_assign(
- memcpy(__entry->uuid, ca->uuid.b, 16);
- __entry->inode = k->p.inode;
- __entry->offset = k->p.offset;
- __entry->sectors = sectors;
- __entry->bucket = PTR_BUCKET_NR(ca, ptr);
- __entry->dirty = dirty;
- ),
-
- TP_printk("%pU %u:%llu sectors %i bucket %llu dirty %i",
- __entry->uuid, __entry->inode, __entry->offset,
- __entry->sectors, __entry->bucket, __entry->dirty)
-);
-
/* Allocator */
-TRACE_EVENT(bcache_alloc_batch,
+TRACE_EVENT(alloc_batch,
TP_PROTO(struct bch_dev *ca, size_t free, size_t total),
TP_ARGS(ca, free, total),
@@ -813,37 +514,17 @@ TRACE_EVENT(bcache_alloc_batch,
__entry->uuid, __entry->free, __entry->total)
);
-TRACE_EVENT(bcache_btree_reserve_get_fail,
- TP_PROTO(struct bch_fs *c, size_t required, struct closure *cl),
- TP_ARGS(c, required, cl),
-
- TP_STRUCT__entry(
- __array(char, uuid, 16 )
- __field(size_t, required )
- __field(struct closure *, cl )
- ),
-
- TP_fast_assign(
- memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
- __entry->required = required;
- __entry->cl = cl;
- ),
-
- TP_printk("%pU required %zu by %p", __entry->uuid,
- __entry->required, __entry->cl)
-);
-
-DEFINE_EVENT(cache, bcache_prio_write_start,
+DEFINE_EVENT(bch_dev, prio_write_start,
TP_PROTO(struct bch_dev *ca),
TP_ARGS(ca)
);
-DEFINE_EVENT(cache, bcache_prio_write_end,
+DEFINE_EVENT(bch_dev, prio_write_end,
TP_PROTO(struct bch_dev *ca),
TP_ARGS(ca)
);
-TRACE_EVENT(bcache_invalidate,
+TRACE_EVENT(invalidate,
TP_PROTO(struct bch_dev *ca, size_t bucket, unsigned sectors),
TP_ARGS(ca, bucket, sectors),
@@ -864,12 +545,12 @@ TRACE_EVENT(bcache_invalidate,
MINOR(__entry->dev), __entry->offset)
);
-DEFINE_EVENT(cache_set, bcache_rescale_prios,
+DEFINE_EVENT(bch_fs, rescale_prios,
TP_PROTO(struct bch_fs *c),
TP_ARGS(c)
);
-DECLARE_EVENT_CLASS(cache_bucket_alloc,
+DECLARE_EVENT_CLASS(bucket_alloc,
TP_PROTO(struct bch_dev *ca, enum alloc_reserve reserve),
TP_ARGS(ca, reserve),
@@ -886,17 +567,17 @@ DECLARE_EVENT_CLASS(cache_bucket_alloc,
TP_printk("%pU reserve %d", __entry->uuid, __entry->reserve)
);
-DEFINE_EVENT(cache_bucket_alloc, bcache_bucket_alloc,
+DEFINE_EVENT(bucket_alloc, bucket_alloc,
TP_PROTO(struct bch_dev *ca, enum alloc_reserve reserve),
TP_ARGS(ca, reserve)
);
-DEFINE_EVENT(cache_bucket_alloc, bcache_bucket_alloc_fail,
+DEFINE_EVENT(bucket_alloc, bucket_alloc_fail,
TP_PROTO(struct bch_dev *ca, enum alloc_reserve reserve),
TP_ARGS(ca, reserve)
);
-TRACE_EVENT(bcache_freelist_empty_fail,
+TRACE_EVENT(freelist_empty_fail,
TP_PROTO(struct bch_fs *c, enum alloc_reserve reserve,
struct closure *cl),
TP_ARGS(c, reserve, cl),
@@ -935,47 +616,16 @@ DECLARE_EVENT_CLASS(open_bucket_alloc,
__entry->uuid, __entry->cl)
);
-DEFINE_EVENT(open_bucket_alloc, bcache_open_bucket_alloc,
+DEFINE_EVENT(open_bucket_alloc, open_bucket_alloc,
TP_PROTO(struct bch_fs *c, struct closure *cl),
TP_ARGS(c, cl)
);
-DEFINE_EVENT(open_bucket_alloc, bcache_open_bucket_alloc_fail,
+DEFINE_EVENT(open_bucket_alloc, open_bucket_alloc_fail,
TP_PROTO(struct bch_fs *c, struct closure *cl),
TP_ARGS(c, cl)
);
-/* Keylists */
-
-TRACE_EVENT(bcache_keyscan,
- TP_PROTO(unsigned nr_found,
- unsigned start_inode, u64 start_offset,
- unsigned end_inode, u64 end_offset),
- TP_ARGS(nr_found,
- start_inode, start_offset,
- end_inode, end_offset),
-
- TP_STRUCT__entry(
- __field(__u32, nr_found )
- __field(__u32, start_inode )
- __field(__u64, start_offset )
- __field(__u32, end_inode )
- __field(__u64, end_offset )
- ),
-
- TP_fast_assign(
- __entry->nr_found = nr_found;
- __entry->start_inode = start_inode;
- __entry->start_offset = start_offset;
- __entry->end_inode = end_inode;
- __entry->end_offset = end_offset;
- ),
-
- TP_printk("found %u keys from %u:%llu to %u:%llu", __entry->nr_found,
- __entry->start_inode, __entry->start_offset,
- __entry->end_inode, __entry->end_offset)
-);
-
/* Moving IO */
DECLARE_EVENT_CLASS(moving_io,
@@ -998,44 +648,39 @@ DECLARE_EVENT_CLASS(moving_io,
__entry->inode, __entry->offset, __entry->sectors)
);
-DEFINE_EVENT(moving_io, bcache_move_read,
+DEFINE_EVENT(moving_io, move_read,
TP_PROTO(struct bkey *k),
TP_ARGS(k)
);
-DEFINE_EVENT(moving_io, bcache_move_read_done,
+DEFINE_EVENT(moving_io, move_read_done,
TP_PROTO(struct bkey *k),
TP_ARGS(k)
);
-DEFINE_EVENT(moving_io, bcache_move_write,
+DEFINE_EVENT(moving_io, move_write,
TP_PROTO(struct bkey *k),
TP_ARGS(k)
);
-DEFINE_EVENT(moving_io, bcache_move_write_done,
- TP_PROTO(struct bkey *k),
- TP_ARGS(k)
-);
-
-DEFINE_EVENT(moving_io, bcache_copy_collision,
+DEFINE_EVENT(moving_io, copy_collision,
TP_PROTO(struct bkey *k),
TP_ARGS(k)
);
/* Copy GC */
-DEFINE_EVENT(page_alloc_fail, bcache_moving_gc_alloc_fail,
+DEFINE_EVENT(page_alloc_fail, moving_gc_alloc_fail,
TP_PROTO(struct bch_fs *c, u64 size),
TP_ARGS(c, size)
);
-DEFINE_EVENT(cache, bcache_moving_gc_start,
+DEFINE_EVENT(bch_dev, moving_gc_start,
TP_PROTO(struct bch_dev *ca),
TP_ARGS(ca)
);
-TRACE_EVENT(bcache_moving_gc_end,
+TRACE_EVENT(moving_gc_end,
TP_PROTO(struct bch_dev *ca, u64 sectors_moved, u64 keys_moved,
u64 buckets_moved),
TP_ARGS(ca, sectors_moved, keys_moved, buckets_moved),
@@ -1059,44 +704,24 @@ TRACE_EVENT(bcache_moving_gc_end,
__entry->buckets_moved)
);
-DEFINE_EVENT(cache, bcache_moving_gc_reserve_empty,
- TP_PROTO(struct bch_dev *ca),
- TP_ARGS(ca)
-);
-
-DEFINE_EVENT(cache, bcache_moving_gc_no_work,
- TP_PROTO(struct bch_dev *ca),
- TP_ARGS(ca)
-);
-
-DEFINE_EVENT(bkey, bcache_gc_copy,
+DEFINE_EVENT(bkey, gc_copy,
TP_PROTO(const struct bkey *k),
TP_ARGS(k)
);
/* Tiering */
-DEFINE_EVENT(cache_set, bcache_tiering_refill_start,
- TP_PROTO(struct bch_fs *c),
- TP_ARGS(c)
-);
-
-DEFINE_EVENT(cache_set, bcache_tiering_refill_end,
- TP_PROTO(struct bch_fs *c),
- TP_ARGS(c)
-);
-
-DEFINE_EVENT(page_alloc_fail, bcache_tiering_alloc_fail,
+DEFINE_EVENT(page_alloc_fail, tiering_alloc_fail,
TP_PROTO(struct bch_fs *c, u64 size),
TP_ARGS(c, size)
);
-DEFINE_EVENT(cache_set, bcache_tiering_start,
+DEFINE_EVENT(bch_fs, tiering_start,
TP_PROTO(struct bch_fs *c),
TP_ARGS(c)
);
-TRACE_EVENT(bcache_tiering_end,
+TRACE_EVENT(tiering_end,
TP_PROTO(struct bch_fs *c, u64 sectors_moved,
u64 keys_moved),
TP_ARGS(c, sectors_moved, keys_moved),
@@ -1117,54 +742,11 @@ TRACE_EVENT(bcache_tiering_end,
__entry->uuid, __entry->sectors_moved, __entry->keys_moved)
);
-DEFINE_EVENT(bkey, bcache_tiering_copy,
+DEFINE_EVENT(bkey, tiering_copy,
TP_PROTO(const struct bkey *k),
TP_ARGS(k)
);
-/* Background writeback */
-
-DEFINE_EVENT(bkey, bcache_writeback,
- TP_PROTO(const struct bkey *k),
- TP_ARGS(k)
-);
-
-DEFINE_EVENT(bkey, bcache_writeback_collision,
- TP_PROTO(const struct bkey *k),
- TP_ARGS(k)
-);
-
-TRACE_EVENT(bcache_writeback_error,
- TP_PROTO(struct bkey *k, bool write, int error),
- TP_ARGS(k, write, error),
-
- TP_STRUCT__entry(
- __field(u32, size )
- __field(u32, inode )
- __field(u64, offset )
- __field(bool, write )
- __field(int, error )
- ),
-
- TP_fast_assign(
- __entry->inode = k->p.inode;
- __entry->offset = k->p.offset;
- __entry->size = k->size;
- __entry->write = write;
- __entry->error = error;
- ),
-
- TP_printk("%u:%llu len %u %s error %d", __entry->inode,
- __entry->offset, __entry->size,
- __entry->write ? "write" : "read",
- __entry->error)
-);
-
-DEFINE_EVENT(page_alloc_fail, bcache_writeback_alloc_fail,
- TP_PROTO(struct bch_fs *c, u64 size),
- TP_ARGS(c, size)
-);
-
#endif /* _TRACE_BCACHE_H */
/* This part must be outside protection */