summaryrefslogtreecommitdiff
path: root/bcache-ondisk.h
diff options
context:
space:
mode:
Diffstat (limited to 'bcache-ondisk.h')
-rw-r--r--bcache-ondisk.h358
1 files changed, 284 insertions, 74 deletions
diff --git a/bcache-ondisk.h b/bcache-ondisk.h
index 8981bbd3..4ceb10bb 100644
--- a/bcache-ondisk.h
+++ b/bcache-ondisk.h
@@ -77,7 +77,7 @@ struct bpos {
#else
#error edit for your odd byteorder.
#endif
-} __attribute__((packed)) __attribute__((aligned(4)));
+} __attribute__((packed, aligned(4)));
#define KEY_INODE_MAX ((__u64)~0ULL)
#define KEY_OFFSET_MAX ((__u64)~0ULL)
@@ -102,6 +102,16 @@ struct bch_val {
__u64 __nothing[0];
};
+struct bversion {
+#if defined(__LITTLE_ENDIAN)
+ __u64 low;
+ __u32 high;
+#elif defined(__BIG_ENDIAN)
+ __u32 high;
+ __u64 low;
+#endif
+} __attribute__((packed, aligned(4)));
+
struct bkey {
__u64 _data[0];
@@ -117,17 +127,17 @@ struct bkey {
#if defined(__LITTLE_ENDIAN)
__u8 pad[1];
- __u32 version;
+ struct bversion version;
__u32 size; /* extent size, in sectors */
struct bpos p;
#elif defined(__BIG_ENDIAN)
struct bpos p;
__u32 size; /* extent size, in sectors */
- __u32 version;
+ struct bversion version;
__u8 pad[1];
#endif
-} __attribute__((packed)) __attribute__((aligned(8)));
+} __attribute__((packed, aligned(8)));
struct bkey_packed {
__u64 _data[0];
@@ -149,7 +159,7 @@ struct bkey_packed {
* to the same size as struct bkey should hopefully be safest.
*/
__u8 pad[sizeof(struct bkey) - 3];
-} __attribute__((packed)) __attribute__((aligned(8)));
+} __attribute__((packed, aligned(8)));
#define BKEY_U64s (sizeof(struct bkey) / sizeof(__u64))
#define KEY_PACKED_BITS_START 24
@@ -164,7 +174,8 @@ enum bch_bkey_fields {
BKEY_FIELD_OFFSET,
BKEY_FIELD_SNAPSHOT,
BKEY_FIELD_SIZE,
- BKEY_FIELD_VERSION,
+ BKEY_FIELD_VERSION_HIGH,
+ BKEY_FIELD_VERSION_LOW,
BKEY_NR_FIELDS,
};
@@ -180,7 +191,8 @@ enum bch_bkey_fields {
bkey_format_field(OFFSET, p.offset), \
bkey_format_field(SNAPSHOT, p.snapshot), \
bkey_format_field(SIZE, size), \
- bkey_format_field(VERSION, version), \
+ bkey_format_field(VERSION_HIGH, version.high), \
+ bkey_format_field(VERSION_LOW, version.low), \
}, \
})
@@ -358,39 +370,46 @@ struct bch_extent_crc32 {
uncompressed_size:8,
csum_type:4,
compression_type:4;
+ __u32 csum;
#elif defined (__BIG_ENDIAN_BITFIELD)
- __u32 csum_type:4,
- compression_type:4,
+ __u32 csum;
+ __u32 compression_type:4,
+ csum_type:4,
uncompressed_size:8,
compressed_size:8,
offset:7,
type:1;
#endif
- __u32 csum;
-} __attribute__((packed)) __attribute__((aligned(8)));
+} __attribute__((packed, aligned(8)));
#define CRC32_EXTENT_SIZE_MAX (1U << 7)
+/* 64k */
+#define BCH_COMPRESSED_EXTENT_MAX 128
+
struct bch_extent_crc64 {
#if defined(__LITTLE_ENDIAN_BITFIELD)
__u64 type:3,
- compressed_size:18,
- uncompressed_size:18,
- offset:17,
+ compressed_size:10,
+ uncompressed_size:10,
+ offset:10,
+ nonce:23,
csum_type:4,
compression_type:4;
#elif defined (__BIG_ENDIAN_BITFIELD)
- __u64 csum_type:4,
- compression_type:4,
- offset:17,
- uncompressed_size:18,
- compressed_size:18,
+ __u64 compression_type:4,
+ csum_type:4,
+ nonce:23,
+ offset:10,
+ uncompressed_size:10,
+ compressed_size:10,
type:3;
#endif
__u64 csum;
-} __attribute__((packed)) __attribute__((aligned(8)));
+} __attribute__((packed, aligned(8)));
-#define CRC64_EXTENT_SIZE_MAX (1U << 17)
+#define CRC64_EXTENT_SIZE_MAX (1U << 10) /* inclusive */
+#define CRC64_NONCE_MAX (1U << 23) /* exclusive */
/*
* @reservation - pointer hasn't been written to, just reserved
@@ -411,10 +430,17 @@ struct bch_extent_ptr {
erasure_coded:1,
type:2;
#endif
-} __attribute__((packed)) __attribute__((aligned(8)));
+} __attribute__((packed, aligned(8)));
union bch_extent_entry {
- __u8 type;
+#if defined(__LITTLE_ENDIAN__) || BITS_PER_LONG == 64
+ unsigned long type;
+#elif BITS_PER_LONG == 32
+ struct {
+ unsigned long pad;
+ unsigned long type;
+ };
+#endif
struct bch_extent_crc32 crc32;
struct bch_extent_crc64 crc64;
struct bch_extent_ptr ptr;
@@ -441,9 +467,29 @@ struct bch_extent {
union bch_extent_entry start[0];
__u64 _data[0];
-} __attribute__((packed)) __attribute__((aligned(8)));
+} __attribute__((packed, aligned(8)));
BKEY_VAL_TYPE(extent, BCH_EXTENT);
+/* Maximum size (in u64s) a single pointer could be: */
+#define BKEY_EXTENT_PTR_U64s_MAX\
+ ((sizeof(struct bch_extent_crc64) + \
+ sizeof(struct bch_extent_ptr)) / sizeof(u64))
+
+/* Maximum possible size of an entire extent value: */
+#if 0
+/* There's a hack in the keylist code that needs to be fixed.. */
+#define BKEY_EXTENT_VAL_U64s_MAX \
+ (BKEY_EXTENT_PTR_U64s_MAX * BCH_REPLICAS_MAX)
+#else
+#define BKEY_EXTENT_VAL_U64s_MAX 8
+#endif
+
+/* Maximum possible size of an entire extent, key + value: */
+#define BKEY_EXTENT_U64s_MAX (BKEY_U64s + BKEY_EXTENT_VAL_U64s_MAX)
+
+#define BKEY_BTREE_PTR_VAL_U64s_MAX BCH_REPLICAS_MAX
+#define BKEY_BTREE_PTR_U64s_MAX (BKEY_U64s + BCH_REPLICAS_MAX)
+
/* Inodes */
#define BLOCKDEV_INODE_MAX 4096
@@ -453,18 +499,8 @@ BKEY_VAL_TYPE(extent, BCH_EXTENT);
enum bch_inode_types {
BCH_INODE_FS = 128,
BCH_INODE_BLOCKDEV = 129,
- BCH_INODE_CACHED_DEV = 130,
};
-enum {
- BCH_FS_PRIVATE_START = 16,
- __BCH_INODE_I_SIZE_DIRTY = 16,
-};
-
-#define BCH_FL_USER_FLAGS ((1U << BCH_FS_PRIVATE_START) - 1)
-
-#define BCH_INODE_I_SIZE_DIRTY (1 << __BCH_INODE_I_SIZE_DIRTY)
-
struct bch_inode {
struct bch_val v;
@@ -478,24 +514,64 @@ struct bch_inode {
__le64 i_mtime;
__le64 i_size;
+ __le64 i_sectors;
__le32 i_uid;
__le32 i_gid;
__le32 i_nlink;
__le32 i_dev;
+
+ __le64 i_hash_seed;
} __attribute__((packed));
BKEY_VAL_TYPE(inode, BCH_INODE_FS);
+enum {
+ /*
+ * User flags (get/settable with FS_IOC_*FLAGS, correspond to FS_*_FL
+ * flags)
+ */
+ __BCH_INODE_SYNC = 0,
+ __BCH_INODE_IMMUTABLE = 1,
+ __BCH_INODE_APPEND = 2,
+ __BCH_INODE_NODUMP = 3,
+ __BCH_INODE_NOATIME = 4,
+
+ __BCH_INODE_I_SIZE_DIRTY= 5,
+ __BCH_INODE_I_SECTORS_DIRTY= 6,
+
+ /* not implemented yet: */
+ __BCH_INODE_HAS_XATTRS = 7, /* has xattrs in xattr btree */
+};
+
+LE32_BITMASK(INODE_STR_HASH_TYPE, struct bch_inode, i_flags, 28, 32);
+
+#define BCH_INODE_SYNC (1 << __BCH_INODE_SYNC)
+#define BCH_INODE_IMMUTABLE (1 << __BCH_INODE_IMMUTABLE)
+#define BCH_INODE_APPEND (1 << __BCH_INODE_APPEND)
+#define BCH_INODE_NODUMP (1 << __BCH_INODE_NODUMP)
+#define BCH_INODE_NOATIME (1 << __BCH_INODE_NOATIME)
+#define BCH_INODE_I_SIZE_DIRTY (1 << __BCH_INODE_I_SIZE_DIRTY)
+#define BCH_INODE_I_SECTORS_DIRTY (1 << __BCH_INODE_I_SECTORS_DIRTY)
+#define BCH_INODE_HAS_XATTRS (1 << __BCH_INODE_HAS_XATTRS)
+
struct bch_inode_blockdev {
struct bch_val v;
- struct bch_inode i_inode;
+
+ __le64 i_size;
+ __le64 i_flags;
+
+ /* Seconds: */
+ __le64 i_ctime;
+ __le64 i_mtime;
uuid_le i_uuid;
__u8 i_label[32];
-} __attribute__((packed));
+} __attribute__((packed, aligned(8)));
BKEY_VAL_TYPE(inode_blockdev, BCH_INODE_BLOCKDEV);
+/* Thin provisioned volume, or cache for another block device? */
+LE64_BITMASK(CACHED_DEV, struct bch_inode_blockdev, i_flags, 0, 1)
/* Dirents */
/*
@@ -644,7 +720,9 @@ struct cache_sb {
* to change:
*/
uuid_le user_uuid;
- __le64 pad1[6];
+
+ __le64 flags2;
+ __le64 encryption_key[5];
/* Number of cache_member entries: */
__u8 nr_in_set;
@@ -671,9 +749,11 @@ struct cache_sb {
};
};
-LE64_BITMASK(CACHE_SYNC, struct cache_sb, flags, 0, 1);
+/* XXX: rename CACHE_SET -> BCH_FS or something? */
+
+LE64_BITMASK(CACHE_SET_SYNC, struct cache_sb, flags, 0, 1);
-LE64_BITMASK(CACHE_ERROR_ACTION, struct cache_sb, flags, 1, 4);
+LE64_BITMASK(CACHE_SET_ERROR_ACTION, struct cache_sb, flags, 1, 4);
#define BCH_ON_ERROR_CONTINUE 0U
#define BCH_ON_ERROR_RO 1U
#define BCH_ON_ERROR_PANIC 2U
@@ -686,35 +766,144 @@ LE64_BITMASK(CACHE_SET_DATA_REPLICAS_WANT,struct cache_sb, flags, 8, 12);
LE64_BITMASK(CACHE_SB_CSUM_TYPE, struct cache_sb, flags, 12, 16);
-LE64_BITMASK(CACHE_META_PREFERRED_CSUM_TYPE,struct cache_sb, flags, 16, 20);
+LE64_BITMASK(CACHE_SET_META_CSUM_TYPE,struct cache_sb, flags, 16, 20);
#define BCH_CSUM_NONE 0U
#define BCH_CSUM_CRC32C 1U
#define BCH_CSUM_CRC64 2U
-#define BCH_CSUM_NR 3U
+#define BCH_CSUM_CHACHA20_POLY1305 3U
+#define BCH_CSUM_NR 4U
+
+static inline _Bool bch_csum_type_is_encryption(unsigned type)
+{
+ switch (type) {
+ case BCH_CSUM_CHACHA20_POLY1305:
+ return 1;
+ default:
+ return 0;
+ }
+}
-LE64_BITMASK(CACHE_BTREE_NODE_SIZE, struct cache_sb, flags, 20, 36);
+LE64_BITMASK(CACHE_SET_BTREE_NODE_SIZE, struct cache_sb, flags, 20, 36);
LE64_BITMASK(CACHE_SET_META_REPLICAS_HAVE,struct cache_sb, flags, 36, 40);
LE64_BITMASK(CACHE_SET_DATA_REPLICAS_HAVE,struct cache_sb, flags, 40, 44);
-LE64_BITMASK(CACHE_SET_DIRENT_CSUM_TYPE,struct cache_sb, flags, 44, 48);
-enum {
- BCH_DIRENT_CSUM_CRC32C = 0,
- BCH_DIRENT_CSUM_CRC64 = 1,
- BCH_DIRENT_CSUM_SIPHASH = 2,
- BCH_DIRENT_CSUM_SHA1 = 3,
+LE64_BITMASK(CACHE_SET_STR_HASH_TYPE,struct cache_sb, flags, 44, 48);
+enum bch_str_hash_type {
+ BCH_STR_HASH_CRC32C = 0,
+ BCH_STR_HASH_CRC64 = 1,
+ BCH_STR_HASH_SIPHASH = 2,
+ BCH_STR_HASH_SHA1 = 3,
};
-LE64_BITMASK(CACHE_DATA_PREFERRED_CSUM_TYPE, struct cache_sb, flags, 48, 52);
+#define BCH_STR_HASH_NR 4
+
+LE64_BITMASK(CACHE_SET_DATA_CSUM_TYPE, struct cache_sb, flags, 48, 52);
-LE64_BITMASK(CACHE_COMPRESSION_TYPE, struct cache_sb, flags, 52, 56);
+LE64_BITMASK(CACHE_SET_COMPRESSION_TYPE, struct cache_sb, flags, 52, 56);
enum {
BCH_COMPRESSION_NONE = 0,
- BCH_COMPRESSION_LZO1X = 1,
+ BCH_COMPRESSION_LZ4 = 1,
BCH_COMPRESSION_GZIP = 2,
- BCH_COMPRESSION_XZ = 3,
};
+#define BCH_COMPRESSION_NR 3U
+
+/* Limit inode numbers to 32 bits: */
+LE64_BITMASK(CACHE_INODE_32BIT, struct cache_sb, flags, 56, 57);
+
+LE64_BITMASK(CACHE_SET_GC_RESERVE, struct cache_sb, flags, 57, 63);
+
+LE64_BITMASK(CACHE_SET_ROOT_RESERVE, struct cache_sb, flags2, 0, 6);
+
+/*
+ * If nonzero, encryption is enabled; overrides DATA/META_CSUM_TYPE. Also
+ * indicates encryption algorithm in use, if/when we get more than one.
+ */
+LE64_BITMASK(CACHE_SET_ENCRYPTION_TYPE, struct cache_sb, flags2, 6, 10);
+
+/*
+ * If nonzero, we have an encryption key in the superblock, which is the key
+ * used to encrypt all other data/metadata. The key will normally be encrypted
+ * with the key userspace provides, but if encryption has been turned off we'll
+ * just store the master key unencrypted in the superblock so we can access the
+ * previously encrypted data.
+ */
+LE64_BITMASK(CACHE_SET_ENCRYPTION_KEY, struct cache_sb, flags2, 10, 11);
+
+/* options: */
+
+/**
+ * CACHE_SET_OPT(name, choices, min, max, sb_option, sysfs_writeable)
+ *
+ * @name - name of mount option, sysfs attribute, and struct cache_set_opts
+ * member
+ *
+ * @choices - array of strings that the user can select from - option is
+ * selected by array index
+ *
+ * Booleans are special cased; if @choices is bch_bool_opt the mount
+ * options name and noname will work as expected.
+ *
+ * @min, @max
+ *
+ * @sb_option - name of corresponding superblock option
+ *
+ * @sysfs_writeable - if true, option will be modifiable at runtime via sysfs
+ */
+
+#define CACHE_SET_SB_OPTS() \
+ CACHE_SET_OPT(errors, \
+ bch_error_actions, \
+ 0, BCH_NR_ERROR_ACTIONS, \
+ CACHE_SET_ERROR_ACTION, \
+ true) \
+ CACHE_SET_OPT(metadata_replicas, \
+ bch_uint_opt, \
+ 0, BCH_REPLICAS_MAX, \
+ CACHE_SET_META_REPLICAS_WANT, \
+ false) \
+ CACHE_SET_OPT(data_replicas, \
+ bch_uint_opt, \
+ 0, BCH_REPLICAS_MAX, \
+ CACHE_SET_DATA_REPLICAS_WANT, \
+ false) \
+ CACHE_SET_OPT(metadata_checksum, \
+ bch_csum_types, \
+ 0, BCH_CSUM_NR, \
+ CACHE_SET_META_CSUM_TYPE, \
+ true) \
+ CACHE_SET_OPT(data_checksum, \
+ bch_csum_types, \
+ 0, BCH_CSUM_NR, \
+ CACHE_SET_DATA_CSUM_TYPE, \
+ true) \
+ CACHE_SET_OPT(compression, \
+ bch_compression_types, \
+ 0, BCH_COMPRESSION_NR, \
+ CACHE_SET_COMPRESSION_TYPE, \
+ true) \
+ CACHE_SET_OPT(str_hash, \
+ bch_str_hash_types, \
+ 0, BCH_STR_HASH_NR, \
+ CACHE_SET_STR_HASH_TYPE, \
+ true) \
+ CACHE_SET_OPT(inodes_32bit, \
+ bch_bool_opt, 0, 2, \
+ CACHE_INODE_32BIT, \
+ true) \
+ CACHE_SET_OPT(gc_reserve_percent, \
+ bch_uint_opt, \
+ 5, 21, \
+ CACHE_SET_GC_RESERVE, \
+ false) \
+ CACHE_SET_OPT(root_reserve_percent, \
+ bch_uint_opt, \
+ 0, 21, \
+ CACHE_SET_ROOT_RESERVE, \
+ false)
+
/* backing device specific stuff: */
struct backingdev_sb {
@@ -828,18 +1017,13 @@ static inline __u64 bset_magic(struct cache_sb *sb)
return __le64_to_cpu(sb->set_magic) ^ BSET_MAGIC;
}
-/*
- * Journal
- *
- * On disk format for a journal entry:
- * seq is monotonically increasing; every journal entry has its own unique
- * sequence number.
- *
- * last_seq is the oldest journal entry that still has keys the btree hasn't
- * flushed to disk yet.
- *
- * version is for on disk format changes.
- */
+/* 128 bits, sufficient for cryptographic MACs: */
+struct bch_csum {
+ __le64 lo;
+ __le64 hi;
+};
+
+/* Journal */
#define BCACHE_JSET_VERSION_UUIDv1 1
#define BCACHE_JSET_VERSION_UUID 1 /* Always latest UUID format */
@@ -860,12 +1044,11 @@ struct jset_entry {
#define JSET_KEYS_U64s (sizeof(struct jset_entry) / sizeof(__u64))
-
-LE32_BITMASK(JKEYS_TYPE, struct jset_entry, flags, 0, 8);
+LE32_BITMASK(JOURNAL_ENTRY_TYPE, struct jset_entry, flags, 0, 8);
enum {
- JKEYS_BTREE_KEYS = 0,
- JKEYS_BTREE_ROOT = 1,
- JKEYS_PRIO_PTRS = 2,
+ JOURNAL_ENTRY_BTREE_KEYS = 0,
+ JOURNAL_ENTRY_BTREE_ROOT = 1,
+ JOURNAL_ENTRY_PRIO_PTRS = 2,
/*
* Journal sequence numbers can be blacklisted: bsets record the max
@@ -877,11 +1060,22 @@ enum {
* and then record that we skipped it so that the next time we crash and
* recover we don't think there was a missing journal entry.
*/
- JKEYS_JOURNAL_SEQ_BLACKLISTED = 3,
+ JOURNAL_ENTRY_JOURNAL_SEQ_BLACKLISTED = 3,
};
+/*
+ * On disk format for a journal entry:
+ * seq is monotonically increasing; every journal entry has its own unique
+ * sequence number.
+ *
+ * last_seq is the oldest journal entry that still has keys the btree hasn't
+ * flushed to disk yet.
+ *
+ * version is for on disk format changes.
+ */
struct jset {
- __le64 csum;
+ struct bch_csum csum;
+
__le64 magic;
__le32 version;
__le32 flags;
@@ -901,11 +1095,15 @@ struct jset {
};
LE32_BITMASK(JSET_CSUM_TYPE, struct jset, flags, 0, 4);
+LE32_BITMASK(JSET_BIG_ENDIAN, struct jset, flags, 4, 5);
+
+#define BCH_JOURNAL_BUCKETS_MIN 20
/* Bucket prios/gens */
struct prio_set {
- __le64 csum;
+ struct bch_csum csum;
+
__le64 magic;
__le32 version;
__le32 flags;
@@ -985,7 +1183,7 @@ LE32_BITMASK(BSET_BTREE_LEVEL, struct bset, flags, 4, 8);
LE32_BITMASK(BSET_BIG_ENDIAN, struct bset, flags, 8, 9);
struct btree_node {
- __le64 csum;
+ struct bch_csum csum;
__le64 magic;
/* Closed interval: */
@@ -997,10 +1195,22 @@ struct btree_node {
} __attribute__((packed));
struct btree_node_entry {
- __le64 csum;
+ struct bch_csum csum;
+
struct bset keys;
} __attribute__((packed));
+/* Crypto: */
+
+struct nonce {
+ __le32 d[4];
+};
+
+#define BCACHE_MASTER_KEY_HEADER "bch**key"
+#define BCACHE_MASTER_KEY_NONCE ((struct nonce) \
+ {{ __cpu_to_le32(1), __cpu_to_le32(2), \
+ __cpu_to_le32(3), __cpu_to_le32(4) }})
+
/* OBSOLETE */
#define BITMASK(name, type, field, offset, end) \