Diffstat (limited to 'libbcache/btree_io.c')
-rw-r--r--  libbcache/btree_io.c  201
1 file changed, 133 insertions, 68 deletions
diff --git a/libbcache/btree_io.c b/libbcache/btree_io.c
index 4c295af1..e772c6ad 100644
--- a/libbcache/btree_io.c
+++ b/libbcache/btree_io.c
@@ -13,6 +13,7 @@
#include "extents.h"
#include "io.h"
#include "journal.h"
+#include "super-io.h"
#include <trace/events/bcache.h>
@@ -39,7 +40,7 @@ static void clear_needs_whiteout(struct bset *i)
{
struct bkey_packed *k;
- for (k = i->start; k != bset_bkey_last(i); k = bkey_next(k))
+ for (k = i->start; k != vstruct_last(i); k = bkey_next(k))
k->needs_whiteout = false;
}
@@ -47,7 +48,7 @@ static void set_needs_whiteout(struct bset *i)
{
struct bkey_packed *k;
- for (k = i->start; k != bset_bkey_last(i); k = bkey_next(k))
+ for (k = i->start; k != vstruct_last(i); k = bkey_next(k))
k->needs_whiteout = true;
}
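
The two helpers above show the pattern repeated throughout this diff: the bset-specific bset_bkey_last() is replaced by the generic vstruct macros, which work on any variable-length struct whose length is carried in a u64s field (bsets, btree nodes, journal entries). A simplified sketch of the helpers used in this file, assuming they match vstructs.h (the real macros dispatch on the width of the u64s field; this sketch hardcodes the __le16 used by struct bset):

	/* simplified sketch, assumed per vstructs.h: */
	#define __vstruct_bytes(_type, _u64s)				\
		(offsetof(_type, _data) + (_u64s) * sizeof(u64))

	#define vstruct_end(_s)						\
		((void *) ((_s)->_data + le16_to_cpu((_s)->u64s)))

	#define vstruct_last(_s)					\
		((typeof(&(_s)->start[0])) vstruct_end(_s))

	#define vstruct_idx(_s, _idx)					\
		((typeof(&(_s)->start[0])) ((_s)->_data + (_idx)))

For a struct bset, vstruct_last(i) computes exactly what bset_bkey_last(i) did, so the changes below are renames onto the shared helpers.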
@@ -341,7 +342,7 @@ bool __bch_compact_whiteouts(struct cache_set *c, struct btree *b,
compacting = true;
u_start = u_pos;
start = i->start;
- end = bset_bkey_last(i);
+ end = vstruct_last(i);
if (src != dst) {
memmove(dst, src, sizeof(*src));
@@ -574,7 +575,7 @@ static void btree_node_sort(struct cache_set *c, struct btree *b,
order = sorting_entire_node
? btree_page_order(c)
- : get_order(__set_bytes(b->data, u64s));
+ : get_order(__vstruct_bytes(struct btree_node, u64s));
out = btree_bounce_alloc(c, order, &used_mempool);
@@ -589,8 +590,7 @@ static void btree_node_sort(struct cache_set *c, struct btree *b,
out->keys.u64s = cpu_to_le16(u64s);
- BUG_ON((void *) bset_bkey_last(&out->keys) >
- (void *) out + (PAGE_SIZE << order));
+ BUG_ON(vstruct_end(&out->keys) > (void *) out + (PAGE_SIZE << order));
if (sorting_entire_node)
bch_time_stats_update(&c->btree_sort_time, start_time);
@@ -654,7 +654,7 @@ static struct btree_nr_keys sort_repack(struct bset *dst,
bool filter_whiteouts)
{
struct bkey_format *in_f = &src->format;
- struct bkey_packed *in, *out = bset_bkey_last(dst);
+ struct bkey_packed *in, *out = vstruct_last(dst);
struct btree_nr_keys nr;
memset(&nr, 0, sizeof(nr));
@@ -723,7 +723,7 @@ static struct btree_nr_keys sort_repack_merge(struct cache_set *c,
btree_keys_account_key_add(&nr, 0, prev);
prev = bkey_next(prev);
} else {
- prev = bset_bkey_last(dst);
+ prev = vstruct_last(dst);
}
bkey_copy(prev, &tmp.k);
@@ -734,7 +734,7 @@ static struct btree_nr_keys sort_repack_merge(struct cache_set *c,
btree_keys_account_key_add(&nr, 0, prev);
out = bkey_next(prev);
} else {
- out = bset_bkey_last(dst);
+ out = vstruct_last(dst);
}
dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
@@ -854,22 +854,23 @@ void bch_btree_init_next(struct cache_set *c, struct btree *b,
bch_btree_iter_reinit_node(iter, b);
}
-/*
- * We seed the checksum with the entire first pointer (dev, gen and offset),
- * since for btree nodes we have to store the checksum with the data instead of
- * the pointer - this helps guard against reading a valid btree node that is not
- * the node we actually wanted:
- */
-#define btree_csum_set(_b, _i) \
-({ \
- void *_data = (void *) (_i) + 8; \
- void *_end = bset_bkey_last(&(_i)->keys); \
- \
- bch_checksum_update(BSET_CSUM_TYPE(&(_i)->keys), \
- bkey_i_to_extent_c(&(_b)->key)->v._data[0], \
- _data, \
- _end - _data) ^ 0xffffffffffffffffULL; \
-})
+static struct nonce btree_nonce(struct btree *b,
+ struct bset *i,
+ unsigned offset)
+{
+ return (struct nonce) {{
+ [0] = cpu_to_le32(offset),
+ [1] = ((__le32 *) &i->seq)[0],
+ [2] = ((__le32 *) &i->seq)[1],
+ [3] = ((__le32 *) &i->journal_seq)[0]^BCH_NONCE_BTREE,
+ }};
+}
+
+static void bset_encrypt(struct cache_set *c, struct bset *i, struct nonce nonce)
+{
+ bch_encrypt(c, BSET_CSUM_TYPE(i), nonce, i->_data,
+ vstruct_end(i) - (void *) i->_data);
+}
#define btree_node_error(b, c, ptr, fmt, ...) \
cache_set_inconsistent(c, \
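
The btree_csum_set() macro, which seeded a plain checksum with the node's first pointer, is replaced by a 128-bit nonce plus optional encryption. btree_nonce() packs the bset's sector offset within the node, the 64-bit bset seq, and journal_seq XORed with BCH_NONCE_BTREE into the four __le32 words, so a btree nonce can never collide with a nonce used for other data types, and each bset in a node gets a distinct nonce via the offset word. A sketch of the supporting definitions, assumed to follow checksum.h:

	/* assumed shape, per checksum.h: */
	struct nonce {
		__le32 d[4];			/* 128-bit nonce */
	};

	/*
	 * Advance a nonce by a byte offset (must be ChaCha20-block aligned)
	 * so the next region's keystream doesn't overlap keystream already
	 * consumed; word 0 doubles as the block counter:
	 */
	static inline struct nonce nonce_add(struct nonce nonce, unsigned offset)
	{
		BUG_ON(offset & (CHACHA20_BLOCK_SIZE - 1));

		le32_add_cpu(&nonce.d[0], offset / CHACHA20_BLOCK_SIZE);
		return nonce;
	}

Note that bset_encrypt() both encrypts and decrypts: ChaCha20 is a stream cipher, so XORing the same keystream in a second time undoes it, which is why the read path below also calls bset_encrypt().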
@@ -877,7 +878,7 @@ void bch_btree_init_next(struct cache_set *c, struct btree *b,
(b)->btree_id, (b)->level, btree_node_root(c, b) \
? btree_node_root(c, b)->level : -1, \
PTR_BUCKET_NR(ca, ptr), (b)->written, \
- (i)->u64s, ##__VA_ARGS__)
+ le16_to_cpu((i)->u64s), ##__VA_ARGS__)
static const char *validate_bset(struct cache_set *c, struct btree *b,
struct cache *ca,
@@ -886,6 +887,7 @@ static const char *validate_bset(struct cache_set *c, struct btree *b,
unsigned *whiteout_u64s)
{
struct bkey_packed *k, *prev = NULL;
+ struct bpos prev_pos = POS_MIN;
bool seen_non_whiteout = false;
if (le16_to_cpu(i->version) != BCACHE_BSET_VERSION)
@@ -903,7 +905,7 @@ static const char *validate_bset(struct cache_set *c, struct btree *b,
}
for (k = i->start;
- k != bset_bkey_last(i);) {
+ k != vstruct_last(i);) {
struct bkey_s_c u;
struct bkey tmp;
const char *invalid;
@@ -911,13 +913,13 @@ static const char *validate_bset(struct cache_set *c, struct btree *b,
if (!k->u64s) {
btree_node_error(b, c, ptr,
"KEY_U64s 0: %zu bytes of metadata lost",
- (void *) bset_bkey_last(i) - (void *) k);
+ vstruct_end(i) - (void *) k);
i->u64s = cpu_to_le16((u64 *) k - i->_data);
break;
}
- if (bkey_next(k) > bset_bkey_last(i)) {
+ if (bkey_next(k) > vstruct_last(i)) {
btree_node_error(b, c, ptr,
"key extends past end of bset");
@@ -931,7 +933,7 @@ static const char *validate_bset(struct cache_set *c, struct btree *b,
i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);
memmove_u64s_down(k, bkey_next(k),
- (u64 *) bset_bkey_last(i) - (u64 *) k);
+ (u64 *) vstruct_end(i) - (u64 *) k);
continue;
}
@@ -951,7 +953,7 @@ static const char *validate_bset(struct cache_set *c, struct btree *b,
i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);
memmove_u64s_down(k, bkey_next(k),
- (u64 *) bset_bkey_last(i) - (u64 *) k);
+ (u64 *) vstruct_end(i) - (u64 *) k);
continue;
}
@@ -963,22 +965,40 @@ static const char *validate_bset(struct cache_set *c, struct btree *b,
if (!seen_non_whiteout &&
(!bkey_whiteout(k) ||
- (prev && bkey_cmp_left_packed_byval(b, prev,
- bkey_start_pos(u.k)) > 0))) {
+ (bkey_cmp(prev_pos, bkey_start_pos(u.k)) > 0))) {
*whiteout_u64s = k->_data - i->_data;
seen_non_whiteout = true;
+ } else if (bkey_cmp(prev_pos, bkey_start_pos(u.k)) > 0) {
+ btree_node_error(b, c, ptr,
+ "keys out of order: %llu:%llu > %llu:%llu",
+ prev_pos.inode,
+ prev_pos.offset,
+ u.k->p.inode,
+ bkey_start_offset(u.k));
+ /* XXX: repair this */
}
+ prev_pos = u.k->p;
prev = k;
k = bkey_next(k);
}
SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN);
-
- b->written += sectors;
return NULL;
}
+static bool extent_contains_ptr(struct bkey_s_c_extent e,
+ struct bch_extent_ptr match)
+{
+ const struct bch_extent_ptr *ptr;
+
+ extent_for_each_ptr(e, ptr)
+ if (!memcmp(ptr, &match, sizeof(*ptr)))
+ return true;
+
+ return false;
+}
+
void bch_btree_node_read_done(struct cache_set *c, struct btree *b,
struct cache *ca,
const struct bch_extent_ptr *ptr)
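
validate_bset() now tracks the previous key's position as an unpacked struct bpos (prev_pos) rather than keeping a pointer to the previous packed key and unpacking it for every comparison, and out-of-order keys are reported explicitly instead of silently tolerated. For reference, the comparison is the usual lexicographic order on positions; a sketch assuming the bpos layout in bkey.h:

	/* sketch of bkey_cmp() on unpacked positions, assumed per bkey.h: */
	static inline int bpos_cmp_sketch(struct bpos l, struct bpos r)
	{
		if (l.inode != r.inode)
			return l.inode < r.inode ? -1 : 1;
		if (l.offset != r.offset)
			return l.offset < r.offset ? -1 : 1;
		return 0;
	}

The new extent_contains_ptr() helper supports the backpointer check added to bch_btree_node_read_done() below: the pointer stored in the node's own header must match one of the pointers in the btree key that referenced it. This takes over the job the deleted checksum-seeding comment described — guarding against reading a valid btree node that is not the node we actually wanted.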
@@ -990,6 +1010,8 @@ void bch_btree_node_read_done(struct cache_set *c, struct btree *b,
bool used_mempool;
unsigned u64s;
const char *err;
+ struct bch_csum csum;
+ struct nonce nonce;
int ret;
iter = mempool_alloc(&c->fill_iter, GFP_NOIO);
@@ -1005,40 +1027,62 @@ void bch_btree_node_read_done(struct cache_set *c, struct btree *b,
if (!b->written) {
i = &b->data->keys;
+ err = "bad magic";
+ if (le64_to_cpu(b->data->magic) != bset_magic(c))
+ goto err;
+
+ err = "bad btree header";
+ if (!b->data->keys.seq)
+ goto err;
+
err = "unknown checksum type";
- if (BSET_CSUM_TYPE(i) >= BCH_CSUM_NR)
+ if (!bch_checksum_type_valid(c, BSET_CSUM_TYPE(i)))
goto err;
/* XXX: retry checksum errors */
+ nonce = btree_nonce(b, i, b->written << 9);
+ csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data);
+
err = "bad checksum";
- if (le64_to_cpu(b->data->csum) !=
- btree_csum_set(b, b->data))
+ if (bch_crc_cmp(csum, b->data->csum))
goto err;
- sectors = __set_blocks(b->data,
- le16_to_cpu(b->data->keys.u64s),
- block_bytes(c)) << c->block_bits;
+ bch_encrypt(c, BSET_CSUM_TYPE(i), nonce,
+ &b->data->flags,
+ (void *) &b->data->keys -
+ (void *) &b->data->flags);
+ nonce = nonce_add(nonce,
+ round_up((void *) &b->data->keys -
+ (void *) &b->data->flags,
+ CHACHA20_BLOCK_SIZE));
+ bset_encrypt(c, i, nonce);
- err = "bad magic";
- if (le64_to_cpu(b->data->magic) != bset_magic(&c->disk_sb))
- goto err;
-
- err = "bad btree header";
- if (!b->data->keys.seq)
- goto err;
+ sectors = vstruct_sectors(b->data, c->block_bits);
if (BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN) {
+ u64 *p = (u64 *) &b->data->ptr;
+
+ *p = swab64(*p);
bch_bpos_swab(&b->data->min_key);
bch_bpos_swab(&b->data->max_key);
}
+ err = "incorrect btree id";
+ if (BTREE_NODE_ID(b->data) != b->btree_id)
+ goto err;
+
+ err = "incorrect level";
+ if (BTREE_NODE_LEVEL(b->data) != b->level)
+ goto err;
+
err = "incorrect max key";
if (bkey_cmp(b->data->max_key, b->key.k.p))
goto err;
- err = "incorrect level";
- if (BSET_BTREE_LEVEL(i) != b->level)
+ err = "incorrect backpointer";
+ if (!extent_contains_ptr(bkey_i_to_s_c_extent(&b->key),
+ b->data->ptr))
goto err;
err = bch_bkey_format_validate(&b->data->format);
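
The first-bset read path is reordered: verify the magic and seq first, checksum the whole ciphertext with csum_vstruct(), and only then decrypt, in two steps. The header region from flags up to keys is decrypted first; the nonce is then advanced past that region rounded up to CHACHA20_BLOCK_SIZE, because ChaCha20 keystream is consumed in 64-byte blocks and the bset must start on a fresh block. A worked example with a hypothetical header span of 104 bytes (the real span is whatever lies between those two fields in struct btree_node):

	/* hypothetical size, purely for illustration: */
	size_t hdr = 104;	/* bytes from &b->data->flags to &b->data->keys */

	nonce = btree_nonce(b, i, b->written << 9);
	bch_encrypt(c, BSET_CSUM_TYPE(i), nonce, &b->data->flags, hdr);
	/* 104 bytes consume two 64-byte blocks, so skip to keystream byte 128: */
	nonce = nonce_add(nonce, round_up(hdr, CHACHA20_BLOCK_SIZE));
	bset_encrypt(c, i, nonce);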
@@ -1056,23 +1100,27 @@ void bch_btree_node_read_done(struct cache_set *c, struct btree *b,
break;
err = "unknown checksum type";
- if (BSET_CSUM_TYPE(i) >= BCH_CSUM_NR)
+ if (!bch_checksum_type_valid(c, BSET_CSUM_TYPE(i)))
goto err;
+ nonce = btree_nonce(b, i, b->written << 9);
+ csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
+
err = "bad checksum";
- if (le64_to_cpu(bne->csum) !=
- btree_csum_set(b, bne))
+ if (memcmp(&csum, &bne->csum, sizeof(csum)))
goto err;
- sectors = __set_blocks(bne,
- le16_to_cpu(bne->keys.u64s),
- block_bytes(c)) << c->block_bits;
+ bset_encrypt(c, i, nonce);
+
+ sectors = vstruct_sectors(bne, c->block_bits);
}
err = validate_bset(c, b, ca, ptr, i, sectors, &whiteout_u64s);
if (err)
goto err;
+ b->written += sectors;
+
err = "insufficient memory";
ret = bch_journal_seq_should_ignore(c, le64_to_cpu(i->journal_seq), b);
if (ret < 0)
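
Checksums are now carried as struct bch_csum, wide enough for a 128-bit Poly1305 MAC, so the old le64_to_cpu() comparison no longer applies; the first bset compares with bch_crc_cmp() and later bsets with memcmp(), which amount to the same test. Sketches of the helpers, assumed per checksum.h:

	/* assumed shapes, per checksum.h: */
	struct bch_csum {
		__le64 lo;
		__le64 hi;
	};

	/* checksum a variable-length struct over its full length: */
	#define csum_vstruct(_c, _type, _nonce, _vs)			\
		bch_checksum(_c, _type, _nonce, (_vs),			\
			     vstruct_end(_vs) - (void *) (_vs))

	static inline bool bch_crc_cmp(struct bch_csum l, struct bch_csum r)
	{
		/* nonzero iff the checksums differ */
		return ((l.lo ^ r.lo) | (l.hi ^ r.hi)) != 0;
	}

Note also that b->written is now advanced here in bch_btree_node_read_done(), after validate_bset() succeeds, instead of inside validate_bset() itself.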
@@ -1083,11 +1131,11 @@ void bch_btree_node_read_done(struct cache_set *c, struct btree *b,
__bch_btree_node_iter_push(iter, b,
i->start,
- bkey_idx(i, whiteout_u64s));
+ vstruct_idx(i, whiteout_u64s));
__bch_btree_node_iter_push(iter, b,
- bkey_idx(i, whiteout_u64s),
- bset_bkey_last(i));
+ vstruct_idx(i, whiteout_u64s),
+ vstruct_last(i));
}
err = "corrupted btree";
@@ -1290,6 +1338,7 @@ void __bch_btree_node_write(struct cache_set *c, struct btree *b,
struct bch_extent_ptr *ptr;
struct cache *ca;
struct sort_iter sort_iter;
+ struct nonce nonce;
unsigned bytes_to_write, sectors_to_write, order, bytes, u64s;
u64 seq = 0;
bool used_mempool;
@@ -1330,7 +1379,7 @@ void __bch_btree_node_write(struct cache_set *c, struct btree *b,
BUG_ON(b->written >= c->sb.btree_node_size);
BUG_ON(bset_written(b, btree_bset_last(b)));
- BUG_ON(le64_to_cpu(b->data->magic) != bset_magic(&c->disk_sb));
+ BUG_ON(le64_to_cpu(b->data->magic) != bset_magic(c));
BUG_ON(memcmp(&b->data->format, &b->format, sizeof(b->format)));
if (lock_type_held == SIX_LOCK_intent) {
@@ -1396,7 +1445,7 @@ void __bch_btree_node_write(struct cache_set *c, struct btree *b,
b->whiteout_u64s = 0;
u64s = btree_node_is_extents(b)
- ? sort_extents(bset_bkey_last(i), &sort_iter, false)
+ ? sort_extents(vstruct_last(i), &sort_iter, false)
: sort_keys(i->start, &sort_iter, false);
le16_add_cpu(&i->u64s, u64s);
@@ -1413,14 +1462,30 @@ void __bch_btree_node_write(struct cache_set *c, struct btree *b,
BUG_ON(i->seq != b->data->keys.seq);
i->version = cpu_to_le16(BCACHE_BSET_VERSION);
- SET_BSET_CSUM_TYPE(i, c->opts.metadata_checksum);
+ SET_BSET_CSUM_TYPE(i, bch_meta_checksum_type(c));
+
+ nonce = btree_nonce(b, i, b->written << 9);
+
+ if (bn) {
+ bch_encrypt(c, BSET_CSUM_TYPE(i), nonce,
+ &bn->flags,
+ (void *) &b->data->keys -
+ (void *) &b->data->flags);
+ nonce = nonce_add(nonce,
+ round_up((void *) &b->data->keys -
+ (void *) &b->data->flags,
+ CHACHA20_BLOCK_SIZE));
+ bset_encrypt(c, i, nonce);
+
+ nonce = btree_nonce(b, i, b->written << 9);
+ bn->csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bn);
+ } else {
+ bset_encrypt(c, i, nonce);
- if (bn)
- bn->csum = cpu_to_le64(btree_csum_set(b, bn));
- else
- bne->csum = cpu_to_le64(btree_csum_set(b, bne));
+ bne->csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
+ }
- bytes_to_write = (void *) bset_bkey_last(i) - data;
+ bytes_to_write = vstruct_end(i) - data;
sectors_to_write = round_up(bytes_to_write, block_bytes(c)) >> 9;
memset(data + bytes_to_write, 0,
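
The write path mirrors the read path in reverse: encrypt the header region and the bset in place, then checksum the resulting ciphertext, so a reader can verify the checksum before decrypting anything (encrypt-then-MAC ordering). bn->csum is computed with a freshly built btree_nonce() because the nonce variable was advanced during encryption. The sector count above pads the bset out to a whole block; a sketch of the arithmetic, with block_bytes(c) assumed to return the block size in bytes:

	/* sketch of the padding arithmetic above: */
	static unsigned sectors_for(size_t bytes_to_write, unsigned block_bytes)
	{
		/* round up to a whole block, then count 512-byte sectors;
		 * e.g. sectors_for(5000, 4096) == 8192 >> 9 == 16 */
		return round_up(bytes_to_write, block_bytes) >> 9;
	}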
@@ -1548,7 +1613,7 @@ bool bch_btree_post_write_cleanup(struct cache_set *c, struct btree *b)
* If later we don't unconditionally sort down to a single bset, we have
* to ensure this is still true:
*/
- BUG_ON((void *) bset_bkey_last(btree_bset_last(b)) > write_block(b));
+ BUG_ON((void *) btree_bkey_last(b, bset_tree_last(b)) > write_block(b));
bne = want_new_bset(c, b);
if (bne)