diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2019-11-26 17:26:04 -0500 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@gmail.com> | 2020-06-09 21:32:39 -0400 |
commit | dfddbe0c58bf924d74b7e135c668f55e9ca0cda4 (patch) | |
tree | 6490a93ce7d949df35617c5bc8aed3fa42626119 /fs/bcachefs/btree_io.c | |
parent | cb58674c8deb0fcfaa8d438ada595da0941d7ca3 (diff) |
bcachefs: Use KEY_TYPE_deleted whiteouts for extents
Previously, partial overwrites of existing extents were handled
implicitly by the btree code; when reading in a btree node, we'd do a
mergesort of the different bsets and detect and fix partially
overlapping extents during that mergesort.
That approach won't work with snapshots: this patch changes extents to work
like regular keys as far as the btree code is concerned, where a zero-size
KEY_TYPE_deleted whiteout will completely overwrite an existing extent.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Diffstat (limited to 'fs/bcachefs/btree_io.c')
-rw-r--r-- | fs/bcachefs/btree_io.c | 36 |
1 files changed, 24 insertions, 12 deletions
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 4b1cd4dd0741..5f1c3183fa85 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -23,7 +23,8 @@ static void verify_no_dups(struct btree *b, struct bkey_packed *start, - struct bkey_packed *end) + struct bkey_packed *end, + bool extents) { #ifdef CONFIG_BCACHEFS_DEBUG struct bkey_packed *k, *p; @@ -37,7 +38,7 @@ static void verify_no_dups(struct btree *b, struct bkey l = bkey_unpack_key(b, p); struct bkey r = bkey_unpack_key(b, k); - BUG_ON(btree_node_is_extents(b) + BUG_ON(extents ? bkey_cmp(l.p, bkey_start_pos(&r)) > 0 : bkey_cmp(l.p, bkey_start_pos(&r)) >= 0); //BUG_ON(bkey_cmp_packed(&b->format, p, k) >= 0); @@ -148,7 +149,8 @@ static void bch2_sort_whiteouts(struct bch_fs *c, struct btree *b) } verify_no_dups(b, new_whiteouts, - (void *) ((u64 *) new_whiteouts + b->whiteout_u64s)); + (void *) ((u64 *) new_whiteouts + b->whiteout_u64s), + btree_node_old_extent_overwrite(b)); memcpy_u64s(unwritten_whiteouts_start(c, b), new_whiteouts, b->whiteout_u64s); @@ -298,7 +300,8 @@ static bool bch2_compact_extent_whiteouts(struct bch_fs *c, verify_no_dups(b, unwritten_whiteouts_start(c, b), - unwritten_whiteouts_end(c, b)); + unwritten_whiteouts_end(c, b), + true); btree_bounce_free(c, order, used_mempool, whiteouts); @@ -378,7 +381,7 @@ static bool bch2_drop_whiteouts(struct btree *b, enum compact_mode mode) bool bch2_compact_whiteouts(struct bch_fs *c, struct btree *b, enum compact_mode mode) { - return !btree_node_is_extents(b) + return !btree_node_old_extent_overwrite(b) ? bch2_drop_whiteouts(b, mode) : bch2_compact_extent_whiteouts(c, b, mode); } @@ -418,10 +421,10 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b, start_time = local_clock(); - if (btree_node_is_extents(b)) + if (btree_node_old_extent_overwrite(b)) filter_whiteouts = bset_written(b, start_bset); - u64s = (btree_node_is_extents(b) + u64s = (btree_node_old_extent_overwrite(b) ? 
bch2_sort_extents : bch2_sort_keys)(out->keys.start, &sort_iter, @@ -707,7 +710,8 @@ static int validate_bset(struct bch_fs *c, struct btree *b, bool have_retry) { struct bkey_packed *k, *prev = NULL; - struct bpos prev_pos = POS_MIN; + struct bpos prev_pos = POS_MIN; + struct bpos prev_data = POS_MIN; bool seen_non_whiteout = false; unsigned version; const char *err; @@ -840,7 +844,8 @@ static int validate_bset(struct bch_fs *c, struct btree *b, (bkey_cmp(prev_pos, bkey_start_pos(u.k)) > 0))) { *whiteout_u64s = k->_data - i->_data; seen_non_whiteout = true; - } else if (bkey_cmp(prev_pos, bkey_start_pos(u.k)) > 0) { + } else if (bkey_cmp(prev_data, bkey_start_pos(u.k)) > 0 || + bkey_cmp(prev_pos, u.k->p) > 0) { btree_err(BTREE_ERR_FATAL, c, b, i, "keys out of order: %llu:%llu > %llu:%llu", prev_pos.inode, @@ -850,7 +855,10 @@ static int validate_bset(struct bch_fs *c, struct btree *b, /* XXX: repair this */ } + if (!bkey_deleted(u.k)) + prev_data = u.k->p; prev_pos = u.k->p; + prev = k; k = bkey_next_skip_noops(k, vstruct_last(i)); } @@ -909,6 +917,10 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry bset_encrypt(c, i, b->written << 9); + if (btree_node_is_extents(b) && + !BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data)) + set_btree_node_old_extent_overwrite(b); + sectors = vstruct_sectors(b->data, c->block_bits); btree_node_set_format(b, b->data->format); @@ -972,7 +984,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry set_btree_bset(b, b->set, &b->data->keys); - b->nr = (btree_node_is_extents(b) + b->nr = (btree_node_old_extent_overwrite(b) ? 
bch2_extent_sort_fix_overlapping : bch2_key_sort_fix_overlapping)(c, &sorted->keys, iter); @@ -1487,7 +1499,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, i->journal_seq = cpu_to_le64(seq); i->u64s = 0; - if (!btree_node_is_extents(b)) { + if (!btree_node_old_extent_overwrite(b)) { sort_iter_add(&sort_iter, unwritten_whiteouts_start(c, b), unwritten_whiteouts_end(c, b)); @@ -1502,7 +1514,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, b->whiteout_u64s = 0; - u64s = btree_node_is_extents(b) + u64s = btree_node_old_extent_overwrite(b) ? bch2_sort_extents(vstruct_last(i), &sort_iter, false) : bch2_sort_keys(i->start, &sort_iter, false); le16_add_cpu(&i->u64s, u64s); |