diff options
Diffstat (limited to 'fs/bcachefs/btree_update_interior.c')
-rw-r--r-- | fs/bcachefs/btree_update_interior.c | 130 |
1 files changed, 93 insertions, 37 deletions
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 4ddd1697ffde..8f96756ba648 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -11,6 +11,7 @@ #include "btree_iter.h" #include "btree_locking.h" #include "buckets.h" +#include "error.h" #include "extents.h" #include "journal.h" #include "journal_reclaim.h" @@ -48,12 +49,27 @@ static void btree_node_interior_verify(struct bch_fs *c, struct btree *b) break; bp = bkey_s_c_to_btree_ptr_v2(k); - BUG_ON(bkey_cmp(next_node, bp.v->min_key)); + if (bkey_cmp(next_node, bp.v->min_key)) { + bch2_dump_btree_node(c, b); + panic("expected next min_key %llu:%llu got %llu:%llu\n", + next_node.inode, + next_node.offset, + bp.v->min_key.inode, + bp.v->min_key.offset); + } bch2_btree_node_iter_advance(&iter, b); if (bch2_btree_node_iter_end(&iter)) { - BUG_ON(bkey_cmp(k.k->p, b->key.k.p)); + + if (bkey_cmp(k.k->p, b->key.k.p)) { + bch2_dump_btree_node(c, b); + panic("expected end %llu:%llu got %llu:%llu\n", + b->key.k.p.inode, + b->key.k.p.offset, + k.k->p.inode, + k.k->p.offset); + } break; } @@ -149,7 +165,7 @@ void bch2_btree_node_free_never_inserted(struct bch_fs *c, struct btree *b) b->ob.nr = 0; - clear_btree_node_dirty(b); + clear_btree_node_dirty(c, b); btree_node_lock_type(c, b, SIX_LOCK_write); __btree_node_free(c, b); @@ -264,7 +280,7 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev b = as->prealloc_nodes[--as->nr_prealloc_nodes]; set_btree_node_accessed(b); - set_btree_node_dirty(b); + set_btree_node_dirty(c, b); set_btree_node_need_write(b); bch2_bset_init_first(b, &b->data->keys); @@ -503,14 +519,18 @@ static int btree_update_nodes_written_trans(struct btree_trans *trans, trans->journal_pin = &as->journal; for_each_keylist_key(&as->new_keys, k) { - ret = bch2_trans_mark_key(trans, bkey_i_to_s_c(k), + ret = bch2_trans_mark_key(trans, + bkey_s_c_null, + bkey_i_to_s_c(k), 0, 0, BTREE_TRIGGER_INSERT); if (ret) return ret; } for_each_keylist_key(&as->old_keys, k) { - ret = bch2_trans_mark_key(trans, bkey_i_to_s_c(k), + ret = bch2_trans_mark_key(trans, + bkey_i_to_s_c(k), + bkey_s_c_null, 0, 0, BTREE_TRIGGER_OVERWRITE); if (ret) return ret; @@ -523,11 +543,25 @@ static void btree_update_nodes_written(struct btree_update *as) { struct bch_fs *c = as->c; struct btree *b = as->b; + struct btree_trans trans; u64 journal_seq = 0; unsigned i; int ret; /* + * If we're already in an error state, it might be because a btree node + * was never written, and we might be trying to free that same btree + * node here, but it won't have been marked as allocated and we'll see + * spurious disk usage inconsistencies in the transactional part below + * if we don't skip it: + */ + ret = bch2_journal_error(&c->journal); + if (ret) + goto err; + + BUG_ON(!journal_pin_active(&as->journal)); + + /* * We did an update to a parent node where the pointers we added pointed * to child nodes that weren't written yet: now, the child nodes have * been written so we can write out the update to the interior node. @@ -540,16 +574,20 @@ static void btree_update_nodes_written(struct btree_update *as) * journal reclaim does btree updates when flushing bkey_cached entries, * which may require allocations as well. */ - ret = bch2_trans_do(c, &as->disk_res, &journal_seq, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_USE_RESERVE| - BTREE_INSERT_USE_ALLOC_RESERVE| - BTREE_INSERT_NOCHECK_RW| - BTREE_INSERT_JOURNAL_RECLAIM| - BTREE_INSERT_JOURNAL_RESERVED, - btree_update_nodes_written_trans(&trans, as)); - BUG_ON(ret && !bch2_journal_error(&c->journal)); - + bch2_trans_init(&trans, c, 0, 512); + ret = __bch2_trans_do(&trans, &as->disk_res, &journal_seq, + BTREE_INSERT_NOFAIL| + BTREE_INSERT_USE_RESERVE| + BTREE_INSERT_USE_ALLOC_RESERVE| + BTREE_INSERT_NOCHECK_RW| + BTREE_INSERT_JOURNAL_RECLAIM| + BTREE_INSERT_JOURNAL_RESERVED, + btree_update_nodes_written_trans(&trans, as)); + bch2_trans_exit(&trans); + + bch2_fs_fatal_err_on(ret && !bch2_journal_error(&c->journal), c, + "error %i in btree_update_nodes_written()", ret); +err: if (b) { /* * @b is the node we did the final insert into: @@ -569,17 +607,30 @@ static void btree_update_nodes_written(struct btree_update *as) list_del(&as->write_blocked_list); - if (!ret && as->b == b) { + /* + * Node might have been freed, recheck under + * btree_interior_update_lock: + */ + if (as->b == b) { struct bset *i = btree_bset_last(b); BUG_ON(!b->c.level); BUG_ON(!btree_node_dirty(b)); - i->journal_seq = cpu_to_le64( - max(journal_seq, - le64_to_cpu(i->journal_seq))); - - bch2_btree_add_journal_pin(c, b, journal_seq); + if (!ret) { + i->journal_seq = cpu_to_le64( + max(journal_seq, + le64_to_cpu(i->journal_seq))); + + bch2_btree_add_journal_pin(c, b, journal_seq); + } else { + /* + * If we didn't get a journal sequence number we + * can't write this btree node, because recovery + * won't know to ignore this write: + */ + set_btree_node_never_write(b); + } } mutex_unlock(&c->btree_interior_update_lock); @@ -680,17 +731,7 @@ static void btree_update_reparent(struct btree_update *as, child->b = NULL; child->mode = BTREE_INTERIOR_UPDATING_AS; - /* - * When we write a new btree root, we have to drop our journal pin - * _before_ the new nodes are technically reachable; see - * btree_update_nodes_written(). - * - * This goes for journal pins that are recursively blocked on us - so, - * just transfer the journal pin to the new interior update so - * btree_update_nodes_written() can drop it. - */ bch2_journal_pin_copy(&c->journal, &as->journal, &child->journal, NULL); - bch2_journal_pin_drop(&c->journal, &child->journal); } static void btree_update_updated_root(struct btree_update *as, struct btree *b) @@ -827,7 +868,7 @@ void bch2_btree_interior_update_will_free_node(struct btree_update *as, closure_wake_up(&c->btree_interior_update_wait); } - clear_btree_node_dirty(b); + clear_btree_node_dirty(c, b); clear_btree_node_need_write(b); /* @@ -937,6 +978,10 @@ bch2_btree_update_start(struct btree_trans *trans, enum btree_id id, if (ret) goto err; + bch2_journal_pin_add(&c->journal, + atomic64_read(&c->journal.seq), + &as->journal, NULL); + mutex_lock(&c->btree_interior_update_lock); list_add_tail(&as->list, &c->btree_interior_update_list); mutex_unlock(&c->btree_interior_update_lock); @@ -1018,7 +1063,19 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b struct bkey_i *insert, struct btree_node_iter *node_iter) { + struct bch_fs *c = as->c; struct bkey_packed *k; + const char *invalid; + + invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(insert), btree_node_type(b)) ?: + bch2_bkey_in_btree_node(b, bkey_i_to_s_c(insert)); + if (invalid) { + char buf[160]; + + bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(insert)); + bch2_fs_inconsistent(c, "inserting invalid bkey %s: %s", buf, invalid); + dump_stack(); + } BUG_ON(as->journal_u64s + jset_u64s(insert->k.u64s) > ARRAY_SIZE(as->journal_entries)); @@ -1034,7 +1091,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b bch2_btree_node_iter_advance(node_iter, b); bch2_btree_bset_insert_key(iter, b, node_iter, insert); - set_btree_node_dirty(b); + set_btree_node_dirty(c, b); set_btree_node_need_write(b); } @@ -1353,9 +1410,6 @@ void bch2_btree_insert_node(struct btree_update *as, struct btree *b, BUG_ON(!as || as->b); bch2_verify_keylist_sorted(keys); - if (as->must_rewrite) - goto split; - bch2_btree_node_lock_for_insert(c, b, iter); if (!bch2_btree_node_insert_fits(c, b, bch2_keylist_u64s(keys))) { @@ -1363,6 +1417,8 @@ void bch2_btree_insert_node(struct btree_update *as, struct btree *b, goto split; } + btree_node_interior_verify(c, b); + bch2_btree_insert_keys_interior(as, b, iter, keys); live_u64s_added = (int) b->nr.live_u64s - old_live_u64s; |