Diffstat (limited to 'fs/bcachefs/btree_update_interior.c')
-rw-r--r--  fs/bcachefs/btree_update_interior.c  130
1 file changed, 93 insertions(+), 37 deletions(-)
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 4ddd1697ffde..8f96756ba648 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -11,6 +11,7 @@
#include "btree_iter.h"
#include "btree_locking.h"
#include "buckets.h"
+#include "error.h"
#include "extents.h"
#include "journal.h"
#include "journal_reclaim.h"
@@ -48,12 +49,27 @@ static void btree_node_interior_verify(struct bch_fs *c, struct btree *b)
break;
bp = bkey_s_c_to_btree_ptr_v2(k);
- BUG_ON(bkey_cmp(next_node, bp.v->min_key));
+ if (bkey_cmp(next_node, bp.v->min_key)) {
+ bch2_dump_btree_node(c, b);
+ panic("expected next min_key %llu:%llu got %llu:%llu\n",
+ next_node.inode,
+ next_node.offset,
+ bp.v->min_key.inode,
+ bp.v->min_key.offset);
+ }
bch2_btree_node_iter_advance(&iter, b);
if (bch2_btree_node_iter_end(&iter)) {
- BUG_ON(bkey_cmp(k.k->p, b->key.k.p));
+
+ if (bkey_cmp(k.k->p, b->key.k.p)) {
+ bch2_dump_btree_node(c, b);
+ panic("expected end %llu:%llu got %llu:%llu\n",
+ b->key.k.p.inode,
+ b->key.k.p.offset,
+ k.k->p.inode,
+ k.k->p.offset);
+ }
break;
}
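Worth noting about this hunk: a bare BUG_ON() leaves nothing in the log to debug with, so the check now dumps the whole node and panics with the expected/got positions. A minimal userspace sketch of the same pattern, with a toy bpos type and abort() standing in for the kernel's panic() (none of these names are the bcachefs definitions):

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

/* Toy stand-in for bcachefs's struct bpos (inode:offset position). */
struct bpos {
	uint64_t inode;
	uint64_t offset;
};

static int bpos_cmp(struct bpos l, struct bpos r)
{
	if (l.inode != r.inode)
		return l.inode < r.inode ? -1 : 1;
	if (l.offset != r.offset)
		return l.offset < r.offset ? -1 : 1;
	return 0;
}

static void dump_node_state(void)
{
	/* In the kernel this is bch2_dump_btree_node(); here, a placeholder. */
	fprintf(stderr, "node dump would go here\n");
}

/*
 * Instead of BUG_ON(cmp != 0): print everything needed to debug the
 * failure, *then* die.  abort() stands in for panic().
 */
static void verify_next_min_key(struct bpos next_node, struct bpos min_key)
{
	if (bpos_cmp(next_node, min_key)) {
		dump_node_state();
		fprintf(stderr,
			"expected next min_key %llu:%llu got %llu:%llu\n",
			(unsigned long long) next_node.inode,
			(unsigned long long) next_node.offset,
			(unsigned long long) min_key.inode,
			(unsigned long long) min_key.offset);
		abort();
	}
}

int main(void)
{
	verify_next_min_key((struct bpos) { 1, 0 }, (struct bpos) { 1, 0 });
	verify_next_min_key((struct bpos) { 1, 0 }, (struct bpos) { 2, 8 });
	return 0;
}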
@@ -149,7 +165,7 @@ void bch2_btree_node_free_never_inserted(struct bch_fs *c, struct btree *b)
b->ob.nr = 0;
- clear_btree_node_dirty(b);
+ clear_btree_node_dirty(c, b);
btree_node_lock_type(c, b, SIX_LOCK_write);
__btree_node_free(c, b);
@@ -264,7 +280,7 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
b = as->prealloc_nodes[--as->nr_prealloc_nodes];
set_btree_node_accessed(b);
- set_btree_node_dirty(b);
+ set_btree_node_dirty(c, b);
set_btree_node_need_write(b);
bch2_bset_init_first(b, &b->data->keys);
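set_btree_node_dirty() and clear_btree_node_dirty() growing a struct bch_fs * argument suggests the dirty bit now also drives per-filesystem accounting, where only flag transitions may touch the counter. A hedged sketch of that idiom in C11 atomics (the counter name and exact semantics are assumptions, not the bcachefs code):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct fs {
	atomic_long dirty_nodes;	/* assumed per-fs dirty counter */
};

struct node {
	atomic_bool dirty;
};

/* Count only the 0 -> 1 transition, mirroring test_and_set_bit() usage. */
static void node_set_dirty(struct fs *c, struct node *b)
{
	if (!atomic_exchange(&b->dirty, true))
		atomic_fetch_add(&c->dirty_nodes, 1);
}

static void node_clear_dirty(struct fs *c, struct node *b)
{
	if (atomic_exchange(&b->dirty, false))
		atomic_fetch_sub(&c->dirty_nodes, 1);
}

int main(void)
{
	struct fs c = { 0 };
	struct node b = { 0 };

	node_set_dirty(&c, &b);
	node_set_dirty(&c, &b);	/* no double count */
	node_clear_dirty(&c, &b);
	printf("dirty nodes: %ld\n", atomic_load(&c.dirty_nodes));
	return 0;
}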
@@ -503,14 +519,18 @@ static int btree_update_nodes_written_trans(struct btree_trans *trans,
trans->journal_pin = &as->journal;
for_each_keylist_key(&as->new_keys, k) {
- ret = bch2_trans_mark_key(trans, bkey_i_to_s_c(k),
+ ret = bch2_trans_mark_key(trans,
+ bkey_s_c_null,
+ bkey_i_to_s_c(k),
0, 0, BTREE_TRIGGER_INSERT);
if (ret)
return ret;
}
for_each_keylist_key(&as->old_keys, k) {
- ret = bch2_trans_mark_key(trans, bkey_i_to_s_c(k),
+ ret = bch2_trans_mark_key(trans,
+ bkey_i_to_s_c(k),
+ bkey_s_c_null,
0, 0, BTREE_TRIGGER_OVERWRITE);
if (ret)
return ret;
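bch2_trans_mark_key() now takes the old and the new key side by side, with bkey_s_c_null filling whichever side does not exist: an insert is (null, new), an overwrite being dropped is (old, null). A toy dispatcher built on the same convention (the names and the NULL sentinel are illustrative, not the bcachefs API):

#include <stdio.h>

struct key {
	unsigned long long inode, offset;
	long long sectors;	/* toy payload for accounting */
};

/* NULL plays the role of bkey_s_c_null: "no key on this side". */
static int mark_key(const struct key *old, const struct key *new)
{
	long long delta = (new ? new->sectors : 0) -
			  (old ? old->sectors : 0);

	/* A trigger that sees both sides can compute deltas directly,
	 * instead of needing separate insert/overwrite passes. */
	printf("usage delta: %lld sectors\n", delta);
	return 0;
}

int main(void)
{
	struct key a = { 1, 0, 128 };
	struct key b = { 1, 0, 64 };

	mark_key(NULL, &a);	/* insert: (null, new) */
	mark_key(&a, &b);	/* update: (old, new) */
	mark_key(&b, NULL);	/* delete: (old, null) */
	return 0;
}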
@@ -523,11 +543,25 @@ static void btree_update_nodes_written(struct btree_update *as)
{
struct bch_fs *c = as->c;
struct btree *b = as->b;
+ struct btree_trans trans;
u64 journal_seq = 0;
unsigned i;
int ret;
/*
+ * If we're already in an error state, it might be because a btree node
+ * was never written, and we might be trying to free that same btree
+ * node here, but it won't have been marked as allocated and we'll see
+ * spurious disk usage inconsistencies in the transactional part below
+ * if we don't skip it:
+ */
+ ret = bch2_journal_error(&c->journal);
+ if (ret)
+ goto err;
+
+ BUG_ON(!journal_pin_active(&as->journal));
+
+ /*
* We did an update to a parent node where the pointers we added pointed
* to child nodes that weren't written yet: now, the child nodes have
* been written so we can write out the update to the interior node.
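The early exit added above encodes a rule: once the journal has gone into an error state, skip the transactional accounting entirely, since it could only report spurious inconsistencies, but still run the cleanup at the err label. The goto-based shape, reduced to a standalone sketch with invented names:

#include <stdio.h>

static int journal_error;	/* stand-in for bch2_journal_error() */

static int run_transactional_part(void)
{
	printf("marking old/new btree node keys\n");
	return 0;
}

static int update_nodes_written(void)
{
	int ret;

	/*
	 * If the fs already failed, the nodes we're about to account for
	 * may never have been marked allocated: don't generate spurious
	 * inconsistencies, just skip ahead to the common cleanup path.
	 */
	ret = journal_error;
	if (ret)
		goto err;

	ret = run_transactional_part();
err:
	printf("cleanup always runs, ret=%d\n", ret);
	return ret;
}

int main(void)
{
	update_nodes_written();
	journal_error = -5;	/* -EIO */
	update_nodes_written();
	return 0;
}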
@@ -540,16 +574,20 @@ static void btree_update_nodes_written(struct btree_update *as)
* journal reclaim does btree updates when flushing bkey_cached entries,
* which may require allocations as well.
*/
- ret = bch2_trans_do(c, &as->disk_res, &journal_seq,
- BTREE_INSERT_NOFAIL|
- BTREE_INSERT_USE_RESERVE|
- BTREE_INSERT_USE_ALLOC_RESERVE|
- BTREE_INSERT_NOCHECK_RW|
- BTREE_INSERT_JOURNAL_RECLAIM|
- BTREE_INSERT_JOURNAL_RESERVED,
- btree_update_nodes_written_trans(&trans, as));
- BUG_ON(ret && !bch2_journal_error(&c->journal));
-
+ bch2_trans_init(&trans, c, 0, 512);
+ ret = __bch2_trans_do(&trans, &as->disk_res, &journal_seq,
+ BTREE_INSERT_NOFAIL|
+ BTREE_INSERT_USE_RESERVE|
+ BTREE_INSERT_USE_ALLOC_RESERVE|
+ BTREE_INSERT_NOCHECK_RW|
+ BTREE_INSERT_JOURNAL_RECLAIM|
+ BTREE_INSERT_JOURNAL_RESERVED,
+ btree_update_nodes_written_trans(&trans, as));
+ bch2_trans_exit(&trans);
+
+ bch2_fs_fatal_err_on(ret && !bch2_journal_error(&c->journal), c,
+ "error %i in btree_update_nodes_written()", ret);
+err:
if (b) {
/*
* @b is the node we did the final insert into:
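Replacing the bch2_trans_do() convenience wrapper with explicit bch2_trans_init(&trans, c, 0, 512) / __bch2_trans_do() / bch2_trans_exit() lets this caller size the transaction's preallocated memory up front instead of taking the wrapper's defaults. The shape of that change, as a toy (the wrapper and sizes are illustrative, not the bcachefs definitions):

#include <stdio.h>
#include <stdlib.h>

struct trans {
	size_t mem_bytes;
	void *mem;	/* preallocated scratch for the transaction */
};

static void trans_init(struct trans *t, size_t prealloc)
{
	t->mem_bytes = prealloc;
	t->mem = malloc(prealloc);
}

static void trans_exit(struct trans *t)
{
	free(t->mem);
	t->mem = NULL;
}

static int do_update(struct trans *t)
{
	printf("running update with %zu bytes preallocated\n", t->mem_bytes);
	return 0;
}

/* Convenience wrapper: fine for most callers, but hides the sizing knob. */
static int trans_do(size_t prealloc, int (*fn)(struct trans *))
{
	struct trans t;
	int ret;

	trans_init(&t, prealloc);
	ret = fn(&t);
	trans_exit(&t);
	return ret;
}

int main(void)
{
	/* Open-coded form, as in the patch: caller picks the preallocation. */
	struct trans t;
	trans_init(&t, 512);
	do_update(&t);
	trans_exit(&t);

	/* Or via the wrapper with defaults. */
	return trans_do(64, do_update);
}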
@@ -569,17 +607,30 @@ static void btree_update_nodes_written(struct btree_update *as)
list_del(&as->write_blocked_list);
- if (!ret && as->b == b) {
+ /*
+ * Node might have been freed, recheck under
+ * btree_interior_update_lock:
+ */
+ if (as->b == b) {
struct bset *i = btree_bset_last(b);
BUG_ON(!b->c.level);
BUG_ON(!btree_node_dirty(b));
- i->journal_seq = cpu_to_le64(
- max(journal_seq,
- le64_to_cpu(i->journal_seq)));
-
- bch2_btree_add_journal_pin(c, b, journal_seq);
+ if (!ret) {
+ i->journal_seq = cpu_to_le64(
+ max(journal_seq,
+ le64_to_cpu(i->journal_seq)));
+
+ bch2_btree_add_journal_pin(c, b, journal_seq);
+ } else {
+ /*
+ * If we didn't get a journal sequence number we
+ * can't write this btree node, because recovery
+ * won't know to ignore this write:
+ */
+ set_btree_node_never_write(b);
+ }
}
mutex_unlock(&c->btree_interior_update_lock);
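Two things happen in this hunk: the as->b == b test moves under btree_interior_update_lock because, per the new comment, the node may have been freed in the meantime, and on error the node is flagged never-write, since a write without a journal sequence number would confuse recovery. The recheck-under-lock idiom, sketched with pthreads (all names invented):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t interior_update_lock = PTHREAD_MUTEX_INITIALIZER;

struct node {
	bool never_write;
};

struct update {
	struct node *b;	/* may be cleared concurrently, under the lock */
};

static void finish_update(struct update *as, struct node *b, int ret)
{
	pthread_mutex_lock(&interior_update_lock);

	/*
	 * The snapshot of as->b taken before we blocked may be stale:
	 * only act on it if it still matches under the lock.
	 */
	if (as->b == b) {
		if (!ret) {
			printf("pinning node in journal\n");
		} else {
			/* No journal seq: this node must never be written. */
			b->never_write = true;
		}
	}

	pthread_mutex_unlock(&interior_update_lock);
}

int main(void)
{
	struct node n = { false };
	struct update as = { &n };

	finish_update(&as, &n, 0);
	finish_update(&as, &n, -5);
	printf("never_write=%d\n", n.never_write);
	return 0;
}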
@@ -680,17 +731,7 @@ static void btree_update_reparent(struct btree_update *as,
child->b = NULL;
child->mode = BTREE_INTERIOR_UPDATING_AS;
- /*
- * When we write a new btree root, we have to drop our journal pin
- * _before_ the new nodes are technically reachable; see
- * btree_update_nodes_written().
- *
- * This goes for journal pins that are recursively blocked on us - so,
- * just transfer the journal pin to the new interior update so
- * btree_update_nodes_written() can drop it.
- */
bch2_journal_pin_copy(&c->journal, &as->journal, &child->journal, NULL);
- bch2_journal_pin_drop(&c->journal, &child->journal);
}
static void btree_update_updated_root(struct btree_update *as, struct btree *b)
@@ -827,7 +868,7 @@ void bch2_btree_interior_update_will_free_node(struct btree_update *as,
closure_wake_up(&c->btree_interior_update_wait);
}
- clear_btree_node_dirty(b);
+ clear_btree_node_dirty(c, b);
clear_btree_node_need_write(b);
/*
@@ -937,6 +978,10 @@ bch2_btree_update_start(struct btree_trans *trans, enum btree_id id,
if (ret)
goto err;
+ bch2_journal_pin_add(&c->journal,
+ atomic64_read(&c->journal.seq),
+ &as->journal, NULL);
+
mutex_lock(&c->btree_interior_update_lock);
list_add_tail(&as->list, &c->btree_interior_update_list);
mutex_unlock(&c->btree_interior_update_lock);
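Taking the journal pin here in bch2_btree_update_start(), at the then-current sequence number, pairs with the BUG_ON(!journal_pin_active(&as->journal)) added to btree_update_nodes_written(): the pin must still be held when the update completes. A toy pin table showing why a pinned sequence blocks reclaim past it (the structure is invented for illustration):

#include <stdio.h>
#include <stdint.h>

#define MAX_PINS 8

struct journal {
	uint64_t seq;			/* current sequence number */
	uint64_t pinned[MAX_PINS];	/* 0 = slot free */
};

static int pin_add(struct journal *j, uint64_t seq)
{
	for (int i = 0; i < MAX_PINS; i++)
		if (!j->pinned[i]) {
			j->pinned[i] = seq;
			return i;
		}
	return -1;
}

static void pin_drop(struct journal *j, int slot)
{
	j->pinned[slot] = 0;
}

/* Reclaim may only advance to the oldest pinned sequence. */
static uint64_t reclaim_horizon(const struct journal *j)
{
	uint64_t horizon = j->seq;

	for (int i = 0; i < MAX_PINS; i++)
		if (j->pinned[i] && j->pinned[i] < horizon)
			horizon = j->pinned[i];
	return horizon;
}

int main(void)
{
	struct journal j = { .seq = 100 };

	/* Pin at update start, as the patch does: */
	int slot = pin_add(&j, j.seq);

	j.seq = 140;	/* journal moves on while the update runs */
	printf("horizon with pin: %llu\n",
	       (unsigned long long) reclaim_horizon(&j));

	pin_drop(&j, slot);
	printf("horizon after drop: %llu\n",
	       (unsigned long long) reclaim_horizon(&j));
	return 0;
}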
@@ -1018,7 +1063,19 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
struct bkey_i *insert,
struct btree_node_iter *node_iter)
{
+ struct bch_fs *c = as->c;
struct bkey_packed *k;
+ const char *invalid;
+
+ invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(insert), btree_node_type(b)) ?:
+ bch2_bkey_in_btree_node(b, bkey_i_to_s_c(insert));
+ if (invalid) {
+ char buf[160];
+
+ bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(insert));
+ bch2_fs_inconsistent(c, "inserting invalid bkey %s: %s", buf, invalid);
+ dump_stack();
+ }
BUG_ON(as->journal_u64s + jset_u64s(insert->k.u64s) >
ARRAY_SIZE(as->journal_entries));
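bch2_insert_fixup_btree_ptr() now validates keys on the way in, chaining two checkers with the GNU ?: extension (the first non-NULL error string wins) and reporting via bch2_fs_inconsistent() plus a stack dump rather than crashing outright. The chained-validator idiom as a standalone sketch (the validators themselves are invented):

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

struct key {
	uint64_t offset;
	uint64_t size;
};

/* Each validator returns NULL on success, or a static error string. */
static const char *check_nonempty(const struct key *k)
{
	return k->size ? NULL : "zero-size key";
}

static const char *check_in_node(const struct key *k, uint64_t node_end)
{
	return k->offset + k->size <= node_end ? NULL
					       : "key past end of node";
}

static void insert_key(const struct key *k, uint64_t node_end)
{
	/* GNU ?: keeps the first failure; later checks are skipped. */
	const char *invalid = check_nonempty(k) ?:
			      check_in_node(k, node_end);

	if (invalid) {
		/* Report and keep going, instead of BUG_ON(): */
		fprintf(stderr, "inserting invalid key %llu+%llu: %s\n",
			(unsigned long long) k->offset,
			(unsigned long long) k->size, invalid);
		return;
	}
	printf("inserted %llu+%llu\n",
	       (unsigned long long) k->offset,
	       (unsigned long long) k->size);
}

int main(void)
{
	insert_key(&(struct key) { 0, 16 }, 64);
	insert_key(&(struct key) { 60, 16 }, 64);
	return 0;
}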
@@ -1034,7 +1091,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
bch2_btree_node_iter_advance(node_iter, b);
bch2_btree_bset_insert_key(iter, b, node_iter, insert);
- set_btree_node_dirty(b);
+ set_btree_node_dirty(c, b);
set_btree_node_need_write(b);
}
@@ -1353,9 +1410,6 @@ void bch2_btree_insert_node(struct btree_update *as, struct btree *b,
BUG_ON(!as || as->b);
bch2_verify_keylist_sorted(keys);
- if (as->must_rewrite)
- goto split;
-
bch2_btree_node_lock_for_insert(c, b, iter);
if (!bch2_btree_node_insert_fits(c, b, bch2_keylist_u64s(keys))) {
@@ -1363,6 +1417,8 @@ void bch2_btree_insert_node(struct btree_update *as, struct btree *b,
goto split;
}
+ btree_node_interior_verify(c, b);
+
bch2_btree_insert_keys_interior(as, b, iter, keys);
live_u64s_added = (int) b->nr.live_u64s - old_live_u64s;