diff options
39 files changed, 640 insertions, 401 deletions
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 3fc728efbf5c..b6850b15494d 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -344,7 +344,7 @@ static inline void __bch2_alloc_v4_to_text(struct printbuf *out, struct bch_fs * struct bch_dev *ca = c ? bch2_dev_tryget_noerror(c, k.k->p.inode) : NULL; prt_newline(out); - printbuf_indent_add(out, 2); + guard(printbuf_indent)(out); prt_printf(out, "gen %u oldest_gen %u data_type ", a->gen, a->oldest_gen); bch2_prt_data_type(out, a->data_type); @@ -367,7 +367,6 @@ static inline void __bch2_alloc_v4_to_text(struct printbuf *out, struct bch_fs * if (ca) prt_printf(out, "fragmentation %llu\n", alloc_lru_idx_fragmentation(*a, ca)); prt_printf(out, "bp_start %llu\n", BCH_ALLOC_V4_BACKPOINTERS_START(a)); - printbuf_indent_sub(out, 2); bch2_dev_put(ca); } diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index f6ea4a8272d0..3d125ee81663 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -1491,10 +1491,9 @@ static void bch2_write_point_to_text(struct printbuf *out, struct bch_fs *c, prt_newline(out); - printbuf_indent_add(out, 2); - open_bucket_for_each(c, &wp->ptrs, ob, i) - bch2_open_bucket_to_text(out, c, ob); - printbuf_indent_sub(out, 2); + scoped_guard(printbuf_indent, out) + open_bucket_for_each(c, &wp->ptrs, ob, i) + bch2_open_bucket_to_text(out, c, ob); } void bch2_write_points_to_text(struct printbuf *out, struct bch_fs *c) @@ -1586,9 +1585,8 @@ static noinline void bch2_print_allocator_stuck(struct bch_fs *c) c->opts.allocator_stuck_timeout); prt_printf(&buf, "Allocator debug:\n"); - printbuf_indent_add(&buf, 2); - bch2_fs_alloc_debug_to_text(&buf, c); - printbuf_indent_sub(&buf, 2); + scoped_guard(printbuf_indent, &buf) + bch2_fs_alloc_debug_to_text(&buf, c); prt_newline(&buf); bch2_printbuf_make_room(&buf, 4096); @@ -1597,23 +1595,20 @@ static noinline void bch2_print_allocator_stuck(struct 
bch_fs *c) guard(printbuf_atomic)(&buf); for_each_online_member_rcu(c, ca) { prt_printf(&buf, "Dev %u:\n", ca->dev_idx); - printbuf_indent_add(&buf, 2); - bch2_dev_alloc_debug_to_text(&buf, ca); - printbuf_indent_sub(&buf, 2); + scoped_guard(printbuf_indent, &buf) + bch2_dev_alloc_debug_to_text(&buf, ca); prt_newline(&buf); } } prt_printf(&buf, "Copygc debug:\n"); - printbuf_indent_add(&buf, 2); - bch2_copygc_wait_to_text(&buf, c); - printbuf_indent_sub(&buf, 2); + scoped_guard(printbuf_indent, &buf) + bch2_copygc_wait_to_text(&buf, c); prt_newline(&buf); prt_printf(&buf, "Journal debug:\n"); - printbuf_indent_add(&buf, 2); - bch2_journal_debug_to_text(&buf, &c->journal); - printbuf_indent_sub(&buf, 2); + scoped_guard(printbuf_indent, &buf) + bch2_journal_debug_to_text(&buf, &c->journal); bch2_print_str(c, KERN_ERR, buf.buf); } diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index cb25cddb759b..6aeb1c876619 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -117,7 +117,7 @@ static noinline int backpointer_mod_err(struct btree_trans *trans, prt_printf(&buf, "existing backpointer found when inserting "); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&new_bp->k_i)); prt_newline(&buf); - printbuf_indent_add(&buf, 2); + guard(printbuf_indent)(&buf); prt_printf(&buf, "found "); bch2_bkey_val_to_text(&buf, c, found_bp); @@ -127,7 +127,7 @@ static noinline int backpointer_mod_err(struct btree_trans *trans, bch2_bkey_val_to_text(&buf, c, orig_k); } else if (!will_check) { prt_printf(&buf, "backpointer not found when deleting\n"); - printbuf_indent_add(&buf, 2); + guard(printbuf_indent)(&buf); prt_printf(&buf, "searching for "); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&new_bp->k_i)); @@ -278,9 +278,20 @@ static struct btree *__bch2_backpointer_get_node(struct btree_trans *trans, bp.v->level - 1, 0); struct btree *b = bch2_btree_iter_peek_node(iter); - if (IS_ERR_OR_NULL(b)) + if (IS_ERR(b)) goto err; + if (!b) { + /* 
Backpointer for nonexistent tree depth: */ + bkey_init(&iter->k); + iter->k.p = bp.v->pos; + struct bkey_s_c k = { &iter->k }; + + int ret = backpointer_target_not_found(trans, bp, k, last_flushed, commit); + b = ret ? ERR_PTR(ret) : NULL; + goto err; + } + BUG_ON(b->c.level != bp.v->level - 1); if (extent_matches_bp(c, bp.v->btree_id, bp.v->level, @@ -862,17 +873,25 @@ static int data_type_to_alloc_counter(enum bch_data_type t) } } -static int check_bucket_backpointers_to_extents(struct btree_trans *, struct bch_dev *, struct bpos); +static int check_bucket_backpointers_to_extents(struct btree_trans *, struct bch_dev *, struct bpos, + struct bkey_buf *last_flushed); static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct bkey_s_c alloc_k, bool *had_mismatch, - struct bkey_buf *last_flushed) + struct bkey_buf *last_flushed, + struct bpos *last_pos, + unsigned *nr_iters) { struct bch_fs *c = trans->c; struct bch_alloc_v4 a_convert; const struct bch_alloc_v4 *a = bch2_alloc_to_v4(alloc_k, &a_convert); bool need_commit = false; + if (!bpos_eq(*last_pos, alloc_k.k->p)) + *nr_iters = 0; + + *last_pos = alloc_k.k->p; + *had_mismatch = false; if (a->data_type == BCH_DATA_sb || @@ -926,6 +945,46 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b return ret; } + if (sectors[ALLOC_dirty] > a->dirty_sectors || + sectors[ALLOC_cached] > a->cached_sectors || + sectors[ALLOC_stripe] > a->stripe_sectors) { + if (*nr_iters) { + CLASS(printbuf, buf)(); + bch2_log_msg_start(c, &buf); + + prt_printf(&buf, "backpointer sectors > bucket sectors, but found no bad backpointers\n" + "bucket %llu:%llu data type %s, counters\n", + alloc_k.k->p.inode, + alloc_k.k->p.offset, + __bch2_data_types[a->data_type]); + if (sectors[ALLOC_dirty] > a->dirty_sectors) + prt_printf(&buf, "dirty: %u > %u\n", + sectors[ALLOC_dirty], a->dirty_sectors); + if (sectors[ALLOC_cached] > a->cached_sectors) + prt_printf(&buf, "cached: %u > %u\n", + 
sectors[ALLOC_cached], a->cached_sectors); + if (sectors[ALLOC_stripe] > a->stripe_sectors) + prt_printf(&buf, "stripe: %u > %u\n", + sectors[ALLOC_stripe], a->stripe_sectors); + + for_each_btree_key_max_norestart(trans, iter, BTREE_ID_backpointers, + bucket_pos_to_bp_start(ca, alloc_k.k->p), + bucket_pos_to_bp_end(ca, alloc_k.k->p), 0, bp_k, ret) { + bch2_bkey_val_to_text(&buf, c, bp_k); + prt_newline(&buf); + } + + bch2_print_str(c, KERN_ERR, buf.buf); + __WARN(); + return ret; + } + + *nr_iters += 1; + + return check_bucket_backpointers_to_extents(trans, ca, alloc_k.k->p, last_flushed) ?: + bch_err_throw(c, transaction_restart_nested); + } + if (sectors[ALLOC_dirty] != a->dirty_sectors || sectors[ALLOC_cached] != a->cached_sectors || sectors[ALLOC_stripe] != a->stripe_sectors) { @@ -943,13 +1002,6 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b return ret; } - if (sectors[ALLOC_dirty] > a->dirty_sectors || - sectors[ALLOC_cached] > a->cached_sectors || - sectors[ALLOC_stripe] > a->stripe_sectors) { - return check_bucket_backpointers_to_extents(trans, ca, alloc_k.k->p) ?: - bch_err_throw(c, transaction_restart_nested); - } - bool empty = (sectors[ALLOC_dirty] + sectors[ALLOC_stripe] + sectors[ALLOC_cached]) == 0; @@ -1113,6 +1165,8 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c) CLASS(btree_trans, trans)(c); struct extents_to_bp_state s = { .bp_start = POS_MIN }; + struct bpos last_pos = POS_MIN; + unsigned nr_iters = 0; bch2_bkey_buf_init(&s.last_flushed); bkey_init(&s.last_flushed.k->k); @@ -1121,7 +1175,8 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c) POS_MIN, BTREE_ITER_prefetch, k, ({ bool had_mismatch; bch2_fs_going_ro(c) ?: - check_bucket_backpointer_mismatch(trans, k, &had_mismatch, &s.last_flushed); + check_bucket_backpointer_mismatch(trans, k, &had_mismatch, &s.last_flushed, + &last_pos, &nr_iters); })); if (ret) goto err; @@ -1189,7 +1244,11 @@ static int 
check_bucket_backpointer_pos_mismatch(struct btree_trans *trans, if (ret) return ret; - return check_bucket_backpointer_mismatch(trans, k, had_mismatch, last_flushed); + struct bpos last_pos = POS_MIN; + unsigned nr_iters = 0; + return check_bucket_backpointer_mismatch(trans, k, had_mismatch, + last_flushed, + &last_pos, &nr_iters); } int bch2_check_bucket_backpointer_mismatch(struct btree_trans *trans, @@ -1253,22 +1312,21 @@ static int check_one_backpointer(struct btree_trans *trans, } static int check_bucket_backpointers_to_extents(struct btree_trans *trans, - struct bch_dev *ca, struct bpos bucket) + struct bch_dev *ca, struct bpos bucket, + struct bkey_buf *last_flushed) { u32 restart_count = trans->restart_count; - struct bkey_buf last_flushed; - bch2_bkey_buf_init(&last_flushed); - bkey_init(&last_flushed.k->k); int ret = for_each_btree_key_max(trans, iter, BTREE_ID_backpointers, bucket_pos_to_bp_start(ca, bucket), bucket_pos_to_bp_end(ca, bucket), 0, k, - check_one_backpointer(trans, BBPOS_MIN, BBPOS_MAX, k, &last_flushed) + check_one_backpointer(trans, BBPOS_MIN, BBPOS_MAX, k, last_flushed) ); - bch2_bkey_buf_exit(&last_flushed, trans->c); - return ret ?: trans_was_restarted(trans, restart_count); + return ret ?: + bch2_btree_write_buffer_flush_sync(trans) ?: /* make sure bad backpointers that were deleted are visible */ + trans_was_restarted(trans, restart_count); } static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans, diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 16d08dfb5f19..0ede47f62129 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -523,6 +523,7 @@ struct discard_in_flight { x(journal_read) \ x(fs_journal_alloc) \ x(fs_resize_on_mount) \ + x(sb_journal_sort) \ x(btree_node_read) \ x(btree_node_read_all_replicas) \ x(btree_node_scrub) \ diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h index 035b2cb25077..49d0be6405d8 100644 --- a/fs/bcachefs/btree_cache.h +++ 
b/fs/bcachefs/btree_cache.h @@ -166,7 +166,7 @@ void bch2_btree_cache_to_text(struct printbuf *, const struct btree_cache *); do { \ if (trace_##event##_enabled()) { \ CLASS(printbuf, buf)(); \ - printbuf_indent_add(&buf, 2); \ + guard(printbuf_indent)(&buf); \ bch2_btree_pos_to_text(&buf, c, b); \ trace_##event(c, buf.buf); \ } \ diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 2e3dd9bacac5..34ec1a90980d 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -2523,7 +2523,7 @@ do_write: if (trace_btree_node_write_enabled()) { CLASS(printbuf, buf)(); - printbuf_indent_add(&buf, 2); + guard(printbuf_indent)(&buf); prt_printf(&buf, "offset %u sectors %u bytes %u\n", b->written, sectors_to_write, diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 1e152c671bd7..b72ed543d9c0 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -137,18 +137,8 @@ static void __bch2_btree_path_verify_cached(struct btree_trans *trans, static void __bch2_btree_path_verify_level(struct btree_trans *trans, struct btree_path *path, unsigned level) { - struct btree_path_level *l; - struct btree_node_iter tmp; - bool locked; - struct bkey_packed *p, *k; - struct printbuf buf1 = PRINTBUF; - struct printbuf buf2 = PRINTBUF; - struct printbuf buf3 = PRINTBUF; - const char *msg; - - l = &path->l[level]; - tmp = l->iter; - locked = btree_node_locked(path, level); + struct btree_path_level *l = &path->l[level]; + bool locked = btree_node_locked(path, level); if (path->cached) { if (!level) @@ -166,51 +156,68 @@ static void __bch2_btree_path_verify_level(struct btree_trans *trans, bch2_btree_node_iter_verify(&l->iter, l->b); - /* - * For interior nodes, the iterator will have skipped past deleted keys: - */ - p = level + /* For interior nodes, the iterator may have skipped past deleted keys: */ + struct btree_node_iter tmp = l->iter; + const struct bkey_packed *p = level ? 
bch2_btree_node_iter_prev(&tmp, l->b) : bch2_btree_node_iter_prev_all(&tmp, l->b); - k = bch2_btree_node_iter_peek_all(&l->iter, l->b); + tmp = l->iter; + const struct bkey_packed *k = level + ? bch2_btree_node_iter_peek(&tmp, l->b) + : bch2_btree_node_iter_peek_all(&tmp, l->b); - if (p && bkey_iter_pos_cmp(l->b, p, &path->pos) >= 0) { - msg = "before"; - goto err; - } + const char *msg; + if (!(level > path->level && trans->journal_replay_not_finished)) { + /* + * We can't run these checks for interior nodes when we're still + * using the journal overlay because there might be a key in + * the interior node that points midway through the current leaf + * node - which is deleted in the journal overlay, but set_pos() + * will skip past it and cause the interior node iterators to be + * inconsistent in a way that doesn't matter and it can't check + * for. + */ - if (k && bkey_iter_pos_cmp(l->b, k, &path->pos) < 0) { - msg = "after"; - goto err; + if (p && bkey_iter_pos_cmp(l->b, p, &path->pos) >= 0) { + msg = "before"; + goto err; + } + + if (k && bkey_iter_pos_cmp(l->b, k, &path->pos) < 0) { + msg = "after"; + goto err; + } } if (!locked) btree_node_unlock(trans, path, level); return; err: - bch2_bpos_to_text(&buf1, path->pos); + { + CLASS(printbuf, buf)(); + prt_printf(&buf, "path should be %s key at level %u", msg, level); - if (p) { - struct bkey uk = bkey_unpack_key(l->b, p); + prt_str(&buf, "\npath pos "); + bch2_bpos_to_text(&buf, path->pos); - bch2_bkey_to_text(&buf2, &uk); - } else { - prt_printf(&buf2, "(none)"); - } + prt_str(&buf, "\nprev key "); + if (p) { + struct bkey uk = bkey_unpack_key(l->b, p); + bch2_bkey_to_text(&buf, &uk); + } else { + prt_printf(&buf, "(none)"); + } - if (k) { - struct bkey uk = bkey_unpack_key(l->b, k); + prt_str(&buf, "\ncur key "); + if (k) { + struct bkey uk = bkey_unpack_key(l->b, k); + bch2_bkey_to_text(&buf, &uk); + } else { + prt_printf(&buf, "(none)"); + } - bch2_bkey_to_text(&buf3, &uk); - } else { - prt_printf(&buf3, 
"(none)"); + panic("%s\n", buf.buf); } - - panic("path should be %s key at level %u:\n" - "path pos %s\n" - "prev key %s\n" - "cur key %s\n", - msg, level, buf1.buf, buf2.buf, buf3.buf); } static void __bch2_btree_path_verify(struct btree_trans *trans, @@ -886,28 +893,53 @@ static noinline void btree_node_mem_ptr_set(struct btree_trans *trans, btree_node_unlock(trans, path, plevel); } +static noinline_for_stack int btree_node_missing_err(struct btree_trans *trans, + struct btree_path *path) +{ + struct bch_fs *c = trans->c; + CLASS(printbuf, buf)(); + + prt_str(&buf, "node not found at pos: "); + bch2_bpos_to_text(&buf, path->pos); + prt_str(&buf, "\n within parent node "); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&path_l(path)->b->key)); + prt_newline(&buf); + + return __bch2_topology_error(c, &buf); +} + +static noinline_for_stack int btree_node_gap_err(struct btree_trans *trans, + struct btree_path *path, + struct bkey_i *k) +{ + struct bch_fs *c = trans->c; + CLASS(printbuf, buf)(); + + prt_str(&buf, "node doesn't cover expected range at pos: "); + bch2_bpos_to_text(&buf, path->pos); + prt_str(&buf, "\n within parent node "); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&path_l(path)->b->key)); + prt_str(&buf, "\n but got node: "); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k)); + prt_newline(&buf); + + return __bch2_topology_error(c, &buf); +} + static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans, struct btree_path *path, enum btree_iter_update_trigger_flags flags) { struct bch_fs *c = trans->c; struct btree_path_level *l = path_l(path); - struct btree_and_journal_iter jiter; - struct bkey_s_c k; int ret = 0; + struct btree_and_journal_iter jiter; __bch2_btree_and_journal_iter_init_node_iter(trans, &jiter, l->b, l->iter, path->pos); - k = bch2_btree_and_journal_iter_peek(c, &jiter); + struct bkey_s_c k = bch2_btree_and_journal_iter_peek(c, &jiter); if (!k.k) { - CLASS(printbuf, buf)(); - - prt_str(&buf, "node not found at 
pos "); - bch2_bpos_to_text(&buf, path->pos); - prt_str(&buf, " at btree "); - bch2_btree_pos_to_text(&buf, c, l->b); - - ret = bch2_fs_topology_error(c, "%s", buf.buf); + ret = btree_node_missing_err(trans, path); goto err; } @@ -922,20 +954,16 @@ err: return ret; } -static noinline_for_stack int btree_node_missing_err(struct btree_trans *trans, - struct btree_path *path) +static inline bool bpos_in_btree_node_key(struct bpos pos, const struct bkey_i *k) { - struct bch_fs *c = trans->c; - CLASS(printbuf, buf)(); + if (bpos_gt(pos, k->k.p)) + return false; - prt_str(&buf, "node not found at pos "); - bch2_bpos_to_text(&buf, path->pos); - prt_str(&buf, " within parent node "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&path_l(path)->b->key)); + if (k->k.type == KEY_TYPE_btree_ptr_v2 && + bpos_lt(pos, bkey_i_to_btree_ptr_v2_c(k)->v.min_key)) + return false; - bch2_fs_fatal_error(c, "%s", buf.buf); - printbuf_exit(&buf); - return bch_err_throw(c, btree_need_topology_repair); + return true; } static __always_inline int btree_path_down(struct btree_trans *trans, @@ -971,6 +999,9 @@ static __always_inline int btree_path_down(struct btree_trans *trans, } } + if (unlikely(!bpos_in_btree_node_key(path->pos, &trans->btree_path_down))) + return btree_node_gap_err(trans, path, &trans->btree_path_down); + b = bch2_btree_node_get(trans, path, &trans->btree_path_down, level, lock_type, trace_ip); ret = PTR_ERR_OR_ZERO(b); @@ -1476,7 +1507,7 @@ void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans) { prt_printf(buf, "%u transaction updates for %s journal seq %llu\n", trans->nr_updates, trans->fn, trans->journal_res.seq); - printbuf_indent_add(buf, 2); + guard(printbuf_indent)(buf); trans_for_each_update(trans, i) { struct bkey_s_c old = { &i->old_k, i->old_v }; @@ -1502,8 +1533,6 @@ void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans) bch2_journal_entry_to_text(buf, trans->c, e); prt_newline(buf); } - - 
printbuf_indent_sub(buf, 2); } static void bch2_btree_path_to_text_short(struct printbuf *out, struct btree_trans *trans, btree_path_idx_t path_idx) @@ -1556,8 +1585,8 @@ void bch2_btree_path_to_text(struct printbuf *out, struct btree_trans *trans, bt prt_printf(out, " uptodate %u locks_want %u", path->uptodate, path->locks_want); prt_newline(out); + guard(printbuf_indent)(out); - printbuf_indent_add(out, 2); for (unsigned l = 0; l < BTREE_MAX_DEPTH; l++) { prt_printf(out, "l=%u locks %s seq %u node ", l, btree_node_locked_str(btree_node_locked_type(path, l)), @@ -1570,7 +1599,6 @@ void bch2_btree_path_to_text(struct printbuf *out, struct btree_trans *trans, bt prt_printf(out, "%px", path->l[l].b); prt_newline(out); } - printbuf_indent_sub(out, 2); } static noinline __cold diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c index 38c5643e8a78..a4f8aac448c0 100644 --- a/fs/bcachefs/btree_locking.c +++ b/fs/bcachefs/btree_locking.c @@ -205,9 +205,8 @@ static noinline __noreturn void break_cycle_fail(struct lock_graph *g) bch2_btree_trans_to_text(&buf, trans); prt_printf(&buf, "backtrace:\n"); - printbuf_indent_add(&buf, 2); - bch2_prt_task_backtrace(&buf, trans->locking_wait.task, 2, GFP_NOWAIT); - printbuf_indent_sub(&buf, 2); + scoped_guard(printbuf_indent, &buf) + bch2_prt_task_backtrace(&buf, trans->locking_wait.task, 2, GFP_NOWAIT); prt_newline(&buf); } diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c index b618a0bd1186..c0dff992ad60 100644 --- a/fs/bcachefs/btree_node_scan.c +++ b/fs/bcachefs/btree_node_scan.c @@ -42,12 +42,11 @@ static void found_btree_node_to_text(struct printbuf *out, struct bch_fs *c, con static void found_btree_nodes_to_text(struct printbuf *out, struct bch_fs *c, found_btree_nodes nodes) { - printbuf_indent_add(out, 2); + guard(printbuf_indent)(out); darray_for_each(nodes, i) { found_btree_node_to_text(out, c, i); prt_newline(out); } - printbuf_indent_sub(out, 2); } static void 
found_btree_node_to_key(struct bkey_i *k, const struct found_btree_node *f) diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index a9877a47bfc6..a8cd7a5a6e7d 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -324,9 +324,6 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, struct btree *b; struct bch_devs_list devs_have = (struct bch_devs_list) { 0 }; enum bch_watermark watermark = flags & BCH_WATERMARK_MASK; - unsigned nr_reserve = watermark < BCH_WATERMARK_reclaim - ? BTREE_NODE_RESERVE - : 0; int ret; b = bch2_btree_node_mem_alloc(trans, interior_node); @@ -334,41 +331,6 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, return b; BUG_ON(b->ob.nr); - - mutex_lock(&c->btree_reserve_cache_lock); - if (unlikely(c->open_buckets_nr_free <= bch2_open_buckets_reserved(watermark))) { - guard(spinlock)(&c->freelist_lock); - if (c->open_buckets_nr_free <= bch2_open_buckets_reserved(watermark)) { - if (cl) - closure_wait(&c->open_buckets_wait, cl); - - ret = cl - ? 
bch_err_throw(c, bucket_alloc_blocked) - : bch_err_throw(c, open_buckets_empty); - mutex_unlock(&c->btree_reserve_cache_lock); - goto err; - } - } - - if (c->btree_reserve_cache_nr > nr_reserve) { - for (struct btree_alloc *a = c->btree_reserve_cache; - a < c->btree_reserve_cache + c->btree_reserve_cache_nr;) { - /* check if it has sufficient durability */ - - if (!can_use_btree_node(c, res, target, bkey_i_to_s_c(&a->k))) { - bch2_open_buckets_put(c, &a->ob); - *a = c->btree_reserve_cache[--c->btree_reserve_cache_nr]; - continue; - } - - bkey_copy(&b->key, &a->k); - b->ob = a->ob; - *a = c->btree_reserve_cache[--c->btree_reserve_cache_nr]; - mutex_unlock(&c->btree_reserve_cache_lock); - goto out; - } - } - mutex_unlock(&c->btree_reserve_cache_lock); retry: ret = bch2_alloc_sectors_start_trans(trans, target ?: @@ -398,12 +360,29 @@ retry: goto retry; } + mutex_lock(&c->btree_reserve_cache_lock); + while (c->btree_reserve_cache_nr) { + struct btree_alloc *a = c->btree_reserve_cache + --c->btree_reserve_cache_nr; + + /* check if it has sufficient durability */ + + if (can_use_btree_node(c, res, target, bkey_i_to_s_c(&a->k))) { + bkey_copy(&b->key, &a->k); + b->ob = a->ob; + mutex_unlock(&c->btree_reserve_cache_lock); + goto out; + } + + bch2_open_buckets_put(c, &a->ob); + } + mutex_unlock(&c->btree_reserve_cache_lock); + bkey_btree_ptr_v2_init(&b->key); bch2_alloc_sectors_append_ptrs(c, wp, &b->key, btree_sectors(c), false); bch2_open_bucket_get(c, wp, &b->ob); - bch2_alloc_sectors_done(c, wp); out: + bch2_alloc_sectors_done(c, wp); six_unlock_write(&b->c.lock); six_unlock_intent(&b->c.lock); @@ -2810,7 +2789,7 @@ static void bch2_btree_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct btree_alloc *a) { - printbuf_indent_add(out, 2); + guard(printbuf_indent)(out); bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&a->k)); prt_newline(out); @@ -2818,8 +2797,6 @@ static void bch2_btree_alloc_to_text(struct printbuf *out, unsigned i; open_bucket_for_each(c, &a->ob, 
ob, i) bch2_open_bucket_to_text(out, c, ob); - - printbuf_indent_sub(out, 2); } void bch2_btree_reserve_cache_to_text(struct printbuf *out, struct bch_fs *c) diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 2c997fddefb3..20b900bee32d 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -657,25 +657,26 @@ void bch2_data_update_to_text(struct printbuf *out, struct data_update *m) prt_str_indented(out, "old key:\t"); bch2_bkey_val_to_text(out, m->op.c, bkey_i_to_s_c(m->k.k)); + + bch2_write_op_to_text(out, &m->op); } void bch2_data_update_inflight_to_text(struct printbuf *out, struct data_update *m) { bch2_bkey_val_to_text(out, m->op.c, bkey_i_to_s_c(m->k.k)); prt_newline(out); - printbuf_indent_add(out, 2); + guard(printbuf_indent)(out); bch2_data_update_opts_to_text(out, m->op.c, &m->op.opts, &m->data_opts); if (!m->read_done) { prt_printf(out, "read:\n"); - printbuf_indent_add(out, 2); + guard(printbuf_indent)(out); bch2_read_bio_to_text(out, m->op.c, &m->rbio); } else { prt_printf(out, "write:\n"); - printbuf_indent_add(out, 2); + guard(printbuf_indent)(out); bch2_write_op_to_text(out, &m->op); } - printbuf_indent_sub(out, 4); } int bch2_extent_drop_ptrs(struct btree_trans *trans, diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index 33cb94f70b19..ebfb68e2e035 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -282,16 +282,13 @@ void bch2_btree_node_ondisk_to_text(struct printbuf *out, struct bch_fs *c, le64_to_cpu(i->journal_seq)); offset += sectors; - printbuf_indent_add(out, 4); + scoped_guard(printbuf_indent, out) + for (k = i->start; k != vstruct_last(i); k = bkey_p_next(k)) { + struct bkey u; - for (k = i->start; k != vstruct_last(i); k = bkey_p_next(k)) { - struct bkey u; - - bch2_bkey_val_to_text(out, c, bkey_disassemble(b, k, &u)); - prt_newline(out); - } - - printbuf_indent_sub(out, 4); + bch2_bkey_val_to_text(out, c, bkey_disassemble(b, k, &u)); + prt_newline(out); + } } out: if (bio) @@ 
-468,7 +465,7 @@ static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs * bch2_btree_id_level_to_text(out, b->c.btree_id, b->c.level); prt_printf(out, "\n"); - printbuf_indent_add(out, 2); + guard(printbuf_indent)(out); bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key)); prt_newline(out); @@ -488,8 +485,6 @@ static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs * &b->writes[1].journal, b->writes[1].journal.seq); prt_printf(out, "ob:\t%u\n", b->ob.nr); - - printbuf_indent_sub(out, 2); } static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf, @@ -605,9 +600,8 @@ restart: bch2_btree_trans_to_text(&i->buf, trans); prt_printf(&i->buf, "backtrace:\n"); - printbuf_indent_add(&i->buf, 2); - bch2_prt_task_backtrace(&i->buf, trans->locking_wait.task, 0, GFP_KERNEL); - printbuf_indent_sub(&i->buf, 2); + scoped_guard(printbuf_indent, &i->buf) + bch2_prt_task_backtrace(&i->buf, trans->locking_wait.task, 0, GFP_KERNEL); prt_newline(&i->buf); closure_put(&trans->ref); @@ -765,40 +759,35 @@ static ssize_t btree_transaction_stats_read(struct file *file, char __user *buf, break; prt_printf(&i->buf, "%s:\n", bch2_btree_transaction_fns[i->iter]); - printbuf_indent_add(&i->buf, 2); + guard(printbuf_indent)(&i->buf); guard(mutex)(&s->lock); prt_printf(&i->buf, "Max mem used: %u\n", s->max_mem); #ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE - printbuf_indent_add(&i->buf, 2); - bch2_trans_kmalloc_trace_to_text(&i->buf, &s->trans_kmalloc_trace); - printbuf_indent_sub(&i->buf, 2); + scoped_guard(printbuf_indent, &i->buf) + bch2_trans_kmalloc_trace_to_text(&i->buf, &s->trans_kmalloc_trace); #endif prt_printf(&i->buf, "Transaction duration:\n"); - printbuf_indent_add(&i->buf, 2); - bch2_time_stats_to_text(&i->buf, &s->duration); - printbuf_indent_sub(&i->buf, 2); + scoped_guard(printbuf_indent, &i->buf) + bch2_time_stats_to_text(&i->buf, &s->duration); if (IS_ENABLED(CONFIG_BCACHEFS_LOCK_TIME_STATS)) { 
prt_printf(&i->buf, "Lock hold times:\n"); - printbuf_indent_add(&i->buf, 2); - bch2_time_stats_to_text(&i->buf, &s->lock_hold_times); - printbuf_indent_sub(&i->buf, 2); + scoped_guard(printbuf_indent, &i->buf) + bch2_time_stats_to_text(&i->buf, &s->lock_hold_times); } if (s->max_paths_text) { prt_printf(&i->buf, "Maximum allocated btree paths (%u):\n", s->nr_max_paths); - printbuf_indent_add(&i->buf, 2); - prt_str_indented(&i->buf, s->max_paths_text); - printbuf_indent_sub(&i->buf, 2); + scoped_guard(printbuf_indent, &i->buf) + prt_str_indented(&i->buf, s->max_paths_text); } - printbuf_indent_sub(&i->buf, 2); prt_newline(&i->buf); i->iter++; } diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index d6c91abcdc41..f0ebf91cd5fd 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -735,10 +735,12 @@ invalid_device: goto fsck_err; } -static struct journal_key *accumulate_newer_accounting_keys(struct bch_fs *c, struct journal_key *i) +static struct journal_key *accumulate_newer_accounting_keys(struct btree_trans *trans, struct journal_key *i) { + struct bch_fs *c = trans->c; struct journal_keys *keys = &c->journal_keys; struct bkey_i *k = journal_key_k(c, i); + int ret = 0; darray_for_each_from(*keys, j, i + 1) { if (journal_key_cmp(c, i, j)) @@ -746,7 +748,18 @@ static struct journal_key *accumulate_newer_accounting_keys(struct bch_fs *c, st struct bkey_i *n = journal_key_k(c, j); if (n->k.type == KEY_TYPE_accounting) { - WARN_ON(bversion_cmp(k->k.bversion, n->k.bversion) >= 0); + if (bversion_cmp(k->k.bversion, n->k.bversion) >= 0) { + CLASS(printbuf, buf)(); + prt_printf(&buf, "accounting keys with out of order versions:"); + + prt_newline(&buf); + prt_printf(&buf, "%u.%u ", i->journal_seq_offset, i->journal_offset); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k)); + prt_newline(&buf); + prt_printf(&buf, "%u.%u ", j->journal_seq_offset, j->journal_offset); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(n)); + 
fsck_err(trans, accounting_key_version_out_of_order, "%s", buf.buf); + } bch2_accounting_accumulate(bkey_i_to_accounting(k), bkey_i_to_s_c_accounting(n)); @@ -755,14 +768,16 @@ static struct journal_key *accumulate_newer_accounting_keys(struct bch_fs *c, st } return &darray_top(*keys); +fsck_err: + return ERR_PTR(ret); } static struct journal_key *accumulate_and_read_journal_accounting(struct btree_trans *trans, struct journal_key *i) { - struct bch_fs *c = trans->c; - struct journal_key *next = accumulate_newer_accounting_keys(c, i); + struct journal_key *next = accumulate_newer_accounting_keys(trans, i); - int ret = accounting_read_key(trans, bkey_i_to_s_c(journal_key_k(c, i))); + int ret = PTR_ERR_OR_ZERO(next) ?: + accounting_read_key(trans, bkey_i_to_s_c(journal_key_k(trans->c, i))); return ret ? ERR_PTR(ret) : next; } diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index e33f3166c48a..9e69263eb796 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -394,7 +394,7 @@ int bch2_fsck_err_opt(struct bch_fs *c, flags |= fsck_flags_extra[err]; if (test_bit(BCH_FS_in_fsck, &c->flags) || - test_bit(BCH_FS_in_recovery, &c->flags)) { + c->opts.fix_errors != FSCK_FIX_exit) { if (!(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) return bch_err_throw(c, fsck_repair_unimplemented); diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 68a61f7bc737..c0d00a692c18 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -1270,14 +1270,14 @@ void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struc guard(rcu)(); struct bch_dev *ca = bch2_dev_rcu_noerror(c, ptr->dev); if (!ca) { - prt_printf(out, "ptr: %u:%llu gen %u%s", ptr->dev, + prt_printf(out, "%u:%llu gen %u%s", ptr->dev, (u64) ptr->offset, ptr->gen, ptr->cached ? 
" cached" : ""); } else { u32 offset; u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset); - prt_printf(out, "ptr: %u:%llu:%u gen %u", + prt_printf(out, "%u:%llu:%u gen %u", ptr->dev, b, offset, ptr->gen); if (ca->mi.durability != 1) prt_printf(out, " d=%u", ca->mi.durability); @@ -1295,7 +1295,7 @@ void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struc void bch2_extent_crc_unpacked_to_text(struct printbuf *out, struct bch_extent_crc_unpacked *crc) { - prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum ", + prt_printf(out, "c_size %u size %u offset %u nonce %u csum ", crc->compressed_size, crc->uncompressed_size, crc->offset, crc->nonce); @@ -1305,72 +1305,34 @@ void bch2_extent_crc_unpacked_to_text(struct printbuf *out, struct bch_extent_cr bch2_prt_compression_type(out, crc->compression_type); } -static void bch2_extent_rebalance_to_text(struct printbuf *out, struct bch_fs *c, - const struct bch_extent_rebalance *r) -{ - prt_str(out, "rebalance:"); - - prt_printf(out, " replicas=%u", r->data_replicas); - if (r->data_replicas_from_inode) - prt_str(out, " (inode)"); - - prt_str(out, " checksum="); - bch2_prt_csum_opt(out, r->data_checksum); - if (r->data_checksum_from_inode) - prt_str(out, " (inode)"); - - if (r->background_compression || r->background_compression_from_inode) { - prt_str(out, " background_compression="); - bch2_compression_opt_to_text(out, r->background_compression); - - if (r->background_compression_from_inode) - prt_str(out, " (inode)"); - } - - if (r->background_target || r->background_target_from_inode) { - prt_str(out, " background_target="); - if (c) - bch2_target_to_text(out, c, r->background_target); - else - prt_printf(out, "%u", r->background_target); - - if (r->background_target_from_inode) - prt_str(out, " (inode)"); - } - - if (r->promote_target || r->promote_target_from_inode) { - prt_str(out, " promote_target="); - if (c) - bch2_target_to_text(out, c, r->promote_target); - else - 
prt_printf(out, "%u", r->promote_target); - - if (r->promote_target_from_inode) - prt_str(out, " (inode)"); - } - - if (r->erasure_code || r->erasure_code_from_inode) { - prt_printf(out, " ec=%u", r->erasure_code); - if (r->erasure_code_from_inode) - prt_str(out, " (inode)"); - } -} +static const char * const extent_entry_types[] = { +#define x(t, n, ...) [n] = #t, + BCH_EXTENT_ENTRY_TYPES() +#undef x + NULL +}; void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; - bool first = true; if (c) prt_printf(out, "durability: %u ", bch2_bkey_durability_safe(c, k)); + guard(printbuf_indent)(out); + bkey_extent_entry_for_each(ptrs, entry) { - if (!first) - prt_printf(out, " "); + prt_newline(out); - switch (__extent_entry_type(entry)) { + unsigned type = __extent_entry_type(entry); + if (type < BCH_EXTENT_ENTRY_MAX) { + prt_str(out, extent_entry_types[__extent_entry_type(entry)]); + prt_str(out, ": "); + } + + switch (type) { case BCH_EXTENT_ENTRY_ptr: bch2_extent_ptr_to_text(out, c, entry_to_ptr(entry)); break; @@ -1387,8 +1349,7 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, case BCH_EXTENT_ENTRY_stripe_ptr: { const struct bch_extent_stripe_ptr *ec = &entry->stripe_ptr; - prt_printf(out, "ec: idx %llu block %u", - (u64) ec->idx, ec->block); + prt_printf(out, "idx %llu block %u", (u64) ec->idx, ec->block); break; } case BCH_EXTENT_ENTRY_rebalance: @@ -1403,8 +1364,6 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, prt_printf(out, "(invalid extent entry %.16llx)", *((u64 *) entry)); return; } - - first = false; } } diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 4aa130ff7cf6..bba273d55c37 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -598,7 +598,7 @@ static void __bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked *inode) { prt_printf(out, "\n"); - 
printbuf_indent_add(out, 2); + guard(printbuf_indent)(out); prt_printf(out, "mode=%o\n", inode->bi_mode); prt_str(out, "flags="); @@ -620,7 +620,6 @@ static void __bch2_inode_unpacked_to_text(struct printbuf *out, #undef x bch2_printbuf_strip_trailing_newline(out); - printbuf_indent_sub(out, 2); } void bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked *inode) diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index e7d53ab1cf55..330db82aba8b 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -408,9 +408,8 @@ void bch2_promote_op_to_text(struct printbuf *out, { if (!op->write.read_done) { prt_printf(out, "parent read: %px\n", op->write.rbio.parent); - printbuf_indent_add(out, 2); + guard(printbuf_indent)(out); bch2_read_bio_to_text(out, c, op->write.rbio.parent); - printbuf_indent_sub(out, 2); } bch2_data_update_to_text(out, &op->write); @@ -1076,8 +1075,10 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, } if ((bch2_bkey_extent_flags(k) & BIT_ULL(BCH_EXTENT_FLAG_poisoned)) && - !orig->data_update) - return bch_err_throw(c, extent_poisoned); + !orig->data_update) { + ret = bch_err_throw(c, extent_poisoned); + goto err; + } retry_pick: ret = bch2_bkey_pick_read_device(c, k, failed, &pick, dev); @@ -1517,7 +1518,7 @@ void bch2_read_bio_to_text(struct printbuf *out, /* Are we in a retry? 
*/ - printbuf_indent_add(out, 2); + guard(printbuf_indent)(out); u64 now = local_clock(); prt_printf(out, "start_time:\t"); @@ -1551,7 +1552,6 @@ void bch2_read_bio_to_text(struct printbuf *out, prt_newline(out); bch2_bio_to_text(out, &rbio->bio); - printbuf_indent_sub(out, 2); } void bch2_fs_io_read_exit(struct bch_fs *c) diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index 1d83dcc9731e..aed22fc7759b 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -1742,7 +1742,7 @@ void bch2_write_op_to_text(struct printbuf *out, struct bch_write_op *op) prt_printf(out, "pos:\t"); bch2_bpos_to_text(out, op->pos); prt_newline(out); - printbuf_indent_add(out, 2); + guard(printbuf_indent)(out); prt_printf(out, "started:\t"); bch2_pr_time_units(out, local_clock() - op->start_time); @@ -1754,11 +1754,12 @@ void bch2_write_op_to_text(struct printbuf *out, struct bch_write_op *op) prt_printf(out, "nr_replicas:\t%u\n", op->nr_replicas); prt_printf(out, "nr_replicas_required:\t%u\n", op->nr_replicas_required); + prt_printf(out, "devs_have:\t"); + bch2_devs_list_to_text(out, &op->devs_have); + prt_newline(out); prt_printf(out, "ref:\t%u\n", closure_nr_remaining(&op->cl)); prt_printf(out, "ret\t%s\n", bch2_err_str(op->error)); - - printbuf_indent_sub(out, 2); } void bch2_fs_io_write_exit(struct bch_fs *c) diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 93ac0faedf7d..6505c79f8516 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -48,7 +48,7 @@ static void bch2_journal_buf_to_text(struct printbuf *out, struct journal *j, u6 struct journal_buf *buf = j->buf + i; prt_printf(out, "seq:\t%llu\n", seq); - printbuf_indent_add(out, 2); + guard(printbuf_indent)(out); if (!buf->write_started) prt_printf(out, "refcount:\t%u\n", journal_state_count(s, i & JOURNAL_STATE_BUF_MASK)); @@ -81,8 +81,6 @@ static void bch2_journal_buf_to_text(struct printbuf *out, struct journal *j, u6 if (buf->write_done) prt_str(out, "write_done"); 
prt_newline(out); - - printbuf_indent_sub(out, 2); } static void bch2_journal_bufs_to_text(struct printbuf *out, struct journal *j) @@ -1767,20 +1765,20 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) bch2_journal_bufs_to_text(out, j); prt_printf(out, "space:\n"); - printbuf_indent_add(out, 2); - prt_printf(out, "discarded\t%u:%u\n", - j->space[journal_space_discarded].next_entry, - j->space[journal_space_discarded].total); - prt_printf(out, "clean ondisk\t%u:%u\n", - j->space[journal_space_clean_ondisk].next_entry, - j->space[journal_space_clean_ondisk].total); - prt_printf(out, "clean\t%u:%u\n", - j->space[journal_space_clean].next_entry, - j->space[journal_space_clean].total); - prt_printf(out, "total\t%u:%u\n", - j->space[journal_space_total].next_entry, - j->space[journal_space_total].total); - printbuf_indent_sub(out, 2); + scoped_guard(printbuf_indent, out) { + prt_printf(out, "discarded\t%u:%u\n", + j->space[journal_space_discarded].next_entry, + j->space[journal_space_discarded].total); + prt_printf(out, "clean ondisk\t%u:%u\n", + j->space[journal_space_clean_ondisk].next_entry, + j->space[journal_space_clean_ondisk].total); + prt_printf(out, "clean\t%u:%u\n", + j->space[journal_space_clean].next_entry, + j->space[journal_space_clean].total); + prt_printf(out, "total\t%u:%u\n", + j->space[journal_space_total].next_entry, + j->space[journal_space_total].total); + } for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal]) { if (!ca->mi.durability) @@ -1796,7 +1794,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) prt_printf(out, "dev %u:\n", ca->dev_idx); prt_printf(out, "durability %u:\n", ca->mi.durability); - printbuf_indent_add(out, 2); + guard(printbuf_indent)(out); prt_printf(out, "nr\t%u\n", ja->nr); prt_printf(out, "bucket size\t%u\n", ca->mi.bucket_size); prt_printf(out, "available\t%u:%u\n", bch2_journal_dev_buckets_available(j, ja, journal_space_discarded), ja->sectors_free); @@ 
-1804,7 +1802,6 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) prt_printf(out, "dirty_ondisk\t%u (seq %llu)\n",ja->dirty_idx_ondisk, ja->bucket_seq[ja->dirty_idx_ondisk]); prt_printf(out, "dirty_idx\t%u (seq %llu)\n", ja->dirty_idx, ja->bucket_seq[ja->dirty_idx]); prt_printf(out, "cur_idx\t%u (seq %llu)\n", ja->cur_idx, ja->bucket_seq[ja->cur_idx]); - printbuf_indent_sub(out, 2); } prt_printf(out, "replicas want %u need %u\n", c->opts.metadata_replicas, c->opts.metadata_replicas_required); diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 44328d02cf67..e6f778bf7763 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -760,8 +760,8 @@ static void journal_entry_dev_usage_to_text(struct printbuf *out, struct bch_fs return; prt_printf(out, "dev=%u", le32_to_cpu(u->dev)); + guard(printbuf_indent)(out); - printbuf_indent_add(out, 2); for (i = 0; i < nr_types; i++) { prt_newline(out); bch2_prt_data_type(out, i); @@ -770,7 +770,6 @@ static void journal_entry_dev_usage_to_text(struct printbuf *out, struct bch_fs le64_to_cpu(u->d[i].sectors), le64_to_cpu(u->d[i].fragmented)); } - printbuf_indent_sub(out, 2); } static int journal_entry_log_validate(struct bch_fs *c, diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index bd1885607d3e..ae747c87fcf9 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -1019,7 +1019,7 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 pin_list = journal_seq_pin(j, *seq); prt_printf(out, "%llu: count %u\n", *seq, atomic_read(&pin_list->count)); - printbuf_indent_add(out, 2); + guard(printbuf_indent)(out); prt_printf(out, "unflushed:\n"); for (unsigned i = 0; i < ARRAY_SIZE(pin_list->unflushed); i++) @@ -1031,8 +1031,6 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 list_for_each_entry(pin, &pin_list->flushed[i], list) prt_printf(out, "\t%px %ps\n", pin, 
pin->flush); - printbuf_indent_sub(out, 2); - return false; } diff --git a/fs/bcachefs/journal_sb.c b/fs/bcachefs/journal_sb.c index 0cb9b93f13e7..dc0ecedb3a8f 100644 --- a/fs/bcachefs/journal_sb.c +++ b/fs/bcachefs/journal_sb.c @@ -30,7 +30,7 @@ static int bch2_sb_journal_validate(struct bch_sb *sb, struct bch_sb_field *f, if (!nr) return 0; - b = kmalloc_array(nr, sizeof(u64), GFP_KERNEL); + b = kvmalloc_array(nr, sizeof(u64), GFP_KERNEL); if (!b) return -BCH_ERR_ENOMEM_sb_journal_validate; @@ -64,7 +64,7 @@ static int bch2_sb_journal_validate(struct bch_sb *sb, struct bch_sb_field *f, ret = 0; err: - kfree(b); + kvfree(b); return ret; } @@ -113,7 +113,7 @@ static int bch2_sb_journal_v2_validate(struct bch_sb *sb, struct bch_sb_field *f if (!nr) return 0; - b = kmalloc_array(nr, sizeof(*b), GFP_KERNEL); + b = kvmalloc_array(nr, sizeof(*b), GFP_KERNEL); if (!b) return -BCH_ERR_ENOMEM_sb_journal_v2_validate; @@ -165,7 +165,7 @@ static int bch2_sb_journal_v2_validate(struct bch_sb *sb, struct bch_sb_field *f ret = 0; err: - kfree(b); + kvfree(b); return ret; } @@ -230,3 +230,40 @@ int bch2_journal_buckets_to_sb(struct bch_fs *c, struct bch_dev *ca, BUG_ON(dst + 1 != nr_compacted); return 0; } + +static inline bool journal_v2_unsorted(struct bch_sb_field_journal_v2 *j) +{ + unsigned nr = bch2_sb_field_journal_v2_nr_entries(j); + for (unsigned i = 0; i + 1 < nr; i++) + if (le64_to_cpu(j->d[i].start) > le64_to_cpu(j->d[i + 1].start)) + return true; + return false; +} + +int bch2_sb_journal_sort(struct bch_fs *c) +{ + BUG_ON(!c->sb.clean); + BUG_ON(test_bit(BCH_FS_rw, &c->flags)); + + guard(mutex)(&c->sb_lock); + bool write_sb = false; + + for_each_online_member(c, ca, BCH_DEV_READ_REF_sb_journal_sort) { + struct bch_sb_field_journal_v2 *j = bch2_sb_field_get(ca->disk_sb.sb, journal_v2); + if (!j) + continue; + + if ((j && journal_v2_unsorted(j)) || + bch2_sb_field_get(ca->disk_sb.sb, journal)) { + struct journal_device *ja = &ca->journal; + + sort(ja->buckets, ja->nr, 
sizeof(ja->buckets[0]), u64_cmp, NULL); + bch2_journal_buckets_to_sb(c, ca, ja->buckets, ja->nr); + write_sb = true; + } + } + + return write_sb + ? bch2_write_super(c) + : 0; +} diff --git a/fs/bcachefs/journal_sb.h b/fs/bcachefs/journal_sb.h index ba40a7e8d90a..e0fc40652607 100644 --- a/fs/bcachefs/journal_sb.h +++ b/fs/bcachefs/journal_sb.h @@ -22,3 +22,4 @@ extern const struct bch_sb_field_ops bch_sb_field_ops_journal; extern const struct bch_sb_field_ops bch_sb_field_ops_journal_v2; int bch2_journal_buckets_to_sb(struct bch_fs *, struct bch_dev *, u64 *, unsigned); +int bch2_sb_journal_sort(struct bch_fs *); diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 4f41f1f6ec6c..c46a8965a7eb 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -121,6 +121,7 @@ struct moving_io { static void move_free(struct moving_io *io) { struct moving_context *ctxt = io->write.ctxt; + struct bch_fs *c = io->write.op.c; if (io->b) atomic_dec(&io->b->count); @@ -132,8 +133,9 @@ static void move_free(struct moving_io *io) if (!io->write.data_opts.scrub) { bch2_data_update_exit(&io->write); } else { - bch2_bio_free_pages_pool(io->write.op.c, &io->write.op.wbio.bio); + bch2_bio_free_pages_pool(c, &io->write.op.wbio.bio); kfree(io->write.bvecs); + bch2_bkey_buf_exit(&io->write.k, c); } kfree(io); } @@ -427,7 +429,9 @@ int bch2_move_extent(struct moving_context *ctxt, data_opts.scrub ? 
data_opts.read_dev : -1); return 0; err: + bch2_bkey_buf_exit(&io->write.k, c); kfree(io); + if (bch2_err_matches(ret, EROFS) || bch2_err_matches(ret, BCH_ERR_transaction_restart)) return ret; @@ -863,7 +867,8 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, u64 check_mismatch_done = bucket_start; int ret = 0; - CLASS(bch2_dev_tryget, ca)(c, dev); + /* Userspace might have supplied @dev: */ + CLASS(bch2_dev_tryget_noerror, ca)(c, dev); if (!ca) return 0; @@ -1400,7 +1405,7 @@ void bch2_move_stats_to_text(struct printbuf *out, struct bch_move_stats *stats) prt_str(out, " pos="); bch2_bbpos_to_text(out, stats->pos); prt_newline(out); - printbuf_indent_add(out, 2); + guard(printbuf_indent)(out); prt_printf(out, "keys moved:\t%llu\n", atomic64_read(&stats->keys_moved)); prt_printf(out, "keys raced:\t%llu\n", atomic64_read(&stats->keys_raced)); @@ -1415,8 +1420,6 @@ void bch2_move_stats_to_text(struct printbuf *out, struct bch_move_stats *stats) prt_printf(out, "bytes raced:\t"); prt_human_readable_u64(out, atomic64_read(&stats->sectors_raced) << 9); prt_newline(out); - - printbuf_indent_sub(out, 2); } static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, struct moving_context *ctxt) @@ -1425,7 +1428,7 @@ static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, str printbuf_tabstop_push(out, 32); bch2_move_stats_to_text(out, ctxt->stats); - printbuf_indent_add(out, 2); + guard(printbuf_indent)(out); prt_printf(out, "reads: ios %u/%u sectors %u/%u\n", atomic_read(&ctxt->read_ios), @@ -1439,15 +1442,13 @@ static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, str atomic_read(&ctxt->write_sectors), c->opts.move_bytes_in_flight >> 9); - printbuf_indent_add(out, 2); + guard(printbuf_indent)(out); scoped_guard(mutex, &ctxt->lock) { struct moving_io *io; list_for_each_entry(io, &ctxt->ios, io_list) bch2_data_update_inflight_to_text(out, &io->write); } - - printbuf_indent_sub(out, 4); } void 
bch2_fs_moving_ctxts_to_text(struct printbuf *out, struct bch_fs *c) diff --git a/fs/bcachefs/printbuf.h b/fs/bcachefs/printbuf.h index 907e5c97550b..5fa5265d7ba8 100644 --- a/fs/bcachefs/printbuf.h +++ b/fs/bcachefs/printbuf.h @@ -299,4 +299,18 @@ DEFINE_GUARD(printbuf_atomic, struct printbuf *, printbuf_atomic_inc(_T), printbuf_atomic_dec(_T)); +static inline void printbuf_indent_add_2(struct printbuf *out) +{ + bch2_printbuf_indent_add(out, 2); +} + +static inline void printbuf_indent_sub_2(struct printbuf *out) +{ + bch2_printbuf_indent_sub(out, 2); +} + +DEFINE_GUARD(printbuf_indent, struct printbuf *, + printbuf_indent_add_2(_T), + printbuf_indent_sub_2(_T)); + #endif /* _BCACHEFS_PRINTBUF_H */ diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index 25bf72dc6488..35aff96bf12a 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -43,6 +43,55 @@ static const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s return bch2_bkey_ptrs_rebalance_opts(bch2_bkey_ptrs_c(k)); } +void bch2_extent_rebalance_to_text(struct printbuf *out, struct bch_fs *c, + const struct bch_extent_rebalance *r) +{ + prt_printf(out, "replicas=%u", r->data_replicas); + if (r->data_replicas_from_inode) + prt_str(out, " (inode)"); + + prt_str(out, " checksum="); + bch2_prt_csum_opt(out, r->data_checksum); + if (r->data_checksum_from_inode) + prt_str(out, " (inode)"); + + if (r->background_compression || r->background_compression_from_inode) { + prt_str(out, " background_compression="); + bch2_compression_opt_to_text(out, r->background_compression); + + if (r->background_compression_from_inode) + prt_str(out, " (inode)"); + } + + if (r->background_target || r->background_target_from_inode) { + prt_str(out, " background_target="); + if (c) + bch2_target_to_text(out, c, r->background_target); + else + prt_printf(out, "%u", r->background_target); + + if (r->background_target_from_inode) + prt_str(out, " (inode)"); + } + + if (r->promote_target || 
r->promote_target_from_inode) { + prt_str(out, " promote_target="); + if (c) + bch2_target_to_text(out, c, r->promote_target); + else + prt_printf(out, "%u", r->promote_target); + + if (r->promote_target_from_inode) + prt_str(out, " (inode)"); + } + + if (r->erasure_code || r->erasure_code_from_inode) { + prt_printf(out, " ec=%u", r->erasure_code); + if (r->erasure_code_from_inode) + prt_str(out, " (inode)"); + } +} + static inline unsigned bch2_bkey_ptrs_need_compress(struct bch_fs *c, struct bch_io_opts *opts, struct bkey_s_c k, @@ -661,7 +710,7 @@ void bch2_rebalance_status_to_text(struct printbuf *out, struct bch_fs *c) prt_str(out, bch2_rebalance_state_strs[r->state]); prt_newline(out); - printbuf_indent_add(out, 2); + guard(printbuf_indent)(out); switch (r->state) { case BCH_REBALANCE_waiting: { @@ -700,8 +749,6 @@ void bch2_rebalance_status_to_text(struct printbuf *out, struct bch_fs *c) bch2_prt_task_backtrace(out, t, 0, GFP_KERNEL); put_task_struct(t); } - - printbuf_indent_sub(out, 2); } void bch2_rebalance_stop(struct bch_fs *c) diff --git a/fs/bcachefs/rebalance.h b/fs/bcachefs/rebalance.h index 7a565ea7dbfc..4a8812a65c61 100644 --- a/fs/bcachefs/rebalance.h +++ b/fs/bcachefs/rebalance.h @@ -26,6 +26,9 @@ static inline struct bch_extent_rebalance io_opts_to_rebalance_opts(struct bch_f return r; }; +void bch2_extent_rebalance_to_text(struct printbuf *, struct bch_fs *, + const struct bch_extent_rebalance *); + u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *, struct bkey_s_c); int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bch_io_opts *, struct bkey_i *); int bch2_get_update_rebalance_opts(struct btree_trans *, diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 6319144a440c..8679c8aad0e7 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -15,6 +15,7 @@ #include "error.h" #include "journal_io.h" #include "journal_reclaim.h" +#include "journal_sb.h" #include "journal_seq_blacklist.h" #include "logged_ops.h" 
#include "move.h" @@ -67,9 +68,12 @@ int bch2_btree_lost_data(struct bch_fs *c, #endif write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_lru_entry_bad, ext->errors_silent); + write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_to_missing_lru_entry, ext->errors_silent); write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_backpointer_to_missing_ptr, ext->errors_silent); write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent); write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong, ext->errors_silent); + write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_need_discard_key_wrong, ext->errors_silent); + write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_freespace_key_wrong, ext->errors_silent); switch (btree) { case BTREE_ID_alloc: @@ -644,6 +648,10 @@ int bch2_fs_recovery(struct bch_fs *c) bch_info(c, "recovering from clean shutdown, journal seq %llu", le64_to_cpu(clean->journal_seq)); + + ret = bch2_sb_journal_sort(c); + if (ret) + goto err; } else { bch_info(c, "recovering from unclean shutdown"); } diff --git a/fs/bcachefs/recovery_passes_format.h b/fs/bcachefs/recovery_passes_format.h index 2696eee00345..d5654de64e4c 100644 --- a/fs/bcachefs/recovery_passes_format.h +++ b/fs/bcachefs/recovery_passes_format.h @@ -29,6 +29,7 @@ x(stripes_read, 1, 0) \ x(initialize_subvolumes, 2, 0) \ x(snapshots_read, 3, PASS_ALWAYS) \ + x(delete_dead_interior_snapshots, 44, 0) \ x(check_allocations, 5, PASS_FSCK_ALLOC) \ x(trans_mark_dev_sbs, 6, PASS_ALWAYS|PASS_SILENT|PASS_ALLOC) \ x(fs_journal_alloc, 7, PASS_ALWAYS|PASS_SILENT|PASS_ALLOC) \ diff --git a/fs/bcachefs/sb-errors.c b/fs/bcachefs/sb-errors.c index 41a259eab4fb..b356e80135fd 100644 --- a/fs/bcachefs/sb-errors.c +++ b/fs/bcachefs/sb-errors.c @@ -54,23 +54,41 @@ static int bch2_sb_errors_validate(struct bch_sb *sb, struct bch_sb_field *f, return 0; } +static int error_entry_cmp(const void *_l, const void *_r) +{ + const struct bch_sb_field_error_entry *l 
= _l; + const struct bch_sb_field_error_entry *r = _r; + + return -cmp_int(l->last_error_time, r->last_error_time); +} + static void bch2_sb_errors_to_text(struct printbuf *out, struct bch_sb *sb, struct bch_sb_field *f) { struct bch_sb_field_errors *e = field_to_type(f, errors); - unsigned i, nr = bch2_sb_field_errors_nr_entries(e); + unsigned nr = bch2_sb_field_errors_nr_entries(e); + + struct bch_sb_field_error_entry *sorted = kvmalloc_array(nr, sizeof(*sorted), GFP_KERNEL); + + if (sorted) + sort(sorted, nr, sizeof(*sorted), error_entry_cmp, NULL); + else + sorted = e->entries; if (out->nr_tabstops <= 1) printbuf_tabstop_push(out, 16); - for (i = 0; i < nr; i++) { - bch2_sb_error_id_to_text(out, BCH_SB_ERROR_ENTRY_ID(&e->entries[i])); + for (struct bch_sb_field_error_entry *i = sorted; i < sorted + nr; i++) { + bch2_sb_error_id_to_text(out, BCH_SB_ERROR_ENTRY_ID(i)); prt_tab(out); - prt_u64(out, BCH_SB_ERROR_ENTRY_NR(&e->entries[i])); + prt_u64(out, BCH_SB_ERROR_ENTRY_NR(i)); prt_tab(out); - bch2_prt_datetime(out, le64_to_cpu(e->entries[i].last_error_time)); + bch2_prt_datetime(out, le64_to_cpu(i->last_error_time)); prt_newline(out); } + + if (sorted != e->entries) + kvfree(sorted); } const struct bch_sb_field_ops bch_sb_field_ops_errors = { diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index aa0ea1ec9f10..7c6f18a1ee2a 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -329,6 +329,7 @@ enum bch_fsck_flags { x(accounting_key_version_0, 282, FSCK_AUTOFIX) \ x(accounting_key_nr_counters_wrong, 307, FSCK_AUTOFIX) \ x(accounting_key_underflow, 325, FSCK_AUTOFIX) \ + x(accounting_key_version_out_of_order, 326, FSCK_AUTOFIX) \ x(logged_op_but_clean, 283, FSCK_AUTOFIX) \ x(compression_opt_not_marked_in_sb, 295, FSCK_AUTOFIX) \ x(compression_type_not_marked_in_sb, 296, FSCK_AUTOFIX) \ @@ -337,7 +338,7 @@ enum bch_fsck_flags { x(dirent_stray_data_after_cf_name, 305, 0) \ x(rebalance_work_incorrectly_set, 
309, FSCK_AUTOFIX) \ x(rebalance_work_incorrectly_unset, 310, FSCK_AUTOFIX) \ - x(MAX, 326, 0) + x(MAX, 327, 0) enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index d26a0ca4a59d..963f8c2690c9 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -36,12 +36,10 @@ int bch2_dev_missing_bkey(struct bch_fs *c, struct bkey_s_c k, unsigned dev) void bch2_dev_missing_atomic(struct bch_fs *c, unsigned dev) { - if (dev != BCH_SB_MEMBER_INVALID) { + if (dev != BCH_SB_MEMBER_INVALID) bch2_fs_inconsistent(c, "pointer to %s device %u", test_bit(dev, c->devs_removed.d) ? "removed" : "nonexistent", dev); - dump_stack(); - } } void bch2_dev_bucket_missing(struct bch_dev *ca, u64 bucket) @@ -287,10 +285,9 @@ static void member_to_text(struct printbuf *out, return; prt_printf(out, "Device:\t%u\n", idx); + guard(printbuf_indent)(out); - printbuf_indent_add(out, 2); bch2_member_to_text(out, &m, gi, sb, idx); - printbuf_indent_sub(out, 2); } static int bch2_sb_members_v1_validate(struct bch_sb *sb, struct bch_sb_field *f, @@ -437,21 +434,19 @@ void bch2_dev_io_errors_to_text(struct printbuf *out, struct bch_dev *ca) prt_str(out, "IO errors since filesystem creation"); prt_newline(out); - printbuf_indent_add(out, 2); - for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) - prt_printf(out, "%s:\t%llu\n", bch2_member_error_strs[i], atomic64_read(&ca->errors[i])); - printbuf_indent_sub(out, 2); + scoped_guard(printbuf_indent, out) + for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) + prt_printf(out, "%s:\t%llu\n", bch2_member_error_strs[i], atomic64_read(&ca->errors[i])); prt_str(out, "IO errors since "); bch2_pr_time_units(out, (ktime_get_real_seconds() - le64_to_cpu(m.errors_reset_time)) * NSEC_PER_SEC); prt_str(out, " ago"); prt_newline(out); - printbuf_indent_add(out, 2); - for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) - prt_printf(out, "%s:\t%llu\n", bch2_member_error_strs[i], - 
atomic64_read(&ca->errors[i]) - le64_to_cpu(m.errors_at_reset[i])); - printbuf_indent_sub(out, 2); + scoped_guard(printbuf_indent, out) + for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) + prt_printf(out, "%s:\t%llu\n", bch2_member_error_strs[i], + atomic64_read(&ca->errors[i]) - le64_to_cpu(m.errors_at_reset[i])); } void bch2_dev_errors_reset(struct bch_dev *ca) diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index eab0c1e3ff56..00546b59dca6 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -309,7 +309,7 @@ static int __bch2_mark_snapshot(struct btree_trans *trans, if (new.k->type == KEY_TYPE_snapshot) { struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new); - t->state = !BCH_SNAPSHOT_DELETED(s.v) + t->state = !BCH_SNAPSHOT_DELETED(s.v) && !BCH_SNAPSHOT_NO_KEYS(s.v) ? SNAPSHOT_ID_live : SNAPSHOT_ID_deleted; t->parent = le32_to_cpu(s.v->parent); @@ -1101,6 +1101,20 @@ int bch2_snapshot_node_set_deleted(struct btree_trans *trans, u32 id) return 0; } +static int bch2_snapshot_node_set_no_keys(struct btree_trans *trans, u32 id) +{ + struct bkey_i_snapshot *s = + bch2_bkey_get_mut_typed(trans, BTREE_ID_snapshots, POS(0, id), 0, snapshot); + int ret = PTR_ERR_OR_ZERO(s); + bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c, "missing snapshot %u", id); + if (unlikely(ret)) + return ret; + + SET_BCH_SNAPSHOT_NO_KEYS(&s->v, true); + s->v.subvol = 0; + return 0; +} + static inline void normalize_snapshot_child_pointers(struct bch_snapshot *s) { if (le32_to_cpu(s->children[0]) < le32_to_cpu(s->children[1])) @@ -1783,22 +1797,9 @@ int __bch2_delete_dead_snapshots(struct bch_fs *c) if (ret) goto err; } - - /* - * Fixing children of deleted snapshots can't be done completely - * atomically, if we crash between here and when we delete the interior - * nodes some depth fields will be off: - */ - ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, POS_MIN, - BTREE_ITER_intent, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 
- bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &d->delete_interior)); - if (ret) - goto err; - darray_for_each(d->delete_interior, i) { ret = commit_do(trans, NULL, NULL, 0, - bch2_snapshot_node_delete(trans, i->id)); + bch2_snapshot_node_set_no_keys(trans, i->id)); if (!bch2_err_matches(ret, EROFS)) bch_err_msg(c, ret, "deleting snapshot %u", i->id); if (ret) @@ -1887,6 +1888,66 @@ int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans, return ret; } +static int bch2_get_dead_interior_snapshots(struct btree_trans *trans, struct bkey_s_c k) +{ + struct bch_fs *c = trans->c; + + if (k.k->type == KEY_TYPE_snapshot && + BCH_SNAPSHOT_NO_KEYS(bkey_s_c_to_snapshot(k).v)) { + struct snapshot_interior_delete n = { + .id = k.k->p.offset, + .live_child = live_child(c, k.k->p.offset), + }; + + if (!n.live_child) { + bch_err(c, "error finding live child of snapshot %u", n.id); + return -EINVAL; + } + + return darray_push(&c->snapshot_delete.delete_interior, n); + } + + return 0; +} + +int bch2_delete_dead_interior_snapshots(struct bch_fs *c) +{ + CLASS(btree_trans, trans)(c); + int ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots, POS_MAX, 0, k, + bch2_get_dead_interior_snapshots(trans, k)); + if (ret) + goto err; + + struct snapshot_delete *d = &c->snapshot_delete; + if (d->delete_interior.nr) { + /* + * Fixing children of deleted snapshots can't be done completely + * atomically, if we crash between here and when we delete the interior + * nodes some depth fields will be off: + */ + ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, POS_MIN, + BTREE_ITER_intent, k, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, + bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &d->delete_interior)); + if (ret) + goto err; + + darray_for_each(d->delete_interior, i) { + ret = commit_do(trans, NULL, NULL, 0, + bch2_snapshot_node_delete(trans, i->id)); + if (!bch2_err_matches(ret, EROFS)) + bch_err_msg(c, ret, "deleting snapshot %u", i->id); + if (ret) + 
goto err; + } + + darray_exit(&d->delete_interior); + } +err: + bch_err_fn(c, ret); + return ret; +} + static bool interior_snapshot_needs_delete(struct bkey_s_c_snapshot snap) { /* If there's one child, it's redundant and keys will be moved to the child */ @@ -1895,13 +1956,18 @@ static bool interior_snapshot_needs_delete(struct bkey_s_c_snapshot snap) static int bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct bkey_s_c k) { + struct bch_fs *c = trans->c; + if (k.k->type != KEY_TYPE_snapshot) return 0; - struct bkey_s_c_snapshot snap = bkey_s_c_to_snapshot(k); - if (BCH_SNAPSHOT_WILL_DELETE(snap.v) || - interior_snapshot_needs_delete(snap)) - set_bit(BCH_FS_need_delete_dead_snapshots, &trans->c->flags); + struct bkey_s_c_snapshot s= bkey_s_c_to_snapshot(k); + + if (BCH_SNAPSHOT_NO_KEYS(s.v)) + c->recovery.passes_to_run |= BIT_ULL(BCH_RECOVERY_PASS_delete_dead_interior_snapshots); + if (BCH_SNAPSHOT_WILL_DELETE(s.v) || + interior_snapshot_needs_delete(s)) + set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags); return 0; } @@ -1909,6 +1975,15 @@ static int bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct int bch2_snapshots_read(struct bch_fs *c) { /* + * It's important that we check if we need to reconstruct snapshots + * before going RW, so we mark that pass as required in the superblock - + * otherwise, we could end up deleting keys with missing snapshot nodes + * instead + */ + BUG_ON(!test_bit(BCH_FS_new_fs, &c->flags) && + test_bit(BCH_FS_may_go_rw, &c->flags)); + + /* * Initializing the is_ancestor bitmaps requires ancestors to already be * initialized - so mark in reverse: */ @@ -1919,15 +1994,6 @@ int bch2_snapshots_read(struct bch_fs *c) bch2_check_snapshot_needs_deletion(trans, k)); bch_err_fn(c, ret); - /* - * It's important that we check if we need to reconstruct snapshots - * before going RW, so we mark that pass as required in the superblock - - * otherwise, we could end up deleting keys with missing snapshot 
nodes - * instead - */ - BUG_ON(!test_bit(BCH_FS_new_fs, &c->flags) && - test_bit(BCH_FS_may_go_rw, &c->flags)); - return ret; } diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h index 28d9a29a1fd0..65d43a7ab877 100644 --- a/fs/bcachefs/snapshot.h +++ b/fs/bcachefs/snapshot.h @@ -291,6 +291,7 @@ void bch2_delete_dead_snapshots_work(struct work_struct *); void bch2_delete_dead_snapshots_async(struct bch_fs *); void bch2_snapshot_delete_status_to_text(struct printbuf *, struct bch_fs *); +int bch2_delete_dead_interior_snapshots(struct bch_fs *); int bch2_snapshots_read(struct bch_fs *); void bch2_fs_snapshots_exit(struct bch_fs *); void bch2_fs_snapshots_init_early(struct bch_fs *); diff --git a/fs/bcachefs/snapshot_format.h b/fs/bcachefs/snapshot_format.h index 9bccae1f3590..444885106140 100644 --- a/fs/bcachefs/snapshot_format.h +++ b/fs/bcachefs/snapshot_format.h @@ -15,10 +15,35 @@ struct bch_snapshot { bch_le128 btime; }; +/* + * WILL_DELETE: leaf node that's no longer referenced by a subvolume, still has + * keys, will be deleted by delete_dead_snapshots + * + * SUBVOL: true if a subvol points to this snapshot (why do we have this? + * subvols are nonzero) + * + * DELETED: we never delete snapshot keys, we mark them as deleted so that we + * can distinguish between a key for a missing snapshot (and we have no idea + * what happened) and a key for a deleted snapshot (delete_dead_snapshots() missed + * something, key should be deleted) + * + * NO_KEYS: we don't remove interior snapshot nodes from snapshot trees at + * runtime, since we can't do the adjustments for the depth/skiplist field + * atomically - and that breaks e.g. is_ancestor(). Instead, we mark it to be + * deleted at the next remount; this tells us that we don't need to run the full + * delete_dead_snapshots(). 
+ * + * + * XXX - todo item: + * + * We should guard against a bitflip causing us to delete a snapshot incorrectly + * by cross checking with the subvolume btree: delete_dead_snapshots() can take + * out more data than any other codepath if it runs incorrectly + */ LE32_BITMASK(BCH_SNAPSHOT_WILL_DELETE, struct bch_snapshot, flags, 0, 1) -/* True if a subvolume points to this snapshot node: */ LE32_BITMASK(BCH_SNAPSHOT_SUBVOL, struct bch_snapshot, flags, 1, 2) LE32_BITMASK(BCH_SNAPSHOT_DELETED, struct bch_snapshot, flags, 2, 3) +LE32_BITMASK(BCH_SNAPSHOT_NO_KEYS, struct bch_snapshot, flags, 3, 4) /* * Snapshot trees: diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 61eeac671283..98d31a1f9630 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -1516,8 +1516,7 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb, prt_newline(out); prt_printf(out, "Options:"); prt_newline(out); - printbuf_indent_add(out, 2); - { + scoped_guard(printbuf_indent, out) { enum bch_opt_id id; for (id = 0; id < bch2_opts_nr; id++) { @@ -1534,15 +1533,12 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb, } } - printbuf_indent_sub(out, 2); - if (print_layout) { prt_newline(out); prt_printf(out, "layout:"); prt_newline(out); - printbuf_indent_add(out, 2); - bch2_sb_layout_to_text(out, &sb->layout); - printbuf_indent_sub(out, 2); + scoped_guard(printbuf_indent, out) + bch2_sb_layout_to_text(out, &sb->layout); } vstruct_for_each(sb, f) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index d640ae188722..32b12311928e 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -277,6 +277,17 @@ struct bch_fs *bch2_uuid_to_fs(__uuid_t uuid) return c; } +void bch2_devs_list_to_text(struct printbuf *out, struct bch_devs_list *d) +{ + prt_char(out, '['); + darray_for_each(*d, i) { + if (i != d->data) + prt_char(out, ' '); + prt_printf(out, "%u", *i); + } + prt_char(out, ']'); +} + /* Filesystem RO/RW: */ /* @@ -461,9 +472,11 @@ static 
bool __bch2_fs_emergency_read_only2(struct bch_fs *c, struct printbuf *ou bch2_fs_read_only_async(c); wake_up(&bch2_read_only_wait); - if (ret) + if (ret) { prt_printf(out, "emergency read only at seq %llu\n", journal_cur_seq(&c->journal)); + bch2_prt_task_backtrace(out, current, 2, out->atomic ? GFP_ATOMIC : GFP_KERNEL); + } return ret; } @@ -1769,7 +1782,7 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx) static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb, struct printbuf *err) { - unsigned ret; + int ret; if (bch2_dev_is_online(ca)) { prt_printf(err, "already have device online in slot %u\n", diff --git a/fs/bcachefs/super.h b/fs/bcachefs/super.h index d13dbf2b8227..351dc5911645 100644 --- a/fs/bcachefs/super.h +++ b/fs/bcachefs/super.h @@ -16,6 +16,8 @@ extern const char * const bch2_dev_write_refs[]; struct bch_fs *bch2_dev_to_fs(dev_t); struct bch_fs *bch2_uuid_to_fs(__uuid_t); +void bch2_devs_list_to_text(struct printbuf *, struct bch_devs_list *); + bool bch2_dev_state_allowed(struct bch_fs *, struct bch_dev *, enum bch_member_state, int, struct printbuf *); diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index 2ded7f3c835f..2a9462275f92 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -415,45 +415,41 @@ void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats printbuf_tabstop_push(out, TABSTOP_SIZE); prt_printf(out, "duration of events\n"); - printbuf_indent_add(out, 2); - - pr_name_and_units(out, "min:", stats->min_duration); - pr_name_and_units(out, "max:", stats->max_duration); - pr_name_and_units(out, "total:", stats->total_duration); - - prt_printf(out, "mean:\t"); - bch2_pr_time_units_aligned(out, d_mean); - prt_tab(out); - bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT)); - prt_newline(out); - - prt_printf(out, "stddev:\t"); - bch2_pr_time_units_aligned(out, d_stddev); - prt_tab(out); - 
bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT)); + scoped_guard(printbuf_indent, out) { + pr_name_and_units(out, "min:", stats->min_duration); + pr_name_and_units(out, "max:", stats->max_duration); + pr_name_and_units(out, "total:", stats->total_duration); + + prt_printf(out, "mean:\t"); + bch2_pr_time_units_aligned(out, d_mean); + prt_tab(out); + bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT)); + prt_newline(out); - printbuf_indent_sub(out, 2); - prt_newline(out); + prt_printf(out, "stddev:\t"); + bch2_pr_time_units_aligned(out, d_stddev); + prt_tab(out); + bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT)); + prt_newline(out); + } prt_printf(out, "time between events\n"); - printbuf_indent_add(out, 2); - - pr_name_and_units(out, "min:", stats->min_freq); - pr_name_and_units(out, "max:", stats->max_freq); - - prt_printf(out, "mean:\t"); - bch2_pr_time_units_aligned(out, f_mean); - prt_tab(out); - bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT)); - prt_newline(out); - - prt_printf(out, "stddev:\t"); - bch2_pr_time_units_aligned(out, f_stddev); - prt_tab(out); - bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT)); + scoped_guard(printbuf_indent, out) { + pr_name_and_units(out, "min:", stats->min_freq); + pr_name_and_units(out, "max:", stats->max_freq); + + prt_printf(out, "mean:\t"); + bch2_pr_time_units_aligned(out, f_mean); + prt_tab(out); + bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT)); + prt_newline(out); - printbuf_indent_sub(out, 2); - prt_newline(out); + prt_printf(out, "stddev:\t"); + bch2_pr_time_units_aligned(out, f_stddev); + 
prt_tab(out); + bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT)); + prt_newline(out); + } printbuf_tabstops_reset(out); |