summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/bcachefs/alloc_background.c3
-rw-r--r--fs/bcachefs/alloc_foreground.c27
-rw-r--r--fs/bcachefs/backpointers.c100
-rw-r--r--fs/bcachefs/bcachefs.h1
-rw-r--r--fs/bcachefs/btree_cache.h2
-rw-r--r--fs/bcachefs/btree_io.c2
-rw-r--r--fs/bcachefs/btree_iter.c166
-rw-r--r--fs/bcachefs/btree_locking.c5
-rw-r--r--fs/bcachefs/btree_node_scan.c3
-rw-r--r--fs/bcachefs/btree_update_interior.c61
-rw-r--r--fs/bcachefs/data_update.c9
-rw-r--r--fs/bcachefs/debug.c47
-rw-r--r--fs/bcachefs/disk_accounting.c25
-rw-r--r--fs/bcachefs/error.c2
-rw-r--r--fs/bcachefs/extents.c81
-rw-r--r--fs/bcachefs/inode.c3
-rw-r--r--fs/bcachefs/io_read.c12
-rw-r--r--fs/bcachefs/io_write.c7
-rw-r--r--fs/bcachefs/journal.c35
-rw-r--r--fs/bcachefs/journal_io.c3
-rw-r--r--fs/bcachefs/journal_reclaim.c4
-rw-r--r--fs/bcachefs/journal_sb.c45
-rw-r--r--fs/bcachefs/journal_sb.h1
-rw-r--r--fs/bcachefs/move.c19
-rw-r--r--fs/bcachefs/printbuf.h14
-rw-r--r--fs/bcachefs/rebalance.c53
-rw-r--r--fs/bcachefs/rebalance.h3
-rw-r--r--fs/bcachefs/recovery.c8
-rw-r--r--fs/bcachefs/recovery_passes_format.h1
-rw-r--r--fs/bcachefs/sb-errors.c28
-rw-r--r--fs/bcachefs/sb-errors_format.h3
-rw-r--r--fs/bcachefs/sb-members.c23
-rw-r--r--fs/bcachefs/snapshot.c122
-rw-r--r--fs/bcachefs/snapshot.h1
-rw-r--r--fs/bcachefs/snapshot_format.h27
-rw-r--r--fs/bcachefs/super-io.c10
-rw-r--r--fs/bcachefs/super.c17
-rw-r--r--fs/bcachefs/super.h2
-rw-r--r--fs/bcachefs/util.c66
39 files changed, 640 insertions, 401 deletions
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index 3fc728efbf5c..b6850b15494d 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -344,7 +344,7 @@ static inline void __bch2_alloc_v4_to_text(struct printbuf *out, struct bch_fs *
struct bch_dev *ca = c ? bch2_dev_tryget_noerror(c, k.k->p.inode) : NULL;
prt_newline(out);
- printbuf_indent_add(out, 2);
+ guard(printbuf_indent)(out);
prt_printf(out, "gen %u oldest_gen %u data_type ", a->gen, a->oldest_gen);
bch2_prt_data_type(out, a->data_type);
@@ -367,7 +367,6 @@ static inline void __bch2_alloc_v4_to_text(struct printbuf *out, struct bch_fs *
if (ca)
prt_printf(out, "fragmentation %llu\n", alloc_lru_idx_fragmentation(*a, ca));
prt_printf(out, "bp_start %llu\n", BCH_ALLOC_V4_BACKPOINTERS_START(a));
- printbuf_indent_sub(out, 2);
bch2_dev_put(ca);
}
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index f6ea4a8272d0..3d125ee81663 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -1491,10 +1491,9 @@ static void bch2_write_point_to_text(struct printbuf *out, struct bch_fs *c,
prt_newline(out);
- printbuf_indent_add(out, 2);
- open_bucket_for_each(c, &wp->ptrs, ob, i)
- bch2_open_bucket_to_text(out, c, ob);
- printbuf_indent_sub(out, 2);
+ scoped_guard(printbuf_indent, out)
+ open_bucket_for_each(c, &wp->ptrs, ob, i)
+ bch2_open_bucket_to_text(out, c, ob);
}
void bch2_write_points_to_text(struct printbuf *out, struct bch_fs *c)
@@ -1586,9 +1585,8 @@ static noinline void bch2_print_allocator_stuck(struct bch_fs *c)
c->opts.allocator_stuck_timeout);
prt_printf(&buf, "Allocator debug:\n");
- printbuf_indent_add(&buf, 2);
- bch2_fs_alloc_debug_to_text(&buf, c);
- printbuf_indent_sub(&buf, 2);
+ scoped_guard(printbuf_indent, &buf)
+ bch2_fs_alloc_debug_to_text(&buf, c);
prt_newline(&buf);
bch2_printbuf_make_room(&buf, 4096);
@@ -1597,23 +1595,20 @@ static noinline void bch2_print_allocator_stuck(struct bch_fs *c)
guard(printbuf_atomic)(&buf);
for_each_online_member_rcu(c, ca) {
prt_printf(&buf, "Dev %u:\n", ca->dev_idx);
- printbuf_indent_add(&buf, 2);
- bch2_dev_alloc_debug_to_text(&buf, ca);
- printbuf_indent_sub(&buf, 2);
+ scoped_guard(printbuf_indent, &buf)
+ bch2_dev_alloc_debug_to_text(&buf, ca);
prt_newline(&buf);
}
}
prt_printf(&buf, "Copygc debug:\n");
- printbuf_indent_add(&buf, 2);
- bch2_copygc_wait_to_text(&buf, c);
- printbuf_indent_sub(&buf, 2);
+ scoped_guard(printbuf_indent, &buf)
+ bch2_copygc_wait_to_text(&buf, c);
prt_newline(&buf);
prt_printf(&buf, "Journal debug:\n");
- printbuf_indent_add(&buf, 2);
- bch2_journal_debug_to_text(&buf, &c->journal);
- printbuf_indent_sub(&buf, 2);
+ scoped_guard(printbuf_indent, &buf)
+ bch2_journal_debug_to_text(&buf, &c->journal);
bch2_print_str(c, KERN_ERR, buf.buf);
}
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
index cb25cddb759b..6aeb1c876619 100644
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -117,7 +117,7 @@ static noinline int backpointer_mod_err(struct btree_trans *trans,
prt_printf(&buf, "existing backpointer found when inserting ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&new_bp->k_i));
prt_newline(&buf);
- printbuf_indent_add(&buf, 2);
+ guard(printbuf_indent)(&buf);
prt_printf(&buf, "found ");
bch2_bkey_val_to_text(&buf, c, found_bp);
@@ -127,7 +127,7 @@ static noinline int backpointer_mod_err(struct btree_trans *trans,
bch2_bkey_val_to_text(&buf, c, orig_k);
} else if (!will_check) {
prt_printf(&buf, "backpointer not found when deleting\n");
- printbuf_indent_add(&buf, 2);
+ guard(printbuf_indent)(&buf);
prt_printf(&buf, "searching for ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&new_bp->k_i));
@@ -278,9 +278,20 @@ static struct btree *__bch2_backpointer_get_node(struct btree_trans *trans,
bp.v->level - 1,
0);
struct btree *b = bch2_btree_iter_peek_node(iter);
- if (IS_ERR_OR_NULL(b))
+ if (IS_ERR(b))
goto err;
+ if (!b) {
+ /* Backpointer for nonexistent tree depth: */
+ bkey_init(&iter->k);
+ iter->k.p = bp.v->pos;
+ struct bkey_s_c k = { &iter->k };
+
+ int ret = backpointer_target_not_found(trans, bp, k, last_flushed, commit);
+ b = ret ? ERR_PTR(ret) : NULL;
+ goto err;
+ }
+
BUG_ON(b->c.level != bp.v->level - 1);
if (extent_matches_bp(c, bp.v->btree_id, bp.v->level,
@@ -862,17 +873,25 @@ static int data_type_to_alloc_counter(enum bch_data_type t)
}
}
-static int check_bucket_backpointers_to_extents(struct btree_trans *, struct bch_dev *, struct bpos);
+static int check_bucket_backpointers_to_extents(struct btree_trans *, struct bch_dev *, struct bpos,
+ struct bkey_buf *last_flushed);
static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct bkey_s_c alloc_k,
bool *had_mismatch,
- struct bkey_buf *last_flushed)
+ struct bkey_buf *last_flushed,
+ struct bpos *last_pos,
+ unsigned *nr_iters)
{
struct bch_fs *c = trans->c;
struct bch_alloc_v4 a_convert;
const struct bch_alloc_v4 *a = bch2_alloc_to_v4(alloc_k, &a_convert);
bool need_commit = false;
+ if (!bpos_eq(*last_pos, alloc_k.k->p))
+ *nr_iters = 0;
+
+ *last_pos = alloc_k.k->p;
+
*had_mismatch = false;
if (a->data_type == BCH_DATA_sb ||
@@ -926,6 +945,46 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b
return ret;
}
+ if (sectors[ALLOC_dirty] > a->dirty_sectors ||
+ sectors[ALLOC_cached] > a->cached_sectors ||
+ sectors[ALLOC_stripe] > a->stripe_sectors) {
+ if (*nr_iters) {
+ CLASS(printbuf, buf)();
+ bch2_log_msg_start(c, &buf);
+
+ prt_printf(&buf, "backpointer sectors > bucket sectors, but found no bad backpointers\n"
+ "bucket %llu:%llu data type %s, counters\n",
+ alloc_k.k->p.inode,
+ alloc_k.k->p.offset,
+ __bch2_data_types[a->data_type]);
+ if (sectors[ALLOC_dirty] > a->dirty_sectors)
+ prt_printf(&buf, "dirty: %u > %u\n",
+ sectors[ALLOC_dirty], a->dirty_sectors);
+ if (sectors[ALLOC_cached] > a->cached_sectors)
+ prt_printf(&buf, "cached: %u > %u\n",
+ sectors[ALLOC_cached], a->cached_sectors);
+ if (sectors[ALLOC_stripe] > a->stripe_sectors)
+ prt_printf(&buf, "stripe: %u > %u\n",
+ sectors[ALLOC_stripe], a->stripe_sectors);
+
+ for_each_btree_key_max_norestart(trans, iter, BTREE_ID_backpointers,
+ bucket_pos_to_bp_start(ca, alloc_k.k->p),
+ bucket_pos_to_bp_end(ca, alloc_k.k->p), 0, bp_k, ret) {
+ bch2_bkey_val_to_text(&buf, c, bp_k);
+ prt_newline(&buf);
+ }
+
+ bch2_print_str(c, KERN_ERR, buf.buf);
+ __WARN();
+ return ret;
+ }
+
+ *nr_iters += 1;
+
+ return check_bucket_backpointers_to_extents(trans, ca, alloc_k.k->p, last_flushed) ?:
+ bch_err_throw(c, transaction_restart_nested);
+ }
+
if (sectors[ALLOC_dirty] != a->dirty_sectors ||
sectors[ALLOC_cached] != a->cached_sectors ||
sectors[ALLOC_stripe] != a->stripe_sectors) {
@@ -943,13 +1002,6 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b
return ret;
}
- if (sectors[ALLOC_dirty] > a->dirty_sectors ||
- sectors[ALLOC_cached] > a->cached_sectors ||
- sectors[ALLOC_stripe] > a->stripe_sectors) {
- return check_bucket_backpointers_to_extents(trans, ca, alloc_k.k->p) ?:
- bch_err_throw(c, transaction_restart_nested);
- }
-
bool empty = (sectors[ALLOC_dirty] +
sectors[ALLOC_stripe] +
sectors[ALLOC_cached]) == 0;
@@ -1113,6 +1165,8 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
CLASS(btree_trans, trans)(c);
struct extents_to_bp_state s = { .bp_start = POS_MIN };
+ struct bpos last_pos = POS_MIN;
+ unsigned nr_iters = 0;
bch2_bkey_buf_init(&s.last_flushed);
bkey_init(&s.last_flushed.k->k);
@@ -1121,7 +1175,8 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
POS_MIN, BTREE_ITER_prefetch, k, ({
bool had_mismatch;
bch2_fs_going_ro(c) ?:
- check_bucket_backpointer_mismatch(trans, k, &had_mismatch, &s.last_flushed);
+ check_bucket_backpointer_mismatch(trans, k, &had_mismatch, &s.last_flushed,
+ &last_pos, &nr_iters);
}));
if (ret)
goto err;
@@ -1189,7 +1244,11 @@ static int check_bucket_backpointer_pos_mismatch(struct btree_trans *trans,
if (ret)
return ret;
- return check_bucket_backpointer_mismatch(trans, k, had_mismatch, last_flushed);
+ struct bpos last_pos = POS_MIN;
+ unsigned nr_iters = 0;
+ return check_bucket_backpointer_mismatch(trans, k, had_mismatch,
+ last_flushed,
+ &last_pos, &nr_iters);
}
int bch2_check_bucket_backpointer_mismatch(struct btree_trans *trans,
@@ -1253,22 +1312,21 @@ static int check_one_backpointer(struct btree_trans *trans,
}
static int check_bucket_backpointers_to_extents(struct btree_trans *trans,
- struct bch_dev *ca, struct bpos bucket)
+ struct bch_dev *ca, struct bpos bucket,
+ struct bkey_buf *last_flushed)
{
u32 restart_count = trans->restart_count;
- struct bkey_buf last_flushed;
- bch2_bkey_buf_init(&last_flushed);
- bkey_init(&last_flushed.k->k);
int ret = for_each_btree_key_max(trans, iter, BTREE_ID_backpointers,
bucket_pos_to_bp_start(ca, bucket),
bucket_pos_to_bp_end(ca, bucket),
0, k,
- check_one_backpointer(trans, BBPOS_MIN, BBPOS_MAX, k, &last_flushed)
+ check_one_backpointer(trans, BBPOS_MIN, BBPOS_MAX, k, last_flushed)
);
- bch2_bkey_buf_exit(&last_flushed, trans->c);
- return ret ?: trans_was_restarted(trans, restart_count);
+ return ret ?:
+ bch2_btree_write_buffer_flush_sync(trans) ?: /* make sure bad backpointers that were deleted are visible */
+ trans_was_restarted(trans, restart_count);
}
static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 16d08dfb5f19..0ede47f62129 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -523,6 +523,7 @@ struct discard_in_flight {
x(journal_read) \
x(fs_journal_alloc) \
x(fs_resize_on_mount) \
+ x(sb_journal_sort) \
x(btree_node_read) \
x(btree_node_read_all_replicas) \
x(btree_node_scrub) \
diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h
index 035b2cb25077..49d0be6405d8 100644
--- a/fs/bcachefs/btree_cache.h
+++ b/fs/bcachefs/btree_cache.h
@@ -166,7 +166,7 @@ void bch2_btree_cache_to_text(struct printbuf *, const struct btree_cache *);
do { \
if (trace_##event##_enabled()) { \
CLASS(printbuf, buf)(); \
- printbuf_indent_add(&buf, 2); \
+ guard(printbuf_indent)(&buf); \
bch2_btree_pos_to_text(&buf, c, b); \
trace_##event(c, buf.buf); \
} \
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 2e3dd9bacac5..34ec1a90980d 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -2523,7 +2523,7 @@ do_write:
if (trace_btree_node_write_enabled()) {
CLASS(printbuf, buf)();
- printbuf_indent_add(&buf, 2);
+ guard(printbuf_indent)(&buf);
prt_printf(&buf, "offset %u sectors %u bytes %u\n",
b->written,
sectors_to_write,
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 1e152c671bd7..b72ed543d9c0 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -137,18 +137,8 @@ static void __bch2_btree_path_verify_cached(struct btree_trans *trans,
static void __bch2_btree_path_verify_level(struct btree_trans *trans,
struct btree_path *path, unsigned level)
{
- struct btree_path_level *l;
- struct btree_node_iter tmp;
- bool locked;
- struct bkey_packed *p, *k;
- struct printbuf buf1 = PRINTBUF;
- struct printbuf buf2 = PRINTBUF;
- struct printbuf buf3 = PRINTBUF;
- const char *msg;
-
- l = &path->l[level];
- tmp = l->iter;
- locked = btree_node_locked(path, level);
+ struct btree_path_level *l = &path->l[level];
+ bool locked = btree_node_locked(path, level);
if (path->cached) {
if (!level)
@@ -166,51 +156,68 @@ static void __bch2_btree_path_verify_level(struct btree_trans *trans,
bch2_btree_node_iter_verify(&l->iter, l->b);
- /*
- * For interior nodes, the iterator will have skipped past deleted keys:
- */
- p = level
+ /* For interior nodes, the iterator may have skipped past deleted keys: */
+ struct btree_node_iter tmp = l->iter;
+ const struct bkey_packed *p = level
? bch2_btree_node_iter_prev(&tmp, l->b)
: bch2_btree_node_iter_prev_all(&tmp, l->b);
- k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
+ tmp = l->iter;
+ const struct bkey_packed *k = level
+ ? bch2_btree_node_iter_peek(&tmp, l->b)
+ : bch2_btree_node_iter_peek_all(&tmp, l->b);
- if (p && bkey_iter_pos_cmp(l->b, p, &path->pos) >= 0) {
- msg = "before";
- goto err;
- }
+ const char *msg;
+ if (!(level > path->level && trans->journal_replay_not_finished)) {
+ /*
+ * We can't run these checks for interior nodes when we're still
+ * using the journal overlay because there might be a key in
+ * the interior node that points midway through the current leaf
+ * node - which is deleted in the journal overlay, but set_pos()
+ * will skip past it and cause the interior node iterators to be
+ * inconsistent in a way that doesn't matter and it can't check
+ * for.
+ */
- if (k && bkey_iter_pos_cmp(l->b, k, &path->pos) < 0) {
- msg = "after";
- goto err;
+ if (p && bkey_iter_pos_cmp(l->b, p, &path->pos) >= 0) {
+ msg = "before";
+ goto err;
+ }
+
+ if (k && bkey_iter_pos_cmp(l->b, k, &path->pos) < 0) {
+ msg = "after";
+ goto err;
+ }
}
if (!locked)
btree_node_unlock(trans, path, level);
return;
err:
- bch2_bpos_to_text(&buf1, path->pos);
+ {
+ CLASS(printbuf, buf)();
+ prt_printf(&buf, "path should be %s key at level %u", msg, level);
- if (p) {
- struct bkey uk = bkey_unpack_key(l->b, p);
+ prt_str(&buf, "\npath pos ");
+ bch2_bpos_to_text(&buf, path->pos);
- bch2_bkey_to_text(&buf2, &uk);
- } else {
- prt_printf(&buf2, "(none)");
- }
+ prt_str(&buf, "\nprev key ");
+ if (p) {
+ struct bkey uk = bkey_unpack_key(l->b, p);
+ bch2_bkey_to_text(&buf, &uk);
+ } else {
+ prt_printf(&buf, "(none)");
+ }
- if (k) {
- struct bkey uk = bkey_unpack_key(l->b, k);
+ prt_str(&buf, "\ncur key ");
+ if (k) {
+ struct bkey uk = bkey_unpack_key(l->b, k);
+ bch2_bkey_to_text(&buf, &uk);
+ } else {
+ prt_printf(&buf, "(none)");
+ }
- bch2_bkey_to_text(&buf3, &uk);
- } else {
- prt_printf(&buf3, "(none)");
+ panic("%s\n", buf.buf);
}
-
- panic("path should be %s key at level %u:\n"
- "path pos %s\n"
- "prev key %s\n"
- "cur key %s\n",
- msg, level, buf1.buf, buf2.buf, buf3.buf);
}
static void __bch2_btree_path_verify(struct btree_trans *trans,
@@ -886,28 +893,53 @@ static noinline void btree_node_mem_ptr_set(struct btree_trans *trans,
btree_node_unlock(trans, path, plevel);
}
+static noinline_for_stack int btree_node_missing_err(struct btree_trans *trans,
+ struct btree_path *path)
+{
+ struct bch_fs *c = trans->c;
+ CLASS(printbuf, buf)();
+
+ prt_str(&buf, "node not found at pos: ");
+ bch2_bpos_to_text(&buf, path->pos);
+ prt_str(&buf, "\n within parent node ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&path_l(path)->b->key));
+ prt_newline(&buf);
+
+ return __bch2_topology_error(c, &buf);
+}
+
+static noinline_for_stack int btree_node_gap_err(struct btree_trans *trans,
+ struct btree_path *path,
+ struct bkey_i *k)
+{
+ struct bch_fs *c = trans->c;
+ CLASS(printbuf, buf)();
+
+ prt_str(&buf, "node doesn't cover expected range at pos: ");
+ bch2_bpos_to_text(&buf, path->pos);
+ prt_str(&buf, "\n within parent node ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&path_l(path)->b->key));
+ prt_str(&buf, "\n but got node: ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
+ prt_newline(&buf);
+
+ return __bch2_topology_error(c, &buf);
+}
+
static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans,
struct btree_path *path,
enum btree_iter_update_trigger_flags flags)
{
struct bch_fs *c = trans->c;
struct btree_path_level *l = path_l(path);
- struct btree_and_journal_iter jiter;
- struct bkey_s_c k;
int ret = 0;
+ struct btree_and_journal_iter jiter;
__bch2_btree_and_journal_iter_init_node_iter(trans, &jiter, l->b, l->iter, path->pos);
- k = bch2_btree_and_journal_iter_peek(c, &jiter);
+ struct bkey_s_c k = bch2_btree_and_journal_iter_peek(c, &jiter);
if (!k.k) {
- CLASS(printbuf, buf)();
-
- prt_str(&buf, "node not found at pos ");
- bch2_bpos_to_text(&buf, path->pos);
- prt_str(&buf, " at btree ");
- bch2_btree_pos_to_text(&buf, c, l->b);
-
- ret = bch2_fs_topology_error(c, "%s", buf.buf);
+ ret = btree_node_missing_err(trans, path);
goto err;
}
@@ -922,20 +954,16 @@ err:
return ret;
}
-static noinline_for_stack int btree_node_missing_err(struct btree_trans *trans,
- struct btree_path *path)
+static inline bool bpos_in_btree_node_key(struct bpos pos, const struct bkey_i *k)
{
- struct bch_fs *c = trans->c;
- CLASS(printbuf, buf)();
+ if (bpos_gt(pos, k->k.p))
+ return false;
- prt_str(&buf, "node not found at pos ");
- bch2_bpos_to_text(&buf, path->pos);
- prt_str(&buf, " within parent node ");
- bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&path_l(path)->b->key));
+ if (k->k.type == KEY_TYPE_btree_ptr_v2 &&
+ bpos_lt(pos, bkey_i_to_btree_ptr_v2_c(k)->v.min_key))
+ return false;
- bch2_fs_fatal_error(c, "%s", buf.buf);
- printbuf_exit(&buf);
- return bch_err_throw(c, btree_need_topology_repair);
+ return true;
}
static __always_inline int btree_path_down(struct btree_trans *trans,
@@ -971,6 +999,9 @@ static __always_inline int btree_path_down(struct btree_trans *trans,
}
}
+ if (unlikely(!bpos_in_btree_node_key(path->pos, &trans->btree_path_down)))
+ return btree_node_gap_err(trans, path, &trans->btree_path_down);
+
b = bch2_btree_node_get(trans, path, &trans->btree_path_down,
level, lock_type, trace_ip);
ret = PTR_ERR_OR_ZERO(b);
@@ -1476,7 +1507,7 @@ void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans)
{
prt_printf(buf, "%u transaction updates for %s journal seq %llu\n",
trans->nr_updates, trans->fn, trans->journal_res.seq);
- printbuf_indent_add(buf, 2);
+ guard(printbuf_indent)(buf);
trans_for_each_update(trans, i) {
struct bkey_s_c old = { &i->old_k, i->old_v };
@@ -1502,8 +1533,6 @@ void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans)
bch2_journal_entry_to_text(buf, trans->c, e);
prt_newline(buf);
}
-
- printbuf_indent_sub(buf, 2);
}
static void bch2_btree_path_to_text_short(struct printbuf *out, struct btree_trans *trans, btree_path_idx_t path_idx)
@@ -1556,8 +1585,8 @@ void bch2_btree_path_to_text(struct printbuf *out, struct btree_trans *trans, bt
prt_printf(out, " uptodate %u locks_want %u", path->uptodate, path->locks_want);
prt_newline(out);
+ guard(printbuf_indent)(out);
- printbuf_indent_add(out, 2);
for (unsigned l = 0; l < BTREE_MAX_DEPTH; l++) {
prt_printf(out, "l=%u locks %s seq %u node ", l,
btree_node_locked_str(btree_node_locked_type(path, l)),
@@ -1570,7 +1599,6 @@ void bch2_btree_path_to_text(struct printbuf *out, struct btree_trans *trans, bt
prt_printf(out, "%px", path->l[l].b);
prt_newline(out);
}
- printbuf_indent_sub(out, 2);
}
static noinline __cold
diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c
index 38c5643e8a78..a4f8aac448c0 100644
--- a/fs/bcachefs/btree_locking.c
+++ b/fs/bcachefs/btree_locking.c
@@ -205,9 +205,8 @@ static noinline __noreturn void break_cycle_fail(struct lock_graph *g)
bch2_btree_trans_to_text(&buf, trans);
prt_printf(&buf, "backtrace:\n");
- printbuf_indent_add(&buf, 2);
- bch2_prt_task_backtrace(&buf, trans->locking_wait.task, 2, GFP_NOWAIT);
- printbuf_indent_sub(&buf, 2);
+ scoped_guard(printbuf_indent, &buf)
+ bch2_prt_task_backtrace(&buf, trans->locking_wait.task, 2, GFP_NOWAIT);
prt_newline(&buf);
}
diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c
index b618a0bd1186..c0dff992ad60 100644
--- a/fs/bcachefs/btree_node_scan.c
+++ b/fs/bcachefs/btree_node_scan.c
@@ -42,12 +42,11 @@ static void found_btree_node_to_text(struct printbuf *out, struct bch_fs *c, con
static void found_btree_nodes_to_text(struct printbuf *out, struct bch_fs *c, found_btree_nodes nodes)
{
- printbuf_indent_add(out, 2);
+ guard(printbuf_indent)(out);
darray_for_each(nodes, i) {
found_btree_node_to_text(out, c, i);
prt_newline(out);
}
- printbuf_indent_sub(out, 2);
}
static void found_btree_node_to_key(struct bkey_i *k, const struct found_btree_node *f)
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index a9877a47bfc6..a8cd7a5a6e7d 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -324,9 +324,6 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
struct btree *b;
struct bch_devs_list devs_have = (struct bch_devs_list) { 0 };
enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
- unsigned nr_reserve = watermark < BCH_WATERMARK_reclaim
- ? BTREE_NODE_RESERVE
- : 0;
int ret;
b = bch2_btree_node_mem_alloc(trans, interior_node);
@@ -334,41 +331,6 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
return b;
BUG_ON(b->ob.nr);
-
- mutex_lock(&c->btree_reserve_cache_lock);
- if (unlikely(c->open_buckets_nr_free <= bch2_open_buckets_reserved(watermark))) {
- guard(spinlock)(&c->freelist_lock);
- if (c->open_buckets_nr_free <= bch2_open_buckets_reserved(watermark)) {
- if (cl)
- closure_wait(&c->open_buckets_wait, cl);
-
- ret = cl
- ? bch_err_throw(c, bucket_alloc_blocked)
- : bch_err_throw(c, open_buckets_empty);
- mutex_unlock(&c->btree_reserve_cache_lock);
- goto err;
- }
- }
-
- if (c->btree_reserve_cache_nr > nr_reserve) {
- for (struct btree_alloc *a = c->btree_reserve_cache;
- a < c->btree_reserve_cache + c->btree_reserve_cache_nr;) {
- /* check if it has sufficient durability */
-
- if (!can_use_btree_node(c, res, target, bkey_i_to_s_c(&a->k))) {
- bch2_open_buckets_put(c, &a->ob);
- *a = c->btree_reserve_cache[--c->btree_reserve_cache_nr];
- continue;
- }
-
- bkey_copy(&b->key, &a->k);
- b->ob = a->ob;
- *a = c->btree_reserve_cache[--c->btree_reserve_cache_nr];
- mutex_unlock(&c->btree_reserve_cache_lock);
- goto out;
- }
- }
- mutex_unlock(&c->btree_reserve_cache_lock);
retry:
ret = bch2_alloc_sectors_start_trans(trans,
target ?:
@@ -398,12 +360,29 @@ retry:
goto retry;
}
+ mutex_lock(&c->btree_reserve_cache_lock);
+ while (c->btree_reserve_cache_nr) {
+ struct btree_alloc *a = c->btree_reserve_cache + --c->btree_reserve_cache_nr;
+
+ /* check if it has sufficient durability */
+
+ if (can_use_btree_node(c, res, target, bkey_i_to_s_c(&a->k))) {
+ bkey_copy(&b->key, &a->k);
+ b->ob = a->ob;
+ mutex_unlock(&c->btree_reserve_cache_lock);
+ goto out;
+ }
+
+ bch2_open_buckets_put(c, &a->ob);
+ }
+ mutex_unlock(&c->btree_reserve_cache_lock);
+
bkey_btree_ptr_v2_init(&b->key);
bch2_alloc_sectors_append_ptrs(c, wp, &b->key, btree_sectors(c), false);
bch2_open_bucket_get(c, wp, &b->ob);
- bch2_alloc_sectors_done(c, wp);
out:
+ bch2_alloc_sectors_done(c, wp);
six_unlock_write(&b->c.lock);
six_unlock_intent(&b->c.lock);
@@ -2810,7 +2789,7 @@ static void bch2_btree_alloc_to_text(struct printbuf *out,
struct bch_fs *c,
struct btree_alloc *a)
{
- printbuf_indent_add(out, 2);
+ guard(printbuf_indent)(out);
bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&a->k));
prt_newline(out);
@@ -2818,8 +2797,6 @@ static void bch2_btree_alloc_to_text(struct printbuf *out,
unsigned i;
open_bucket_for_each(c, &a->ob, ob, i)
bch2_open_bucket_to_text(out, c, ob);
-
- printbuf_indent_sub(out, 2);
}
void bch2_btree_reserve_cache_to_text(struct printbuf *out, struct bch_fs *c)
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index 2c997fddefb3..20b900bee32d 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -657,25 +657,26 @@ void bch2_data_update_to_text(struct printbuf *out, struct data_update *m)
prt_str_indented(out, "old key:\t");
bch2_bkey_val_to_text(out, m->op.c, bkey_i_to_s_c(m->k.k));
+
+ bch2_write_op_to_text(out, &m->op);
}
void bch2_data_update_inflight_to_text(struct printbuf *out, struct data_update *m)
{
bch2_bkey_val_to_text(out, m->op.c, bkey_i_to_s_c(m->k.k));
prt_newline(out);
- printbuf_indent_add(out, 2);
+ guard(printbuf_indent)(out);
bch2_data_update_opts_to_text(out, m->op.c, &m->op.opts, &m->data_opts);
if (!m->read_done) {
prt_printf(out, "read:\n");
- printbuf_indent_add(out, 2);
+ guard(printbuf_indent)(out);
bch2_read_bio_to_text(out, m->op.c, &m->rbio);
} else {
prt_printf(out, "write:\n");
- printbuf_indent_add(out, 2);
+ guard(printbuf_indent)(out);
bch2_write_op_to_text(out, &m->op);
}
- printbuf_indent_sub(out, 4);
}
int bch2_extent_drop_ptrs(struct btree_trans *trans,
diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c
index 33cb94f70b19..ebfb68e2e035 100644
--- a/fs/bcachefs/debug.c
+++ b/fs/bcachefs/debug.c
@@ -282,16 +282,13 @@ void bch2_btree_node_ondisk_to_text(struct printbuf *out, struct bch_fs *c,
le64_to_cpu(i->journal_seq));
offset += sectors;
- printbuf_indent_add(out, 4);
+ scoped_guard(printbuf_indent, out)
+ for (k = i->start; k != vstruct_last(i); k = bkey_p_next(k)) {
+ struct bkey u;
- for (k = i->start; k != vstruct_last(i); k = bkey_p_next(k)) {
- struct bkey u;
-
- bch2_bkey_val_to_text(out, c, bkey_disassemble(b, k, &u));
- prt_newline(out);
- }
-
- printbuf_indent_sub(out, 4);
+ bch2_bkey_val_to_text(out, c, bkey_disassemble(b, k, &u));
+ prt_newline(out);
+ }
}
out:
if (bio)
@@ -468,7 +465,7 @@ static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs *
bch2_btree_id_level_to_text(out, b->c.btree_id, b->c.level);
prt_printf(out, "\n");
- printbuf_indent_add(out, 2);
+ guard(printbuf_indent)(out);
bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key));
prt_newline(out);
@@ -488,8 +485,6 @@ static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs *
&b->writes[1].journal, b->writes[1].journal.seq);
prt_printf(out, "ob:\t%u\n", b->ob.nr);
-
- printbuf_indent_sub(out, 2);
}
static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf,
@@ -605,9 +600,8 @@ restart:
bch2_btree_trans_to_text(&i->buf, trans);
prt_printf(&i->buf, "backtrace:\n");
- printbuf_indent_add(&i->buf, 2);
- bch2_prt_task_backtrace(&i->buf, trans->locking_wait.task, 0, GFP_KERNEL);
- printbuf_indent_sub(&i->buf, 2);
+ scoped_guard(printbuf_indent, &i->buf)
+ bch2_prt_task_backtrace(&i->buf, trans->locking_wait.task, 0, GFP_KERNEL);
prt_newline(&i->buf);
closure_put(&trans->ref);
@@ -765,40 +759,35 @@ static ssize_t btree_transaction_stats_read(struct file *file, char __user *buf,
break;
prt_printf(&i->buf, "%s:\n", bch2_btree_transaction_fns[i->iter]);
- printbuf_indent_add(&i->buf, 2);
+ guard(printbuf_indent)(&i->buf);
guard(mutex)(&s->lock);
prt_printf(&i->buf, "Max mem used: %u\n", s->max_mem);
#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
- printbuf_indent_add(&i->buf, 2);
- bch2_trans_kmalloc_trace_to_text(&i->buf, &s->trans_kmalloc_trace);
- printbuf_indent_sub(&i->buf, 2);
+ scoped_guard(printbuf_indent, &i->buf)
+ bch2_trans_kmalloc_trace_to_text(&i->buf, &s->trans_kmalloc_trace);
#endif
prt_printf(&i->buf, "Transaction duration:\n");
- printbuf_indent_add(&i->buf, 2);
- bch2_time_stats_to_text(&i->buf, &s->duration);
- printbuf_indent_sub(&i->buf, 2);
+ scoped_guard(printbuf_indent, &i->buf)
+ bch2_time_stats_to_text(&i->buf, &s->duration);
if (IS_ENABLED(CONFIG_BCACHEFS_LOCK_TIME_STATS)) {
prt_printf(&i->buf, "Lock hold times:\n");
- printbuf_indent_add(&i->buf, 2);
- bch2_time_stats_to_text(&i->buf, &s->lock_hold_times);
- printbuf_indent_sub(&i->buf, 2);
+ scoped_guard(printbuf_indent, &i->buf)
+ bch2_time_stats_to_text(&i->buf, &s->lock_hold_times);
}
if (s->max_paths_text) {
prt_printf(&i->buf, "Maximum allocated btree paths (%u):\n", s->nr_max_paths);
- printbuf_indent_add(&i->buf, 2);
- prt_str_indented(&i->buf, s->max_paths_text);
- printbuf_indent_sub(&i->buf, 2);
+ scoped_guard(printbuf_indent, &i->buf)
+ prt_str_indented(&i->buf, s->max_paths_text);
}
- printbuf_indent_sub(&i->buf, 2);
prt_newline(&i->buf);
i->iter++;
}
diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c
index d6c91abcdc41..f0ebf91cd5fd 100644
--- a/fs/bcachefs/disk_accounting.c
+++ b/fs/bcachefs/disk_accounting.c
@@ -735,10 +735,12 @@ invalid_device:
goto fsck_err;
}
-static struct journal_key *accumulate_newer_accounting_keys(struct bch_fs *c, struct journal_key *i)
+static struct journal_key *accumulate_newer_accounting_keys(struct btree_trans *trans, struct journal_key *i)
{
+ struct bch_fs *c = trans->c;
struct journal_keys *keys = &c->journal_keys;
struct bkey_i *k = journal_key_k(c, i);
+ int ret = 0;
darray_for_each_from(*keys, j, i + 1) {
if (journal_key_cmp(c, i, j))
@@ -746,7 +748,18 @@ static struct journal_key *accumulate_newer_accounting_keys(struct bch_fs *c, st
struct bkey_i *n = journal_key_k(c, j);
if (n->k.type == KEY_TYPE_accounting) {
- WARN_ON(bversion_cmp(k->k.bversion, n->k.bversion) >= 0);
+ if (bversion_cmp(k->k.bversion, n->k.bversion) >= 0) {
+ CLASS(printbuf, buf)();
+ prt_printf(&buf, "accounting keys with out of order versions:");
+
+ prt_newline(&buf);
+ prt_printf(&buf, "%u.%u ", i->journal_seq_offset, i->journal_offset);
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
+ prt_newline(&buf);
+ prt_printf(&buf, "%u.%u ", j->journal_seq_offset, j->journal_offset);
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(n));
+ fsck_err(trans, accounting_key_version_out_of_order, "%s", buf.buf);
+ }
bch2_accounting_accumulate(bkey_i_to_accounting(k),
bkey_i_to_s_c_accounting(n));
@@ -755,14 +768,16 @@ static struct journal_key *accumulate_newer_accounting_keys(struct bch_fs *c, st
}
return &darray_top(*keys);
+fsck_err:
+ return ERR_PTR(ret);
}
static struct journal_key *accumulate_and_read_journal_accounting(struct btree_trans *trans, struct journal_key *i)
{
- struct bch_fs *c = trans->c;
- struct journal_key *next = accumulate_newer_accounting_keys(c, i);
+ struct journal_key *next = accumulate_newer_accounting_keys(trans, i);
- int ret = accounting_read_key(trans, bkey_i_to_s_c(journal_key_k(c, i)));
+ int ret = PTR_ERR_OR_ZERO(next) ?:
+ accounting_read_key(trans, bkey_i_to_s_c(journal_key_k(trans->c, i)));
return ret ? ERR_PTR(ret) : next;
}
diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c
index e33f3166c48a..9e69263eb796 100644
--- a/fs/bcachefs/error.c
+++ b/fs/bcachefs/error.c
@@ -394,7 +394,7 @@ int bch2_fsck_err_opt(struct bch_fs *c,
flags |= fsck_flags_extra[err];
if (test_bit(BCH_FS_in_fsck, &c->flags) ||
- test_bit(BCH_FS_in_recovery, &c->flags)) {
+ c->opts.fix_errors != FSCK_FIX_exit) {
if (!(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE)))
return bch_err_throw(c, fsck_repair_unimplemented);
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index 68a61f7bc737..c0d00a692c18 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -1270,14 +1270,14 @@ void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struc
guard(rcu)();
struct bch_dev *ca = bch2_dev_rcu_noerror(c, ptr->dev);
if (!ca) {
- prt_printf(out, "ptr: %u:%llu gen %u%s", ptr->dev,
+ prt_printf(out, "%u:%llu gen %u%s", ptr->dev,
(u64) ptr->offset, ptr->gen,
ptr->cached ? " cached" : "");
} else {
u32 offset;
u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset);
- prt_printf(out, "ptr: %u:%llu:%u gen %u",
+ prt_printf(out, "%u:%llu:%u gen %u",
ptr->dev, b, offset, ptr->gen);
if (ca->mi.durability != 1)
prt_printf(out, " d=%u", ca->mi.durability);
@@ -1295,7 +1295,7 @@ void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struc
void bch2_extent_crc_unpacked_to_text(struct printbuf *out, struct bch_extent_crc_unpacked *crc)
{
- prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum ",
+ prt_printf(out, "c_size %u size %u offset %u nonce %u csum ",
crc->compressed_size,
crc->uncompressed_size,
crc->offset, crc->nonce);
@@ -1305,72 +1305,34 @@ void bch2_extent_crc_unpacked_to_text(struct printbuf *out, struct bch_extent_cr
bch2_prt_compression_type(out, crc->compression_type);
}
-static void bch2_extent_rebalance_to_text(struct printbuf *out, struct bch_fs *c,
- const struct bch_extent_rebalance *r)
-{
- prt_str(out, "rebalance:");
-
- prt_printf(out, " replicas=%u", r->data_replicas);
- if (r->data_replicas_from_inode)
- prt_str(out, " (inode)");
-
- prt_str(out, " checksum=");
- bch2_prt_csum_opt(out, r->data_checksum);
- if (r->data_checksum_from_inode)
- prt_str(out, " (inode)");
-
- if (r->background_compression || r->background_compression_from_inode) {
- prt_str(out, " background_compression=");
- bch2_compression_opt_to_text(out, r->background_compression);
-
- if (r->background_compression_from_inode)
- prt_str(out, " (inode)");
- }
-
- if (r->background_target || r->background_target_from_inode) {
- prt_str(out, " background_target=");
- if (c)
- bch2_target_to_text(out, c, r->background_target);
- else
- prt_printf(out, "%u", r->background_target);
-
- if (r->background_target_from_inode)
- prt_str(out, " (inode)");
- }
-
- if (r->promote_target || r->promote_target_from_inode) {
- prt_str(out, " promote_target=");
- if (c)
- bch2_target_to_text(out, c, r->promote_target);
- else
- prt_printf(out, "%u", r->promote_target);
-
- if (r->promote_target_from_inode)
- prt_str(out, " (inode)");
- }
-
- if (r->erasure_code || r->erasure_code_from_inode) {
- prt_printf(out, " ec=%u", r->erasure_code);
- if (r->erasure_code_from_inode)
- prt_str(out, " (inode)");
- }
-}
+static const char * const extent_entry_types[] = {
+#define x(t, n, ...) [n] = #t,
+ BCH_EXTENT_ENTRY_TYPES()
+#undef x
+ NULL
+};
void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
struct bkey_s_c k)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
- bool first = true;
if (c)
prt_printf(out, "durability: %u ", bch2_bkey_durability_safe(c, k));
+ guard(printbuf_indent)(out);
+
bkey_extent_entry_for_each(ptrs, entry) {
- if (!first)
- prt_printf(out, " ");
+ prt_newline(out);
- switch (__extent_entry_type(entry)) {
+ unsigned type = __extent_entry_type(entry);
+ if (type < BCH_EXTENT_ENTRY_MAX) {
+ prt_str(out, extent_entry_types[__extent_entry_type(entry)]);
+ prt_str(out, ": ");
+ }
+
+ switch (type) {
case BCH_EXTENT_ENTRY_ptr:
bch2_extent_ptr_to_text(out, c, entry_to_ptr(entry));
break;
@@ -1387,8 +1349,7 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
case BCH_EXTENT_ENTRY_stripe_ptr: {
const struct bch_extent_stripe_ptr *ec = &entry->stripe_ptr;
- prt_printf(out, "ec: idx %llu block %u",
- (u64) ec->idx, ec->block);
+ prt_printf(out, "idx %llu block %u", (u64) ec->idx, ec->block);
break;
}
case BCH_EXTENT_ENTRY_rebalance:
@@ -1403,8 +1364,6 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
prt_printf(out, "(invalid extent entry %.16llx)", *((u64 *) entry));
return;
}
-
- first = false;
}
}
diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c
index 4aa130ff7cf6..bba273d55c37 100644
--- a/fs/bcachefs/inode.c
+++ b/fs/bcachefs/inode.c
@@ -598,7 +598,7 @@ static void __bch2_inode_unpacked_to_text(struct printbuf *out,
struct bch_inode_unpacked *inode)
{
prt_printf(out, "\n");
- printbuf_indent_add(out, 2);
+ guard(printbuf_indent)(out);
prt_printf(out, "mode=%o\n", inode->bi_mode);
prt_str(out, "flags=");
@@ -620,7 +620,6 @@ static void __bch2_inode_unpacked_to_text(struct printbuf *out,
#undef x
bch2_printbuf_strip_trailing_newline(out);
- printbuf_indent_sub(out, 2);
}
void bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked *inode)
diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index e7d53ab1cf55..330db82aba8b 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -408,9 +408,8 @@ void bch2_promote_op_to_text(struct printbuf *out,
{
if (!op->write.read_done) {
prt_printf(out, "parent read: %px\n", op->write.rbio.parent);
- printbuf_indent_add(out, 2);
+ guard(printbuf_indent)(out);
bch2_read_bio_to_text(out, c, op->write.rbio.parent);
- printbuf_indent_sub(out, 2);
}
bch2_data_update_to_text(out, &op->write);
@@ -1076,8 +1075,10 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
}
if ((bch2_bkey_extent_flags(k) & BIT_ULL(BCH_EXTENT_FLAG_poisoned)) &&
- !orig->data_update)
- return bch_err_throw(c, extent_poisoned);
+ !orig->data_update) {
+ ret = bch_err_throw(c, extent_poisoned);
+ goto err;
+ }
retry_pick:
ret = bch2_bkey_pick_read_device(c, k, failed, &pick, dev);
@@ -1517,7 +1518,7 @@ void bch2_read_bio_to_text(struct printbuf *out,
/* Are we in a retry? */
- printbuf_indent_add(out, 2);
+ guard(printbuf_indent)(out);
u64 now = local_clock();
prt_printf(out, "start_time:\t");
@@ -1551,7 +1552,6 @@ void bch2_read_bio_to_text(struct printbuf *out,
prt_newline(out);
bch2_bio_to_text(out, &rbio->bio);
- printbuf_indent_sub(out, 2);
}
void bch2_fs_io_read_exit(struct bch_fs *c)
diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c
index 1d83dcc9731e..aed22fc7759b 100644
--- a/fs/bcachefs/io_write.c
+++ b/fs/bcachefs/io_write.c
@@ -1742,7 +1742,7 @@ void bch2_write_op_to_text(struct printbuf *out, struct bch_write_op *op)
prt_printf(out, "pos:\t");
bch2_bpos_to_text(out, op->pos);
prt_newline(out);
- printbuf_indent_add(out, 2);
+ guard(printbuf_indent)(out);
prt_printf(out, "started:\t");
bch2_pr_time_units(out, local_clock() - op->start_time);
@@ -1754,11 +1754,12 @@ void bch2_write_op_to_text(struct printbuf *out, struct bch_write_op *op)
prt_printf(out, "nr_replicas:\t%u\n", op->nr_replicas);
prt_printf(out, "nr_replicas_required:\t%u\n", op->nr_replicas_required);
+ prt_printf(out, "devs_have:\t");
+ bch2_devs_list_to_text(out, &op->devs_have);
+ prt_newline(out);
prt_printf(out, "ref:\t%u\n", closure_nr_remaining(&op->cl));
prt_printf(out, "ret\t%s\n", bch2_err_str(op->error));
-
- printbuf_indent_sub(out, 2);
}
void bch2_fs_io_write_exit(struct bch_fs *c)
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index 93ac0faedf7d..6505c79f8516 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -48,7 +48,7 @@ static void bch2_journal_buf_to_text(struct printbuf *out, struct journal *j, u6
struct journal_buf *buf = j->buf + i;
prt_printf(out, "seq:\t%llu\n", seq);
- printbuf_indent_add(out, 2);
+ guard(printbuf_indent)(out);
if (!buf->write_started)
prt_printf(out, "refcount:\t%u\n", journal_state_count(s, i & JOURNAL_STATE_BUF_MASK));
@@ -81,8 +81,6 @@ static void bch2_journal_buf_to_text(struct printbuf *out, struct journal *j, u6
if (buf->write_done)
prt_str(out, "write_done");
prt_newline(out);
-
- printbuf_indent_sub(out, 2);
}
static void bch2_journal_bufs_to_text(struct printbuf *out, struct journal *j)
@@ -1767,20 +1765,20 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
bch2_journal_bufs_to_text(out, j);
prt_printf(out, "space:\n");
- printbuf_indent_add(out, 2);
- prt_printf(out, "discarded\t%u:%u\n",
- j->space[journal_space_discarded].next_entry,
- j->space[journal_space_discarded].total);
- prt_printf(out, "clean ondisk\t%u:%u\n",
- j->space[journal_space_clean_ondisk].next_entry,
- j->space[journal_space_clean_ondisk].total);
- prt_printf(out, "clean\t%u:%u\n",
- j->space[journal_space_clean].next_entry,
- j->space[journal_space_clean].total);
- prt_printf(out, "total\t%u:%u\n",
- j->space[journal_space_total].next_entry,
- j->space[journal_space_total].total);
- printbuf_indent_sub(out, 2);
+ scoped_guard(printbuf_indent, out) {
+ prt_printf(out, "discarded\t%u:%u\n",
+ j->space[journal_space_discarded].next_entry,
+ j->space[journal_space_discarded].total);
+ prt_printf(out, "clean ondisk\t%u:%u\n",
+ j->space[journal_space_clean_ondisk].next_entry,
+ j->space[journal_space_clean_ondisk].total);
+ prt_printf(out, "clean\t%u:%u\n",
+ j->space[journal_space_clean].next_entry,
+ j->space[journal_space_clean].total);
+ prt_printf(out, "total\t%u:%u\n",
+ j->space[journal_space_total].next_entry,
+ j->space[journal_space_total].total);
+ }
for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal]) {
if (!ca->mi.durability)
@@ -1796,7 +1794,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
prt_printf(out, "dev %u:\n", ca->dev_idx);
prt_printf(out, "durability %u:\n", ca->mi.durability);
- printbuf_indent_add(out, 2);
+ guard(printbuf_indent)(out);
prt_printf(out, "nr\t%u\n", ja->nr);
prt_printf(out, "bucket size\t%u\n", ca->mi.bucket_size);
prt_printf(out, "available\t%u:%u\n", bch2_journal_dev_buckets_available(j, ja, journal_space_discarded), ja->sectors_free);
@@ -1804,7 +1802,6 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
prt_printf(out, "dirty_ondisk\t%u (seq %llu)\n",ja->dirty_idx_ondisk, ja->bucket_seq[ja->dirty_idx_ondisk]);
prt_printf(out, "dirty_idx\t%u (seq %llu)\n", ja->dirty_idx, ja->bucket_seq[ja->dirty_idx]);
prt_printf(out, "cur_idx\t%u (seq %llu)\n", ja->cur_idx, ja->bucket_seq[ja->cur_idx]);
- printbuf_indent_sub(out, 2);
}
prt_printf(out, "replicas want %u need %u\n", c->opts.metadata_replicas, c->opts.metadata_replicas_required);
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 44328d02cf67..e6f778bf7763 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -760,8 +760,8 @@ static void journal_entry_dev_usage_to_text(struct printbuf *out, struct bch_fs
return;
prt_printf(out, "dev=%u", le32_to_cpu(u->dev));
+ guard(printbuf_indent)(out);
- printbuf_indent_add(out, 2);
for (i = 0; i < nr_types; i++) {
prt_newline(out);
bch2_prt_data_type(out, i);
@@ -770,7 +770,6 @@ static void journal_entry_dev_usage_to_text(struct printbuf *out, struct bch_fs
le64_to_cpu(u->d[i].sectors),
le64_to_cpu(u->d[i].fragmented));
}
- printbuf_indent_sub(out, 2);
}
static int journal_entry_log_validate(struct bch_fs *c,
diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c
index bd1885607d3e..ae747c87fcf9 100644
--- a/fs/bcachefs/journal_reclaim.c
+++ b/fs/bcachefs/journal_reclaim.c
@@ -1019,7 +1019,7 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64
pin_list = journal_seq_pin(j, *seq);
prt_printf(out, "%llu: count %u\n", *seq, atomic_read(&pin_list->count));
- printbuf_indent_add(out, 2);
+ guard(printbuf_indent)(out);
prt_printf(out, "unflushed:\n");
for (unsigned i = 0; i < ARRAY_SIZE(pin_list->unflushed); i++)
@@ -1031,8 +1031,6 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64
list_for_each_entry(pin, &pin_list->flushed[i], list)
prt_printf(out, "\t%px %ps\n", pin, pin->flush);
- printbuf_indent_sub(out, 2);
-
return false;
}
diff --git a/fs/bcachefs/journal_sb.c b/fs/bcachefs/journal_sb.c
index 0cb9b93f13e7..dc0ecedb3a8f 100644
--- a/fs/bcachefs/journal_sb.c
+++ b/fs/bcachefs/journal_sb.c
@@ -30,7 +30,7 @@ static int bch2_sb_journal_validate(struct bch_sb *sb, struct bch_sb_field *f,
if (!nr)
return 0;
- b = kmalloc_array(nr, sizeof(u64), GFP_KERNEL);
+ b = kvmalloc_array(nr, sizeof(u64), GFP_KERNEL);
if (!b)
return -BCH_ERR_ENOMEM_sb_journal_validate;
@@ -64,7 +64,7 @@ static int bch2_sb_journal_validate(struct bch_sb *sb, struct bch_sb_field *f,
ret = 0;
err:
- kfree(b);
+ kvfree(b);
return ret;
}
@@ -113,7 +113,7 @@ static int bch2_sb_journal_v2_validate(struct bch_sb *sb, struct bch_sb_field *f
if (!nr)
return 0;
- b = kmalloc_array(nr, sizeof(*b), GFP_KERNEL);
+ b = kvmalloc_array(nr, sizeof(*b), GFP_KERNEL);
if (!b)
return -BCH_ERR_ENOMEM_sb_journal_v2_validate;
@@ -165,7 +165,7 @@ static int bch2_sb_journal_v2_validate(struct bch_sb *sb, struct bch_sb_field *f
ret = 0;
err:
- kfree(b);
+ kvfree(b);
return ret;
}
@@ -230,3 +230,40 @@ int bch2_journal_buckets_to_sb(struct bch_fs *c, struct bch_dev *ca,
BUG_ON(dst + 1 != nr_compacted);
return 0;
}
+
+static inline bool journal_v2_unsorted(struct bch_sb_field_journal_v2 *j)
+{
+ unsigned nr = bch2_sb_field_journal_v2_nr_entries(j);
+ for (unsigned i = 0; i + 1 < nr; i++)
+ if (le64_to_cpu(j->d[i].start) > le64_to_cpu(j->d[i + 1].start))
+ return true;
+ return false;
+}
+
+int bch2_sb_journal_sort(struct bch_fs *c)
+{
+ BUG_ON(!c->sb.clean);
+ BUG_ON(test_bit(BCH_FS_rw, &c->flags));
+
+ guard(mutex)(&c->sb_lock);
+ bool write_sb = false;
+
+ for_each_online_member(c, ca, BCH_DEV_READ_REF_sb_journal_sort) {
+ struct bch_sb_field_journal_v2 *j = bch2_sb_field_get(ca->disk_sb.sb, journal_v2);
+ if (!j)
+ continue;
+
+ if (journal_v2_unsorted(j) ||
+ bch2_sb_field_get(ca->disk_sb.sb, journal)) {
+ struct journal_device *ja = &ca->journal;
+
+ sort(ja->buckets, ja->nr, sizeof(ja->buckets[0]), u64_cmp, NULL);
+ bch2_journal_buckets_to_sb(c, ca, ja->buckets, ja->nr);
+ write_sb = true;
+ }
+ }
+
+ return write_sb
+ ? bch2_write_super(c)
+ : 0;
+}
diff --git a/fs/bcachefs/journal_sb.h b/fs/bcachefs/journal_sb.h
index ba40a7e8d90a..e0fc40652607 100644
--- a/fs/bcachefs/journal_sb.h
+++ b/fs/bcachefs/journal_sb.h
@@ -22,3 +22,4 @@ extern const struct bch_sb_field_ops bch_sb_field_ops_journal;
extern const struct bch_sb_field_ops bch_sb_field_ops_journal_v2;
int bch2_journal_buckets_to_sb(struct bch_fs *, struct bch_dev *, u64 *, unsigned);
+int bch2_sb_journal_sort(struct bch_fs *);
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 4f41f1f6ec6c..c46a8965a7eb 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -121,6 +121,7 @@ struct moving_io {
static void move_free(struct moving_io *io)
{
struct moving_context *ctxt = io->write.ctxt;
+ struct bch_fs *c = io->write.op.c;
if (io->b)
atomic_dec(&io->b->count);
@@ -132,8 +133,9 @@ static void move_free(struct moving_io *io)
if (!io->write.data_opts.scrub) {
bch2_data_update_exit(&io->write);
} else {
- bch2_bio_free_pages_pool(io->write.op.c, &io->write.op.wbio.bio);
+ bch2_bio_free_pages_pool(c, &io->write.op.wbio.bio);
kfree(io->write.bvecs);
+ bch2_bkey_buf_exit(&io->write.k, c);
}
kfree(io);
}
@@ -427,7 +429,9 @@ int bch2_move_extent(struct moving_context *ctxt,
data_opts.scrub ? data_opts.read_dev : -1);
return 0;
err:
+ bch2_bkey_buf_exit(&io->write.k, c);
kfree(io);
+
if (bch2_err_matches(ret, EROFS) ||
bch2_err_matches(ret, BCH_ERR_transaction_restart))
return ret;
@@ -863,7 +867,8 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
u64 check_mismatch_done = bucket_start;
int ret = 0;
- CLASS(bch2_dev_tryget, ca)(c, dev);
+ /* Userspace might have supplied @dev: */
+ CLASS(bch2_dev_tryget_noerror, ca)(c, dev);
if (!ca)
return 0;
@@ -1400,7 +1405,7 @@ void bch2_move_stats_to_text(struct printbuf *out, struct bch_move_stats *stats)
prt_str(out, " pos=");
bch2_bbpos_to_text(out, stats->pos);
prt_newline(out);
- printbuf_indent_add(out, 2);
+ guard(printbuf_indent)(out);
prt_printf(out, "keys moved:\t%llu\n", atomic64_read(&stats->keys_moved));
prt_printf(out, "keys raced:\t%llu\n", atomic64_read(&stats->keys_raced));
@@ -1415,8 +1420,6 @@ void bch2_move_stats_to_text(struct printbuf *out, struct bch_move_stats *stats)
prt_printf(out, "bytes raced:\t");
prt_human_readable_u64(out, atomic64_read(&stats->sectors_raced) << 9);
prt_newline(out);
-
- printbuf_indent_sub(out, 2);
}
static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, struct moving_context *ctxt)
@@ -1425,7 +1428,7 @@ static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, str
printbuf_tabstop_push(out, 32);
bch2_move_stats_to_text(out, ctxt->stats);
- printbuf_indent_add(out, 2);
+ guard(printbuf_indent)(out);
prt_printf(out, "reads: ios %u/%u sectors %u/%u\n",
atomic_read(&ctxt->read_ios),
@@ -1439,15 +1442,13 @@ static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, str
atomic_read(&ctxt->write_sectors),
c->opts.move_bytes_in_flight >> 9);
- printbuf_indent_add(out, 2);
+ guard(printbuf_indent)(out);
scoped_guard(mutex, &ctxt->lock) {
struct moving_io *io;
list_for_each_entry(io, &ctxt->ios, io_list)
bch2_data_update_inflight_to_text(out, &io->write);
}
-
- printbuf_indent_sub(out, 4);
}
void bch2_fs_moving_ctxts_to_text(struct printbuf *out, struct bch_fs *c)
diff --git a/fs/bcachefs/printbuf.h b/fs/bcachefs/printbuf.h
index 907e5c97550b..5fa5265d7ba8 100644
--- a/fs/bcachefs/printbuf.h
+++ b/fs/bcachefs/printbuf.h
@@ -299,4 +299,18 @@ DEFINE_GUARD(printbuf_atomic, struct printbuf *,
printbuf_atomic_inc(_T),
printbuf_atomic_dec(_T));
+static inline void printbuf_indent_add_2(struct printbuf *out)
+{
+ bch2_printbuf_indent_add(out, 2);
+}
+
+static inline void printbuf_indent_sub_2(struct printbuf *out)
+{
+ bch2_printbuf_indent_sub(out, 2);
+}
+
+DEFINE_GUARD(printbuf_indent, struct printbuf *,
+ printbuf_indent_add_2(_T),
+ printbuf_indent_sub_2(_T));
+
#endif /* _BCACHEFS_PRINTBUF_H */
diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c
index 25bf72dc6488..35aff96bf12a 100644
--- a/fs/bcachefs/rebalance.c
+++ b/fs/bcachefs/rebalance.c
@@ -43,6 +43,55 @@ static const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s
return bch2_bkey_ptrs_rebalance_opts(bch2_bkey_ptrs_c(k));
}
+void bch2_extent_rebalance_to_text(struct printbuf *out, struct bch_fs *c,
+ const struct bch_extent_rebalance *r)
+{
+ prt_printf(out, "replicas=%u", r->data_replicas);
+ if (r->data_replicas_from_inode)
+ prt_str(out, " (inode)");
+
+ prt_str(out, " checksum=");
+ bch2_prt_csum_opt(out, r->data_checksum);
+ if (r->data_checksum_from_inode)
+ prt_str(out, " (inode)");
+
+ if (r->background_compression || r->background_compression_from_inode) {
+ prt_str(out, " background_compression=");
+ bch2_compression_opt_to_text(out, r->background_compression);
+
+ if (r->background_compression_from_inode)
+ prt_str(out, " (inode)");
+ }
+
+ if (r->background_target || r->background_target_from_inode) {
+ prt_str(out, " background_target=");
+ if (c)
+ bch2_target_to_text(out, c, r->background_target);
+ else
+ prt_printf(out, "%u", r->background_target);
+
+ if (r->background_target_from_inode)
+ prt_str(out, " (inode)");
+ }
+
+ if (r->promote_target || r->promote_target_from_inode) {
+ prt_str(out, " promote_target=");
+ if (c)
+ bch2_target_to_text(out, c, r->promote_target);
+ else
+ prt_printf(out, "%u", r->promote_target);
+
+ if (r->promote_target_from_inode)
+ prt_str(out, " (inode)");
+ }
+
+ if (r->erasure_code || r->erasure_code_from_inode) {
+ prt_printf(out, " ec=%u", r->erasure_code);
+ if (r->erasure_code_from_inode)
+ prt_str(out, " (inode)");
+ }
+}
+
static inline unsigned bch2_bkey_ptrs_need_compress(struct bch_fs *c,
struct bch_io_opts *opts,
struct bkey_s_c k,
@@ -661,7 +710,7 @@ void bch2_rebalance_status_to_text(struct printbuf *out, struct bch_fs *c)
prt_str(out, bch2_rebalance_state_strs[r->state]);
prt_newline(out);
- printbuf_indent_add(out, 2);
+ guard(printbuf_indent)(out);
switch (r->state) {
case BCH_REBALANCE_waiting: {
@@ -700,8 +749,6 @@ void bch2_rebalance_status_to_text(struct printbuf *out, struct bch_fs *c)
bch2_prt_task_backtrace(out, t, 0, GFP_KERNEL);
put_task_struct(t);
}
-
- printbuf_indent_sub(out, 2);
}
void bch2_rebalance_stop(struct bch_fs *c)
diff --git a/fs/bcachefs/rebalance.h b/fs/bcachefs/rebalance.h
index 7a565ea7dbfc..4a8812a65c61 100644
--- a/fs/bcachefs/rebalance.h
+++ b/fs/bcachefs/rebalance.h
@@ -26,6 +26,9 @@ static inline struct bch_extent_rebalance io_opts_to_rebalance_opts(struct bch_f
return r;
};
+void bch2_extent_rebalance_to_text(struct printbuf *, struct bch_fs *,
+ const struct bch_extent_rebalance *);
+
u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *, struct bkey_s_c);
int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bch_io_opts *, struct bkey_i *);
int bch2_get_update_rebalance_opts(struct btree_trans *,
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 6319144a440c..8679c8aad0e7 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -15,6 +15,7 @@
#include "error.h"
#include "journal_io.h"
#include "journal_reclaim.h"
+#include "journal_sb.h"
#include "journal_seq_blacklist.h"
#include "logged_ops.h"
#include "move.h"
@@ -67,9 +68,12 @@ int bch2_btree_lost_data(struct bch_fs *c,
#endif
write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_lru_entry_bad, ext->errors_silent);
+ write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_to_missing_lru_entry, ext->errors_silent);
write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_backpointer_to_missing_ptr, ext->errors_silent);
write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent);
write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong, ext->errors_silent);
+ write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_need_discard_key_wrong, ext->errors_silent);
+ write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_freespace_key_wrong, ext->errors_silent);
switch (btree) {
case BTREE_ID_alloc:
@@ -644,6 +648,10 @@ int bch2_fs_recovery(struct bch_fs *c)
bch_info(c, "recovering from clean shutdown, journal seq %llu",
le64_to_cpu(clean->journal_seq));
+
+ ret = bch2_sb_journal_sort(c);
+ if (ret)
+ goto err;
} else {
bch_info(c, "recovering from unclean shutdown");
}
diff --git a/fs/bcachefs/recovery_passes_format.h b/fs/bcachefs/recovery_passes_format.h
index 2696eee00345..d5654de64e4c 100644
--- a/fs/bcachefs/recovery_passes_format.h
+++ b/fs/bcachefs/recovery_passes_format.h
@@ -29,6 +29,7 @@
x(stripes_read, 1, 0) \
x(initialize_subvolumes, 2, 0) \
x(snapshots_read, 3, PASS_ALWAYS) \
+ x(delete_dead_interior_snapshots, 44, 0) \
x(check_allocations, 5, PASS_FSCK_ALLOC) \
x(trans_mark_dev_sbs, 6, PASS_ALWAYS|PASS_SILENT|PASS_ALLOC) \
x(fs_journal_alloc, 7, PASS_ALWAYS|PASS_SILENT|PASS_ALLOC) \
diff --git a/fs/bcachefs/sb-errors.c b/fs/bcachefs/sb-errors.c
index 41a259eab4fb..b356e80135fd 100644
--- a/fs/bcachefs/sb-errors.c
+++ b/fs/bcachefs/sb-errors.c
@@ -54,23 +54,41 @@ static int bch2_sb_errors_validate(struct bch_sb *sb, struct bch_sb_field *f,
return 0;
}
+static int error_entry_cmp(const void *_l, const void *_r)
+{
+ const struct bch_sb_field_error_entry *l = _l;
+ const struct bch_sb_field_error_entry *r = _r;
+
+ return -cmp_int(le64_to_cpu(l->last_error_time), le64_to_cpu(r->last_error_time));
+}
+
static void bch2_sb_errors_to_text(struct printbuf *out, struct bch_sb *sb,
struct bch_sb_field *f)
{
struct bch_sb_field_errors *e = field_to_type(f, errors);
- unsigned i, nr = bch2_sb_field_errors_nr_entries(e);
+ unsigned nr = bch2_sb_field_errors_nr_entries(e);
+
+ struct bch_sb_field_error_entry *sorted = kvmalloc_array(nr, sizeof(*sorted), GFP_KERNEL);
+
+ if (sorted) {
+ memcpy(sorted, e->entries, nr * sizeof(*sorted));
+ sort(sorted, nr, sizeof(*sorted), error_entry_cmp, NULL);
+ } else sorted = e->entries;
if (out->nr_tabstops <= 1)
printbuf_tabstop_push(out, 16);
- for (i = 0; i < nr; i++) {
- bch2_sb_error_id_to_text(out, BCH_SB_ERROR_ENTRY_ID(&e->entries[i]));
+ for (struct bch_sb_field_error_entry *i = sorted; i < sorted + nr; i++) {
+ bch2_sb_error_id_to_text(out, BCH_SB_ERROR_ENTRY_ID(i));
prt_tab(out);
- prt_u64(out, BCH_SB_ERROR_ENTRY_NR(&e->entries[i]));
+ prt_u64(out, BCH_SB_ERROR_ENTRY_NR(i));
prt_tab(out);
- bch2_prt_datetime(out, le64_to_cpu(e->entries[i].last_error_time));
+ bch2_prt_datetime(out, le64_to_cpu(i->last_error_time));
prt_newline(out);
}
+
+ if (sorted != e->entries)
+ kvfree(sorted);
}
const struct bch_sb_field_ops bch_sb_field_ops_errors = {
diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h
index aa0ea1ec9f10..7c6f18a1ee2a 100644
--- a/fs/bcachefs/sb-errors_format.h
+++ b/fs/bcachefs/sb-errors_format.h
@@ -329,6 +329,7 @@ enum bch_fsck_flags {
x(accounting_key_version_0, 282, FSCK_AUTOFIX) \
x(accounting_key_nr_counters_wrong, 307, FSCK_AUTOFIX) \
x(accounting_key_underflow, 325, FSCK_AUTOFIX) \
+ x(accounting_key_version_out_of_order, 326, FSCK_AUTOFIX) \
x(logged_op_but_clean, 283, FSCK_AUTOFIX) \
x(compression_opt_not_marked_in_sb, 295, FSCK_AUTOFIX) \
x(compression_type_not_marked_in_sb, 296, FSCK_AUTOFIX) \
@@ -337,7 +338,7 @@ enum bch_fsck_flags {
x(dirent_stray_data_after_cf_name, 305, 0) \
x(rebalance_work_incorrectly_set, 309, FSCK_AUTOFIX) \
x(rebalance_work_incorrectly_unset, 310, FSCK_AUTOFIX) \
- x(MAX, 326, 0)
+ x(MAX, 327, 0)
enum bch_sb_error_id {
#define x(t, n, ...) BCH_FSCK_ERR_##t = n,
diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c
index d26a0ca4a59d..963f8c2690c9 100644
--- a/fs/bcachefs/sb-members.c
+++ b/fs/bcachefs/sb-members.c
@@ -36,12 +36,10 @@ int bch2_dev_missing_bkey(struct bch_fs *c, struct bkey_s_c k, unsigned dev)
void bch2_dev_missing_atomic(struct bch_fs *c, unsigned dev)
{
- if (dev != BCH_SB_MEMBER_INVALID) {
+ if (dev != BCH_SB_MEMBER_INVALID)
bch2_fs_inconsistent(c, "pointer to %s device %u",
test_bit(dev, c->devs_removed.d)
? "removed" : "nonexistent", dev);
- dump_stack();
- }
}
void bch2_dev_bucket_missing(struct bch_dev *ca, u64 bucket)
@@ -287,10 +285,9 @@ static void member_to_text(struct printbuf *out,
return;
prt_printf(out, "Device:\t%u\n", idx);
+ guard(printbuf_indent)(out);
- printbuf_indent_add(out, 2);
bch2_member_to_text(out, &m, gi, sb, idx);
- printbuf_indent_sub(out, 2);
}
static int bch2_sb_members_v1_validate(struct bch_sb *sb, struct bch_sb_field *f,
@@ -437,21 +434,19 @@ void bch2_dev_io_errors_to_text(struct printbuf *out, struct bch_dev *ca)
prt_str(out, "IO errors since filesystem creation");
prt_newline(out);
- printbuf_indent_add(out, 2);
- for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++)
- prt_printf(out, "%s:\t%llu\n", bch2_member_error_strs[i], atomic64_read(&ca->errors[i]));
- printbuf_indent_sub(out, 2);
+ scoped_guard(printbuf_indent, out)
+ for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++)
+ prt_printf(out, "%s:\t%llu\n", bch2_member_error_strs[i], atomic64_read(&ca->errors[i]));
prt_str(out, "IO errors since ");
bch2_pr_time_units(out, (ktime_get_real_seconds() - le64_to_cpu(m.errors_reset_time)) * NSEC_PER_SEC);
prt_str(out, " ago");
prt_newline(out);
- printbuf_indent_add(out, 2);
- for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++)
- prt_printf(out, "%s:\t%llu\n", bch2_member_error_strs[i],
- atomic64_read(&ca->errors[i]) - le64_to_cpu(m.errors_at_reset[i]));
- printbuf_indent_sub(out, 2);
+ scoped_guard(printbuf_indent, out)
+ for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++)
+ prt_printf(out, "%s:\t%llu\n", bch2_member_error_strs[i],
+ atomic64_read(&ca->errors[i]) - le64_to_cpu(m.errors_at_reset[i]));
}
void bch2_dev_errors_reset(struct bch_dev *ca)
diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c
index eab0c1e3ff56..00546b59dca6 100644
--- a/fs/bcachefs/snapshot.c
+++ b/fs/bcachefs/snapshot.c
@@ -309,7 +309,7 @@ static int __bch2_mark_snapshot(struct btree_trans *trans,
if (new.k->type == KEY_TYPE_snapshot) {
struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new);
- t->state = !BCH_SNAPSHOT_DELETED(s.v)
+ t->state = !BCH_SNAPSHOT_DELETED(s.v) && !BCH_SNAPSHOT_NO_KEYS(s.v)
? SNAPSHOT_ID_live
: SNAPSHOT_ID_deleted;
t->parent = le32_to_cpu(s.v->parent);
@@ -1101,6 +1101,20 @@ int bch2_snapshot_node_set_deleted(struct btree_trans *trans, u32 id)
return 0;
}
+static int bch2_snapshot_node_set_no_keys(struct btree_trans *trans, u32 id)
+{
+ struct bkey_i_snapshot *s =
+ bch2_bkey_get_mut_typed(trans, BTREE_ID_snapshots, POS(0, id), 0, snapshot);
+ int ret = PTR_ERR_OR_ZERO(s);
+ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c, "missing snapshot %u", id);
+ if (unlikely(ret))
+ return ret;
+
+ SET_BCH_SNAPSHOT_NO_KEYS(&s->v, true);
+ s->v.subvol = 0;
+ return 0;
+}
+
static inline void normalize_snapshot_child_pointers(struct bch_snapshot *s)
{
if (le32_to_cpu(s->children[0]) < le32_to_cpu(s->children[1]))
@@ -1783,22 +1797,9 @@ int __bch2_delete_dead_snapshots(struct bch_fs *c)
if (ret)
goto err;
}
-
- /*
- * Fixing children of deleted snapshots can't be done completely
- * atomically, if we crash between here and when we delete the interior
- * nodes some depth fields will be off:
- */
- ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, POS_MIN,
- BTREE_ITER_intent, k,
- NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &d->delete_interior));
- if (ret)
- goto err;
-
darray_for_each(d->delete_interior, i) {
ret = commit_do(trans, NULL, NULL, 0,
- bch2_snapshot_node_delete(trans, i->id));
+ bch2_snapshot_node_set_no_keys(trans, i->id));
if (!bch2_err_matches(ret, EROFS))
bch_err_msg(c, ret, "deleting snapshot %u", i->id);
if (ret)
@@ -1887,6 +1888,66 @@ int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans,
return ret;
}
+static int bch2_get_dead_interior_snapshots(struct btree_trans *trans, struct bkey_s_c k)
+{
+ struct bch_fs *c = trans->c;
+
+ if (k.k->type == KEY_TYPE_snapshot &&
+ BCH_SNAPSHOT_NO_KEYS(bkey_s_c_to_snapshot(k).v)) {
+ struct snapshot_interior_delete n = {
+ .id = k.k->p.offset,
+ .live_child = live_child(c, k.k->p.offset),
+ };
+
+ if (!n.live_child) {
+ bch_err(c, "error finding live child of snapshot %u", n.id);
+ return -EINVAL;
+ }
+
+ return darray_push(&c->snapshot_delete.delete_interior, n);
+ }
+
+ return 0;
+}
+
+int bch2_delete_dead_interior_snapshots(struct bch_fs *c)
+{
+ CLASS(btree_trans, trans)(c);
+ int ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots, POS_MAX, 0, k,
+ bch2_get_dead_interior_snapshots(trans, k));
+ if (ret)
+ goto err;
+
+ struct snapshot_delete *d = &c->snapshot_delete;
+ if (d->delete_interior.nr) {
+ /*
+ * Fixing children of deleted snapshots can't be done completely
+ * atomically, if we crash between here and when we delete the interior
+ * nodes some depth fields will be off:
+ */
+ ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, POS_MIN,
+ BTREE_ITER_intent, k,
+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &d->delete_interior));
+ if (ret)
+ goto err;
+
+ darray_for_each(d->delete_interior, i) {
+ ret = commit_do(trans, NULL, NULL, 0,
+ bch2_snapshot_node_delete(trans, i->id));
+ if (!bch2_err_matches(ret, EROFS))
+ bch_err_msg(c, ret, "deleting snapshot %u", i->id);
+ if (ret)
+ goto err;
+ }
+
+ darray_exit(&d->delete_interior);
+ }
+err:
+ bch_err_fn(c, ret);
+ return ret;
+}
+
static bool interior_snapshot_needs_delete(struct bkey_s_c_snapshot snap)
{
/* If there's one child, it's redundant and keys will be moved to the child */
@@ -1895,13 +1956,18 @@ static bool interior_snapshot_needs_delete(struct bkey_s_c_snapshot snap)
static int bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct bkey_s_c k)
{
+ struct bch_fs *c = trans->c;
+
if (k.k->type != KEY_TYPE_snapshot)
return 0;
- struct bkey_s_c_snapshot snap = bkey_s_c_to_snapshot(k);
- if (BCH_SNAPSHOT_WILL_DELETE(snap.v) ||
- interior_snapshot_needs_delete(snap))
- set_bit(BCH_FS_need_delete_dead_snapshots, &trans->c->flags);
+ struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k);
+
+ if (BCH_SNAPSHOT_NO_KEYS(s.v))
+ c->recovery.passes_to_run |= BIT_ULL(BCH_RECOVERY_PASS_delete_dead_interior_snapshots);
+ if (BCH_SNAPSHOT_WILL_DELETE(s.v) ||
+ interior_snapshot_needs_delete(s))
+ set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags);
return 0;
}
@@ -1909,6 +1975,15 @@ static int bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct
int bch2_snapshots_read(struct bch_fs *c)
{
/*
+ * It's important that we check if we need to reconstruct snapshots
+ * before going RW, so we mark that pass as required in the superblock -
+ * otherwise, we could end up deleting keys with missing snapshot nodes
+ * instead
+ */
+ BUG_ON(!test_bit(BCH_FS_new_fs, &c->flags) &&
+ test_bit(BCH_FS_may_go_rw, &c->flags));
+
+ /*
* Initializing the is_ancestor bitmaps requires ancestors to already be
* initialized - so mark in reverse:
*/
@@ -1919,15 +1994,6 @@ int bch2_snapshots_read(struct bch_fs *c)
bch2_check_snapshot_needs_deletion(trans, k));
bch_err_fn(c, ret);
- /*
- * It's important that we check if we need to reconstruct snapshots
- * before going RW, so we mark that pass as required in the superblock -
- * otherwise, we could end up deleting keys with missing snapshot nodes
- * instead
- */
- BUG_ON(!test_bit(BCH_FS_new_fs, &c->flags) &&
- test_bit(BCH_FS_may_go_rw, &c->flags));
-
return ret;
}
diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h
index 28d9a29a1fd0..65d43a7ab877 100644
--- a/fs/bcachefs/snapshot.h
+++ b/fs/bcachefs/snapshot.h
@@ -291,6 +291,7 @@ void bch2_delete_dead_snapshots_work(struct work_struct *);
void bch2_delete_dead_snapshots_async(struct bch_fs *);
void bch2_snapshot_delete_status_to_text(struct printbuf *, struct bch_fs *);
+int bch2_delete_dead_interior_snapshots(struct bch_fs *);
int bch2_snapshots_read(struct bch_fs *);
void bch2_fs_snapshots_exit(struct bch_fs *);
void bch2_fs_snapshots_init_early(struct bch_fs *);
diff --git a/fs/bcachefs/snapshot_format.h b/fs/bcachefs/snapshot_format.h
index 9bccae1f3590..444885106140 100644
--- a/fs/bcachefs/snapshot_format.h
+++ b/fs/bcachefs/snapshot_format.h
@@ -15,10 +15,35 @@ struct bch_snapshot {
bch_le128 btime;
};
+/*
+ * WILL_DELETE: leaf node that's no longer referenced by a subvolume, still has
+ * keys, will be deleted by delete_dead_snapshots
+ *
+ * SUBVOL: true if a subvol points to this snapshot (why do we have this?
+ * subvols are nonzero)
+ *
+ * DELETED: we never delete snapshot keys, we mark them as deleted so that we
+ * can distinguish between a key for a missing snapshot (and we have no idea
+ * what happened) and a key for a deleted snapshot (delete_dead_snapshots() missed
+ * something, key should be deleted)
+ *
+ * NO_KEYS: we don't remove interior snapshot nodes from snapshot trees at
+ * runtime, since we can't do the adjustments for the depth/skiplist field
+ * atomically - and that breaks e.g. is_ancestor(). Instead, we mark it to be
+ * deleted at the next remount; this tells us that we don't need to run the full
+ * delete_dead_snapshots().
+ *
+ *
+ * XXX - todo item:
+ *
+ * We should guard against a bitflip causing us to delete a snapshot incorrectly
+ * by cross checking with the subvolume btree: delete_dead_snapshots() can take
+ * out more data than any other codepath if it runs incorrectly
+ */
LE32_BITMASK(BCH_SNAPSHOT_WILL_DELETE, struct bch_snapshot, flags, 0, 1)
-/* True if a subvolume points to this snapshot node: */
LE32_BITMASK(BCH_SNAPSHOT_SUBVOL, struct bch_snapshot, flags, 1, 2)
LE32_BITMASK(BCH_SNAPSHOT_DELETED, struct bch_snapshot, flags, 2, 3)
+LE32_BITMASK(BCH_SNAPSHOT_NO_KEYS, struct bch_snapshot, flags, 3, 4)
/*
* Snapshot trees:
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index 61eeac671283..98d31a1f9630 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -1516,8 +1516,7 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb,
prt_newline(out);
prt_printf(out, "Options:");
prt_newline(out);
- printbuf_indent_add(out, 2);
- {
+ scoped_guard(printbuf_indent, out) {
enum bch_opt_id id;
for (id = 0; id < bch2_opts_nr; id++) {
@@ -1534,15 +1533,12 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb,
}
}
- printbuf_indent_sub(out, 2);
-
if (print_layout) {
prt_newline(out);
prt_printf(out, "layout:");
prt_newline(out);
- printbuf_indent_add(out, 2);
- bch2_sb_layout_to_text(out, &sb->layout);
- printbuf_indent_sub(out, 2);
+ scoped_guard(printbuf_indent, out)
+ bch2_sb_layout_to_text(out, &sb->layout);
}
vstruct_for_each(sb, f)
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index d640ae188722..32b12311928e 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -277,6 +277,17 @@ struct bch_fs *bch2_uuid_to_fs(__uuid_t uuid)
return c;
}
+void bch2_devs_list_to_text(struct printbuf *out, struct bch_devs_list *d)
+{
+ prt_char(out, '[');
+ darray_for_each(*d, i) {
+ if (i != d->data)
+ prt_char(out, ' ');
+ prt_printf(out, "%u", *i);
+ }
+ prt_char(out, ']');
+}
+
/* Filesystem RO/RW: */
/*
@@ -461,9 +472,11 @@ static bool __bch2_fs_emergency_read_only2(struct bch_fs *c, struct printbuf *ou
bch2_fs_read_only_async(c);
wake_up(&bch2_read_only_wait);
- if (ret)
+ if (ret) {
prt_printf(out, "emergency read only at seq %llu\n",
journal_cur_seq(&c->journal));
+ bch2_prt_task_backtrace(out, current, 2, out->atomic ? GFP_ATOMIC : GFP_KERNEL);
+ }
return ret;
}
@@ -1769,7 +1782,7 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)
static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb,
struct printbuf *err)
{
- unsigned ret;
+ int ret;
if (bch2_dev_is_online(ca)) {
prt_printf(err, "already have device online in slot %u\n",
diff --git a/fs/bcachefs/super.h b/fs/bcachefs/super.h
index d13dbf2b8227..351dc5911645 100644
--- a/fs/bcachefs/super.h
+++ b/fs/bcachefs/super.h
@@ -16,6 +16,8 @@ extern const char * const bch2_dev_write_refs[];
struct bch_fs *bch2_dev_to_fs(dev_t);
struct bch_fs *bch2_uuid_to_fs(__uuid_t);
+void bch2_devs_list_to_text(struct printbuf *, struct bch_devs_list *);
+
bool bch2_dev_state_allowed(struct bch_fs *, struct bch_dev *,
enum bch_member_state, int,
struct printbuf *);
diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c
index 2ded7f3c835f..2a9462275f92 100644
--- a/fs/bcachefs/util.c
+++ b/fs/bcachefs/util.c
@@ -415,45 +415,41 @@ void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats
printbuf_tabstop_push(out, TABSTOP_SIZE);
prt_printf(out, "duration of events\n");
- printbuf_indent_add(out, 2);
-
- pr_name_and_units(out, "min:", stats->min_duration);
- pr_name_and_units(out, "max:", stats->max_duration);
- pr_name_and_units(out, "total:", stats->total_duration);
-
- prt_printf(out, "mean:\t");
- bch2_pr_time_units_aligned(out, d_mean);
- prt_tab(out);
- bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT));
- prt_newline(out);
-
- prt_printf(out, "stddev:\t");
- bch2_pr_time_units_aligned(out, d_stddev);
- prt_tab(out);
- bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT));
+ scoped_guard(printbuf_indent, out) {
+ pr_name_and_units(out, "min:", stats->min_duration);
+ pr_name_and_units(out, "max:", stats->max_duration);
+ pr_name_and_units(out, "total:", stats->total_duration);
+
+ prt_printf(out, "mean:\t");
+ bch2_pr_time_units_aligned(out, d_mean);
+ prt_tab(out);
+ bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT));
+ prt_newline(out);
- printbuf_indent_sub(out, 2);
- prt_newline(out);
+ prt_printf(out, "stddev:\t");
+ bch2_pr_time_units_aligned(out, d_stddev);
+ prt_tab(out);
+ bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT));
+ prt_newline(out);
+ }
prt_printf(out, "time between events\n");
- printbuf_indent_add(out, 2);
-
- pr_name_and_units(out, "min:", stats->min_freq);
- pr_name_and_units(out, "max:", stats->max_freq);
-
- prt_printf(out, "mean:\t");
- bch2_pr_time_units_aligned(out, f_mean);
- prt_tab(out);
- bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT));
- prt_newline(out);
-
- prt_printf(out, "stddev:\t");
- bch2_pr_time_units_aligned(out, f_stddev);
- prt_tab(out);
- bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT));
+ scoped_guard(printbuf_indent, out) {
+ pr_name_and_units(out, "min:", stats->min_freq);
+ pr_name_and_units(out, "max:", stats->max_freq);
+
+ prt_printf(out, "mean:\t");
+ bch2_pr_time_units_aligned(out, f_mean);
+ prt_tab(out);
+ bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT));
+ prt_newline(out);
- printbuf_indent_sub(out, 2);
- prt_newline(out);
+ prt_printf(out, "stddev:\t");
+ bch2_pr_time_units_aligned(out, f_stddev);
+ prt_tab(out);
+ bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT));
+ prt_newline(out);
+ }
printbuf_tabstops_reset(out);