Diffstat (limited to 'fs/bcachefs')
 fs/bcachefs/alloc_background.c       |   8
 fs/bcachefs/alloc_foreground.c       |  27
 fs/bcachefs/backpointers.c           | 102
 fs/bcachefs/bcachefs.h               |   4
 fs/bcachefs/bcachefs_format.h        |   1
 fs/bcachefs/btree_cache.h            |   2
 fs/bcachefs/btree_gc.c               |  26
 fs/bcachefs/btree_io.c               |  13
 fs/bcachefs/btree_iter.c             | 140
 fs/bcachefs/btree_locking.c          |   5
 fs/bcachefs/btree_node_scan.c        |   3
 fs/bcachefs/btree_trans_commit.c     |   4
 fs/bcachefs/btree_types.h            |  12
 fs/bcachefs/btree_update_interior.c  |  61
 fs/bcachefs/checksum.h               |   2
 fs/bcachefs/data_update.c            |  27
 fs/bcachefs/data_update.h            |   8
 fs/bcachefs/debug.c                  |  47
 fs/bcachefs/disk_accounting.c        | 176
 fs/bcachefs/disk_accounting.h        |  16
 fs/bcachefs/error.c                  |   4
 fs/bcachefs/extents.c                |  87
 fs/bcachefs/extents.h                |   4
 fs/bcachefs/fs-io-buffered.c         |  12
 fs/bcachefs/fs-io-direct.c           |   8
 fs/bcachefs/fs-io.c                  |   4
 fs/bcachefs/inode.c                  |  50
 fs/bcachefs/inode.h                  |   9
 fs/bcachefs/io_misc.c                |  14
 fs/bcachefs/io_misc.h                |   2
 fs/bcachefs/io_read.c                |   8
 fs/bcachefs/io_read.h                |   4
 fs/bcachefs/io_write.c               |  57
 fs/bcachefs/io_write.h               |   4
 fs/bcachefs/io_write_types.h         |   2
 fs/bcachefs/journal.c                |  35
 fs/bcachefs/journal_io.c             |   3
 fs/bcachefs/journal_reclaim.c        |   4
 fs/bcachefs/journal_sb.c             |  37
 fs/bcachefs/journal_sb.h             |   1
 fs/bcachefs/lru.c                    |  45
 fs/bcachefs/lru.h                    |   5
 fs/bcachefs/migrate.c                |   5
 fs/bcachefs/move.c                   | 209
 fs/bcachefs/move.h                   |  34
 fs/bcachefs/opts.c                   |  40
 fs/bcachefs/opts.h                   |  14
 fs/bcachefs/printbuf.h               |  14
 fs/bcachefs/progress.c               |   2
 fs/bcachefs/rebalance.c              | 276
 fs/bcachefs/rebalance.h              |  55
 fs/bcachefs/recovery.c               |  22
 fs/bcachefs/recovery_passes_format.h |   1
 fs/bcachefs/reflink.c                |  16
 fs/bcachefs/sb-errors.c              |  28
 fs/bcachefs/sb-members.c             |  23
 fs/bcachefs/snapshot.c               | 122
 fs/bcachefs/snapshot.h               |   1
 fs/bcachefs/snapshot_format.h        |  27
 fs/bcachefs/super-io.c               |  10
 fs/bcachefs/super.c                  |  22
 fs/bcachefs/super.h                  |   2
 fs/bcachefs/sysfs.c                  |   4
 fs/bcachefs/util.c                   |  66
 fs/bcachefs/xattr.c                  |   7
 65 files changed, 1211 insertions(+), 872 deletions(-)
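
The bulk of this commit is mechanical: open-coded printbuf_indent_add(out, 2) / printbuf_indent_sub(out, 2) pairs are replaced with guard(printbuf_indent) and scoped_guard(printbuf_indent, ...), so the indent is dropped automatically when the scope exits and cannot leak on early returns. A minimal sketch of how such a guard can be declared with the kernel's <linux/cleanup.h> helpers; the real definition lives in fs/bcachefs/printbuf.h (also touched by this diff), so treat the exact form below as an assumption:

    #include <linux/cleanup.h>

    /* Illustrative guard: the "lock" side indents by 2, the "unlock" side
     * un-indents by 2. After this, guard(printbuf_indent)(&buf) indents for
     * the rest of the enclosing scope, and scoped_guard(printbuf_indent, &buf)
     * { ... } limits the indent to the braced block. */
    DEFINE_GUARD(printbuf_indent, struct printbuf *,
                 printbuf_indent_add(_T, 2),
                 printbuf_indent_sub(_T, 2));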
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index 3fc728efbf5c..cab4d6798dd7 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -344,7 +344,7 @@ static inline void __bch2_alloc_v4_to_text(struct printbuf *out, struct bch_fs *
struct bch_dev *ca = c ? bch2_dev_tryget_noerror(c, k.k->p.inode) : NULL;
prt_newline(out);
- printbuf_indent_add(out, 2);
+ guard(printbuf_indent)(out);
prt_printf(out, "gen %u oldest_gen %u data_type ", a->gen, a->oldest_gen);
bch2_prt_data_type(out, a->data_type);
@@ -367,7 +367,6 @@ static inline void __bch2_alloc_v4_to_text(struct printbuf *out, struct bch_fs *
if (ca)
prt_printf(out, "fragmentation %llu\n", alloc_lru_idx_fragmentation(*a, ca));
prt_printf(out, "bp_start %llu\n", BCH_ALLOC_V4_BACKPOINTERS_START(a));
- printbuf_indent_sub(out, 2);
bch2_dev_put(ca);
}
@@ -2385,8 +2384,7 @@ int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca)
* We clear the LRU and need_discard btrees first so that we don't race
* with bch2_do_invalidates() and bch2_do_discards()
*/
- ret = bch2_btree_delete_range(c, BTREE_ID_lru, start, end,
- BTREE_TRIGGER_norun, NULL) ?:
+ ret = bch2_dev_remove_lrus(c, ca) ?:
bch2_btree_delete_range(c, BTREE_ID_need_discard, start, end,
BTREE_TRIGGER_norun, NULL) ?:
bch2_btree_delete_range(c, BTREE_ID_freespace, start, end,
@@ -2397,7 +2395,7 @@ int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca)
BTREE_TRIGGER_norun, NULL) ?:
bch2_btree_delete_range(c, BTREE_ID_alloc, start, end,
BTREE_TRIGGER_norun, NULL) ?:
- bch2_dev_usage_remove(c, ca->dev_idx);
+ bch2_dev_usage_remove(c, ca);
bch_err_msg(ca, ret, "removing dev alloc info");
return ret;
}
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index f6ea4a8272d0..3d125ee81663 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -1491,10 +1491,9 @@ static void bch2_write_point_to_text(struct printbuf *out, struct bch_fs *c,
prt_newline(out);
- printbuf_indent_add(out, 2);
- open_bucket_for_each(c, &wp->ptrs, ob, i)
- bch2_open_bucket_to_text(out, c, ob);
- printbuf_indent_sub(out, 2);
+ scoped_guard(printbuf_indent, out)
+ open_bucket_for_each(c, &wp->ptrs, ob, i)
+ bch2_open_bucket_to_text(out, c, ob);
}
void bch2_write_points_to_text(struct printbuf *out, struct bch_fs *c)
@@ -1586,9 +1585,8 @@ static noinline void bch2_print_allocator_stuck(struct bch_fs *c)
c->opts.allocator_stuck_timeout);
prt_printf(&buf, "Allocator debug:\n");
- printbuf_indent_add(&buf, 2);
- bch2_fs_alloc_debug_to_text(&buf, c);
- printbuf_indent_sub(&buf, 2);
+ scoped_guard(printbuf_indent, &buf)
+ bch2_fs_alloc_debug_to_text(&buf, c);
prt_newline(&buf);
bch2_printbuf_make_room(&buf, 4096);
@@ -1597,23 +1595,20 @@ static noinline void bch2_print_allocator_stuck(struct bch_fs *c)
guard(printbuf_atomic)(&buf);
for_each_online_member_rcu(c, ca) {
prt_printf(&buf, "Dev %u:\n", ca->dev_idx);
- printbuf_indent_add(&buf, 2);
- bch2_dev_alloc_debug_to_text(&buf, ca);
- printbuf_indent_sub(&buf, 2);
+ scoped_guard(printbuf_indent, &buf)
+ bch2_dev_alloc_debug_to_text(&buf, ca);
prt_newline(&buf);
}
}
prt_printf(&buf, "Copygc debug:\n");
- printbuf_indent_add(&buf, 2);
- bch2_copygc_wait_to_text(&buf, c);
- printbuf_indent_sub(&buf, 2);
+ scoped_guard(printbuf_indent, &buf)
+ bch2_copygc_wait_to_text(&buf, c);
prt_newline(&buf);
prt_printf(&buf, "Journal debug:\n");
- printbuf_indent_add(&buf, 2);
- bch2_journal_debug_to_text(&buf, &c->journal);
- printbuf_indent_sub(&buf, 2);
+ scoped_guard(printbuf_indent, &buf)
+ bch2_journal_debug_to_text(&buf, &c->journal);
bch2_print_str(c, KERN_ERR, buf.buf);
}
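
Worth knowing when reading the converted call sites above: scoped_guard() is a for-loop in disguise, which is what lets a single statement or a braced block follow it. Roughly (a simplified sketch of the macro's shape, not the verbatim <linux/cleanup.h> definition):

    /* The guard object's constructor runs on loop entry; its destructor runs
     * when the loop variable goes out of scope after the single iteration. */
    #define scoped_guard(_name, args...)                                \
            for (CLASS(_name, scope)(args), *done = NULL; !done;        \
                 done = (void *)1)

One consequence: a break inside the body exits the guard's scope, not any surrounding loop.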
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
index cb25cddb759b..c662eeba66ab 100644
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -117,7 +117,7 @@ static noinline int backpointer_mod_err(struct btree_trans *trans,
prt_printf(&buf, "existing backpointer found when inserting ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&new_bp->k_i));
prt_newline(&buf);
- printbuf_indent_add(&buf, 2);
+ guard(printbuf_indent)(&buf);
prt_printf(&buf, "found ");
bch2_bkey_val_to_text(&buf, c, found_bp);
@@ -127,7 +127,7 @@ static noinline int backpointer_mod_err(struct btree_trans *trans,
bch2_bkey_val_to_text(&buf, c, orig_k);
} else if (!will_check) {
prt_printf(&buf, "backpointer not found when deleting\n");
- printbuf_indent_add(&buf, 2);
+ guard(printbuf_indent)(&buf);
prt_printf(&buf, "searching for ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&new_bp->k_i));
@@ -278,9 +278,20 @@ static struct btree *__bch2_backpointer_get_node(struct btree_trans *trans,
bp.v->level - 1,
0);
struct btree *b = bch2_btree_iter_peek_node(iter);
- if (IS_ERR_OR_NULL(b))
+ if (IS_ERR(b))
goto err;
+ if (!b) {
+ /* Backpointer for nonexistent tree depth: */
+ bkey_init(&iter->k);
+ iter->k.p = bp.v->pos;
+ struct bkey_s_c k = { &iter->k };
+
+ int ret = backpointer_target_not_found(trans, bp, k, last_flushed, commit);
+ b = ret ? ERR_PTR(ret) : NULL;
+ goto err;
+ }
+
BUG_ON(b->c.level != bp.v->level - 1);
if (extent_matches_bp(c, bp.v->btree_id, bp.v->level,
@@ -809,7 +820,7 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
for (enum btree_id btree_id = 0;
btree_id < btree_id_nr_alive(c);
btree_id++) {
- int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1;
+ int level, depth = btree_type_has_data_ptrs(btree_id) ? 0 : 1;
ret = commit_do(trans, NULL, NULL,
BCH_TRANS_COMMIT_no_enospc,
@@ -862,17 +873,25 @@ static int data_type_to_alloc_counter(enum bch_data_type t)
}
}
-static int check_bucket_backpointers_to_extents(struct btree_trans *, struct bch_dev *, struct bpos);
+static int check_bucket_backpointers_to_extents(struct btree_trans *, struct bch_dev *, struct bpos,
+ struct bkey_buf *last_flushed);
static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct bkey_s_c alloc_k,
bool *had_mismatch,
- struct bkey_buf *last_flushed)
+ struct bkey_buf *last_flushed,
+ struct bpos *last_pos,
+ unsigned *nr_iters)
{
struct bch_fs *c = trans->c;
struct bch_alloc_v4 a_convert;
const struct bch_alloc_v4 *a = bch2_alloc_to_v4(alloc_k, &a_convert);
bool need_commit = false;
+ if (!bpos_eq(*last_pos, alloc_k.k->p))
+ *nr_iters = 0;
+
+ *last_pos = alloc_k.k->p;
+
*had_mismatch = false;
if (a->data_type == BCH_DATA_sb ||
@@ -926,6 +945,46 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b
return ret;
}
+ if (sectors[ALLOC_dirty] > a->dirty_sectors ||
+ sectors[ALLOC_cached] > a->cached_sectors ||
+ sectors[ALLOC_stripe] > a->stripe_sectors) {
+ if (*nr_iters) {
+ CLASS(printbuf, buf)();
+ bch2_log_msg_start(c, &buf);
+
+ prt_printf(&buf, "backpointer sectors > bucket sectors, but found no bad backpointers\n"
+ "bucket %llu:%llu data type %s, counters\n",
+ alloc_k.k->p.inode,
+ alloc_k.k->p.offset,
+ __bch2_data_types[a->data_type]);
+ if (sectors[ALLOC_dirty] > a->dirty_sectors)
+ prt_printf(&buf, "dirty: %u > %u\n",
+ sectors[ALLOC_dirty], a->dirty_sectors);
+ if (sectors[ALLOC_cached] > a->cached_sectors)
+ prt_printf(&buf, "cached: %u > %u\n",
+ sectors[ALLOC_cached], a->cached_sectors);
+ if (sectors[ALLOC_stripe] > a->stripe_sectors)
+ prt_printf(&buf, "stripe: %u > %u\n",
+ sectors[ALLOC_stripe], a->stripe_sectors);
+
+ for_each_btree_key_max_norestart(trans, iter, BTREE_ID_backpointers,
+ bucket_pos_to_bp_start(ca, alloc_k.k->p),
+ bucket_pos_to_bp_end(ca, alloc_k.k->p), 0, bp_k, ret) {
+ bch2_bkey_val_to_text(&buf, c, bp_k);
+ prt_newline(&buf);
+ }
+
+ bch2_print_str(c, KERN_ERR, buf.buf);
+ __WARN();
+ return ret;
+ }
+
+ *nr_iters += 1;
+
+ return check_bucket_backpointers_to_extents(trans, ca, alloc_k.k->p, last_flushed) ?:
+ bch_err_throw(c, transaction_restart_nested);
+ }
+
if (sectors[ALLOC_dirty] != a->dirty_sectors ||
sectors[ALLOC_cached] != a->cached_sectors ||
sectors[ALLOC_stripe] != a->stripe_sectors) {
@@ -943,13 +1002,6 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b
return ret;
}
- if (sectors[ALLOC_dirty] > a->dirty_sectors ||
- sectors[ALLOC_cached] > a->cached_sectors ||
- sectors[ALLOC_stripe] > a->stripe_sectors) {
- return check_bucket_backpointers_to_extents(trans, ca, alloc_k.k->p) ?:
- bch_err_throw(c, transaction_restart_nested);
- }
-
bool empty = (sectors[ALLOC_dirty] +
sectors[ALLOC_stripe] +
sectors[ALLOC_cached]) == 0;
@@ -1113,6 +1165,8 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
CLASS(btree_trans, trans)(c);
struct extents_to_bp_state s = { .bp_start = POS_MIN };
+ struct bpos last_pos = POS_MIN;
+ unsigned nr_iters = 0;
bch2_bkey_buf_init(&s.last_flushed);
bkey_init(&s.last_flushed.k->k);
@@ -1121,7 +1175,8 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
POS_MIN, BTREE_ITER_prefetch, k, ({
bool had_mismatch;
bch2_fs_going_ro(c) ?:
- check_bucket_backpointer_mismatch(trans, k, &had_mismatch, &s.last_flushed);
+ check_bucket_backpointer_mismatch(trans, k, &had_mismatch, &s.last_flushed,
+ &last_pos, &nr_iters);
}));
if (ret)
goto err;
@@ -1189,7 +1244,11 @@ static int check_bucket_backpointer_pos_mismatch(struct btree_trans *trans,
if (ret)
return ret;
- return check_bucket_backpointer_mismatch(trans, k, had_mismatch, last_flushed);
+ struct bpos last_pos = POS_MIN;
+ unsigned nr_iters = 0;
+ return check_bucket_backpointer_mismatch(trans, k, had_mismatch,
+ last_flushed,
+ &last_pos, &nr_iters);
}
int bch2_check_bucket_backpointer_mismatch(struct btree_trans *trans,
@@ -1253,22 +1312,21 @@ static int check_one_backpointer(struct btree_trans *trans,
}
static int check_bucket_backpointers_to_extents(struct btree_trans *trans,
- struct bch_dev *ca, struct bpos bucket)
+ struct bch_dev *ca, struct bpos bucket,
+ struct bkey_buf *last_flushed)
{
u32 restart_count = trans->restart_count;
- struct bkey_buf last_flushed;
- bch2_bkey_buf_init(&last_flushed);
- bkey_init(&last_flushed.k->k);
int ret = for_each_btree_key_max(trans, iter, BTREE_ID_backpointers,
bucket_pos_to_bp_start(ca, bucket),
bucket_pos_to_bp_end(ca, bucket),
0, k,
- check_one_backpointer(trans, BBPOS_MIN, BBPOS_MAX, k, &last_flushed)
+ check_one_backpointer(trans, BBPOS_MIN, BBPOS_MAX, k, last_flushed)
);
- bch2_bkey_buf_exit(&last_flushed, trans->c);
- return ret ?: trans_was_restarted(trans, restart_count);
+ return ret ?:
+ bch2_btree_write_buffer_flush_sync(trans) ?: /* make sure bad backpointers that were deleted are visible */
+ trans_was_restarted(trans, restart_count);
}
static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
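
The new last_pos/nr_iters pair threaded through check_bucket_backpointer_mismatch() above is loop protection: the first time a bucket's backpointer totals exceed its alloc counters, the code re-checks that bucket's backpointers against extents and restarts the transaction; if it lands on the same bucket again and the mismatch persists, it prints the surviving backpointers and warns rather than restarting forever. The pattern in isolation (the helper name is illustrative, not from the diff):

    /* Returns true on a repeat visit to the same position, i.e. the
     * previous repair attempt did not resolve the inconsistency. */
    static bool repeat_visit(struct bpos *last_pos, unsigned *nr_iters,
                             struct bpos pos)
    {
            if (!bpos_eq(*last_pos, pos))
                    *nr_iters = 0;
            *last_pos = pos;
            return (*nr_iters)++ > 0;
    }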
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 16d08dfb5f19..553031a3b06a 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -523,6 +523,7 @@ struct discard_in_flight {
x(journal_read) \
x(fs_journal_alloc) \
x(fs_resize_on_mount) \
+ x(sb_journal_sort) \
x(btree_node_read) \
x(btree_node_read_all_replicas) \
x(btree_node_scrub) \
@@ -674,6 +675,7 @@ struct bch_dev {
x(error) \
x(topology_error) \
x(errors_fixed) \
+ x(errors_fixed_silent) \
x(errors_not_fixed) \
x(no_invalid_checks) \
x(discard_mount_opt_set) \
@@ -807,6 +809,8 @@ struct bch_fs {
struct bch_disk_groups_cpu __rcu *disk_groups;
struct bch_opts opts;
+ atomic_t opt_change_cookie;
+
unsigned loglevel;
unsigned prev_loglevel;
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index b2de993d802b..0839397105a9 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -654,7 +654,6 @@ struct bch_sb_field_ext {
/*
* field 1: version name
* field 2: BCH_VERSION(major, minor)
- * field 3: recovery passess required on upgrade
*/
#define BCH_METADATA_VERSIONS() \
x(bkey_renumber, BCH_VERSION(0, 10)) \
diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h
index 035b2cb25077..49d0be6405d8 100644
--- a/fs/bcachefs/btree_cache.h
+++ b/fs/bcachefs/btree_cache.h
@@ -166,7 +166,7 @@ void bch2_btree_cache_to_text(struct printbuf *, const struct btree_cache *);
do { \
if (trace_##event##_enabled()) { \
CLASS(printbuf, buf)(); \
- printbuf_indent_add(&buf, 2); \
+ guard(printbuf_indent)(&buf); \
bch2_btree_pos_to_text(&buf, c, b); \
trace_##event(c, buf.buf); \
} \
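
CLASS(printbuf, buf)(), used in the trace macro above and throughout the diff, declares a printbuf that is torn down automatically at end of scope, which is why converted sites no longer need explicit printbuf_exit() calls. A plausible definition via <linux/cleanup.h> (an assumption for illustration; the real one is in the bcachefs tree):

    /* Constructor value is the empty PRINTBUF initializer; the destructor
     * frees the heap buffer. 'void' means no constructor arguments, hence
     * the empty parens at the call sites. */
    DEFINE_CLASS(printbuf, struct printbuf,
                 printbuf_exit(&_T),        /* destructor */
                 PRINTBUF,                  /* constructor */
                 void);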
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 43f294284d57..2338feb8d8ed 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -717,16 +717,12 @@ fsck_err:
static int bch2_gc_btree(struct btree_trans *trans,
struct progress_indicator_state *progress,
- enum btree_id btree, bool initial)
+ enum btree_id btree, unsigned target_depth,
+ bool initial)
{
struct bch_fs *c = trans->c;
- unsigned target_depth = btree_node_type_has_triggers(__btree_node_type(0, btree)) ? 0 : 1;
int ret = 0;
- /* We need to make sure every leaf node is readable before going RW */
- if (initial)
- target_depth = 0;
-
for (unsigned level = target_depth; level < BTREE_MAX_DEPTH; level++) {
struct btree *prev = NULL;
struct btree_iter iter;
@@ -797,7 +793,21 @@ static int bch2_gc_btrees(struct bch_fs *c)
if (IS_ERR_OR_NULL(bch2_btree_id_root(c, btree)->b))
continue;
- ret = bch2_gc_btree(trans, &progress, btree, true);
+
+ unsigned target_depth = BIT_ULL(btree) & btree_leaf_has_triggers_mask ? 0 : 1;
+
+ /*
+ * In fsck, we need to make sure every leaf node is readable
+ * before going RW, otherwise we can no longer rewind inside
+ * btree_lost_data to repair during the current fsck run.
+ *
+ * Otherwise, we can delay the repair to the next
+ * mount or offline fsck.
+ */
+ if (test_bit(BCH_FS_in_fsck, &c->flags))
+ target_depth = 0;
+
+ ret = bch2_gc_btree(trans, &progress, btree, target_depth, true);
}
bch_err_fn(c, ret);
@@ -1228,7 +1238,7 @@ int bch2_gc_gens(struct bch_fs *c)
}
for (unsigned i = 0; i < BTREE_ID_NR; i++)
- if (btree_type_has_ptrs(i)) {
+ if (btree_type_has_data_ptrs(i)) {
c->gc_gens_btree = i;
c->gc_gens_pos = POS_MIN;
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 2e3dd9bacac5..52d21259ed6f 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -27,10 +27,15 @@
#include <linux/moduleparam.h>
#include <linux/sched/mm.h>
+static __maybe_unused unsigned bch2_btree_read_corrupt_ratio;
+static __maybe_unused int bch2_btree_read_corrupt_device;
+
#ifdef CONFIG_BCACHEFS_DEBUG
-static unsigned bch2_btree_read_corrupt_ratio;
module_param_named(btree_read_corrupt_ratio, bch2_btree_read_corrupt_ratio, uint, 0644);
MODULE_PARM_DESC(btree_read_corrupt_ratio, "");
+
+module_param_named(btree_read_corrupt_device, bch2_btree_read_corrupt_device, int, 0644);
+MODULE_PARM_DESC(btree_read_corrupt_device, "");
#endif
static void bch2_btree_node_header_to_text(struct printbuf *out, struct btree_node *bn)
@@ -1438,7 +1443,9 @@ start:
memset(&bio->bi_iter, 0, sizeof(bio->bi_iter));
bio->bi_iter.bi_size = btree_buf_bytes(b);
- bch2_maybe_corrupt_bio(bio, bch2_btree_read_corrupt_ratio);
+ if (bch2_btree_read_corrupt_device == rb->pick.ptr.dev ||
+ bch2_btree_read_corrupt_device < 0)
+ bch2_maybe_corrupt_bio(bio, bch2_btree_read_corrupt_ratio);
ret = bch2_btree_node_read_done(c, ca, b, &failed, &buf);
if (ret != -BCH_ERR_btree_node_read_err_want_retry &&
@@ -2523,7 +2530,7 @@ do_write:
if (trace_btree_node_write_enabled()) {
CLASS(printbuf, buf)();
- printbuf_indent_add(&buf, 2);
+ guard(printbuf_indent)(&buf);
prt_printf(&buf, "offset %u sectors %u bytes %u\n",
b->written,
sectors_to_write,
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index d52d577a900e..b72ed543d9c0 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -137,18 +137,8 @@ static void __bch2_btree_path_verify_cached(struct btree_trans *trans,
static void __bch2_btree_path_verify_level(struct btree_trans *trans,
struct btree_path *path, unsigned level)
{
- struct btree_path_level *l;
- struct btree_node_iter tmp;
- bool locked;
- struct bkey_packed *p, *k;
- struct printbuf buf1 = PRINTBUF;
- struct printbuf buf2 = PRINTBUF;
- struct printbuf buf3 = PRINTBUF;
- const char *msg;
-
- l = &path->l[level];
- tmp = l->iter;
- locked = btree_node_locked(path, level);
+ struct btree_path_level *l = &path->l[level];
+ bool locked = btree_node_locked(path, level);
if (path->cached) {
if (!level)
@@ -166,14 +156,17 @@ static void __bch2_btree_path_verify_level(struct btree_trans *trans,
bch2_btree_node_iter_verify(&l->iter, l->b);
- /*
- * For interior nodes, the iterator will have skipped past deleted keys:
- */
- p = level
+ /* For interior nodes, the iterator may have skipped past deleted keys: */
+ struct btree_node_iter tmp = l->iter;
+ const struct bkey_packed *p = level
? bch2_btree_node_iter_prev(&tmp, l->b)
: bch2_btree_node_iter_prev_all(&tmp, l->b);
- k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
+ tmp = l->iter;
+ const struct bkey_packed *k = level
+ ? bch2_btree_node_iter_peek(&tmp, l->b)
+ : bch2_btree_node_iter_peek_all(&tmp, l->b);
+ const char *msg;
if (!(level > path->level && trans->journal_replay_not_finished)) {
/*
* We can't run these checks for interior nodes when we're still
@@ -200,29 +193,31 @@ static void __bch2_btree_path_verify_level(struct btree_trans *trans,
btree_node_unlock(trans, path, level);
return;
err:
- bch2_bpos_to_text(&buf1, path->pos);
+ {
+ CLASS(printbuf, buf)();
+ prt_printf(&buf, "path should be %s key at level %u", msg, level);
- if (p) {
- struct bkey uk = bkey_unpack_key(l->b, p);
+ prt_str(&buf, "\npath pos ");
+ bch2_bpos_to_text(&buf, path->pos);
- bch2_bkey_to_text(&buf2, &uk);
- } else {
- prt_printf(&buf2, "(none)");
- }
+ prt_str(&buf, "\nprev key ");
+ if (p) {
+ struct bkey uk = bkey_unpack_key(l->b, p);
+ bch2_bkey_to_text(&buf, &uk);
+ } else {
+ prt_printf(&buf, "(none)");
+ }
- if (k) {
- struct bkey uk = bkey_unpack_key(l->b, k);
+ prt_str(&buf, "\ncur key ");
+ if (k) {
+ struct bkey uk = bkey_unpack_key(l->b, k);
+ bch2_bkey_to_text(&buf, &uk);
+ } else {
+ prt_printf(&buf, "(none)");
+ }
- bch2_bkey_to_text(&buf3, &uk);
- } else {
- prt_printf(&buf3, "(none)");
+ panic("%s\n", buf.buf);
}
-
- panic("path should be %s key at level %u:\n"
- "path pos %s\n"
- "prev key %s\n"
- "cur key %s\n",
- msg, level, buf1.buf, buf2.buf, buf3.buf);
}
static void __bch2_btree_path_verify(struct btree_trans *trans,
@@ -898,28 +893,53 @@ static noinline void btree_node_mem_ptr_set(struct btree_trans *trans,
btree_node_unlock(trans, path, plevel);
}
+static noinline_for_stack int btree_node_missing_err(struct btree_trans *trans,
+ struct btree_path *path)
+{
+ struct bch_fs *c = trans->c;
+ CLASS(printbuf, buf)();
+
+ prt_str(&buf, "node not found at pos: ");
+ bch2_bpos_to_text(&buf, path->pos);
+ prt_str(&buf, "\n within parent node ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&path_l(path)->b->key));
+ prt_newline(&buf);
+
+ return __bch2_topology_error(c, &buf);
+}
+
+static noinline_for_stack int btree_node_gap_err(struct btree_trans *trans,
+ struct btree_path *path,
+ struct bkey_i *k)
+{
+ struct bch_fs *c = trans->c;
+ CLASS(printbuf, buf)();
+
+ prt_str(&buf, "node doesn't cover expected range at pos: ");
+ bch2_bpos_to_text(&buf, path->pos);
+ prt_str(&buf, "\n within parent node ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&path_l(path)->b->key));
+ prt_str(&buf, "\n but got node: ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
+ prt_newline(&buf);
+
+ return __bch2_topology_error(c, &buf);
+}
+
static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans,
struct btree_path *path,
enum btree_iter_update_trigger_flags flags)
{
struct bch_fs *c = trans->c;
struct btree_path_level *l = path_l(path);
- struct btree_and_journal_iter jiter;
- struct bkey_s_c k;
int ret = 0;
+ struct btree_and_journal_iter jiter;
__bch2_btree_and_journal_iter_init_node_iter(trans, &jiter, l->b, l->iter, path->pos);
- k = bch2_btree_and_journal_iter_peek(c, &jiter);
+ struct bkey_s_c k = bch2_btree_and_journal_iter_peek(c, &jiter);
if (!k.k) {
- CLASS(printbuf, buf)();
-
- prt_str(&buf, "node not found at pos ");
- bch2_bpos_to_text(&buf, path->pos);
- prt_str(&buf, " at btree ");
- bch2_btree_pos_to_text(&buf, c, l->b);
-
- ret = bch2_fs_topology_error(c, "%s", buf.buf);
+ ret = btree_node_missing_err(trans, path);
goto err;
}
@@ -934,20 +954,16 @@ err:
return ret;
}
-static noinline_for_stack int btree_node_missing_err(struct btree_trans *trans,
- struct btree_path *path)
+static inline bool bpos_in_btree_node_key(struct bpos pos, const struct bkey_i *k)
{
- struct bch_fs *c = trans->c;
- CLASS(printbuf, buf)();
+ if (bpos_gt(pos, k->k.p))
+ return false;
- prt_str(&buf, "node not found at pos ");
- bch2_bpos_to_text(&buf, path->pos);
- prt_str(&buf, " within parent node ");
- bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&path_l(path)->b->key));
+ if (k->k.type == KEY_TYPE_btree_ptr_v2 &&
+ bpos_lt(pos, bkey_i_to_btree_ptr_v2_c(k)->v.min_key))
+ return false;
- bch2_fs_fatal_error(c, "%s", buf.buf);
- printbuf_exit(&buf);
- return bch_err_throw(c, btree_need_topology_repair);
+ return true;
}
static __always_inline int btree_path_down(struct btree_trans *trans,
@@ -983,6 +999,9 @@ static __always_inline int btree_path_down(struct btree_trans *trans,
}
}
+ if (unlikely(!bpos_in_btree_node_key(path->pos, &trans->btree_path_down)))
+ return btree_node_gap_err(trans, path, &trans->btree_path_down);
+
b = bch2_btree_node_get(trans, path, &trans->btree_path_down,
level, lock_type, trace_ip);
ret = PTR_ERR_OR_ZERO(b);
@@ -1488,7 +1507,7 @@ void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans)
{
prt_printf(buf, "%u transaction updates for %s journal seq %llu\n",
trans->nr_updates, trans->fn, trans->journal_res.seq);
- printbuf_indent_add(buf, 2);
+ guard(printbuf_indent)(buf);
trans_for_each_update(trans, i) {
struct bkey_s_c old = { &i->old_k, i->old_v };
@@ -1514,8 +1533,6 @@ void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans)
bch2_journal_entry_to_text(buf, trans->c, e);
prt_newline(buf);
}
-
- printbuf_indent_sub(buf, 2);
}
static void bch2_btree_path_to_text_short(struct printbuf *out, struct btree_trans *trans, btree_path_idx_t path_idx)
@@ -1568,8 +1585,8 @@ void bch2_btree_path_to_text(struct printbuf *out, struct btree_trans *trans, bt
prt_printf(out, " uptodate %u locks_want %u", path->uptodate, path->locks_want);
prt_newline(out);
+ guard(printbuf_indent)(out);
- printbuf_indent_add(out, 2);
for (unsigned l = 0; l < BTREE_MAX_DEPTH; l++) {
prt_printf(out, "l=%u locks %s seq %u node ", l,
btree_node_locked_str(btree_node_locked_type(path, l)),
@@ -1582,7 +1599,6 @@ void bch2_btree_path_to_text(struct printbuf *out, struct btree_trans *trans, bt
prt_printf(out, "%px", path->l[l].b);
prt_newline(out);
}
- printbuf_indent_sub(out, 2);
}
static noinline __cold
diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c
index 38c5643e8a78..a4f8aac448c0 100644
--- a/fs/bcachefs/btree_locking.c
+++ b/fs/bcachefs/btree_locking.c
@@ -205,9 +205,8 @@ static noinline __noreturn void break_cycle_fail(struct lock_graph *g)
bch2_btree_trans_to_text(&buf, trans);
prt_printf(&buf, "backtrace:\n");
- printbuf_indent_add(&buf, 2);
- bch2_prt_task_backtrace(&buf, trans->locking_wait.task, 2, GFP_NOWAIT);
- printbuf_indent_sub(&buf, 2);
+ scoped_guard(printbuf_indent, &buf)
+ bch2_prt_task_backtrace(&buf, trans->locking_wait.task, 2, GFP_NOWAIT);
prt_newline(&buf);
}
diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c
index b618a0bd1186..c0dff992ad60 100644
--- a/fs/bcachefs/btree_node_scan.c
+++ b/fs/bcachefs/btree_node_scan.c
@@ -42,12 +42,11 @@ static void found_btree_node_to_text(struct printbuf *out, struct bch_fs *c, con
static void found_btree_nodes_to_text(struct printbuf *out, struct bch_fs *c, found_btree_nodes nodes)
{
- printbuf_indent_add(out, 2);
+ guard(printbuf_indent)(out);
darray_for_each(nodes, i) {
found_btree_node_to_text(out, c, i);
prt_newline(out);
}
- printbuf_indent_sub(out, 2);
}
static void found_btree_node_to_key(struct bkey_i *k, const struct found_btree_node *f)
diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c
index 5fa7f2f9f1e9..2966971ee43e 100644
--- a/fs/bcachefs/btree_trans_commit.c
+++ b/fs/bcachefs/btree_trans_commit.c
@@ -970,6 +970,7 @@ do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans,
struct bkey_i *accounting;
retry:
+ memset(&trans->fs_usage_delta, 0, sizeof(trans->fs_usage_delta));
percpu_down_read(&c->mark_lock);
for (accounting = btree_trans_subbuf_base(trans, &trans->accounting);
accounting != btree_trans_subbuf_top(trans, &trans->accounting);
@@ -983,6 +984,9 @@ retry:
}
percpu_up_read(&c->mark_lock);
+ /* Only fatal errors are possible later, so no need to revert this */
+ bch2_trans_account_disk_usage_change(trans);
+
trans_for_each_update(trans, i) {
ret = bch2_journal_key_insert(c, i->btree_id, i->level, i->k);
if (ret)
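
The btree_trans_commit.c hunk above fixes delta accounting across retries: fs_usage_delta accumulates while the accounting entries are applied, so a transaction restart that jumps back to retry: would otherwise double-count. A condensed sketch of the shape (apply_accounting() is a stand-in for the accounting loop in do_bch2_trans_commit_to_journal_replay()):

    retry:
            /* deltas accumulate per attempt; reset before reapplying */
            memset(&trans->fs_usage_delta, 0, sizeof(trans->fs_usage_delta));

            ret = apply_accounting(trans);
            if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                    goto retry;

            /* only fatal errors are possible past this point, so the delta
             * can be committed without needing a revert path */
            bch2_trans_account_disk_usage_change(trans);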
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index e893eb938bb3..9e3c851200eb 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -840,6 +840,10 @@ static inline bool btree_node_type_has_triggers(enum btree_node_type type)
return BIT_ULL(type) & BTREE_NODE_TYPE_HAS_TRIGGERS;
}
+/* A mask of btree id bits that have triggers for their leaves */
+__maybe_unused
+static const u64 btree_leaf_has_triggers_mask = BTREE_NODE_TYPE_HAS_TRIGGERS >> 1;
+
static const u64 btree_is_extents_mask = 0
#define x(name, nr, flags, ...) |((!!((flags) & BTREE_IS_extents)) << nr)
BCH_BTREE_IDS()
@@ -883,15 +887,15 @@ static inline bool btree_type_has_snapshot_field(enum btree_id btree)
return BIT_ULL(btree) & mask;
}
-static inline bool btree_type_has_ptrs(enum btree_id btree)
-{
- const u64 mask = 0
+static const u64 btree_has_data_ptrs_mask = 0
#define x(name, nr, flags, ...) |((!!((flags) & BTREE_IS_data)) << nr)
BCH_BTREE_IDS()
#undef x
;
- return BIT_ULL(btree) & mask;
+static inline bool btree_type_has_data_ptrs(enum btree_id btree)
+{
+ return BIT_ULL(btree) & btree_has_data_ptrs_mask;
}
static inline bool btree_type_uses_write_buffer(enum btree_id btree)
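
btree_has_data_ptrs_mask above uses the same x-macro idiom as the other masks in btree_types.h: every BCH_BTREE_IDS() entry contributes one bit, set iff the entry's flags include BTREE_IS_data. A toy, self-contained version of the idiom (the table and flag below are stand-ins, not the real BCH_BTREE_IDS list):

    #define IS_DATA 1
    #define TOY_IDS()                       \
            x(extents, 0, IS_DATA)          \
            x(inodes,  1, 0)                \
            x(reflink, 2, IS_DATA)

    static const unsigned long long toy_data_mask = 0
    #define x(name, nr, flags) | ((unsigned long long)(!!((flags) & IS_DATA)) << (nr))
            TOY_IDS()
    #undef x
            ;
    /* toy_data_mask == 0b101: bits for "extents" and "reflink" are set, and
     * membership tests reduce to BIT_ULL(id) & toy_data_mask. */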
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index a9877a47bfc6..a8cd7a5a6e7d 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -324,9 +324,6 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
struct btree *b;
struct bch_devs_list devs_have = (struct bch_devs_list) { 0 };
enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
- unsigned nr_reserve = watermark < BCH_WATERMARK_reclaim
- ? BTREE_NODE_RESERVE
- : 0;
int ret;
b = bch2_btree_node_mem_alloc(trans, interior_node);
@@ -334,41 +331,6 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
return b;
BUG_ON(b->ob.nr);
-
- mutex_lock(&c->btree_reserve_cache_lock);
- if (unlikely(c->open_buckets_nr_free <= bch2_open_buckets_reserved(watermark))) {
- guard(spinlock)(&c->freelist_lock);
- if (c->open_buckets_nr_free <= bch2_open_buckets_reserved(watermark)) {
- if (cl)
- closure_wait(&c->open_buckets_wait, cl);
-
- ret = cl
- ? bch_err_throw(c, bucket_alloc_blocked)
- : bch_err_throw(c, open_buckets_empty);
- mutex_unlock(&c->btree_reserve_cache_lock);
- goto err;
- }
- }
-
- if (c->btree_reserve_cache_nr > nr_reserve) {
- for (struct btree_alloc *a = c->btree_reserve_cache;
- a < c->btree_reserve_cache + c->btree_reserve_cache_nr;) {
- /* check if it has sufficient durability */
-
- if (!can_use_btree_node(c, res, target, bkey_i_to_s_c(&a->k))) {
- bch2_open_buckets_put(c, &a->ob);
- *a = c->btree_reserve_cache[--c->btree_reserve_cache_nr];
- continue;
- }
-
- bkey_copy(&b->key, &a->k);
- b->ob = a->ob;
- *a = c->btree_reserve_cache[--c->btree_reserve_cache_nr];
- mutex_unlock(&c->btree_reserve_cache_lock);
- goto out;
- }
- }
- mutex_unlock(&c->btree_reserve_cache_lock);
retry:
ret = bch2_alloc_sectors_start_trans(trans,
target ?:
@@ -398,12 +360,29 @@ retry:
goto retry;
}
+ mutex_lock(&c->btree_reserve_cache_lock);
+ while (c->btree_reserve_cache_nr) {
+ struct btree_alloc *a = c->btree_reserve_cache + --c->btree_reserve_cache_nr;
+
+ /* check if it has sufficient durability */
+
+ if (can_use_btree_node(c, res, target, bkey_i_to_s_c(&a->k))) {
+ bkey_copy(&b->key, &a->k);
+ b->ob = a->ob;
+ mutex_unlock(&c->btree_reserve_cache_lock);
+ goto out;
+ }
+
+ bch2_open_buckets_put(c, &a->ob);
+ }
+ mutex_unlock(&c->btree_reserve_cache_lock);
+
bkey_btree_ptr_v2_init(&b->key);
bch2_alloc_sectors_append_ptrs(c, wp, &b->key, btree_sectors(c), false);
bch2_open_bucket_get(c, wp, &b->ob);
- bch2_alloc_sectors_done(c, wp);
out:
+ bch2_alloc_sectors_done(c, wp);
six_unlock_write(&b->c.lock);
six_unlock_intent(&b->c.lock);
@@ -2810,7 +2789,7 @@ static void bch2_btree_alloc_to_text(struct printbuf *out,
struct bch_fs *c,
struct btree_alloc *a)
{
- printbuf_indent_add(out, 2);
+ guard(printbuf_indent)(out);
bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&a->k));
prt_newline(out);
@@ -2818,8 +2797,6 @@ static void bch2_btree_alloc_to_text(struct printbuf *out,
unsigned i;
open_bucket_for_each(c, &a->ob, ob, i)
bch2_open_bucket_to_text(out, c, ob);
-
- printbuf_indent_sub(out, 2);
}
void bch2_btree_reserve_cache_to_text(struct printbuf *out, struct bch_fs *c)
diff --git a/fs/bcachefs/checksum.h b/fs/bcachefs/checksum.h
index 7bd9cf6104ca..10bfadcde80a 100644
--- a/fs/bcachefs/checksum.h
+++ b/fs/bcachefs/checksum.h
@@ -130,7 +130,7 @@ static inline enum bch_csum_type bch2_csum_opt_to_type(enum bch_csum_opt type,
}
static inline enum bch_csum_type bch2_data_checksum_type(struct bch_fs *c,
- struct bch_io_opts opts)
+ struct bch_inode_opts opts)
{
if (opts.nocow)
return 0;
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index 2c997fddefb3..7a0da6cdf78c 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -11,6 +11,7 @@
#include "ec.h"
#include "error.h"
#include "extents.h"
+#include "inode.h"
#include "io_write.h"
#include "keylist.h"
#include "move.h"
@@ -428,13 +429,18 @@ restart_drop_extra_replicas:
goto out;
}
+ struct bch_inode_opts opts;
+
ret = bch2_trans_log_str(trans, bch2_data_update_type_strs[m->type]) ?:
bch2_trans_log_bkey(trans, m->btree_id, 0, m->k.k) ?:
bch2_insert_snapshot_whiteouts(trans, m->btree_id,
k.k->p, bkey_start_pos(&insert->k)) ?:
bch2_insert_snapshot_whiteouts(trans, m->btree_id,
k.k->p, insert->k.p) ?:
- bch2_bkey_set_needs_rebalance(c, &op->opts, insert) ?:
+ bch2_inum_snapshot_opts_get(trans, k.k->p.inode, k.k->p.snapshot, &opts) ?:
+ bch2_bkey_set_needs_rebalance(c, &opts, insert,
+ SET_NEEDS_REBALANCE_foreground,
+ m->op.opts.change_cookie) ?:
bch2_trans_update(trans, &iter, insert,
BTREE_UPDATE_internal_snapshot_node);
if (ret)
@@ -613,7 +619,7 @@ int bch2_update_unwritten_extent(struct btree_trans *trans,
}
void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c,
- struct bch_io_opts *io_opts,
+ struct bch_inode_opts *io_opts,
struct data_update_opts *data_opts)
{
if (!out->nr_tabstops)
@@ -657,31 +663,32 @@ void bch2_data_update_to_text(struct printbuf *out, struct data_update *m)
prt_str_indented(out, "old key:\t");
bch2_bkey_val_to_text(out, m->op.c, bkey_i_to_s_c(m->k.k));
+
+ bch2_write_op_to_text(out, &m->op);
}
void bch2_data_update_inflight_to_text(struct printbuf *out, struct data_update *m)
{
bch2_bkey_val_to_text(out, m->op.c, bkey_i_to_s_c(m->k.k));
prt_newline(out);
- printbuf_indent_add(out, 2);
+ guard(printbuf_indent)(out);
bch2_data_update_opts_to_text(out, m->op.c, &m->op.opts, &m->data_opts);
if (!m->read_done) {
prt_printf(out, "read:\n");
- printbuf_indent_add(out, 2);
+ guard(printbuf_indent)(out);
bch2_read_bio_to_text(out, m->op.c, &m->rbio);
} else {
prt_printf(out, "write:\n");
- printbuf_indent_add(out, 2);
+ guard(printbuf_indent)(out);
bch2_write_op_to_text(out, &m->op);
}
- printbuf_indent_sub(out, 4);
}
int bch2_extent_drop_ptrs(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k,
- struct bch_io_opts *io_opts,
+ struct bch_inode_opts *io_opts,
struct data_update_opts *data_opts)
{
struct bch_fs *c = trans->c;
@@ -731,7 +738,7 @@ int bch2_extent_drop_ptrs(struct btree_trans *trans,
}
static int __bch2_data_update_bios_init(struct data_update *m, struct bch_fs *c,
- struct bch_io_opts *io_opts,
+ struct bch_inode_opts *io_opts,
unsigned buf_bytes)
{
unsigned nr_vecs = DIV_ROUND_UP(buf_bytes, PAGE_SIZE);
@@ -758,7 +765,7 @@ static int __bch2_data_update_bios_init(struct data_update *m, struct bch_fs *c,
}
int bch2_data_update_bios_init(struct data_update *m, struct bch_fs *c,
- struct bch_io_opts *io_opts)
+ struct bch_inode_opts *io_opts)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(m->k.k));
const union bch_extent_entry *entry;
@@ -830,7 +837,7 @@ int bch2_data_update_init(struct btree_trans *trans,
struct moving_context *ctxt,
struct data_update *m,
struct write_point_specifier wp,
- struct bch_io_opts *io_opts,
+ struct bch_inode_opts *io_opts,
struct data_update_opts data_opts,
enum btree_id btree_id,
struct bkey_s_c k)
diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h
index fc12aa65366f..3b0ba6f6497f 100644
--- a/fs/bcachefs/data_update.h
+++ b/fs/bcachefs/data_update.h
@@ -23,7 +23,7 @@ struct data_update_opts {
};
void bch2_data_update_opts_to_text(struct printbuf *, struct bch_fs *,
- struct bch_io_opts *, struct data_update_opts *);
+ struct bch_inode_opts *, struct data_update_opts *);
#define BCH_DATA_UPDATE_TYPES() \
x(copygc, 0) \
@@ -76,18 +76,18 @@ void bch2_data_update_read_done(struct data_update *);
int bch2_extent_drop_ptrs(struct btree_trans *,
struct btree_iter *,
struct bkey_s_c,
- struct bch_io_opts *,
+ struct bch_inode_opts *,
struct data_update_opts *);
int bch2_data_update_bios_init(struct data_update *, struct bch_fs *,
- struct bch_io_opts *);
+ struct bch_inode_opts *);
void bch2_data_update_exit(struct data_update *);
int bch2_data_update_init(struct btree_trans *, struct btree_iter *,
struct moving_context *,
struct data_update *,
struct write_point_specifier,
- struct bch_io_opts *, struct data_update_opts,
+ struct bch_inode_opts *, struct data_update_opts,
enum btree_id, struct bkey_s_c);
void bch2_data_update_opts_normalize(struct bkey_s_c, struct data_update_opts *);
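
Several of these hunks are plumbing for the new atomic_t opt_change_cookie added to struct bch_fs (see the bcachefs.h hunk earlier): bch2_bkey_set_needs_rebalance() now takes the cookie that was current when the io opts were computed, presumably so that rebalance decisions made under stale options can be detected once options change. A sketch of that idea under that assumption (both helpers below are illustrative, not from the diff):

    /* Writer side: bump the cookie whenever a filesystem option changes */
    static void opt_changed(struct bch_fs *c)
    {
            atomic_inc(&c->opt_change_cookie);
    }

    /* Reader side: snapshot the cookie when options are read, compare later
     * to decide whether a cached decision is still valid */
    static bool opts_still_current(struct bch_fs *c, u32 cookie)
    {
            return cookie == (u32) atomic_read(&c->opt_change_cookie);
    }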
diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c
index 33cb94f70b19..ebfb68e2e035 100644
--- a/fs/bcachefs/debug.c
+++ b/fs/bcachefs/debug.c
@@ -282,16 +282,13 @@ void bch2_btree_node_ondisk_to_text(struct printbuf *out, struct bch_fs *c,
le64_to_cpu(i->journal_seq));
offset += sectors;
- printbuf_indent_add(out, 4);
+ scoped_guard(printbuf_indent, out)
+ for (k = i->start; k != vstruct_last(i); k = bkey_p_next(k)) {
+ struct bkey u;
- for (k = i->start; k != vstruct_last(i); k = bkey_p_next(k)) {
- struct bkey u;
-
- bch2_bkey_val_to_text(out, c, bkey_disassemble(b, k, &u));
- prt_newline(out);
- }
-
- printbuf_indent_sub(out, 4);
+ bch2_bkey_val_to_text(out, c, bkey_disassemble(b, k, &u));
+ prt_newline(out);
+ }
}
out:
if (bio)
@@ -468,7 +465,7 @@ static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs *
bch2_btree_id_level_to_text(out, b->c.btree_id, b->c.level);
prt_printf(out, "\n");
- printbuf_indent_add(out, 2);
+ guard(printbuf_indent)(out);
bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key));
prt_newline(out);
@@ -488,8 +485,6 @@ static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs *
&b->writes[1].journal, b->writes[1].journal.seq);
prt_printf(out, "ob:\t%u\n", b->ob.nr);
-
- printbuf_indent_sub(out, 2);
}
static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf,
@@ -605,9 +600,8 @@ restart:
bch2_btree_trans_to_text(&i->buf, trans);
prt_printf(&i->buf, "backtrace:\n");
- printbuf_indent_add(&i->buf, 2);
- bch2_prt_task_backtrace(&i->buf, trans->locking_wait.task, 0, GFP_KERNEL);
- printbuf_indent_sub(&i->buf, 2);
+ scoped_guard(printbuf_indent, &i->buf)
+ bch2_prt_task_backtrace(&i->buf, trans->locking_wait.task, 0, GFP_KERNEL);
prt_newline(&i->buf);
closure_put(&trans->ref);
@@ -765,40 +759,35 @@ static ssize_t btree_transaction_stats_read(struct file *file, char __user *buf,
break;
prt_printf(&i->buf, "%s:\n", bch2_btree_transaction_fns[i->iter]);
- printbuf_indent_add(&i->buf, 2);
+ guard(printbuf_indent)(&i->buf);
guard(mutex)(&s->lock);
prt_printf(&i->buf, "Max mem used: %u\n", s->max_mem);
#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
- printbuf_indent_add(&i->buf, 2);
- bch2_trans_kmalloc_trace_to_text(&i->buf, &s->trans_kmalloc_trace);
- printbuf_indent_sub(&i->buf, 2);
+ scoped_guard(printbuf_indent, &i->buf)
+ bch2_trans_kmalloc_trace_to_text(&i->buf, &s->trans_kmalloc_trace);
#endif
prt_printf(&i->buf, "Transaction duration:\n");
- printbuf_indent_add(&i->buf, 2);
- bch2_time_stats_to_text(&i->buf, &s->duration);
- printbuf_indent_sub(&i->buf, 2);
+ scoped_guard(printbuf_indent, &i->buf)
+ bch2_time_stats_to_text(&i->buf, &s->duration);
if (IS_ENABLED(CONFIG_BCACHEFS_LOCK_TIME_STATS)) {
prt_printf(&i->buf, "Lock hold times:\n");
- printbuf_indent_add(&i->buf, 2);
- bch2_time_stats_to_text(&i->buf, &s->lock_hold_times);
- printbuf_indent_sub(&i->buf, 2);
+ scoped_guard(printbuf_indent, &i->buf)
+ bch2_time_stats_to_text(&i->buf, &s->lock_hold_times);
}
if (s->max_paths_text) {
prt_printf(&i->buf, "Maximum allocated btree paths (%u):\n", s->nr_max_paths);
- printbuf_indent_add(&i->buf, 2);
- prt_str_indented(&i->buf, s->max_paths_text);
- printbuf_indent_sub(&i->buf, 2);
+ scoped_guard(printbuf_indent, &i->buf)
+ prt_str_indented(&i->buf, s->max_paths_text);
}
- printbuf_indent_sub(&i->buf, 2);
prt_newline(&i->buf);
i->iter++;
}
diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c
index f0ebf91cd5fd..a99f821c6a1c 100644
--- a/fs/bcachefs/disk_accounting.c
+++ b/fs/bcachefs/disk_accounting.c
@@ -239,10 +239,12 @@ int bch2_accounting_validate(struct bch_fs *c, struct bkey_s_c k,
c, accounting_key_junk_at_end,
"junk at end of accounting key");
- bkey_fsck_err_on(bch2_accounting_counters(k.k) != bch2_accounting_type_nr_counters[acc_k.type],
+ const unsigned nr_counters = bch2_accounting_counters(k.k);
+
+ bkey_fsck_err_on(!nr_counters || nr_counters > BCH_ACCOUNTING_MAX_COUNTERS,
c, accounting_key_nr_counters_wrong,
"accounting key with %u counters, should be %u",
- bch2_accounting_counters(k.k), bch2_accounting_type_nr_counters[acc_k.type]);
+ nr_counters, bch2_accounting_type_nr_counters[acc_k.type]);
fsck_err:
return ret;
}
@@ -359,10 +361,13 @@ static int __bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accoun
accounting_pos_cmp, &a.k->p) < acc->k.nr)
return 0;
+ struct disk_accounting_pos acc_k;
+ bpos_to_disk_accounting_pos(&acc_k, a.k->p);
+
struct accounting_mem_entry n = {
.pos = a.k->p,
.bversion = a.k->bversion,
- .nr_counters = bch2_accounting_counters(a.k),
+ .nr_counters = bch2_accounting_type_nr_counters[acc_k.type],
.v[0] = __alloc_percpu_gfp(n.nr_counters * sizeof(u64),
sizeof(u64), GFP_KERNEL),
};
@@ -878,46 +883,44 @@ int bch2_accounting_read(struct bch_fs *c)
*dst++ = *i;
keys->gap = keys->nr = dst - keys->data;
- guard(percpu_write)(&c->mark_lock);
-
- darray_for_each_reverse(acc->k, i) {
- struct disk_accounting_pos acc_k;
- bpos_to_disk_accounting_pos(&acc_k, i->pos);
+ CLASS(printbuf, underflow_err)();
- u64 v[BCH_ACCOUNTING_MAX_COUNTERS];
- memset(v, 0, sizeof(v));
+ scoped_guard(percpu_write, &c->mark_lock) {
+ darray_for_each_reverse(acc->k, i) {
+ struct disk_accounting_pos acc_k;
+ bpos_to_disk_accounting_pos(&acc_k, i->pos);
- for (unsigned j = 0; j < i->nr_counters; j++)
- v[j] = percpu_u64_get(i->v[0] + j);
+ u64 v[BCH_ACCOUNTING_MAX_COUNTERS];
+ memset(v, 0, sizeof(v));
- /*
- * If the entry counters are zeroed, it should be treated as
- * nonexistent - it might point to an invalid device.
- *
- * Remove it, so that if it's re-added it gets re-marked in the
- * superblock:
- */
- ret = bch2_is_zero(v, sizeof(v[0]) * i->nr_counters)
- ? -BCH_ERR_remove_disk_accounting_entry
- : bch2_disk_accounting_validate_late(trans, &acc_k, v, i->nr_counters);
-
- if (ret == -BCH_ERR_remove_disk_accounting_entry) {
- free_percpu(i->v[0]);
- free_percpu(i->v[1]);
- darray_remove_item(&acc->k, i);
- ret = 0;
- continue;
- }
+ for (unsigned j = 0; j < i->nr_counters; j++)
+ v[j] = percpu_u64_get(i->v[0] + j);
- if (ret)
- return ret;
- }
+ /*
+ * If the entry counters are zeroed, it should be treated as
+ * nonexistent - it might point to an invalid device.
+ *
+ * Remove it, so that if it's re-added it gets re-marked in the
+ * superblock:
+ */
+ ret = bch2_is_zero(v, sizeof(v[0]) * i->nr_counters)
+ ? -BCH_ERR_remove_disk_accounting_entry
+ : bch2_disk_accounting_validate_late(trans, &acc_k, v, i->nr_counters);
+
+ if (ret == -BCH_ERR_remove_disk_accounting_entry) {
+ free_percpu(i->v[0]);
+ free_percpu(i->v[1]);
+ darray_remove_item(&acc->k, i);
+ ret = 0;
+ continue;
+ }
- eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
- accounting_pos_cmp, NULL);
+ if (ret)
+ return ret;
+ }
- scoped_guard(preempt) {
- struct bch_fs_usage_base *usage = this_cpu_ptr(c->usage);
+ eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
+ accounting_pos_cmp, NULL);
for (unsigned i = 0; i < acc->k.nr; i++) {
struct disk_accounting_pos k;
@@ -939,27 +942,20 @@ int bch2_accounting_read(struct bch_fs *c)
underflow |= (s64) v[j] < 0;
if (underflow) {
- CLASS(printbuf, buf)();
- bch2_log_msg_start(c, &buf);
-
- prt_printf(&buf, "Accounting underflow for\n");
- bch2_accounting_key_to_text(&buf, &k);
+ if (!underflow_err.pos) {
+ bch2_log_msg_start(c, &underflow_err);
+ prt_printf(&underflow_err, "Accounting underflow for\n");
+ }
+ bch2_accounting_key_to_text(&underflow_err, &k);
for (unsigned j = 0; j < acc->k.data[i].nr_counters; j++)
- prt_printf(&buf, " %lli", v[j]);
-
- bool print = bch2_count_fsck_err(c, accounting_key_underflow, &buf);
- unsigned pos = buf.pos;
- ret = bch2_run_explicit_recovery_pass(c, &buf,
- BCH_RECOVERY_PASS_check_allocations, 0);
- print |= buf.pos != pos;
-
- if (print)
- bch2_print_str(c, KERN_ERR, buf.buf);
- if (ret)
- return ret;
+ prt_printf(&underflow_err, " %lli", v[j]);
+ prt_newline(&underflow_err);
}
+ guard(preempt)();
+ struct bch_fs_usage_base *usage = this_cpu_ptr(c->usage);
+
switch (k.type) {
case BCH_DISK_ACCOUNTING_persistent_reserved:
usage->reserved += v[0] * k.persistent_reserved.nr_replicas;
@@ -986,24 +982,60 @@ int bch2_accounting_read(struct bch_fs *c)
}
}
+ if (underflow_err.pos) {
+ bool print = bch2_count_fsck_err(c, accounting_key_underflow, &underflow_err);
+ unsigned pos = underflow_err.pos;
+ ret = bch2_run_explicit_recovery_pass(c, &underflow_err,
+ BCH_RECOVERY_PASS_check_allocations, 0);
+ print |= underflow_err.pos != pos;
+
+ if (print)
+ bch2_print_str(c, KERN_ERR, underflow_err.buf);
+ if (ret)
+ return ret;
+ }
+
return ret;
}
-int bch2_dev_usage_remove(struct bch_fs *c, unsigned dev)
+int bch2_dev_usage_remove(struct bch_fs *c, struct bch_dev *ca)
{
CLASS(btree_trans, trans)(c);
+
+ struct disk_accounting_pos start;
+ disk_accounting_key_init(start, dev_data_type, .dev = ca->dev_idx);
+
+ struct disk_accounting_pos end;
+ disk_accounting_key_init(end, dev_data_type, .dev = ca->dev_idx, .data_type = U8_MAX);
+
return bch2_btree_write_buffer_flush_sync(trans) ?:
- for_each_btree_key_commit(trans, iter, BTREE_ID_accounting, POS_MIN,
- BTREE_ITER_all_snapshots, k, NULL, NULL, 0, ({
- struct disk_accounting_pos acc;
- bpos_to_disk_accounting_pos(&acc, k.k->p);
-
- acc.type == BCH_DISK_ACCOUNTING_dev_data_type &&
- acc.dev_data_type.dev == dev
- ? bch2_btree_bit_mod_buffered(trans, BTREE_ID_accounting, k.k->p, 0)
- : 0;
- })) ?:
- bch2_btree_write_buffer_flush_sync(trans);
+ commit_do(trans, NULL, NULL, 0, ({
+ struct bkey_s_c k;
+ int ret = 0;
+
+ for_each_btree_key_max_norestart(trans, iter, BTREE_ID_accounting,
+ disk_accounting_pos_to_bpos(&start),
+ disk_accounting_pos_to_bpos(&end),
+ BTREE_ITER_all_snapshots, k, ret) {
+ if (k.k->type != KEY_TYPE_accounting)
+ continue;
+
+ struct disk_accounting_pos acc;
+ bpos_to_disk_accounting_pos(&acc, k.k->p);
+
+ const unsigned nr = bch2_accounting_counters(k.k);
+ u64 v[BCH_ACCOUNTING_MAX_COUNTERS];
+ memcpy_u64s_small(v, bkey_s_c_to_accounting(k).v->d, nr);
+
+ bch2_u64s_neg(v, nr);
+
+ ret = bch2_disk_accounting_mod(trans, &acc, v, nr, false);
+ if (ret)
+ break;
+ }
+
+ ret;
+ })) ?: bch2_btree_write_buffer_flush_sync(trans);
}
int bch2_dev_usage_init(struct bch_dev *ca, bool gc)
@@ -1074,13 +1106,17 @@ void bch2_verify_accounting_clean(struct bch_fs *c)
case BCH_DISK_ACCOUNTING_dev_data_type: {
{
guard(rcu)(); /* scoped guard is a loop, and doesn't play nicely with continue */
+ const enum bch_data_type data_type = acc_k.dev_data_type.data_type;
struct bch_dev *ca = bch2_dev_rcu_noerror(c, acc_k.dev_data_type.dev);
if (!ca)
continue;
- v[0] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].buckets);
- v[1] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].sectors);
- v[2] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].fragmented);
+ v[0] = percpu_u64_get(&ca->usage->d[data_type].buckets);
+ v[1] = percpu_u64_get(&ca->usage->d[data_type].sectors);
+ v[2] = percpu_u64_get(&ca->usage->d[data_type].fragmented);
+
+ if (data_type == BCH_DATA_sb || data_type == BCH_DATA_journal)
+ base.hidden += a.v->d[0] * ca->mi.bucket_size;
}
if (memcmp(a.v->d, v, 3 * sizeof(u64))) {
@@ -1108,7 +1144,7 @@ void bch2_verify_accounting_clean(struct bch_fs *c)
mismatch = true; \
}
- //check(hidden);
+ check(hidden);
check(btree);
check(data);
check(cached);
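
bch2_dev_usage_remove() above no longer deletes accounting keys directly; it reads each of the device's dev_data_type keys and applies the negated counters through bch2_disk_accounting_mod(), which keeps the write buffer and in-memory totals coherent. bch2_u64s_neg() is presumably elementwise negation; a sketch under that assumption:

    /* Negate each counter in place: applying the result as a delta cancels
     * the accumulated totals exactly (two's complement: x + (-x) == 0
     * mod 2^64). */
    static inline void u64s_neg_sketch(u64 *v, unsigned nr)
    {
            while (nr--)
                    v[nr] = -v[nr];
    }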
diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h
index cc73cce98a44..c0d3d7e8fda6 100644
--- a/fs/bcachefs/disk_accounting.h
+++ b/fs/bcachefs/disk_accounting.h
@@ -186,11 +186,15 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans,
break;
case BCH_DISK_ACCOUNTING_dev_data_type: {
guard(rcu)();
+ const enum bch_data_type data_type = acc_k.dev_data_type.data_type;
struct bch_dev *ca = bch2_dev_rcu_noerror(c, acc_k.dev_data_type.dev);
if (ca) {
- this_cpu_add(ca->usage->d[acc_k.dev_data_type.data_type].buckets, a.v->d[0]);
- this_cpu_add(ca->usage->d[acc_k.dev_data_type.data_type].sectors, a.v->d[1]);
- this_cpu_add(ca->usage->d[acc_k.dev_data_type.data_type].fragmented, a.v->d[2]);
+ this_cpu_add(ca->usage->d[data_type].buckets, a.v->d[0]);
+ this_cpu_add(ca->usage->d[data_type].sectors, a.v->d[1]);
+ this_cpu_add(ca->usage->d[data_type].fragmented, a.v->d[2]);
+
+ if (data_type == BCH_DATA_sb || data_type == BCH_DATA_journal)
+ trans->fs_usage_delta.hidden += a.v->d[0] * ca->mi.bucket_size;
}
break;
}
@@ -212,9 +216,9 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans,
struct accounting_mem_entry *e = &acc->k.data[idx];
- EBUG_ON(bch2_accounting_counters(a.k) != e->nr_counters);
+ const unsigned nr = min_t(unsigned, bch2_accounting_counters(a.k), e->nr_counters);
- for (unsigned i = 0; i < bch2_accounting_counters(a.k); i++)
+ for (unsigned i = 0; i < nr; i++)
this_cpu_add(e->v[gc][i], a.v->d[i]);
return 0;
}
@@ -297,7 +301,7 @@ int bch2_gc_accounting_done(struct bch_fs *);
int bch2_accounting_read(struct bch_fs *);
-int bch2_dev_usage_remove(struct bch_fs *, unsigned);
+int bch2_dev_usage_remove(struct bch_fs *, struct bch_dev *);
int bch2_dev_usage_init(struct bch_dev *, bool);
void bch2_verify_accounting_clean(struct bch_fs *c);
diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c
index 9e69263eb796..a16f55d98d97 100644
--- a/fs/bcachefs/error.c
+++ b/fs/bcachefs/error.c
@@ -468,10 +468,10 @@ int __bch2_fsck_err(struct bch_fs *c,
if ((flags & FSCK_ERR_SILENT) ||
test_bit(err, c->sb.errors_silent)) {
- ret = flags & FSCK_CAN_FIX
+ set_bit(BCH_FS_errors_fixed_silent, &c->flags);
+ return flags & FSCK_CAN_FIX
? bch_err_throw(c, fsck_fix)
: bch_err_throw(c, fsck_ignore);
- goto err;
}
printbuf_indent_add_nextline(out, 2);
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index 68a61f7bc737..86aa93ea2345 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -1151,7 +1151,7 @@ bch2_extent_has_ptr(struct bkey_s_c k1, struct extent_ptr_decoded p1, struct bke
return NULL;
}
-static bool want_cached_ptr(struct bch_fs *c, struct bch_io_opts *opts,
+static bool want_cached_ptr(struct bch_fs *c, struct bch_inode_opts *opts,
struct bch_extent_ptr *ptr)
{
unsigned target = opts->promote_target ?: opts->foreground_target;
@@ -1165,7 +1165,7 @@ static bool want_cached_ptr(struct bch_fs *c, struct bch_io_opts *opts,
}
void bch2_extent_ptr_set_cached(struct bch_fs *c,
- struct bch_io_opts *opts,
+ struct bch_inode_opts *opts,
struct bkey_s k,
struct bch_extent_ptr *ptr)
{
@@ -1241,7 +1241,7 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k)
* the promote target.
*/
bool bch2_extent_normalize_by_opts(struct bch_fs *c,
- struct bch_io_opts *opts,
+ struct bch_inode_opts *opts,
struct bkey_s k)
{
struct bkey_ptrs ptrs;
@@ -1270,14 +1270,14 @@ void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struc
guard(rcu)();
struct bch_dev *ca = bch2_dev_rcu_noerror(c, ptr->dev);
if (!ca) {
- prt_printf(out, "ptr: %u:%llu gen %u%s", ptr->dev,
+ prt_printf(out, "%u:%llu gen %u%s", ptr->dev,
(u64) ptr->offset, ptr->gen,
ptr->cached ? " cached" : "");
} else {
u32 offset;
u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset);
- prt_printf(out, "ptr: %u:%llu:%u gen %u",
+ prt_printf(out, "%u:%llu:%u gen %u",
ptr->dev, b, offset, ptr->gen);
if (ca->mi.durability != 1)
prt_printf(out, " d=%u", ca->mi.durability);
@@ -1295,7 +1295,7 @@ void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struc
void bch2_extent_crc_unpacked_to_text(struct printbuf *out, struct bch_extent_crc_unpacked *crc)
{
- prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum ",
+ prt_printf(out, "c_size %u size %u offset %u nonce %u csum ",
crc->compressed_size,
crc->uncompressed_size,
crc->offset, crc->nonce);
@@ -1305,72 +1305,34 @@ void bch2_extent_crc_unpacked_to_text(struct printbuf *out, struct bch_extent_cr
bch2_prt_compression_type(out, crc->compression_type);
}
-static void bch2_extent_rebalance_to_text(struct printbuf *out, struct bch_fs *c,
- const struct bch_extent_rebalance *r)
-{
- prt_str(out, "rebalance:");
-
- prt_printf(out, " replicas=%u", r->data_replicas);
- if (r->data_replicas_from_inode)
- prt_str(out, " (inode)");
-
- prt_str(out, " checksum=");
- bch2_prt_csum_opt(out, r->data_checksum);
- if (r->data_checksum_from_inode)
- prt_str(out, " (inode)");
-
- if (r->background_compression || r->background_compression_from_inode) {
- prt_str(out, " background_compression=");
- bch2_compression_opt_to_text(out, r->background_compression);
-
- if (r->background_compression_from_inode)
- prt_str(out, " (inode)");
- }
-
- if (r->background_target || r->background_target_from_inode) {
- prt_str(out, " background_target=");
- if (c)
- bch2_target_to_text(out, c, r->background_target);
- else
- prt_printf(out, "%u", r->background_target);
-
- if (r->background_target_from_inode)
- prt_str(out, " (inode)");
- }
-
- if (r->promote_target || r->promote_target_from_inode) {
- prt_str(out, " promote_target=");
- if (c)
- bch2_target_to_text(out, c, r->promote_target);
- else
- prt_printf(out, "%u", r->promote_target);
-
- if (r->promote_target_from_inode)
- prt_str(out, " (inode)");
- }
-
- if (r->erasure_code || r->erasure_code_from_inode) {
- prt_printf(out, " ec=%u", r->erasure_code);
- if (r->erasure_code_from_inode)
- prt_str(out, " (inode)");
- }
-}
+static const char * const extent_entry_types[] = {
+#define x(t, n, ...) [n] = #t,
+ BCH_EXTENT_ENTRY_TYPES()
+#undef x
+ NULL
+};
void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
struct bkey_s_c k)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
- bool first = true;
if (c)
prt_printf(out, "durability: %u ", bch2_bkey_durability_safe(c, k));
+ guard(printbuf_indent)(out);
+
bkey_extent_entry_for_each(ptrs, entry) {
- if (!first)
- prt_printf(out, " ");
+ prt_newline(out);
- switch (__extent_entry_type(entry)) {
+ unsigned type = __extent_entry_type(entry);
+ if (type < BCH_EXTENT_ENTRY_MAX) {
+ prt_str(out, extent_entry_types[__extent_entry_type(entry)]);
+ prt_str(out, ": ");
+ }
+
+ switch (type) {
case BCH_EXTENT_ENTRY_ptr:
bch2_extent_ptr_to_text(out, c, entry_to_ptr(entry));
break;
@@ -1387,8 +1349,7 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
case BCH_EXTENT_ENTRY_stripe_ptr: {
const struct bch_extent_stripe_ptr *ec = &entry->stripe_ptr;
- prt_printf(out, "ec: idx %llu block %u",
- (u64) ec->idx, ec->block);
+ prt_printf(out, "idx %llu block %u", (u64) ec->idx, ec->block);
break;
}
case BCH_EXTENT_ENTRY_rebalance:
@@ -1403,8 +1364,6 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
prt_printf(out, "(invalid extent entry %.16llx)", *((u64 *) entry));
return;
}
-
- first = false;
}
}
diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h
index f6dcb17108cd..03ea7c689d9a 100644
--- a/fs/bcachefs/extents.h
+++ b/fs/bcachefs/extents.h
@@ -686,10 +686,10 @@ bool bch2_extents_match(struct bkey_s_c, struct bkey_s_c);
struct bch_extent_ptr *
bch2_extent_has_ptr(struct bkey_s_c, struct extent_ptr_decoded, struct bkey_s);
-void bch2_extent_ptr_set_cached(struct bch_fs *, struct bch_io_opts *,
+void bch2_extent_ptr_set_cached(struct bch_fs *, struct bch_inode_opts *,
struct bkey_s, struct bch_extent_ptr *);
-bool bch2_extent_normalize_by_opts(struct bch_fs *, struct bch_io_opts *, struct bkey_s);
+bool bch2_extent_normalize_by_opts(struct bch_fs *, struct bch_inode_opts *, struct bkey_s);
bool bch2_extent_normalize(struct bch_fs *, struct bkey_s);
void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *, const struct bch_extent_ptr *);
diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c
index 45175a478b92..aab30571b056 100644
--- a/fs/bcachefs/fs-io-buffered.c
+++ b/fs/bcachefs/fs-io-buffered.c
@@ -284,12 +284,12 @@ void bch2_readahead(struct readahead_control *ractl)
{
struct bch_inode_info *inode = to_bch_ei(ractl->mapping->host);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
- struct bch_io_opts opts;
struct folio *folio;
struct readpages_iter readpages_iter;
struct blk_plug plug;
- bch2_inode_opts_get(&opts, c, &inode->ei_inode);
+ struct bch_inode_opts opts;
+ bch2_inode_opts_get_inode(c, &inode->ei_inode, &opts);
int ret = readpages_iter_init(&readpages_iter, ractl);
if (ret)
@@ -350,7 +350,7 @@ int bch2_read_single_folio(struct folio *folio, struct address_space *mapping)
struct bch_inode_info *inode = to_bch_ei(mapping->host);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch_read_bio *rbio;
- struct bch_io_opts opts;
+ struct bch_inode_opts opts;
struct blk_plug plug;
int ret;
DECLARE_COMPLETION_ONSTACK(done);
@@ -361,7 +361,7 @@ int bch2_read_single_folio(struct folio *folio, struct address_space *mapping)
if (!bch2_folio_create(folio, GFP_KERNEL))
return -ENOMEM;
- bch2_inode_opts_get(&opts, c, &inode->ei_inode);
+ bch2_inode_opts_get_inode(c, &inode->ei_inode, &opts);
rbio = rbio_init(bio_alloc_bioset(NULL, 1, REQ_OP_READ, GFP_KERNEL, &c->bio_read),
c,
@@ -407,7 +407,7 @@ struct bch_writepage_io {
struct bch_writepage_state {
struct bch_writepage_io *io;
- struct bch_io_opts opts;
+ struct bch_inode_opts opts;
struct bch_folio_sector *tmp;
unsigned tmp_sectors;
struct blk_plug plug;
@@ -683,7 +683,7 @@ int bch2_writepages(struct address_space *mapping, struct writeback_control *wbc
struct bch_fs *c = mapping->host->i_sb->s_fs_info;
struct bch_writepage_state *w = kzalloc(sizeof(*w), GFP_NOFS|__GFP_NOFAIL);
- bch2_inode_opts_get(&w->opts, c, &to_bch_ei(mapping->host)->ei_inode);
+ bch2_inode_opts_get_inode(c, &to_bch_ei(mapping->host)->ei_inode, &w->opts);
blk_start_plug(&w->plug);
int ret = bch2_write_cache_pages(mapping, wbc, w);
diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c
index 79823234160f..a104b9d70bea 100644
--- a/fs/bcachefs/fs-io-direct.c
+++ b/fs/bcachefs/fs-io-direct.c
@@ -68,7 +68,6 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter)
struct file *file = req->ki_filp;
struct bch_inode_info *inode = file_bch_inode(file);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
- struct bch_io_opts opts;
struct dio_read *dio;
struct bio *bio;
struct blk_plug plug;
@@ -78,7 +77,8 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter)
size_t shorten;
ssize_t ret;
- bch2_inode_opts_get(&opts, c, &inode->ei_inode);
+ struct bch_inode_opts opts;
+ bch2_inode_opts_get_inode(c, &inode->ei_inode, &opts);
/* bios must be 512 byte aligned: */
if ((offset|iter->count) & (SECTOR_SIZE - 1))
@@ -445,13 +445,13 @@ static __always_inline long bch2_dio_write_loop(struct dio_write *dio)
struct kiocb *req = dio->req;
struct address_space *mapping = dio->mapping;
struct bch_inode_info *inode = dio->inode;
- struct bch_io_opts opts;
+ struct bch_inode_opts opts;
struct bio *bio = &dio->op.wbio.bio;
unsigned unaligned, iter_count;
bool sync = dio->sync, dropped_locks;
long ret;
- bch2_inode_opts_get(&opts, c, &inode->ei_inode);
+ bch2_inode_opts_get_inode(c, &inode->ei_inode, &opts);
while (1) {
iter_count = dio->iter.count;
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
index de0d965f3fde..57e9459afa07 100644
--- a/fs/bcachefs/fs-io.c
+++ b/fs/bcachefs/fs-io.c
@@ -627,10 +627,10 @@ static noinline int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
{
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bpos end_pos = POS(inode->v.i_ino, end_sector);
- struct bch_io_opts opts;
+ struct bch_inode_opts opts;
int ret = 0;
- bch2_inode_opts_get(&opts, c, &inode->ei_inode);
+ bch2_inode_opts_get_inode(c, &inode->ei_inode, &opts);
CLASS(btree_trans, trans)(c);
CLASS(btree_iter, iter)(trans, BTREE_ID_extents,
diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c
index 4aa130ff7cf6..655ed90b2a39 100644
--- a/fs/bcachefs/inode.c
+++ b/fs/bcachefs/inode.c
@@ -369,9 +369,9 @@ err:
}
int bch2_inode_find_by_inum_snapshot(struct btree_trans *trans,
- u64 inode_nr, u32 snapshot,
- struct bch_inode_unpacked *inode,
- unsigned flags)
+ u64 inode_nr, u32 snapshot,
+ struct bch_inode_unpacked *inode,
+ unsigned flags)
{
CLASS(btree_iter, iter)(trans, BTREE_ID_inodes, SPOS(0, inode_nr, snapshot), flags);
struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter);
@@ -598,7 +598,7 @@ static void __bch2_inode_unpacked_to_text(struct printbuf *out,
struct bch_inode_unpacked *inode)
{
prt_printf(out, "\n");
- printbuf_indent_add(out, 2);
+ guard(printbuf_indent)(out);
prt_printf(out, "mode=%o\n", inode->bi_mode);
prt_str(out, "flags=");
@@ -620,7 +620,6 @@ static void __bch2_inode_unpacked_to_text(struct printbuf *out,
#undef x
bch2_printbuf_strip_trailing_newline(out);
- printbuf_indent_sub(out, 2);
}
void bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked *inode)
@@ -674,7 +673,7 @@ static inline void bkey_inode_flags_set(struct bkey_s k, u64 f)
static inline bool bkey_is_unlinked_inode(struct bkey_s_c k)
{
- unsigned f = bkey_inode_flags(k) & BCH_INODE_unlinked;
+ unsigned f = bkey_inode_flags(k);
return (f & BCH_INODE_unlinked) && !(f & BCH_INODE_has_child_snapshot);
}
@@ -1224,32 +1223,45 @@ struct bch_opts bch2_inode_opts_to_opts(struct bch_inode_unpacked *inode)
return ret;
}
-void bch2_inode_opts_get(struct bch_io_opts *opts, struct bch_fs *c,
- struct bch_inode_unpacked *inode)
+void bch2_inode_opts_get_inode(struct bch_fs *c,
+ struct bch_inode_unpacked *inode,
+ struct bch_inode_opts *ret)
{
#define x(_name, _bits) \
if ((inode)->bi_##_name) { \
- opts->_name = inode->bi_##_name - 1; \
- opts->_name##_from_inode = true; \
+ ret->_name = inode->bi_##_name - 1; \
+ ret->_name##_from_inode = true; \
} else { \
- opts->_name = c->opts._name; \
- opts->_name##_from_inode = false; \
+ ret->_name = c->opts._name; \
+ ret->_name##_from_inode = false; \
}
BCH_INODE_OPTS()
#undef x
- bch2_io_opts_fixups(opts);
+ ret->change_cookie = atomic_read(&c->opt_change_cookie);
+
+ bch2_io_opts_fixups(ret);
}
-int bch2_inum_opts_get(struct btree_trans *trans, subvol_inum inum, struct bch_io_opts *opts)
+int bch2_inum_snapshot_opts_get(struct btree_trans *trans,
+ u64 inum, u32 snapshot,
+ struct bch_inode_opts *opts)
{
- struct bch_inode_unpacked inode;
- int ret = lockrestart_do(trans, bch2_inode_find_by_inum_trans(trans, inum, &inode));
+ if (inum) {
+ struct bch_inode_unpacked inode;
+ int ret = bch2_inode_find_by_inum_snapshot(trans, inum, snapshot, &inode, 0);
+ if (ret)
+ return ret;
- if (ret)
- return ret;
+ bch2_inode_opts_get_inode(trans->c, &inode, opts);
+ } else {
+ /*
+ * data_update_index_update may call us for reflink btree extent
+ * updates, in which case inum will be 0
+ */
- bch2_inode_opts_get(opts, trans->c, &inode);
+ bch2_inode_opts_get(trans->c, opts);
+ }
return 0;
}
diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h
index 79092ea74844..63b7088811fb 100644
--- a/fs/bcachefs/inode.h
+++ b/fs/bcachefs/inode.h
@@ -289,9 +289,8 @@ int bch2_inode_nlink_inc(struct bch_inode_unpacked *);
void bch2_inode_nlink_dec(struct btree_trans *, struct bch_inode_unpacked *);
struct bch_opts bch2_inode_opts_to_opts(struct bch_inode_unpacked *);
-void bch2_inode_opts_get(struct bch_io_opts *, struct bch_fs *,
- struct bch_inode_unpacked *);
-int bch2_inum_opts_get(struct btree_trans *, subvol_inum, struct bch_io_opts *);
+void bch2_inode_opts_get_inode(struct bch_fs *, struct bch_inode_unpacked *, struct bch_inode_opts *);
+int bch2_inum_snapshot_opts_get(struct btree_trans *, u64, u32, struct bch_inode_opts *);
int bch2_inode_set_casefold(struct btree_trans *, subvol_inum,
struct bch_inode_unpacked *, unsigned);
@@ -300,8 +299,8 @@ int bch2_inode_set_casefold(struct btree_trans *, subvol_inum,
static inline struct bch_extent_rebalance
bch2_inode_rebalance_opts_get(struct bch_fs *c, struct bch_inode_unpacked *inode)
{
- struct bch_io_opts io_opts;
- bch2_inode_opts_get(&io_opts, c, inode);
+ struct bch_inode_opts io_opts;
+ bch2_inode_opts_get_inode(c, inode, &io_opts);
return io_opts_to_rebalance_opts(c, &io_opts);
}
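bch2_inode_opts_get_inode() above implements per-option inheritance: each per-inode field is stored off-by-one so that 0 means "unset", a nonzero value overrides the filesystem-wide default, and a matching _from_inode bit records where the value came from. A simplified userspace sketch of that fallback pattern (struct and field names are illustrative):

#include <stdbool.h>
#include <stdio.h>

struct fs_opts	  { unsigned compression; unsigned replicas; };
/* inode fields are stored +1, so 0 means "not set on this inode" */
struct inode_opts { unsigned bi_compression; unsigned bi_replicas; };

struct eff_opts {
	unsigned compression, replicas;
	bool compression_from_inode, replicas_from_inode;
};

#define INHERIT(dst, ino, fs, name)				\
do {								\
	if ((ino)->bi_##name) {					\
		(dst)->name = (ino)->bi_##name - 1;		\
		(dst)->name##_from_inode = true;		\
	} else {						\
		(dst)->name = (fs)->name;			\
		(dst)->name##_from_inode = false;		\
	}							\
} while (0)

int main(void)
{
	struct fs_opts fs = { .compression = 1, .replicas = 2 };
	struct inode_opts ino = { .bi_compression = 4 }; /* overrides fs */
	struct eff_opts e;

	INHERIT(&e, &ino, &fs, compression);
	INHERIT(&e, &ino, &fs, replicas);
	printf("compression=%u (inode=%d) replicas=%u (inode=%d)\n",
	       e.compression, e.compression_from_inode,
	       e.replicas, e.replicas_from_inode);
	return 0;
}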
diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c
index fa0b06e17d17..04eb5ecd102b 100644
--- a/fs/bcachefs/io_misc.c
+++ b/fs/bcachefs/io_misc.c
@@ -24,7 +24,7 @@ int bch2_extent_fallocate(struct btree_trans *trans,
subvol_inum inum,
struct btree_iter *iter,
u64 sectors,
- struct bch_io_opts opts,
+ struct bch_inode_opts opts,
s64 *i_sectors_delta,
struct write_point_specifier write_point)
{
@@ -109,7 +109,7 @@ int bch2_extent_fallocate(struct btree_trans *trans,
}
ret = bch2_extent_update(trans, inum, iter, new.k, &disk_res,
- 0, i_sectors_delta, true);
+ 0, i_sectors_delta, true, 0);
err:
if (!ret && sectors_allocated)
bch2_increment_clock(c, sectors_allocated, WRITE);
@@ -211,7 +211,7 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter,
bch2_cut_back(end_pos, &delete);
ret = bch2_extent_update(trans, inum, iter, &delete,
- &disk_res, 0, i_sectors_delta, false);
+ &disk_res, 0, i_sectors_delta, false, 0);
bch2_disk_reservation_put(c, &disk_res);
}
@@ -373,7 +373,6 @@ static int __bch2_resume_logged_op_finsert(struct btree_trans *trans,
struct btree_iter iter;
struct bkey_i_logged_op_finsert *op = bkey_i_to_logged_op_finsert(op_k);
subvol_inum inum = { le32_to_cpu(op->v.subvol), le64_to_cpu(op->v.inum) };
- struct bch_io_opts opts;
u64 dst_offset = le64_to_cpu(op->v.dst_offset);
u64 src_offset = le64_to_cpu(op->v.src_offset);
s64 shift = dst_offset - src_offset;
@@ -384,10 +383,6 @@ static int __bch2_resume_logged_op_finsert(struct btree_trans *trans,
bool warn_errors = i_sectors_delta != NULL;
int ret = 0;
- ret = bch2_inum_opts_get(trans, inum, &opts);
- if (ret)
- return ret;
-
/*
* check for missing subvolume before fpunch, as in resume we don't want
* it to be a fatal error
@@ -476,8 +471,7 @@ case LOGGED_OP_FINSERT_shift_extents:
op->v.pos = cpu_to_le64(insert ? bkey_start_offset(&delete.k) : delete.k.p.offset);
- ret = bch2_bkey_set_needs_rebalance(c, &opts, copy) ?:
- bch2_btree_insert_trans(trans, BTREE_ID_extents, &delete, 0) ?:
+ ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, &delete, 0) ?:
bch2_btree_insert_trans(trans, BTREE_ID_extents, copy, 0) ?:
bch2_logged_op_update(trans, &op->k_i) ?:
bch2_trans_commit(trans, &disk_res, NULL, BCH_TRANS_COMMIT_no_enospc);
diff --git a/fs/bcachefs/io_misc.h b/fs/bcachefs/io_misc.h
index b93e4d4b3c0c..6a294f2a6dd6 100644
--- a/fs/bcachefs/io_misc.h
+++ b/fs/bcachefs/io_misc.h
@@ -3,7 +3,7 @@
#define _BCACHEFS_IO_MISC_H
int bch2_extent_fallocate(struct btree_trans *, subvol_inum, struct btree_iter *,
- u64, struct bch_io_opts, s64 *,
+ u64, struct bch_inode_opts, s64 *,
struct write_point_specifier);
int bch2_fpunch_snapshot(struct btree_trans *, struct bpos, struct bpos);
diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index bae2e181c9ed..7066be2701c0 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -158,7 +158,7 @@ static bool ptr_being_rewritten(struct bch_read_bio *orig, unsigned dev)
static inline int should_promote(struct bch_fs *c, struct bkey_s_c k,
struct bpos pos,
- struct bch_io_opts opts,
+ struct bch_inode_opts opts,
unsigned flags,
struct bch_io_failures *failed)
{
@@ -408,9 +408,8 @@ void bch2_promote_op_to_text(struct printbuf *out,
{
if (!op->write.read_done) {
prt_printf(out, "parent read: %px\n", op->write.rbio.parent);
- printbuf_indent_add(out, 2);
+ guard(printbuf_indent)(out);
bch2_read_bio_to_text(out, c, op->write.rbio.parent);
- printbuf_indent_sub(out, 2);
}
bch2_data_update_to_text(out, &op->write);
@@ -1519,7 +1518,7 @@ void bch2_read_bio_to_text(struct printbuf *out,
/* Are we in a retry? */
- printbuf_indent_add(out, 2);
+ guard(printbuf_indent)(out);
u64 now = local_clock();
prt_printf(out, "start_time:\t");
@@ -1553,7 +1552,6 @@ void bch2_read_bio_to_text(struct printbuf *out,
prt_newline(out);
bch2_bio_to_text(out, &rbio->bio);
- printbuf_indent_sub(out, 2);
}
void bch2_fs_io_read_exit(struct bch_fs *c)
diff --git a/fs/bcachefs/io_read.h b/fs/bcachefs/io_read.h
index 1e1c0476bd03..df4632f6fe9e 100644
--- a/fs/bcachefs/io_read.h
+++ b/fs/bcachefs/io_read.h
@@ -74,7 +74,7 @@ struct bch_read_bio {
struct bpos data_pos;
struct bversion version;
- struct bch_io_opts opts;
+ struct bch_inode_opts opts;
struct work_struct work;
@@ -192,7 +192,7 @@ static inline struct bch_read_bio *rbio_init_fragment(struct bio *bio,
static inline struct bch_read_bio *rbio_init(struct bio *bio,
struct bch_fs *c,
- struct bch_io_opts opts,
+ struct bch_inode_opts opts,
bio_end_io_t end_io)
{
struct bch_read_bio *rbio = to_rbio(bio);
diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c
index 1d83dcc9731e..6a5da02ce266 100644
--- a/fs/bcachefs/io_write.c
+++ b/fs/bcachefs/io_write.c
@@ -205,7 +205,8 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans,
static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans,
struct btree_iter *extent_iter,
u64 new_i_size,
- s64 i_sectors_delta)
+ s64 i_sectors_delta,
+ struct bch_inode_unpacked *inode_u)
{
/*
* Crazy performance optimization:
@@ -227,7 +228,13 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans,
BTREE_ITER_intent|
BTREE_ITER_cached);
struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter);
- int ret = bkey_err(k);
+
+ /*
+ * XXX: we currently need to unpack the inode on every write because we
+ * need the current io_opts, for transactional consistency - inode_v4?
+ */
+ int ret = bkey_err(k) ?:
+ bch2_inode_unpack(k, inode_u);
if (unlikely(ret))
return ret;
@@ -303,8 +310,10 @@ int bch2_extent_update(struct btree_trans *trans,
struct disk_reservation *disk_res,
u64 new_i_size,
s64 *i_sectors_delta_total,
- bool check_enospc)
+ bool check_enospc,
+ u32 change_cookie)
{
+ struct bch_fs *c = trans->c;
struct bpos next_pos;
bool usage_increasing;
s64 i_sectors_delta = 0, disk_sectors_delta = 0;
@@ -335,7 +344,7 @@ int bch2_extent_update(struct btree_trans *trans,
if (disk_res &&
disk_sectors_delta > (s64) disk_res->sectors) {
- ret = bch2_disk_reservation_add(trans->c, disk_res,
+ ret = bch2_disk_reservation_add(c, disk_res,
disk_sectors_delta - disk_res->sectors,
!check_enospc || !usage_increasing
? BCH_DISK_RESERVATION_NOFAIL : 0);
@@ -349,9 +358,16 @@ int bch2_extent_update(struct btree_trans *trans,
* aren't changing - for fsync to work properly; fsync relies on
* inode->bi_journal_seq which is updated by the trigger code:
*/
+ struct bch_inode_unpacked inode;
+ struct bch_inode_opts opts;
+
ret = bch2_extent_update_i_size_sectors(trans, iter,
min(k->k.p.offset << 9, new_i_size),
- i_sectors_delta) ?:
+ i_sectors_delta, &inode) ?:
+ (bch2_inode_opts_get_inode(c, &inode, &opts),
+ bch2_bkey_set_needs_rebalance(c, &opts, k,
+ SET_NEEDS_REBALANCE_foreground,
+ change_cookie)) ?:
bch2_trans_update(trans, iter, k, 0) ?:
bch2_trans_commit(trans, disk_res, NULL,
BCH_TRANS_COMMIT_no_check_rw|
@@ -402,7 +418,8 @@ static int bch2_write_index_default(struct bch_write_op *op)
ret = bch2_extent_update(trans, inum, &iter, sk.k,
&op->res,
op->new_i_size, &op->i_sectors_delta,
- op->flags & BCH_WRITE_check_enospc);
+ op->flags & BCH_WRITE_check_enospc,
+ op->opts.change_cookie);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
@@ -792,10 +809,6 @@ static void init_append_extent(struct bch_write_op *op,
bch2_alloc_sectors_append_ptrs_inlined(op->c, wp, &e->k_i, crc.compressed_size,
op->flags & BCH_WRITE_cached);
-
- if (!(op->flags & BCH_WRITE_move))
- bch2_bkey_set_needs_rebalance(op->c, &op->opts, &e->k_i);
-
bch2_keylist_push(&op->insert_keys);
}
@@ -1225,6 +1238,7 @@ static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans,
return 0;
}
+ struct bch_fs *c = trans->c;
struct bkey_i *new = bch2_trans_kmalloc_nomemzero(trans,
bkey_bytes(k.k) + sizeof(struct bch_extent_rebalance));
int ret = PTR_ERR_OR_ZERO(new);
@@ -1239,8 +1253,6 @@ static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans,
bkey_for_each_ptr(ptrs, ptr)
ptr->unwritten = 0;
- bch2_bkey_set_needs_rebalance(op->c, &op->opts, new);
-
/*
* Note that we're not calling bch2_subvol_get_snapshot() in this path -
* that was done when we kicked off the write, and here it's important
@@ -1248,8 +1260,20 @@ static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans,
* since been created. The write is still outstanding, so we're ok
* w.r.t. snapshot atomicity:
*/
+
+ /*
+ * For transactional consistency, set_needs_rebalance() has to be called
+ * with the io_opts from the btree in the same transaction:
+ */
+ struct bch_inode_unpacked inode;
+ struct bch_inode_opts opts;
+
return bch2_extent_update_i_size_sectors(trans, iter,
- min(new->k.p.offset << 9, new_i_size), 0) ?:
+ min(new->k.p.offset << 9, new_i_size), 0, &inode) ?:
+ (bch2_inode_opts_get_inode(c, &inode, &opts),
+ bch2_bkey_set_needs_rebalance(c, &opts, new,
+ SET_NEEDS_REBALANCE_foreground,
+ op->opts.change_cookie)) ?:
bch2_trans_update(trans, iter, new,
BTREE_UPDATE_internal_snapshot_node);
}
@@ -1742,7 +1766,7 @@ void bch2_write_op_to_text(struct printbuf *out, struct bch_write_op *op)
prt_printf(out, "pos:\t");
bch2_bpos_to_text(out, op->pos);
prt_newline(out);
- printbuf_indent_add(out, 2);
+ guard(printbuf_indent)(out);
prt_printf(out, "started:\t");
bch2_pr_time_units(out, local_clock() - op->start_time);
@@ -1754,11 +1778,12 @@ void bch2_write_op_to_text(struct printbuf *out, struct bch_write_op *op)
prt_printf(out, "nr_replicas:\t%u\n", op->nr_replicas);
prt_printf(out, "nr_replicas_required:\t%u\n", op->nr_replicas_required);
+ prt_printf(out, "devs_have:\t");
+ bch2_devs_list_to_text(out, &op->devs_have);
+ prt_newline(out);
prt_printf(out, "ref:\t%u\n", closure_nr_remaining(&op->cl));
prt_printf(out, "ret\t%s\n", bch2_err_str(op->error));
-
- printbuf_indent_sub(out, 2);
}
void bch2_fs_io_write_exit(struct bch_fs *c)
diff --git a/fs/bcachefs/io_write.h b/fs/bcachefs/io_write.h
index 2c0a8f35ee1f..692529bf401d 100644
--- a/fs/bcachefs/io_write.h
+++ b/fs/bcachefs/io_write.h
@@ -28,10 +28,10 @@ int bch2_sum_sector_overwrites(struct btree_trans *, struct btree_iter *,
struct bkey_i *, bool *, s64 *, s64 *);
int bch2_extent_update(struct btree_trans *, subvol_inum,
struct btree_iter *, struct bkey_i *,
- struct disk_reservation *, u64, s64 *, bool);
+ struct disk_reservation *, u64, s64 *, bool, u32);
static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c,
- struct bch_io_opts opts)
+ struct bch_inode_opts opts)
{
op->c = c;
op->end_io = NULL;
diff --git a/fs/bcachefs/io_write_types.h b/fs/bcachefs/io_write_types.h
index 5da4eb8bb6f6..ab36b03e0a46 100644
--- a/fs/bcachefs/io_write_types.h
+++ b/fs/bcachefs/io_write_types.h
@@ -90,7 +90,7 @@ struct bch_write_op {
struct bch_devs_list devs_have;
u16 target;
u16 nonce;
- struct bch_io_opts opts;
+ struct bch_inode_opts opts;
u32 subvol;
struct bpos pos;
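struct bch_inode_opts now carries a change_cookie snapshotted from c->opt_change_cookie (which bch2_opt_hook_post_set() bumps on every option change), and bch2_extent_update() threads it down to bch2_bkey_set_needs_rebalance(), presumably so a cached options struct that predates an option change can be recognized as stale. A sketch of that invalidation idea (userspace, illustrative names):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_uint opt_change_cookie;	/* bumped on every option change */

struct cached_opts {
	unsigned compression;
	unsigned change_cookie;		/* counter value when copy was taken */
};

static void opts_snapshot(struct cached_opts *o, unsigned compression)
{
	o->compression	 = compression;
	o->change_cookie = atomic_load(&opt_change_cookie);
}

static bool opts_stale(const struct cached_opts *o)
{
	return o->change_cookie != atomic_load(&opt_change_cookie);
}

int main(void)
{
	struct cached_opts o;
	opts_snapshot(&o, 1);

	atomic_fetch_add(&opt_change_cookie, 1);	/* an option changes */
	printf("stale: %d\n", opts_stale(&o));		/* 1: must re-read opts */
	return 0;
}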
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index 93ac0faedf7d..6505c79f8516 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -48,7 +48,7 @@ static void bch2_journal_buf_to_text(struct printbuf *out, struct journal *j, u6
struct journal_buf *buf = j->buf + i;
prt_printf(out, "seq:\t%llu\n", seq);
- printbuf_indent_add(out, 2);
+ guard(printbuf_indent)(out);
if (!buf->write_started)
prt_printf(out, "refcount:\t%u\n", journal_state_count(s, i & JOURNAL_STATE_BUF_MASK));
@@ -81,8 +81,6 @@ static void bch2_journal_buf_to_text(struct printbuf *out, struct journal *j, u6
if (buf->write_done)
prt_str(out, "write_done");
prt_newline(out);
-
- printbuf_indent_sub(out, 2);
}
static void bch2_journal_bufs_to_text(struct printbuf *out, struct journal *j)
@@ -1767,20 +1765,20 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
bch2_journal_bufs_to_text(out, j);
prt_printf(out, "space:\n");
- printbuf_indent_add(out, 2);
- prt_printf(out, "discarded\t%u:%u\n",
- j->space[journal_space_discarded].next_entry,
- j->space[journal_space_discarded].total);
- prt_printf(out, "clean ondisk\t%u:%u\n",
- j->space[journal_space_clean_ondisk].next_entry,
- j->space[journal_space_clean_ondisk].total);
- prt_printf(out, "clean\t%u:%u\n",
- j->space[journal_space_clean].next_entry,
- j->space[journal_space_clean].total);
- prt_printf(out, "total\t%u:%u\n",
- j->space[journal_space_total].next_entry,
- j->space[journal_space_total].total);
- printbuf_indent_sub(out, 2);
+ scoped_guard(printbuf_indent, out) {
+ prt_printf(out, "discarded\t%u:%u\n",
+ j->space[journal_space_discarded].next_entry,
+ j->space[journal_space_discarded].total);
+ prt_printf(out, "clean ondisk\t%u:%u\n",
+ j->space[journal_space_clean_ondisk].next_entry,
+ j->space[journal_space_clean_ondisk].total);
+ prt_printf(out, "clean\t%u:%u\n",
+ j->space[journal_space_clean].next_entry,
+ j->space[journal_space_clean].total);
+ prt_printf(out, "total\t%u:%u\n",
+ j->space[journal_space_total].next_entry,
+ j->space[journal_space_total].total);
+ }
for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal]) {
if (!ca->mi.durability)
@@ -1796,7 +1794,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
prt_printf(out, "dev %u:\n", ca->dev_idx);
prt_printf(out, "durability %u:\n", ca->mi.durability);
- printbuf_indent_add(out, 2);
+ guard(printbuf_indent)(out);
prt_printf(out, "nr\t%u\n", ja->nr);
prt_printf(out, "bucket size\t%u\n", ca->mi.bucket_size);
prt_printf(out, "available\t%u:%u\n", bch2_journal_dev_buckets_available(j, ja, journal_space_discarded), ja->sectors_free);
@@ -1804,7 +1802,6 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
prt_printf(out, "dirty_ondisk\t%u (seq %llu)\n",ja->dirty_idx_ondisk, ja->bucket_seq[ja->dirty_idx_ondisk]);
prt_printf(out, "dirty_idx\t%u (seq %llu)\n", ja->dirty_idx, ja->bucket_seq[ja->dirty_idx]);
prt_printf(out, "cur_idx\t%u (seq %llu)\n", ja->cur_idx, ja->bucket_seq[ja->cur_idx]);
- printbuf_indent_sub(out, 2);
}
prt_printf(out, "replicas want %u need %u\n", c->opts.metadata_replicas, c->opts.metadata_replicas_required);
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 44328d02cf67..e6f778bf7763 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -760,8 +760,8 @@ static void journal_entry_dev_usage_to_text(struct printbuf *out, struct bch_fs
return;
prt_printf(out, "dev=%u", le32_to_cpu(u->dev));
+ guard(printbuf_indent)(out);
- printbuf_indent_add(out, 2);
for (i = 0; i < nr_types; i++) {
prt_newline(out);
bch2_prt_data_type(out, i);
@@ -770,7 +770,6 @@ static void journal_entry_dev_usage_to_text(struct printbuf *out, struct bch_fs
le64_to_cpu(u->d[i].sectors),
le64_to_cpu(u->d[i].fragmented));
}
- printbuf_indent_sub(out, 2);
}
static int journal_entry_log_validate(struct bch_fs *c,
diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c
index bd1885607d3e..ae747c87fcf9 100644
--- a/fs/bcachefs/journal_reclaim.c
+++ b/fs/bcachefs/journal_reclaim.c
@@ -1019,7 +1019,7 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64
pin_list = journal_seq_pin(j, *seq);
prt_printf(out, "%llu: count %u\n", *seq, atomic_read(&pin_list->count));
- printbuf_indent_add(out, 2);
+ guard(printbuf_indent)(out);
prt_printf(out, "unflushed:\n");
for (unsigned i = 0; i < ARRAY_SIZE(pin_list->unflushed); i++)
@@ -1031,8 +1031,6 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64
list_for_each_entry(pin, &pin_list->flushed[i], list)
prt_printf(out, "\t%px %ps\n", pin, pin->flush);
- printbuf_indent_sub(out, 2);
-
return false;
}
diff --git a/fs/bcachefs/journal_sb.c b/fs/bcachefs/journal_sb.c
index c5d09fb5907d..dc0ecedb3a8f 100644
--- a/fs/bcachefs/journal_sb.c
+++ b/fs/bcachefs/journal_sb.c
@@ -230,3 +230,40 @@ int bch2_journal_buckets_to_sb(struct bch_fs *c, struct bch_dev *ca,
BUG_ON(dst + 1 != nr_compacted);
return 0;
}
+
+static inline bool journal_v2_unsorted(struct bch_sb_field_journal_v2 *j)
+{
+ unsigned nr = bch2_sb_field_journal_v2_nr_entries(j);
+ for (unsigned i = 0; i + 1 < nr; i++)
+ if (le64_to_cpu(j->d[i].start) > le64_to_cpu(j->d[i + 1].start))
+ return true;
+ return false;
+}
+
+int bch2_sb_journal_sort(struct bch_fs *c)
+{
+ BUG_ON(!c->sb.clean);
+ BUG_ON(test_bit(BCH_FS_rw, &c->flags));
+
+ guard(mutex)(&c->sb_lock);
+ bool write_sb = false;
+
+ for_each_online_member(c, ca, BCH_DEV_READ_REF_sb_journal_sort) {
+ struct bch_sb_field_journal_v2 *j = bch2_sb_field_get(ca->disk_sb.sb, journal_v2);
+
+ if ((j && journal_v2_unsorted(j)) ||
+ bch2_sb_field_get(ca->disk_sb.sb, journal)) {
+ struct journal_device *ja = &ca->journal;
+
+ sort(ja->buckets, ja->nr, sizeof(ja->buckets[0]), u64_cmp, NULL);
+ bch2_journal_buckets_to_sb(c, ca, ja->buckets, ja->nr);
+ write_sb = true;
+ }
+ }
+
+ return write_sb
+ ? bch2_write_super(c)
+ : 0;
+}
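bch2_sb_journal_sort() only runs on a clean, read-only filesystem; it rewrites a device's superblock journal fields whenever the v2 bucket list is out of order or a legacy v1 journal field is still present, since bch2_journal_buckets_to_sb() expects sorted input. A standalone sketch of the detect-and-sort step:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static int u64_cmp(const void *a, const void *b)
{
	uint64_t x = *(const uint64_t *)a, y = *(const uint64_t *)b;
	return x < y ? -1 : x > y ? 1 : 0;
}

/* adjacent-pair scan, same shape as journal_v2_unsorted() */
static bool buckets_unsorted(const uint64_t *d, unsigned nr)
{
	for (unsigned i = 0; i + 1 < nr; i++)
		if (d[i] > d[i + 1])
			return true;
	return false;
}

int main(void)
{
	uint64_t buckets[] = { 12, 7, 19, 3 };
	unsigned nr = sizeof(buckets) / sizeof(buckets[0]);

	if (buckets_unsorted(buckets, nr)) {
		qsort(buckets, nr, sizeof(buckets[0]), u64_cmp);
		/* the kernel side would now rewrite the sb field */
	}
	for (unsigned i = 0; i < nr; i++)
		printf("%llu ", (unsigned long long)buckets[i]);
	printf("\n");
	return 0;
}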
diff --git a/fs/bcachefs/journal_sb.h b/fs/bcachefs/journal_sb.h
index ba40a7e8d90a..e0fc40652607 100644
--- a/fs/bcachefs/journal_sb.h
+++ b/fs/bcachefs/journal_sb.h
@@ -22,3 +22,4 @@ extern const struct bch_sb_field_ops bch_sb_field_ops_journal;
extern const struct bch_sb_field_ops bch_sb_field_ops_journal_v2;
int bch2_journal_buckets_to_sb(struct bch_fs *, struct bch_dev *, u64 *, unsigned);
+int bch2_sb_journal_sort(struct bch_fs *);
diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c
index b9c0834498dd..c533b60706bf 100644
--- a/fs/bcachefs/lru.c
+++ b/fs/bcachefs/lru.c
@@ -51,25 +51,17 @@ static int __bch2_lru_set(struct btree_trans *trans, u16 lru_id,
: 0;
}
-int bch2_lru_del(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 time)
+static int bch2_lru_set(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 time)
{
- return __bch2_lru_set(trans, lru_id, dev_bucket, time, KEY_TYPE_deleted);
-}
-
-int bch2_lru_set(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 time)
-{
- return __bch2_lru_set(trans, lru_id, dev_bucket, time, KEY_TYPE_set);
+ return __bch2_lru_set(trans, lru_id, dev_bucket, time, true);
}
int __bch2_lru_change(struct btree_trans *trans,
u16 lru_id, u64 dev_bucket,
u64 old_time, u64 new_time)
{
- if (old_time == new_time)
- return 0;
-
- return bch2_lru_del(trans, lru_id, dev_bucket, old_time) ?:
- bch2_lru_set(trans, lru_id, dev_bucket, new_time);
+ return __bch2_lru_set(trans, lru_id, dev_bucket, old_time, false) ?:
+ __bch2_lru_set(trans, lru_id, dev_bucket, new_time, true);
}
static const char * const bch2_lru_types[] = {
@@ -87,7 +79,6 @@ int bch2_lru_check_set(struct btree_trans *trans,
struct bkey_buf *last_flushed)
{
struct bch_fs *c = trans->c;
- CLASS(printbuf, buf)();
CLASS(btree_iter, lru_iter)(trans, BTREE_ID_lru, lru_pos(lru_id, dev_bucket, time), 0);
struct bkey_s_c lru_k = bch2_btree_iter_peek_slot(&lru_iter);
int ret = bkey_err(lru_k);
@@ -99,10 +90,13 @@ int bch2_lru_check_set(struct btree_trans *trans,
if (ret)
return ret;
- if (fsck_err(trans, alloc_key_to_missing_lru_entry,
- "missing %s lru entry\n%s",
- bch2_lru_types[lru_type(lru_k)],
- (bch2_bkey_val_to_text(&buf, c, referring_k), buf.buf))) {
+ CLASS(printbuf, buf)();
+ prt_printf(&buf, "missing %s lru entry at pos ", bch2_lru_types[lru_type(lru_k)]);
+ bch2_bpos_to_text(&buf, lru_iter.pos);
+ prt_newline(&buf);
+ bch2_bkey_val_to_text(&buf, c, referring_k);
+
+ if (fsck_err(trans, alloc_key_to_missing_lru_entry, "%s", buf.buf)) {
ret = bch2_lru_set(trans, lru_id, dev_bucket, time);
if (ret)
return ret;
@@ -127,6 +121,23 @@ static struct bbpos lru_pos_to_bp(struct bkey_s_c lru_k)
}
}
+int bch2_dev_remove_lrus(struct bch_fs *c, struct bch_dev *ca)
+{
+ CLASS(btree_trans, trans)(c);
+ int ret = bch2_btree_write_buffer_flush_sync(trans) ?:
+ for_each_btree_key(trans, iter,
+ BTREE_ID_lru, POS_MIN, BTREE_ITER_prefetch, k, ({
+ struct bbpos bp = lru_pos_to_bp(k);
+
+ bp.btree == BTREE_ID_alloc && bp.pos.inode == ca->dev_idx
+ ? (bch2_btree_delete_at(trans, &iter, 0) ?:
+ bch2_trans_commit(trans, NULL, NULL, 0))
+ : 0;
+ }));
+ bch_err_fn(c, ret);
+ return ret;
+}
+
static u64 bkey_lru_type_idx(struct bch_fs *c,
enum bch_lru_type type,
struct bkey_s_c k)
diff --git a/fs/bcachefs/lru.h b/fs/bcachefs/lru.h
index 6f1e0a7b5db5..d5a2620f2507 100644
--- a/fs/bcachefs/lru.h
+++ b/fs/bcachefs/lru.h
@@ -59,8 +59,6 @@ void bch2_lru_pos_to_text(struct printbuf *, struct bpos);
.min_val_size = 8, \
})
-int bch2_lru_del(struct btree_trans *, u16, u64, u64);
-int bch2_lru_set(struct btree_trans *, u16, u64, u64);
int __bch2_lru_change(struct btree_trans *, u16, u64, u64, u64);
static inline int bch2_lru_change(struct btree_trans *trans,
@@ -72,9 +70,10 @@ static inline int bch2_lru_change(struct btree_trans *trans,
: 0;
}
+int bch2_dev_remove_lrus(struct bch_fs *, struct bch_dev *);
+
struct bkey_buf;
int bch2_lru_check_set(struct btree_trans *, u16, u64, u64, struct bkey_s_c, struct bkey_buf *);
-
int bch2_check_lrus(struct bch_fs *);
#endif /* _BCACHEFS_LRU_H */
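With bch2_lru_del() and bch2_lru_set() folded into a single __bch2_lru_set() taking a set/clear flag, moving a bucket between LRU positions becomes the same primitive called twice: clear the old time, then set the new one. A toy model of that shape (an array stands in for the LRU btree):

#include <stdbool.h>
#include <stdio.h>

#define NR_SLOTS 16
static bool lru[NR_SLOTS];	/* stand-in for KEY_TYPE_set keys */

/* one primitive for both insert and delete, like __bch2_lru_set() */
static int lru_set(unsigned time, bool set)
{
	if (time >= NR_SLOTS)
		return -1;
	lru[time] = set;
	return 0;
}

/* "change" is clear-old ?: set-new, like __bch2_lru_change() */
static int lru_change(unsigned old_time, unsigned new_time)
{
	int ret = lru_set(old_time, false);
	return ret ?: lru_set(new_time, true);
}

int main(void)
{
	lru_set(3, true);
	lru_change(3, 9);
	printf("slot 3=%d slot 9=%d\n", lru[3], lru[9]);
	return 0;
}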
diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c
index 5b4c3f4b1c25..8a3981e1016e 100644
--- a/fs/bcachefs/migrate.c
+++ b/fs/bcachefs/migrate.c
@@ -126,8 +126,9 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c,
{
CLASS(btree_trans, trans)(c);
+ /* FIXME: this does not handle unknown btrees with data pointers */
for (unsigned id = 0; id < BTREE_ID_NR; id++) {
- if (!btree_type_has_ptrs(id))
+ if (!btree_type_has_data_ptrs(id))
continue;
/* Stripe keys have pointers, but are handled separately */
@@ -167,7 +168,7 @@ static int bch2_dev_metadata_drop(struct bch_fs *c,
bch2_bkey_buf_init(&k);
closure_init_stack(&cl);
- for (id = 0; id < BTREE_ID_NR; id++) {
+ for (id = 0; id < btree_id_nr_alive(c); id++) {
bch2_trans_node_iter_init(trans, &iter, id, POS_MIN, 0, 0,
BTREE_ITER_prefetch);
retry:
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 101658cbe95a..9a440d3f7180 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -46,12 +46,12 @@ struct evacuate_bucket_arg {
static bool evacuate_bucket_pred(struct bch_fs *, void *,
enum btree_id, struct bkey_s_c,
- struct bch_io_opts *,
+ struct bch_inode_opts *,
struct data_update_opts *);
static noinline void
trace_io_move2(struct bch_fs *c, struct bkey_s_c k,
- struct bch_io_opts *io_opts,
+ struct bch_inode_opts *io_opts,
struct data_update_opts *data_opts)
{
CLASS(printbuf, buf)();
@@ -72,7 +72,7 @@ static noinline void trace_io_move_read2(struct bch_fs *c, struct bkey_s_c k)
static noinline void
trace_io_move_pred2(struct bch_fs *c, struct bkey_s_c k,
- struct bch_io_opts *io_opts,
+ struct bch_inode_opts *io_opts,
struct data_update_opts *data_opts,
move_pred_fn pred, void *_arg, bool p)
{
@@ -327,7 +327,7 @@ int bch2_move_extent(struct moving_context *ctxt,
struct move_bucket *bucket_in_flight,
struct btree_iter *iter,
struct bkey_s_c k,
- struct bch_io_opts io_opts,
+ struct bch_inode_opts io_opts,
struct data_update_opts data_opts)
{
struct btree_trans *trans = ctxt->trans;
@@ -451,93 +451,6 @@ err:
return ret;
}
-struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans,
- struct per_snapshot_io_opts *io_opts,
- struct bpos extent_pos, /* extent_iter, extent_k may be in reflink btree */
- struct btree_iter *extent_iter,
- struct bkey_s_c extent_k)
-{
- struct bch_fs *c = trans->c;
- u32 restart_count = trans->restart_count;
- struct bch_io_opts *opts_ret = &io_opts->fs_io_opts;
- int ret = 0;
-
- if (btree_iter_path(trans, extent_iter)->level)
- return opts_ret;
-
- if (extent_k.k->type == KEY_TYPE_reflink_v)
- goto out;
-
- if (io_opts->cur_inum != extent_pos.inode) {
- io_opts->d.nr = 0;
-
- ret = for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_pos.inode),
- BTREE_ITER_all_snapshots, k, ({
- if (k.k->p.offset != extent_pos.inode)
- break;
-
- if (!bkey_is_inode(k.k))
- continue;
-
- struct bch_inode_unpacked inode;
- _ret3 = bch2_inode_unpack(k, &inode);
- if (_ret3)
- break;
-
- struct snapshot_io_opts_entry e = { .snapshot = k.k->p.snapshot };
- bch2_inode_opts_get(&e.io_opts, trans->c, &inode);
-
- darray_push(&io_opts->d, e);
- }));
- io_opts->cur_inum = extent_pos.inode;
- }
-
- ret = ret ?: trans_was_restarted(trans, restart_count);
- if (ret)
- return ERR_PTR(ret);
-
- if (extent_k.k->p.snapshot)
- darray_for_each(io_opts->d, i)
- if (bch2_snapshot_is_ancestor(c, extent_k.k->p.snapshot, i->snapshot)) {
- opts_ret = &i->io_opts;
- break;
- }
-out:
- ret = bch2_get_update_rebalance_opts(trans, opts_ret, extent_iter, extent_k);
- if (ret)
- return ERR_PTR(ret);
- return opts_ret;
-}
-
-int bch2_move_get_io_opts_one(struct btree_trans *trans,
- struct bch_io_opts *io_opts,
- struct btree_iter *extent_iter,
- struct bkey_s_c extent_k)
-{
- struct bch_fs *c = trans->c;
-
- *io_opts = bch2_opts_to_inode_opts(c->opts);
-
- /* reflink btree? */
- if (extent_k.k->p.inode) {
- CLASS(btree_iter, inode_iter)(trans, BTREE_ID_inodes,
- SPOS(0, extent_k.k->p.inode, extent_k.k->p.snapshot),
- BTREE_ITER_cached);
- struct bkey_s_c inode_k = bch2_btree_iter_peek_slot(&inode_iter);
- int ret = bkey_err(inode_k);
- if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
- return ret;
-
- if (!ret && bkey_is_inode(inode_k.k)) {
- struct bch_inode_unpacked inode;
- bch2_inode_unpack(inode_k, &inode);
- bch2_inode_opts_get(io_opts, c, &inode);
- }
- }
-
- return bch2_get_update_rebalance_opts(trans, io_opts, extent_iter, extent_k);
-}
-
int bch2_move_ratelimit(struct moving_context *ctxt)
{
struct bch_fs *c = ctxt->trans->c;
@@ -582,37 +495,6 @@ int bch2_move_ratelimit(struct moving_context *ctxt)
return 0;
}
-/*
- * Move requires non extents iterators, and there's also no need for it to
- * signal indirect_extent_missing_error:
- */
-static struct bkey_s_c bch2_lookup_indirect_extent_for_move(struct btree_trans *trans,
- struct btree_iter *iter,
- struct bkey_s_c_reflink_p p)
-{
- if (unlikely(REFLINK_P_ERROR(p.v)))
- return bkey_s_c_null;
-
- struct bpos reflink_pos = POS(0, REFLINK_P_IDX(p.v));
-
- bch2_trans_iter_init(trans, iter,
- BTREE_ID_reflink, reflink_pos,
- BTREE_ITER_not_extents);
-
- struct bkey_s_c k = bch2_btree_iter_peek(iter);
- if (!k.k || bkey_err(k)) {
- bch2_trans_iter_exit(iter);
- return k;
- }
-
- if (bkey_lt(reflink_pos, bkey_start_pos(k.k))) {
- bch2_trans_iter_exit(iter);
- return bkey_s_c_null;
- }
-
- return k;
-}
-
int bch2_move_data_btree(struct moving_context *ctxt,
struct bpos start,
struct bpos end,
@@ -622,17 +504,11 @@ int bch2_move_data_btree(struct moving_context *ctxt,
struct btree_trans *trans = ctxt->trans;
struct bch_fs *c = trans->c;
struct per_snapshot_io_opts snapshot_io_opts;
- struct bch_io_opts *io_opts;
+ struct bch_inode_opts *io_opts;
struct bkey_buf sk;
struct btree_iter iter, reflink_iter = {};
struct bkey_s_c k;
struct data_update_opts data_opts;
- /*
- * If we're moving a single file, also process reflinked data it points
- * to (this includes propagating changed io_opts from the inode to the
- * extent):
- */
- bool walk_indirect = start.inode == end.inode;
int ret = 0, ret2;
per_snapshot_io_opts_init(&snapshot_io_opts, c);
@@ -697,8 +573,6 @@ root_err:
bch2_ratelimit_reset(ctxt->rate);
while (!bch2_move_ratelimit(ctxt)) {
- struct btree_iter *extent_iter = &iter;
-
bch2_trans_begin(trans);
k = bch2_btree_iter_peek(&iter);
@@ -717,41 +591,18 @@ root_err:
if (ctxt->stats)
ctxt->stats->pos = BBPOS(iter.btree_id, iter.pos);
- if (walk_indirect &&
- k.k->type == KEY_TYPE_reflink_p &&
- REFLINK_P_MAY_UPDATE_OPTIONS(bkey_s_c_to_reflink_p(k).v)) {
- struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
-
- bch2_trans_iter_exit(&reflink_iter);
- k = bch2_lookup_indirect_extent_for_move(trans, &reflink_iter, p);
- ret = bkey_err(k);
- if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
- continue;
- if (ret)
- break;
-
- if (!k.k)
- goto next_nondata;
-
- /*
- * XXX: reflink pointers may point to multiple indirect
- * extents, so don't advance past the entire reflink
- * pointer - need to fixup iter->k
- */
- extent_iter = &reflink_iter;
- }
-
if (!bkey_extent_is_direct_data(k.k))
goto next_nondata;
- io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts,
- iter.pos, extent_iter, k);
+ io_opts = bch2_extent_get_apply_io_opts(trans, &snapshot_io_opts,
+ iter.pos, &iter, k,
+ SET_NEEDS_REBALANCE_other);
ret = PTR_ERR_OR_ZERO(io_opts);
if (ret)
continue;
memset(&data_opts, 0, sizeof(data_opts));
- if (!pred(c, arg, extent_iter->btree_id, k, io_opts, &data_opts))
+ if (!pred(c, arg, iter.btree_id, k, io_opts, &data_opts))
goto next;
/*
@@ -762,7 +613,7 @@ root_err:
k = bkey_i_to_s_c(sk.k);
if (!level)
- ret2 = bch2_move_extent(ctxt, NULL, extent_iter, k, *io_opts, data_opts);
+ ret2 = bch2_move_extent(ctxt, NULL, &iter, k, *io_opts, data_opts);
else if (!data_opts.scrub)
ret2 = bch2_btree_node_rewrite_pos(trans, btree_id, level,
k.k->p, data_opts.target, 0);
@@ -824,7 +675,7 @@ static int bch2_move_data(struct bch_fs *c,
unsigned min_depth_this_btree = min_depth;
/* Stripe keys have pointers, but are handled separately */
- if (!btree_type_has_ptrs(id) ||
+ if (!btree_type_has_data_ptrs(id) ||
id == BTREE_ID_stripes)
min_depth_this_btree = max(min_depth_this_btree, 1);
@@ -859,7 +710,6 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
struct btree_trans *trans = ctxt->trans;
struct bch_fs *c = trans->c;
bool is_kthread = current->flags & PF_KTHREAD;
- struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
struct btree_iter iter = {};
struct bkey_buf sk;
struct bkey_s_c k;
@@ -867,7 +717,11 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
u64 check_mismatch_done = bucket_start;
int ret = 0;
- CLASS(bch2_dev_tryget, ca)(c, dev);
+ struct bch_inode_opts io_opts;
+ bch2_inode_opts_get(c, &io_opts);
+
+ /* Userspace might have supplied @dev: */
+ CLASS(bch2_dev_tryget_noerror, ca)(c, dev);
if (!ca)
return 0;
@@ -941,7 +795,8 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
goto next;
if (!bp.v->level) {
- ret = bch2_move_get_io_opts_one(trans, &io_opts, &iter, k);
+ ret = bch2_extent_get_apply_io_opts_one(trans, &io_opts, &iter, k,
+ SET_NEEDS_REBALANCE_other);
if (ret) {
bch2_trans_iter_exit(&iter);
continue;
@@ -1038,7 +893,7 @@ int bch2_move_data_phys(struct bch_fs *c,
static bool evacuate_bucket_pred(struct bch_fs *c, void *_arg,
enum btree_id btree, struct bkey_s_c k,
- struct bch_io_opts *io_opts,
+ struct bch_inode_opts *io_opts,
struct data_update_opts *data_opts)
{
struct evacuate_bucket_arg *arg = _arg;
@@ -1079,7 +934,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt,
}
typedef bool (*move_btree_pred)(struct bch_fs *, void *,
- struct btree *, struct bch_io_opts *,
+ struct btree *, struct bch_inode_opts *,
struct data_update_opts *);
static int bch2_move_btree(struct bch_fs *c,
@@ -1089,7 +944,6 @@ static int bch2_move_btree(struct bch_fs *c,
struct bch_move_stats *stats)
{
bool kthread = (current->flags & PF_KTHREAD) != 0;
- struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
struct moving_context ctxt;
struct btree_trans *trans;
struct btree_iter iter;
@@ -1098,6 +952,9 @@ static int bch2_move_btree(struct bch_fs *c,
struct data_update_opts data_opts;
int ret = 0;
+ struct bch_inode_opts io_opts;
+ bch2_inode_opts_get(c, &io_opts);
+
bch2_moving_ctxt_init(&ctxt, c, NULL, stats,
writepoint_ptr(&c->btree_write_point),
true);
@@ -1158,7 +1015,7 @@ next:
static bool rereplicate_pred(struct bch_fs *c, void *arg,
enum btree_id btree, struct bkey_s_c k,
- struct bch_io_opts *io_opts,
+ struct bch_inode_opts *io_opts,
struct data_update_opts *data_opts)
{
unsigned nr_good = bch2_bkey_durability(c, k);
@@ -1189,7 +1046,7 @@ static bool rereplicate_pred(struct bch_fs *c, void *arg,
static bool migrate_pred(struct bch_fs *c, void *arg,
enum btree_id btree, struct bkey_s_c k,
- struct bch_io_opts *io_opts,
+ struct bch_inode_opts *io_opts,
struct data_update_opts *data_opts)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
@@ -1226,7 +1083,7 @@ static bool bformat_needs_redo(struct bkey_format *f)
static bool rewrite_old_nodes_pred(struct bch_fs *c, void *arg,
struct btree *b,
- struct bch_io_opts *io_opts,
+ struct bch_inode_opts *io_opts,
struct data_update_opts *data_opts)
{
if (b->version_ondisk != c->sb.version ||
@@ -1263,7 +1120,7 @@ int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats)
static bool drop_extra_replicas_pred(struct bch_fs *c, void *arg,
enum btree_id btree, struct bkey_s_c k,
- struct bch_io_opts *io_opts,
+ struct bch_inode_opts *io_opts,
struct data_update_opts *data_opts)
{
unsigned durability = bch2_bkey_durability(c, k);
@@ -1301,7 +1158,7 @@ static bool drop_extra_replicas_pred(struct bch_fs *c, void *arg,
static bool scrub_pred(struct bch_fs *c, void *_arg,
enum btree_id btree, struct bkey_s_c k,
- struct bch_io_opts *io_opts,
+ struct bch_inode_opts *io_opts,
struct data_update_opts *data_opts)
{
struct bch_ioctl_data *arg = _arg;
@@ -1404,7 +1261,7 @@ void bch2_move_stats_to_text(struct printbuf *out, struct bch_move_stats *stats)
prt_str(out, " pos=");
bch2_bbpos_to_text(out, stats->pos);
prt_newline(out);
- printbuf_indent_add(out, 2);
+ guard(printbuf_indent)(out);
prt_printf(out, "keys moved:\t%llu\n", atomic64_read(&stats->keys_moved));
prt_printf(out, "keys raced:\t%llu\n", atomic64_read(&stats->keys_raced));
@@ -1419,8 +1276,6 @@ void bch2_move_stats_to_text(struct printbuf *out, struct bch_move_stats *stats)
prt_printf(out, "bytes raced:\t");
prt_human_readable_u64(out, atomic64_read(&stats->sectors_raced) << 9);
prt_newline(out);
-
- printbuf_indent_sub(out, 2);
}
static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, struct moving_context *ctxt)
@@ -1429,7 +1284,7 @@ static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, str
printbuf_tabstop_push(out, 32);
bch2_move_stats_to_text(out, ctxt->stats);
- printbuf_indent_add(out, 2);
+ guard(printbuf_indent)(out);
prt_printf(out, "reads: ios %u/%u sectors %u/%u\n",
atomic_read(&ctxt->read_ios),
@@ -1443,15 +1298,13 @@ static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, str
atomic_read(&ctxt->write_sectors),
c->opts.move_bytes_in_flight >> 9);
- printbuf_indent_add(out, 2);
+ guard(printbuf_indent)(out);
scoped_guard(mutex, &ctxt->lock) {
struct moving_io *io;
list_for_each_entry(io, &ctxt->ios, io_list)
bch2_data_update_inflight_to_text(out, &io->write);
}
-
- printbuf_indent_sub(out, 4);
}
void bch2_fs_moving_ctxts_to_text(struct printbuf *out, struct bch_fs *c)
diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h
index 481026ff99ab..754b0ad45950 100644
--- a/fs/bcachefs/move.h
+++ b/fs/bcachefs/move.h
@@ -73,7 +73,7 @@ do { \
} while (1)
typedef bool (*move_pred_fn)(struct bch_fs *, void *, enum btree_id, struct bkey_s_c,
- struct bch_io_opts *, struct data_update_opts *);
+ struct bch_inode_opts *, struct data_update_opts *);
extern const char * const bch2_data_ops_strs[];
@@ -87,45 +87,15 @@ void bch2_moving_ctxt_flush_all(struct moving_context *);
void bch2_move_ctxt_wait_for_io(struct moving_context *);
int bch2_move_ratelimit(struct moving_context *);
-/* Inodes in different snapshots may have different IO options: */
-struct snapshot_io_opts_entry {
- u32 snapshot;
- struct bch_io_opts io_opts;
-};
-
-struct per_snapshot_io_opts {
- u64 cur_inum;
- struct bch_io_opts fs_io_opts;
- DARRAY(struct snapshot_io_opts_entry) d;
-};
-
-static inline void per_snapshot_io_opts_init(struct per_snapshot_io_opts *io_opts, struct bch_fs *c)
-{
- memset(io_opts, 0, sizeof(*io_opts));
- io_opts->fs_io_opts = bch2_opts_to_inode_opts(c->opts);
-}
-
-static inline void per_snapshot_io_opts_exit(struct per_snapshot_io_opts *io_opts)
-{
- darray_exit(&io_opts->d);
-}
-
-int bch2_move_get_io_opts_one(struct btree_trans *, struct bch_io_opts *,
- struct btree_iter *, struct bkey_s_c);
-
int bch2_scan_old_btree_nodes(struct bch_fs *, struct bch_move_stats *);
int bch2_move_extent(struct moving_context *,
struct move_bucket *,
struct btree_iter *,
struct bkey_s_c,
- struct bch_io_opts,
+ struct bch_inode_opts,
struct data_update_opts);
-struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *,
- struct per_snapshot_io_opts *, struct bpos,
- struct btree_iter *, struct bkey_s_c);
-
int bch2_move_data_btree(struct moving_context *, struct bpos, struct bpos,
move_pred_fn, void *, enum btree_id, unsigned);
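The per_snapshot_io_opts cache removed from move.h above now lives with the rebalance code (rebalance.h, per the diffstat): for a single inode number it remembers the unpacked options of each snapshot version, and an extent takes the first cached entry whose snapshot is an ancestor of its own, falling back to the filesystem defaults. A reduced sketch of that lookup rule, with a toy ancestry relation standing in for bch2_snapshot_is_ancestor():

#include <stdio.h>

struct snap_opts { unsigned snapshot; unsigned compression; };

/* toy ancestry: each snapshot's ancestor chain is id, id/2, id/4, ... */
static int snap_is_ancestor(unsigned id, unsigned ancestor)
{
	while (id > ancestor)
		id /= 2;
	return id == ancestor;
}

static unsigned opts_for_extent(const struct snap_opts *d, unsigned nr,
				unsigned extent_snapshot, unsigned fs_default)
{
	for (unsigned i = 0; i < nr; i++)
		if (snap_is_ancestor(extent_snapshot, d[i].snapshot))
			return d[i].compression;	/* first match wins */
	return fs_default;
}

int main(void)
{
	struct snap_opts d[] = { { 4, 3 }, { 2, 1 } };
	printf("compression=%u\n", opts_for_extent(d, 2, 8, 0));	/* 3 */
	return 0;
}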
diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c
index c3ef35dc01e2..122bc98e4cbb 100644
--- a/fs/bcachefs/opts.c
+++ b/fs/bcachefs/opts.c
@@ -518,7 +518,7 @@ void bch2_opts_to_text(struct printbuf *out,
}
}
-int bch2_opt_hook_pre_set(struct bch_fs *c, struct bch_dev *ca, enum bch_opt_id id, u64 v)
+int bch2_opt_hook_pre_set(struct bch_fs *c, struct bch_dev *ca, u64 inum, enum bch_opt_id id, u64 v)
{
int ret = 0;
@@ -531,6 +531,8 @@ int bch2_opt_hook_pre_set(struct bch_fs *c, struct bch_dev *ca, enum bch_opt_id
case Opt_compression:
case Opt_background_compression:
ret = bch2_check_set_has_compressed_data(c, v);
+ if (ret)
+ return ret;
break;
case Opt_erasure_code:
if (v)
@@ -546,7 +548,7 @@ int bch2_opt_hook_pre_set(struct bch_fs *c, struct bch_dev *ca, enum bch_opt_id
int bch2_opts_hooks_pre_set(struct bch_fs *c)
{
for (unsigned i = 0; i < bch2_opts_nr; i++) {
- int ret = bch2_opt_hook_pre_set(c, NULL, i, bch2_opt_get_by_id(&c->opts, i));
+ int ret = bch2_opt_hook_pre_set(c, NULL, 0, i, bch2_opt_get_by_id(&c->opts, i));
if (ret)
return ret;
}
@@ -555,26 +557,15 @@ int bch2_opts_hooks_pre_set(struct bch_fs *c)
}
void bch2_opt_hook_post_set(struct bch_fs *c, struct bch_dev *ca, u64 inum,
- struct bch_opts *new_opts, enum bch_opt_id id)
+ enum bch_opt_id id, u64 v)
{
switch (id) {
case Opt_foreground_target:
- if (new_opts->foreground_target &&
- !new_opts->background_target)
- bch2_set_rebalance_needs_scan(c, inum);
- break;
case Opt_compression:
- if (new_opts->compression &&
- !new_opts->background_compression)
- bch2_set_rebalance_needs_scan(c, inum);
- break;
case Opt_background_target:
- if (new_opts->background_target)
- bch2_set_rebalance_needs_scan(c, inum);
- break;
case Opt_background_compression:
- if (new_opts->background_compression)
- bch2_set_rebalance_needs_scan(c, inum);
+ bch2_set_rebalance_needs_scan(c, inum);
+ bch2_rebalance_wakeup(c);
break;
case Opt_rebalance_enabled:
bch2_rebalance_wakeup(c);
@@ -600,12 +591,14 @@ void bch2_opt_hook_post_set(struct bch_fs *c, struct bch_dev *ca, u64 inum,
* upgrades at runtime as well, but right now there's nothing
* that does that:
*/
- if (new_opts->version_upgrade == BCH_VERSION_UPGRADE_incompatible)
+ if (v == BCH_VERSION_UPGRADE_incompatible)
bch2_sb_upgrade_incompat(c);
break;
default:
break;
}
+
+ atomic_inc(&c->opt_change_cookie);
}
int bch2_parse_one_mount_opt(struct bch_fs *c, struct bch_opts *opts,
@@ -802,16 +795,17 @@ bool bch2_opt_set_sb(struct bch_fs *c, struct bch_dev *ca,
/* io opts: */
-struct bch_io_opts bch2_opts_to_inode_opts(struct bch_opts src)
+void bch2_inode_opts_get(struct bch_fs *c, struct bch_inode_opts *ret)
{
- struct bch_io_opts opts = {
-#define x(_name, _bits) ._name = src._name,
+ memset(ret, 0, sizeof(*ret));
+
+#define x(_name, _bits) ret->_name = c->opts._name;
BCH_INODE_OPTS()
#undef x
- };
- bch2_io_opts_fixups(&opts);
- return opts;
+ ret->change_cookie = atomic_read(&c->opt_change_cookie);
+
+ bch2_io_opts_fixups(ret);
}
bool bch2_opt_is_inode_opt(enum bch_opt_id id)
diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h
index f8828f4699c7..22cf109fb9c9 100644
--- a/fs/bcachefs/opts.h
+++ b/fs/bcachefs/opts.h
@@ -658,10 +658,9 @@ void bch2_opts_to_text(struct printbuf *,
struct bch_fs *, struct bch_sb *,
unsigned, unsigned, unsigned);
-int bch2_opt_hook_pre_set(struct bch_fs *, struct bch_dev *, enum bch_opt_id, u64);
+int bch2_opt_hook_pre_set(struct bch_fs *, struct bch_dev *, u64, enum bch_opt_id, u64);
int bch2_opts_hooks_pre_set(struct bch_fs *);
-void bch2_opt_hook_post_set(struct bch_fs *, struct bch_dev *, u64,
- struct bch_opts *, enum bch_opt_id);
+void bch2_opt_hook_post_set(struct bch_fs *, struct bch_dev *, u64, enum bch_opt_id, u64);
int bch2_parse_one_mount_opt(struct bch_fs *, struct bch_opts *,
struct printbuf *, const char *, const char *);
@@ -670,16 +669,19 @@ int bch2_parse_mount_opts(struct bch_fs *, struct bch_opts *, struct printbuf *,
/* inode opts: */
-struct bch_io_opts {
+struct bch_inode_opts {
#define x(_name, _bits) u##_bits _name;
BCH_INODE_OPTS()
#undef x
+
#define x(_name, _bits) u64 _name##_from_inode:1;
BCH_INODE_OPTS()
#undef x
+
+ u32 change_cookie;
};
-static inline void bch2_io_opts_fixups(struct bch_io_opts *opts)
+static inline void bch2_io_opts_fixups(struct bch_inode_opts *opts)
{
if (!opts->background_target)
opts->background_target = opts->foreground_target;
@@ -692,7 +694,7 @@ static inline void bch2_io_opts_fixups(struct bch_io_opts *opts)
}
}
-struct bch_io_opts bch2_opts_to_inode_opts(struct bch_opts);
+void bch2_inode_opts_get(struct bch_fs *, struct bch_inode_opts *);
bool bch2_opt_is_inode_opt(enum bch_opt_id);
#endif /* _BCACHEFS_OPTS_H */
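bch2_io_opts_fixups() resolves dependent defaults once the options are assembled, e.g. background_target falling back to foreground_target when unset, as visible above. The same shape in a tiny standalone form:

#include <stdio.h>

struct opts {
	unsigned foreground_target;
	unsigned background_target;
};

/* dependent defaults resolved once, after all sources are merged */
static void opts_fixups(struct opts *o)
{
	if (!o->background_target)
		o->background_target = o->foreground_target;
}

int main(void)
{
	struct opts o = { .foreground_target = 5 };
	opts_fixups(&o);
	printf("background_target=%u\n", o.background_target);	/* 5 */
	return 0;
}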
diff --git a/fs/bcachefs/printbuf.h b/fs/bcachefs/printbuf.h
index 907e5c97550b..5fa5265d7ba8 100644
--- a/fs/bcachefs/printbuf.h
+++ b/fs/bcachefs/printbuf.h
@@ -299,4 +299,18 @@ DEFINE_GUARD(printbuf_atomic, struct printbuf *,
printbuf_atomic_inc(_T),
printbuf_atomic_dec(_T));
+static inline void printbuf_indent_add_2(struct printbuf *out)
+{
+ bch2_printbuf_indent_add(out, 2);
+}
+
+static inline void printbuf_indent_sub_2(struct printbuf *out)
+{
+ bch2_printbuf_indent_sub(out, 2);
+}
+
+DEFINE_GUARD(printbuf_indent, struct printbuf *,
+ printbuf_indent_add_2(_T),
+ printbuf_indent_sub_2(_T));
+
#endif /* _BCACHEFS_PRINTBUF_H */
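DEFINE_GUARD() comes from include/linux/cleanup.h: guard(printbuf_indent)(out) indents immediately and un-indents automatically when the enclosing scope exits, which is what lets every manual printbuf_indent_add()/printbuf_indent_sub() pair in this patch collapse to a single line, including the scoped_guard() form used in journal.c. A userspace approximation built on the same compiler mechanism, GCC/Clang's cleanup attribute:

#include <stdio.h>

struct printbuf { unsigned indent; };

static void indent_add(struct printbuf *b) { b->indent += 2; }
static void indent_sub(struct printbuf *b) { b->indent -= 2; }

/* __attribute__((cleanup)) is the mechanism behind the kernel's guard() */
static void indent_guard_exit(struct printbuf **b) { indent_sub(*b); }
#define guard_indent(b) \
	__attribute__((cleanup(indent_guard_exit))) struct printbuf *_g = (indent_add(b), (b))

int main(void)
{
	struct printbuf buf = { 0 };
	{
		guard_indent(&buf);
		printf("inside: indent=%u\n", buf.indent);	/* 2 */
	}	/* un-indent runs here, on every exit path */
	printf("after: indent=%u\n", buf.indent);		/* 0 */
	return 0;
}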
diff --git a/fs/bcachefs/progress.c b/fs/bcachefs/progress.c
index 792fc6fef270..541ee951d1c9 100644
--- a/fs/bcachefs/progress.c
+++ b/fs/bcachefs/progress.c
@@ -12,7 +12,7 @@ void bch2_progress_init(struct progress_indicator_state *s,
s->next_print = jiffies + HZ * 10;
- for (unsigned i = 0; i < BTREE_ID_NR; i++) {
+ for (unsigned i = 0; i < btree_id_nr_alive(c); i++) {
if (!(btree_id_mask & BIT_ULL(i)))
continue;
diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c
index 25bf72dc6488..fa73de7890da 100644
--- a/fs/bcachefs/rebalance.c
+++ b/fs/bcachefs/rebalance.c
@@ -43,8 +43,57 @@ static const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s
return bch2_bkey_ptrs_rebalance_opts(bch2_bkey_ptrs_c(k));
}
+void bch2_extent_rebalance_to_text(struct printbuf *out, struct bch_fs *c,
+ const struct bch_extent_rebalance *r)
+{
+ prt_printf(out, "replicas=%u", r->data_replicas);
+ if (r->data_replicas_from_inode)
+ prt_str(out, " (inode)");
+
+ prt_str(out, " checksum=");
+ bch2_prt_csum_opt(out, r->data_checksum);
+ if (r->data_checksum_from_inode)
+ prt_str(out, " (inode)");
+
+ if (r->background_compression || r->background_compression_from_inode) {
+ prt_str(out, " background_compression=");
+ bch2_compression_opt_to_text(out, r->background_compression);
+
+ if (r->background_compression_from_inode)
+ prt_str(out, " (inode)");
+ }
+
+ if (r->background_target || r->background_target_from_inode) {
+ prt_str(out, " background_target=");
+ if (c)
+ bch2_target_to_text(out, c, r->background_target);
+ else
+ prt_printf(out, "%u", r->background_target);
+
+ if (r->background_target_from_inode)
+ prt_str(out, " (inode)");
+ }
+
+ if (r->promote_target || r->promote_target_from_inode) {
+ prt_str(out, " promote_target=");
+ if (c)
+ bch2_target_to_text(out, c, r->promote_target);
+ else
+ prt_printf(out, "%u", r->promote_target);
+
+ if (r->promote_target_from_inode)
+ prt_str(out, " (inode)");
+ }
+
+ if (r->erasure_code || r->erasure_code_from_inode) {
+ prt_printf(out, " ec=%u", r->erasure_code);
+ if (r->erasure_code_from_inode)
+ prt_str(out, " (inode)");
+ }
+}
+
static inline unsigned bch2_bkey_ptrs_need_compress(struct bch_fs *c,
- struct bch_io_opts *opts,
+ struct bch_inode_opts *opts,
struct bkey_s_c k,
struct bkey_ptrs_c ptrs)
{
@@ -71,7 +120,7 @@ static inline unsigned bch2_bkey_ptrs_need_compress(struct bch_fs *c,
}
static inline unsigned bch2_bkey_ptrs_need_move(struct bch_fs *c,
- struct bch_io_opts *opts,
+ struct bch_inode_opts *opts,
struct bkey_ptrs_c ptrs)
{
if (!opts->background_target ||
@@ -92,7 +141,7 @@ static inline unsigned bch2_bkey_ptrs_need_move(struct bch_fs *c,
}
static unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c,
- struct bch_io_opts *opts,
+ struct bch_inode_opts *opts,
struct bkey_s_c k)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
@@ -145,7 +194,7 @@ incompressible:
return sectors;
}
-static bool bch2_bkey_rebalance_needs_update(struct bch_fs *c, struct bch_io_opts *opts,
+static bool bch2_bkey_rebalance_needs_update(struct bch_fs *c, struct bch_inode_opts *opts,
struct bkey_s_c k)
{
if (!bkey_extent_is_direct_data(k.k))
@@ -161,8 +210,10 @@ static bool bch2_bkey_rebalance_needs_update(struct bch_fs *c, struct bch_io_opt
}
}
-int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bch_io_opts *opts,
- struct bkey_i *_k)
+int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bch_inode_opts *opts,
+ struct bkey_i *_k,
+ enum set_needs_rebalance_ctx ctx,
+ u32 change_cookie)
{
if (!bkey_extent_is_direct_data(&_k->k))
return 0;
@@ -186,10 +237,11 @@ int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bch_io_opts *opts,
return 0;
}
-int bch2_get_update_rebalance_opts(struct btree_trans *trans,
- struct bch_io_opts *io_opts,
- struct btree_iter *iter,
- struct bkey_s_c k)
+static int bch2_get_update_rebalance_opts(struct btree_trans *trans,
+ struct bch_inode_opts *io_opts,
+ struct btree_iter *iter,
+ struct bkey_s_c k,
+ enum set_needs_rebalance_ctx ctx)
{
BUG_ON(iter->flags & BTREE_ITER_is_extents);
BUG_ON(iter->flags & BTREE_ITER_filter_snapshots);
@@ -218,10 +270,121 @@ int bch2_get_update_rebalance_opts(struct btree_trans *trans,
 /* On successful transaction commit, @k was invalidated: */
- return bch2_bkey_set_needs_rebalance(trans->c, io_opts, n) ?:
+ return bch2_bkey_set_needs_rebalance(trans->c, io_opts, n, ctx, 0) ?:
bch2_trans_update(trans, iter, n, BTREE_UPDATE_internal_snapshot_node) ?:
bch2_trans_commit(trans, NULL, NULL, 0) ?:
- bch_err_throw(trans->c, transaction_restart_nested);
+ bch_err_throw(trans->c, transaction_restart_commit);
+}
+
+static struct bch_inode_opts *bch2_extent_get_io_opts(struct btree_trans *trans,
+ struct per_snapshot_io_opts *io_opts,
+ struct bpos extent_pos, /* extent_iter, extent_k may be in reflink btree */
+ struct btree_iter *extent_iter,
+ struct bkey_s_c extent_k)
+{
+ struct bch_fs *c = trans->c;
+ u32 restart_count = trans->restart_count;
+ int ret = 0;
+
+ if (btree_iter_path(trans, extent_iter)->level)
+ return &io_opts->fs_io_opts;
+
+ if (extent_k.k->type == KEY_TYPE_reflink_v)
+ return &io_opts->fs_io_opts;
+
+ if (io_opts->cur_inum != extent_pos.inode) {
+ io_opts->d.nr = 0;
+
+ ret = for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_pos.inode),
+ BTREE_ITER_all_snapshots, k, ({
+ if (k.k->p.offset != extent_pos.inode)
+ break;
+
+ if (!bkey_is_inode(k.k))
+ continue;
+
+ struct bch_inode_unpacked inode;
+ _ret3 = bch2_inode_unpack(k, &inode);
+ if (_ret3)
+ break;
+
+ struct snapshot_io_opts_entry e = { .snapshot = k.k->p.snapshot };
+ bch2_inode_opts_get_inode(c, &inode, &e.io_opts);
+
+ darray_push(&io_opts->d, e);
+ }));
+ io_opts->cur_inum = extent_pos.inode;
+ }
+
+ ret = ret ?: trans_was_restarted(trans, restart_count);
+ if (ret)
+ return ERR_PTR(ret);
+
+ if (extent_k.k->p.snapshot)
+ darray_for_each(io_opts->d, i)
+ if (bch2_snapshot_is_ancestor(c, extent_k.k->p.snapshot, i->snapshot))
+ return &i->io_opts;
+
+ return &io_opts->fs_io_opts;
+}
+
+struct bch_inode_opts *bch2_extent_get_apply_io_opts(struct btree_trans *trans,
+ struct per_snapshot_io_opts *snapshot_io_opts,
+ struct bpos extent_pos, /* extent_iter, extent_k may be in reflink btree */
+ struct btree_iter *extent_iter,
+ struct bkey_s_c extent_k,
+ enum set_needs_rebalance_ctx ctx)
+{
+ struct bch_inode_opts *opts =
+ bch2_extent_get_io_opts(trans, snapshot_io_opts, extent_pos, extent_iter, extent_k);
+ if (IS_ERR(opts) || btree_iter_path(trans, extent_iter)->level)
+ return opts;
+
+ int ret = bch2_get_update_rebalance_opts(trans, opts, extent_iter, extent_k, ctx);
+ return ret ? ERR_PTR(ret) : opts;
+}
+
+int bch2_extent_get_io_opts_one(struct btree_trans *trans,
+ struct bch_inode_opts *io_opts,
+ struct btree_iter *extent_iter,
+ struct bkey_s_c extent_k,
+ enum set_needs_rebalance_ctx ctx)
+{
+ struct bch_fs *c = trans->c;
+
+ bch2_inode_opts_get(c, io_opts);
+
+ /* reflink btree? */
+ if (extent_k.k->p.inode) {
+ CLASS(btree_iter, inode_iter)(trans, BTREE_ID_inodes,
+ SPOS(0, extent_k.k->p.inode, extent_k.k->p.snapshot),
+ BTREE_ITER_cached);
+ struct bkey_s_c inode_k = bch2_btree_iter_peek_slot(&inode_iter);
+ int ret = bkey_err(inode_k);
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ return ret;
+
+ if (!ret && bkey_is_inode(inode_k.k)) {
+ struct bch_inode_unpacked inode;
+ bch2_inode_unpack(inode_k, &inode);
+ bch2_inode_opts_get_inode(c, &inode, io_opts);
+ }
+ }
+
+ return 0;
+}
+
+int bch2_extent_get_apply_io_opts_one(struct btree_trans *trans,
+ struct bch_inode_opts *io_opts,
+ struct btree_iter *extent_iter,
+ struct bkey_s_c extent_k,
+ enum set_needs_rebalance_ctx ctx)
+{
+ int ret = bch2_extent_get_io_opts_one(trans, io_opts, extent_iter, extent_k, ctx);
+ if (ret || btree_iter_path(trans, extent_iter)->level)
+ return ret;
+
+ return bch2_get_update_rebalance_opts(trans, io_opts, extent_iter, extent_k, ctx);
}
#define REBALANCE_WORK_SCAN_OFFSET (U64_MAX - 1)
@@ -354,9 +517,10 @@ static int bch2_bkey_clear_needs_rebalance(struct btree_trans *trans,
}
static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans,
+ struct per_snapshot_io_opts *snapshot_io_opts,
struct bpos work_pos,
struct btree_iter *extent_iter,
- struct bch_io_opts *io_opts,
+ struct bch_inode_opts **opts_ret,
struct data_update_opts *data_opts)
{
struct bch_fs *c = trans->c;
@@ -370,13 +534,19 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans,
if (bkey_err(k))
return k;
- int ret = bch2_move_get_io_opts_one(trans, io_opts, extent_iter, k);
+ struct bch_inode_opts *opts =
+ bch2_extent_get_apply_io_opts(trans, snapshot_io_opts,
+ extent_iter->pos, extent_iter, k,
+ SET_NEEDS_REBALANCE_other);
+ int ret = PTR_ERR_OR_ZERO(opts);
if (ret)
return bkey_s_c_err(ret);
+ *opts_ret = opts;
+
memset(data_opts, 0, sizeof(*data_opts));
- data_opts->rewrite_ptrs = bch2_bkey_ptrs_need_rebalance(c, io_opts, k);
- data_opts->target = io_opts->background_target;
+ data_opts->rewrite_ptrs = bch2_bkey_ptrs_need_rebalance(c, opts, k);
+ data_opts->target = opts->background_target;
data_opts->write_flags |= BCH_WRITE_only_specified_devs;
if (!data_opts->rewrite_ptrs) {
@@ -401,19 +571,19 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans,
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
- unsigned p = bch2_bkey_ptrs_need_compress(c, io_opts, k, ptrs);
+ unsigned p = bch2_bkey_ptrs_need_compress(c, opts, k, ptrs);
if (p) {
prt_str(&buf, "compression=");
- bch2_compression_opt_to_text(&buf, io_opts->background_compression);
+ bch2_compression_opt_to_text(&buf, opts->background_compression);
prt_str(&buf, " ");
bch2_prt_u64_base2(&buf, p);
prt_newline(&buf);
}
- p = bch2_bkey_ptrs_need_move(c, io_opts, ptrs);
+ p = bch2_bkey_ptrs_need_move(c, opts, ptrs);
if (p) {
prt_str(&buf, "move=");
- bch2_target_to_text(&buf, c, io_opts->background_target);
+ bch2_target_to_text(&buf, c, opts->background_target);
prt_str(&buf, " ");
bch2_prt_u64_base2(&buf, p);
prt_newline(&buf);
@@ -428,6 +598,7 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans,
noinline_for_stack
static int do_rebalance_extent(struct moving_context *ctxt,
+ struct per_snapshot_io_opts *snapshot_io_opts,
struct bpos work_pos,
struct btree_iter *extent_iter)
{
@@ -435,7 +606,7 @@ static int do_rebalance_extent(struct moving_context *ctxt,
struct bch_fs *c = trans->c;
struct bch_fs_rebalance *r = &trans->c->rebalance;
struct data_update_opts data_opts;
- struct bch_io_opts io_opts;
+ struct bch_inode_opts *io_opts;
struct bkey_s_c k;
struct bkey_buf sk;
int ret;
@@ -446,8 +617,8 @@ static int do_rebalance_extent(struct moving_context *ctxt,
bch2_bkey_buf_init(&sk);
ret = lockrestart_do(trans,
- bkey_err(k = next_rebalance_extent(trans, work_pos,
- extent_iter, &io_opts, &data_opts)));
+ bkey_err(k = next_rebalance_extent(trans, snapshot_io_opts,
+ work_pos, extent_iter, &io_opts, &data_opts)));
if (ret || !k.k)
goto out;
@@ -460,7 +631,7 @@ static int do_rebalance_extent(struct moving_context *ctxt,
bch2_bkey_buf_reassemble(&sk, c, k);
k = bkey_i_to_s_c(sk.k);
- ret = bch2_move_extent(ctxt, NULL, extent_iter, k, io_opts, data_opts);
+ ret = bch2_move_extent(ctxt, NULL, extent_iter, k, *io_opts, data_opts);
if (ret) {
if (bch2_err_matches(ret, ENOMEM)) {
/* memory allocation failure, wait for some IO to finish */
@@ -479,7 +650,31 @@ out:
return ret;
}
+static int do_rebalance_scan_indirect(struct btree_trans *trans,
+ struct bkey_s_c_reflink_p p,
+ struct bch_inode_opts *opts)
+{
+ u64 idx = REFLINK_P_IDX(p.v) - le32_to_cpu(p.v->front_pad);
+ u64 end = REFLINK_P_IDX(p.v) + p.k->size + le32_to_cpu(p.v->back_pad);
+ u32 restart_count = trans->restart_count;
+
+ int ret = for_each_btree_key(trans, iter, BTREE_ID_reflink,
+ POS(0, idx), BTREE_ITER_not_extents, k, ({
+ if (bpos_ge(bkey_start_pos(k.k), POS(0, end)))
+ break;
+ bch2_get_update_rebalance_opts(trans, opts, &iter, k,
+ SET_NEEDS_REBALANCE_opt_change_indirect);
+ }));
+ if (ret)
+ return ret;
+
+ /* suppress trans_was_restarted() check */
+ trans->restart_count = restart_count;
+ return 0;
+}
+
static int do_rebalance_scan(struct moving_context *ctxt,
+ struct per_snapshot_io_opts *snapshot_io_opts,
u64 inum, u64 cookie, u64 *sectors_scanned)
{
struct btree_trans *trans = ctxt->trans;
@@ -499,32 +694,33 @@ static int do_rebalance_scan(struct moving_context *ctxt,
r->state = BCH_REBALANCE_scanning;
- struct per_snapshot_io_opts snapshot_io_opts;
- per_snapshot_io_opts_init(&snapshot_io_opts, c);
-
int ret = for_each_btree_key_max(trans, iter, BTREE_ID_extents,
r->scan_start.pos, r->scan_end.pos,
BTREE_ITER_all_snapshots|
BTREE_ITER_prefetch, k, ({
ctxt->stats->pos = BBPOS(iter.btree_id, iter.pos);
- struct bch_io_opts *io_opts = bch2_move_get_io_opts(trans,
- &snapshot_io_opts, iter.pos, &iter, k);
- PTR_ERR_OR_ZERO(io_opts);
+ struct bch_inode_opts *opts = bch2_extent_get_apply_io_opts(trans,
+ snapshot_io_opts, iter.pos, &iter, k,
+ SET_NEEDS_REBALANCE_opt_change);
+ PTR_ERR_OR_ZERO(opts) ?:
+ (inum &&
+ k.k->type == KEY_TYPE_reflink_p &&
+ REFLINK_P_MAY_UPDATE_OPTIONS(bkey_s_c_to_reflink_p(k).v)
+ ? do_rebalance_scan_indirect(trans, bkey_s_c_to_reflink_p(k), opts)
+ : 0);
})) ?:
commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
bch2_clear_rebalance_needs_scan(trans, inum, cookie));
- per_snapshot_io_opts_exit(&snapshot_io_opts);
*sectors_scanned += atomic64_read(&r->scan_stats.sectors_seen);
- bch2_move_stats_exit(&r->scan_stats, c);
-
/*
* Ensure that the rebalance_work entries we created are seen by the
* next iteration of do_rebalance(), so we don't end up stuck in
* rebalance_wait():
*/
*sectors_scanned += 1;
+ bch2_move_stats_exit(&r->scan_stats, c);
bch2_btree_write_buffer_flush_sync(trans);
@@ -576,6 +772,9 @@ static int do_rebalance(struct moving_context *ctxt)
bch2_move_stats_init(&r->work_stats, "rebalance_work");
+ struct per_snapshot_io_opts snapshot_io_opts;
+ per_snapshot_io_opts_init(&snapshot_io_opts, c);
+
while (!bch2_move_ratelimit(ctxt)) {
if (!bch2_rebalance_enabled(c)) {
bch2_moving_ctxt_flush_all(ctxt);
@@ -590,15 +789,18 @@ static int do_rebalance(struct moving_context *ctxt)
break;
ret = k->k.type == KEY_TYPE_cookie
- ? do_rebalance_scan(ctxt, k->k.p.inode,
+ ? do_rebalance_scan(ctxt, &snapshot_io_opts,
+ k->k.p.inode,
le64_to_cpu(bkey_i_to_cookie(k)->v.cookie),
&sectors_scanned)
- : do_rebalance_extent(ctxt, k->k.p, &extent_iter);
+ : do_rebalance_extent(ctxt, &snapshot_io_opts,
+ k->k.p, &extent_iter);
if (ret)
break;
}
bch2_trans_iter_exit(&extent_iter);
+ per_snapshot_io_opts_exit(&snapshot_io_opts);
bch2_move_stats_exit(&r->work_stats, c);
if (!ret &&
@@ -661,7 +863,7 @@ void bch2_rebalance_status_to_text(struct printbuf *out, struct bch_fs *c)
prt_str(out, bch2_rebalance_state_strs[r->state]);
prt_newline(out);
- printbuf_indent_add(out, 2);
+ guard(printbuf_indent)(out);
switch (r->state) {
case BCH_REBALANCE_waiting: {
@@ -700,8 +902,6 @@ void bch2_rebalance_status_to_text(struct printbuf *out, struct bch_fs *c)
bch2_prt_task_backtrace(out, t, 0, GFP_KERNEL);
put_task_struct(t);
}
-
- printbuf_indent_sub(out, 2);
}
void bch2_rebalance_stop(struct bch_fs *c)
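
The per-snapshot option lookup in bch2_extent_get_io_opts() above reduces to a small pattern: cache one inode's options keyed by snapshot ID, return the first cached entry whose snapshot is an ancestor of the extent's snapshot, and fall back to the filesystem-wide defaults otherwise. Below is a minimal userspace sketch of that shape; it is illustrative only, and snapshot_is_ancestor() is a stub standing in for bch2_snapshot_is_ancestor()'s skiplist walk.

    /*
     * Illustrative only: mirrors the lookup shape of bch2_extent_get_io_opts(),
     * not the bcachefs API.
     */
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct io_opts { unsigned background_target; };

    struct snapshot_opts_entry {
            uint32_t        snapshot;
            struct io_opts  opts;
    };

    /* stub: the real check walks the snapshot tree via its skiplist fields */
    static bool snapshot_is_ancestor(uint32_t id, uint32_t ancestor)
    {
            return id == ancestor;  /* placeholder */
    }

    static struct io_opts *lookup_opts(struct snapshot_opts_entry *d, size_t nr,
                                       struct io_opts *fs_opts, uint32_t snapshot)
    {
            for (size_t i = 0; i < nr; i++)
                    if (snapshot_is_ancestor(snapshot, d[i].snapshot))
                            return &d[i].opts;

            return fs_opts;         /* no per-inode match: fs-wide defaults */
    }

    int main(void)
    {
            struct io_opts fs_opts = { .background_target = 0 };
            struct snapshot_opts_entry d[] = {
                    { .snapshot = 42, .opts = { .background_target = 3 } },
            };

            printf("target %u\n", lookup_opts(d, 1, &fs_opts, 42)->background_target);
            return 0;
    }
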
diff --git a/fs/bcachefs/rebalance.h b/fs/bcachefs/rebalance.h
index 7a565ea7dbfc..bff91aa0102e 100644
--- a/fs/bcachefs/rebalance.h
+++ b/fs/bcachefs/rebalance.h
@@ -8,7 +8,7 @@
#include "rebalance_types.h"
static inline struct bch_extent_rebalance io_opts_to_rebalance_opts(struct bch_fs *c,
- struct bch_io_opts *opts)
+ struct bch_inode_opts *opts)
{
struct bch_extent_rebalance r = {
.type = BIT(BCH_EXTENT_ENTRY_rebalance),
@@ -26,12 +26,55 @@ static inline struct bch_extent_rebalance io_opts_to_rebalance_opts(struct bch_f
return r;
};
+void bch2_extent_rebalance_to_text(struct printbuf *, struct bch_fs *,
+ const struct bch_extent_rebalance *);
+
u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *, struct bkey_s_c);
-int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bch_io_opts *, struct bkey_i *);
-int bch2_get_update_rebalance_opts(struct btree_trans *,
- struct bch_io_opts *,
- struct btree_iter *,
- struct bkey_s_c);
+
+enum set_needs_rebalance_ctx {
+ SET_NEEDS_REBALANCE_opt_change,
+ SET_NEEDS_REBALANCE_opt_change_indirect,
+ SET_NEEDS_REBALANCE_foreground,
+ SET_NEEDS_REBALANCE_other,
+};
+
+int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bch_inode_opts *,
+ struct bkey_i *, enum set_needs_rebalance_ctx, u32);
+
+/* Inodes in different snapshots may have different IO options: */
+struct snapshot_io_opts_entry {
+ u32 snapshot;
+ struct bch_inode_opts io_opts;
+};
+
+struct per_snapshot_io_opts {
+ u64 cur_inum;
+ struct bch_inode_opts fs_io_opts;
+ DARRAY(struct snapshot_io_opts_entry) d;
+};
+
+static inline void per_snapshot_io_opts_init(struct per_snapshot_io_opts *io_opts, struct bch_fs *c)
+{
+ memset(io_opts, 0, sizeof(*io_opts));
+ bch2_inode_opts_get(c, &io_opts->fs_io_opts);
+}
+
+static inline void per_snapshot_io_opts_exit(struct per_snapshot_io_opts *io_opts)
+{
+ darray_exit(&io_opts->d);
+}
+
+struct bch_inode_opts *bch2_extent_get_apply_io_opts(struct btree_trans *,
+ struct per_snapshot_io_opts *, struct bpos,
+ struct btree_iter *, struct bkey_s_c,
+ enum set_needs_rebalance_ctx);
+
+int bch2_extent_get_io_opts_one(struct btree_trans *, struct bch_inode_opts *,
+ struct btree_iter *, struct bkey_s_c,
+ enum set_needs_rebalance_ctx);
+int bch2_extent_get_apply_io_opts_one(struct btree_trans *, struct bch_inode_opts *,
+ struct btree_iter *, struct bkey_s_c,
+ enum set_needs_rebalance_ctx);
int bch2_set_rebalance_needs_scan_trans(struct btree_trans *, u64);
int bch2_set_rebalance_needs_scan(struct bch_fs *, u64 inum);
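
per_snapshot_io_opts is effectively a one-inode cache: cur_inum records which inode the darray entries describe, and the array is dropped and refilled only when the scan crosses an inode boundary. A compact sketch of that invalidation rule, with fill_from_inode() as a hypothetical stand-in for the BTREE_ID_inodes scan:

    #include <stdint.h>
    #include <stdio.h>

    struct opts_cache {
            uint64_t cur_inum;      /* inode the cached entries belong to */
            size_t   nr;            /* number of valid entries */
    };

    /* hypothetical: would scan the inodes btree for every snapshot of @inum */
    static void fill_from_inode(struct opts_cache *c, uint64_t inum)
    {
            c->nr = 1;
            printf("refilled cache for inum %llu\n", (unsigned long long)inum);
    }

    static void opts_cache_update(struct opts_cache *c, uint64_t inum)
    {
            if (c->cur_inum == inum)
                    return;                 /* still on the same inode */

            c->nr = 0;                      /* drop the previous inode's entries */
            fill_from_inode(c, inum);
            c->cur_inum = inum;
    }

    int main(void)
    {
            struct opts_cache c = { 0 };

            opts_cache_update(&c, 10);      /* refill */
            opts_cache_update(&c, 10);      /* cache hit, no refill */
            opts_cache_update(&c, 11);      /* refill */
            return 0;
    }

Since extents for one inode are adjacent in the extents btree, the cache hits on nearly every key of a large file and only refills at file boundaries.
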
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 6319144a440c..531c2ef128ae 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -15,6 +15,7 @@
#include "error.h"
#include "journal_io.h"
#include "journal_reclaim.h"
+#include "journal_sb.h"
#include "journal_seq_blacklist.h"
#include "logged_ops.h"
#include "move.h"
@@ -67,9 +68,12 @@ int bch2_btree_lost_data(struct bch_fs *c,
#endif
write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_lru_entry_bad, ext->errors_silent);
+ write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_to_missing_lru_entry, ext->errors_silent);
write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_backpointer_to_missing_ptr, ext->errors_silent);
write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent);
write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong, ext->errors_silent);
+ write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_need_discard_key_wrong, ext->errors_silent);
+ write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_freespace_key_wrong, ext->errors_silent);
switch (btree) {
case BTREE_ID_alloc:
@@ -644,6 +648,10 @@ int bch2_fs_recovery(struct bch_fs *c)
bch_info(c, "recovering from clean shutdown, journal seq %llu",
le64_to_cpu(clean->journal_seq));
+
+ ret = bch2_sb_journal_sort(c);
+ if (ret)
+ goto err;
} else {
bch_info(c, "recovering from unclean shutdown");
}
@@ -829,33 +837,39 @@ use_clean:
bch2_async_btree_node_rewrites_flush(c);
/* fsync if we fixed errors */
- if (test_bit(BCH_FS_errors_fixed, &c->flags)) {
+ bool errors_fixed = test_bit(BCH_FS_errors_fixed, &c->flags) ||
+ test_bit(BCH_FS_errors_fixed_silent, &c->flags);
+
+ if (errors_fixed) {
bch2_journal_flush_all_pins(&c->journal);
bch2_journal_meta(&c->journal);
}
/* If we fixed errors, verify that fs is actually clean now: */
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) &&
- test_bit(BCH_FS_errors_fixed, &c->flags) &&
+ errors_fixed &&
!test_bit(BCH_FS_errors_not_fixed, &c->flags) &&
!test_bit(BCH_FS_error, &c->flags)) {
bch2_flush_fsck_errs(c);
bch_info(c, "Fixed errors, running fsck a second time to verify fs is clean");
+ errors_fixed = test_bit(BCH_FS_errors_fixed, &c->flags);
clear_bit(BCH_FS_errors_fixed, &c->flags);
+ clear_bit(BCH_FS_errors_fixed_silent, &c->flags);
ret = bch2_run_recovery_passes(c,
BCH_RECOVERY_PASS_check_alloc_info);
if (ret)
goto err;
- if (test_bit(BCH_FS_errors_fixed, &c->flags) ||
+ if (errors_fixed ||
test_bit(BCH_FS_errors_not_fixed, &c->flags)) {
bch_err(c, "Second fsck run was not clean");
set_bit(BCH_FS_errors_not_fixed, &c->flags);
}
- set_bit(BCH_FS_errors_fixed, &c->flags);
+ if (errors_fixed)
+ set_bit(BCH_FS_errors_fixed, &c->flags);
}
if (enabled_qtypes(c)) {
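
The errors_silent updates in bch2_btree_lost_data() lean on test-and-set returning the previous bit value: write_sb becomes true only when a bit is newly set, so repeated calls don't queue redundant superblock writes. The idiom in isolation, as a plain non-atomic sketch:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* non-atomic sketch of a test-and-set bitop: returns the old bit value */
    static bool test_and_set_bit64(uint64_t *word, unsigned nr)
    {
            uint64_t mask = 1ULL << nr;
            bool old = *word & mask;

            *word |= mask;
            return old;
    }

    int main(void)
    {
            uint64_t errors_silent = 0;
            bool write_sb = false;

            /* first set: the bit was clear, so the superblock needs a write */
            write_sb |= !test_and_set_bit64(&errors_silent, 3);
            /* second set: already set, no extra write scheduled */
            write_sb |= !test_and_set_bit64(&errors_silent, 3);

            printf("write_sb = %d\n", write_sb);
            return 0;
    }
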
diff --git a/fs/bcachefs/recovery_passes_format.h b/fs/bcachefs/recovery_passes_format.h
index 2696eee00345..d5654de64e4c 100644
--- a/fs/bcachefs/recovery_passes_format.h
+++ b/fs/bcachefs/recovery_passes_format.h
@@ -29,6 +29,7 @@
x(stripes_read, 1, 0) \
x(initialize_subvolumes, 2, 0) \
x(snapshots_read, 3, PASS_ALWAYS) \
+ x(delete_dead_interior_snapshots, 44, 0) \
x(check_allocations, 5, PASS_FSCK_ALLOC) \
x(trans_mark_dev_sbs, 6, PASS_ALWAYS|PASS_SILENT|PASS_ALLOC) \
x(fs_journal_alloc, 7, PASS_ALWAYS|PASS_SILENT|PASS_ALLOC) \
diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c
index 238a362de19e..d54468fdcb18 100644
--- a/fs/bcachefs/reflink.c
+++ b/fs/bcachefs/reflink.c
@@ -589,7 +589,6 @@ s64 bch2_remap_range(struct bch_fs *c,
struct bpos dst_start = POS(dst_inum.inum, dst_offset);
struct bpos src_start = POS(src_inum.inum, src_offset);
struct bpos dst_end = dst_start, src_end = src_start;
- struct bch_io_opts opts;
struct bpos src_want;
u64 dst_done = 0;
u32 dst_snapshot, src_snapshot;
@@ -609,10 +608,6 @@ s64 bch2_remap_range(struct bch_fs *c,
bch2_bkey_buf_init(&new_src);
CLASS(btree_trans, trans)(c);
- ret = bch2_inum_opts_get(trans, src_inum, &opts);
- if (ret)
- goto err;
-
bch2_trans_iter_init(trans, &src_iter, BTREE_ID_extents, src_start,
BTREE_ITER_intent);
bch2_trans_iter_init(trans, &dst_iter, BTREE_ID_extents, dst_start,
@@ -709,11 +704,10 @@ s64 bch2_remap_range(struct bch_fs *c,
min(src_k.k->p.offset - src_want.offset,
dst_end.offset - dst_iter.pos.offset));
- ret = bch2_bkey_set_needs_rebalance(c, &opts, new_dst.k) ?:
- bch2_extent_update(trans, dst_inum, &dst_iter,
- new_dst.k, &disk_res,
- new_i_size, i_sectors_delta,
- true);
+ ret = bch2_extent_update(trans, dst_inum, &dst_iter,
+ new_dst.k, &disk_res,
+ new_i_size, i_sectors_delta,
+ true, 0);
bch2_disk_reservation_put(c, &disk_res);
}
bch2_trans_iter_exit(&dst_iter);
@@ -744,7 +738,7 @@ s64 bch2_remap_range(struct bch_fs *c,
bch2_trans_iter_exit(&inode_iter);
} while (bch2_err_matches(ret2, BCH_ERR_transaction_restart));
-err:
+
bch2_bkey_buf_exit(&new_src, c);
bch2_bkey_buf_exit(&new_dst, c);
diff --git a/fs/bcachefs/sb-errors.c b/fs/bcachefs/sb-errors.c
index 41a259eab4fb..b356e80135fd 100644
--- a/fs/bcachefs/sb-errors.c
+++ b/fs/bcachefs/sb-errors.c
@@ -54,23 +54,41 @@ static int bch2_sb_errors_validate(struct bch_sb *sb, struct bch_sb_field *f,
return 0;
}
+static int error_entry_cmp(const void *_l, const void *_r)
+{
+ const struct bch_sb_field_error_entry *l = _l;
+ const struct bch_sb_field_error_entry *r = _r;
+
+	return -cmp_int(le64_to_cpu(l->last_error_time), le64_to_cpu(r->last_error_time));
+}
+
static void bch2_sb_errors_to_text(struct printbuf *out, struct bch_sb *sb,
struct bch_sb_field *f)
{
struct bch_sb_field_errors *e = field_to_type(f, errors);
- unsigned i, nr = bch2_sb_field_errors_nr_entries(e);
+ unsigned nr = bch2_sb_field_errors_nr_entries(e);
+
+	struct bch_sb_field_error_entry *sorted = kvmalloc_array(nr, sizeof(*sorted), GFP_KERNEL);
+
+	if (sorted) {
+		memcpy(sorted, e->entries, nr * sizeof(*sorted));
+		sort(sorted, nr, sizeof(*sorted), error_entry_cmp, NULL);
+	} else {
+		sorted = e->entries;
+	}
if (out->nr_tabstops <= 1)
printbuf_tabstop_push(out, 16);
- for (i = 0; i < nr; i++) {
- bch2_sb_error_id_to_text(out, BCH_SB_ERROR_ENTRY_ID(&e->entries[i]));
+ for (struct bch_sb_field_error_entry *i = sorted; i < sorted + nr; i++) {
+ bch2_sb_error_id_to_text(out, BCH_SB_ERROR_ENTRY_ID(i));
prt_tab(out);
- prt_u64(out, BCH_SB_ERROR_ENTRY_NR(&e->entries[i]));
+ prt_u64(out, BCH_SB_ERROR_ENTRY_NR(i));
prt_tab(out);
- bch2_prt_datetime(out, le64_to_cpu(e->entries[i].last_error_time));
+ bch2_prt_datetime(out, le64_to_cpu(i->last_error_time));
prt_newline(out);
}
+
+ if (sorted != e->entries)
+ kvfree(sorted);
}
const struct bch_sb_field_ops bch_sb_field_ops_errors = {
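
bch2_sb_errors_to_text() now sorts a heap copy of the entries, newest first, and degrades to unsorted output if the allocation fails; note that the copy must actually be populated before sort() runs (hence the memcpy above). The same shape in portable C with qsort(), purely illustrative:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct entry { unsigned id; long long last_error_time; };

    /* newest first, matching the negated cmp_int() in error_entry_cmp() */
    static int entry_cmp(const void *_l, const void *_r)
    {
            const struct entry *l = _l, *r = _r;

            return (l->last_error_time < r->last_error_time) -
                   (l->last_error_time > r->last_error_time);
    }

    int main(void)
    {
            struct entry entries[] = { { 1, 100 }, { 2, 300 }, { 3, 200 } };
            size_t nr = sizeof(entries) / sizeof(entries[0]);

            /* sort a copy so the (read-only) superblock field stays untouched */
            struct entry *sorted = malloc(nr * sizeof(*sorted));
            if (sorted) {
                    memcpy(sorted, entries, nr * sizeof(*sorted));
                    qsort(sorted, nr, sizeof(*sorted), entry_cmp);
            } else {
                    sorted = entries;       /* degrade to unsorted output */
            }

            for (size_t i = 0; i < nr; i++)
                    printf("id %u time %lld\n", sorted[i].id, sorted[i].last_error_time);

            if (sorted != entries)
                    free(sorted);
            return 0;
    }
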
diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c
index d26a0ca4a59d..963f8c2690c9 100644
--- a/fs/bcachefs/sb-members.c
+++ b/fs/bcachefs/sb-members.c
@@ -36,12 +36,10 @@ int bch2_dev_missing_bkey(struct bch_fs *c, struct bkey_s_c k, unsigned dev)
void bch2_dev_missing_atomic(struct bch_fs *c, unsigned dev)
{
- if (dev != BCH_SB_MEMBER_INVALID) {
+ if (dev != BCH_SB_MEMBER_INVALID)
bch2_fs_inconsistent(c, "pointer to %s device %u",
test_bit(dev, c->devs_removed.d)
? "removed" : "nonexistent", dev);
- dump_stack();
- }
}
void bch2_dev_bucket_missing(struct bch_dev *ca, u64 bucket)
@@ -287,10 +285,9 @@ static void member_to_text(struct printbuf *out,
return;
prt_printf(out, "Device:\t%u\n", idx);
-	printbuf_indent_add(out, 2);
+	guard(printbuf_indent)(out);
bch2_member_to_text(out, &m, gi, sb, idx);
- printbuf_indent_sub(out, 2);
}
static int bch2_sb_members_v1_validate(struct bch_sb *sb, struct bch_sb_field *f,
@@ -437,21 +434,19 @@ void bch2_dev_io_errors_to_text(struct printbuf *out, struct bch_dev *ca)
prt_str(out, "IO errors since filesystem creation");
prt_newline(out);
- printbuf_indent_add(out, 2);
- for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++)
- prt_printf(out, "%s:\t%llu\n", bch2_member_error_strs[i], atomic64_read(&ca->errors[i]));
- printbuf_indent_sub(out, 2);
+ scoped_guard(printbuf_indent, out)
+ for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++)
+ prt_printf(out, "%s:\t%llu\n", bch2_member_error_strs[i], atomic64_read(&ca->errors[i]));
prt_str(out, "IO errors since ");
bch2_pr_time_units(out, (ktime_get_real_seconds() - le64_to_cpu(m.errors_reset_time)) * NSEC_PER_SEC);
prt_str(out, " ago");
prt_newline(out);
- printbuf_indent_add(out, 2);
- for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++)
- prt_printf(out, "%s:\t%llu\n", bch2_member_error_strs[i],
- atomic64_read(&ca->errors[i]) - le64_to_cpu(m.errors_at_reset[i]));
- printbuf_indent_sub(out, 2);
+ scoped_guard(printbuf_indent, out)
+ for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++)
+ prt_printf(out, "%s:\t%llu\n", bch2_member_error_strs[i],
+ atomic64_read(&ca->errors[i]) - le64_to_cpu(m.errors_at_reset[i]));
}
void bch2_dev_errors_reset(struct bch_dev *ca)
diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c
index eab0c1e3ff56..00546b59dca6 100644
--- a/fs/bcachefs/snapshot.c
+++ b/fs/bcachefs/snapshot.c
@@ -309,7 +309,7 @@ static int __bch2_mark_snapshot(struct btree_trans *trans,
if (new.k->type == KEY_TYPE_snapshot) {
struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new);
- t->state = !BCH_SNAPSHOT_DELETED(s.v)
+ t->state = !BCH_SNAPSHOT_DELETED(s.v) && !BCH_SNAPSHOT_NO_KEYS(s.v)
? SNAPSHOT_ID_live
: SNAPSHOT_ID_deleted;
t->parent = le32_to_cpu(s.v->parent);
@@ -1101,6 +1101,20 @@ int bch2_snapshot_node_set_deleted(struct btree_trans *trans, u32 id)
return 0;
}
+static int bch2_snapshot_node_set_no_keys(struct btree_trans *trans, u32 id)
+{
+ struct bkey_i_snapshot *s =
+ bch2_bkey_get_mut_typed(trans, BTREE_ID_snapshots, POS(0, id), 0, snapshot);
+ int ret = PTR_ERR_OR_ZERO(s);
+ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c, "missing snapshot %u", id);
+ if (unlikely(ret))
+ return ret;
+
+ SET_BCH_SNAPSHOT_NO_KEYS(&s->v, true);
+ s->v.subvol = 0;
+ return 0;
+}
+
static inline void normalize_snapshot_child_pointers(struct bch_snapshot *s)
{
if (le32_to_cpu(s->children[0]) < le32_to_cpu(s->children[1]))
@@ -1783,22 +1797,9 @@ int __bch2_delete_dead_snapshots(struct bch_fs *c)
if (ret)
goto err;
}
-
- /*
- * Fixing children of deleted snapshots can't be done completely
- * atomically, if we crash between here and when we delete the interior
- * nodes some depth fields will be off:
- */
- ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, POS_MIN,
- BTREE_ITER_intent, k,
- NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &d->delete_interior));
- if (ret)
- goto err;
-
darray_for_each(d->delete_interior, i) {
ret = commit_do(trans, NULL, NULL, 0,
- bch2_snapshot_node_delete(trans, i->id));
+ bch2_snapshot_node_set_no_keys(trans, i->id));
if (!bch2_err_matches(ret, EROFS))
bch_err_msg(c, ret, "deleting snapshot %u", i->id);
if (ret)
@@ -1887,6 +1888,66 @@ int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans,
return ret;
}
+static int bch2_get_dead_interior_snapshots(struct btree_trans *trans, struct bkey_s_c k)
+{
+ struct bch_fs *c = trans->c;
+
+ if (k.k->type == KEY_TYPE_snapshot &&
+ BCH_SNAPSHOT_NO_KEYS(bkey_s_c_to_snapshot(k).v)) {
+ struct snapshot_interior_delete n = {
+ .id = k.k->p.offset,
+ .live_child = live_child(c, k.k->p.offset),
+ };
+
+ if (!n.live_child) {
+ bch_err(c, "error finding live child of snapshot %u", n.id);
+ return -EINVAL;
+ }
+
+ return darray_push(&c->snapshot_delete.delete_interior, n);
+ }
+
+ return 0;
+}
+
+int bch2_delete_dead_interior_snapshots(struct bch_fs *c)
+{
+ CLASS(btree_trans, trans)(c);
+	int ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k,
+ bch2_get_dead_interior_snapshots(trans, k));
+ if (ret)
+ goto err;
+
+ struct snapshot_delete *d = &c->snapshot_delete;
+ if (d->delete_interior.nr) {
+ /*
+		 * Fixing children of deleted snapshots can't be done completely
+		 * atomically; if we crash between here and when we delete the
+		 * interior nodes, some depth fields will be off:
+ */
+ ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, POS_MIN,
+ BTREE_ITER_intent, k,
+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &d->delete_interior));
+ if (ret)
+ goto err;
+
+ darray_for_each(d->delete_interior, i) {
+ ret = commit_do(trans, NULL, NULL, 0,
+ bch2_snapshot_node_delete(trans, i->id));
+ if (!bch2_err_matches(ret, EROFS))
+ bch_err_msg(c, ret, "deleting snapshot %u", i->id);
+ if (ret)
+ goto err;
+ }
+
+ darray_exit(&d->delete_interior);
+ }
+err:
+ bch_err_fn(c, ret);
+ return ret;
+}
+
static bool interior_snapshot_needs_delete(struct bkey_s_c_snapshot snap)
{
/* If there's one child, it's redundant and keys will be moved to the child */
@@ -1895,13 +1956,18 @@ static bool interior_snapshot_needs_delete(struct bkey_s_c_snapshot snap)
static int bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct bkey_s_c k)
{
+ struct bch_fs *c = trans->c;
+
if (k.k->type != KEY_TYPE_snapshot)
return 0;
- struct bkey_s_c_snapshot snap = bkey_s_c_to_snapshot(k);
- if (BCH_SNAPSHOT_WILL_DELETE(snap.v) ||
- interior_snapshot_needs_delete(snap))
- set_bit(BCH_FS_need_delete_dead_snapshots, &trans->c->flags);
+	struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k);
+
+ if (BCH_SNAPSHOT_NO_KEYS(s.v))
+ c->recovery.passes_to_run |= BIT_ULL(BCH_RECOVERY_PASS_delete_dead_interior_snapshots);
+ if (BCH_SNAPSHOT_WILL_DELETE(s.v) ||
+ interior_snapshot_needs_delete(s))
+ set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags);
return 0;
}
@@ -1909,6 +1975,15 @@ static int bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct
int bch2_snapshots_read(struct bch_fs *c)
{
/*
+ * It's important that we check if we need to reconstruct snapshots
+ * before going RW, so we mark that pass as required in the superblock -
+ * otherwise, we could end up deleting keys with missing snapshot nodes
+ * instead
+ */
+ BUG_ON(!test_bit(BCH_FS_new_fs, &c->flags) &&
+ test_bit(BCH_FS_may_go_rw, &c->flags));
+
+ /*
* Initializing the is_ancestor bitmaps requires ancestors to already be
* initialized - so mark in reverse:
*/
@@ -1919,15 +1994,6 @@ int bch2_snapshots_read(struct bch_fs *c)
bch2_check_snapshot_needs_deletion(trans, k));
bch_err_fn(c, ret);
- /*
- * It's important that we check if we need to reconstruct snapshots
- * before going RW, so we mark that pass as required in the superblock -
- * otherwise, we could end up deleting keys with missing snapshot nodes
- * instead
- */
- BUG_ON(!test_bit(BCH_FS_new_fs, &c->flags) &&
- test_bit(BCH_FS_may_go_rw, &c->flags));
-
return ret;
}
diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h
index 28d9a29a1fd0..65d43a7ab877 100644
--- a/fs/bcachefs/snapshot.h
+++ b/fs/bcachefs/snapshot.h
@@ -291,6 +291,7 @@ void bch2_delete_dead_snapshots_work(struct work_struct *);
void bch2_delete_dead_snapshots_async(struct bch_fs *);
void bch2_snapshot_delete_status_to_text(struct printbuf *, struct bch_fs *);
+int bch2_delete_dead_interior_snapshots(struct bch_fs *);
int bch2_snapshots_read(struct bch_fs *);
void bch2_fs_snapshots_exit(struct bch_fs *);
void bch2_fs_snapshots_init_early(struct bch_fs *);
diff --git a/fs/bcachefs/snapshot_format.h b/fs/bcachefs/snapshot_format.h
index 9bccae1f3590..444885106140 100644
--- a/fs/bcachefs/snapshot_format.h
+++ b/fs/bcachefs/snapshot_format.h
@@ -15,10 +15,35 @@ struct bch_snapshot {
bch_le128 btime;
};
+/*
+ * WILL_DELETE: leaf node that's no longer referenced by a subvolume, still has
+ * keys, will be deleted by delete_dead_snapshots
+ *
+ * SUBVOL: true if a subvol points to this snapshot (why do we have this?
+ * subvols are nonzero)
+ *
+ * DELETED: we never delete snapshot keys, we mark them as deleted so that we
+ * can distinguish between a key for a missing snapshot (and we have no idea
+ * what happened) and a key for a deleted snapshot (delete_dead_snapshots() missed
+ * something, key should be deleted)
+ *
+ * NO_KEYS: we don't remove interior snapshot nodes from snapshot trees at
+ * runtime, since we can't do the adjustments to the depth/skiplist fields
+ * atomically - and that breaks e.g. is_ancestor(). Instead, we mark it to be
+ * deleted at the next remount; this tells us that we don't need to run the full
+ * delete_dead_snapshots().
+ *
+ * XXX - todo item:
+ *
+ * We should guard against a bitflip causing us to delete a snapshot incorrectly
+ * by cross checking with the subvolume btree: delete_dead_snapshots() can take
+ * out more data than any other codepath if it runs incorrectly
+ */
LE32_BITMASK(BCH_SNAPSHOT_WILL_DELETE, struct bch_snapshot, flags, 0, 1)
-/* True if a subvolume points to this snapshot node: */
LE32_BITMASK(BCH_SNAPSHOT_SUBVOL, struct bch_snapshot, flags, 1, 2)
LE32_BITMASK(BCH_SNAPSHOT_DELETED, struct bch_snapshot, flags, 2, 3)
+LE32_BITMASK(BCH_SNAPSHOT_NO_KEYS, struct bch_snapshot, flags, 3, 4)
/*
* Snapshot trees:
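
BCH_SNAPSHOT_NO_KEYS claims bits 3..4 of the flags word, the next free slot after DELETED. LE32_BITMASK() generates getter/setter pairs over such bit ranges; a simplified sketch of the underlying arithmetic, ignoring the little-endian byte swapping the real macro performs:

    #include <stdio.h>

    /* get bits [lo, hi) of a flags word */
    static unsigned bitmask_get(unsigned flags, unsigned lo, unsigned hi)
    {
            return (flags >> lo) & ((1U << (hi - lo)) - 1);
    }

    /* return @flags with bits [lo, hi) replaced by @v */
    static unsigned bitmask_set(unsigned flags, unsigned lo, unsigned hi, unsigned v)
    {
            unsigned mask = ((1U << (hi - lo)) - 1) << lo;

            return (flags & ~mask) | ((v << lo) & mask);
    }

    int main(void)
    {
            unsigned flags = 0;

            /* BCH_SNAPSHOT_NO_KEYS occupies bits 3..4: a single-bit field */
            flags = bitmask_set(flags, 3, 4, 1);
            printf("NO_KEYS=%u flags=%#x\n", bitmask_get(flags, 3, 4), flags);
            return 0;
    }
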
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index 61eeac671283..98d31a1f9630 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -1516,8 +1516,7 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb,
prt_newline(out);
prt_printf(out, "Options:");
prt_newline(out);
- printbuf_indent_add(out, 2);
- {
+ scoped_guard(printbuf_indent, out) {
enum bch_opt_id id;
for (id = 0; id < bch2_opts_nr; id++) {
@@ -1534,15 +1533,12 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb,
}
}
- printbuf_indent_sub(out, 2);
-
if (print_layout) {
prt_newline(out);
prt_printf(out, "layout:");
prt_newline(out);
- printbuf_indent_add(out, 2);
- bch2_sb_layout_to_text(out, &sb->layout);
- printbuf_indent_sub(out, 2);
+ scoped_guard(printbuf_indent, out)
+ bch2_sb_layout_to_text(out, &sb->layout);
}
vstruct_for_each(sb, f)
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index e908fc77b671..ed504ce75169 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -277,6 +277,17 @@ struct bch_fs *bch2_uuid_to_fs(__uuid_t uuid)
return c;
}
+void bch2_devs_list_to_text(struct printbuf *out, struct bch_devs_list *d)
+{
+ prt_char(out, '[');
+ darray_for_each(*d, i) {
+ if (i != d->data)
+ prt_char(out, ' ');
+ prt_printf(out, "%u", *i);
+ }
+ prt_char(out, ']');
+}
+
/* Filesystem RO/RW: */
/*
@@ -461,9 +472,11 @@ static bool __bch2_fs_emergency_read_only2(struct bch_fs *c, struct printbuf *ou
bch2_fs_read_only_async(c);
wake_up(&bch2_read_only_wait);
- if (ret)
+ if (ret) {
prt_printf(out, "emergency read only at seq %llu\n",
journal_cur_seq(&c->journal));
+ bch2_prt_task_backtrace(out, current, 2, out->atomic ? GFP_ATOMIC : GFP_KERNEL);
+ }
return ret;
}
@@ -1273,7 +1286,12 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts,
if (ret)
goto err;
- if (go_rw_in_recovery(c)) {
+ /*
+ * just make sure this is always allocated if we might need it - mount
+ * failing due to kthread_create() failing is _very_ annoying
+ */
+ if (!(c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info)) ||
+ go_rw_in_recovery(c)) {
/*
* start workqueues/kworkers early - kthread creation checks for
* pending signals, which is _very_ annoying
diff --git a/fs/bcachefs/super.h b/fs/bcachefs/super.h
index d13dbf2b8227..351dc5911645 100644
--- a/fs/bcachefs/super.h
+++ b/fs/bcachefs/super.h
@@ -16,6 +16,8 @@ extern const char * const bch2_dev_write_refs[];
struct bch_fs *bch2_dev_to_fs(dev_t);
struct bch_fs *bch2_uuid_to_fs(__uuid_t);
+void bch2_devs_list_to_text(struct printbuf *, struct bch_devs_list *);
+
bool bch2_dev_state_allowed(struct bch_fs *, struct bch_dev *,
enum bch_member_state, int,
struct printbuf *);
diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c
index 6b071dcc062b..4c6e6c46d18a 100644
--- a/fs/bcachefs/sysfs.c
+++ b/fs/bcachefs/sysfs.c
@@ -784,7 +784,7 @@ static ssize_t sysfs_opt_store(struct bch_fs *c,
u64 v;
ret = bch2_opt_parse(c, opt, strim(tmp), &v, NULL) ?:
- bch2_opt_hook_pre_set(c, ca, id, v);
+ bch2_opt_hook_pre_set(c, ca, 0, id, v);
kfree(tmp);
if (ret < 0)
@@ -807,7 +807,7 @@ static ssize_t sysfs_opt_store(struct bch_fs *c,
bch2_opt_set_by_id(&c->opts, id, v);
if (changed)
- bch2_opt_hook_post_set(c, ca, 0, &c->opts, id);
+ bch2_opt_hook_post_set(c, ca, 0, id, v);
ret = size;
err:
diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c
index 2ded7f3c835f..2a9462275f92 100644
--- a/fs/bcachefs/util.c
+++ b/fs/bcachefs/util.c
@@ -415,45 +415,41 @@ void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats
printbuf_tabstop_push(out, TABSTOP_SIZE);
prt_printf(out, "duration of events\n");
- printbuf_indent_add(out, 2);
-
- pr_name_and_units(out, "min:", stats->min_duration);
- pr_name_and_units(out, "max:", stats->max_duration);
- pr_name_and_units(out, "total:", stats->total_duration);
-
- prt_printf(out, "mean:\t");
- bch2_pr_time_units_aligned(out, d_mean);
- prt_tab(out);
- bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT));
- prt_newline(out);
-
- prt_printf(out, "stddev:\t");
- bch2_pr_time_units_aligned(out, d_stddev);
- prt_tab(out);
- bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT));
+ scoped_guard(printbuf_indent, out) {
+ pr_name_and_units(out, "min:", stats->min_duration);
+ pr_name_and_units(out, "max:", stats->max_duration);
+ pr_name_and_units(out, "total:", stats->total_duration);
+
+ prt_printf(out, "mean:\t");
+ bch2_pr_time_units_aligned(out, d_mean);
+ prt_tab(out);
+ bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT));
+ prt_newline(out);
- printbuf_indent_sub(out, 2);
- prt_newline(out);
+ prt_printf(out, "stddev:\t");
+ bch2_pr_time_units_aligned(out, d_stddev);
+ prt_tab(out);
+ bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT));
+ prt_newline(out);
+ }
prt_printf(out, "time between events\n");
- printbuf_indent_add(out, 2);
-
- pr_name_and_units(out, "min:", stats->min_freq);
- pr_name_and_units(out, "max:", stats->max_freq);
-
- prt_printf(out, "mean:\t");
- bch2_pr_time_units_aligned(out, f_mean);
- prt_tab(out);
- bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT));
- prt_newline(out);
-
- prt_printf(out, "stddev:\t");
- bch2_pr_time_units_aligned(out, f_stddev);
- prt_tab(out);
- bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT));
+ scoped_guard(printbuf_indent, out) {
+ pr_name_and_units(out, "min:", stats->min_freq);
+ pr_name_and_units(out, "max:", stats->max_freq);
+
+ prt_printf(out, "mean:\t");
+ bch2_pr_time_units_aligned(out, f_mean);
+ prt_tab(out);
+ bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT));
+ prt_newline(out);
- printbuf_indent_sub(out, 2);
- prt_newline(out);
+ prt_printf(out, "stddev:\t");
+ bch2_pr_time_units_aligned(out, f_stddev);
+ prt_tab(out);
+ bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT));
+ prt_newline(out);
+ }
printbuf_tabstops_reset(out);
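
The guard(printbuf_indent) and scoped_guard(printbuf_indent, ...) conversions throughout this series are built on the kernel's cleanup.h machinery, which in turn is compiler-level __attribute__((cleanup)). A freestanding sketch of the same mechanism applied to an indent counter (GCC/Clang only; all names here are hypothetical):

    #include <stdio.h>

    struct printer { int indent; };

    static void indent_exit(struct printer **p)
    {
            (*p)->indent -= 2;      /* runs automatically at scope exit */
    }

    #define guard_indent(p)                                                      \
            struct printer *_guard __attribute__((cleanup(indent_exit))) = (p);  \
            (p)->indent += 2

    int main(void)
    {
            struct printer p = { 0 };

            {
                    guard_indent(&p);
                    printf("%*sinside guard, indent=%d\n", p.indent, "", p.indent);
            }       /* indent_exit() fires here, like printbuf_indent_sub() */

            printf("%*safter guard, indent=%d\n", p.indent, "", p.indent);
            return 0;
    }

The payoff is visible in the hunks above: every early return that previously risked an unbalanced printbuf_indent_sub() now unwinds automatically.
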
diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c
index 6d7303008b19..784e75a21132 100644
--- a/fs/bcachefs/xattr.c
+++ b/fs/bcachefs/xattr.c
@@ -535,10 +535,9 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler,
return -EINVAL;
s.id = inode_opt_id;
+ u64 v = 0;
if (value) {
- u64 v = 0;
-
buf = kmalloc(size + 1, GFP_KERNEL);
if (!buf)
return -ENOMEM;
@@ -551,7 +550,7 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler,
if (ret < 0)
goto err;
- ret = bch2_opt_hook_pre_set(c, NULL, opt_id, v);
+ ret = bch2_opt_hook_pre_set(c, NULL, inode->ei_inode.bi_inum, opt_id, v);
if (ret < 0)
goto err;
@@ -591,6 +590,8 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler,
ret = bch2_write_inode(c, inode, inode_opt_set_fn, &s, 0);
}
+
+ bch2_opt_hook_post_set(c, NULL, inode->ei_inode.bi_inum, opt_id, v);
err:
return bch2_err_class(ret);
}
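
With this change the xattr path matches sysfs_opt_store(): a pre-set hook that can veto the value before any state changes, the inode update itself, then a post-set hook for side effects such as kicking off background work. The control flow in miniature (hook bodies hypothetical):

    #include <errno.h>
    #include <stdio.h>

    /* hypothetical pre-set hook: may veto the new value */
    static int opt_hook_pre_set(unsigned opt_id, unsigned long long v)
    {
            return v > 100 ? -EINVAL : 0;
    }

    /* hypothetical post-set hook: side effects after the value is applied */
    static void opt_hook_post_set(unsigned opt_id, unsigned long long v)
    {
            printf("opt %u set to %llu, waking background work\n", opt_id, v);
    }

    static int set_opt(unsigned opt_id, unsigned long long v)
    {
            int ret = opt_hook_pre_set(opt_id, v);
            if (ret)
                    return ret;     /* rejected before any state changed */

            /* ... apply the option (the inode update in the real code) ... */

            opt_hook_post_set(opt_id, v);
            return 0;
    }

    int main(void)
    {
            printf("%d\n", set_opt(1, 42));         /* ok */
            printf("%d\n", set_opt(1, 1000));       /* -EINVAL */
            return 0;
    }
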