Diffstat (limited to 'fs/bcachefs')
65 files changed, 1211 insertions, 872 deletions
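Most of the mechanical churn in this commit replaces paired printbuf_indent_add()/printbuf_indent_sub() calls with guard(printbuf_indent)(...) and scoped_guard(printbuf_indent, ...), so the un-indent runs automatically on scope exit and early returns can't leak indentation. The guard definition itself is not part of this diff; a minimal sketch of how such a guard can be built on linux/cleanup.h — the names and the indent width of 2 are assumptions, not the actual bcachefs definition:

	#include <linux/cleanup.h>

	/*
	 * Assumed shape of the guard class: indent the printbuf by 2 on
	 * entry, un-indent when the guard variable goes out of scope.
	 */
	DEFINE_GUARD(printbuf_indent, struct printbuf *,
		     printbuf_indent_add(_T, 2),
		     printbuf_indent_sub(_T, 2));

	static void demo(struct printbuf *out)
	{
		guard(printbuf_indent)(out);	/* indented until function exit */
		prt_printf(out, "gen %u\n", 1);
	}					/* printbuf_indent_sub() runs here */

The scoped_guard(printbuf_indent, out) form limits the indent to the attached statement or block, which is why the conversions below can drop the explicit printbuf_indent_sub() calls without restructuring the surrounding code.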
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index 3fc728efbf5c..cab4d6798dd7 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -344,7 +344,7 @@ static inline void __bch2_alloc_v4_to_text(struct printbuf *out, struct bch_fs *
 	struct bch_dev *ca = c ? bch2_dev_tryget_noerror(c, k.k->p.inode) : NULL;
 
 	prt_newline(out);
-	printbuf_indent_add(out, 2);
+	guard(printbuf_indent)(out);
 
 	prt_printf(out, "gen %u oldest_gen %u data_type ", a->gen, a->oldest_gen);
 	bch2_prt_data_type(out, a->data_type);
@@ -367,7 +367,6 @@ static inline void __bch2_alloc_v4_to_text(struct printbuf *out, struct bch_fs *
 	if (ca)
 		prt_printf(out, "fragmentation %llu\n", alloc_lru_idx_fragmentation(*a, ca));
 	prt_printf(out, "bp_start %llu\n", BCH_ALLOC_V4_BACKPOINTERS_START(a));
-	printbuf_indent_sub(out, 2);
 
 	bch2_dev_put(ca);
 }
@@ -2385,8 +2384,7 @@ int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca)
 	 * We clear the LRU and need_discard btrees first so that we don't race
 	 * with bch2_do_invalidates() and bch2_do_discards()
 	 */
-	ret = bch2_btree_delete_range(c, BTREE_ID_lru, start, end,
-				      BTREE_TRIGGER_norun, NULL) ?:
+	ret = bch2_dev_remove_lrus(c, ca) ?:
 		bch2_btree_delete_range(c, BTREE_ID_need_discard, start, end,
					BTREE_TRIGGER_norun, NULL) ?:
 		bch2_btree_delete_range(c, BTREE_ID_freespace, start, end,
@@ -2397,7 +2395,7 @@ int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca)
					BTREE_TRIGGER_norun, NULL) ?:
 		bch2_btree_delete_range(c, BTREE_ID_alloc, start, end,
					BTREE_TRIGGER_norun, NULL) ?:
-		bch2_dev_usage_remove(c, ca->dev_idx);
+		bch2_dev_usage_remove(c, ca);
 	bch_err_msg(ca, ret, "removing dev alloc info");
 	return ret;
 }
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index f6ea4a8272d0..3d125ee81663 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -1491,10 +1491,9 @@ static void bch2_write_point_to_text(struct printbuf *out, struct bch_fs *c,
 
 	prt_newline(out);
 
-	printbuf_indent_add(out, 2);
-	open_bucket_for_each(c, &wp->ptrs, ob, i)
-		bch2_open_bucket_to_text(out, c, ob);
-	printbuf_indent_sub(out, 2);
+	scoped_guard(printbuf_indent, out)
+		open_bucket_for_each(c, &wp->ptrs, ob, i)
+			bch2_open_bucket_to_text(out, c, ob);
 }
 
 void bch2_write_points_to_text(struct printbuf *out, struct bch_fs *c)
@@ -1586,9 +1585,8 @@ static noinline void bch2_print_allocator_stuck(struct bch_fs *c)
		   c->opts.allocator_stuck_timeout);
 
 	prt_printf(&buf, "Allocator debug:\n");
-	printbuf_indent_add(&buf, 2);
-	bch2_fs_alloc_debug_to_text(&buf, c);
-	printbuf_indent_sub(&buf, 2);
+	scoped_guard(printbuf_indent, &buf)
+		bch2_fs_alloc_debug_to_text(&buf, c);
 	prt_newline(&buf);
 
 	bch2_printbuf_make_room(&buf, 4096);
@@ -1597,23 +1595,20 @@ static noinline void bch2_print_allocator_stuck(struct bch_fs *c)
 	guard(printbuf_atomic)(&buf);
 	for_each_online_member_rcu(c, ca) {
 		prt_printf(&buf, "Dev %u:\n", ca->dev_idx);
-		printbuf_indent_add(&buf, 2);
-		bch2_dev_alloc_debug_to_text(&buf, ca);
-		printbuf_indent_sub(&buf, 2);
+		scoped_guard(printbuf_indent, &buf)
+			bch2_dev_alloc_debug_to_text(&buf, ca);
 		prt_newline(&buf);
 	}
 	}
 
 	prt_printf(&buf, "Copygc debug:\n");
-	printbuf_indent_add(&buf, 2);
-	bch2_copygc_wait_to_text(&buf, c);
-	printbuf_indent_sub(&buf, 2);
+	scoped_guard(printbuf_indent, &buf)
+		bch2_copygc_wait_to_text(&buf, c);
 	prt_newline(&buf);
 
 	prt_printf(&buf, "Journal debug:\n");
-	printbuf_indent_add(&buf, 2);
-	bch2_journal_debug_to_text(&buf, &c->journal);
-	printbuf_indent_sub(&buf, 2);
+	scoped_guard(printbuf_indent, &buf)
+		bch2_journal_debug_to_text(&buf, &c->journal);
 
 	bch2_print_str(c, KERN_ERR, buf.buf);
 }
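The backpointers.c changes that follow thread a last_pos/nr_iters pair through check_bucket_backpointer_mismatch(), so a bucket whose backpointer sector counts exceed its bucket counters gets exactly one nested-restart repair attempt; if the mismatch survives the retry, the second visit dumps the bucket's backpointers and __WARN()s instead of looping. The retry limiter reduces to roughly this shape — a sketch with an invented helper name, not code from the patch:

	/*
	 * Sketch: reset the counter whenever the scan moves to a new bucket,
	 * so nr_iters counts visits to the *same* bucket across transaction
	 * restarts. Both values live in the caller and survive the restart.
	 */
	static bool first_visit(struct bpos pos, struct bpos *last_pos,
				unsigned *nr_iters)
	{
		if (!bpos_eq(*last_pos, pos))
			*nr_iters = 0;
		*last_pos = pos;

		return (*nr_iters)++ == 0;
	}

In the patch itself, the first mismatching visit calls check_bucket_backpointers_to_extents() (which now takes the caller's last_flushed) and throws transaction_restart_nested; the warning path only fires once the same bucket is seen again with counters still disagreeing.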
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
index cb25cddb759b..c662eeba66ab 100644
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -117,7 +117,7 @@ static noinline int backpointer_mod_err(struct btree_trans *trans,
 		prt_printf(&buf, "existing backpointer found when inserting ");
 		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&new_bp->k_i));
 		prt_newline(&buf);
-		printbuf_indent_add(&buf, 2);
+		guard(printbuf_indent)(&buf);
 
 		prt_printf(&buf, "found ");
 		bch2_bkey_val_to_text(&buf, c, found_bp);
@@ -127,7 +127,7 @@ static noinline int backpointer_mod_err(struct btree_trans *trans,
 		bch2_bkey_val_to_text(&buf, c, orig_k);
 	} else if (!will_check) {
 		prt_printf(&buf, "backpointer not found when deleting\n");
-		printbuf_indent_add(&buf, 2);
+		guard(printbuf_indent)(&buf);
 
 		prt_printf(&buf, "searching for ");
 		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&new_bp->k_i));
@@ -278,9 +278,20 @@ static struct btree *__bch2_backpointer_get_node(struct btree_trans *trans,
				  bp.v->level - 1,
				  0);
 	struct btree *b = bch2_btree_iter_peek_node(iter);
-	if (IS_ERR_OR_NULL(b))
+	if (IS_ERR(b))
 		goto err;
 
+	if (!b) {
+		/* Backpointer for nonexistent tree depth: */
+		bkey_init(&iter->k);
+		iter->k.p = bp.v->pos;
+		struct bkey_s_c k = { &iter->k };
+
+		int ret = backpointer_target_not_found(trans, bp, k, last_flushed, commit);
+		b = ret ? ERR_PTR(ret) : NULL;
+		goto err;
+	}
+
 	BUG_ON(b->c.level != bp.v->level - 1);
 
 	if (extent_matches_bp(c, bp.v->btree_id, bp.v->level,
@@ -809,7 +820,7 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
 	for (enum btree_id btree_id = 0;
	     btree_id < btree_id_nr_alive(c);
	     btree_id++) {
-		int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1;
+		int level, depth = btree_type_has_data_ptrs(btree_id) ?
0 : 1; ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, @@ -862,17 +873,25 @@ static int data_type_to_alloc_counter(enum bch_data_type t) } } -static int check_bucket_backpointers_to_extents(struct btree_trans *, struct bch_dev *, struct bpos); +static int check_bucket_backpointers_to_extents(struct btree_trans *, struct bch_dev *, struct bpos, + struct bkey_buf *last_flushed); static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct bkey_s_c alloc_k, bool *had_mismatch, - struct bkey_buf *last_flushed) + struct bkey_buf *last_flushed, + struct bpos *last_pos, + unsigned *nr_iters) { struct bch_fs *c = trans->c; struct bch_alloc_v4 a_convert; const struct bch_alloc_v4 *a = bch2_alloc_to_v4(alloc_k, &a_convert); bool need_commit = false; + if (!bpos_eq(*last_pos, alloc_k.k->p)) + *nr_iters = 0; + + *last_pos = alloc_k.k->p; + *had_mismatch = false; if (a->data_type == BCH_DATA_sb || @@ -926,6 +945,46 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b return ret; } + if (sectors[ALLOC_dirty] > a->dirty_sectors || + sectors[ALLOC_cached] > a->cached_sectors || + sectors[ALLOC_stripe] > a->stripe_sectors) { + if (*nr_iters) { + CLASS(printbuf, buf)(); + bch2_log_msg_start(c, &buf); + + prt_printf(&buf, "backpointer sectors > bucket sectors, but found no bad backpointers\n" + "bucket %llu:%llu data type %s, counters\n", + alloc_k.k->p.inode, + alloc_k.k->p.offset, + __bch2_data_types[a->data_type]); + if (sectors[ALLOC_dirty] > a->dirty_sectors) + prt_printf(&buf, "dirty: %u > %u\n", + sectors[ALLOC_dirty], a->dirty_sectors); + if (sectors[ALLOC_cached] > a->cached_sectors) + prt_printf(&buf, "cached: %u > %u\n", + sectors[ALLOC_cached], a->cached_sectors); + if (sectors[ALLOC_stripe] > a->stripe_sectors) + prt_printf(&buf, "stripe: %u > %u\n", + sectors[ALLOC_stripe], a->stripe_sectors); + + for_each_btree_key_max_norestart(trans, iter, BTREE_ID_backpointers, + bucket_pos_to_bp_start(ca, alloc_k.k->p), + bucket_pos_to_bp_end(ca, alloc_k.k->p), 0, bp_k, ret) { + bch2_bkey_val_to_text(&buf, c, bp_k); + prt_newline(&buf); + } + + bch2_print_str(c, KERN_ERR, buf.buf); + __WARN(); + return ret; + } + + *nr_iters += 1; + + return check_bucket_backpointers_to_extents(trans, ca, alloc_k.k->p, last_flushed) ?: + bch_err_throw(c, transaction_restart_nested); + } + if (sectors[ALLOC_dirty] != a->dirty_sectors || sectors[ALLOC_cached] != a->cached_sectors || sectors[ALLOC_stripe] != a->stripe_sectors) { @@ -943,13 +1002,6 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b return ret; } - if (sectors[ALLOC_dirty] > a->dirty_sectors || - sectors[ALLOC_cached] > a->cached_sectors || - sectors[ALLOC_stripe] > a->stripe_sectors) { - return check_bucket_backpointers_to_extents(trans, ca, alloc_k.k->p) ?: - bch_err_throw(c, transaction_restart_nested); - } - bool empty = (sectors[ALLOC_dirty] + sectors[ALLOC_stripe] + sectors[ALLOC_cached]) == 0; @@ -1113,6 +1165,8 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c) CLASS(btree_trans, trans)(c); struct extents_to_bp_state s = { .bp_start = POS_MIN }; + struct bpos last_pos = POS_MIN; + unsigned nr_iters = 0; bch2_bkey_buf_init(&s.last_flushed); bkey_init(&s.last_flushed.k->k); @@ -1121,7 +1175,8 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c) POS_MIN, BTREE_ITER_prefetch, k, ({ bool had_mismatch; bch2_fs_going_ro(c) ?: - check_bucket_backpointer_mismatch(trans, k, &had_mismatch, &s.last_flushed); + check_bucket_backpointer_mismatch(trans, k, 
&had_mismatch, &s.last_flushed, + &last_pos, &nr_iters); })); if (ret) goto err; @@ -1189,7 +1244,11 @@ static int check_bucket_backpointer_pos_mismatch(struct btree_trans *trans, if (ret) return ret; - return check_bucket_backpointer_mismatch(trans, k, had_mismatch, last_flushed); + struct bpos last_pos = POS_MIN; + unsigned nr_iters = 0; + return check_bucket_backpointer_mismatch(trans, k, had_mismatch, + last_flushed, + &last_pos, &nr_iters); } int bch2_check_bucket_backpointer_mismatch(struct btree_trans *trans, @@ -1253,22 +1312,21 @@ static int check_one_backpointer(struct btree_trans *trans, } static int check_bucket_backpointers_to_extents(struct btree_trans *trans, - struct bch_dev *ca, struct bpos bucket) + struct bch_dev *ca, struct bpos bucket, + struct bkey_buf *last_flushed) { u32 restart_count = trans->restart_count; - struct bkey_buf last_flushed; - bch2_bkey_buf_init(&last_flushed); - bkey_init(&last_flushed.k->k); int ret = for_each_btree_key_max(trans, iter, BTREE_ID_backpointers, bucket_pos_to_bp_start(ca, bucket), bucket_pos_to_bp_end(ca, bucket), 0, k, - check_one_backpointer(trans, BBPOS_MIN, BBPOS_MAX, k, &last_flushed) + check_one_backpointer(trans, BBPOS_MIN, BBPOS_MAX, k, last_flushed) ); - bch2_bkey_buf_exit(&last_flushed, trans->c); - return ret ?: trans_was_restarted(trans, restart_count); + return ret ?: + bch2_btree_write_buffer_flush_sync(trans) ?: /* make sure bad backpointers that were deleted are visible */ + trans_was_restarted(trans, restart_count); } static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans, diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 16d08dfb5f19..553031a3b06a 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -523,6 +523,7 @@ struct discard_in_flight { x(journal_read) \ x(fs_journal_alloc) \ x(fs_resize_on_mount) \ + x(sb_journal_sort) \ x(btree_node_read) \ x(btree_node_read_all_replicas) \ x(btree_node_scrub) \ @@ -674,6 +675,7 @@ struct bch_dev { x(error) \ x(topology_error) \ x(errors_fixed) \ + x(errors_fixed_silent) \ x(errors_not_fixed) \ x(no_invalid_checks) \ x(discard_mount_opt_set) \ @@ -807,6 +809,8 @@ struct bch_fs { struct bch_disk_groups_cpu __rcu *disk_groups; struct bch_opts opts; + atomic_t opt_change_cookie; + unsigned loglevel; unsigned prev_loglevel; diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index b2de993d802b..0839397105a9 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -654,7 +654,6 @@ struct bch_sb_field_ext { /* * field 1: version name * field 2: BCH_VERSION(major, minor) - * field 3: recovery passess required on upgrade */ #define BCH_METADATA_VERSIONS() \ x(bkey_renumber, BCH_VERSION(0, 10)) \ diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h index 035b2cb25077..49d0be6405d8 100644 --- a/fs/bcachefs/btree_cache.h +++ b/fs/bcachefs/btree_cache.h @@ -166,7 +166,7 @@ void bch2_btree_cache_to_text(struct printbuf *, const struct btree_cache *); do { \ if (trace_##event##_enabled()) { \ CLASS(printbuf, buf)(); \ - printbuf_indent_add(&buf, 2); \ + guard(printbuf_indent)(&buf); \ bch2_btree_pos_to_text(&buf, c, b); \ trace_##event(c, buf.buf); \ } \ diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 43f294284d57..2338feb8d8ed 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -717,16 +717,12 @@ fsck_err: static int bch2_gc_btree(struct btree_trans *trans, struct progress_indicator_state *progress, - enum btree_id btree, bool initial) + 
enum btree_id btree, unsigned target_depth,
+			 bool initial)
 {
 	struct bch_fs *c = trans->c;
-	unsigned target_depth = btree_node_type_has_triggers(__btree_node_type(0, btree)) ? 0 : 1;
 	int ret = 0;
 
-	/* We need to make sure every leaf node is readable before going RW */
-	if (initial)
-		target_depth = 0;
-
 	for (unsigned level = target_depth; level < BTREE_MAX_DEPTH; level++) {
 		struct btree *prev = NULL;
 		struct btree_iter iter;
@@ -797,7 +793,21 @@ static int bch2_gc_btrees(struct bch_fs *c)
 		if (IS_ERR_OR_NULL(bch2_btree_id_root(c, btree)->b))
 			continue;
 
-		ret = bch2_gc_btree(trans, &progress, btree, true);
+
+		unsigned target_depth = BIT_ULL(btree) & btree_leaf_has_triggers_mask ? 0 : 1;
+
+		/*
+		 * In fsck, we need to make sure every leaf node is readable
+		 * before going RW, otherwise we can no longer rewind inside
+		 * btree_lost_data to repair during the current fsck run.
+		 *
+		 * Otherwise, we can delay the repair to the next
+		 * mount or offline fsck.
+		 */
+		if (test_bit(BCH_FS_in_fsck, &c->flags))
+			target_depth = 0;
+
+		ret = bch2_gc_btree(trans, &progress, btree, target_depth, true);
 	}
 
 	bch_err_fn(c, ret);
@@ -1228,7 +1238,7 @@ int bch2_gc_gens(struct bch_fs *c)
 	}
 
 	for (unsigned i = 0; i < BTREE_ID_NR; i++)
-		if (btree_type_has_ptrs(i)) {
+		if (btree_type_has_data_ptrs(i)) {
 			c->gc_gens_btree	= i;
 			c->gc_gens_pos		= POS_MIN;
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 2e3dd9bacac5..52d21259ed6f 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -27,10 +27,15 @@
 #include <linux/moduleparam.h>
 #include <linux/sched/mm.h>
 
+static __maybe_unused unsigned bch2_btree_read_corrupt_ratio;
+static __maybe_unused int bch2_btree_read_corrupt_device;
+
 #ifdef CONFIG_BCACHEFS_DEBUG
-static unsigned bch2_btree_read_corrupt_ratio;
 module_param_named(btree_read_corrupt_ratio, bch2_btree_read_corrupt_ratio, uint, 0644);
 MODULE_PARM_DESC(btree_read_corrupt_ratio, "");
+
+module_param_named(btree_read_corrupt_device, bch2_btree_read_corrupt_device, int, 0644);
+MODULE_PARM_DESC(btree_read_corrupt_device, "");
 #endif
 
 static void bch2_btree_node_header_to_text(struct printbuf *out, struct btree_node *bn)
@@ -1438,7 +1443,9 @@ start:
 		memset(&bio->bi_iter, 0, sizeof(bio->bi_iter));
 		bio->bi_iter.bi_size = btree_buf_bytes(b);
 
-		bch2_maybe_corrupt_bio(bio, bch2_btree_read_corrupt_ratio);
+		if (bch2_btree_read_corrupt_device == rb->pick.ptr.dev ||
+		    bch2_btree_read_corrupt_device < 0)
+			bch2_maybe_corrupt_bio(bio, bch2_btree_read_corrupt_ratio);
 
 		ret = bch2_btree_node_read_done(c, ca, b, &failed, &buf);
 		if (ret != -BCH_ERR_btree_node_read_err_want_retry &&
@@ -2523,7 +2530,7 @@ do_write:
 
 	if (trace_btree_node_write_enabled()) {
 		CLASS(printbuf, buf)();
-		printbuf_indent_add(&buf, 2);
+		guard(printbuf_indent)(&buf);
 
 		prt_printf(&buf, "offset %u sectors %u bytes %u\n",
			   b->written,
			   sectors_to_write,
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index d52d577a900e..b72ed543d9c0 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -137,18 +137,8 @@ static void __bch2_btree_path_verify_cached(struct btree_trans *trans,
 static void __bch2_btree_path_verify_level(struct btree_trans *trans,
				struct btree_path *path, unsigned level)
 {
-	struct btree_path_level *l;
-	struct btree_node_iter tmp;
-	bool locked;
-	struct bkey_packed *p, *k;
-	struct printbuf buf1 = PRINTBUF;
-	struct printbuf buf2 = PRINTBUF;
-	struct printbuf buf3 = PRINTBUF;
-	const char *msg;
-
-	l	= &path->l[level];
-	tmp	= l->iter;
-	locked	= btree_node_locked(path, level);
+	struct
btree_path_level *l = &path->l[level]; + bool locked = btree_node_locked(path, level); if (path->cached) { if (!level) @@ -166,14 +156,17 @@ static void __bch2_btree_path_verify_level(struct btree_trans *trans, bch2_btree_node_iter_verify(&l->iter, l->b); - /* - * For interior nodes, the iterator will have skipped past deleted keys: - */ - p = level + /* For interior nodes, the iterator may have skipped past deleted keys: */ + struct btree_node_iter tmp = l->iter; + const struct bkey_packed *p = level ? bch2_btree_node_iter_prev(&tmp, l->b) : bch2_btree_node_iter_prev_all(&tmp, l->b); - k = bch2_btree_node_iter_peek_all(&l->iter, l->b); + tmp = l->iter; + const struct bkey_packed *k = level + ? bch2_btree_node_iter_peek(&tmp, l->b) + : bch2_btree_node_iter_peek_all(&tmp, l->b); + const char *msg; if (!(level > path->level && trans->journal_replay_not_finished)) { /* * We can't run these checks for interior nodes when we're still @@ -200,29 +193,31 @@ static void __bch2_btree_path_verify_level(struct btree_trans *trans, btree_node_unlock(trans, path, level); return; err: - bch2_bpos_to_text(&buf1, path->pos); + { + CLASS(printbuf, buf)(); + prt_printf(&buf, "path should be %s key at level %u", msg, level); - if (p) { - struct bkey uk = bkey_unpack_key(l->b, p); + prt_str(&buf, "\npath pos "); + bch2_bpos_to_text(&buf, path->pos); - bch2_bkey_to_text(&buf2, &uk); - } else { - prt_printf(&buf2, "(none)"); - } + prt_str(&buf, "\nprev key "); + if (p) { + struct bkey uk = bkey_unpack_key(l->b, p); + bch2_bkey_to_text(&buf, &uk); + } else { + prt_printf(&buf, "(none)"); + } - if (k) { - struct bkey uk = bkey_unpack_key(l->b, k); + prt_str(&buf, "\ncur key "); + if (k) { + struct bkey uk = bkey_unpack_key(l->b, k); + bch2_bkey_to_text(&buf, &uk); + } else { + prt_printf(&buf, "(none)"); + } - bch2_bkey_to_text(&buf3, &uk); - } else { - prt_printf(&buf3, "(none)"); + panic("%s\n", buf.buf); } - - panic("path should be %s key at level %u:\n" - "path pos %s\n" - "prev key %s\n" - "cur key %s\n", - msg, level, buf1.buf, buf2.buf, buf3.buf); } static void __bch2_btree_path_verify(struct btree_trans *trans, @@ -898,28 +893,53 @@ static noinline void btree_node_mem_ptr_set(struct btree_trans *trans, btree_node_unlock(trans, path, plevel); } +static noinline_for_stack int btree_node_missing_err(struct btree_trans *trans, + struct btree_path *path) +{ + struct bch_fs *c = trans->c; + CLASS(printbuf, buf)(); + + prt_str(&buf, "node not found at pos: "); + bch2_bpos_to_text(&buf, path->pos); + prt_str(&buf, "\n within parent node "); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&path_l(path)->b->key)); + prt_newline(&buf); + + return __bch2_topology_error(c, &buf); +} + +static noinline_for_stack int btree_node_gap_err(struct btree_trans *trans, + struct btree_path *path, + struct bkey_i *k) +{ + struct bch_fs *c = trans->c; + CLASS(printbuf, buf)(); + + prt_str(&buf, "node doesn't cover expected range at pos: "); + bch2_bpos_to_text(&buf, path->pos); + prt_str(&buf, "\n within parent node "); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&path_l(path)->b->key)); + prt_str(&buf, "\n but got node: "); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k)); + prt_newline(&buf); + + return __bch2_topology_error(c, &buf); +} + static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans, struct btree_path *path, enum btree_iter_update_trigger_flags flags) { struct bch_fs *c = trans->c; struct btree_path_level *l = path_l(path); - struct btree_and_journal_iter jiter; - struct bkey_s_c k; int ret 
= 0; + struct btree_and_journal_iter jiter; __bch2_btree_and_journal_iter_init_node_iter(trans, &jiter, l->b, l->iter, path->pos); - k = bch2_btree_and_journal_iter_peek(c, &jiter); + struct bkey_s_c k = bch2_btree_and_journal_iter_peek(c, &jiter); if (!k.k) { - CLASS(printbuf, buf)(); - - prt_str(&buf, "node not found at pos "); - bch2_bpos_to_text(&buf, path->pos); - prt_str(&buf, " at btree "); - bch2_btree_pos_to_text(&buf, c, l->b); - - ret = bch2_fs_topology_error(c, "%s", buf.buf); + ret = btree_node_missing_err(trans, path); goto err; } @@ -934,20 +954,16 @@ err: return ret; } -static noinline_for_stack int btree_node_missing_err(struct btree_trans *trans, - struct btree_path *path) +static inline bool bpos_in_btree_node_key(struct bpos pos, const struct bkey_i *k) { - struct bch_fs *c = trans->c; - CLASS(printbuf, buf)(); + if (bpos_gt(pos, k->k.p)) + return false; - prt_str(&buf, "node not found at pos "); - bch2_bpos_to_text(&buf, path->pos); - prt_str(&buf, " within parent node "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&path_l(path)->b->key)); + if (k->k.type == KEY_TYPE_btree_ptr_v2 && + bpos_lt(pos, bkey_i_to_btree_ptr_v2_c(k)->v.min_key)) + return false; - bch2_fs_fatal_error(c, "%s", buf.buf); - printbuf_exit(&buf); - return bch_err_throw(c, btree_need_topology_repair); + return true; } static __always_inline int btree_path_down(struct btree_trans *trans, @@ -983,6 +999,9 @@ static __always_inline int btree_path_down(struct btree_trans *trans, } } + if (unlikely(!bpos_in_btree_node_key(path->pos, &trans->btree_path_down))) + return btree_node_gap_err(trans, path, &trans->btree_path_down); + b = bch2_btree_node_get(trans, path, &trans->btree_path_down, level, lock_type, trace_ip); ret = PTR_ERR_OR_ZERO(b); @@ -1488,7 +1507,7 @@ void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans) { prt_printf(buf, "%u transaction updates for %s journal seq %llu\n", trans->nr_updates, trans->fn, trans->journal_res.seq); - printbuf_indent_add(buf, 2); + guard(printbuf_indent)(buf); trans_for_each_update(trans, i) { struct bkey_s_c old = { &i->old_k, i->old_v }; @@ -1514,8 +1533,6 @@ void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans) bch2_journal_entry_to_text(buf, trans->c, e); prt_newline(buf); } - - printbuf_indent_sub(buf, 2); } static void bch2_btree_path_to_text_short(struct printbuf *out, struct btree_trans *trans, btree_path_idx_t path_idx) @@ -1568,8 +1585,8 @@ void bch2_btree_path_to_text(struct printbuf *out, struct btree_trans *trans, bt prt_printf(out, " uptodate %u locks_want %u", path->uptodate, path->locks_want); prt_newline(out); + guard(printbuf_indent)(out); - printbuf_indent_add(out, 2); for (unsigned l = 0; l < BTREE_MAX_DEPTH; l++) { prt_printf(out, "l=%u locks %s seq %u node ", l, btree_node_locked_str(btree_node_locked_type(path, l)), @@ -1582,7 +1599,6 @@ void bch2_btree_path_to_text(struct printbuf *out, struct btree_trans *trans, bt prt_printf(out, "%px", path->l[l].b); prt_newline(out); } - printbuf_indent_sub(out, 2); } static noinline __cold diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c index 38c5643e8a78..a4f8aac448c0 100644 --- a/fs/bcachefs/btree_locking.c +++ b/fs/bcachefs/btree_locking.c @@ -205,9 +205,8 @@ static noinline __noreturn void break_cycle_fail(struct lock_graph *g) bch2_btree_trans_to_text(&buf, trans); prt_printf(&buf, "backtrace:\n"); - printbuf_indent_add(&buf, 2); - bch2_prt_task_backtrace(&buf, trans->locking_wait.task, 2, GFP_NOWAIT); - 
printbuf_indent_sub(&buf, 2); + scoped_guard(printbuf_indent, &buf) + bch2_prt_task_backtrace(&buf, trans->locking_wait.task, 2, GFP_NOWAIT); prt_newline(&buf); } diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c index b618a0bd1186..c0dff992ad60 100644 --- a/fs/bcachefs/btree_node_scan.c +++ b/fs/bcachefs/btree_node_scan.c @@ -42,12 +42,11 @@ static void found_btree_node_to_text(struct printbuf *out, struct bch_fs *c, con static void found_btree_nodes_to_text(struct printbuf *out, struct bch_fs *c, found_btree_nodes nodes) { - printbuf_indent_add(out, 2); + guard(printbuf_indent)(out); darray_for_each(nodes, i) { found_btree_node_to_text(out, c, i); prt_newline(out); } - printbuf_indent_sub(out, 2); } static void found_btree_node_to_key(struct bkey_i *k, const struct found_btree_node *f) diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 5fa7f2f9f1e9..2966971ee43e 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -970,6 +970,7 @@ do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans, struct bkey_i *accounting; retry: + memset(&trans->fs_usage_delta, 0, sizeof(trans->fs_usage_delta)); percpu_down_read(&c->mark_lock); for (accounting = btree_trans_subbuf_base(trans, &trans->accounting); accounting != btree_trans_subbuf_top(trans, &trans->accounting); @@ -983,6 +984,9 @@ retry: } percpu_up_read(&c->mark_lock); + /* Only fatal errors are possible later, so no need to revert this */ + bch2_trans_account_disk_usage_change(trans); + trans_for_each_update(trans, i) { ret = bch2_journal_key_insert(c, i->btree_id, i->level, i->k); if (ret) diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index e893eb938bb3..9e3c851200eb 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -840,6 +840,10 @@ static inline bool btree_node_type_has_triggers(enum btree_node_type type) return BIT_ULL(type) & BTREE_NODE_TYPE_HAS_TRIGGERS; } +/* A mask of btree id bits that have triggers for their leaves */ +__maybe_unused +static const u64 btree_leaf_has_triggers_mask = BTREE_NODE_TYPE_HAS_TRIGGERS >> 1; + static const u64 btree_is_extents_mask = 0 #define x(name, nr, flags, ...) |((!!((flags) & BTREE_IS_extents)) << nr) BCH_BTREE_IDS() @@ -883,15 +887,15 @@ static inline bool btree_type_has_snapshot_field(enum btree_id btree) return BIT_ULL(btree) & mask; } -static inline bool btree_type_has_ptrs(enum btree_id btree) -{ - const u64 mask = 0 +static const u64 btree_has_data_ptrs_mask = 0 #define x(name, nr, flags, ...) |((!!((flags) & BTREE_IS_data)) << nr) BCH_BTREE_IDS() #undef x ; - return BIT_ULL(btree) & mask; +static inline bool btree_type_has_data_ptrs(enum btree_id btree) +{ + return BIT_ULL(btree) & btree_has_data_ptrs_mask; } static inline bool btree_type_uses_write_buffer(enum btree_id btree) diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index a9877a47bfc6..a8cd7a5a6e7d 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -324,9 +324,6 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, struct btree *b; struct bch_devs_list devs_have = (struct bch_devs_list) { 0 }; enum bch_watermark watermark = flags & BCH_WATERMARK_MASK; - unsigned nr_reserve = watermark < BCH_WATERMARK_reclaim - ? 
BTREE_NODE_RESERVE - : 0; int ret; b = bch2_btree_node_mem_alloc(trans, interior_node); @@ -334,41 +331,6 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, return b; BUG_ON(b->ob.nr); - - mutex_lock(&c->btree_reserve_cache_lock); - if (unlikely(c->open_buckets_nr_free <= bch2_open_buckets_reserved(watermark))) { - guard(spinlock)(&c->freelist_lock); - if (c->open_buckets_nr_free <= bch2_open_buckets_reserved(watermark)) { - if (cl) - closure_wait(&c->open_buckets_wait, cl); - - ret = cl - ? bch_err_throw(c, bucket_alloc_blocked) - : bch_err_throw(c, open_buckets_empty); - mutex_unlock(&c->btree_reserve_cache_lock); - goto err; - } - } - - if (c->btree_reserve_cache_nr > nr_reserve) { - for (struct btree_alloc *a = c->btree_reserve_cache; - a < c->btree_reserve_cache + c->btree_reserve_cache_nr;) { - /* check if it has sufficient durability */ - - if (!can_use_btree_node(c, res, target, bkey_i_to_s_c(&a->k))) { - bch2_open_buckets_put(c, &a->ob); - *a = c->btree_reserve_cache[--c->btree_reserve_cache_nr]; - continue; - } - - bkey_copy(&b->key, &a->k); - b->ob = a->ob; - *a = c->btree_reserve_cache[--c->btree_reserve_cache_nr]; - mutex_unlock(&c->btree_reserve_cache_lock); - goto out; - } - } - mutex_unlock(&c->btree_reserve_cache_lock); retry: ret = bch2_alloc_sectors_start_trans(trans, target ?: @@ -398,12 +360,29 @@ retry: goto retry; } + mutex_lock(&c->btree_reserve_cache_lock); + while (c->btree_reserve_cache_nr) { + struct btree_alloc *a = c->btree_reserve_cache + --c->btree_reserve_cache_nr; + + /* check if it has sufficient durability */ + + if (can_use_btree_node(c, res, target, bkey_i_to_s_c(&a->k))) { + bkey_copy(&b->key, &a->k); + b->ob = a->ob; + mutex_unlock(&c->btree_reserve_cache_lock); + goto out; + } + + bch2_open_buckets_put(c, &a->ob); + } + mutex_unlock(&c->btree_reserve_cache_lock); + bkey_btree_ptr_v2_init(&b->key); bch2_alloc_sectors_append_ptrs(c, wp, &b->key, btree_sectors(c), false); bch2_open_bucket_get(c, wp, &b->ob); - bch2_alloc_sectors_done(c, wp); out: + bch2_alloc_sectors_done(c, wp); six_unlock_write(&b->c.lock); six_unlock_intent(&b->c.lock); @@ -2810,7 +2789,7 @@ static void bch2_btree_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct btree_alloc *a) { - printbuf_indent_add(out, 2); + guard(printbuf_indent)(out); bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&a->k)); prt_newline(out); @@ -2818,8 +2797,6 @@ static void bch2_btree_alloc_to_text(struct printbuf *out, unsigned i; open_bucket_for_each(c, &a->ob, ob, i) bch2_open_bucket_to_text(out, c, ob); - - printbuf_indent_sub(out, 2); } void bch2_btree_reserve_cache_to_text(struct printbuf *out, struct bch_fs *c) diff --git a/fs/bcachefs/checksum.h b/fs/bcachefs/checksum.h index 7bd9cf6104ca..10bfadcde80a 100644 --- a/fs/bcachefs/checksum.h +++ b/fs/bcachefs/checksum.h @@ -130,7 +130,7 @@ static inline enum bch_csum_type bch2_csum_opt_to_type(enum bch_csum_opt type, } static inline enum bch_csum_type bch2_data_checksum_type(struct bch_fs *c, - struct bch_io_opts opts) + struct bch_inode_opts opts) { if (opts.nocow) return 0; diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 2c997fddefb3..7a0da6cdf78c 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -11,6 +11,7 @@ #include "ec.h" #include "error.h" #include "extents.h" +#include "inode.h" #include "io_write.h" #include "keylist.h" #include "move.h" @@ -428,13 +429,18 @@ restart_drop_extra_replicas: goto out; } + struct bch_inode_opts opts; + ret = bch2_trans_log_str(trans, 
bch2_data_update_type_strs[m->type]) ?: bch2_trans_log_bkey(trans, m->btree_id, 0, m->k.k) ?: bch2_insert_snapshot_whiteouts(trans, m->btree_id, k.k->p, bkey_start_pos(&insert->k)) ?: bch2_insert_snapshot_whiteouts(trans, m->btree_id, k.k->p, insert->k.p) ?: - bch2_bkey_set_needs_rebalance(c, &op->opts, insert) ?: + bch2_inum_snapshot_opts_get(trans, k.k->p.inode, k.k->p.snapshot, &opts) ?: + bch2_bkey_set_needs_rebalance(c, &opts, insert, + SET_NEEDS_REBALANCE_foreground, + m->op.opts.change_cookie) ?: bch2_trans_update(trans, &iter, insert, BTREE_UPDATE_internal_snapshot_node); if (ret) @@ -613,7 +619,7 @@ int bch2_update_unwritten_extent(struct btree_trans *trans, } void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c, - struct bch_io_opts *io_opts, + struct bch_inode_opts *io_opts, struct data_update_opts *data_opts) { if (!out->nr_tabstops) @@ -657,31 +663,32 @@ void bch2_data_update_to_text(struct printbuf *out, struct data_update *m) prt_str_indented(out, "old key:\t"); bch2_bkey_val_to_text(out, m->op.c, bkey_i_to_s_c(m->k.k)); + + bch2_write_op_to_text(out, &m->op); } void bch2_data_update_inflight_to_text(struct printbuf *out, struct data_update *m) { bch2_bkey_val_to_text(out, m->op.c, bkey_i_to_s_c(m->k.k)); prt_newline(out); - printbuf_indent_add(out, 2); + guard(printbuf_indent)(out); bch2_data_update_opts_to_text(out, m->op.c, &m->op.opts, &m->data_opts); if (!m->read_done) { prt_printf(out, "read:\n"); - printbuf_indent_add(out, 2); + guard(printbuf_indent)(out); bch2_read_bio_to_text(out, m->op.c, &m->rbio); } else { prt_printf(out, "write:\n"); - printbuf_indent_add(out, 2); + guard(printbuf_indent)(out); bch2_write_op_to_text(out, &m->op); } - printbuf_indent_sub(out, 4); } int bch2_extent_drop_ptrs(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k, - struct bch_io_opts *io_opts, + struct bch_inode_opts *io_opts, struct data_update_opts *data_opts) { struct bch_fs *c = trans->c; @@ -731,7 +738,7 @@ int bch2_extent_drop_ptrs(struct btree_trans *trans, } static int __bch2_data_update_bios_init(struct data_update *m, struct bch_fs *c, - struct bch_io_opts *io_opts, + struct bch_inode_opts *io_opts, unsigned buf_bytes) { unsigned nr_vecs = DIV_ROUND_UP(buf_bytes, PAGE_SIZE); @@ -758,7 +765,7 @@ static int __bch2_data_update_bios_init(struct data_update *m, struct bch_fs *c, } int bch2_data_update_bios_init(struct data_update *m, struct bch_fs *c, - struct bch_io_opts *io_opts) + struct bch_inode_opts *io_opts) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(m->k.k)); const union bch_extent_entry *entry; @@ -830,7 +837,7 @@ int bch2_data_update_init(struct btree_trans *trans, struct moving_context *ctxt, struct data_update *m, struct write_point_specifier wp, - struct bch_io_opts *io_opts, + struct bch_inode_opts *io_opts, struct data_update_opts data_opts, enum btree_id btree_id, struct bkey_s_c k) diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h index fc12aa65366f..3b0ba6f6497f 100644 --- a/fs/bcachefs/data_update.h +++ b/fs/bcachefs/data_update.h @@ -23,7 +23,7 @@ struct data_update_opts { }; void bch2_data_update_opts_to_text(struct printbuf *, struct bch_fs *, - struct bch_io_opts *, struct data_update_opts *); + struct bch_inode_opts *, struct data_update_opts *); #define BCH_DATA_UPDATE_TYPES() \ x(copygc, 0) \ @@ -76,18 +76,18 @@ void bch2_data_update_read_done(struct data_update *); int bch2_extent_drop_ptrs(struct btree_trans *, struct btree_iter *, struct bkey_s_c, - struct bch_io_opts *, + 
struct bch_inode_opts *, struct data_update_opts *); int bch2_data_update_bios_init(struct data_update *, struct bch_fs *, - struct bch_io_opts *); + struct bch_inode_opts *); void bch2_data_update_exit(struct data_update *); int bch2_data_update_init(struct btree_trans *, struct btree_iter *, struct moving_context *, struct data_update *, struct write_point_specifier, - struct bch_io_opts *, struct data_update_opts, + struct bch_inode_opts *, struct data_update_opts, enum btree_id, struct bkey_s_c); void bch2_data_update_opts_normalize(struct bkey_s_c, struct data_update_opts *); diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index 33cb94f70b19..ebfb68e2e035 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -282,16 +282,13 @@ void bch2_btree_node_ondisk_to_text(struct printbuf *out, struct bch_fs *c, le64_to_cpu(i->journal_seq)); offset += sectors; - printbuf_indent_add(out, 4); + scoped_guard(printbuf_indent, out) + for (k = i->start; k != vstruct_last(i); k = bkey_p_next(k)) { + struct bkey u; - for (k = i->start; k != vstruct_last(i); k = bkey_p_next(k)) { - struct bkey u; - - bch2_bkey_val_to_text(out, c, bkey_disassemble(b, k, &u)); - prt_newline(out); - } - - printbuf_indent_sub(out, 4); + bch2_bkey_val_to_text(out, c, bkey_disassemble(b, k, &u)); + prt_newline(out); + } } out: if (bio) @@ -468,7 +465,7 @@ static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs * bch2_btree_id_level_to_text(out, b->c.btree_id, b->c.level); prt_printf(out, "\n"); - printbuf_indent_add(out, 2); + guard(printbuf_indent)(out); bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key)); prt_newline(out); @@ -488,8 +485,6 @@ static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs * &b->writes[1].journal, b->writes[1].journal.seq); prt_printf(out, "ob:\t%u\n", b->ob.nr); - - printbuf_indent_sub(out, 2); } static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf, @@ -605,9 +600,8 @@ restart: bch2_btree_trans_to_text(&i->buf, trans); prt_printf(&i->buf, "backtrace:\n"); - printbuf_indent_add(&i->buf, 2); - bch2_prt_task_backtrace(&i->buf, trans->locking_wait.task, 0, GFP_KERNEL); - printbuf_indent_sub(&i->buf, 2); + scoped_guard(printbuf_indent, &i->buf) + bch2_prt_task_backtrace(&i->buf, trans->locking_wait.task, 0, GFP_KERNEL); prt_newline(&i->buf); closure_put(&trans->ref); @@ -765,40 +759,35 @@ static ssize_t btree_transaction_stats_read(struct file *file, char __user *buf, break; prt_printf(&i->buf, "%s:\n", bch2_btree_transaction_fns[i->iter]); - printbuf_indent_add(&i->buf, 2); + guard(printbuf_indent)(&i->buf); guard(mutex)(&s->lock); prt_printf(&i->buf, "Max mem used: %u\n", s->max_mem); #ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE - printbuf_indent_add(&i->buf, 2); - bch2_trans_kmalloc_trace_to_text(&i->buf, &s->trans_kmalloc_trace); - printbuf_indent_sub(&i->buf, 2); + scoped_guard(printbuf_indent, &i->buf) + bch2_trans_kmalloc_trace_to_text(&i->buf, &s->trans_kmalloc_trace); #endif prt_printf(&i->buf, "Transaction duration:\n"); - printbuf_indent_add(&i->buf, 2); - bch2_time_stats_to_text(&i->buf, &s->duration); - printbuf_indent_sub(&i->buf, 2); + scoped_guard(printbuf_indent, &i->buf) + bch2_time_stats_to_text(&i->buf, &s->duration); if (IS_ENABLED(CONFIG_BCACHEFS_LOCK_TIME_STATS)) { prt_printf(&i->buf, "Lock hold times:\n"); - printbuf_indent_add(&i->buf, 2); - bch2_time_stats_to_text(&i->buf, &s->lock_hold_times); - printbuf_indent_sub(&i->buf, 2); + scoped_guard(printbuf_indent, &i->buf) + 
bch2_time_stats_to_text(&i->buf, &s->lock_hold_times); } if (s->max_paths_text) { prt_printf(&i->buf, "Maximum allocated btree paths (%u):\n", s->nr_max_paths); - printbuf_indent_add(&i->buf, 2); - prt_str_indented(&i->buf, s->max_paths_text); - printbuf_indent_sub(&i->buf, 2); + scoped_guard(printbuf_indent, &i->buf) + prt_str_indented(&i->buf, s->max_paths_text); } - printbuf_indent_sub(&i->buf, 2); prt_newline(&i->buf); i->iter++; } diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index f0ebf91cd5fd..a99f821c6a1c 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -239,10 +239,12 @@ int bch2_accounting_validate(struct bch_fs *c, struct bkey_s_c k, c, accounting_key_junk_at_end, "junk at end of accounting key"); - bkey_fsck_err_on(bch2_accounting_counters(k.k) != bch2_accounting_type_nr_counters[acc_k.type], + const unsigned nr_counters = bch2_accounting_counters(k.k); + + bkey_fsck_err_on(!nr_counters || nr_counters > BCH_ACCOUNTING_MAX_COUNTERS, c, accounting_key_nr_counters_wrong, "accounting key with %u counters, should be %u", - bch2_accounting_counters(k.k), bch2_accounting_type_nr_counters[acc_k.type]); + nr_counters, bch2_accounting_type_nr_counters[acc_k.type]); fsck_err: return ret; } @@ -359,10 +361,13 @@ static int __bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accoun accounting_pos_cmp, &a.k->p) < acc->k.nr) return 0; + struct disk_accounting_pos acc_k; + bpos_to_disk_accounting_pos(&acc_k, a.k->p); + struct accounting_mem_entry n = { .pos = a.k->p, .bversion = a.k->bversion, - .nr_counters = bch2_accounting_counters(a.k), + .nr_counters = bch2_accounting_type_nr_counters[acc_k.type], .v[0] = __alloc_percpu_gfp(n.nr_counters * sizeof(u64), sizeof(u64), GFP_KERNEL), }; @@ -878,46 +883,44 @@ int bch2_accounting_read(struct bch_fs *c) *dst++ = *i; keys->gap = keys->nr = dst - keys->data; - guard(percpu_write)(&c->mark_lock); - - darray_for_each_reverse(acc->k, i) { - struct disk_accounting_pos acc_k; - bpos_to_disk_accounting_pos(&acc_k, i->pos); + CLASS(printbuf, underflow_err)(); - u64 v[BCH_ACCOUNTING_MAX_COUNTERS]; - memset(v, 0, sizeof(v)); + scoped_guard(percpu_write, &c->mark_lock) { + darray_for_each_reverse(acc->k, i) { + struct disk_accounting_pos acc_k; + bpos_to_disk_accounting_pos(&acc_k, i->pos); - for (unsigned j = 0; j < i->nr_counters; j++) - v[j] = percpu_u64_get(i->v[0] + j); + u64 v[BCH_ACCOUNTING_MAX_COUNTERS]; + memset(v, 0, sizeof(v)); - /* - * If the entry counters are zeroed, it should be treated as - * nonexistent - it might point to an invalid device. - * - * Remove it, so that if it's re-added it gets re-marked in the - * superblock: - */ - ret = bch2_is_zero(v, sizeof(v[0]) * i->nr_counters) - ? -BCH_ERR_remove_disk_accounting_entry - : bch2_disk_accounting_validate_late(trans, &acc_k, v, i->nr_counters); - - if (ret == -BCH_ERR_remove_disk_accounting_entry) { - free_percpu(i->v[0]); - free_percpu(i->v[1]); - darray_remove_item(&acc->k, i); - ret = 0; - continue; - } + for (unsigned j = 0; j < i->nr_counters; j++) + v[j] = percpu_u64_get(i->v[0] + j); - if (ret) - return ret; - } + /* + * If the entry counters are zeroed, it should be treated as + * nonexistent - it might point to an invalid device. + * + * Remove it, so that if it's re-added it gets re-marked in the + * superblock: + */ + ret = bch2_is_zero(v, sizeof(v[0]) * i->nr_counters) + ? 
-BCH_ERR_remove_disk_accounting_entry + : bch2_disk_accounting_validate_late(trans, &acc_k, v, i->nr_counters); + + if (ret == -BCH_ERR_remove_disk_accounting_entry) { + free_percpu(i->v[0]); + free_percpu(i->v[1]); + darray_remove_item(&acc->k, i); + ret = 0; + continue; + } - eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), - accounting_pos_cmp, NULL); + if (ret) + return ret; + } - scoped_guard(preempt) { - struct bch_fs_usage_base *usage = this_cpu_ptr(c->usage); + eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), + accounting_pos_cmp, NULL); for (unsigned i = 0; i < acc->k.nr; i++) { struct disk_accounting_pos k; @@ -939,27 +942,20 @@ int bch2_accounting_read(struct bch_fs *c) underflow |= (s64) v[j] < 0; if (underflow) { - CLASS(printbuf, buf)(); - bch2_log_msg_start(c, &buf); - - prt_printf(&buf, "Accounting underflow for\n"); - bch2_accounting_key_to_text(&buf, &k); + if (!underflow_err.pos) { + bch2_log_msg_start(c, &underflow_err); + prt_printf(&underflow_err, "Accounting underflow for\n"); + } + bch2_accounting_key_to_text(&underflow_err, &k); for (unsigned j = 0; j < acc->k.data[i].nr_counters; j++) - prt_printf(&buf, " %lli", v[j]); - - bool print = bch2_count_fsck_err(c, accounting_key_underflow, &buf); - unsigned pos = buf.pos; - ret = bch2_run_explicit_recovery_pass(c, &buf, - BCH_RECOVERY_PASS_check_allocations, 0); - print |= buf.pos != pos; - - if (print) - bch2_print_str(c, KERN_ERR, buf.buf); - if (ret) - return ret; + prt_printf(&underflow_err, " %lli", v[j]); + prt_newline(&underflow_err); } + guard(preempt)(); + struct bch_fs_usage_base *usage = this_cpu_ptr(c->usage); + switch (k.type) { case BCH_DISK_ACCOUNTING_persistent_reserved: usage->reserved += v[0] * k.persistent_reserved.nr_replicas; @@ -986,24 +982,60 @@ int bch2_accounting_read(struct bch_fs *c) } } + if (underflow_err.pos) { + bool print = bch2_count_fsck_err(c, accounting_key_underflow, &underflow_err); + unsigned pos = underflow_err.pos; + ret = bch2_run_explicit_recovery_pass(c, &underflow_err, + BCH_RECOVERY_PASS_check_allocations, 0); + print |= underflow_err.pos != pos; + + if (print) + bch2_print_str(c, KERN_ERR, underflow_err.buf); + if (ret) + return ret; + } + return ret; } -int bch2_dev_usage_remove(struct bch_fs *c, unsigned dev) +int bch2_dev_usage_remove(struct bch_fs *c, struct bch_dev *ca) { CLASS(btree_trans, trans)(c); + + struct disk_accounting_pos start; + disk_accounting_key_init(start, dev_data_type, .dev = ca->dev_idx); + + struct disk_accounting_pos end; + disk_accounting_key_init(end, dev_data_type, .dev = ca->dev_idx, .data_type = U8_MAX); + return bch2_btree_write_buffer_flush_sync(trans) ?: - for_each_btree_key_commit(trans, iter, BTREE_ID_accounting, POS_MIN, - BTREE_ITER_all_snapshots, k, NULL, NULL, 0, ({ - struct disk_accounting_pos acc; - bpos_to_disk_accounting_pos(&acc, k.k->p); - - acc.type == BCH_DISK_ACCOUNTING_dev_data_type && - acc.dev_data_type.dev == dev - ? 
bch2_btree_bit_mod_buffered(trans, BTREE_ID_accounting, k.k->p, 0) - : 0; - })) ?: - bch2_btree_write_buffer_flush_sync(trans); + commit_do(trans, NULL, NULL, 0, ({ + struct bkey_s_c k; + int ret = 0; + + for_each_btree_key_max_norestart(trans, iter, BTREE_ID_accounting, + disk_accounting_pos_to_bpos(&start), + disk_accounting_pos_to_bpos(&end), + BTREE_ITER_all_snapshots, k, ret) { + if (k.k->type != KEY_TYPE_accounting) + continue; + + struct disk_accounting_pos acc; + bpos_to_disk_accounting_pos(&acc, k.k->p); + + const unsigned nr = bch2_accounting_counters(k.k); + u64 v[BCH_ACCOUNTING_MAX_COUNTERS]; + memcpy_u64s_small(v, bkey_s_c_to_accounting(k).v->d, nr); + + bch2_u64s_neg(v, nr); + + ret = bch2_disk_accounting_mod(trans, &acc, v, nr, false); + if (ret) + break; + } + + ret; + })) ?: bch2_btree_write_buffer_flush_sync(trans); } int bch2_dev_usage_init(struct bch_dev *ca, bool gc) @@ -1074,13 +1106,17 @@ void bch2_verify_accounting_clean(struct bch_fs *c) case BCH_DISK_ACCOUNTING_dev_data_type: { { guard(rcu)(); /* scoped guard is a loop, and doesn't play nicely with continue */ + const enum bch_data_type data_type = acc_k.dev_data_type.data_type; struct bch_dev *ca = bch2_dev_rcu_noerror(c, acc_k.dev_data_type.dev); if (!ca) continue; - v[0] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].buckets); - v[1] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].sectors); - v[2] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].fragmented); + v[0] = percpu_u64_get(&ca->usage->d[data_type].buckets); + v[1] = percpu_u64_get(&ca->usage->d[data_type].sectors); + v[2] = percpu_u64_get(&ca->usage->d[data_type].fragmented); + + if (data_type == BCH_DATA_sb || data_type == BCH_DATA_journal) + base.hidden += a.v->d[0] * ca->mi.bucket_size; } if (memcmp(a.v->d, v, 3 * sizeof(u64))) { @@ -1108,7 +1144,7 @@ void bch2_verify_accounting_clean(struct bch_fs *c) mismatch = true; \ } - //check(hidden); + check(hidden); check(btree); check(data); check(cached); diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h index cc73cce98a44..c0d3d7e8fda6 100644 --- a/fs/bcachefs/disk_accounting.h +++ b/fs/bcachefs/disk_accounting.h @@ -186,11 +186,15 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, break; case BCH_DISK_ACCOUNTING_dev_data_type: { guard(rcu)(); + const enum bch_data_type data_type = acc_k.dev_data_type.data_type; struct bch_dev *ca = bch2_dev_rcu_noerror(c, acc_k.dev_data_type.dev); if (ca) { - this_cpu_add(ca->usage->d[acc_k.dev_data_type.data_type].buckets, a.v->d[0]); - this_cpu_add(ca->usage->d[acc_k.dev_data_type.data_type].sectors, a.v->d[1]); - this_cpu_add(ca->usage->d[acc_k.dev_data_type.data_type].fragmented, a.v->d[2]); + this_cpu_add(ca->usage->d[data_type].buckets, a.v->d[0]); + this_cpu_add(ca->usage->d[data_type].sectors, a.v->d[1]); + this_cpu_add(ca->usage->d[data_type].fragmented, a.v->d[2]); + + if (data_type == BCH_DATA_sb || data_type == BCH_DATA_journal) + trans->fs_usage_delta.hidden += a.v->d[0] * ca->mi.bucket_size; } break; } @@ -212,9 +216,9 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, struct accounting_mem_entry *e = &acc->k.data[idx]; - EBUG_ON(bch2_accounting_counters(a.k) != e->nr_counters); + const unsigned nr = min_t(unsigned, bch2_accounting_counters(a.k), e->nr_counters); - for (unsigned i = 0; i < bch2_accounting_counters(a.k); i++) + for (unsigned i = 0; i < nr; i++) this_cpu_add(e->v[gc][i], a.v->d[i]); return 0; } @@ -297,7 +301,7 @@ int 
bch2_gc_accounting_done(struct bch_fs *); int bch2_accounting_read(struct bch_fs *); -int bch2_dev_usage_remove(struct bch_fs *, unsigned); +int bch2_dev_usage_remove(struct bch_fs *, struct bch_dev *); int bch2_dev_usage_init(struct bch_dev *, bool); void bch2_verify_accounting_clean(struct bch_fs *c); diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index 9e69263eb796..a16f55d98d97 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -468,10 +468,10 @@ int __bch2_fsck_err(struct bch_fs *c, if ((flags & FSCK_ERR_SILENT) || test_bit(err, c->sb.errors_silent)) { - ret = flags & FSCK_CAN_FIX + set_bit(BCH_FS_errors_fixed_silent, &c->flags); + return flags & FSCK_CAN_FIX ? bch_err_throw(c, fsck_fix) : bch_err_throw(c, fsck_ignore); - goto err; } printbuf_indent_add_nextline(out, 2); diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 68a61f7bc737..86aa93ea2345 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -1151,7 +1151,7 @@ bch2_extent_has_ptr(struct bkey_s_c k1, struct extent_ptr_decoded p1, struct bke return NULL; } -static bool want_cached_ptr(struct bch_fs *c, struct bch_io_opts *opts, +static bool want_cached_ptr(struct bch_fs *c, struct bch_inode_opts *opts, struct bch_extent_ptr *ptr) { unsigned target = opts->promote_target ?: opts->foreground_target; @@ -1165,7 +1165,7 @@ static bool want_cached_ptr(struct bch_fs *c, struct bch_io_opts *opts, } void bch2_extent_ptr_set_cached(struct bch_fs *c, - struct bch_io_opts *opts, + struct bch_inode_opts *opts, struct bkey_s k, struct bch_extent_ptr *ptr) { @@ -1241,7 +1241,7 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k) * the promote target. */ bool bch2_extent_normalize_by_opts(struct bch_fs *c, - struct bch_io_opts *opts, + struct bch_inode_opts *opts, struct bkey_s k) { struct bkey_ptrs ptrs; @@ -1270,14 +1270,14 @@ void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struc guard(rcu)(); struct bch_dev *ca = bch2_dev_rcu_noerror(c, ptr->dev); if (!ca) { - prt_printf(out, "ptr: %u:%llu gen %u%s", ptr->dev, + prt_printf(out, "%u:%llu gen %u%s", ptr->dev, (u64) ptr->offset, ptr->gen, ptr->cached ? 
" cached" : ""); } else { u32 offset; u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset); - prt_printf(out, "ptr: %u:%llu:%u gen %u", + prt_printf(out, "%u:%llu:%u gen %u", ptr->dev, b, offset, ptr->gen); if (ca->mi.durability != 1) prt_printf(out, " d=%u", ca->mi.durability); @@ -1295,7 +1295,7 @@ void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struc void bch2_extent_crc_unpacked_to_text(struct printbuf *out, struct bch_extent_crc_unpacked *crc) { - prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum ", + prt_printf(out, "c_size %u size %u offset %u nonce %u csum ", crc->compressed_size, crc->uncompressed_size, crc->offset, crc->nonce); @@ -1305,72 +1305,34 @@ void bch2_extent_crc_unpacked_to_text(struct printbuf *out, struct bch_extent_cr bch2_prt_compression_type(out, crc->compression_type); } -static void bch2_extent_rebalance_to_text(struct printbuf *out, struct bch_fs *c, - const struct bch_extent_rebalance *r) -{ - prt_str(out, "rebalance:"); - - prt_printf(out, " replicas=%u", r->data_replicas); - if (r->data_replicas_from_inode) - prt_str(out, " (inode)"); - - prt_str(out, " checksum="); - bch2_prt_csum_opt(out, r->data_checksum); - if (r->data_checksum_from_inode) - prt_str(out, " (inode)"); - - if (r->background_compression || r->background_compression_from_inode) { - prt_str(out, " background_compression="); - bch2_compression_opt_to_text(out, r->background_compression); - - if (r->background_compression_from_inode) - prt_str(out, " (inode)"); - } - - if (r->background_target || r->background_target_from_inode) { - prt_str(out, " background_target="); - if (c) - bch2_target_to_text(out, c, r->background_target); - else - prt_printf(out, "%u", r->background_target); - - if (r->background_target_from_inode) - prt_str(out, " (inode)"); - } - - if (r->promote_target || r->promote_target_from_inode) { - prt_str(out, " promote_target="); - if (c) - bch2_target_to_text(out, c, r->promote_target); - else - prt_printf(out, "%u", r->promote_target); - - if (r->promote_target_from_inode) - prt_str(out, " (inode)"); - } - - if (r->erasure_code || r->erasure_code_from_inode) { - prt_printf(out, " ec=%u", r->erasure_code); - if (r->erasure_code_from_inode) - prt_str(out, " (inode)"); - } -} +static const char * const extent_entry_types[] = { +#define x(t, n, ...) 
[n] = #t, + BCH_EXTENT_ENTRY_TYPES() +#undef x + NULL +}; void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; - bool first = true; if (c) prt_printf(out, "durability: %u ", bch2_bkey_durability_safe(c, k)); + guard(printbuf_indent)(out); + bkey_extent_entry_for_each(ptrs, entry) { - if (!first) - prt_printf(out, " "); + prt_newline(out); - switch (__extent_entry_type(entry)) { + unsigned type = __extent_entry_type(entry); + if (type < BCH_EXTENT_ENTRY_MAX) { + prt_str(out, extent_entry_types[__extent_entry_type(entry)]); + prt_str(out, ": "); + } + + switch (type) { case BCH_EXTENT_ENTRY_ptr: bch2_extent_ptr_to_text(out, c, entry_to_ptr(entry)); break; @@ -1387,8 +1349,7 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, case BCH_EXTENT_ENTRY_stripe_ptr: { const struct bch_extent_stripe_ptr *ec = &entry->stripe_ptr; - prt_printf(out, "ec: idx %llu block %u", - (u64) ec->idx, ec->block); + prt_printf(out, "idx %llu block %u", (u64) ec->idx, ec->block); break; } case BCH_EXTENT_ENTRY_rebalance: @@ -1403,8 +1364,6 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, prt_printf(out, "(invalid extent entry %.16llx)", *((u64 *) entry)); return; } - - first = false; } } diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index f6dcb17108cd..03ea7c689d9a 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -686,10 +686,10 @@ bool bch2_extents_match(struct bkey_s_c, struct bkey_s_c); struct bch_extent_ptr * bch2_extent_has_ptr(struct bkey_s_c, struct extent_ptr_decoded, struct bkey_s); -void bch2_extent_ptr_set_cached(struct bch_fs *, struct bch_io_opts *, +void bch2_extent_ptr_set_cached(struct bch_fs *, struct bch_inode_opts *, struct bkey_s, struct bch_extent_ptr *); -bool bch2_extent_normalize_by_opts(struct bch_fs *, struct bch_io_opts *, struct bkey_s); +bool bch2_extent_normalize_by_opts(struct bch_fs *, struct bch_inode_opts *, struct bkey_s); bool bch2_extent_normalize(struct bch_fs *, struct bkey_s); void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *, const struct bch_extent_ptr *); diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c index 45175a478b92..aab30571b056 100644 --- a/fs/bcachefs/fs-io-buffered.c +++ b/fs/bcachefs/fs-io-buffered.c @@ -284,12 +284,12 @@ void bch2_readahead(struct readahead_control *ractl) { struct bch_inode_info *inode = to_bch_ei(ractl->mapping->host); struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct bch_io_opts opts; struct folio *folio; struct readpages_iter readpages_iter; struct blk_plug plug; - bch2_inode_opts_get(&opts, c, &inode->ei_inode); + struct bch_inode_opts opts; + bch2_inode_opts_get_inode(c, &inode->ei_inode, &opts); int ret = readpages_iter_init(&readpages_iter, ractl); if (ret) @@ -350,7 +350,7 @@ int bch2_read_single_folio(struct folio *folio, struct address_space *mapping) struct bch_inode_info *inode = to_bch_ei(mapping->host); struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_read_bio *rbio; - struct bch_io_opts opts; + struct bch_inode_opts opts; struct blk_plug plug; int ret; DECLARE_COMPLETION_ONSTACK(done); @@ -361,7 +361,7 @@ int bch2_read_single_folio(struct folio *folio, struct address_space *mapping) if (!bch2_folio_create(folio, GFP_KERNEL)) return -ENOMEM; - bch2_inode_opts_get(&opts, c, &inode->ei_inode); + bch2_inode_opts_get_inode(c, &inode->ei_inode, &opts); rbio = rbio_init(bio_alloc_bioset(NULL, 1, 
REQ_OP_READ, GFP_KERNEL, &c->bio_read), c, @@ -407,7 +407,7 @@ struct bch_writepage_io { struct bch_writepage_state { struct bch_writepage_io *io; - struct bch_io_opts opts; + struct bch_inode_opts opts; struct bch_folio_sector *tmp; unsigned tmp_sectors; struct blk_plug plug; @@ -683,7 +683,7 @@ int bch2_writepages(struct address_space *mapping, struct writeback_control *wbc struct bch_fs *c = mapping->host->i_sb->s_fs_info; struct bch_writepage_state *w = kzalloc(sizeof(*w), GFP_NOFS|__GFP_NOFAIL); - bch2_inode_opts_get(&w->opts, c, &to_bch_ei(mapping->host)->ei_inode); + bch2_inode_opts_get_inode(c, &to_bch_ei(mapping->host)->ei_inode, &w->opts); blk_start_plug(&w->plug); int ret = bch2_write_cache_pages(mapping, wbc, w); diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c index 79823234160f..a104b9d70bea 100644 --- a/fs/bcachefs/fs-io-direct.c +++ b/fs/bcachefs/fs-io-direct.c @@ -68,7 +68,6 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter) struct file *file = req->ki_filp; struct bch_inode_info *inode = file_bch_inode(file); struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct bch_io_opts opts; struct dio_read *dio; struct bio *bio; struct blk_plug plug; @@ -78,7 +77,8 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter) size_t shorten; ssize_t ret; - bch2_inode_opts_get(&opts, c, &inode->ei_inode); + struct bch_inode_opts opts; + bch2_inode_opts_get_inode(c, &inode->ei_inode, &opts); /* bios must be 512 byte aligned: */ if ((offset|iter->count) & (SECTOR_SIZE - 1)) @@ -445,13 +445,13 @@ static __always_inline long bch2_dio_write_loop(struct dio_write *dio) struct kiocb *req = dio->req; struct address_space *mapping = dio->mapping; struct bch_inode_info *inode = dio->inode; - struct bch_io_opts opts; + struct bch_inode_opts opts; struct bio *bio = &dio->op.wbio.bio; unsigned unaligned, iter_count; bool sync = dio->sync, dropped_locks; long ret; - bch2_inode_opts_get(&opts, c, &inode->ei_inode); + bch2_inode_opts_get_inode(c, &inode->ei_inode, &opts); while (1) { iter_count = dio->iter.count; diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index de0d965f3fde..57e9459afa07 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -627,10 +627,10 @@ static noinline int __bchfs_fallocate(struct bch_inode_info *inode, int mode, { struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bpos end_pos = POS(inode->v.i_ino, end_sector); - struct bch_io_opts opts; + struct bch_inode_opts opts; int ret = 0; - bch2_inode_opts_get(&opts, c, &inode->ei_inode); + bch2_inode_opts_get_inode(c, &inode->ei_inode, &opts); CLASS(btree_trans, trans)(c); CLASS(btree_iter, iter)(trans, BTREE_ID_extents, diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 4aa130ff7cf6..655ed90b2a39 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -369,9 +369,9 @@ err: } int bch2_inode_find_by_inum_snapshot(struct btree_trans *trans, - u64 inode_nr, u32 snapshot, - struct bch_inode_unpacked *inode, - unsigned flags) + u64 inode_nr, u32 snapshot, + struct bch_inode_unpacked *inode, + unsigned flags) { CLASS(btree_iter, iter)(trans, BTREE_ID_inodes, SPOS(0, inode_nr, snapshot), flags); struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); @@ -598,7 +598,7 @@ static void __bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked *inode) { prt_printf(out, "\n"); - printbuf_indent_add(out, 2); + guard(printbuf_indent)(out); prt_printf(out, "mode=%o\n", inode->bi_mode); prt_str(out, "flags="); @@ -620,7 +620,6 @@ static 
void __bch2_inode_unpacked_to_text(struct printbuf *out, #undef x bch2_printbuf_strip_trailing_newline(out); - printbuf_indent_sub(out, 2); } void bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked *inode) @@ -674,7 +673,7 @@ static inline void bkey_inode_flags_set(struct bkey_s k, u64 f) static inline bool bkey_is_unlinked_inode(struct bkey_s_c k) { - unsigned f = bkey_inode_flags(k) & BCH_INODE_unlinked; + unsigned f = bkey_inode_flags(k); return (f & BCH_INODE_unlinked) && !(f & BCH_INODE_has_child_snapshot); } @@ -1224,32 +1223,45 @@ struct bch_opts bch2_inode_opts_to_opts(struct bch_inode_unpacked *inode) return ret; } -void bch2_inode_opts_get(struct bch_io_opts *opts, struct bch_fs *c, - struct bch_inode_unpacked *inode) +void bch2_inode_opts_get_inode(struct bch_fs *c, + struct bch_inode_unpacked *inode, + struct bch_inode_opts *ret) { #define x(_name, _bits) \ if ((inode)->bi_##_name) { \ - opts->_name = inode->bi_##_name - 1; \ - opts->_name##_from_inode = true; \ + ret->_name = inode->bi_##_name - 1; \ + ret->_name##_from_inode = true; \ } else { \ - opts->_name = c->opts._name; \ - opts->_name##_from_inode = false; \ + ret->_name = c->opts._name; \ + ret->_name##_from_inode = false; \ } BCH_INODE_OPTS() #undef x - bch2_io_opts_fixups(opts); + ret->change_cookie = atomic_read(&c->opt_change_cookie); + + bch2_io_opts_fixups(ret); } -int bch2_inum_opts_get(struct btree_trans *trans, subvol_inum inum, struct bch_io_opts *opts) +int bch2_inum_snapshot_opts_get(struct btree_trans *trans, + u64 inum, u32 snapshot, + struct bch_inode_opts *opts) { - struct bch_inode_unpacked inode; - int ret = lockrestart_do(trans, bch2_inode_find_by_inum_trans(trans, inum, &inode)); + if (inum) { + struct bch_inode_unpacked inode; + int ret = bch2_inode_find_by_inum_snapshot(trans, inum, snapshot, &inode, 0); + if (ret) + return ret; - if (ret) - return ret; + bch2_inode_opts_get_inode(trans->c, &inode, opts); + } else { + /* + * data_update_index_update may call us for reflink btree extent + * updates, inum will be 0 + */ - bch2_inode_opts_get(opts, trans->c, &inode); + bch2_inode_opts_get(trans->c, opts); + } return 0; } diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index 79092ea74844..63b7088811fb 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -289,9 +289,8 @@ int bch2_inode_nlink_inc(struct bch_inode_unpacked *); void bch2_inode_nlink_dec(struct btree_trans *, struct bch_inode_unpacked *); struct bch_opts bch2_inode_opts_to_opts(struct bch_inode_unpacked *); -void bch2_inode_opts_get(struct bch_io_opts *, struct bch_fs *, - struct bch_inode_unpacked *); -int bch2_inum_opts_get(struct btree_trans *, subvol_inum, struct bch_io_opts *); +void bch2_inode_opts_get_inode(struct bch_fs *, struct bch_inode_unpacked *, struct bch_inode_opts *); +int bch2_inum_snapshot_opts_get(struct btree_trans *, u64, u32, struct bch_inode_opts *); int bch2_inode_set_casefold(struct btree_trans *, subvol_inum, struct bch_inode_unpacked *, unsigned); @@ -300,8 +299,8 @@ int bch2_inode_set_casefold(struct btree_trans *, subvol_inum, static inline struct bch_extent_rebalance bch2_inode_rebalance_opts_get(struct bch_fs *c, struct bch_inode_unpacked *inode) { - struct bch_io_opts io_opts; - bch2_inode_opts_get(&io_opts, c, inode); + struct bch_inode_opts io_opts; + bch2_inode_opts_get_inode(c, inode, &io_opts); return io_opts_to_rebalance_opts(c, &io_opts); } diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c index fa0b06e17d17..04eb5ecd102b 100644 --- 
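
/*
 * Aside: bch2_inode_opts_get_inode() above depends on the inode fields
 * storing "value + 1", so that zero means "unset, inherit the filesystem
 * default". A sketch of that decode for a single option (names here are
 * illustrative, not real bcachefs fields):
 */
#include <stdbool.h>

static unsigned effective_opt(unsigned bi_val,	/* inode field, biased by +1 */
			      unsigned fs_val,	/* filesystem-wide default */
			      bool *from_inode)
{
	if (bi_val) {
		*from_inode = true;
		return bi_val - 1;	/* undo the +1 bias */
	}
	*from_inode = false;
	return fs_val;
}
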
a/fs/bcachefs/io_misc.c +++ b/fs/bcachefs/io_misc.c @@ -24,7 +24,7 @@ int bch2_extent_fallocate(struct btree_trans *trans, subvol_inum inum, struct btree_iter *iter, u64 sectors, - struct bch_io_opts opts, + struct bch_inode_opts opts, s64 *i_sectors_delta, struct write_point_specifier write_point) { @@ -109,7 +109,7 @@ int bch2_extent_fallocate(struct btree_trans *trans, } ret = bch2_extent_update(trans, inum, iter, new.k, &disk_res, - 0, i_sectors_delta, true); + 0, i_sectors_delta, true, 0); err: if (!ret && sectors_allocated) bch2_increment_clock(c, sectors_allocated, WRITE); @@ -211,7 +211,7 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter, bch2_cut_back(end_pos, &delete); ret = bch2_extent_update(trans, inum, iter, &delete, - &disk_res, 0, i_sectors_delta, false); + &disk_res, 0, i_sectors_delta, false, 0); bch2_disk_reservation_put(c, &disk_res); } @@ -373,7 +373,6 @@ static int __bch2_resume_logged_op_finsert(struct btree_trans *trans, struct btree_iter iter; struct bkey_i_logged_op_finsert *op = bkey_i_to_logged_op_finsert(op_k); subvol_inum inum = { le32_to_cpu(op->v.subvol), le64_to_cpu(op->v.inum) }; - struct bch_io_opts opts; u64 dst_offset = le64_to_cpu(op->v.dst_offset); u64 src_offset = le64_to_cpu(op->v.src_offset); s64 shift = dst_offset - src_offset; @@ -384,10 +383,6 @@ static int __bch2_resume_logged_op_finsert(struct btree_trans *trans, bool warn_errors = i_sectors_delta != NULL; int ret = 0; - ret = bch2_inum_opts_get(trans, inum, &opts); - if (ret) - return ret; - /* * check for missing subvolume before fpunch, as in resume we don't want * it to be a fatal error @@ -476,8 +471,7 @@ case LOGGED_OP_FINSERT_shift_extents: op->v.pos = cpu_to_le64(insert ? bkey_start_offset(&delete.k) : delete.k.p.offset); - ret = bch2_bkey_set_needs_rebalance(c, &opts, copy) ?: - bch2_btree_insert_trans(trans, BTREE_ID_extents, &delete, 0) ?: + ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, &delete, 0) ?: bch2_btree_insert_trans(trans, BTREE_ID_extents, copy, 0) ?: bch2_logged_op_update(trans, &op->k_i) ?: bch2_trans_commit(trans, &disk_res, NULL, BCH_TRANS_COMMIT_no_enospc); diff --git a/fs/bcachefs/io_misc.h b/fs/bcachefs/io_misc.h index b93e4d4b3c0c..6a294f2a6dd6 100644 --- a/fs/bcachefs/io_misc.h +++ b/fs/bcachefs/io_misc.h @@ -3,7 +3,7 @@ #define _BCACHEFS_IO_MISC_H int bch2_extent_fallocate(struct btree_trans *, subvol_inum, struct btree_iter *, - u64, struct bch_io_opts, s64 *, + u64, struct bch_inode_opts, s64 *, struct write_point_specifier); int bch2_fpunch_snapshot(struct btree_trans *, struct bpos, struct bpos); diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index bae2e181c9ed..7066be2701c0 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -158,7 +158,7 @@ static bool ptr_being_rewritten(struct bch_read_bio *orig, unsigned dev) static inline int should_promote(struct bch_fs *c, struct bkey_s_c k, struct bpos pos, - struct bch_io_opts opts, + struct bch_inode_opts opts, unsigned flags, struct bch_io_failures *failed) { @@ -408,9 +408,8 @@ void bch2_promote_op_to_text(struct printbuf *out, { if (!op->write.read_done) { prt_printf(out, "parent read: %px\n", op->write.rbio.parent); - printbuf_indent_add(out, 2); + guard(printbuf_indent)(out); bch2_read_bio_to_text(out, c, op->write.rbio.parent); - printbuf_indent_sub(out, 2); } bch2_data_update_to_text(out, &op->write); @@ -1519,7 +1518,7 @@ void bch2_read_bio_to_text(struct printbuf *out, /* Are we in a retry? 
*/ - printbuf_indent_add(out, 2); + guard(printbuf_indent)(out); u64 now = local_clock(); prt_printf(out, "start_time:\t"); @@ -1553,7 +1552,6 @@ void bch2_read_bio_to_text(struct printbuf *out, prt_newline(out); bch2_bio_to_text(out, &rbio->bio); - printbuf_indent_sub(out, 2); } void bch2_fs_io_read_exit(struct bch_fs *c) diff --git a/fs/bcachefs/io_read.h b/fs/bcachefs/io_read.h index 1e1c0476bd03..df4632f6fe9e 100644 --- a/fs/bcachefs/io_read.h +++ b/fs/bcachefs/io_read.h @@ -74,7 +74,7 @@ struct bch_read_bio { struct bpos data_pos; struct bversion version; - struct bch_io_opts opts; + struct bch_inode_opts opts; struct work_struct work; @@ -192,7 +192,7 @@ static inline struct bch_read_bio *rbio_init_fragment(struct bio *bio, static inline struct bch_read_bio *rbio_init(struct bio *bio, struct bch_fs *c, - struct bch_io_opts opts, + struct bch_inode_opts opts, bio_end_io_t end_io) { struct bch_read_bio *rbio = to_rbio(bio); diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index 1d83dcc9731e..6a5da02ce266 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -205,7 +205,8 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans, static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans, struct btree_iter *extent_iter, u64 new_i_size, - s64 i_sectors_delta) + s64 i_sectors_delta, + struct bch_inode_unpacked *inode_u) { /* * Crazy performance optimization: @@ -227,7 +228,13 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans, BTREE_ITER_intent| BTREE_ITER_cached); struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); - int ret = bkey_err(k); + + /* + * XXX: we currently need to unpack the inode on every write because we + * need the current io_opts, for transactional consistency - inode_v4? + */ + int ret = bkey_err(k) ?: + bch2_inode_unpack(k, inode_u); if (unlikely(ret)) return ret; @@ -303,8 +310,10 @@ int bch2_extent_update(struct btree_trans *trans, struct disk_reservation *disk_res, u64 new_i_size, s64 *i_sectors_delta_total, - bool check_enospc) + bool check_enospc, + u32 change_cookie) { + struct bch_fs *c = trans->c; struct bpos next_pos; bool usage_increasing; s64 i_sectors_delta = 0, disk_sectors_delta = 0; @@ -335,7 +344,7 @@ int bch2_extent_update(struct btree_trans *trans, if (disk_res && disk_sectors_delta > (s64) disk_res->sectors) { - ret = bch2_disk_reservation_add(trans->c, disk_res, + ret = bch2_disk_reservation_add(c, disk_res, disk_sectors_delta - disk_res->sectors, !check_enospc || !usage_increasing ? 
BCH_DISK_RESERVATION_NOFAIL : 0); @@ -349,9 +358,16 @@ int bch2_extent_update(struct btree_trans *trans, * aren't changing - for fsync to work properly; fsync relies on * inode->bi_journal_seq which is updated by the trigger code: */ + struct bch_inode_unpacked inode; + struct bch_inode_opts opts; + ret = bch2_extent_update_i_size_sectors(trans, iter, min(k->k.p.offset << 9, new_i_size), - i_sectors_delta) ?: + i_sectors_delta, &inode) ?: + (bch2_inode_opts_get_inode(c, &inode, &opts), + bch2_bkey_set_needs_rebalance(c, &opts, k, + SET_NEEDS_REBALANCE_foreground, + change_cookie)) ?: bch2_trans_update(trans, iter, k, 0) ?: bch2_trans_commit(trans, disk_res, NULL, BCH_TRANS_COMMIT_no_check_rw| @@ -402,7 +418,8 @@ static int bch2_write_index_default(struct bch_write_op *op) ret = bch2_extent_update(trans, inum, &iter, sk.k, &op->res, op->new_i_size, &op->i_sectors_delta, - op->flags & BCH_WRITE_check_enospc); + op->flags & BCH_WRITE_check_enospc, + op->opts.change_cookie); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; @@ -792,10 +809,6 @@ static void init_append_extent(struct bch_write_op *op, bch2_alloc_sectors_append_ptrs_inlined(op->c, wp, &e->k_i, crc.compressed_size, op->flags & BCH_WRITE_cached); - - if (!(op->flags & BCH_WRITE_move)) - bch2_bkey_set_needs_rebalance(op->c, &op->opts, &e->k_i); - bch2_keylist_push(&op->insert_keys); } @@ -1225,6 +1238,7 @@ static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans, return 0; } + struct bch_fs *c = trans->c; struct bkey_i *new = bch2_trans_kmalloc_nomemzero(trans, bkey_bytes(k.k) + sizeof(struct bch_extent_rebalance)); int ret = PTR_ERR_OR_ZERO(new); @@ -1239,8 +1253,6 @@ static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans, bkey_for_each_ptr(ptrs, ptr) ptr->unwritten = 0; - bch2_bkey_set_needs_rebalance(op->c, &op->opts, new); - /* * Note that we're not calling bch2_subvol_get_snapshot() in this path - * that was done when we kicked off the write, and here it's important @@ -1248,8 +1260,20 @@ static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans, * since been created. The write is still outstanding, so we're ok * w.r.t. 
snapshot atomicity: */ + + /* + * For transactional consistency, set_needs_rebalance() has to be called + * with the io_opts from the btree in the same transaction: + */ + struct bch_inode_unpacked inode; + struct bch_inode_opts opts; + return bch2_extent_update_i_size_sectors(trans, iter, - min(new->k.p.offset << 9, new_i_size), 0) ?: + min(new->k.p.offset << 9, new_i_size), 0, &inode) ?: + (bch2_inode_opts_get_inode(c, &inode, &opts), + bch2_bkey_set_needs_rebalance(c, &opts, new, + SET_NEEDS_REBALANCE_foreground, + op->opts.change_cookie)) ?: bch2_trans_update(trans, iter, new, BTREE_UPDATE_internal_snapshot_node); } @@ -1742,7 +1766,7 @@ void bch2_write_op_to_text(struct printbuf *out, struct bch_write_op *op) prt_printf(out, "pos:\t"); bch2_bpos_to_text(out, op->pos); prt_newline(out); - printbuf_indent_add(out, 2); + guard(printbuf_indent)(out); prt_printf(out, "started:\t"); bch2_pr_time_units(out, local_clock() - op->start_time); @@ -1754,11 +1778,12 @@ void bch2_write_op_to_text(struct printbuf *out, struct bch_write_op *op) prt_printf(out, "nr_replicas:\t%u\n", op->nr_replicas); prt_printf(out, "nr_replicas_required:\t%u\n", op->nr_replicas_required); + prt_printf(out, "devs_have:\t"); + bch2_devs_list_to_text(out, &op->devs_have); + prt_newline(out); prt_printf(out, "ref:\t%u\n", closure_nr_remaining(&op->cl)); prt_printf(out, "ret\t%s\n", bch2_err_str(op->error)); - - printbuf_indent_sub(out, 2); } void bch2_fs_io_write_exit(struct bch_fs *c) diff --git a/fs/bcachefs/io_write.h b/fs/bcachefs/io_write.h index 2c0a8f35ee1f..692529bf401d 100644 --- a/fs/bcachefs/io_write.h +++ b/fs/bcachefs/io_write.h @@ -28,10 +28,10 @@ int bch2_sum_sector_overwrites(struct btree_trans *, struct btree_iter *, struct bkey_i *, bool *, s64 *, s64 *); int bch2_extent_update(struct btree_trans *, subvol_inum, struct btree_iter *, struct bkey_i *, - struct disk_reservation *, u64, s64 *, bool); + struct disk_reservation *, u64, s64 *, bool, u32); static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c, - struct bch_io_opts opts) + struct bch_inode_opts opts) { op->c = c; op->end_io = NULL; diff --git a/fs/bcachefs/io_write_types.h b/fs/bcachefs/io_write_types.h index 5da4eb8bb6f6..ab36b03e0a46 100644 --- a/fs/bcachefs/io_write_types.h +++ b/fs/bcachefs/io_write_types.h @@ -90,7 +90,7 @@ struct bch_write_op { struct bch_devs_list devs_have; u16 target; u16 nonce; - struct bch_io_opts opts; + struct bch_inode_opts opts; u32 subvol; struct bpos pos; diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 93ac0faedf7d..6505c79f8516 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -48,7 +48,7 @@ static void bch2_journal_buf_to_text(struct printbuf *out, struct journal *j, u6 struct journal_buf *buf = j->buf + i; prt_printf(out, "seq:\t%llu\n", seq); - printbuf_indent_add(out, 2); + guard(printbuf_indent)(out); if (!buf->write_started) prt_printf(out, "refcount:\t%u\n", journal_state_count(s, i & JOURNAL_STATE_BUF_MASK)); @@ -81,8 +81,6 @@ static void bch2_journal_buf_to_text(struct printbuf *out, struct journal *j, u6 if (buf->write_done) prt_str(out, "write_done"); prt_newline(out); - - printbuf_indent_sub(out, 2); } static void bch2_journal_bufs_to_text(struct printbuf *out, struct journal *j) @@ -1767,20 +1765,20 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) bch2_journal_bufs_to_text(out, j); prt_printf(out, "space:\n"); - printbuf_indent_add(out, 2); - prt_printf(out, "discarded\t%u:%u\n", - 
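
/*
 * Aside: the update paths above thread a void call into an error-chaining
 * cascade using the (void-expr, int-expr) comma form, e.g.
 * (bch2_inode_opts_get_inode(...), bch2_bkey_set_needs_rebalance(...)).
 * A freestanding sketch of the idiom, using the GNU ?: extension this
 * code already relies on (helper names are made up):
 */
static int step_a(void)		{ return 0; }
static void prepare(int *x)	{ *x = 42; }
static int step_b(int x)	{ return x == 42 ? 0 : -1; }

static int chained_update(void)
{
	int x;

	return step_a() ?:			/* first nonzero error wins */
		(prepare(&x), step_b(x));	/* comma sequences the void call */
}
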
j->space[journal_space_discarded].next_entry, - j->space[journal_space_discarded].total); - prt_printf(out, "clean ondisk\t%u:%u\n", - j->space[journal_space_clean_ondisk].next_entry, - j->space[journal_space_clean_ondisk].total); - prt_printf(out, "clean\t%u:%u\n", - j->space[journal_space_clean].next_entry, - j->space[journal_space_clean].total); - prt_printf(out, "total\t%u:%u\n", - j->space[journal_space_total].next_entry, - j->space[journal_space_total].total); - printbuf_indent_sub(out, 2); + scoped_guard(printbuf_indent, out) { + prt_printf(out, "discarded\t%u:%u\n", + j->space[journal_space_discarded].next_entry, + j->space[journal_space_discarded].total); + prt_printf(out, "clean ondisk\t%u:%u\n", + j->space[journal_space_clean_ondisk].next_entry, + j->space[journal_space_clean_ondisk].total); + prt_printf(out, "clean\t%u:%u\n", + j->space[journal_space_clean].next_entry, + j->space[journal_space_clean].total); + prt_printf(out, "total\t%u:%u\n", + j->space[journal_space_total].next_entry, + j->space[journal_space_total].total); + } for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal]) { if (!ca->mi.durability) @@ -1796,7 +1794,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) prt_printf(out, "dev %u:\n", ca->dev_idx); prt_printf(out, "durability %u:\n", ca->mi.durability); - printbuf_indent_add(out, 2); + guard(printbuf_indent)(out); prt_printf(out, "nr\t%u\n", ja->nr); prt_printf(out, "bucket size\t%u\n", ca->mi.bucket_size); prt_printf(out, "available\t%u:%u\n", bch2_journal_dev_buckets_available(j, ja, journal_space_discarded), ja->sectors_free); @@ -1804,7 +1802,6 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) prt_printf(out, "dirty_ondisk\t%u (seq %llu)\n",ja->dirty_idx_ondisk, ja->bucket_seq[ja->dirty_idx_ondisk]); prt_printf(out, "dirty_idx\t%u (seq %llu)\n", ja->dirty_idx, ja->bucket_seq[ja->dirty_idx]); prt_printf(out, "cur_idx\t%u (seq %llu)\n", ja->cur_idx, ja->bucket_seq[ja->cur_idx]); - printbuf_indent_sub(out, 2); } prt_printf(out, "replicas want %u need %u\n", c->opts.metadata_replicas, c->opts.metadata_replicas_required); diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 44328d02cf67..e6f778bf7763 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -760,8 +760,8 @@ static void journal_entry_dev_usage_to_text(struct printbuf *out, struct bch_fs return; prt_printf(out, "dev=%u", le32_to_cpu(u->dev)); + guard(printbuf_indent)(out); - printbuf_indent_add(out, 2); for (i = 0; i < nr_types; i++) { prt_newline(out); bch2_prt_data_type(out, i); @@ -770,7 +770,6 @@ static void journal_entry_dev_usage_to_text(struct printbuf *out, struct bch_fs le64_to_cpu(u->d[i].sectors), le64_to_cpu(u->d[i].fragmented)); } - printbuf_indent_sub(out, 2); } static int journal_entry_log_validate(struct bch_fs *c, diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index bd1885607d3e..ae747c87fcf9 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -1019,7 +1019,7 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 pin_list = journal_seq_pin(j, *seq); prt_printf(out, "%llu: count %u\n", *seq, atomic_read(&pin_list->count)); - printbuf_indent_add(out, 2); + guard(printbuf_indent)(out); prt_printf(out, "unflushed:\n"); for (unsigned i = 0; i < ARRAY_SIZE(pin_list->unflushed); i++) @@ -1031,8 +1031,6 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 
list_for_each_entry(pin, &pin_list->flushed[i], list) prt_printf(out, "\t%px %ps\n", pin, pin->flush); - printbuf_indent_sub(out, 2); - return false; } diff --git a/fs/bcachefs/journal_sb.c b/fs/bcachefs/journal_sb.c index c5d09fb5907d..dc0ecedb3a8f 100644 --- a/fs/bcachefs/journal_sb.c +++ b/fs/bcachefs/journal_sb.c @@ -230,3 +230,40 @@ int bch2_journal_buckets_to_sb(struct bch_fs *c, struct bch_dev *ca, BUG_ON(dst + 1 != nr_compacted); return 0; } + +static inline bool journal_v2_unsorted(struct bch_sb_field_journal_v2 *j) +{ + unsigned nr = bch2_sb_field_journal_v2_nr_entries(j); + for (unsigned i = 0; i + 1 < nr; i++) + if (le64_to_cpu(j->d[i].start) > le64_to_cpu(j->d[i + 1].start)) + return true; + return false; +} + +int bch2_sb_journal_sort(struct bch_fs *c) +{ + BUG_ON(!c->sb.clean); + BUG_ON(test_bit(BCH_FS_rw, &c->flags)); + + guard(mutex)(&c->sb_lock); + bool write_sb = false; + + for_each_online_member(c, ca, BCH_DEV_READ_REF_sb_journal_sort) { + struct bch_sb_field_journal_v2 *j = bch2_sb_field_get(ca->disk_sb.sb, journal_v2); + if (!j) + continue; + + if ((j && journal_v2_unsorted(j)) || + bch2_sb_field_get(ca->disk_sb.sb, journal)) { + struct journal_device *ja = &ca->journal; + + sort(ja->buckets, ja->nr, sizeof(ja->buckets[0]), u64_cmp, NULL); + bch2_journal_buckets_to_sb(c, ca, ja->buckets, ja->nr); + write_sb = true; + } + } + + return write_sb + ? bch2_write_super(c) + : 0; +} diff --git a/fs/bcachefs/journal_sb.h b/fs/bcachefs/journal_sb.h index ba40a7e8d90a..e0fc40652607 100644 --- a/fs/bcachefs/journal_sb.h +++ b/fs/bcachefs/journal_sb.h @@ -22,3 +22,4 @@ extern const struct bch_sb_field_ops bch_sb_field_ops_journal; extern const struct bch_sb_field_ops bch_sb_field_ops_journal_v2; int bch2_journal_buckets_to_sb(struct bch_fs *, struct bch_dev *, u64 *, unsigned); +int bch2_sb_journal_sort(struct bch_fs *); diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c index b9c0834498dd..c533b60706bf 100644 --- a/fs/bcachefs/lru.c +++ b/fs/bcachefs/lru.c @@ -51,25 +51,17 @@ static int __bch2_lru_set(struct btree_trans *trans, u16 lru_id, : 0; } -int bch2_lru_del(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 time) +static int bch2_lru_set(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 time) { - return __bch2_lru_set(trans, lru_id, dev_bucket, time, KEY_TYPE_deleted); -} - -int bch2_lru_set(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 time) -{ - return __bch2_lru_set(trans, lru_id, dev_bucket, time, KEY_TYPE_set); + return __bch2_lru_set(trans, lru_id, dev_bucket, time, true); } int __bch2_lru_change(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 old_time, u64 new_time) { - if (old_time == new_time) - return 0; - - return bch2_lru_del(trans, lru_id, dev_bucket, old_time) ?: - bch2_lru_set(trans, lru_id, dev_bucket, new_time); + return __bch2_lru_set(trans, lru_id, dev_bucket, old_time, false) ?: + __bch2_lru_set(trans, lru_id, dev_bucket, new_time, true); } static const char * const bch2_lru_types[] = { @@ -87,7 +79,6 @@ int bch2_lru_check_set(struct btree_trans *trans, struct bkey_buf *last_flushed) { struct bch_fs *c = trans->c; - CLASS(printbuf, buf)(); CLASS(btree_iter, lru_iter)(trans, BTREE_ID_lru, lru_pos(lru_id, dev_bucket, time), 0); struct bkey_s_c lru_k = bch2_btree_iter_peek_slot(&lru_iter); int ret = bkey_err(lru_k); @@ -99,10 +90,13 @@ int bch2_lru_check_set(struct btree_trans *trans, if (ret) return ret; - if (fsck_err(trans, alloc_key_to_missing_lru_entry, - "missing %s lru entry\n%s", - 
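
/*
 * Aside: journal_v2_unsorted() above is a plain adjacent-pair scan, after
 * which bch2_sb_journal_sort() rewrites the bucket list in order. A
 * userspace-flavoured sketch of the same detect-then-sort step (qsort
 * stands in for the kernel's sort(), cmp_u64 for u64_cmp):
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

static int cmp_u64(const void *a, const void *b)
{
	uint64_t x = *(const uint64_t *)a, y = *(const uint64_t *)b;

	return x < y ? -1 : x > y;	/* -1, 0 or 1 */
}

static bool u64s_unsorted(const uint64_t *d, size_t nr)
{
	for (size_t i = 0; i + 1 < nr; i++)
		if (d[i] > d[i + 1])
			return true;
	return false;
}

static void sort_buckets_if_needed(uint64_t *d, size_t nr)
{
	if (u64s_unsorted(d, nr))
		qsort(d, nr, sizeof(*d), cmp_u64);
}
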
bch2_lru_types[lru_type(lru_k)], - (bch2_bkey_val_to_text(&buf, c, referring_k), buf.buf))) { + CLASS(printbuf, buf)(); + prt_printf(&buf, "missing %s lru entry at pos ", bch2_lru_types[lru_type(lru_k)]); + bch2_bpos_to_text(&buf, lru_iter.pos); + prt_newline(&buf); + bch2_bkey_val_to_text(&buf, c, referring_k); + + if (fsck_err(trans, alloc_key_to_missing_lru_entry, "%s", buf.buf)) { ret = bch2_lru_set(trans, lru_id, dev_bucket, time); if (ret) return ret; @@ -127,6 +121,23 @@ static struct bbpos lru_pos_to_bp(struct bkey_s_c lru_k) } } +int bch2_dev_remove_lrus(struct bch_fs *c, struct bch_dev *ca) +{ + CLASS(btree_trans, trans)(c); + int ret = bch2_btree_write_buffer_flush_sync(trans) ?: + for_each_btree_key(trans, iter, + BTREE_ID_lru, POS_MIN, BTREE_ITER_prefetch, k, ({ + struct bbpos bp = lru_pos_to_bp(k); + + bp.btree == BTREE_ID_alloc && bp.pos.inode == ca->dev_idx + ? (bch2_btree_delete_at(trans, &iter, 0) ?: + bch2_trans_commit(trans, NULL, NULL, 0)) + : 0; + })); + bch_err_fn(c, ret); + return ret; +} + static u64 bkey_lru_type_idx(struct bch_fs *c, enum bch_lru_type type, struct bkey_s_c k) diff --git a/fs/bcachefs/lru.h b/fs/bcachefs/lru.h index 6f1e0a7b5db5..d5a2620f2507 100644 --- a/fs/bcachefs/lru.h +++ b/fs/bcachefs/lru.h @@ -59,8 +59,6 @@ void bch2_lru_pos_to_text(struct printbuf *, struct bpos); .min_val_size = 8, \ }) -int bch2_lru_del(struct btree_trans *, u16, u64, u64); -int bch2_lru_set(struct btree_trans *, u16, u64, u64); int __bch2_lru_change(struct btree_trans *, u16, u64, u64, u64); static inline int bch2_lru_change(struct btree_trans *trans, @@ -72,9 +70,10 @@ static inline int bch2_lru_change(struct btree_trans *trans, : 0; } +int bch2_dev_remove_lrus(struct bch_fs *, struct bch_dev *); + struct bkey_buf; int bch2_lru_check_set(struct btree_trans *, u16, u64, u64, struct bkey_s_c, struct bkey_buf *); - int bch2_check_lrus(struct bch_fs *); #endif /* _BCACHEFS_LRU_H */ diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c index 5b4c3f4b1c25..8a3981e1016e 100644 --- a/fs/bcachefs/migrate.c +++ b/fs/bcachefs/migrate.c @@ -126,8 +126,9 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, { CLASS(btree_trans, trans)(c); + /* FIXME: this does not handle unknown btrees with data pointers */ for (unsigned id = 0; id < BTREE_ID_NR; id++) { - if (!btree_type_has_ptrs(id)) + if (!btree_type_has_data_ptrs(id)) continue; /* Stripe keys have pointers, but are handled separately */ @@ -167,7 +168,7 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, bch2_bkey_buf_init(&k); closure_init_stack(&cl); - for (id = 0; id < BTREE_ID_NR; id++) { + for (id = 0; id < btree_id_nr_alive(c); id++) { bch2_trans_node_iter_init(trans, &iter, id, POS_MIN, 0, 0, BTREE_ITER_prefetch); retry: diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 101658cbe95a..9a440d3f7180 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -46,12 +46,12 @@ struct evacuate_bucket_arg { static bool evacuate_bucket_pred(struct bch_fs *, void *, enum btree_id, struct bkey_s_c, - struct bch_io_opts *, + struct bch_inode_opts *, struct data_update_opts *); static noinline void trace_io_move2(struct bch_fs *c, struct bkey_s_c k, - struct bch_io_opts *io_opts, + struct bch_inode_opts *io_opts, struct data_update_opts *data_opts) { CLASS(printbuf, buf)(); @@ -72,7 +72,7 @@ static noinline void trace_io_move_read2(struct bch_fs *c, struct bkey_s_c k) static noinline void trace_io_move_pred2(struct bch_fs *c, struct bkey_s_c k, - struct bch_io_opts *io_opts, + struct bch_inode_opts *io_opts, struct 
data_update_opts *data_opts, move_pred_fn pred, void *_arg, bool p) { @@ -327,7 +327,7 @@ int bch2_move_extent(struct moving_context *ctxt, struct move_bucket *bucket_in_flight, struct btree_iter *iter, struct bkey_s_c k, - struct bch_io_opts io_opts, + struct bch_inode_opts io_opts, struct data_update_opts data_opts) { struct btree_trans *trans = ctxt->trans; @@ -451,93 +451,6 @@ err: return ret; } -struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, - struct per_snapshot_io_opts *io_opts, - struct bpos extent_pos, /* extent_iter, extent_k may be in reflink btree */ - struct btree_iter *extent_iter, - struct bkey_s_c extent_k) -{ - struct bch_fs *c = trans->c; - u32 restart_count = trans->restart_count; - struct bch_io_opts *opts_ret = &io_opts->fs_io_opts; - int ret = 0; - - if (btree_iter_path(trans, extent_iter)->level) - return opts_ret; - - if (extent_k.k->type == KEY_TYPE_reflink_v) - goto out; - - if (io_opts->cur_inum != extent_pos.inode) { - io_opts->d.nr = 0; - - ret = for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_pos.inode), - BTREE_ITER_all_snapshots, k, ({ - if (k.k->p.offset != extent_pos.inode) - break; - - if (!bkey_is_inode(k.k)) - continue; - - struct bch_inode_unpacked inode; - _ret3 = bch2_inode_unpack(k, &inode); - if (_ret3) - break; - - struct snapshot_io_opts_entry e = { .snapshot = k.k->p.snapshot }; - bch2_inode_opts_get(&e.io_opts, trans->c, &inode); - - darray_push(&io_opts->d, e); - })); - io_opts->cur_inum = extent_pos.inode; - } - - ret = ret ?: trans_was_restarted(trans, restart_count); - if (ret) - return ERR_PTR(ret); - - if (extent_k.k->p.snapshot) - darray_for_each(io_opts->d, i) - if (bch2_snapshot_is_ancestor(c, extent_k.k->p.snapshot, i->snapshot)) { - opts_ret = &i->io_opts; - break; - } -out: - ret = bch2_get_update_rebalance_opts(trans, opts_ret, extent_iter, extent_k); - if (ret) - return ERR_PTR(ret); - return opts_ret; -} - -int bch2_move_get_io_opts_one(struct btree_trans *trans, - struct bch_io_opts *io_opts, - struct btree_iter *extent_iter, - struct bkey_s_c extent_k) -{ - struct bch_fs *c = trans->c; - - *io_opts = bch2_opts_to_inode_opts(c->opts); - - /* reflink btree? 
*/ - if (extent_k.k->p.inode) { - CLASS(btree_iter, inode_iter)(trans, BTREE_ID_inodes, - SPOS(0, extent_k.k->p.inode, extent_k.k->p.snapshot), - BTREE_ITER_cached); - struct bkey_s_c inode_k = bch2_btree_iter_peek_slot(&inode_iter); - int ret = bkey_err(inode_k); - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - return ret; - - if (!ret && bkey_is_inode(inode_k.k)) { - struct bch_inode_unpacked inode; - bch2_inode_unpack(inode_k, &inode); - bch2_inode_opts_get(io_opts, c, &inode); - } - } - - return bch2_get_update_rebalance_opts(trans, io_opts, extent_iter, extent_k); -} - int bch2_move_ratelimit(struct moving_context *ctxt) { struct bch_fs *c = ctxt->trans->c; @@ -582,37 +495,6 @@ int bch2_move_ratelimit(struct moving_context *ctxt) return 0; } -/* - * Move requires non extents iterators, and there's also no need for it to - * signal indirect_extent_missing_error: - */ -static struct bkey_s_c bch2_lookup_indirect_extent_for_move(struct btree_trans *trans, - struct btree_iter *iter, - struct bkey_s_c_reflink_p p) -{ - if (unlikely(REFLINK_P_ERROR(p.v))) - return bkey_s_c_null; - - struct bpos reflink_pos = POS(0, REFLINK_P_IDX(p.v)); - - bch2_trans_iter_init(trans, iter, - BTREE_ID_reflink, reflink_pos, - BTREE_ITER_not_extents); - - struct bkey_s_c k = bch2_btree_iter_peek(iter); - if (!k.k || bkey_err(k)) { - bch2_trans_iter_exit(iter); - return k; - } - - if (bkey_lt(reflink_pos, bkey_start_pos(k.k))) { - bch2_trans_iter_exit(iter); - return bkey_s_c_null; - } - - return k; -} - int bch2_move_data_btree(struct moving_context *ctxt, struct bpos start, struct bpos end, @@ -622,17 +504,11 @@ int bch2_move_data_btree(struct moving_context *ctxt, struct btree_trans *trans = ctxt->trans; struct bch_fs *c = trans->c; struct per_snapshot_io_opts snapshot_io_opts; - struct bch_io_opts *io_opts; + struct bch_inode_opts *io_opts; struct bkey_buf sk; struct btree_iter iter, reflink_iter = {}; struct bkey_s_c k; struct data_update_opts data_opts; - /* - * If we're moving a single file, also process reflinked data it points - * to (this includes propagating changed io_opts from the inode to the - * extent): - */ - bool walk_indirect = start.inode == end.inode; int ret = 0, ret2; per_snapshot_io_opts_init(&snapshot_io_opts, c); @@ -697,8 +573,6 @@ root_err: bch2_ratelimit_reset(ctxt->rate); while (!bch2_move_ratelimit(ctxt)) { - struct btree_iter *extent_iter = &iter; - bch2_trans_begin(trans); k = bch2_btree_iter_peek(&iter); @@ -717,41 +591,18 @@ root_err: if (ctxt->stats) ctxt->stats->pos = BBPOS(iter.btree_id, iter.pos); - if (walk_indirect && - k.k->type == KEY_TYPE_reflink_p && - REFLINK_P_MAY_UPDATE_OPTIONS(bkey_s_c_to_reflink_p(k).v)) { - struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); - - bch2_trans_iter_exit(&reflink_iter); - k = bch2_lookup_indirect_extent_for_move(trans, &reflink_iter, p); - ret = bkey_err(k); - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - continue; - if (ret) - break; - - if (!k.k) - goto next_nondata; - - /* - * XXX: reflink pointers may point to multiple indirect - * extents, so don't advance past the entire reflink - * pointer - need to fixup iter->k - */ - extent_iter = &reflink_iter; - } - if (!bkey_extent_is_direct_data(k.k)) goto next_nondata; - io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts, - iter.pos, extent_iter, k); + io_opts = bch2_extent_get_apply_io_opts(trans, &snapshot_io_opts, + iter.pos, &iter, k, + SET_NEEDS_REBALANCE_other); ret = PTR_ERR_OR_ZERO(io_opts); if (ret) continue; memset(&data_opts, 0, 
sizeof(data_opts)); - if (!pred(c, arg, extent_iter->btree_id, k, io_opts, &data_opts)) + if (!pred(c, arg, iter.btree_id, k, io_opts, &data_opts)) goto next; /* @@ -762,7 +613,7 @@ root_err: k = bkey_i_to_s_c(sk.k); if (!level) - ret2 = bch2_move_extent(ctxt, NULL, extent_iter, k, *io_opts, data_opts); + ret2 = bch2_move_extent(ctxt, NULL, &iter, k, *io_opts, data_opts); else if (!data_opts.scrub) ret2 = bch2_btree_node_rewrite_pos(trans, btree_id, level, k.k->p, data_opts.target, 0); @@ -824,7 +675,7 @@ static int bch2_move_data(struct bch_fs *c, unsigned min_depth_this_btree = min_depth; /* Stripe keys have pointers, but are handled separately */ - if (!btree_type_has_ptrs(id) || + if (!btree_type_has_data_ptrs(id) || id == BTREE_ID_stripes) min_depth_this_btree = max(min_depth_this_btree, 1); @@ -859,7 +710,6 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, struct btree_trans *trans = ctxt->trans; struct bch_fs *c = trans->c; bool is_kthread = current->flags & PF_KTHREAD; - struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); struct btree_iter iter = {}; struct bkey_buf sk; struct bkey_s_c k; @@ -867,7 +717,11 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, u64 check_mismatch_done = bucket_start; int ret = 0; - CLASS(bch2_dev_tryget, ca)(c, dev); + struct bch_inode_opts io_opts; + bch2_inode_opts_get(c, &io_opts); + + /* Userspace might have supplied @dev: */ + CLASS(bch2_dev_tryget_noerror, ca)(c, dev); if (!ca) return 0; @@ -941,7 +795,8 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, goto next; if (!bp.v->level) { - ret = bch2_move_get_io_opts_one(trans, &io_opts, &iter, k); + ret = bch2_extent_get_apply_io_opts_one(trans, &io_opts, &iter, k, + SET_NEEDS_REBALANCE_other); if (ret) { bch2_trans_iter_exit(&iter); continue; @@ -1038,7 +893,7 @@ int bch2_move_data_phys(struct bch_fs *c, static bool evacuate_bucket_pred(struct bch_fs *c, void *_arg, enum btree_id btree, struct bkey_s_c k, - struct bch_io_opts *io_opts, + struct bch_inode_opts *io_opts, struct data_update_opts *data_opts) { struct evacuate_bucket_arg *arg = _arg; @@ -1079,7 +934,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, } typedef bool (*move_btree_pred)(struct bch_fs *, void *, - struct btree *, struct bch_io_opts *, + struct btree *, struct bch_inode_opts *, struct data_update_opts *); static int bch2_move_btree(struct bch_fs *c, @@ -1089,7 +944,6 @@ static int bch2_move_btree(struct bch_fs *c, struct bch_move_stats *stats) { bool kthread = (current->flags & PF_KTHREAD) != 0; - struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); struct moving_context ctxt; struct btree_trans *trans; struct btree_iter iter; @@ -1098,6 +952,9 @@ static int bch2_move_btree(struct bch_fs *c, struct data_update_opts data_opts; int ret = 0; + struct bch_inode_opts io_opts; + bch2_inode_opts_get(c, &io_opts); + bch2_moving_ctxt_init(&ctxt, c, NULL, stats, writepoint_ptr(&c->btree_write_point), true); @@ -1158,7 +1015,7 @@ next: static bool rereplicate_pred(struct bch_fs *c, void *arg, enum btree_id btree, struct bkey_s_c k, - struct bch_io_opts *io_opts, + struct bch_inode_opts *io_opts, struct data_update_opts *data_opts) { unsigned nr_good = bch2_bkey_durability(c, k); @@ -1189,7 +1046,7 @@ static bool rereplicate_pred(struct bch_fs *c, void *arg, static bool migrate_pred(struct bch_fs *c, void *arg, enum btree_id btree, struct bkey_s_c k, - struct bch_io_opts *io_opts, + struct bch_inode_opts *io_opts, struct data_update_opts *data_opts) { struct 
bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); @@ -1226,7 +1083,7 @@ static bool bformat_needs_redo(struct bkey_format *f) static bool rewrite_old_nodes_pred(struct bch_fs *c, void *arg, struct btree *b, - struct bch_io_opts *io_opts, + struct bch_inode_opts *io_opts, struct data_update_opts *data_opts) { if (b->version_ondisk != c->sb.version || @@ -1263,7 +1120,7 @@ int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats) static bool drop_extra_replicas_pred(struct bch_fs *c, void *arg, enum btree_id btree, struct bkey_s_c k, - struct bch_io_opts *io_opts, + struct bch_inode_opts *io_opts, struct data_update_opts *data_opts) { unsigned durability = bch2_bkey_durability(c, k); @@ -1301,7 +1158,7 @@ static bool drop_extra_replicas_pred(struct bch_fs *c, void *arg, static bool scrub_pred(struct bch_fs *c, void *_arg, enum btree_id btree, struct bkey_s_c k, - struct bch_io_opts *io_opts, + struct bch_inode_opts *io_opts, struct data_update_opts *data_opts) { struct bch_ioctl_data *arg = _arg; @@ -1404,7 +1261,7 @@ void bch2_move_stats_to_text(struct printbuf *out, struct bch_move_stats *stats) prt_str(out, " pos="); bch2_bbpos_to_text(out, stats->pos); prt_newline(out); - printbuf_indent_add(out, 2); + guard(printbuf_indent)(out); prt_printf(out, "keys moved:\t%llu\n", atomic64_read(&stats->keys_moved)); prt_printf(out, "keys raced:\t%llu\n", atomic64_read(&stats->keys_raced)); @@ -1419,8 +1276,6 @@ void bch2_move_stats_to_text(struct printbuf *out, struct bch_move_stats *stats) prt_printf(out, "bytes raced:\t"); prt_human_readable_u64(out, atomic64_read(&stats->sectors_raced) << 9); prt_newline(out); - - printbuf_indent_sub(out, 2); } static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, struct moving_context *ctxt) @@ -1429,7 +1284,7 @@ static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, str printbuf_tabstop_push(out, 32); bch2_move_stats_to_text(out, ctxt->stats); - printbuf_indent_add(out, 2); + guard(printbuf_indent)(out); prt_printf(out, "reads: ios %u/%u sectors %u/%u\n", atomic_read(&ctxt->read_ios), @@ -1443,15 +1298,13 @@ static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, str atomic_read(&ctxt->write_sectors), c->opts.move_bytes_in_flight >> 9); - printbuf_indent_add(out, 2); + guard(printbuf_indent)(out); scoped_guard(mutex, &ctxt->lock) { struct moving_io *io; list_for_each_entry(io, &ctxt->ios, io_list) bch2_data_update_inflight_to_text(out, &io->write); } - - printbuf_indent_sub(out, 4); } void bch2_fs_moving_ctxts_to_text(struct printbuf *out, struct bch_fs *c) diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h index 481026ff99ab..754b0ad45950 100644 --- a/fs/bcachefs/move.h +++ b/fs/bcachefs/move.h @@ -73,7 +73,7 @@ do { \ } while (1) typedef bool (*move_pred_fn)(struct bch_fs *, void *, enum btree_id, struct bkey_s_c, - struct bch_io_opts *, struct data_update_opts *); + struct bch_inode_opts *, struct data_update_opts *); extern const char * const bch2_data_ops_strs[]; @@ -87,45 +87,15 @@ void bch2_moving_ctxt_flush_all(struct moving_context *); void bch2_move_ctxt_wait_for_io(struct moving_context *); int bch2_move_ratelimit(struct moving_context *); -/* Inodes in different snapshots may have different IO options: */ -struct snapshot_io_opts_entry { - u32 snapshot; - struct bch_io_opts io_opts; -}; - -struct per_snapshot_io_opts { - u64 cur_inum; - struct bch_io_opts fs_io_opts; - DARRAY(struct snapshot_io_opts_entry) d; -}; - -static inline void per_snapshot_io_opts_init(struct 
per_snapshot_io_opts *io_opts, struct bch_fs *c) -{ - memset(io_opts, 0, sizeof(*io_opts)); - io_opts->fs_io_opts = bch2_opts_to_inode_opts(c->opts); -} - -static inline void per_snapshot_io_opts_exit(struct per_snapshot_io_opts *io_opts) -{ - darray_exit(&io_opts->d); -} - -int bch2_move_get_io_opts_one(struct btree_trans *, struct bch_io_opts *, - struct btree_iter *, struct bkey_s_c); - int bch2_scan_old_btree_nodes(struct bch_fs *, struct bch_move_stats *); int bch2_move_extent(struct moving_context *, struct move_bucket *, struct btree_iter *, struct bkey_s_c, - struct bch_io_opts, + struct bch_inode_opts, struct data_update_opts); -struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *, - struct per_snapshot_io_opts *, struct bpos, - struct btree_iter *, struct bkey_s_c); - int bch2_move_data_btree(struct moving_context *, struct bpos, struct bpos, move_pred_fn, void *, enum btree_id, unsigned); diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c index c3ef35dc01e2..122bc98e4cbb 100644 --- a/fs/bcachefs/opts.c +++ b/fs/bcachefs/opts.c @@ -518,7 +518,7 @@ void bch2_opts_to_text(struct printbuf *out, } } -int bch2_opt_hook_pre_set(struct bch_fs *c, struct bch_dev *ca, enum bch_opt_id id, u64 v) +int bch2_opt_hook_pre_set(struct bch_fs *c, struct bch_dev *ca, u64 inum, enum bch_opt_id id, u64 v) { int ret = 0; @@ -531,6 +531,8 @@ int bch2_opt_hook_pre_set(struct bch_fs *c, struct bch_dev *ca, enum bch_opt_id case Opt_compression: case Opt_background_compression: ret = bch2_check_set_has_compressed_data(c, v); + if (ret) + return ret; break; case Opt_erasure_code: if (v) @@ -546,7 +548,7 @@ int bch2_opt_hook_pre_set(struct bch_fs *c, struct bch_dev *ca, enum bch_opt_id int bch2_opts_hooks_pre_set(struct bch_fs *c) { for (unsigned i = 0; i < bch2_opts_nr; i++) { - int ret = bch2_opt_hook_pre_set(c, NULL, i, bch2_opt_get_by_id(&c->opts, i)); + int ret = bch2_opt_hook_pre_set(c, NULL, 0, i, bch2_opt_get_by_id(&c->opts, i)); if (ret) return ret; } @@ -555,26 +557,15 @@ int bch2_opts_hooks_pre_set(struct bch_fs *c) } void bch2_opt_hook_post_set(struct bch_fs *c, struct bch_dev *ca, u64 inum, - struct bch_opts *new_opts, enum bch_opt_id id) + enum bch_opt_id id, u64 v) { switch (id) { case Opt_foreground_target: - if (new_opts->foreground_target && - !new_opts->background_target) - bch2_set_rebalance_needs_scan(c, inum); - break; case Opt_compression: - if (new_opts->compression && - !new_opts->background_compression) - bch2_set_rebalance_needs_scan(c, inum); - break; case Opt_background_target: - if (new_opts->background_target) - bch2_set_rebalance_needs_scan(c, inum); - break; case Opt_background_compression: - if (new_opts->background_compression) - bch2_set_rebalance_needs_scan(c, inum); + bch2_set_rebalance_needs_scan(c, inum); + bch2_rebalance_wakeup(c); break; case Opt_rebalance_enabled: bch2_rebalance_wakeup(c); @@ -600,12 +591,14 @@ void bch2_opt_hook_post_set(struct bch_fs *c, struct bch_dev *ca, u64 inum, * upgrades at runtime as well, but right now there's nothing * that does that: */ - if (new_opts->version_upgrade == BCH_VERSION_UPGRADE_incompatible) + if (v == BCH_VERSION_UPGRADE_incompatible) bch2_sb_upgrade_incompat(c); break; default: break; } + + atomic_inc(&c->opt_change_cookie); } int bch2_parse_one_mount_opt(struct bch_fs *c, struct bch_opts *opts, @@ -802,16 +795,17 @@ bool bch2_opt_set_sb(struct bch_fs *c, struct bch_dev *ca, /* io opts: */ -struct bch_io_opts bch2_opts_to_inode_opts(struct bch_opts src) +void bch2_inode_opts_get(struct bch_fs *c, struct 
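
/*
 * Aside: bch2_opt_hook_post_set() above now bumps opt_change_cookie on
 * every option change, and the opts getters snapshot it into
 * bch_inode_opts.change_cookie, so cached opts can be revalidated with a
 * single atomic load. A sketch of the compare, with C11 atomics standing
 * in for the kernel's atomic_t:
 */
#include <stdatomic.h>
#include <stdbool.h>

static atomic_uint opt_change_cookie;

struct cached_opts {
	unsigned	change_cookie;	/* cookie seen when opts were built */
	/* ... the option fields themselves ... */
};

static bool cached_opts_valid(const struct cached_opts *o)
{
	return o->change_cookie == atomic_load(&opt_change_cookie);
}

/* writers bump the cookie after applying a change:
 * atomic_fetch_add(&opt_change_cookie, 1); */
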
bch_inode_opts *ret) { - struct bch_io_opts opts = { -#define x(_name, _bits) ._name = src._name, + memset(ret, 0, sizeof(*ret)); + +#define x(_name, _bits) ret->_name = c->opts._name, BCH_INODE_OPTS() #undef x - }; - bch2_io_opts_fixups(&opts); - return opts; + ret->change_cookie = atomic_read(&c->opt_change_cookie); + + bch2_io_opts_fixups(ret); } bool bch2_opt_is_inode_opt(enum bch_opt_id id) diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index f8828f4699c7..22cf109fb9c9 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -658,10 +658,9 @@ void bch2_opts_to_text(struct printbuf *, struct bch_fs *, struct bch_sb *, unsigned, unsigned, unsigned); -int bch2_opt_hook_pre_set(struct bch_fs *, struct bch_dev *, enum bch_opt_id, u64); +int bch2_opt_hook_pre_set(struct bch_fs *, struct bch_dev *, u64, enum bch_opt_id, u64); int bch2_opts_hooks_pre_set(struct bch_fs *); -void bch2_opt_hook_post_set(struct bch_fs *, struct bch_dev *, u64, - struct bch_opts *, enum bch_opt_id); +void bch2_opt_hook_post_set(struct bch_fs *, struct bch_dev *, u64, enum bch_opt_id, u64); int bch2_parse_one_mount_opt(struct bch_fs *, struct bch_opts *, struct printbuf *, const char *, const char *); @@ -670,16 +669,19 @@ int bch2_parse_mount_opts(struct bch_fs *, struct bch_opts *, struct printbuf *, /* inode opts: */ -struct bch_io_opts { +struct bch_inode_opts { #define x(_name, _bits) u##_bits _name; BCH_INODE_OPTS() #undef x + #define x(_name, _bits) u64 _name##_from_inode:1; BCH_INODE_OPTS() #undef x + + u32 change_cookie; }; -static inline void bch2_io_opts_fixups(struct bch_io_opts *opts) +static inline void bch2_io_opts_fixups(struct bch_inode_opts *opts) { if (!opts->background_target) opts->background_target = opts->foreground_target; @@ -692,7 +694,7 @@ static inline void bch2_io_opts_fixups(struct bch_io_opts *opts) } } -struct bch_io_opts bch2_opts_to_inode_opts(struct bch_opts); +void bch2_inode_opts_get(struct bch_fs *, struct bch_inode_opts *); bool bch2_opt_is_inode_opt(enum bch_opt_id); #endif /* _BCACHEFS_OPTS_H */ diff --git a/fs/bcachefs/printbuf.h b/fs/bcachefs/printbuf.h index 907e5c97550b..5fa5265d7ba8 100644 --- a/fs/bcachefs/printbuf.h +++ b/fs/bcachefs/printbuf.h @@ -299,4 +299,18 @@ DEFINE_GUARD(printbuf_atomic, struct printbuf *, printbuf_atomic_inc(_T), printbuf_atomic_dec(_T)); +static inline void printbuf_indent_add_2(struct printbuf *out) +{ + bch2_printbuf_indent_add(out, 2); +} + +static inline void printbuf_indent_sub_2(struct printbuf *out) +{ + bch2_printbuf_indent_sub(out, 2); +} + +DEFINE_GUARD(printbuf_indent, struct printbuf *, + printbuf_indent_add_2(_T), + printbuf_indent_sub_2(_T)); + #endif /* _BCACHEFS_PRINTBUF_H */ diff --git a/fs/bcachefs/progress.c b/fs/bcachefs/progress.c index 792fc6fef270..541ee951d1c9 100644 --- a/fs/bcachefs/progress.c +++ b/fs/bcachefs/progress.c @@ -12,7 +12,7 @@ void bch2_progress_init(struct progress_indicator_state *s, s->next_print = jiffies + HZ * 10; - for (unsigned i = 0; i < BTREE_ID_NR; i++) { + for (unsigned i = 0; i < btree_id_nr_alive(c); i++) { if (!(btree_id_mask & BIT_ULL(i))) continue; diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index 25bf72dc6488..fa73de7890da 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -43,8 +43,57 @@ static const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s return bch2_bkey_ptrs_rebalance_opts(bch2_bkey_ptrs_c(k)); } +void bch2_extent_rebalance_to_text(struct printbuf *out, struct bch_fs *c, + const struct bch_extent_rebalance 
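
/*
 * Aside: DEFINE_GUARD(printbuf_indent, ...) above plugs the indent
 * add/sub pair into the kernel's cleanup.h scope guards, which is what
 * lets guard(printbuf_indent)(out) and scoped_guard(printbuf_indent, out)
 * replace the manual printbuf_indent_add/_sub pairs throughout this
 * patch. A freestanding sketch of the underlying mechanism (illustrative
 * names, GCC/Clang cleanup attribute):
 */
struct indent_guard { int *level; };

static inline struct indent_guard indent_enter(int *level)
{
	*level += 2;
	return (struct indent_guard){ .level = level };
}

static inline void indent_exit(struct indent_guard *g)
{
	*g->level -= 2;
}

#define INDENT_GUARD(lvl)						\
	struct indent_guard __guard					\
	__attribute__((cleanup(indent_exit))) = indent_enter(lvl)

static void print_indented(int *level)
{
	INDENT_GUARD(level);	/* indent += 2 here */
	/* ... emit lines at the deeper indent ... */
}				/* indent -= 2 on every scope exit, even early returns */
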
*r) +{ + prt_printf(out, "replicas=%u", r->data_replicas); + if (r->data_replicas_from_inode) + prt_str(out, " (inode)"); + + prt_str(out, " checksum="); + bch2_prt_csum_opt(out, r->data_checksum); + if (r->data_checksum_from_inode) + prt_str(out, " (inode)"); + + if (r->background_compression || r->background_compression_from_inode) { + prt_str(out, " background_compression="); + bch2_compression_opt_to_text(out, r->background_compression); + + if (r->background_compression_from_inode) + prt_str(out, " (inode)"); + } + + if (r->background_target || r->background_target_from_inode) { + prt_str(out, " background_target="); + if (c) + bch2_target_to_text(out, c, r->background_target); + else + prt_printf(out, "%u", r->background_target); + + if (r->background_target_from_inode) + prt_str(out, " (inode)"); + } + + if (r->promote_target || r->promote_target_from_inode) { + prt_str(out, " promote_target="); + if (c) + bch2_target_to_text(out, c, r->promote_target); + else + prt_printf(out, "%u", r->promote_target); + + if (r->promote_target_from_inode) + prt_str(out, " (inode)"); + } + + if (r->erasure_code || r->erasure_code_from_inode) { + prt_printf(out, " ec=%u", r->erasure_code); + if (r->erasure_code_from_inode) + prt_str(out, " (inode)"); + } +} + static inline unsigned bch2_bkey_ptrs_need_compress(struct bch_fs *c, - struct bch_io_opts *opts, + struct bch_inode_opts *opts, struct bkey_s_c k, struct bkey_ptrs_c ptrs) { @@ -71,7 +120,7 @@ static inline unsigned bch2_bkey_ptrs_need_compress(struct bch_fs *c, } static inline unsigned bch2_bkey_ptrs_need_move(struct bch_fs *c, - struct bch_io_opts *opts, + struct bch_inode_opts *opts, struct bkey_ptrs_c ptrs) { if (!opts->background_target || @@ -92,7 +141,7 @@ static inline unsigned bch2_bkey_ptrs_need_move(struct bch_fs *c, } static unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c, - struct bch_io_opts *opts, + struct bch_inode_opts *opts, struct bkey_s_c k) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); @@ -145,7 +194,7 @@ incompressible: return sectors; } -static bool bch2_bkey_rebalance_needs_update(struct bch_fs *c, struct bch_io_opts *opts, +static bool bch2_bkey_rebalance_needs_update(struct bch_fs *c, struct bch_inode_opts *opts, struct bkey_s_c k) { if (!bkey_extent_is_direct_data(k.k)) @@ -161,8 +210,10 @@ static bool bch2_bkey_rebalance_needs_update(struct bch_fs *c, struct bch_io_opt } } -int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bch_io_opts *opts, - struct bkey_i *_k) +int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bch_inode_opts *opts, + struct bkey_i *_k, + enum set_needs_rebalance_ctx ctx, + u32 change_cookie) { if (!bkey_extent_is_direct_data(&_k->k)) return 0; @@ -186,10 +237,11 @@ int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bch_io_opts *opts, return 0; } -int bch2_get_update_rebalance_opts(struct btree_trans *trans, - struct bch_io_opts *io_opts, - struct btree_iter *iter, - struct bkey_s_c k) +static int bch2_get_update_rebalance_opts(struct btree_trans *trans, + struct bch_inode_opts *io_opts, + struct btree_iter *iter, + struct bkey_s_c k, + enum set_needs_rebalance_ctx ctx) { BUG_ON(iter->flags & BTREE_ITER_is_extents); BUG_ON(iter->flags & BTREE_ITER_filter_snapshots); @@ -218,10 +270,121 @@ int bch2_get_update_rebalance_opts(struct btree_trans *trans, /* On successfull transaction commit, @k was invalidated: */ - return bch2_bkey_set_needs_rebalance(trans->c, io_opts, n) ?: + return bch2_bkey_set_needs_rebalance(trans->c, io_opts, n, ctx, 0) ?: 
bch2_trans_update(trans, iter, n, BTREE_UPDATE_internal_snapshot_node) ?: bch2_trans_commit(trans, NULL, NULL, 0) ?: - bch_err_throw(trans->c, transaction_restart_nested); + bch_err_throw(trans->c, transaction_restart_commit); +} + +static struct bch_inode_opts *bch2_extent_get_io_opts(struct btree_trans *trans, + struct per_snapshot_io_opts *io_opts, + struct bpos extent_pos, /* extent_iter, extent_k may be in reflink btree */ + struct btree_iter *extent_iter, + struct bkey_s_c extent_k) +{ + struct bch_fs *c = trans->c; + u32 restart_count = trans->restart_count; + int ret = 0; + + if (btree_iter_path(trans, extent_iter)->level) + return &io_opts->fs_io_opts; + + if (extent_k.k->type == KEY_TYPE_reflink_v) + return &io_opts->fs_io_opts; + + if (io_opts->cur_inum != extent_pos.inode) { + io_opts->d.nr = 0; + + ret = for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_pos.inode), + BTREE_ITER_all_snapshots, k, ({ + if (k.k->p.offset != extent_pos.inode) + break; + + if (!bkey_is_inode(k.k)) + continue; + + struct bch_inode_unpacked inode; + _ret3 = bch2_inode_unpack(k, &inode); + if (_ret3) + break; + + struct snapshot_io_opts_entry e = { .snapshot = k.k->p.snapshot }; + bch2_inode_opts_get_inode(c, &inode, &e.io_opts); + + darray_push(&io_opts->d, e); + })); + io_opts->cur_inum = extent_pos.inode; + } + + ret = ret ?: trans_was_restarted(trans, restart_count); + if (ret) + return ERR_PTR(ret); + + if (extent_k.k->p.snapshot) + darray_for_each(io_opts->d, i) + if (bch2_snapshot_is_ancestor(c, extent_k.k->p.snapshot, i->snapshot)) + return &i->io_opts; + + return &io_opts->fs_io_opts; +} + +struct bch_inode_opts *bch2_extent_get_apply_io_opts(struct btree_trans *trans, + struct per_snapshot_io_opts *snapshot_io_opts, + struct bpos extent_pos, /* extent_iter, extent_k may be in reflink btree */ + struct btree_iter *extent_iter, + struct bkey_s_c extent_k, + enum set_needs_rebalance_ctx ctx) +{ + struct bch_inode_opts *opts = + bch2_extent_get_io_opts(trans, snapshot_io_opts, extent_pos, extent_iter, extent_k); + if (IS_ERR(opts) || btree_iter_path(trans, extent_iter)->level) + return opts; + + int ret = bch2_get_update_rebalance_opts(trans, opts, extent_iter, extent_k, ctx); + return ret ? ERR_PTR(ret) : opts; +} + +int bch2_extent_get_io_opts_one(struct btree_trans *trans, + struct bch_inode_opts *io_opts, + struct btree_iter *extent_iter, + struct bkey_s_c extent_k, + enum set_needs_rebalance_ctx ctx) +{ + struct bch_fs *c = trans->c; + + bch2_inode_opts_get(c, io_opts); + + /* reflink btree? 
*/ + if (extent_k.k->p.inode) { + CLASS(btree_iter, inode_iter)(trans, BTREE_ID_inodes, + SPOS(0, extent_k.k->p.inode, extent_k.k->p.snapshot), + BTREE_ITER_cached); + struct bkey_s_c inode_k = bch2_btree_iter_peek_slot(&inode_iter); + int ret = bkey_err(inode_k); + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + return ret; + + if (!ret && bkey_is_inode(inode_k.k)) { + struct bch_inode_unpacked inode; + bch2_inode_unpack(inode_k, &inode); + bch2_inode_opts_get_inode(c, &inode, io_opts); + } + } + + return 0; +} + +int bch2_extent_get_apply_io_opts_one(struct btree_trans *trans, + struct bch_inode_opts *io_opts, + struct btree_iter *extent_iter, + struct bkey_s_c extent_k, + enum set_needs_rebalance_ctx ctx) +{ + int ret = bch2_extent_get_io_opts_one(trans, io_opts, extent_iter, extent_k, ctx); + if (ret || btree_iter_path(trans, extent_iter)->level) + return ret; + + return bch2_get_update_rebalance_opts(trans, io_opts, extent_iter, extent_k, ctx); } #define REBALANCE_WORK_SCAN_OFFSET (U64_MAX - 1) @@ -354,9 +517,10 @@ static int bch2_bkey_clear_needs_rebalance(struct btree_trans *trans, } static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans, + struct per_snapshot_io_opts *snapshot_io_opts, struct bpos work_pos, struct btree_iter *extent_iter, - struct bch_io_opts *io_opts, + struct bch_inode_opts **opts_ret, struct data_update_opts *data_opts) { struct bch_fs *c = trans->c; @@ -370,13 +534,19 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans, if (bkey_err(k)) return k; - int ret = bch2_move_get_io_opts_one(trans, io_opts, extent_iter, k); + struct bch_inode_opts *opts = + bch2_extent_get_apply_io_opts(trans, snapshot_io_opts, + extent_iter->pos, extent_iter, k, + SET_NEEDS_REBALANCE_other); + int ret = PTR_ERR_OR_ZERO(opts); if (ret) return bkey_s_c_err(ret); + *opts_ret = opts; + memset(data_opts, 0, sizeof(*data_opts)); - data_opts->rewrite_ptrs = bch2_bkey_ptrs_need_rebalance(c, io_opts, k); - data_opts->target = io_opts->background_target; + data_opts->rewrite_ptrs = bch2_bkey_ptrs_need_rebalance(c, opts, k); + data_opts->target = opts->background_target; data_opts->write_flags |= BCH_WRITE_only_specified_devs; if (!data_opts->rewrite_ptrs) { @@ -401,19 +571,19 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans, struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - unsigned p = bch2_bkey_ptrs_need_compress(c, io_opts, k, ptrs); + unsigned p = bch2_bkey_ptrs_need_compress(c, opts, k, ptrs); if (p) { prt_str(&buf, "compression="); - bch2_compression_opt_to_text(&buf, io_opts->background_compression); + bch2_compression_opt_to_text(&buf, opts->background_compression); prt_str(&buf, " "); bch2_prt_u64_base2(&buf, p); prt_newline(&buf); } - p = bch2_bkey_ptrs_need_move(c, io_opts, ptrs); + p = bch2_bkey_ptrs_need_move(c, opts, ptrs); if (p) { prt_str(&buf, "move="); - bch2_target_to_text(&buf, c, io_opts->background_target); + bch2_target_to_text(&buf, c, opts->background_target); prt_str(&buf, " "); bch2_prt_u64_base2(&buf, p); prt_newline(&buf); @@ -428,6 +598,7 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans, noinline_for_stack static int do_rebalance_extent(struct moving_context *ctxt, + struct per_snapshot_io_opts *snapshot_io_opts, struct bpos work_pos, struct btree_iter *extent_iter) { @@ -435,7 +606,7 @@ static int do_rebalance_extent(struct moving_context *ctxt, struct bch_fs *c = trans->c; struct bch_fs_rebalance *r = &trans->c->rebalance; struct data_update_opts data_opts; - struct 
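
/*
 * Aside: bch2_extent_get_io_opts() above caches one bch_inode_opts per
 * snapshot of the current inode number and resolves an extent by snapshot
 * ancestry. The lookup reduced to its essentials (types and the ancestor
 * callback are illustrative):
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct opts_entry {
	uint32_t	snapshot;
	int		opts;	/* stands in for struct bch_inode_opts */
};

static int snapshot_opts_lookup(const struct opts_entry *d, size_t nr,
				uint32_t extent_snapshot,
				bool (*is_ancestor)(uint32_t child, uint32_t anc),
				int fs_opts)
{
	for (size_t i = 0; i < nr; i++)
		if (is_ancestor(extent_snapshot, d[i].snapshot))
			return d[i].opts;	/* first ancestor match wins */

	return fs_opts;				/* fs-wide defaults otherwise */
}
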
bch_io_opts io_opts; + struct bch_inode_opts *io_opts; struct bkey_s_c k; struct bkey_buf sk; int ret; @@ -446,8 +617,8 @@ static int do_rebalance_extent(struct moving_context *ctxt, bch2_bkey_buf_init(&sk); ret = lockrestart_do(trans, - bkey_err(k = next_rebalance_extent(trans, work_pos, - extent_iter, &io_opts, &data_opts))); + bkey_err(k = next_rebalance_extent(trans, snapshot_io_opts, + work_pos, extent_iter, &io_opts, &data_opts))); if (ret || !k.k) goto out; @@ -460,7 +631,7 @@ static int do_rebalance_extent(struct moving_context *ctxt, bch2_bkey_buf_reassemble(&sk, c, k); k = bkey_i_to_s_c(sk.k); - ret = bch2_move_extent(ctxt, NULL, extent_iter, k, io_opts, data_opts); + ret = bch2_move_extent(ctxt, NULL, extent_iter, k, *io_opts, data_opts); if (ret) { if (bch2_err_matches(ret, ENOMEM)) { /* memory allocation failure, wait for some IO to finish */ @@ -479,7 +650,31 @@ out: return ret; } +static int do_rebalance_scan_indirect(struct btree_trans *trans, + struct bkey_s_c_reflink_p p, + struct bch_inode_opts *opts) +{ + u64 idx = REFLINK_P_IDX(p.v) - le32_to_cpu(p.v->front_pad); + u64 end = REFLINK_P_IDX(p.v) + p.k->size + le32_to_cpu(p.v->back_pad); + u32 restart_count = trans->restart_count; + + int ret = for_each_btree_key(trans, iter, BTREE_ID_reflink, + POS(0, idx), BTREE_ITER_not_extents, k, ({ + if (bpos_ge(bkey_start_pos(k.k), POS(0, end))) + break; + bch2_get_update_rebalance_opts(trans, opts, &iter, k, + SET_NEEDS_REBALANCE_opt_change_indirect); + })); + if (ret) + return ret; + + /* suppress trans_was_restarted() check */ + trans->restart_count = restart_count; + return 0; +} + static int do_rebalance_scan(struct moving_context *ctxt, + struct per_snapshot_io_opts *snapshot_io_opts, u64 inum, u64 cookie, u64 *sectors_scanned) { struct btree_trans *trans = ctxt->trans; @@ -499,32 +694,33 @@ static int do_rebalance_scan(struct moving_context *ctxt, r->state = BCH_REBALANCE_scanning; - struct per_snapshot_io_opts snapshot_io_opts; - per_snapshot_io_opts_init(&snapshot_io_opts, c); - int ret = for_each_btree_key_max(trans, iter, BTREE_ID_extents, r->scan_start.pos, r->scan_end.pos, BTREE_ITER_all_snapshots| BTREE_ITER_prefetch, k, ({ ctxt->stats->pos = BBPOS(iter.btree_id, iter.pos); - struct bch_io_opts *io_opts = bch2_move_get_io_opts(trans, - &snapshot_io_opts, iter.pos, &iter, k); - PTR_ERR_OR_ZERO(io_opts); + struct bch_inode_opts *opts = bch2_extent_get_apply_io_opts(trans, + snapshot_io_opts, iter.pos, &iter, k, + SET_NEEDS_REBALANCE_opt_change); + PTR_ERR_OR_ZERO(opts) ?: + (inum && + k.k->type == KEY_TYPE_reflink_p && + REFLINK_P_MAY_UPDATE_OPTIONS(bkey_s_c_to_reflink_p(k).v) + ? 
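
/*
 * Aside: do_rebalance_scan_indirect() above widens the scanned reflink
 * range by the reflink pointer's pads; the range arithmetic in isolation:
 */
#include <stdint.h>

static void reflink_scan_range(uint64_t idx, uint64_t size,
			       uint32_t front_pad, uint32_t back_pad,
			       uint64_t *start, uint64_t *end)
{
	*start	= idx - front_pad;		/* pads extend the window */
	*end	= idx + size + back_pad;	/* half-open [start, end) */
}
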
do_rebalance_scan_indirect(trans, bkey_s_c_to_reflink_p(k), opts) + : 0); })) ?: commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, bch2_clear_rebalance_needs_scan(trans, inum, cookie)); - per_snapshot_io_opts_exit(&snapshot_io_opts); *sectors_scanned += atomic64_read(&r->scan_stats.sectors_seen); - bch2_move_stats_exit(&r->scan_stats, c); - /* * Ensure that the rebalance_work entries we created are seen by the * next iteration of do_rebalance(), so we don't end up stuck in * rebalance_wait(): */ *sectors_scanned += 1; + bch2_move_stats_exit(&r->scan_stats, c); bch2_btree_write_buffer_flush_sync(trans); @@ -576,6 +772,9 @@ static int do_rebalance(struct moving_context *ctxt) bch2_move_stats_init(&r->work_stats, "rebalance_work"); + struct per_snapshot_io_opts snapshot_io_opts; + per_snapshot_io_opts_init(&snapshot_io_opts, c); + while (!bch2_move_ratelimit(ctxt)) { if (!bch2_rebalance_enabled(c)) { bch2_moving_ctxt_flush_all(ctxt); @@ -590,15 +789,18 @@ static int do_rebalance(struct moving_context *ctxt) break; ret = k->k.type == KEY_TYPE_cookie - ? do_rebalance_scan(ctxt, k->k.p.inode, + ? do_rebalance_scan(ctxt, &snapshot_io_opts, + k->k.p.inode, le64_to_cpu(bkey_i_to_cookie(k)->v.cookie), &sectors_scanned) - : do_rebalance_extent(ctxt, k->k.p, &extent_iter); + : do_rebalance_extent(ctxt, &snapshot_io_opts, + k->k.p, &extent_iter); if (ret) break; } bch2_trans_iter_exit(&extent_iter); + per_snapshot_io_opts_exit(&snapshot_io_opts); bch2_move_stats_exit(&r->work_stats, c); if (!ret && @@ -661,7 +863,7 @@ void bch2_rebalance_status_to_text(struct printbuf *out, struct bch_fs *c) prt_str(out, bch2_rebalance_state_strs[r->state]); prt_newline(out); - printbuf_indent_add(out, 2); + guard(printbuf_indent)(out); switch (r->state) { case BCH_REBALANCE_waiting: { @@ -700,8 +902,6 @@ void bch2_rebalance_status_to_text(struct printbuf *out, struct bch_fs *c) bch2_prt_task_backtrace(out, t, 0, GFP_KERNEL); put_task_struct(t); } - - printbuf_indent_sub(out, 2); } void bch2_rebalance_stop(struct bch_fs *c) diff --git a/fs/bcachefs/rebalance.h b/fs/bcachefs/rebalance.h index 7a565ea7dbfc..bff91aa0102e 100644 --- a/fs/bcachefs/rebalance.h +++ b/fs/bcachefs/rebalance.h @@ -8,7 +8,7 @@ #include "rebalance_types.h" static inline struct bch_extent_rebalance io_opts_to_rebalance_opts(struct bch_fs *c, - struct bch_io_opts *opts) + struct bch_inode_opts *opts) { struct bch_extent_rebalance r = { .type = BIT(BCH_EXTENT_ENTRY_rebalance), @@ -26,12 +26,55 @@ static inline struct bch_extent_rebalance io_opts_to_rebalance_opts(struct bch_f return r; }; +void bch2_extent_rebalance_to_text(struct printbuf *, struct bch_fs *, + const struct bch_extent_rebalance *); + u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *, struct bkey_s_c); -int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bch_io_opts *, struct bkey_i *); -int bch2_get_update_rebalance_opts(struct btree_trans *, - struct bch_io_opts *, - struct btree_iter *, - struct bkey_s_c); + +enum set_needs_rebalance_ctx { + SET_NEEDS_REBALANCE_opt_change, + SET_NEEDS_REBALANCE_opt_change_indirect, + SET_NEEDS_REBALANCE_foreground, + SET_NEEDS_REBALANCE_other, +}; + +int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bch_inode_opts *, + struct bkey_i *, enum set_needs_rebalance_ctx, u32); + +/* Inodes in different snapshots may have different IO options: */ +struct snapshot_io_opts_entry { + u32 snapshot; + struct bch_inode_opts io_opts; +}; + +struct per_snapshot_io_opts { + u64 cur_inum; + struct bch_inode_opts fs_io_opts; + DARRAY(struct 
snapshot_io_opts_entry) d; +}; + +static inline void per_snapshot_io_opts_init(struct per_snapshot_io_opts *io_opts, struct bch_fs *c) +{ + memset(io_opts, 0, sizeof(*io_opts)); + bch2_inode_opts_get(c, &io_opts->fs_io_opts); +} + +static inline void per_snapshot_io_opts_exit(struct per_snapshot_io_opts *io_opts) +{ + darray_exit(&io_opts->d); +} + +struct bch_inode_opts *bch2_extent_get_apply_io_opts(struct btree_trans *, + struct per_snapshot_io_opts *, struct bpos, + struct btree_iter *, struct bkey_s_c, + enum set_needs_rebalance_ctx); + +int bch2_extent_get_io_opts_one(struct btree_trans *, struct bch_inode_opts *, + struct btree_iter *, struct bkey_s_c, + enum set_needs_rebalance_ctx); +int bch2_extent_get_apply_io_opts_one(struct btree_trans *, struct bch_inode_opts *, + struct btree_iter *, struct bkey_s_c, + enum set_needs_rebalance_ctx); int bch2_set_rebalance_needs_scan_trans(struct btree_trans *, u64); int bch2_set_rebalance_needs_scan(struct bch_fs *, u64 inum); diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 6319144a440c..531c2ef128ae 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -15,6 +15,7 @@ #include "error.h" #include "journal_io.h" #include "journal_reclaim.h" +#include "journal_sb.h" #include "journal_seq_blacklist.h" #include "logged_ops.h" #include "move.h" @@ -67,9 +68,12 @@ int bch2_btree_lost_data(struct bch_fs *c, #endif write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_lru_entry_bad, ext->errors_silent); + write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_to_missing_lru_entry, ext->errors_silent); write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_backpointer_to_missing_ptr, ext->errors_silent); write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent); write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong, ext->errors_silent); + write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_need_discard_key_wrong, ext->errors_silent); + write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_freespace_key_wrong, ext->errors_silent); switch (btree) { case BTREE_ID_alloc: @@ -644,6 +648,10 @@ int bch2_fs_recovery(struct bch_fs *c) bch_info(c, "recovering from clean shutdown, journal seq %llu", le64_to_cpu(clean->journal_seq)); + + ret = bch2_sb_journal_sort(c); + if (ret) + goto err; } else { bch_info(c, "recovering from unclean shutdown"); } @@ -829,33 +837,39 @@ use_clean: bch2_async_btree_node_rewrites_flush(c); /* fsync if we fixed errors */ - if (test_bit(BCH_FS_errors_fixed, &c->flags)) { + bool errors_fixed = test_bit(BCH_FS_errors_fixed, &c->flags) || + test_bit(BCH_FS_errors_fixed_silent, &c->flags); + + if (errors_fixed) { bch2_journal_flush_all_pins(&c->journal); bch2_journal_meta(&c->journal); } /* If we fixed errors, verify that fs is actually clean now: */ if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) && - test_bit(BCH_FS_errors_fixed, &c->flags) && + errors_fixed && !test_bit(BCH_FS_errors_not_fixed, &c->flags) && !test_bit(BCH_FS_error, &c->flags)) { bch2_flush_fsck_errs(c); bch_info(c, "Fixed errors, running fsck a second time to verify fs is clean"); + errors_fixed = test_bit(BCH_FS_errors_fixed, &c->flags); clear_bit(BCH_FS_errors_fixed, &c->flags); + clear_bit(BCH_FS_errors_fixed_silent, &c->flags); ret = bch2_run_recovery_passes(c, BCH_RECOVERY_PASS_check_alloc_info); if (ret) goto err; - if (test_bit(BCH_FS_errors_fixed, &c->flags) || + if (errors_fixed || test_bit(BCH_FS_errors_not_fixed, &c->flags)) { bch_err(c, "Second fsck run was not clean"); 
set_bit(BCH_FS_errors_not_fixed, &c->flags); } - set_bit(BCH_FS_errors_fixed, &c->flags); + if (errors_fixed) + set_bit(BCH_FS_errors_fixed, &c->flags); } if (enabled_qtypes(c)) { diff --git a/fs/bcachefs/recovery_passes_format.h b/fs/bcachefs/recovery_passes_format.h index 2696eee00345..d5654de64e4c 100644 --- a/fs/bcachefs/recovery_passes_format.h +++ b/fs/bcachefs/recovery_passes_format.h @@ -29,6 +29,7 @@ x(stripes_read, 1, 0) \ x(initialize_subvolumes, 2, 0) \ x(snapshots_read, 3, PASS_ALWAYS) \ + x(delete_dead_interior_snapshots, 44, 0) \ x(check_allocations, 5, PASS_FSCK_ALLOC) \ x(trans_mark_dev_sbs, 6, PASS_ALWAYS|PASS_SILENT|PASS_ALLOC) \ x(fs_journal_alloc, 7, PASS_ALWAYS|PASS_SILENT|PASS_ALLOC) \ diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index 238a362de19e..d54468fdcb18 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -589,7 +589,6 @@ s64 bch2_remap_range(struct bch_fs *c, struct bpos dst_start = POS(dst_inum.inum, dst_offset); struct bpos src_start = POS(src_inum.inum, src_offset); struct bpos dst_end = dst_start, src_end = src_start; - struct bch_io_opts opts; struct bpos src_want; u64 dst_done = 0; u32 dst_snapshot, src_snapshot; @@ -609,10 +608,6 @@ s64 bch2_remap_range(struct bch_fs *c, bch2_bkey_buf_init(&new_src); CLASS(btree_trans, trans)(c); - ret = bch2_inum_opts_get(trans, src_inum, &opts); - if (ret) - goto err; - bch2_trans_iter_init(trans, &src_iter, BTREE_ID_extents, src_start, BTREE_ITER_intent); bch2_trans_iter_init(trans, &dst_iter, BTREE_ID_extents, dst_start, @@ -709,11 +704,10 @@ s64 bch2_remap_range(struct bch_fs *c, min(src_k.k->p.offset - src_want.offset, dst_end.offset - dst_iter.pos.offset)); - ret = bch2_bkey_set_needs_rebalance(c, &opts, new_dst.k) ?: - bch2_extent_update(trans, dst_inum, &dst_iter, - new_dst.k, &disk_res, - new_i_size, i_sectors_delta, - true); + ret = bch2_extent_update(trans, dst_inum, &dst_iter, - new_dst.k, &disk_res, + new_dst.k, &disk_res, + new_i_size, i_sectors_delta, + true, 0); bch2_disk_reservation_put(c, &disk_res); } bch2_trans_iter_exit(&dst_iter); @@ -744,7 +738,7 @@ s64 bch2_remap_range(struct bch_fs *c, bch2_trans_iter_exit(&inode_iter); } while (bch2_err_matches(ret2, BCH_ERR_transaction_restart)); -err: + bch2_bkey_buf_exit(&new_src, c); bch2_bkey_buf_exit(&new_dst, c); diff --git a/fs/bcachefs/sb-errors.c b/fs/bcachefs/sb-errors.c index 41a259eab4fb..b356e80135fd 100644 --- a/fs/bcachefs/sb-errors.c +++ b/fs/bcachefs/sb-errors.c @@ -54,23 +54,43 @@ static int bch2_sb_errors_validate(struct bch_sb *sb, struct bch_sb_field *f, return 0; } +static int error_entry_cmp(const void *_l, const void *_r) +{ + const struct bch_sb_field_error_entry *l = _l; + const struct bch_sb_field_error_entry *r = _r; + + return -cmp_int(l->last_error_time, r->last_error_time); +} + static void bch2_sb_errors_to_text(struct printbuf *out, struct bch_sb *sb, struct bch_sb_field *f) { struct bch_sb_field_errors *e = field_to_type(f, errors); - unsigned i, nr = bch2_sb_field_errors_nr_entries(e); + unsigned nr = bch2_sb_field_errors_nr_entries(e); + + struct bch_sb_field_error_entry *sorted = kvmalloc_array(nr, sizeof(*sorted), GFP_KERNEL); + + if (sorted) { + memcpy(sorted, e->entries, nr * sizeof(*sorted)); + sort(sorted, nr, sizeof(*sorted), error_entry_cmp, NULL); + } else { + sorted = e->entries; + } if (out->nr_tabstops <= 1) printbuf_tabstop_push(out, 16); - for (i = 0; i < nr; i++) { - bch2_sb_error_id_to_text(out, BCH_SB_ERROR_ENTRY_ID(&e->entries[i])); + for (struct bch_sb_field_error_entry *i = sorted; i < sorted + nr; i++) { + bch2_sb_error_id_to_text(out, 
BCH_SB_ERROR_ENTRY_ID(i)); prt_tab(out); - prt_u64(out, BCH_SB_ERROR_ENTRY_NR(&e->entries[i])); + prt_u64(out, BCH_SB_ERROR_ENTRY_NR(i)); prt_tab(out); - bch2_prt_datetime(out, le64_to_cpu(e->entries[i].last_error_time)); + bch2_prt_datetime(out, le64_to_cpu(i->last_error_time)); prt_newline(out); } + + if (sorted != e->entries) + kvfree(sorted); } const struct bch_sb_field_ops bch_sb_field_ops_errors = { diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index d26a0ca4a59d..963f8c2690c9 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -36,12 +36,10 @@ int bch2_dev_missing_bkey(struct bch_fs *c, struct bkey_s_c k, unsigned dev) void bch2_dev_missing_atomic(struct bch_fs *c, unsigned dev) { - if (dev != BCH_SB_MEMBER_INVALID) { + if (dev != BCH_SB_MEMBER_INVALID) bch2_fs_inconsistent(c, "pointer to %s device %u", test_bit(dev, c->devs_removed.d) ? "removed" : "nonexistent", dev); - dump_stack(); - } } void bch2_dev_bucket_missing(struct bch_dev *ca, u64 bucket) @@ -287,10 +285,9 @@ static void member_to_text(struct printbuf *out, return; prt_printf(out, "Device:\t%u\n", idx); + guard(printbuf_indent)(out); - printbuf_indent_add(out, 2); bch2_member_to_text(out, &m, gi, sb, idx); - printbuf_indent_sub(out, 2); } static int bch2_sb_members_v1_validate(struct bch_sb *sb, struct bch_sb_field *f, @@ -437,21 +434,19 @@ void bch2_dev_io_errors_to_text(struct printbuf *out, struct bch_dev *ca) prt_str(out, "IO errors since filesystem creation"); prt_newline(out); - printbuf_indent_add(out, 2); - for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) - prt_printf(out, "%s:\t%llu\n", bch2_member_error_strs[i], atomic64_read(&ca->errors[i])); - printbuf_indent_sub(out, 2); + scoped_guard(printbuf_indent, out) + for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) + prt_printf(out, "%s:\t%llu\n", bch2_member_error_strs[i], atomic64_read(&ca->errors[i])); prt_str(out, "IO errors since "); bch2_pr_time_units(out, (ktime_get_real_seconds() - le64_to_cpu(m.errors_reset_time)) * NSEC_PER_SEC); prt_str(out, " ago"); prt_newline(out); - printbuf_indent_add(out, 2); - for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) - prt_printf(out, "%s:\t%llu\n", bch2_member_error_strs[i], - atomic64_read(&ca->errors[i]) - le64_to_cpu(m.errors_at_reset[i])); - printbuf_indent_sub(out, 2); + scoped_guard(printbuf_indent, out) + for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) + prt_printf(out, "%s:\t%llu\n", bch2_member_error_strs[i], + atomic64_read(&ca->errors[i]) - le64_to_cpu(m.errors_at_reset[i])); } void bch2_dev_errors_reset(struct bch_dev *ca) diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index eab0c1e3ff56..00546b59dca6 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -309,7 +309,7 @@ static int __bch2_mark_snapshot(struct btree_trans *trans, if (new.k->type == KEY_TYPE_snapshot) { struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new); - t->state = !BCH_SNAPSHOT_DELETED(s.v) + t->state = !BCH_SNAPSHOT_DELETED(s.v) && !BCH_SNAPSHOT_NO_KEYS(s.v) ? 
SNAPSHOT_ID_live : SNAPSHOT_ID_deleted; t->parent = le32_to_cpu(s.v->parent); @@ -1101,6 +1101,20 @@ int bch2_snapshot_node_set_deleted(struct btree_trans *trans, u32 id) return 0; } +static int bch2_snapshot_node_set_no_keys(struct btree_trans *trans, u32 id) +{ + struct bkey_i_snapshot *s = + bch2_bkey_get_mut_typed(trans, BTREE_ID_snapshots, POS(0, id), 0, snapshot); + int ret = PTR_ERR_OR_ZERO(s); + bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c, "missing snapshot %u", id); + if (unlikely(ret)) + return ret; + + SET_BCH_SNAPSHOT_NO_KEYS(&s->v, true); + s->v.subvol = 0; + return 0; +} + static inline void normalize_snapshot_child_pointers(struct bch_snapshot *s) { if (le32_to_cpu(s->children[0]) < le32_to_cpu(s->children[1])) @@ -1783,22 +1797,9 @@ int __bch2_delete_dead_snapshots(struct bch_fs *c) if (ret) goto err; } - - /* - * Fixing children of deleted snapshots can't be done completely - * atomically, if we crash between here and when we delete the interior - * nodes some depth fields will be off: - */ - ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, POS_MIN, - BTREE_ITER_intent, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &d->delete_interior)); - if (ret) - goto err; - darray_for_each(d->delete_interior, i) { ret = commit_do(trans, NULL, NULL, 0, - bch2_snapshot_node_delete(trans, i->id)); + bch2_snapshot_node_set_no_keys(trans, i->id)); if (!bch2_err_matches(ret, EROFS)) bch_err_msg(c, ret, "deleting snapshot %u", i->id); if (ret) @@ -1887,6 +1888,66 @@ int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans, return ret; } +static int bch2_get_dead_interior_snapshots(struct btree_trans *trans, struct bkey_s_c k) +{ + struct bch_fs *c = trans->c; + + if (k.k->type == KEY_TYPE_snapshot && + BCH_SNAPSHOT_NO_KEYS(bkey_s_c_to_snapshot(k).v)) { + struct snapshot_interior_delete n = { + .id = k.k->p.offset, + .live_child = live_child(c, k.k->p.offset), + }; + + if (!n.live_child) { + bch_err(c, "error finding live child of snapshot %u", n.id); + return -EINVAL; + } + + return darray_push(&c->snapshot_delete.delete_interior, n); + } + + return 0; +} + +int bch2_delete_dead_interior_snapshots(struct bch_fs *c) +{ + CLASS(btree_trans, trans)(c); + int ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots, POS_MAX, 0, k, + bch2_get_dead_interior_snapshots(trans, k)); + if (ret) + goto err; + + struct snapshot_delete *d = &c->snapshot_delete; + if (d->delete_interior.nr) { + /* + * Fixing children of deleted snapshots can't be done completely + * atomically, if we crash between here and when we delete the interior + * nodes some depth fields will be off: + */ + ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, POS_MIN, + BTREE_ITER_intent, k, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, + bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &d->delete_interior)); + if (ret) + goto err; + + darray_for_each(d->delete_interior, i) { + ret = commit_do(trans, NULL, NULL, 0, + bch2_snapshot_node_delete(trans, i->id)); + if (!bch2_err_matches(ret, EROFS)) + bch_err_msg(c, ret, "deleting snapshot %u", i->id); + if (ret) + goto err; + } + + darray_exit(&d->delete_interior); + } +err: + bch_err_fn(c, ret); + return ret; +} + static bool interior_snapshot_needs_delete(struct bkey_s_c_snapshot snap) { /* If there's one child, it's redundant and keys will be moved to the child */ @@ -1895,13 +1956,18 @@ static bool interior_snapshot_needs_delete(struct bkey_s_c_snapshot snap) static int 
bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct bkey_s_c k) { + struct bch_fs *c = trans->c; + if (k.k->type != KEY_TYPE_snapshot) return 0; - struct bkey_s_c_snapshot snap = bkey_s_c_to_snapshot(k); - if (BCH_SNAPSHOT_WILL_DELETE(snap.v) || - interior_snapshot_needs_delete(snap)) - set_bit(BCH_FS_need_delete_dead_snapshots, &trans->c->flags); + struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k); + + if (BCH_SNAPSHOT_NO_KEYS(s.v)) + c->recovery.passes_to_run |= BIT_ULL(BCH_RECOVERY_PASS_delete_dead_interior_snapshots); + if (BCH_SNAPSHOT_WILL_DELETE(s.v) || + interior_snapshot_needs_delete(s)) + set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags); return 0; } @@ -1909,6 +1975,15 @@ static int bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct int bch2_snapshots_read(struct bch_fs *c) { /* + * It's important that we check if we need to reconstruct snapshots + * before going RW, so we mark that pass as required in the superblock - + * otherwise, we could end up deleting keys with missing snapshot nodes + * instead + */ + BUG_ON(!test_bit(BCH_FS_new_fs, &c->flags) && + test_bit(BCH_FS_may_go_rw, &c->flags)); + + /* * Initializing the is_ancestor bitmaps requires ancestors to already be * initialized - so mark in reverse: */ @@ -1919,15 +1994,6 @@ int bch2_snapshots_read(struct bch_fs *c) bch2_check_snapshot_needs_deletion(trans, k)); bch_err_fn(c, ret); - /* - * It's important that we check if we need to reconstruct snapshots - * before going RW, so we mark that pass as required in the superblock - - * otherwise, we could end up deleting keys with missing snapshot nodes - * instead - */ - BUG_ON(!test_bit(BCH_FS_new_fs, &c->flags) && - test_bit(BCH_FS_may_go_rw, &c->flags)); - return ret; } diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h index 28d9a29a1fd0..65d43a7ab877 100644 --- a/fs/bcachefs/snapshot.h +++ b/fs/bcachefs/snapshot.h @@ -291,6 +291,7 @@ void bch2_delete_dead_snapshots_work(struct work_struct *); void bch2_delete_dead_snapshots_async(struct bch_fs *); void bch2_snapshot_delete_status_to_text(struct printbuf *, struct bch_fs *); +int bch2_delete_dead_interior_snapshots(struct bch_fs *); int bch2_snapshots_read(struct bch_fs *); void bch2_fs_snapshots_exit(struct bch_fs *); void bch2_fs_snapshots_init_early(struct bch_fs *); diff --git a/fs/bcachefs/snapshot_format.h b/fs/bcachefs/snapshot_format.h index 9bccae1f3590..444885106140 100644 --- a/fs/bcachefs/snapshot_format.h +++ b/fs/bcachefs/snapshot_format.h @@ -15,10 +15,35 @@ struct bch_snapshot { bch_le128 btime; }; +/* + * WILL_DELETE: leaf node that's no longer referenced by a subvolume, still has + * keys, will be deleted by delete_dead_snapshots + * + * SUBVOL: true if a subvol points to this snapshot (why do we have this? + * subvols are nonzero) + * + * DELETED: we never delete snapshot keys, we mark them as deleted so that we + * can distinguish between a key for a missing snapshot (and we have no idea + * what happened) and a key for a deleted snapshot (delete_dead_snapshots() missed + * something, key should be deleted) + * + * NO_KEYS: we don't remove interior snapshot nodes from snapshot trees at + * runtime, since we can't do the adjustments for the depth/skiplist field + * atomically - and that breaks e.g. is_ancestor(). Instead, we mark it to be + * deleted at the next remount; this tells us that we don't need to run the full + * delete_dead_snapshots().
+ * + * + * XXX - todo item: + * + * We should guard against a bitflip causing us to delete a snapshot incorrectly + * by cross checking with the subvolume btree: delete_dead_snapshots() can take + * out more data than any other codepath if it runs incorrectly + */ LE32_BITMASK(BCH_SNAPSHOT_WILL_DELETE, struct bch_snapshot, flags, 0, 1) -/* True if a subvolume points to this snapshot node: */ LE32_BITMASK(BCH_SNAPSHOT_SUBVOL, struct bch_snapshot, flags, 1, 2) LE32_BITMASK(BCH_SNAPSHOT_DELETED, struct bch_snapshot, flags, 2, 3) +LE32_BITMASK(BCH_SNAPSHOT_NO_KEYS, struct bch_snapshot, flags, 3, 4) /* * Snapshot trees: diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 61eeac671283..98d31a1f9630 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -1516,8 +1516,7 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb, prt_newline(out); prt_printf(out, "Options:"); prt_newline(out); - printbuf_indent_add(out, 2); - { + scoped_guard(printbuf_indent, out) { enum bch_opt_id id; for (id = 0; id < bch2_opts_nr; id++) { @@ -1534,15 +1533,12 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb, } } - printbuf_indent_sub(out, 2); - if (print_layout) { prt_newline(out); prt_printf(out, "layout:"); prt_newline(out); - printbuf_indent_add(out, 2); - bch2_sb_layout_to_text(out, &sb->layout); - printbuf_indent_sub(out, 2); + scoped_guard(printbuf_indent, out) + bch2_sb_layout_to_text(out, &sb->layout); } vstruct_for_each(sb, f) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index e908fc77b671..ed504ce75169 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -277,6 +277,17 @@ struct bch_fs *bch2_uuid_to_fs(__uuid_t uuid) return c; } +void bch2_devs_list_to_text(struct printbuf *out, struct bch_devs_list *d) +{ + prt_char(out, '['); + darray_for_each(*d, i) { + if (i != d->data) + prt_char(out, ' '); + prt_printf(out, "%u", *i); + } + prt_char(out, ']'); +} + /* Filesystem RO/RW: */ /* @@ -461,9 +472,11 @@ static bool __bch2_fs_emergency_read_only2(struct bch_fs *c, struct printbuf *ou bch2_fs_read_only_async(c); wake_up(&bch2_read_only_wait); - if (ret) + if (ret) { prt_printf(out, "emergency read only at seq %llu\n", journal_cur_seq(&c->journal)); + bch2_prt_task_backtrace(out, current, 2, out->atomic ? 
GFP_ATOMIC : GFP_KERNEL); + } return ret; } @@ -1273,7 +1286,12 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts, if (ret) goto err; - if (go_rw_in_recovery(c)) { + /* + * just make sure this is always allocated if we might need it - mount + * failing due to kthread_create() failing is _very_ annoying + */ + if (!(c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info)) || + go_rw_in_recovery(c)) { /* * start workqueues/kworkers early - kthread creation checks for * pending signals, which is _very_ annoying diff --git a/fs/bcachefs/super.h b/fs/bcachefs/super.h index d13dbf2b8227..351dc5911645 100644 --- a/fs/bcachefs/super.h +++ b/fs/bcachefs/super.h @@ -16,6 +16,8 @@ extern const char * const bch2_dev_write_refs[]; struct bch_fs *bch2_dev_to_fs(dev_t); struct bch_fs *bch2_uuid_to_fs(__uuid_t); +void bch2_devs_list_to_text(struct printbuf *, struct bch_devs_list *); + bool bch2_dev_state_allowed(struct bch_fs *, struct bch_dev *, enum bch_member_state, int, struct printbuf *); diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index 6b071dcc062b..4c6e6c46d18a 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -784,7 +784,7 @@ static ssize_t sysfs_opt_store(struct bch_fs *c, u64 v; ret = bch2_opt_parse(c, opt, strim(tmp), &v, NULL) ?: - bch2_opt_hook_pre_set(c, ca, id, v); + bch2_opt_hook_pre_set(c, ca, 0, id, v); kfree(tmp); if (ret < 0) @@ -807,7 +807,7 @@ static ssize_t sysfs_opt_store(struct bch_fs *c, bch2_opt_set_by_id(&c->opts, id, v); if (changed) - bch2_opt_hook_post_set(c, ca, 0, &c->opts, id); + bch2_opt_hook_post_set(c, ca, 0, id, v); ret = size; err: diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index 2ded7f3c835f..2a9462275f92 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -415,45 +415,41 @@ void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats printbuf_tabstop_push(out, TABSTOP_SIZE); prt_printf(out, "duration of events\n"); - printbuf_indent_add(out, 2); - - pr_name_and_units(out, "min:", stats->min_duration); - pr_name_and_units(out, "max:", stats->max_duration); - pr_name_and_units(out, "total:", stats->total_duration); - - prt_printf(out, "mean:\t"); - bch2_pr_time_units_aligned(out, d_mean); - prt_tab(out); - bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT)); - prt_newline(out); - - prt_printf(out, "stddev:\t"); - bch2_pr_time_units_aligned(out, d_stddev); - prt_tab(out); - bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT)); + scoped_guard(printbuf_indent, out) { + pr_name_and_units(out, "min:", stats->min_duration); + pr_name_and_units(out, "max:", stats->max_duration); + pr_name_and_units(out, "total:", stats->total_duration); + + prt_printf(out, "mean:\t"); + bch2_pr_time_units_aligned(out, d_mean); + prt_tab(out); + bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT)); + prt_newline(out); - printbuf_indent_sub(out, 2); - prt_newline(out); + prt_printf(out, "stddev:\t"); + bch2_pr_time_units_aligned(out, d_stddev); + prt_tab(out); + bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT)); + prt_newline(out); + } prt_printf(out, "time between events\n"); - printbuf_indent_add(out, 2); - - pr_name_and_units(out, "min:", stats->min_freq); - pr_name_and_units(out, "max:", stats->max_freq); - - 
prt_printf(out, "mean:\t"); - bch2_pr_time_units_aligned(out, f_mean); - prt_tab(out); - bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT)); - prt_newline(out); - - prt_printf(out, "stddev:\t"); - bch2_pr_time_units_aligned(out, f_stddev); - prt_tab(out); - bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT)); + scoped_guard(printbuf_indent, out) { + pr_name_and_units(out, "min:", stats->min_freq); + pr_name_and_units(out, "max:", stats->max_freq); + + prt_printf(out, "mean:\t"); + bch2_pr_time_units_aligned(out, f_mean); + prt_tab(out); + bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT)); + prt_newline(out); - printbuf_indent_sub(out, 2); - prt_newline(out); + prt_printf(out, "stddev:\t"); + bch2_pr_time_units_aligned(out, f_stddev); + prt_tab(out); + bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT)); + prt_newline(out); + } printbuf_tabstops_reset(out); diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index 6d7303008b19..784e75a21132 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -535,10 +535,9 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler, return -EINVAL; s.id = inode_opt_id; + u64 v = 0; if (value) { - u64 v = 0; - buf = kmalloc(size + 1, GFP_KERNEL); if (!buf) return -ENOMEM; @@ -551,7 +550,7 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler, if (ret < 0) goto err; - ret = bch2_opt_hook_pre_set(c, NULL, opt_id, v); + ret = bch2_opt_hook_pre_set(c, NULL, inode->ei_inode.bi_inum, opt_id, v); if (ret < 0) goto err; @@ -591,6 +590,8 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler, ret = bch2_write_inode(c, inode, inode_opt_set_fn, &s, 0); } + + bch2_opt_hook_post_set(c, NULL, inode->ei_inode.bi_inum, opt_id, v); err: return bch2_err_class(ret); }
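
Note on the printbuf_indent conversions above: this series replaces paired printbuf_indent_add()/printbuf_indent_sub() calls with guard(printbuf_indent)(...) and scoped_guard(printbuf_indent, ...), so the indent is dropped automatically when the scope exits, including on early return. A minimal sketch of how such a guard can be declared with the kernel's <linux/cleanup.h> helpers; the exact bcachefs declaration may differ:

	#include <linux/cleanup.h>

	/* On scope entry, indent the printbuf by two spaces; undo on exit. */
	DEFINE_GUARD(printbuf_indent, struct printbuf *,
		     printbuf_indent_add(_T, 2),
		     printbuf_indent_sub(_T, 2))

	/* Usage, as in the hunks above:
	 *	guard(printbuf_indent)(out);		indent to end of function
	 *	scoped_guard(printbuf_indent, out)	indent for one block
	 *		bch2_open_bucket_to_text(out, c, ob);
	 */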
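Note on per_snapshot_io_opts in rebalance.h above: with snapshots, one inode number can carry different IO options in different snapshot versions, and the rebalance scan walks extents across all snapshots, so it needs the options for whichever snapshot each extent belongs to. A hedged sketch of the lookup side of that cache - the helper name here is invented for illustration, and the real bch2_extent_get_apply_io_opts() does more, including repopulating the darray when cur_inum changes and applying the options to the extent:

	static struct bch_inode_opts *
	snapshot_opts_lookup(struct bch_fs *c,
			     struct per_snapshot_io_opts *io_opts,
			     u32 snapshot)
	{
		/*
		 * Entries were collected for io_opts->cur_inum, one per
		 * snapshot version of the inode:
		 */
		darray_for_each(io_opts->d, i)
			if (bch2_snapshot_is_ancestor(c, snapshot, i->snapshot))
				return &i->io_opts;

		/* No matching inode version: fall back to fs-wide options */
		return &io_opts->fs_io_opts;
	}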
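Note on the new BCH_SNAPSHOT_NO_KEYS flag in snapshot_format.h above: it is declared with LE32_BITMASK(..., flags, 3, 4), i.e. bit 3 of the on-disk little-endian flags word. Roughly what that macro expands to, shown for this one flag (a sketch of the pattern; the real macro in the bcachefs format headers also emits OFFSET/BITS constants):

	static inline __u64 BCH_SNAPSHOT_NO_KEYS(const struct bch_snapshot *k)
	{
		/* extract bits [3, 4) of the le32 flags field */
		return (__le32_to_cpu(k->flags) >> 3) & ~(~0ULL << (4 - 3));
	}

	static inline void SET_BCH_SNAPSHOT_NO_KEYS(struct bch_snapshot *k, __u64 v)
	{
		__u64 f = __le32_to_cpu(k->flags);

		f &= ~(~(~0ULL << (4 - 3)) << 3);	/* clear the field */
		f |= v << 3;				/* set the new value */
		k->flags = __cpu_to_le32(f);
	}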