diff options
Diffstat (limited to 'fs/bcachefs')
-rw-r--r-- | fs/bcachefs/bcachefs.h | 2 | ||||
-rw-r--r-- | fs/bcachefs/btree_io.c | 5 | ||||
-rw-r--r-- | fs/bcachefs/error.c | 4 | ||||
-rw-r--r-- | fs/bcachefs/fs-io-buffered.c | 56 | ||||
-rw-r--r-- | fs/bcachefs/io_write.c | 25 | ||||
-rw-r--r-- | fs/bcachefs/move.c | 97 | ||||
-rw-r--r-- | fs/bcachefs/move.h | 30 | ||||
-rw-r--r-- | fs/bcachefs/rebalance.c | 178 | ||||
-rw-r--r-- | fs/bcachefs/rebalance.h | 37 | ||||
-rw-r--r-- | fs/bcachefs/recovery.c | 14 | ||||
-rw-r--r-- | fs/bcachefs/super.c | 2 |
11 files changed, 270 insertions, 180 deletions
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 6f25e2687cd2..c62d2e4ab50b 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -483,6 +483,7 @@ BCH_DEBUG_PARAMS_ALL() x(blocked_allocate) \ x(blocked_allocate_open_bucket) \ x(blocked_write_buffer_full) \ + x(blocked_writeback_throttle) \ x(nocow_lock_contended) enum bch_time_stats { @@ -675,6 +676,7 @@ struct bch_dev { x(error) \ x(topology_error) \ x(errors_fixed) \ + x(errors_fixed_silent) \ x(errors_not_fixed) \ x(no_invalid_checks) \ x(discard_mount_opt_set) \ diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 0a6a808d797b..52d21259ed6f 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -27,12 +27,13 @@ #include <linux/moduleparam.h> #include <linux/sched/mm.h> +static __maybe_unused unsigned bch2_btree_read_corrupt_ratio; +static __maybe_unused int bch2_btree_read_corrupt_device; + #ifdef CONFIG_BCACHEFS_DEBUG -static unsigned bch2_btree_read_corrupt_ratio; module_param_named(btree_read_corrupt_ratio, bch2_btree_read_corrupt_ratio, uint, 0644); MODULE_PARM_DESC(btree_read_corrupt_ratio, ""); -static int bch2_btree_read_corrupt_device; module_param_named(btree_read_corrupt_device, bch2_btree_read_corrupt_device, int, 0644); MODULE_PARM_DESC(btree_read_corrupt_ratio, ""); #endif diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index 9e69263eb796..a16f55d98d97 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -468,10 +468,10 @@ int __bch2_fsck_err(struct bch_fs *c, if ((flags & FSCK_ERR_SILENT) || test_bit(err, c->sb.errors_silent)) { - ret = flags & FSCK_CAN_FIX + set_bit(BCH_FS_errors_fixed_silent, &c->flags); + return flags & FSCK_CAN_FIX ? bch_err_throw(c, fsck_fix) : bch_err_throw(c, fsck_ignore); - goto err; } printbuf_indent_add_nextline(out, 2); diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c index aab30571b056..d9db9f1082d3 100644 --- a/fs/bcachefs/fs-io-buffered.c +++ b/fs/bcachefs/fs-io-buffered.c @@ -532,6 +532,39 @@ static void bch2_writepage_io_alloc(struct bch_fs *c, op->wbio.bio.bi_opf = wbc_to_write_flags(wbc); } +static bool can_write_now(struct bch_fs *c, unsigned replicas_want, struct closure *cl) +{ + unsigned reserved = OPEN_BUCKETS_COUNT - + (OPEN_BUCKETS_COUNT - bch2_open_buckets_reserved(BCH_WATERMARK_normal)) / 2; + + if (unlikely(c->open_buckets_nr_free <= reserved)) { + closure_wait(&c->open_buckets_wait, cl); + return false; + } + + if (BCH_WATERMARK_normal < c->journal.watermark) { + closure_wait(&c->journal.async_wait, cl); + return false; + } + + return true; +} + +static void throttle_writes(struct bch_fs *c, unsigned replicas_want, struct closure *cl) +{ + u64 start = 0; + while (!can_write_now(c, replicas_want, cl)) { + if (!start) + start = local_clock(); + closure_sync(cl); + } + + BUG_ON(closure_nr_remaining(cl) > 1); + + if (start) + bch2_time_stats_update(&c->times[BCH_TIME_blocked_writeback_throttle], start); +} + static int __bch2_writepage(struct folio *folio, struct writeback_control *wbc, void *data) @@ -667,17 +700,6 @@ do_io: return 0; } -static int bch2_write_cache_pages(struct address_space *mapping, - struct writeback_control *wbc, void *data) -{ - struct folio *folio = NULL; - int error; - - while ((folio = writeback_iter(mapping, wbc, folio, &error))) - error = __bch2_writepage(folio, wbc, data); - return error; -} - int bch2_writepages(struct address_space *mapping, struct writeback_control *wbc) { struct bch_fs *c = mapping->host->i_sb->s_fs_info; @@ -686,7 +708,17 @@ int bch2_writepages(struct address_space *mapping, struct writeback_control *wbc bch2_inode_opts_get_inode(c, &to_bch_ei(mapping->host)->ei_inode, &w->opts); blk_start_plug(&w->plug); - int ret = bch2_write_cache_pages(mapping, wbc, w); + + struct closure cl; + closure_init_stack(&cl); + + struct folio *folio = NULL; + int ret = 0; + + while (throttle_writes(c, w->opts.data_replicas, &cl), + (folio = writeback_iter(mapping, wbc, folio, &ret))) + ret = __bch2_writepage(folio, wbc, w); + if (w->io) bch2_writepage_do_io(w); blk_finish_plug(&w->plug); diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index 6a5da02ce266..b86e4d93a24e 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -1520,6 +1520,9 @@ static void __bch2_write(struct bch_write_op *op) unsigned nofs_flags; int ret; + struct closure alloc_cl; + closure_init_stack(&alloc_cl); + nofs_flags = memalloc_nofs_save(); if (unlikely(op->opts.nocow && c->opts.nocow_enabled)) { @@ -1561,14 +1564,25 @@ again: op->nr_replicas_required, op->watermark, op->flags, - &op->cl, &wp)); - if (unlikely(ret)) { - if (bch2_err_matches(ret, BCH_ERR_operation_blocked)) - break; + &alloc_cl, &wp)); - goto err; + + if (bch2_err_matches(ret, BCH_ERR_operation_blocked)) { + bch2_wait_on_allocator(c, &alloc_cl); + continue; } +#if 0 + XXX: it's currently possible for the allocator to return success + with cl on a waitlist, this is a bug and should be resolved by + the allocator flow control rework + + BUG_ON(closure_nr_remaining(&alloc_cl) > 1); +#endif + + if (unlikely(ret)) + goto err; + EBUG_ON(!wp); bch2_open_bucket_get(c, wp, &op->open_buckets); @@ -1624,6 +1638,7 @@ err: } out_nofs_restore: memalloc_nofs_restore(nofs_flags); + closure_sync(&alloc_cl); } static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len) diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 09cd19946fbe..9a440d3f7180 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -451,95 +451,6 @@ err: return ret; } -struct bch_inode_opts *bch2_move_get_io_opts(struct btree_trans *trans, - struct per_snapshot_io_opts *io_opts, - struct bpos extent_pos, /* extent_iter, extent_k may be in reflink btree */ - struct btree_iter *extent_iter, - struct bkey_s_c extent_k) -{ - struct bch_fs *c = trans->c; - u32 restart_count = trans->restart_count; - struct bch_inode_opts *opts_ret = &io_opts->fs_io_opts; - int ret = 0; - - if (btree_iter_path(trans, extent_iter)->level) - return opts_ret; - - if (extent_k.k->type == KEY_TYPE_reflink_v) - goto out; - - if (io_opts->cur_inum != extent_pos.inode) { - io_opts->d.nr = 0; - - ret = for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_pos.inode), - BTREE_ITER_all_snapshots, k, ({ - if (k.k->p.offset != extent_pos.inode) - break; - - if (!bkey_is_inode(k.k)) - continue; - - struct bch_inode_unpacked inode; - _ret3 = bch2_inode_unpack(k, &inode); - if (_ret3) - break; - - struct snapshot_io_opts_entry e = { .snapshot = k.k->p.snapshot }; - bch2_inode_opts_get_inode(trans->c, &inode, &e.io_opts); - - darray_push(&io_opts->d, e); - })); - io_opts->cur_inum = extent_pos.inode; - } - - ret = ret ?: trans_was_restarted(trans, restart_count); - if (ret) - return ERR_PTR(ret); - - if (extent_k.k->p.snapshot) - darray_for_each(io_opts->d, i) - if (bch2_snapshot_is_ancestor(c, extent_k.k->p.snapshot, i->snapshot)) { - opts_ret = &i->io_opts; - break; - } -out: - ret = bch2_get_update_rebalance_opts(trans, opts_ret, extent_iter, extent_k, - SET_NEEDS_REBALANCE_other); - if (ret) - return ERR_PTR(ret); - return opts_ret; -} - -int bch2_move_get_io_opts_one(struct btree_trans *trans, - struct bch_inode_opts *io_opts, - struct btree_iter *extent_iter, - struct bkey_s_c extent_k) -{ - struct bch_fs *c = trans->c; - - bch2_inode_opts_get(c, io_opts); - - /* reflink btree? */ - if (extent_k.k->p.inode) { - CLASS(btree_iter, inode_iter)(trans, BTREE_ID_inodes, - SPOS(0, extent_k.k->p.inode, extent_k.k->p.snapshot), - BTREE_ITER_cached); - struct bkey_s_c inode_k = bch2_btree_iter_peek_slot(&inode_iter); - int ret = bkey_err(inode_k); - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - return ret; - - if (!ret && bkey_is_inode(inode_k.k)) { - struct bch_inode_unpacked inode; - bch2_inode_unpack(inode_k, &inode); - bch2_inode_opts_get_inode(c, &inode, io_opts); - } - } - - return bch2_get_update_rebalance_opts(trans, io_opts, extent_iter, extent_k, - SET_NEEDS_REBALANCE_other); -} - int bch2_move_ratelimit(struct moving_context *ctxt) { struct bch_fs *c = ctxt->trans->c; @@ -683,8 +594,9 @@ root_err: if (!bkey_extent_is_direct_data(k.k)) goto next_nondata; - io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts, - iter.pos, &iter, k); + io_opts = bch2_extent_get_apply_io_opts(trans, &snapshot_io_opts, + iter.pos, &iter, k, + SET_NEEDS_REBALANCE_other); ret = PTR_ERR_OR_ZERO(io_opts); if (ret) continue; @@ -883,7 +795,8 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, goto next; if (!bp.v->level) { - ret = bch2_move_get_io_opts_one(trans, &io_opts, &iter, k); + ret = bch2_extent_get_apply_io_opts_one(trans, &io_opts, &iter, k, + SET_NEEDS_REBALANCE_other); if (ret) { bch2_trans_iter_exit(&iter); continue; diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h index 18021d2c51d0..754b0ad45950 100644 --- a/fs/bcachefs/move.h +++ b/fs/bcachefs/move.h @@ -87,32 +87,6 @@ void bch2_moving_ctxt_flush_all(struct moving_context *); void bch2_move_ctxt_wait_for_io(struct moving_context *); int bch2_move_ratelimit(struct moving_context *); -/* Inodes in different snapshots may have different IO options: */ -struct snapshot_io_opts_entry { - u32 snapshot; - struct bch_inode_opts io_opts; -}; - -struct per_snapshot_io_opts { - u64 cur_inum; - struct bch_inode_opts fs_io_opts; - DARRAY(struct snapshot_io_opts_entry) d; -}; - -static inline void per_snapshot_io_opts_init(struct per_snapshot_io_opts *io_opts, struct bch_fs *c) -{ - memset(io_opts, 0, sizeof(*io_opts)); - bch2_inode_opts_get(c, &io_opts->fs_io_opts); -} - -static inline void per_snapshot_io_opts_exit(struct per_snapshot_io_opts *io_opts) -{ - darray_exit(&io_opts->d); -} - -int bch2_move_get_io_opts_one(struct btree_trans *, struct bch_inode_opts *, - struct btree_iter *, struct bkey_s_c); - int bch2_scan_old_btree_nodes(struct bch_fs *, struct bch_move_stats *); int bch2_move_extent(struct moving_context *, @@ -122,10 +96,6 @@ int bch2_move_extent(struct moving_context *, struct bch_inode_opts, struct data_update_opts); -struct bch_inode_opts *bch2_move_get_io_opts(struct btree_trans *, - struct per_snapshot_io_opts *, struct bpos, - struct btree_iter *, struct bkey_s_c); - int bch2_move_data_btree(struct moving_context *, struct bpos, struct bpos, move_pred_fn, void *, enum btree_id, unsigned); diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index 849bdbf800d4..fa73de7890da 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -237,11 +237,11 @@ int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bch_inode_opts *opts, return 0; } -int bch2_get_update_rebalance_opts(struct btree_trans *trans, - struct bch_inode_opts *io_opts, - struct btree_iter *iter, - struct bkey_s_c k, - enum set_needs_rebalance_ctx ctx) +static int bch2_get_update_rebalance_opts(struct btree_trans *trans, + struct bch_inode_opts *io_opts, + struct btree_iter *iter, + struct bkey_s_c k, + enum set_needs_rebalance_ctx ctx) { BUG_ON(iter->flags & BTREE_ITER_is_extents); BUG_ON(iter->flags & BTREE_ITER_filter_snapshots); @@ -273,7 +273,118 @@ int bch2_get_update_rebalance_opts(struct btree_trans *trans, return bch2_bkey_set_needs_rebalance(trans->c, io_opts, n, ctx, 0) ?: bch2_trans_update(trans, iter, n, BTREE_UPDATE_internal_snapshot_node) ?: bch2_trans_commit(trans, NULL, NULL, 0) ?: - bch_err_throw(trans->c, transaction_restart_nested); + bch_err_throw(trans->c, transaction_restart_commit); +} + +static struct bch_inode_opts *bch2_extent_get_io_opts(struct btree_trans *trans, + struct per_snapshot_io_opts *io_opts, + struct bpos extent_pos, /* extent_iter, extent_k may be in reflink btree */ + struct btree_iter *extent_iter, + struct bkey_s_c extent_k) +{ + struct bch_fs *c = trans->c; + u32 restart_count = trans->restart_count; + int ret = 0; + + if (btree_iter_path(trans, extent_iter)->level) + return &io_opts->fs_io_opts; + + if (extent_k.k->type == KEY_TYPE_reflink_v) + return &io_opts->fs_io_opts; + + if (io_opts->cur_inum != extent_pos.inode) { + io_opts->d.nr = 0; + + ret = for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_pos.inode), + BTREE_ITER_all_snapshots, k, ({ + if (k.k->p.offset != extent_pos.inode) + break; + + if (!bkey_is_inode(k.k)) + continue; + + struct bch_inode_unpacked inode; + _ret3 = bch2_inode_unpack(k, &inode); + if (_ret3) + break; + + struct snapshot_io_opts_entry e = { .snapshot = k.k->p.snapshot }; + bch2_inode_opts_get_inode(c, &inode, &e.io_opts); + + darray_push(&io_opts->d, e); + })); + io_opts->cur_inum = extent_pos.inode; + } + + ret = ret ?: trans_was_restarted(trans, restart_count); + if (ret) + return ERR_PTR(ret); + + if (extent_k.k->p.snapshot) + darray_for_each(io_opts->d, i) + if (bch2_snapshot_is_ancestor(c, extent_k.k->p.snapshot, i->snapshot)) + return &i->io_opts; + + return &io_opts->fs_io_opts; +} + +struct bch_inode_opts *bch2_extent_get_apply_io_opts(struct btree_trans *trans, + struct per_snapshot_io_opts *snapshot_io_opts, + struct bpos extent_pos, /* extent_iter, extent_k may be in reflink btree */ + struct btree_iter *extent_iter, + struct bkey_s_c extent_k, + enum set_needs_rebalance_ctx ctx) +{ + struct bch_inode_opts *opts = + bch2_extent_get_io_opts(trans, snapshot_io_opts, extent_pos, extent_iter, extent_k); + if (IS_ERR(opts) || btree_iter_path(trans, extent_iter)->level) + return opts; + + int ret = bch2_get_update_rebalance_opts(trans, opts, extent_iter, extent_k, ctx); + return ret ? ERR_PTR(ret) : opts; +} + +int bch2_extent_get_io_opts_one(struct btree_trans *trans, + struct bch_inode_opts *io_opts, + struct btree_iter *extent_iter, + struct bkey_s_c extent_k, + enum set_needs_rebalance_ctx ctx) +{ + struct bch_fs *c = trans->c; + + bch2_inode_opts_get(c, io_opts); + + /* reflink btree? */ + if (extent_k.k->p.inode) { + CLASS(btree_iter, inode_iter)(trans, BTREE_ID_inodes, + SPOS(0, extent_k.k->p.inode, extent_k.k->p.snapshot), + BTREE_ITER_cached); + struct bkey_s_c inode_k = bch2_btree_iter_peek_slot(&inode_iter); + int ret = bkey_err(inode_k); + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + return ret; + + if (!ret && bkey_is_inode(inode_k.k)) { + struct bch_inode_unpacked inode; + bch2_inode_unpack(inode_k, &inode); + bch2_inode_opts_get_inode(c, &inode, io_opts); + } + } + + return 0; +} + +int bch2_extent_get_apply_io_opts_one(struct btree_trans *trans, + struct bch_inode_opts *io_opts, + struct btree_iter *extent_iter, + struct bkey_s_c extent_k, + enum set_needs_rebalance_ctx ctx) +{ + int ret = bch2_extent_get_io_opts_one(trans, io_opts, extent_iter, extent_k, ctx); + if (ret || btree_iter_path(trans, extent_iter)->level) + return ret; + + return bch2_get_update_rebalance_opts(trans, io_opts, extent_iter, extent_k, ctx); } #define REBALANCE_WORK_SCAN_OFFSET (U64_MAX - 1) @@ -406,9 +517,10 @@ static int bch2_bkey_clear_needs_rebalance(struct btree_trans *trans, } static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans, + struct per_snapshot_io_opts *snapshot_io_opts, struct bpos work_pos, struct btree_iter *extent_iter, - struct bch_inode_opts *io_opts, + struct bch_inode_opts **opts_ret, struct data_update_opts *data_opts) { struct bch_fs *c = trans->c; @@ -422,13 +534,19 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans, if (bkey_err(k)) return k; - int ret = bch2_move_get_io_opts_one(trans, io_opts, extent_iter, k); + struct bch_inode_opts *opts = + bch2_extent_get_apply_io_opts(trans, snapshot_io_opts, + extent_iter->pos, extent_iter, k, + SET_NEEDS_REBALANCE_other); + int ret = PTR_ERR_OR_ZERO(opts); if (ret) return bkey_s_c_err(ret); + *opts_ret = opts; + memset(data_opts, 0, sizeof(*data_opts)); - data_opts->rewrite_ptrs = bch2_bkey_ptrs_need_rebalance(c, io_opts, k); - data_opts->target = io_opts->background_target; + data_opts->rewrite_ptrs = bch2_bkey_ptrs_need_rebalance(c, opts, k); + data_opts->target = opts->background_target; data_opts->write_flags |= BCH_WRITE_only_specified_devs; if (!data_opts->rewrite_ptrs) { @@ -453,19 +571,19 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans, struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - unsigned p = bch2_bkey_ptrs_need_compress(c, io_opts, k, ptrs); + unsigned p = bch2_bkey_ptrs_need_compress(c, opts, k, ptrs); if (p) { prt_str(&buf, "compression="); - bch2_compression_opt_to_text(&buf, io_opts->background_compression); + bch2_compression_opt_to_text(&buf, opts->background_compression); prt_str(&buf, " "); bch2_prt_u64_base2(&buf, p); prt_newline(&buf); } - p = bch2_bkey_ptrs_need_move(c, io_opts, ptrs); + p = bch2_bkey_ptrs_need_move(c, opts, ptrs); if (p) { prt_str(&buf, "move="); - bch2_target_to_text(&buf, c, io_opts->background_target); + bch2_target_to_text(&buf, c, opts->background_target); prt_str(&buf, " "); bch2_prt_u64_base2(&buf, p); prt_newline(&buf); @@ -480,6 +598,7 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans, noinline_for_stack static int do_rebalance_extent(struct moving_context *ctxt, + struct per_snapshot_io_opts *snapshot_io_opts, struct bpos work_pos, struct btree_iter *extent_iter) { @@ -487,7 +606,7 @@ static int do_rebalance_extent(struct moving_context *ctxt, struct bch_fs *c = trans->c; struct bch_fs_rebalance *r = &trans->c->rebalance; struct data_update_opts data_opts; - struct bch_inode_opts io_opts; + struct bch_inode_opts *io_opts; struct bkey_s_c k; struct bkey_buf sk; int ret; @@ -498,8 +617,8 @@ static int do_rebalance_extent(struct moving_context *ctxt, bch2_bkey_buf_init(&sk); ret = lockrestart_do(trans, - bkey_err(k = next_rebalance_extent(trans, work_pos, - extent_iter, &io_opts, &data_opts))); + bkey_err(k = next_rebalance_extent(trans, snapshot_io_opts, + work_pos, extent_iter, &io_opts, &data_opts))); if (ret || !k.k) goto out; @@ -512,7 +631,7 @@ static int do_rebalance_extent(struct moving_context *ctxt, bch2_bkey_buf_reassemble(&sk, c, k); k = bkey_i_to_s_c(sk.k); - ret = bch2_move_extent(ctxt, NULL, extent_iter, k, io_opts, data_opts); + ret = bch2_move_extent(ctxt, NULL, extent_iter, k, *io_opts, data_opts); if (ret) { if (bch2_err_matches(ret, ENOMEM)) { /* memory allocation failure, wait for some IO to finish */ @@ -555,6 +674,7 @@ static int do_rebalance_scan_indirect(struct btree_trans *trans, } static int do_rebalance_scan(struct moving_context *ctxt, + struct per_snapshot_io_opts *snapshot_io_opts, u64 inum, u64 cookie, u64 *sectors_scanned) { struct btree_trans *trans = ctxt->trans; @@ -574,17 +694,15 @@ static int do_rebalance_scan(struct moving_context *ctxt, r->state = BCH_REBALANCE_scanning; - struct per_snapshot_io_opts snapshot_io_opts; - per_snapshot_io_opts_init(&snapshot_io_opts, c); - int ret = for_each_btree_key_max(trans, iter, BTREE_ID_extents, r->scan_start.pos, r->scan_end.pos, BTREE_ITER_all_snapshots| BTREE_ITER_prefetch, k, ({ ctxt->stats->pos = BBPOS(iter.btree_id, iter.pos); - struct bch_inode_opts *opts = bch2_move_get_io_opts(trans, - &snapshot_io_opts, iter.pos, &iter, k); + struct bch_inode_opts *opts = bch2_extent_get_apply_io_opts(trans, + snapshot_io_opts, iter.pos, &iter, k, + SET_NEEDS_REBALANCE_opt_change); PTR_ERR_OR_ZERO(opts) ?: (inum && k.k->type == KEY_TYPE_reflink_p && @@ -595,16 +713,14 @@ static int do_rebalance_scan(struct moving_context *ctxt, commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, bch2_clear_rebalance_needs_scan(trans, inum, cookie)); - per_snapshot_io_opts_exit(&snapshot_io_opts); *sectors_scanned += atomic64_read(&r->scan_stats.sectors_seen); - bch2_move_stats_exit(&r->scan_stats, c); - /* * Ensure that the rebalance_work entries we created are seen by the * next iteration of do_rebalance(), so we don't end up stuck in * rebalance_wait(): */ *sectors_scanned += 1; + bch2_move_stats_exit(&r->scan_stats, c); bch2_btree_write_buffer_flush_sync(trans); @@ -656,6 +772,9 @@ static int do_rebalance(struct moving_context *ctxt) bch2_move_stats_init(&r->work_stats, "rebalance_work"); + struct per_snapshot_io_opts snapshot_io_opts; + per_snapshot_io_opts_init(&snapshot_io_opts, c); + while (!bch2_move_ratelimit(ctxt)) { if (!bch2_rebalance_enabled(c)) { bch2_moving_ctxt_flush_all(ctxt); @@ -670,15 +789,18 @@ static int do_rebalance(struct moving_context *ctxt) break; ret = k->k.type == KEY_TYPE_cookie - ? do_rebalance_scan(ctxt, k->k.p.inode, + ? do_rebalance_scan(ctxt, &snapshot_io_opts, + k->k.p.inode, le64_to_cpu(bkey_i_to_cookie(k)->v.cookie), §ors_scanned) - : do_rebalance_extent(ctxt, k->k.p, &extent_iter); + : do_rebalance_extent(ctxt, &snapshot_io_opts, + k->k.p, &extent_iter); if (ret) break; } bch2_trans_iter_exit(&extent_iter); + per_snapshot_io_opts_exit(&snapshot_io_opts); bch2_move_stats_exit(&r->work_stats, c); if (!ret && diff --git a/fs/bcachefs/rebalance.h b/fs/bcachefs/rebalance.h index 6558a7626272..bff91aa0102e 100644 --- a/fs/bcachefs/rebalance.h +++ b/fs/bcachefs/rebalance.h @@ -41,9 +41,40 @@ enum set_needs_rebalance_ctx { int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bch_inode_opts *, struct bkey_i *, enum set_needs_rebalance_ctx, u32); -int bch2_get_update_rebalance_opts(struct btree_trans *, struct bch_inode_opts *, - struct btree_iter *, struct bkey_s_c, - enum set_needs_rebalance_ctx); +/* Inodes in different snapshots may have different IO options: */ +struct snapshot_io_opts_entry { + u32 snapshot; + struct bch_inode_opts io_opts; +}; + +struct per_snapshot_io_opts { + u64 cur_inum; + struct bch_inode_opts fs_io_opts; + DARRAY(struct snapshot_io_opts_entry) d; +}; + +static inline void per_snapshot_io_opts_init(struct per_snapshot_io_opts *io_opts, struct bch_fs *c) +{ + memset(io_opts, 0, sizeof(*io_opts)); + bch2_inode_opts_get(c, &io_opts->fs_io_opts); +} + +static inline void per_snapshot_io_opts_exit(struct per_snapshot_io_opts *io_opts) +{ + darray_exit(&io_opts->d); +} + +struct bch_inode_opts *bch2_extent_get_apply_io_opts(struct btree_trans *, + struct per_snapshot_io_opts *, struct bpos, + struct btree_iter *, struct bkey_s_c, + enum set_needs_rebalance_ctx); + +int bch2_extent_get_io_opts_one(struct btree_trans *, struct bch_inode_opts *, + struct btree_iter *, struct bkey_s_c, + enum set_needs_rebalance_ctx); +int bch2_extent_get_apply_io_opts_one(struct btree_trans *, struct bch_inode_opts *, + struct btree_iter *, struct bkey_s_c, + enum set_needs_rebalance_ctx); int bch2_set_rebalance_needs_scan_trans(struct btree_trans *, u64); int bch2_set_rebalance_needs_scan(struct bch_fs *, u64 inum); diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 8679c8aad0e7..531c2ef128ae 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -837,33 +837,39 @@ use_clean: bch2_async_btree_node_rewrites_flush(c); /* fsync if we fixed errors */ - if (test_bit(BCH_FS_errors_fixed, &c->flags)) { + bool errors_fixed = test_bit(BCH_FS_errors_fixed, &c->flags) || + test_bit(BCH_FS_errors_fixed_silent, &c->flags); + + if (errors_fixed) { bch2_journal_flush_all_pins(&c->journal); bch2_journal_meta(&c->journal); } /* If we fixed errors, verify that fs is actually clean now: */ if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) && - test_bit(BCH_FS_errors_fixed, &c->flags) && + errors_fixed && !test_bit(BCH_FS_errors_not_fixed, &c->flags) && !test_bit(BCH_FS_error, &c->flags)) { bch2_flush_fsck_errs(c); bch_info(c, "Fixed errors, running fsck a second time to verify fs is clean"); + errors_fixed = test_bit(BCH_FS_errors_fixed, &c->flags); clear_bit(BCH_FS_errors_fixed, &c->flags); + clear_bit(BCH_FS_errors_fixed_silent, &c->flags); ret = bch2_run_recovery_passes(c, BCH_RECOVERY_PASS_check_alloc_info); if (ret) goto err; - if (test_bit(BCH_FS_errors_fixed, &c->flags) || + if (errors_fixed || test_bit(BCH_FS_errors_not_fixed, &c->flags)) { bch_err(c, "Second fsck run was not clean"); set_bit(BCH_FS_errors_not_fixed, &c->flags); } - set_bit(BCH_FS_errors_fixed, &c->flags); + if (errors_fixed) + set_bit(BCH_FS_errors_fixed, &c->flags); } if (enabled_qtypes(c)) { diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index de1e8912975c..ed504ce75169 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -833,8 +833,6 @@ int bch2_fs_init_rw(struct bch_fs *c) if (test_bit(BCH_FS_rw_init_done, &c->flags)) return 0; - bch_verbose(c, "doing rw allocations"); - if (!(c->btree_update_wq = alloc_workqueue("bcachefs", WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_UNBOUND, 512)) || !(c->btree_write_complete_wq = alloc_workqueue("bcachefs_btree_write_complete", |