Diffstat (limited to 'fs/bcachefs/rebalance.c')
-rw-r--r-- | fs/bcachefs/rebalance.c | 202 |
1 file changed, 186 insertions, 16 deletions
diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c
index d1e064be1b9f..8cb8d3779975 100644
--- a/fs/bcachefs/rebalance.c
+++ b/fs/bcachefs/rebalance.c
@@ -25,6 +25,8 @@
 #include <linux/kthread.h>
 #include <linux/sched/cputime.h>
 
+#define REBALANCE_WORK_SCAN_OFFSET	(U64_MAX - 1)
+
 /* bch_extent_rebalance: */
 
 static const struct bch_extent_rebalance *bch2_bkey_ptrs_rebalance_opts(struct bkey_ptrs_c ptrs)
@@ -186,21 +188,61 @@ int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bch_io_opts *opts,
 	return 0;
 }
 
-int bch2_get_update_rebalance_opts(struct btree_trans *trans,
-				   struct bch_io_opts *io_opts,
-				   struct btree_iter *iter,
-				   struct bkey_s_c k)
+static int have_rebalance_scan_cookie(struct btree_trans *trans, u64 inum)
+{
+	/*
+	 * If opts need to be propagated to the extent, a scan cookie should be
+	 * present:
+	 */
+	CLASS(btree_iter, iter)(trans, BTREE_ID_rebalance_work,
+				SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX),
+				BTREE_ITER_intent);
+	struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter);
+	int ret = bkey_err(k);
+	if (ret)
+		return ret;
+
+	if (k.k->type == KEY_TYPE_cookie)
+		return 1;
+
+	if (!inum)
+		return 0;
+
+	bch2_btree_iter_set_pos(&iter, SPOS(0, REBALANCE_WORK_SCAN_OFFSET, U32_MAX));
+	k = bch2_btree_iter_peek_slot(&iter);
+	ret = bkey_err(k);
+	if (ret)
+		return ret;
+
+	return k.k->type == KEY_TYPE_cookie;
+}
+
+static int bch2_get_update_rebalance_opts(struct btree_trans *trans,
+					  struct bch_io_opts *io_opts,
+					  struct btree_iter *iter,
+					  struct bkey_s_c k,
+					  bool may_update_indirect)
 {
+	int ret = 0;
+
 	BUG_ON(iter->flags & BTREE_ITER_is_extents);
 	BUG_ON(iter->flags & BTREE_ITER_filter_snapshots);
 
+	/*
+	 * If it's an indirect extent, and we walked to it directly, we won't
+	 * have the options from the inode that were directly applied: options
+	 * from the extent take precedence - unless the io_opts option came from
+	 * the inode and may_update_indirect is true (walked from a
+	 * REFLINK_P_MAY_UPDATE_OPTIONS pointer).
+	 */
 	const struct bch_extent_rebalance *r = k.k->type == KEY_TYPE_reflink_v ?
 		bch2_bkey_rebalance_opts(k) : NULL;
 	if (r) {
-#define x(_name)							\
-		if (r->_name##_from_inode) {				\
-			io_opts->_name = r->_name;			\
-			io_opts->_name##_from_inode = true;		\
+#define x(_name)							\
+		if (r->_name##_from_inode &&				\
+		    !(may_update_indirect && io_opts->_name##_from_inode)) { \
+			io_opts->_name = r->_name;			\
+			io_opts->_name##_from_inode = true;		\
 		}
 		BCH_REBALANCE_OPTS()
 #undef x
@@ -209,8 +251,22 @@ int bch2_get_update_rebalance_opts(struct btree_trans *trans,
 	if (!bch2_bkey_rebalance_needs_update(trans->c, io_opts, k))
 		return 0;
 
+	if (k.k->type != KEY_TYPE_reflink_v) {
+		ret = have_rebalance_scan_cookie(trans, k.k->p.inode);
+		if (ret < 0)
+			return ret;
+
+		if (ret) {
+			CLASS(printbuf, buf)();
+
+			bch2_bkey_val_to_text(&buf, trans->c, k);
+
+			fsck_err(trans, extent_io_opts_not_set, "%s", buf.buf);
+		}
+	}
+
 	struct bkey_i *n = bch2_trans_kmalloc(trans, bkey_bytes(k.k) + 8);
-	int ret = PTR_ERR_OR_ZERO(n);
+	ret = PTR_ERR_OR_ZERO(n);
 	if (ret)
 		return ret;
 
@@ -218,13 +274,113 @@ int bch2_get_update_rebalance_opts(struct btree_trans *trans,
 
 	/* On successfull transaction commit, @k was invalidated: */
 
-	return bch2_bkey_set_needs_rebalance(trans->c, io_opts, n) ?:
+	ret = bch2_bkey_set_needs_rebalance(trans->c, io_opts, n) ?:
 		bch2_trans_update(trans, iter, n, BTREE_UPDATE_internal_snapshot_node) ?:
 		bch2_trans_commit(trans, NULL, NULL, 0) ?:
-		bch_err_throw(trans->c, transaction_restart_nested);
+		bch_err_throw(trans->c, transaction_restart_commit);
+fsck_err:
+	return ret;
 }
 
-#define REBALANCE_WORK_SCAN_OFFSET	(U64_MAX - 1)
+static struct bch_io_opts *bch2_extent_get_io_opts(struct btree_trans *trans,
+			struct per_snapshot_io_opts *io_opts,
+			struct bpos extent_pos, /* extent_iter, extent_k may be in reflink btree */
+			struct btree_iter *extent_iter,
+			struct bkey_s_c extent_k)
+{
+	struct bch_fs *c = trans->c;
+	u32 restart_count = trans->restart_count;
+	int ret = 0;
+
+	if (btree_iter_path(trans, extent_iter)->level)
+		return &io_opts->fs_io_opts;
+
+	if (extent_k.k->type == KEY_TYPE_reflink_v)
+		return &io_opts->fs_io_opts;
+
+	if (io_opts->cur_inum != extent_pos.inode) {
+		io_opts->d.nr = 0;
+
+		ret = for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_pos.inode),
+					 BTREE_ITER_all_snapshots, k, ({
+			if (k.k->p.offset != extent_pos.inode)
+				break;
+
+			if (!bkey_is_inode(k.k))
+				continue;
+
+			struct bch_inode_unpacked inode;
+			_ret3 = bch2_inode_unpack(k, &inode);
+			if (_ret3)
+				break;
+
+			struct snapshot_io_opts_entry e = { .snapshot = k.k->p.snapshot };
+			bch2_inode_opts_get(&e.io_opts, trans->c, &inode);
+
+			darray_push(&io_opts->d, e);
+		}));
+		io_opts->cur_inum = extent_pos.inode;
+	}
+
+	ret = ret ?: trans_was_restarted(trans, restart_count);
+	if (ret)
+		return ERR_PTR(ret);
+
+	if (extent_k.k->p.snapshot)
+		darray_for_each(io_opts->d, i)
+			if (bch2_snapshot_is_ancestor(c, extent_k.k->p.snapshot, i->snapshot))
+				return &i->io_opts;
+
+	return &io_opts->fs_io_opts;
+}
+
+struct bch_io_opts *bch2_extent_get_apply_io_opts(struct btree_trans *trans,
+			struct per_snapshot_io_opts *io_opts,
+			struct bpos extent_pos, /* extent_iter, extent_k may be in reflink btree */
+			struct btree_iter *extent_iter,
+			struct bkey_s_c extent_k,
+			bool may_update_indirect)
+{
+	struct bch_io_opts *opts =
+		bch2_extent_get_io_opts(trans, io_opts, extent_pos, extent_iter, extent_k);
+	if (IS_ERR(opts) || btree_iter_path(trans, extent_iter)->level)
+		return opts;
+
+	int ret = bch2_get_update_rebalance_opts(trans, opts, extent_iter, extent_k,
+						 may_update_indirect);
+	return ret ? ERR_PTR(ret) : opts;
+}
+
+int bch2_extent_get_io_opts_one(struct btree_trans *trans,
+				struct bch_io_opts *io_opts,
+				struct btree_iter *extent_iter,
+				struct bkey_s_c extent_k,
+				bool may_update_indirect)
+{
+	struct bch_fs *c = trans->c;
+
+	*io_opts = bch2_opts_to_inode_opts(c->opts);
+
+	/* reflink btree? */
+	if (extent_k.k->p.inode) {
+		CLASS(btree_iter, inode_iter)(trans, BTREE_ID_inodes,
+				SPOS(0, extent_k.k->p.inode, extent_k.k->p.snapshot),
+				BTREE_ITER_cached);
+		struct bkey_s_c inode_k = bch2_btree_iter_peek_slot(&inode_iter);
+		int ret = bkey_err(inode_k);
+		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+			return ret;
+
+		if (!ret && bkey_is_inode(inode_k.k)) {
+			struct bch_inode_unpacked inode;
+			bch2_inode_unpack(inode_k, &inode);
+			bch2_inode_opts_get(io_opts, c, &inode);
+		}
+	}
+
+	return bch2_get_update_rebalance_opts(trans, io_opts, extent_iter, extent_k,
+					      may_update_indirect);
+}
 
 static const char * const bch2_rebalance_state_strs[] = {
 #define x(t)	#t,
@@ -370,7 +526,7 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans,
 	if (bkey_err(k))
 		return k;
 
-	int ret = bch2_move_get_io_opts_one(trans, io_opts, extent_iter, k);
+	int ret = bch2_extent_get_io_opts_one(trans, io_opts, extent_iter, k, false);
 	if (ret)
 		return bkey_s_c_err(ret);
 
@@ -507,8 +663,9 @@ static int do_rebalance_scan(struct moving_context *ctxt, u64 inum, u64 cookie)
 				   BTREE_ITER_prefetch, k, ({
 			ctxt->stats->pos = BBPOS(iter.btree_id, iter.pos);
 
-			struct bch_io_opts *io_opts = bch2_move_get_io_opts(trans,
-						&snapshot_io_opts, iter.pos, &iter, k);
+			struct bch_io_opts *io_opts = bch2_extent_get_apply_io_opts(trans,
+						&snapshot_io_opts, iter.pos, &iter, k,
+						false);
 			PTR_ERR_OR_ZERO(io_opts);
 		})) ?:
 	commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
@@ -781,6 +938,7 @@ int bch2_fs_rebalance_init(struct bch_fs *c)
 static int check_rebalance_work_one(struct btree_trans *trans,
 				    struct btree_iter *extent_iter,
 				    struct btree_iter *rebalance_iter,
+				    struct per_snapshot_io_opts *snapshot_io_opts,
 				    struct bkey_buf *last_flushed)
 {
 	struct bch_fs *c = trans->c;
@@ -851,6 +1009,12 @@ static int check_rebalance_work_one(struct btree_trans *trans,
 		return ret;
 	}
 
+	struct bch_io_opts *io_opts = bch2_extent_get_apply_io_opts(trans,
+			snapshot_io_opts, extent_iter->pos, extent_iter, extent_k, false);
+	ret = PTR_ERR_OR_ZERO(io_opts);
+	if (ret)
+		return ret;
+
 	if (cmp <= 0)
 		bch2_btree_iter_advance(extent_iter);
 	if (cmp >= 0)
@@ -863,10 +1027,14 @@ int bch2_check_rebalance_work(struct bch_fs *c)
 {
 	CLASS(btree_trans, trans)(c);
 	CLASS(btree_iter, extent_iter)(trans, BTREE_ID_reflink, POS_MIN,
+				       BTREE_ITER_not_extents|
 				       BTREE_ITER_prefetch);
 	CLASS(btree_iter, rebalance_iter)(trans, BTREE_ID_rebalance_work, POS_MIN,
					  BTREE_ITER_prefetch);
 
+	struct per_snapshot_io_opts snapshot_io_opts;
+	per_snapshot_io_opts_init(&snapshot_io_opts, c);
+
 	struct bkey_buf last_flushed;
 	bch2_bkey_buf_init(&last_flushed);
 	bkey_init(&last_flushed.k->k);
@@ -880,12 +1048,14 @@ int bch2_check_rebalance_work(struct bch_fs *c)
 
 		bch2_trans_begin(trans);
 
-		ret = check_rebalance_work_one(trans, &extent_iter, &rebalance_iter, &last_flushed);
+		ret = check_rebalance_work_one(trans, &extent_iter, &rebalance_iter,
+					       &snapshot_io_opts, &last_flushed);
 
 		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
 			ret = 0;
 	}
 
+	per_snapshot_io_opts_exit(&snapshot_io_opts);
 	bch2_bkey_buf_exit(&last_flushed, c);
 	return ret < 0 ? ret : 0;
 }