summaryrefslogtreecommitdiff
path: root/fs/bcachefs/rebalance.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/bcachefs/rebalance.c')
-rw-r--r--fs/bcachefs/rebalance.c202
1 files changed, 186 insertions, 16 deletions
diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c
index d1e064be1b9f..8cb8d3779975 100644
--- a/fs/bcachefs/rebalance.c
+++ b/fs/bcachefs/rebalance.c
@@ -25,6 +25,8 @@
#include <linux/kthread.h>
#include <linux/sched/cputime.h>
+#define REBALANCE_WORK_SCAN_OFFSET (U64_MAX - 1)
+
/* bch_extent_rebalance: */
static const struct bch_extent_rebalance *bch2_bkey_ptrs_rebalance_opts(struct bkey_ptrs_c ptrs)
@@ -186,21 +188,61 @@ int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bch_io_opts *opts,
return 0;
}
-int bch2_get_update_rebalance_opts(struct btree_trans *trans,
- struct bch_io_opts *io_opts,
- struct btree_iter *iter,
- struct bkey_s_c k)
+static int have_rebalance_scan_cookie(struct btree_trans *trans, u64 inum)
+{
+ /*
+ * If opts need to be propagated to the extent, a scan cookie should be
+ * present:
+ */
+ CLASS(btree_iter, iter)(trans, BTREE_ID_rebalance_work,
+ SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX),
+ BTREE_ITER_intent);
+ struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter);
+ int ret = bkey_err(k);
+ if (ret)
+ return ret;
+
+ if (k.k->type == KEY_TYPE_cookie)
+ return 1;
+
+ if (!inum)
+ return 0;
+
+ bch2_btree_iter_set_pos(&iter, SPOS(0, REBALANCE_WORK_SCAN_OFFSET, U32_MAX));
+ k = bch2_btree_iter_peek_slot(&iter);
+ ret = bkey_err(k);
+ if (ret)
+ return ret;
+
+ return k.k->type == KEY_TYPE_cookie;
+}
+
+static int bch2_get_update_rebalance_opts(struct btree_trans *trans,
+ struct bch_io_opts *io_opts,
+ struct btree_iter *iter,
+ struct bkey_s_c k,
+ bool may_update_indirect)
{
+ int ret = 0;
+
BUG_ON(iter->flags & BTREE_ITER_is_extents);
BUG_ON(iter->flags & BTREE_ITER_filter_snapshots);
+ /*
+ * If it's an indirect extent, and we walked to it directly, we won't
+ * have the options from the inode that were directly applied: options
+ * from the extent take precedence - unless the io_opts option came from
+ * the inode and may_update_indirect is true (walked from a
+ * REFLINK_P_MAY_UPDATE_OPTIONS pointer).
+ */
const struct bch_extent_rebalance *r = k.k->type == KEY_TYPE_reflink_v
? bch2_bkey_rebalance_opts(k) : NULL;
if (r) {
-#define x(_name) \
- if (r->_name##_from_inode) { \
- io_opts->_name = r->_name; \
- io_opts->_name##_from_inode = true; \
+#define x(_name) \
+ if (r->_name##_from_inode && \
+ !(may_update_indirect && io_opts->_name##_from_inode)) { \
+ io_opts->_name = r->_name; \
+ io_opts->_name##_from_inode = true; \
}
BCH_REBALANCE_OPTS()
#undef x
@@ -209,8 +251,22 @@ int bch2_get_update_rebalance_opts(struct btree_trans *trans,
if (!bch2_bkey_rebalance_needs_update(trans->c, io_opts, k))
return 0;
+ if (k.k->type != KEY_TYPE_reflink_v) {
+ ret = have_rebalance_scan_cookie(trans, k.k->p.inode);
+ if (ret < 0)
+ return ret;
+
+ if (ret) {
+ CLASS(printbuf, buf)();
+
+ bch2_bkey_val_to_text(&buf, trans->c, k);
+
+ fsck_err(trans, extent_io_opts_not_set, "%s", buf.buf);
+ }
+ }
+
struct bkey_i *n = bch2_trans_kmalloc(trans, bkey_bytes(k.k) + 8);
- int ret = PTR_ERR_OR_ZERO(n);
+ ret = PTR_ERR_OR_ZERO(n);
if (ret)
return ret;
@@ -218,13 +274,113 @@ int bch2_get_update_rebalance_opts(struct btree_trans *trans,
/* On successfull transaction commit, @k was invalidated: */
- return bch2_bkey_set_needs_rebalance(trans->c, io_opts, n) ?:
+ ret = bch2_bkey_set_needs_rebalance(trans->c, io_opts, n) ?:
bch2_trans_update(trans, iter, n, BTREE_UPDATE_internal_snapshot_node) ?:
bch2_trans_commit(trans, NULL, NULL, 0) ?:
- bch_err_throw(trans->c, transaction_restart_nested);
+ bch_err_throw(trans->c, transaction_restart_commit);
+fsck_err:
+ return ret;
}
-#define REBALANCE_WORK_SCAN_OFFSET (U64_MAX - 1)
+static struct bch_io_opts *bch2_extent_get_io_opts(struct btree_trans *trans,
+ struct per_snapshot_io_opts *io_opts,
+ struct bpos extent_pos, /* extent_iter, extent_k may be in reflink btree */
+ struct btree_iter *extent_iter,
+ struct bkey_s_c extent_k)
+{
+ struct bch_fs *c = trans->c;
+ u32 restart_count = trans->restart_count;
+ int ret = 0;
+
+ if (btree_iter_path(trans, extent_iter)->level)
+ return &io_opts->fs_io_opts;
+
+ if (extent_k.k->type == KEY_TYPE_reflink_v)
+ return &io_opts->fs_io_opts;
+
+ if (io_opts->cur_inum != extent_pos.inode) {
+ io_opts->d.nr = 0;
+
+ ret = for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_pos.inode),
+ BTREE_ITER_all_snapshots, k, ({
+ if (k.k->p.offset != extent_pos.inode)
+ break;
+
+ if (!bkey_is_inode(k.k))
+ continue;
+
+ struct bch_inode_unpacked inode;
+ _ret3 = bch2_inode_unpack(k, &inode);
+ if (_ret3)
+ break;
+
+ struct snapshot_io_opts_entry e = { .snapshot = k.k->p.snapshot };
+ bch2_inode_opts_get(&e.io_opts, trans->c, &inode);
+
+ darray_push(&io_opts->d, e);
+ }));
+ io_opts->cur_inum = extent_pos.inode;
+ }
+
+ ret = ret ?: trans_was_restarted(trans, restart_count);
+ if (ret)
+ return ERR_PTR(ret);
+
+ if (extent_k.k->p.snapshot)
+ darray_for_each(io_opts->d, i)
+ if (bch2_snapshot_is_ancestor(c, extent_k.k->p.snapshot, i->snapshot))
+ return &i->io_opts;
+
+ return &io_opts->fs_io_opts;
+}
+
+struct bch_io_opts *bch2_extent_get_apply_io_opts(struct btree_trans *trans,
+ struct per_snapshot_io_opts *io_opts,
+ struct bpos extent_pos, /* extent_iter, extent_k may be in reflink btree */
+ struct btree_iter *extent_iter,
+ struct bkey_s_c extent_k,
+ bool may_update_indirect)
+{
+ struct bch_io_opts *opts =
+ bch2_extent_get_io_opts(trans, io_opts, extent_pos, extent_iter, extent_k);
+ if (IS_ERR(opts) || btree_iter_path(trans, extent_iter)->level)
+ return opts;
+
+ int ret = bch2_get_update_rebalance_opts(trans, opts, extent_iter, extent_k,
+ may_update_indirect);
+ return ret ? ERR_PTR(ret) : opts;
+}
+
+int bch2_extent_get_io_opts_one(struct btree_trans *trans,
+ struct bch_io_opts *io_opts,
+ struct btree_iter *extent_iter,
+ struct bkey_s_c extent_k,
+ bool may_update_indirect)
+{
+ struct bch_fs *c = trans->c;
+
+ *io_opts = bch2_opts_to_inode_opts(c->opts);
+
+ /* reflink btree? */
+ if (extent_k.k->p.inode) {
+ CLASS(btree_iter, inode_iter)(trans, BTREE_ID_inodes,
+ SPOS(0, extent_k.k->p.inode, extent_k.k->p.snapshot),
+ BTREE_ITER_cached);
+ struct bkey_s_c inode_k = bch2_btree_iter_peek_slot(&inode_iter);
+ int ret = bkey_err(inode_k);
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ return ret;
+
+ if (!ret && bkey_is_inode(inode_k.k)) {
+ struct bch_inode_unpacked inode;
+ bch2_inode_unpack(inode_k, &inode);
+ bch2_inode_opts_get(io_opts, c, &inode);
+ }
+ }
+
+ return bch2_get_update_rebalance_opts(trans, io_opts, extent_iter, extent_k,
+ may_update_indirect);
+}
static const char * const bch2_rebalance_state_strs[] = {
#define x(t) #t,
@@ -370,7 +526,7 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans,
if (bkey_err(k))
return k;
- int ret = bch2_move_get_io_opts_one(trans, io_opts, extent_iter, k);
+ int ret = bch2_extent_get_io_opts_one(trans, io_opts, extent_iter, k, false);
if (ret)
return bkey_s_c_err(ret);
@@ -507,8 +663,9 @@ static int do_rebalance_scan(struct moving_context *ctxt, u64 inum, u64 cookie)
BTREE_ITER_prefetch, k, ({
ctxt->stats->pos = BBPOS(iter.btree_id, iter.pos);
- struct bch_io_opts *io_opts = bch2_move_get_io_opts(trans,
- &snapshot_io_opts, iter.pos, &iter, k);
+ struct bch_io_opts *io_opts = bch2_extent_get_apply_io_opts(trans,
+ &snapshot_io_opts, iter.pos, &iter, k,
+ false);
PTR_ERR_OR_ZERO(io_opts);
})) ?:
commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
@@ -781,6 +938,7 @@ int bch2_fs_rebalance_init(struct bch_fs *c)
static int check_rebalance_work_one(struct btree_trans *trans,
struct btree_iter *extent_iter,
struct btree_iter *rebalance_iter,
+ struct per_snapshot_io_opts *snapshot_io_opts,
struct bkey_buf *last_flushed)
{
struct bch_fs *c = trans->c;
@@ -851,6 +1009,12 @@ static int check_rebalance_work_one(struct btree_trans *trans,
return ret;
}
+ struct bch_io_opts *io_opts = bch2_extent_get_apply_io_opts(trans,
+ snapshot_io_opts, extent_iter->pos, extent_iter, extent_k, false);
+ ret = PTR_ERR_OR_ZERO(io_opts);
+ if (ret)
+ return ret;
+
if (cmp <= 0)
bch2_btree_iter_advance(extent_iter);
if (cmp >= 0)
@@ -863,10 +1027,14 @@ int bch2_check_rebalance_work(struct bch_fs *c)
{
CLASS(btree_trans, trans)(c);
CLASS(btree_iter, extent_iter)(trans, BTREE_ID_reflink, POS_MIN,
+ BTREE_ITER_not_extents|
BTREE_ITER_prefetch);
CLASS(btree_iter, rebalance_iter)(trans, BTREE_ID_rebalance_work, POS_MIN,
BTREE_ITER_prefetch);
+ struct per_snapshot_io_opts snapshot_io_opts;
+ per_snapshot_io_opts_init(&snapshot_io_opts, c);
+
struct bkey_buf last_flushed;
bch2_bkey_buf_init(&last_flushed);
bkey_init(&last_flushed.k->k);
@@ -880,12 +1048,14 @@ int bch2_check_rebalance_work(struct bch_fs *c)
bch2_trans_begin(trans);
- ret = check_rebalance_work_one(trans, &extent_iter, &rebalance_iter, &last_flushed);
+ ret = check_rebalance_work_one(trans, &extent_iter, &rebalance_iter,
+ &snapshot_io_opts, &last_flushed);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
ret = 0;
}
+ per_snapshot_io_opts_exit(&snapshot_io_opts);
bch2_bkey_buf_exit(&last_flushed, c);
return ret < 0 ? ret : 0;
}