Diffstat (limited to 'fs/bcachefs/rebalance.c')
-rw-r--r--  fs/bcachefs/rebalance.c | 323
1 file changed, 234 insertions(+), 89 deletions(-)
diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c
index 9e22ff0e2d28..f1497302332f 100644
--- a/fs/bcachefs/rebalance.c
+++ b/fs/bcachefs/rebalance.c
@@ -25,6 +25,8 @@
#include <linux/kthread.h>
#include <linux/sched/cputime.h>
+#define REBALANCE_WORK_SCAN_OFFSET (U64_MAX - 1)
+
/* bch_extent_rebalance: */
static const struct bch_extent_rebalance *bch2_bkey_ptrs_rebalance_opts(struct bkey_ptrs_c ptrs)
@@ -43,108 +45,148 @@ static const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s
return bch2_bkey_ptrs_rebalance_opts(bch2_bkey_ptrs_c(k));
}
-static inline unsigned bch2_bkey_ptrs_need_compress(struct bch_fs *c,
- struct bch_inode_opts *opts,
- struct bkey_s_c k,
- struct bkey_ptrs_c ptrs)
+void bch2_extent_rebalance_to_text(struct printbuf *out, struct bch_fs *c,
+ const struct bch_extent_rebalance *r)
{
- if (!opts->background_compression)
- return 0;
+ prt_str(out, "rebalance:");
- unsigned compression_type = bch2_compression_opt_to_type(opts->background_compression);
- const union bch_extent_entry *entry;
- struct extent_ptr_decoded p;
- unsigned ptr_bit = 1;
- unsigned rewrite_ptrs = 0;
+ prt_printf(out, " replicas=%u", r->data_replicas);
+ if (r->data_replicas_from_inode)
+ prt_str(out, " (inode)");
- bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
- if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible ||
- p.ptr.unwritten)
- return 0;
+ prt_str(out, " checksum=");
+ bch2_prt_csum_opt(out, r->data_checksum);
+ if (r->data_checksum_from_inode)
+ prt_str(out, " (inode)");
+
+ if (r->background_compression || r->background_compression_from_inode) {
+ prt_str(out, " background_compression=");
+ bch2_compression_opt_to_text(out, r->background_compression);
- if (!p.ptr.cached && p.crc.compression_type != compression_type)
- rewrite_ptrs |= ptr_bit;
- ptr_bit <<= 1;
+ if (r->background_compression_from_inode)
+ prt_str(out, " (inode)");
}
- return rewrite_ptrs;
-}
+ if (r->background_target || r->background_target_from_inode) {
+ prt_str(out, " background_target=");
+ if (c)
+ bch2_target_to_text(out, c, r->background_target);
+ else
+ prt_printf(out, "%u", r->background_target);
-static inline unsigned bch2_bkey_ptrs_need_move(struct bch_fs *c,
- struct bch_inode_opts *opts,
- struct bkey_ptrs_c ptrs)
-{
- if (!opts->background_target ||
- !bch2_target_accepts_data(c, BCH_DATA_user, opts->background_target))
- return 0;
+ if (r->background_target_from_inode)
+ prt_str(out, " (inode)");
+ }
- unsigned ptr_bit = 1;
- unsigned rewrite_ptrs = 0;
+ if (r->promote_target || r->promote_target_from_inode) {
+ prt_str(out, " promote_target=");
+ if (c)
+ bch2_target_to_text(out, c, r->promote_target);
+ else
+ prt_printf(out, "%u", r->promote_target);
- guard(rcu)();
- bkey_for_each_ptr(ptrs, ptr) {
- if (!ptr->cached && !bch2_dev_in_target(c, ptr->dev, opts->background_target))
- rewrite_ptrs |= ptr_bit;
- ptr_bit <<= 1;
+ if (r->promote_target_from_inode)
+ prt_str(out, " (inode)");
}
- return rewrite_ptrs;
+ if (r->erasure_code || r->erasure_code_from_inode) {
+ prt_printf(out, " ec=%u", r->erasure_code);
+ if (r->erasure_code_from_inode)
+ prt_str(out, " (inode)");
+ }
}
-static unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c,
- struct bch_inode_opts *opts,
- struct bkey_s_c k)
+static void bch2_bkey_needs_rebalance(struct bch_fs *c, struct bkey_s_c k,
+ struct bch_inode_opts *io_opts,
+ unsigned *move_ptrs,
+ unsigned *compress_ptrs,
+ u64 *sectors)
{
- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-
- if (bch2_bkey_extent_ptrs_flags(ptrs) & BIT_ULL(BCH_EXTENT_FLAG_poisoned))
- return 0;
+ *move_ptrs = 0;
+ *compress_ptrs = 0;
+ *sectors = 0;
- return bch2_bkey_ptrs_need_compress(c, opts, k, ptrs) |
- bch2_bkey_ptrs_need_move(c, opts, ptrs);
-}
-
-u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k)
-{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
- const struct bch_extent_rebalance *opts = bch2_bkey_ptrs_rebalance_opts(ptrs);
- if (!opts)
- return 0;
+ const struct bch_extent_rebalance *rb_opts = bch2_bkey_ptrs_rebalance_opts(ptrs);
+ if (!io_opts && !rb_opts)
+ return;
if (bch2_bkey_extent_ptrs_flags(ptrs) & BIT_ULL(BCH_EXTENT_FLAG_poisoned))
- return 0;
+ return;
+
+ unsigned compression_type =
+ bch2_compression_opt_to_type(io_opts
+ ? io_opts->background_compression
+ : rb_opts->background_compression);
+ unsigned target = io_opts
+ ? io_opts->background_target
+ : rb_opts->background_target;
+ if (target && !bch2_target_accepts_data(c, BCH_DATA_user, target))
+ target = 0;
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
- u64 sectors = 0;
+ bool incompressible = false, unwritten = false;
- if (opts->background_compression) {
- unsigned compression_type = bch2_compression_opt_to_type(opts->background_compression);
+ unsigned ptr_idx = 1;
- bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
- if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible ||
- p.ptr.unwritten) {
- sectors = 0;
- goto incompressible;
- }
+ guard(rcu)();
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+ incompressible |= p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible;
+ unwritten |= p.ptr.unwritten;
+
+ if (!p.ptr.cached) {
+ if (p.crc.compression_type != compression_type)
+ *compress_ptrs |= ptr_idx;
- if (!p.ptr.cached && p.crc.compression_type != compression_type)
- sectors += p.crc.compressed_size;
+ if (target && !bch2_dev_in_target(c, p.ptr.dev, target))
+ *move_ptrs |= ptr_idx;
}
+
+ ptr_idx <<= 1;
}
-incompressible:
- if (opts->background_target) {
- guard(rcu)();
- bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
- if (!p.ptr.cached &&
- !bch2_dev_in_target(c, p.ptr.dev, opts->background_target))
- sectors += p.crc.compressed_size;
+
+ if (unwritten)
+ *compress_ptrs = 0;
+ if (incompressible)
+ *compress_ptrs = 0;
+
+ unsigned rb_ptrs = *move_ptrs | *compress_ptrs;
+
+ if (!rb_ptrs)
+ return;
+
+ ptr_idx = 1;
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+ if (rb_ptrs & ptr_idx)
+ *sectors += p.crc.compressed_size;
+ ptr_idx <<= 1;
}
+}
+
+u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k)
+{
+ unsigned move_ptrs = 0;
+ unsigned compress_ptrs = 0;
+ u64 sectors = 0;
+ bch2_bkey_needs_rebalance(c, k, NULL, &move_ptrs, &compress_ptrs, &sectors);
return sectors;
}
+static unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c,
+ struct bch_inode_opts *opts,
+ struct bkey_s_c k)
+{
+ unsigned move_ptrs = 0;
+ unsigned compress_ptrs = 0;
+ u64 sectors = 0;
+
+ bch2_bkey_needs_rebalance(c, k, opts, &move_ptrs, &compress_ptrs, &sectors);
+ return move_ptrs|compress_ptrs;
+}
+
static inline bool bkey_should_have_rb_opts(struct bch_fs *c,
struct bch_inode_opts *opts,
struct bkey_s_c k)
@@ -179,6 +221,35 @@ int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bch_inode_opts *opts,
return 0;
}
+static int have_rebalance_scan_cookie(struct btree_trans *trans, u64 inum)
+{
+ /*
+ * If opts need to be propagated to the extent, a scan cookie should be
+ * present:
+ */
+ CLASS(btree_iter, iter)(trans, BTREE_ID_rebalance_work,
+ SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX),
+ BTREE_ITER_intent);
+ struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter);
+ int ret = bkey_err(k);
+ if (ret)
+ return ret;
+
+ if (k.k->type == KEY_TYPE_cookie)
+ return 1;
+
+ if (!inum)
+ return 0;
+
+ bch2_btree_iter_set_pos(&iter, SPOS(0, REBALANCE_WORK_SCAN_OFFSET, U32_MAX));
+ k = bch2_btree_iter_peek_slot(&iter);
+ ret = bkey_err(k);
+ if (ret)
+ return ret;
+
+ return k.k->type == KEY_TYPE_cookie;
+}
+
static int bch2_get_update_rebalance_opts(struct btree_trans *trans,
struct bch_inode_opts *io_opts,
struct btree_iter *iter,
@@ -186,6 +257,7 @@ static int bch2_get_update_rebalance_opts(struct btree_trans *trans,
enum set_needs_rebalance_ctx ctx)
{
struct bch_fs *c = trans->c;
+ int ret = 0;
BUG_ON(iter->flags & BTREE_ITER_is_extents);
BUG_ON(iter->flags & BTREE_ITER_filter_snapshots);
@@ -216,13 +288,61 @@ static int bch2_get_update_rebalance_opts(struct btree_trans *trans,
struct bch_extent_rebalance new = io_opts_to_rebalance_opts(c, io_opts);
- if (bkey_should_have_rb_opts(c, io_opts, k)
+ bool should_have_rb_opts = bkey_should_have_rb_opts(c, io_opts, k);
+
+ if (should_have_rb_opts
? old && !memcmp(old, &new, sizeof(new))
: !old)
return 0;
+ if (k.k->type != KEY_TYPE_reflink_v) {
+ if (old && !should_have_rb_opts) {
+ CLASS(printbuf, buf)();
+
+ prt_printf(&buf, "extent with unneeded rebalance opts:\n");
+ bch2_bkey_val_to_text(&buf, c, k);
+
+ fsck_err(trans, extent_io_opts_not_set, "%s", buf.buf);
+ } else {
+ ret = have_rebalance_scan_cookie(trans, k.k->p.inode);
+ if (ret < 0)
+ return ret;
+
+ if (!ret) {
+ CLASS(printbuf, buf)();
+
+ prt_printf(&buf, "extent with incorrect/missing rebalance opts:\n");
+ bch2_bkey_val_to_text(&buf, c, k);
+ const struct bch_extent_rebalance _old = {};
+ if (!old)
+ old = &_old;
+
+#define x(_name) \
+ if (old->_name != new._name) \
+ prt_printf(&buf, "\n" #_name " %u != %u", \
+ old->_name, new._name); \
+ if (old->_name##_from_inode != new._name##_from_inode) \
+ prt_printf(&buf, "\n" #_name "_from_inode %u != %u", \
+ old->_name##_from_inode, new._name##_from_inode);
+ BCH_REBALANCE_OPTS()
+#undef x
+
+ if (old->unused != new.unused)
+ prt_printf(&buf, "\nunused %u != %u", old->unused, new.unused);
+
+ if (old->type != new.type)
+ prt_printf(&buf, "\ntype %u != %u", old->type, new.type);
+
+ prt_newline(&buf);
+ bch2_extent_rebalance_to_text(&buf, c, &new);
+
+ fsck_err(trans, extent_io_opts_not_set, "%s", buf.buf);
+ }
+ }
+ }
+
struct bkey_i *n = bch2_trans_kmalloc(trans, bkey_bytes(k.k) + 8);
- int ret = PTR_ERR_OR_ZERO(n);
+ ret = PTR_ERR_OR_ZERO(n);
if (ret)
return ret;
@@ -230,10 +350,12 @@ static int bch2_get_update_rebalance_opts(struct btree_trans *trans,
/* On successful transaction commit, @k was invalidated: */
- return bch2_bkey_set_needs_rebalance(c, io_opts, n, ctx, 0) ?:
+ ret = bch2_bkey_set_needs_rebalance(c, io_opts, n, ctx, 0) ?:
bch2_trans_update(trans, iter, n, BTREE_UPDATE_internal_snapshot_node) ?:
bch2_trans_commit(trans, NULL, NULL, 0) ?:
bch_err_throw(c, transaction_restart_commit);
+fsck_err:
+ return ret;
}
static struct bch_inode_opts *bch2_extent_get_io_opts(struct btree_trans *trans,
@@ -336,8 +458,6 @@ int bch2_extent_get_io_opts_one(struct btree_trans *trans,
ctx);
}
-#define REBALANCE_WORK_SCAN_OFFSET (U64_MAX - 1)
-
static const char * const bch2_rebalance_state_strs[] = {
#define x(t) #t,
BCH_REBALANCE_STATES()
@@ -518,23 +638,25 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans,
bch2_bkey_val_to_text(&buf, c, k);
prt_newline(&buf);
- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ unsigned move_ptrs = 0;
+ unsigned compress_ptrs = 0;
+ u64 sectors = 0;
- unsigned p = bch2_bkey_ptrs_need_compress(c, opts, k, ptrs);
- if (p) {
- prt_str(&buf, "compression=");
- bch2_compression_opt_to_text(&buf, opts->background_compression);
+ bch2_bkey_needs_rebalance(c, k, opts, &move_ptrs, &compress_ptrs, &sectors);
+
+ if (move_ptrs) {
+ prt_str(&buf, "move=");
+ bch2_target_to_text(&buf, c, opts->background_target);
prt_str(&buf, " ");
- bch2_prt_u64_base2(&buf, p);
+ bch2_prt_u64_base2(&buf, move_ptrs);
prt_newline(&buf);
}
- p = bch2_bkey_ptrs_need_move(c, opts, ptrs);
- if (p) {
- prt_str(&buf, "move=");
- bch2_target_to_text(&buf, c, opts->background_target);
+ if (compress_ptrs) {
+ prt_str(&buf, "compression=");
+ bch2_compression_opt_to_text(&buf, opts->background_compression);
prt_str(&buf, " ");
- bch2_prt_u64_base2(&buf, p);
+ bch2_prt_u64_base2(&buf, compress_ptrs);
prt_newline(&buf);
}
@@ -649,6 +771,8 @@ static int do_rebalance_scan(struct moving_context *ctxt,
BTREE_ITER_prefetch, k, ({
ctxt->stats->pos = BBPOS(iter.btree_id, iter.pos);
+ atomic64_add(k.k->size, &r->scan_stats.sectors_seen);
+
struct bch_inode_opts *opts = bch2_extent_get_apply_io_opts(trans,
snapshot_io_opts, iter.pos, &iter, k,
SET_NEEDS_REBALANCE_opt_change);
@@ -658,10 +782,31 @@ static int do_rebalance_scan(struct moving_context *ctxt,
REFLINK_P_MAY_UPDATE_OPTIONS(bkey_s_c_to_reflink_p(k).v)
? do_rebalance_scan_indirect(trans, bkey_s_c_to_reflink_p(k), opts)
: 0);
- })) ?:
- commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- bch2_clear_rebalance_needs_scan(trans, inum, cookie));
+ }));
+ if (ret)
+ goto out;
+
+ if (!inum) {
+ ret = for_each_btree_key_max(trans, iter, BTREE_ID_reflink,
+ POS_MIN, POS_MAX,
+ BTREE_ITER_all_snapshots|
+ BTREE_ITER_prefetch, k, ({
+ ctxt->stats->pos = BBPOS(iter.btree_id, iter.pos);
+
+ atomic64_add(k.k->size, &r->scan_stats.sectors_seen);
+
+ struct bch_inode_opts *opts = bch2_extent_get_apply_io_opts(trans,
+ snapshot_io_opts, iter.pos, &iter, k,
+ SET_NEEDS_REBALANCE_opt_change);
+ PTR_ERR_OR_ZERO(opts);
+ }));
+ if (ret)
+ goto out;
+ }
+ ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ bch2_clear_rebalance_needs_scan(trans, inum, cookie));
+out:
*sectors_scanned += atomic64_read(&r->scan_stats.sectors_seen);
/*
* Ensure that the rebalance_work entries we created are seen by the