Diffstat (limited to 'fs/bcachefs/rebalance.c')
-rw-r--r--	fs/bcachefs/rebalance.c	323
1 file changed, 234 insertions, 89 deletions
diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c
index 9e22ff0e2d28..f1497302332f 100644
--- a/fs/bcachefs/rebalance.c
+++ b/fs/bcachefs/rebalance.c
@@ -25,6 +25,8 @@
 #include <linux/kthread.h>
 #include <linux/sched/cputime.h>
 
+#define REBALANCE_WORK_SCAN_OFFSET	(U64_MAX - 1)
+
 /* bch_extent_rebalance: */
 
 static const struct bch_extent_rebalance *bch2_bkey_ptrs_rebalance_opts(struct bkey_ptrs_c ptrs)
@@ -43,108 +45,148 @@ static const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s
 	return bch2_bkey_ptrs_rebalance_opts(bch2_bkey_ptrs_c(k));
 }
 
-static inline unsigned bch2_bkey_ptrs_need_compress(struct bch_fs *c,
-					struct bch_inode_opts *opts,
-					struct bkey_s_c k,
-					struct bkey_ptrs_c ptrs)
+void bch2_extent_rebalance_to_text(struct printbuf *out, struct bch_fs *c,
+				   const struct bch_extent_rebalance *r)
 {
-	if (!opts->background_compression)
-		return 0;
+	prt_str(out, "rebalance:");
 
-	unsigned compression_type = bch2_compression_opt_to_type(opts->background_compression);
-	const union bch_extent_entry *entry;
-	struct extent_ptr_decoded p;
-	unsigned ptr_bit = 1;
-	unsigned rewrite_ptrs = 0;
+	prt_printf(out, " replicas=%u", r->data_replicas);
+	if (r->data_replicas_from_inode)
+		prt_str(out, " (inode)");
 
-	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
-		if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible ||
-		    p.ptr.unwritten)
-			return 0;
+	prt_str(out, " checksum=");
+	bch2_prt_csum_opt(out, r->data_checksum);
+	if (r->data_checksum_from_inode)
+		prt_str(out, " (inode)");
+
+	if (r->background_compression || r->background_compression_from_inode) {
+		prt_str(out, " background_compression=");
+		bch2_compression_opt_to_text(out, r->background_compression);
 
-		if (!p.ptr.cached && p.crc.compression_type != compression_type)
-			rewrite_ptrs |= ptr_bit;
-		ptr_bit <<= 1;
+		if (r->background_compression_from_inode)
+			prt_str(out, " (inode)");
 	}
 
-	return rewrite_ptrs;
-}
+	if (r->background_target || r->background_target_from_inode) {
+		prt_str(out, " background_target=");
+		if (c)
+			bch2_target_to_text(out, c, r->background_target);
+		else
+			prt_printf(out, "%u", r->background_target);
 
-static inline unsigned bch2_bkey_ptrs_need_move(struct bch_fs *c,
-					struct bch_inode_opts *opts,
-					struct bkey_ptrs_c ptrs)
-{
-	if (!opts->background_target ||
-	    !bch2_target_accepts_data(c, BCH_DATA_user, opts->background_target))
-		return 0;
+		if (r->background_target_from_inode)
+			prt_str(out, " (inode)");
 	}
 
-	unsigned ptr_bit = 1;
-	unsigned rewrite_ptrs = 0;
+	if (r->promote_target || r->promote_target_from_inode) {
+		prt_str(out, " promote_target=");
+		if (c)
+			bch2_target_to_text(out, c, r->promote_target);
+		else
+			prt_printf(out, "%u", r->promote_target);
 
-	guard(rcu)();
-	bkey_for_each_ptr(ptrs, ptr) {
-		if (!ptr->cached && !bch2_dev_in_target(c, ptr->dev, opts->background_target))
-			rewrite_ptrs |= ptr_bit;
-		ptr_bit <<= 1;
+		if (r->promote_target_from_inode)
+			prt_str(out, " (inode)");
 	}
 
-	return rewrite_ptrs;
+	if (r->erasure_code || r->erasure_code_from_inode) {
+		prt_printf(out, " ec=%u", r->erasure_code);
+		if (r->erasure_code_from_inode)
+			prt_str(out, " (inode)");
+	}
 }
 
-static unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c,
-					      struct bch_inode_opts *opts,
-					      struct bkey_s_c k)
+static void bch2_bkey_needs_rebalance(struct bch_fs *c, struct bkey_s_c k,
+				      struct bch_inode_opts *io_opts,
+				      unsigned *move_ptrs,
+				      unsigned *compress_ptrs,
+				      u64 *sectors)
 {
-	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-
-	if (bch2_bkey_extent_ptrs_flags(ptrs) & BIT_ULL(BCH_EXTENT_FLAG_poisoned))
-		return 0;
+	*move_ptrs = 0;
+	*compress_ptrs = 0;
+	*sectors = 0;
 
-	return bch2_bkey_ptrs_need_compress(c, opts, k, ptrs) |
-		bch2_bkey_ptrs_need_move(c, opts, ptrs);
-}
-
-u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k)
-{
 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-	const struct bch_extent_rebalance *opts = bch2_bkey_ptrs_rebalance_opts(ptrs);
-	if (!opts)
-		return 0;
+
+	const struct bch_extent_rebalance *rb_opts = bch2_bkey_ptrs_rebalance_opts(ptrs);
+	if (!io_opts && !rb_opts)
+		return;
 
 	if (bch2_bkey_extent_ptrs_flags(ptrs) & BIT_ULL(BCH_EXTENT_FLAG_poisoned))
-		return 0;
+		return;
+
+	unsigned compression_type =
+		bch2_compression_opt_to_type(io_opts
+					     ? io_opts->background_compression
+					     : rb_opts->background_compression);
+	unsigned target = io_opts
+		? io_opts->background_target
+		: rb_opts->background_target;
+	if (target && !bch2_target_accepts_data(c, BCH_DATA_user, target))
+		target = 0;
 
 	const union bch_extent_entry *entry;
 	struct extent_ptr_decoded p;
-	u64 sectors = 0;
+	bool incompressible = false, unwritten = false;
 
-	if (opts->background_compression) {
-		unsigned compression_type = bch2_compression_opt_to_type(opts->background_compression);
+	unsigned ptr_idx = 1;
 
-		bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
-			if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible ||
-			    p.ptr.unwritten) {
-				sectors = 0;
-				goto incompressible;
-			}
+	guard(rcu)();
+	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+		incompressible	|= p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible;
+		unwritten	|= p.ptr.unwritten;
+
+		if (!p.ptr.cached) {
+			if (p.crc.compression_type != compression_type)
+				*compress_ptrs |= ptr_idx;
 
-			if (!p.ptr.cached && p.crc.compression_type != compression_type)
-				sectors += p.crc.compressed_size;
+			if (target && !bch2_dev_in_target(c, p.ptr.dev, target))
+				*move_ptrs |= ptr_idx;
 		}
+
+		ptr_idx <<= 1;
 	}
-incompressible:
-	if (opts->background_target) {
-		guard(rcu)();
-		bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
-			if (!p.ptr.cached &&
-			    !bch2_dev_in_target(c, p.ptr.dev, opts->background_target))
-				sectors += p.crc.compressed_size;
+
+	if (unwritten)
+		*compress_ptrs = 0;
+	if (incompressible)
+		*compress_ptrs = 0;
+
+	unsigned rb_ptrs = *move_ptrs | *compress_ptrs;
+
+	if (!rb_ptrs)
+		return;
+
+	ptr_idx = 1;
+	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+		if (rb_ptrs & ptr_idx)
+			*sectors += p.crc.compressed_size;
+		ptr_idx <<= 1;
 	}
+}
+
+u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k)
+{
+	unsigned move_ptrs = 0;
+	unsigned compress_ptrs = 0;
+	u64 sectors = 0;
 
+	bch2_bkey_needs_rebalance(c, k, NULL, &move_ptrs, &compress_ptrs, &sectors);
 	return sectors;
 }
 
+static unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c,
+					      struct bch_inode_opts *opts,
+					      struct bkey_s_c k)
+{
+	unsigned move_ptrs = 0;
+	unsigned compress_ptrs = 0;
+	u64 sectors = 0;
+
+	bch2_bkey_needs_rebalance(c, k, opts, &move_ptrs, &compress_ptrs, &sectors);
+	return move_ptrs|compress_ptrs;
+}
+
 static inline bool bkey_should_have_rb_opts(struct bch_fs *c,
 					    struct bch_inode_opts *opts,
 					    struct bkey_s_c k)
@@ -179,6 +221,35 @@ int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bch_inode_opts *opts,
 	return 0;
 }
 
+static int have_rebalance_scan_cookie(struct btree_trans *trans, u64 inum)
+{
+	/*
+	 * If opts need to be propagated to the extent, a scan cookie should be
+	 * present:
+	 */
+	CLASS(btree_iter, iter)(trans, BTREE_ID_rebalance_work,
+				SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX),
+				BTREE_ITER_intent);
+	struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter);
+	int ret = bkey_err(k);
+	if (ret)
+		return ret;
+
+	if (k.k->type == KEY_TYPE_cookie)
+		return 1;
+
+	if (!inum)
+		return 0;
+
+	bch2_btree_iter_set_pos(&iter, SPOS(0, REBALANCE_WORK_SCAN_OFFSET, U32_MAX));
+	k = bch2_btree_iter_peek_slot(&iter);
+	ret = bkey_err(k);
+	if (ret)
+		return ret;
+
+	return k.k->type == KEY_TYPE_cookie;
+}
+
 static int bch2_get_update_rebalance_opts(struct btree_trans *trans,
 					  struct bch_inode_opts *io_opts,
 					  struct btree_iter *iter,
@@ -186,6 +257,7 @@ static int bch2_get_update_rebalance_opts(struct btree_trans *trans,
 					  enum set_needs_rebalance_ctx ctx)
 {
 	struct bch_fs *c = trans->c;
+	int ret = 0;
 
 	BUG_ON(iter->flags & BTREE_ITER_is_extents);
 	BUG_ON(iter->flags & BTREE_ITER_filter_snapshots);
@@ -216,13 +288,61 @@ static int bch2_get_update_rebalance_opts(struct btree_trans *trans,
 
 	struct bch_extent_rebalance new = io_opts_to_rebalance_opts(c, io_opts);
 
-	if (bkey_should_have_rb_opts(c, io_opts, k)
+	bool should_have_rb_opts = bkey_should_have_rb_opts(c, io_opts, k);
+
+	if (should_have_rb_opts
 	    ? old && !memcmp(old, &new, sizeof(new))
 	    : !old)
 		return 0;
 
+	if (k.k->type != KEY_TYPE_reflink_v) {
+		if (old && !should_have_rb_opts) {
+			CLASS(printbuf, buf)();
+
+			prt_printf(&buf, "extent with unneeded rebalance opts:\n");
+			bch2_bkey_val_to_text(&buf, c, k);
+
+			fsck_err(trans, extent_io_opts_not_set, "%s", buf.buf);
+		} else {
+			ret = have_rebalance_scan_cookie(trans, k.k->p.inode);
+			if (ret < 0)
+				return ret;
+
+			if (!ret) {
+				CLASS(printbuf, buf)();
+
+				prt_printf(&buf, "extent with incorrect/missing rebalance opts:\n");
+				bch2_bkey_val_to_text(&buf, c, k);
+				const struct bch_extent_rebalance _old = {};
+				if (!old)
+					old = &_old;
+
+#define x(_name)									\
+				if (old->_name != new._name)				\
+					prt_printf(&buf, "\n" #_name " %u != %u",	\
+						   old->_name, new._name);		\
+				if (old->_name##_from_inode != new._name##_from_inode)	\
+					prt_printf(&buf, "\n" #_name "_from_inode %u != %u",\
+						   old->_name##_from_inode, new._name##_from_inode);
+				BCH_REBALANCE_OPTS()
+#undef x
+
+				if (old->unused != new.unused)
+					prt_printf(&buf, "\nunused %u != %u", old->unused, new.unused);
+
+				if (old->type != new.type)
+					prt_printf(&buf, "\ntype %u != %u", old->type, new.type);
+
+				prt_newline(&buf);
+				bch2_extent_rebalance_to_text(&buf, c, &new);
+
+				fsck_err(trans, extent_io_opts_not_set, "%s", buf.buf);
+			}
+		}
+	}
+
 	struct bkey_i *n = bch2_trans_kmalloc(trans, bkey_bytes(k.k) + 8);
-	int ret = PTR_ERR_OR_ZERO(n);
+	ret = PTR_ERR_OR_ZERO(n);
 	if (ret)
 		return ret;
@@ -230,10 +350,12 @@ static int bch2_get_update_rebalance_opts(struct btree_trans *trans,
 
 	/* On successfull transaction commit, @k was invalidated: */
 
-	return bch2_bkey_set_needs_rebalance(c, io_opts, n, ctx, 0) ?:
+	ret =  bch2_bkey_set_needs_rebalance(c, io_opts, n, ctx, 0) ?:
 		bch2_trans_update(trans, iter, n, BTREE_UPDATE_internal_snapshot_node) ?:
 		bch2_trans_commit(trans, NULL, NULL, 0) ?:
 		bch_err_throw(c, transaction_restart_commit);
+fsck_err:
+	return ret;
 }
 
 static struct bch_inode_opts *bch2_extent_get_io_opts(struct btree_trans *trans,
@@ -336,8 +458,6 @@ int bch2_extent_get_io_opts_one(struct btree_trans *trans,
 			    ctx);
 }
 
-#define REBALANCE_WORK_SCAN_OFFSET	(U64_MAX - 1)
-
 static const char * const bch2_rebalance_state_strs[] = {
 #define x(t) #t,
 	BCH_REBALANCE_STATES()
@@ -518,23 +638,25 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans,
 		bch2_bkey_val_to_text(&buf, c, k);
 		prt_newline(&buf);
 
-		struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+		unsigned move_ptrs = 0;
+		unsigned compress_ptrs = 0;
+		u64 sectors = 0;
 
-		unsigned p = bch2_bkey_ptrs_need_compress(c, opts, k, ptrs);
-		if (p) {
-			prt_str(&buf, "compression=");
-			bch2_compression_opt_to_text(&buf, opts->background_compression);
+		bch2_bkey_needs_rebalance(c, k, opts, &move_ptrs, &compress_ptrs, &sectors);
+
+		if (move_ptrs) {
+			prt_str(&buf, "move=");
+			bch2_target_to_text(&buf, c, opts->background_target);
 			prt_str(&buf, " ");
-			bch2_prt_u64_base2(&buf, p);
+			bch2_prt_u64_base2(&buf, move_ptrs);
 			prt_newline(&buf);
 		}
 
-		p = bch2_bkey_ptrs_need_move(c, opts, ptrs);
-		if (p) {
-			prt_str(&buf, "move=");
-			bch2_target_to_text(&buf, c, opts->background_target);
+		if (compress_ptrs) {
+			prt_str(&buf, "compression=");
+			bch2_compression_opt_to_text(&buf, opts->background_compression);
 			prt_str(&buf, " ");
-			bch2_prt_u64_base2(&buf, p);
+			bch2_prt_u64_base2(&buf, compress_ptrs);
 			prt_newline(&buf);
 		}
@@ -649,6 +771,8 @@ static int do_rebalance_scan(struct moving_context *ctxt,
 				     BTREE_ITER_prefetch, k, ({
 			ctxt->stats->pos = BBPOS(iter.btree_id, iter.pos);
 
+			atomic64_add(k.k->size, &r->scan_stats.sectors_seen);
+
 			struct bch_inode_opts *opts = bch2_extent_get_apply_io_opts(trans,
 					snapshot_io_opts, iter.pos, &iter, k,
 					SET_NEEDS_REBALANCE_opt_change);
@@ -658,10 +782,31 @@ static int do_rebalance_scan(struct moving_context *ctxt,
 			REFLINK_P_MAY_UPDATE_OPTIONS(bkey_s_c_to_reflink_p(k).v)
 			? do_rebalance_scan_indirect(trans, bkey_s_c_to_reflink_p(k), opts)
 			: 0);
-	})) ?:
-	commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
-		  bch2_clear_rebalance_needs_scan(trans, inum, cookie));
+	}));
+	if (ret)
+		goto out;
+
+	if (!inum) {
+		ret = for_each_btree_key_max(trans, iter, BTREE_ID_reflink,
+					     POS_MIN, POS_MAX,
+					     BTREE_ITER_all_snapshots|
+					     BTREE_ITER_prefetch, k, ({
+			ctxt->stats->pos = BBPOS(iter.btree_id, iter.pos);
+
+			atomic64_add(k.k->size, &r->scan_stats.sectors_seen);
+
+			struct bch_inode_opts *opts = bch2_extent_get_apply_io_opts(trans,
+					snapshot_io_opts, iter.pos, &iter, k,
+					SET_NEEDS_REBALANCE_opt_change);
+			PTR_ERR_OR_ZERO(opts);
+		}));
+		if (ret)
+			goto out;
+	}
+	ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+			bch2_clear_rebalance_needs_scan(trans, inum, cookie));
+out:
 	*sectors_scanned += atomic64_read(&r->scan_stats.sectors_seen);
 
 	/*
 	 * Ensure that the rebalance_work entries we created are seen by the
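The patch folds the old bch2_bkey_ptrs_need_compress()/bch2_bkey_ptrs_need_move() pair into a single walk, bch2_bkey_needs_rebalance(), which fills two per-pointer bitmasks (move_ptrs, compress_ptrs) and the total sectors needing work in one pass. The standalone sketch below models that accumulation pattern outside the kernel; struct demo_ptr, demo_needs_rebalance() and the values in main() are illustrative stand-ins, not bcachefs structures or APIs.

/* Simplified model of the per-pointer bitmask logic; all types and
 * helpers here are illustrative, not the kernel's. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct demo_ptr {
	unsigned	dev;		/* device this replica lives on */
	unsigned	compression_type;
	unsigned	compressed_size;/* sectors */
	bool		cached;
	bool		unwritten;
	bool		incompressible;
};

/*
 * Walk every pointer once, accumulating two bitmasks (one bit per pointer):
 * replicas that need recompression and replicas that need to move to the
 * background target, then total the sectors belonging to any flagged replica.
 */
static void demo_needs_rebalance(const struct demo_ptr *ptrs, unsigned nr,
				 unsigned want_compression, unsigned target_dev,
				 unsigned *move_ptrs, unsigned *compress_ptrs,
				 uint64_t *sectors)
{
	*move_ptrs = *compress_ptrs = 0;
	*sectors = 0;

	bool incompressible = false, unwritten = false;
	unsigned ptr_idx = 1;

	for (unsigned i = 0; i < nr; i++, ptr_idx <<= 1) {
		incompressible	|= ptrs[i].incompressible;
		unwritten	|= ptrs[i].unwritten;

		if (ptrs[i].cached)
			continue;
		if (ptrs[i].compression_type != want_compression)
			*compress_ptrs |= ptr_idx;
		if (target_dev && ptrs[i].dev != target_dev)
			*move_ptrs |= ptr_idx;
	}

	/* unwritten or incompressible extents are never recompressed: */
	if (unwritten || incompressible)
		*compress_ptrs = 0;

	unsigned rb_ptrs = *move_ptrs | *compress_ptrs;

	ptr_idx = 1;
	for (unsigned i = 0; i < nr; i++, ptr_idx <<= 1)
		if (rb_ptrs & ptr_idx)
			*sectors += ptrs[i].compressed_size;
}

int main(void)
{
	struct demo_ptr ptrs[] = {
		{ .dev = 1, .compression_type = 0, .compressed_size = 64 },
		{ .dev = 2, .compression_type = 3, .compressed_size = 32, .cached = true },
		{ .dev = 3, .compression_type = 3, .compressed_size = 32 },
	};
	unsigned move = 0, compress = 0;
	uint64_t sectors = 0;

	/* want compression type 3 on device 3 only: */
	demo_needs_rebalance(ptrs, 3, 3, 3, &move, &compress, &sectors);
	printf("move=%x compress=%x sectors=%llu\n",
	       move, compress, (unsigned long long) sectors);
	return 0;
}

Producing both masks and the sector count from the same loop is what lets bch2_bkey_sectors_need_rebalance() and bch2_bkey_ptrs_need_rebalance() in the patch become thin wrappers that each pick out only the outputs they need.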