diff options
Diffstat (limited to 'fs/bcachefs/rebalance.c')
-rw-r--r-- | fs/bcachefs/rebalance.c | 308 |
1 files changed, 154 insertions, 154 deletions
diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index f2d0040d043c..67d6a90e86ef 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -38,30 +38,15 @@ static const struct bch_extent_rebalance *bch2_bkey_ptrs_rebalance_opts(struct b return NULL; } -const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c k) +static const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c k) { return bch2_bkey_ptrs_rebalance_opts(bch2_bkey_ptrs_c(k)); } -static const char * const rebalance_opts[] = { -#define x(n) #n, - BCH_REBALANCE_OPTS() -#undef x - NULL -}; - void bch2_extent_rebalance_to_text(struct printbuf *out, struct bch_fs *c, const struct bch_extent_rebalance *r) { - prt_str(out, "need_rb="); - prt_bitflags(out, rebalance_opts, r->need_rb); - - if (r->hipri) - prt_str(out, " hipri"); - if (r->pending) - prt_str(out, " pending"); - - prt_printf(out, " replicas=%u", r->data_replicas); + prt_printf(out, "replicas=%u", r->data_replicas); if (r->data_replicas_from_inode) prt_str(out, " (inode)"); @@ -107,54 +92,32 @@ void bch2_extent_rebalance_to_text(struct printbuf *out, struct bch_fs *c, } } -/* - * XXX: check in bkey_validate that if r->hipri or r->pending are set, - * r->data_replicas are also set - */ - -static inline unsigned rb_accounting_counters(const struct bch_extent_rebalance *r) +int bch2_trigger_extent_rebalance(struct btree_trans *trans, + struct bkey_s_c old, struct bkey_s_c new, + enum btree_iter_update_trigger_flags flags) { - if (!r) - return 0; - unsigned ret = r->need_rb; + struct bch_fs *c = trans->c; + int need_rebalance_delta = 0; + s64 need_rebalance_sectors_delta[1] = { 0 }; - if (r->hipri) - ret |= BIT(BCH_REBALANCE_ACCOUNTING_high_priority); - if (r->pending) { - ret |= BIT(BCH_REBALANCE_ACCOUNTING_pending); - ret &= ~BIT(BCH_REBALANCE_ACCOUNTING_background_target); - } - return ret; -} + s64 s = bch2_bkey_sectors_need_rebalance(c, old); + need_rebalance_delta -= s != 0; + 
need_rebalance_sectors_delta[0] -= s; -int __bch2_trigger_extent_rebalance(struct btree_trans *trans, - struct bkey_s_c old, struct bkey_s_c new, - unsigned old_r, unsigned new_r, - enum btree_iter_update_trigger_flags flags) -{ - int delta = (int) !!new_r - (int) !!old_r; - if ((flags & BTREE_TRIGGER_transactional) && delta) { + s = bch2_bkey_sectors_need_rebalance(c, new); + need_rebalance_delta += s != 0; + need_rebalance_sectors_delta[0] += s; + + if ((flags & BTREE_TRIGGER_transactional) && need_rebalance_delta) { int ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work, - new.k->p, delta > 0); + new.k->p, need_rebalance_delta > 0); if (ret) return ret; } - delta = old.k->size == new.k->size - ? old_r ^ new_r - : old_r | new_r; - while (delta) { - unsigned c = __ffs(delta); - delta ^= BIT(c); - - s64 v[1] = { 0 }; - if (old_r & BIT(c)) - v[0] -= (s64) old.k->size; - if (new_r & BIT(c)) - v[0] += (s64) new.k->size; - + if (need_rebalance_sectors_delta[0]) { int ret = bch2_disk_accounting_mod2(trans, flags & BTREE_TRIGGER_gc, - v, rebalance_work_v2, c); + need_rebalance_sectors_delta, rebalance_work); if (ret) return ret; } @@ -162,45 +125,39 @@ int __bch2_trigger_extent_rebalance(struct btree_trans *trans, return 0; } -static struct bch_extent_rebalance -bch2_bkey_needs_rebalance(struct bch_fs *c, struct bkey_s_c k, - struct bch_inode_opts *opts, - unsigned *move_ptrs, - unsigned *compress_ptrs, - bool may_update_indirect) +static void bch2_bkey_needs_rebalance(struct bch_fs *c, struct bkey_s_c k, + struct bch_inode_opts *io_opts, + unsigned *move_ptrs, + unsigned *compress_ptrs, + u64 *sectors) { *move_ptrs = 0; *compress_ptrs = 0; + *sectors = 0; struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - struct bch_extent_rebalance r = { .type = BIT(BCH_EXTENT_ENTRY_rebalance) }; - - if (bch2_bkey_extent_ptrs_flags(ptrs) & BIT_ULL(BCH_EXTENT_FLAG_poisoned)) - return r; - - const struct bch_extent_rebalance *old_r = bch2_bkey_ptrs_rebalance_opts(ptrs); - if 
(old_r) { - r = *old_r; - r.need_rb = 0; - } -#define x(_name) \ - if (k.k->type != KEY_TYPE_reflink_v || \ - may_update_indirect || \ - (!opts->_name##_from_inode && !r._name##_from_inode)) { \ - r._name = opts->_name; \ - r._name##_from_inode = opts->_name##_from_inode; \ - } - BCH_REBALANCE_OPTS() -#undef x + const struct bch_extent_rebalance *rb_opts = bch2_bkey_ptrs_rebalance_opts(ptrs); + if (!io_opts && !rb_opts) + return; - unsigned compression_type = bch2_compression_opt_to_type(r.background_compression); + if (bch2_bkey_extent_ptrs_flags(ptrs) & BIT_ULL(BCH_EXTENT_FLAG_poisoned)) + return; - bool incompressible = false, unwritten = false, ec = false; - unsigned durability = 0, min_durability = INT_MAX; + unsigned compression_type = + bch2_compression_opt_to_type(io_opts + ? io_opts->background_compression + : rb_opts->background_compression); + unsigned target = io_opts + ? io_opts->background_target + : rb_opts->background_target; + if (target && !bch2_target_accepts_data(c, BCH_DATA_user, target)) + target = 0; const union bch_extent_entry *entry; struct extent_ptr_decoded p; + bool incompressible = false, unwritten = false; + unsigned ptr_idx = 1; guard(rcu)(); @@ -209,50 +166,72 @@ bch2_bkey_needs_rebalance(struct bch_fs *c, struct bkey_s_c k, unwritten |= p.ptr.unwritten; if (!p.ptr.cached) { - if (p.crc.compression_type != compression_type) { + if (p.crc.compression_type != compression_type) *compress_ptrs |= ptr_idx; - r.need_rb |= BIT(BCH_REBALANCE_background_compression); - } - if (r.background_target && - !bch2_dev_in_target(c, p.ptr.dev, r.background_target)) { + if (target && !bch2_dev_in_target(c, p.ptr.dev, target)) *move_ptrs |= ptr_idx; - r.need_rb |= BIT(BCH_REBALANCE_background_target); - } - - unsigned d = bch2_extent_ptr_durability(c, &p); - durability += d; - min_durability = min(min_durability, d); - - ec |= p.has_ec; } ptr_idx <<= 1; } - if (unwritten || incompressible) { + if (unwritten) *compress_ptrs = 0; - r.need_rb &= 
~BIT(BCH_REBALANCE_background_compression); + if (incompressible) + *compress_ptrs = 0; + + unsigned rb_ptrs = *move_ptrs | *compress_ptrs; + + if (!rb_ptrs) + return; + + ptr_idx = 1; + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { + if (rb_ptrs & ptr_idx) + *sectors += p.crc.compressed_size; + ptr_idx <<= 1; } - return r; } -static inline bool bkey_should_have_rb_opts(struct bkey_s_c k, - struct bch_extent_rebalance new) +u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k) +{ + unsigned move_ptrs = 0; + unsigned compress_ptrs = 0; + u64 sectors = 0; + + bch2_bkey_needs_rebalance(c, k, NULL, &move_ptrs, &compress_ptrs, &sectors); + return sectors; +} + +static unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c, + struct bch_inode_opts *opts, + struct bkey_s_c k) +{ + unsigned move_ptrs = 0; + unsigned compress_ptrs = 0; + u64 sectors = 0; + + bch2_bkey_needs_rebalance(c, k, opts, &move_ptrs, &compress_ptrs, &sectors); + return move_ptrs|compress_ptrs; +} + +static inline bool bkey_should_have_rb_opts(struct bch_fs *c, + struct bch_inode_opts *opts, + struct bkey_s_c k) { if (k.k->type == KEY_TYPE_reflink_v) { -#define x(n) if (new.n##_from_inode) return true; +#define x(n) if (opts->n##_from_inode) return true; BCH_REBALANCE_OPTS() #undef x } - return new.need_rb; + return bch2_bkey_ptrs_need_rebalance(c, opts, k); } -int bch2_bkey_set_needs_rebalance(struct bch_fs *c, - struct bch_inode_opts *opts, +int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bch_inode_opts *opts, struct bkey_i *_k, enum set_needs_rebalance_ctx ctx, - u32 opt_change_cookie) + u32 change_cookie) { if (!bkey_extent_is_direct_data(&_k->k)) return 0; @@ -261,27 +240,17 @@ int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bch_extent_rebalance *old = (struct bch_extent_rebalance *) bch2_bkey_rebalance_opts(k.s_c); - unsigned move_ptrs = 0; - unsigned compress_ptrs = 0; - struct bch_extent_rebalance new = - bch2_bkey_needs_rebalance(c, k.s_c, opts,
&move_ptrs, &compress_ptrs, - ctx == SET_NEEDS_REBALANCE_opt_change_indirect); - - bool should_have_rb = bkey_should_have_rb_opts(k.s_c, new); - - if (should_have_rb == !!old && - (should_have_rb ? !memcmp(old, &new, sizeof(new)) : !old)) - return 0; - - if (should_have_rb) { + if (bkey_should_have_rb_opts(c, opts, k.s_c)) { if (!old) { old = bkey_val_end(k); k.k->u64s += sizeof(*old) / sizeof(u64); } - *old = new; - } else if (old) - extent_entry_drop(k, (union bch_extent_entry *) old); + *old = io_opts_to_rebalance_opts(c, opts); + } else { + if (old) + extent_entry_drop(k, (union bch_extent_entry *) old); + } return 0; } @@ -300,19 +269,32 @@ static int bch2_get_update_rebalance_opts(struct btree_trans *trans, if (!bkey_extent_is_direct_data(k.k)) return 0; - struct bch_extent_rebalance *old = - (struct bch_extent_rebalance *) bch2_bkey_rebalance_opts(k); + bool may_update_indirect = ctx == SET_NEEDS_REBALANCE_opt_change_indirect; - unsigned move_ptrs = 0; - unsigned compress_ptrs = 0; - struct bch_extent_rebalance new = - bch2_bkey_needs_rebalance(c, k, io_opts, &move_ptrs, &compress_ptrs, - ctx == SET_NEEDS_REBALANCE_opt_change_indirect); + /* + * If it's an indirect extent, and we walked to it directly, we won't + * have the options from the inode that were directly applied: options + * from the extent take precedence - unless the io_opts option came from + * the inode and may_update_indirect is true (walked from a + * REFLINK_P_MAY_UPDATE_OPTIONS pointer). 
+ */ + const struct bch_extent_rebalance *old = bch2_bkey_rebalance_opts(k); + if (old && k.k->type == KEY_TYPE_reflink_v) { +#define x(_name) \ + if (old->_name##_from_inode && \ + !(may_update_indirect && io_opts->_name##_from_inode)) { \ + io_opts->_name = old->_name; \ + io_opts->_name##_from_inode = true; \ + } + BCH_REBALANCE_OPTS() +#undef x + } - bool should_have_rb = bkey_should_have_rb_opts(k, new); + struct bch_extent_rebalance new = io_opts_to_rebalance_opts(c, io_opts); - if (should_have_rb == !!old && - (should_have_rb ? !memcmp(old, &new, sizeof(new)) : !old)) + if (bkey_should_have_rb_opts(c, io_opts, k) + ? old && !memcmp(old, &new, sizeof(new)) + : !old) return 0; struct bkey_i *n = bch2_trans_kmalloc(trans, bkey_bytes(k.k) + 8); @@ -324,7 +306,7 @@ static int bch2_get_update_rebalance_opts(struct btree_trans *trans, /* On successfull transaction commit, @k was invalidated: */ - return bch2_bkey_set_needs_rebalance(c, io_opts, n, ctx, 0) ?: + return bch2_bkey_set_needs_rebalance(c, io_opts, n, ctx, 0) ?: bch2_trans_update(trans, iter, n, BTREE_UPDATE_internal_snapshot_node) ?: bch2_trans_commit(trans, NULL, NULL, 0) ?: bch_err_throw(c, transaction_restart_commit); @@ -390,8 +372,7 @@ struct bch_inode_opts *bch2_extent_get_apply_io_opts(struct btree_trans *trans, enum set_needs_rebalance_ctx ctx) { struct bch_inode_opts *opts = - bch2_extent_get_io_opts(trans, snapshot_io_opts, - extent_pos, extent_iter, extent_k); + bch2_extent_get_io_opts(trans, snapshot_io_opts, extent_pos, extent_iter, extent_k); if (IS_ERR(opts) || btree_iter_path(trans, extent_iter)->level) return opts; @@ -554,6 +535,23 @@ static struct bkey_i *next_rebalance_entry(struct btree_trans *trans, return &(&darray_pop(buf))->k_i; } +static int bch2_bkey_clear_needs_rebalance(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_s_c k) +{ + if (k.k->type == KEY_TYPE_reflink_v || !bch2_bkey_rebalance_opts(k)) + return 0; + + struct bkey_i *n = bch2_bkey_make_mut(trans, 
iter, &k, 0); + int ret = PTR_ERR_OR_ZERO(n); + if (ret) + return ret; + + extent_entry_drop(bkey_i_to_s(n), + (void *) bch2_bkey_rebalance_opts(bkey_i_to_s_c(n))); + return bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); +} + static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans, struct per_snapshot_io_opts *snapshot_io_opts, struct bpos work_pos, @@ -572,10 +570,6 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans, if (bkey_err(k)) return k; - const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k); - if (!r || !r->need_rb) /* Write buffer race? */ - return bkey_s_c_null; - struct bch_inode_opts *opts = bch2_extent_get_apply_io_opts(trans, snapshot_io_opts, extent_iter->pos, extent_iter, k, @@ -586,23 +580,22 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans, *opts_ret = opts; - unsigned move_ptrs = 0; - unsigned compress_ptrs = 0; - bch2_bkey_needs_rebalance(c, k, opts, &move_ptrs, &compress_ptrs, false); - memset(data_opts, 0, sizeof(*data_opts)); - data_opts->rewrite_ptrs = move_ptrs|compress_ptrs; + data_opts->rewrite_ptrs = bch2_bkey_ptrs_need_rebalance(c, opts, k); data_opts->target = opts->background_target; data_opts->write_flags |= BCH_WRITE_only_specified_devs; - if (!data_opts->rewrite_ptrs && - !data_opts->kill_ptrs && - !data_opts->kill_ec_ptrs && - !data_opts->extra_replicas) { - CLASS(printbuf, buf)(); - prt_printf(&buf, "got extent to rebalance but nothing to do, confused\n "); - bch2_bkey_val_to_text(&buf, c, k); - bch_err(c, "%s", buf.buf); + if (!data_opts->rewrite_ptrs) { + /* + * device we would want to write to offline? devices in target + * changed? 
+ * + * We'll now need a full scan before this extent is picked up + * again: + */ + int ret = bch2_bkey_clear_needs_rebalance(trans, extent_iter, k); + if (ret) + return bkey_s_c_err(ret); return bkey_s_c_null; } @@ -612,6 +605,12 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans, bch2_bkey_val_to_text(&buf, c, k); prt_newline(&buf); + unsigned move_ptrs = 0; + unsigned compress_ptrs = 0; + u64 sectors = 0; + + bch2_bkey_needs_rebalance(c, k, opts, &move_ptrs, &compress_ptrs, &sectors); + if (move_ptrs) { prt_str(&buf, "move="); bch2_target_to_text(&buf, c, opts->background_target); @@ -1090,7 +1089,8 @@ static int check_rebalance_work_one(struct btree_trans *trans, extent_k.k = &deleted; } - bool should_have_rebalance = bch2_bkey_needs_rb(extent_k); + bool should_have_rebalance = + bch2_bkey_sectors_need_rebalance(c, extent_k) != 0; bool have_rebalance = rebalance_k.k->type == KEY_TYPE_set; if (should_have_rebalance != have_rebalance) { |