summaryrefslogtreecommitdiff
path: root/fs/bcachefs/rebalance.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/bcachefs/rebalance.c')
-rw-r--r--fs/bcachefs/rebalance.c112
1 files changed, 112 insertions, 0 deletions
diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c
index 94de89d6a6cf..0e40e7bd3441 100644
--- a/fs/bcachefs/rebalance.c
+++ b/fs/bcachefs/rebalance.c
@@ -11,6 +11,7 @@
#include "clock.h"
#include "compress.h"
#include "disk_groups.h"
+#include "ec.h"
#include "errcode.h"
#include "error.h"
#include "inode.h"
@@ -270,6 +271,10 @@ bch2_bkey_needs_rebalance(struct bch_fs *c, struct bkey_s_c k,
r.need_rb &= ~BIT(BCH_REBALANCE_data_checksum);
}
+ if (durability < r.data_replicas || durability >= r.data_replicas + min_durability)
+ r.need_rb |= BIT(BCH_REBALANCE_data_replicas);
+ if (!unwritten && r.erasure_code != ec)
+ r.need_rb |= BIT(BCH_REBALANCE_erasure_code);
return r;
}
@@ -319,6 +324,17 @@ static int check_dev_rebalance_scan_cookie(struct btree_trans *trans, struct bke
return 0;
}
+static bool bkey_has_ec(struct bkey_s_c k)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const union bch_extent_entry *entry;
+
+ bkey_extent_entry_for_each(ptrs, entry)
+ if (extent_entry_type(entry) == BCH_EXTENT_ENTRY_stripe_ptr)
+ return true;
+ return false;
+}
+
static int new_needs_rb_allowed(struct btree_trans *trans,
struct per_snapshot_io_opts *s,
struct bkey_s_c k,
@@ -348,9 +364,26 @@ static int new_needs_rb_allowed(struct btree_trans *trans,
ctx == SET_NEEDS_REBALANCE_opt_change_indirect)
return 0;
+ if ((new_need_rb & BIT(BCH_REBALANCE_erasure_code)) &&
+ !bkey_has_ec(k)) {
+ /* Foreground writes are not initially erasure coded - and we
+ * may crash before a stripe is created
+ */
+ new_need_rb &= ~BIT(BCH_REBALANCE_erasure_code);
+ }
+
if (ctx == SET_NEEDS_REBALANCE_foreground) {
new_need_rb &= ~(BIT(BCH_REBALANCE_background_compression)|
BIT(BCH_REBALANCE_background_target));
+
+ /*
+ * Foreground writes might end up degraded when a device is
+ * getting yanked:
+ *
+ * XXX: this is something we need to fix, but adding retries to
+ * the write path is something we have to do carefully.
+ */
+ new_need_rb &= ~BIT(BCH_REBALANCE_data_replicas);
if (!new_need_rb)
return 0;
@@ -748,6 +781,23 @@ static struct bkey_i *next_rebalance_entry(struct btree_trans *trans,
return &(&darray_pop(buf))->k_i;
}
+static int extent_ec_pending(struct btree_trans *trans, struct bkey_ptrs_c ptrs)
+{
+ struct bch_fs *c = trans->c;
+
+ guard(rcu)();
+ bkey_for_each_ptr(ptrs, ptr) {
+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, ptr->dev);
+ if (!ca)
+ continue;
+
+ struct bpos bucket = PTR_BUCKET_POS(ca, ptr);
+ if (bch2_bucket_has_new_stripe(c, bucket_to_u64(bucket)))
+ return true;
+ }
+ return false;
+}
+
static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans,
struct per_snapshot_io_opts *snapshot_io_opts,
struct bpos work_pos,
@@ -790,6 +840,68 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans,
data_opts->target = opts->background_target;
data_opts->write_flags |= BCH_WRITE_only_specified_devs;
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+
+ if (r->need_rb & BIT(BCH_REBALANCE_data_replicas)) {
+ unsigned durability = bch2_bkey_durability(c, k);
+ unsigned ptr_bit = 1;
+
+ guard(rcu)();
+ if (durability <= opts->data_replicas) {
+ bkey_for_each_ptr(ptrs, ptr) {
+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, ptr->dev);
+ if (ca && !ptr->cached && !ca->mi.durability)
+ data_opts->kill_ptrs |= ptr_bit;
+ ptr_bit <<= 1;
+ }
+
+ data_opts->extra_replicas = opts->data_replicas - durability;
+ } else {
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+ unsigned d = bch2_extent_ptr_durability(c, &p);
+
+ if (d && durability - d >= opts->data_replicas) {
+ data_opts->kill_ptrs |= ptr_bit;
+ durability -= d;
+ }
+
+ ptr_bit <<= 1;
+ }
+
+ ptr_bit = 1;
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+ if (p.has_ec && durability - p.ec.redundancy >= opts->data_replicas) {
+ data_opts->kill_ec_ptrs |= ptr_bit;
+ durability -= p.ec.redundancy;
+ }
+
+ ptr_bit <<= 1;
+ }
+ }
+ }
+
+ if (r->need_rb & BIT(BCH_REBALANCE_erasure_code)) {
+ if (opts->erasure_code) {
+ /* XXX: we'll need ratelimiting */
+ if (extent_ec_pending(trans, ptrs))
+ return bkey_s_c_null;
+
+ data_opts->extra_replicas = opts->data_replicas;
+ } else {
+ unsigned ptr_bit = 1;
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+ if (p.has_ec) {
+ data_opts->kill_ec_ptrs |= ptr_bit;
+ data_opts->extra_replicas += p.ec.redundancy;
+ }
+
+ ptr_bit <<= 1;
+ }
+ }
+ }
+
if (!data_opts->rewrite_ptrs &&
!data_opts->kill_ptrs &&
!data_opts->kill_ec_ptrs &&