author     Kent Overstreet <kent.overstreet@linux.dev>   2023-03-13 22:01:47 -0400
committer  Kent Overstreet <kent.overstreet@linux.dev>   2023-10-22 17:09:57 -0400
commit     b40901b0f7182557851c8e9af31bacfbbd76b1ec (patch)
tree       dc79e846434408cba88247620c828dca3c17c278
parent     b9fa375bab2786d0d2c5435b5e3fceaf6594aaf3 (diff)
bcachefs: New erasure coding shutdown path
This implements a new shutdown path for erasure coding, which is needed
for the upcoming BCH_WRITE_WAIT_FOR_EC write path. The process is:

- Cancel new stripes being built up
- Close out/cancel open buckets on write points or the partial list that
  are for stripes
- Shut down rebalance/copygc
- Then wait for in-flight new stripes to finish

With BCH_WRITE_WAIT_FOR_EC, move ops will be waiting on stripes to fill
up before they complete; the new ec shutdown path is needed for shutting
down copygc/rebalance without deadlocking.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
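As context for the ordering described above, here is a condensed sketch of
__bch2_fs_read_only() as it looks with this patch applied (based on the
super.c hunk at the end of this diff; the journal flush and allocator
shutdown that follow are elided, not shown in full here):

static void __bch2_fs_read_only(struct bch_fs *c)
{
	/* 1. Cancel new stripes being built up: */
	bch2_fs_ec_stop(c);

	/* 2. Drop open buckets held for stripes (any device, ec only): */
	bch2_open_buckets_stop(c, NULL, true);

	/* 3. Shut down rebalance/copygc (and gc) so no new moves are issued: */
	bch2_rebalance_stop(c);
	bch2_copygc_stop(c);
	bch2_gc_thread_stop(c);

	/* 4. Wait for in-flight new stripes to finish: */
	bch2_fs_ec_flush(c);

	/* ... journal flush and allocator shutdown follow, unchanged ... */
}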
-rw-r--r--  fs/bcachefs/alloc_background.c   39
-rw-r--r--  fs/bcachefs/alloc_foreground.c   96
-rw-r--r--  fs/bcachefs/alloc_foreground.h    6
-rw-r--r--  fs/bcachefs/bcachefs.h            7
-rw-r--r--  fs/bcachefs/data_update.c         1
-rw-r--r--  fs/bcachefs/ec.c                 54
-rw-r--r--  fs/bcachefs/ec.h                  4
-rw-r--r--  fs/bcachefs/io.c                 10
-rw-r--r--  fs/bcachefs/move.c                6
-rw-r--r--  fs/bcachefs/super.c              12
10 files changed, 141 insertions, 94 deletions
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index e5abe6406afe..17bcebbd1f2a 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -2158,44 +2158,7 @@ void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca)
*/
bch2_recalc_capacity(c);
- /* Next, close write points that point to this device... */
- for (i = 0; i < ARRAY_SIZE(c->write_points); i++)
- bch2_writepoint_stop(c, ca, &c->write_points[i]);
-
- bch2_writepoint_stop(c, ca, &c->copygc_write_point);
- bch2_writepoint_stop(c, ca, &c->rebalance_write_point);
- bch2_writepoint_stop(c, ca, &c->btree_write_point);
-
- mutex_lock(&c->btree_reserve_cache_lock);
- while (c->btree_reserve_cache_nr) {
- struct btree_alloc *a =
- &c->btree_reserve_cache[--c->btree_reserve_cache_nr];
-
- bch2_open_buckets_put(c, &a->ob);
- }
- mutex_unlock(&c->btree_reserve_cache_lock);
-
- spin_lock(&c->freelist_lock);
- i = 0;
- while (i < c->open_buckets_partial_nr) {
- struct open_bucket *ob =
- c->open_buckets + c->open_buckets_partial[i];
-
- if (ob->dev == ca->dev_idx) {
- --c->open_buckets_partial_nr;
- swap(c->open_buckets_partial[i],
- c->open_buckets_partial[c->open_buckets_partial_nr]);
- ob->on_partial_list = false;
- spin_unlock(&c->freelist_lock);
- bch2_open_bucket_put(c, ob);
- spin_lock(&c->freelist_lock);
- } else {
- i++;
- }
- }
- spin_unlock(&c->freelist_lock);
-
- bch2_ec_stop_dev(c, ca);
+ bch2_open_buckets_stop(c, ca, false);
/*
* Wake up threads that were blocked on allocation, so they can notice
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index 7c81189bcd62..20c64882104e 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -1023,45 +1023,96 @@ static int open_bucket_add_buckets(struct btree_trans *trans,
return ret < 0 ? ret : 0;
}
-void bch2_open_buckets_stop_dev(struct bch_fs *c, struct bch_dev *ca,
- struct open_buckets *obs)
+static bool should_drop_bucket(struct open_bucket *ob, struct bch_fs *c,
+ struct bch_dev *ca, bool ec)
{
- struct open_buckets ptrs = { .nr = 0 };
- struct open_bucket *ob, *ob2;
- unsigned i, j;
-
- open_bucket_for_each(c, obs, ob, i) {
- bool drop = !ca || ob->dev == ca->dev_idx;
+ if (ec) {
+ return ob->ec != NULL;
+ } else if (ca) {
+ bool drop = ob->dev == ca->dev_idx;
+ struct open_bucket *ob2;
+ unsigned i;
if (!drop && ob->ec) {
mutex_lock(&ob->ec->lock);
- for (j = 0; j < ob->ec->new_stripe.key.v.nr_blocks; j++) {
- if (!ob->ec->blocks[j])
+ for (i = 0; i < ob->ec->new_stripe.key.v.nr_blocks; i++) {
+ if (!ob->ec->blocks[i])
continue;
- ob2 = c->open_buckets + ob->ec->blocks[j];
+ ob2 = c->open_buckets + ob->ec->blocks[i];
drop |= ob2->dev == ca->dev_idx;
}
mutex_unlock(&ob->ec->lock);
}
- if (drop)
- bch2_open_bucket_put(c, ob);
- else
- ob_push(c, &ptrs, ob);
+ return drop;
+ } else {
+ return true;
}
-
- *obs = ptrs;
}
-void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca,
- struct write_point *wp)
+static void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca,
+ bool ec, struct write_point *wp)
{
+ struct open_buckets ptrs = { .nr = 0 };
+ struct open_bucket *ob;
+ unsigned i;
+
mutex_lock(&wp->lock);
- bch2_open_buckets_stop_dev(c, ca, &wp->ptrs);
+ open_bucket_for_each(c, &wp->ptrs, ob, i)
+ if (should_drop_bucket(ob, c, ca, ec))
+ bch2_open_bucket_put(c, ob);
+ else
+ ob_push(c, &ptrs, ob);
+ wp->ptrs = ptrs;
mutex_unlock(&wp->lock);
}
+void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *ca,
+ bool ec)
+{
+ unsigned i;
+
+ /* Next, close write points that point to this device... */
+ for (i = 0; i < ARRAY_SIZE(c->write_points); i++)
+ bch2_writepoint_stop(c, ca, ec, &c->write_points[i]);
+
+ bch2_writepoint_stop(c, ca, ec, &c->copygc_write_point);
+ bch2_writepoint_stop(c, ca, ec, &c->rebalance_write_point);
+ bch2_writepoint_stop(c, ca, ec, &c->btree_write_point);
+
+ mutex_lock(&c->btree_reserve_cache_lock);
+ while (c->btree_reserve_cache_nr) {
+ struct btree_alloc *a =
+ &c->btree_reserve_cache[--c->btree_reserve_cache_nr];
+
+ bch2_open_buckets_put(c, &a->ob);
+ }
+ mutex_unlock(&c->btree_reserve_cache_lock);
+
+ spin_lock(&c->freelist_lock);
+ i = 0;
+ while (i < c->open_buckets_partial_nr) {
+ struct open_bucket *ob =
+ c->open_buckets + c->open_buckets_partial[i];
+
+ if (should_drop_bucket(ob, c, ca, ec)) {
+ --c->open_buckets_partial_nr;
+ swap(c->open_buckets_partial[i],
+ c->open_buckets_partial[c->open_buckets_partial_nr]);
+ ob->on_partial_list = false;
+ spin_unlock(&c->freelist_lock);
+ bch2_open_bucket_put(c, ob);
+ spin_lock(&c->freelist_lock);
+ } else {
+ i++;
+ }
+ }
+ spin_unlock(&c->freelist_lock);
+
+ bch2_ec_stop_dev(c, ca);
+}
+
static inline struct hlist_head *writepoint_hash(struct bch_fs *c,
unsigned long write_point)
{
@@ -1107,8 +1158,7 @@ static bool try_increase_writepoints(struct bch_fs *c)
return true;
}
-static bool try_decrease_writepoints(struct bch_fs *c,
- unsigned old_nr)
+static bool try_decrease_writepoints(struct bch_fs *c, unsigned old_nr)
{
struct write_point *wp;
@@ -1129,7 +1179,7 @@ static bool try_decrease_writepoints(struct bch_fs *c,
hlist_del_rcu(&wp->node);
mutex_unlock(&c->write_points_hash_lock);
- bch2_writepoint_stop(c, NULL, wp);
+ bch2_writepoint_stop(c, NULL, false, wp);
return true;
}
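For reference, the two callers of the new bch2_open_buckets_stop() helper
appear elsewhere in this diff and differ only in how should_drop_bucket()
filters buckets; a minimal sketch of both call sites:

/* Device removal (alloc_background.c): drop buckets on this device,
 * including those belonging to a stripe with a block on the device: */
bch2_open_buckets_stop(c, ca, false);

/* Going read-only (super.c): drop only buckets held for erasure coded
 * stripes, regardless of device: */
bch2_open_buckets_stop(c, NULL, true);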
diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h
index 1fa96f8c6879..8a1cf425091b 100644
--- a/fs/bcachefs/alloc_foreground.h
+++ b/fs/bcachefs/alloc_foreground.h
@@ -202,11 +202,7 @@ void bch2_alloc_sectors_append_ptrs(struct bch_fs *, struct write_point *,
struct bkey_i *, unsigned, bool);
void bch2_alloc_sectors_done(struct bch_fs *, struct write_point *);
-void bch2_open_buckets_stop_dev(struct bch_fs *, struct bch_dev *,
- struct open_buckets *);
-
-void bch2_writepoint_stop(struct bch_fs *, struct bch_dev *,
- struct write_point *);
+void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *, bool);
static inline struct write_point_specifier writepoint_hashed(unsigned long v)
{
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 8be65ebb34ad..05fc0f7434dd 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -655,7 +655,6 @@ typedef struct {
x(fallocate) \
x(discard) \
x(invalidate) \
- x(move) \
x(delete_dead_snapshots) \
x(snapshot_delete_pagecache) \
x(sysfs)
@@ -958,14 +957,14 @@ struct bch_fs {
struct list_head ec_stripe_new_list;
struct mutex ec_stripe_new_lock;
+ wait_queue_head_t ec_stripe_new_wait;
struct work_struct ec_stripe_create_work;
u64 ec_stripe_hint;
- struct bio_set ec_bioset;
-
struct work_struct ec_stripe_delete_work;
- struct llist_head ec_stripe_delete_list;
+
+ struct bio_set ec_bioset;
/* REFLINK */
u64 reflink_hint;
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index 447863825a89..5ec884a222f8 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -252,6 +252,7 @@ restart_drop_extra_replicas:
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
bch2_trans_commit(trans, &op->res,
NULL,
+ BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_NOFAIL|
m->data_opts.btree_insert_flags);
if (!ret) {
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index af3a72acc67f..1e621dcc1d37 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -989,6 +989,7 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b
while (1) {
ret = commit_do(trans, NULL, NULL,
+ BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_NOFAIL,
ec_stripe_update_extent(trans, bucket_pos, bucket.gen,
s, &bp_offset));
@@ -1127,7 +1128,9 @@ static void ec_stripe_create(struct ec_stripe_new *s)
goto err;
}
- ret = bch2_trans_do(c, &s->res, NULL, BTREE_INSERT_NOFAIL,
+ ret = bch2_trans_do(c, &s->res, NULL,
+ BTREE_INSERT_NOCHECK_RW|
+ BTREE_INSERT_NOFAIL,
ec_stripe_key_update(&trans, &s->new_stripe.key,
!s->have_existing_stripe));
if (ret) {
@@ -1409,6 +1412,11 @@ struct ec_stripe_head *__bch2_ec_stripe_head_get(struct btree_trans *trans,
if (ret)
return ERR_PTR(ret);
+ if (test_bit(BCH_FS_GOING_RO, &c->flags)) {
+ h = ERR_PTR(-EROFS);
+ goto found;
+ }
+
list_for_each_entry(h, &c->ec_stripe_head_list, list)
if (h->target == target &&
h->algo == algo &&
@@ -1753,7 +1761,7 @@ err:
return ERR_PTR(ret);
}
-void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca)
+static void __bch2_ec_stop(struct bch_fs *c, struct bch_dev *ca)
{
struct ec_stripe_head *h;
struct open_bucket *ob;
@@ -1761,11 +1769,13 @@ void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca)
mutex_lock(&c->ec_stripe_head_lock);
list_for_each_entry(h, &c->ec_stripe_head_list, list) {
-
mutex_lock(&h->lock);
if (!h->s)
goto unlock;
+ if (!ca)
+ goto found;
+
for (i = 0; i < h->s->new_stripe.key.v.nr_blocks; i++) {
if (!h->s->blocks[i])
continue;
@@ -1784,6 +1794,32 @@ unlock:
mutex_unlock(&c->ec_stripe_head_lock);
}
+void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca)
+{
+ __bch2_ec_stop(c, ca);
+}
+
+void bch2_fs_ec_stop(struct bch_fs *c)
+{
+ __bch2_ec_stop(c, NULL);
+}
+
+static bool bch2_fs_ec_flush_done(struct bch_fs *c)
+{
+ bool ret;
+
+ mutex_lock(&c->ec_stripe_new_lock);
+ ret = list_empty(&c->ec_stripe_new_list);
+ mutex_unlock(&c->ec_stripe_new_lock);
+
+ return ret;
+}
+
+void bch2_fs_ec_flush(struct bch_fs *c)
+{
+ wait_event(c->ec_stripe_new_wait, bch2_fs_ec_flush_done(c));
+}
+
int bch2_stripes_read(struct bch_fs *c)
{
struct btree_trans trans;
@@ -1915,14 +1951,22 @@ void bch2_fs_ec_exit(struct bch_fs *c)
void bch2_fs_ec_init_early(struct bch_fs *c)
{
+ spin_lock_init(&c->ec_stripes_new_lock);
+ mutex_init(&c->ec_stripes_heap_lock);
+
+ INIT_LIST_HEAD(&c->ec_stripe_head_list);
+ mutex_init(&c->ec_stripe_head_lock);
+
+ INIT_LIST_HEAD(&c->ec_stripe_new_list);
+ mutex_init(&c->ec_stripe_new_lock);
+ init_waitqueue_head(&c->ec_stripe_new_wait);
+
INIT_WORK(&c->ec_stripe_create_work, ec_stripe_create_work);
INIT_WORK(&c->ec_stripe_delete_work, ec_stripe_delete_work);
}
int bch2_fs_ec_init(struct bch_fs *c)
{
- spin_lock_init(&c->ec_stripes_new_lock);
-
return bioset_init(&c->ec_bioset, 1, offsetof(struct ec_bio, bio),
BIOSET_NEED_BVECS);
}
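bch2_fs_ec_flush() above blocks on c->ec_stripe_new_wait until
ec_stripe_new_list drains. The producer side of that handshake is not part
of this diff; as a rough, hypothetical sketch (the helper name and exact
placement are ours, not taken from the patch), the completion path for a
new stripe would need to do something like:

/* Hypothetical sketch of the wake-up half of the flush handshake;
 * assumed, not shown in this patch: */
static void ec_stripe_new_finished(struct bch_fs *c, struct ec_stripe_new *s)
{
	mutex_lock(&c->ec_stripe_new_lock);
	list_del(&s->list);			/* drop from ec_stripe_new_list */
	mutex_unlock(&c->ec_stripe_new_lock);

	wake_up(&c->ec_stripe_new_wait);	/* lets bch2_fs_ec_flush() return */
}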
diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h
index 8f777a37e43d..7c08a49d7419 100644
--- a/fs/bcachefs/ec.h
+++ b/fs/bcachefs/ec.h
@@ -245,8 +245,8 @@ static inline void ec_stripe_new_put(struct bch_fs *c, struct ec_stripe_new *s,
}
void bch2_ec_stop_dev(struct bch_fs *, struct bch_dev *);
-
-void bch2_ec_flush_new_stripes(struct bch_fs *);
+void bch2_fs_ec_stop(struct bch_fs *);
+void bch2_fs_ec_flush(struct bch_fs *);
int bch2_stripes_read(struct bch_fs *);
diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c
index 1b093650ff9a..e82da496b3f8 100644
--- a/fs/bcachefs/io.c
+++ b/fs/bcachefs/io.c
@@ -705,7 +705,8 @@ static void bch2_write_done(struct closure *cl)
struct bch_fs *c = op->c;
bch2_disk_reservation_put(c, &op->res);
- bch2_write_ref_put(c, BCH_WRITE_REF_write);
+ if (!(op->flags & BCH_WRITE_MOVE))
+ bch2_write_ref_put(c, BCH_WRITE_REF_write);
bch2_keylist_free(&op->insert_keys, op->inline_keys);
bch2_time_stats_update(&c->times[BCH_TIME_data_write], op->start_time);
@@ -1842,7 +1843,12 @@ void bch2_write(struct closure *cl)
goto err;
}
- if (c->opts.nochanges ||
+ if (c->opts.nochanges) {
+ op->error = -BCH_ERR_erofs_no_writes;
+ goto err;
+ }
+
+ if (!(op->flags & BCH_WRITE_MOVE) &&
!bch2_write_ref_tryget(c, BCH_WRITE_REF_write)) {
op->error = -BCH_ERR_erofs_no_writes;
goto err;
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index f74ef947cac5..4a9ffca7be62 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -59,7 +59,6 @@ struct moving_io {
static void move_free(struct moving_io *io)
{
struct moving_context *ctxt = io->write.ctxt;
- struct bch_fs *c = ctxt->c;
if (io->b)
atomic_dec(&io->b->count);
@@ -71,7 +70,6 @@ static void move_free(struct moving_io *io)
wake_up(&ctxt->wait);
mutex_unlock(&ctxt->lock);
- bch2_write_ref_put(c, BCH_WRITE_REF_move);
kfree(io);
}
@@ -280,9 +278,6 @@ static int bch2_move_extent(struct btree_trans *trans,
return 0;
}
- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_move))
- return -BCH_ERR_erofs_no_writes;
-
/*
* Before memory allocations & taking nocow locks in
* bch2_data_update_init():
@@ -378,7 +373,6 @@ err_free_pages:
err_free:
kfree(io);
err:
- bch2_write_ref_put(c, BCH_WRITE_REF_move);
trace_and_count(c, move_extent_alloc_mem_fail, k.k);
return ret;
}
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index bf3aabdb0fc9..278f8f19a230 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -205,9 +205,12 @@ static void __bch2_fs_read_only(struct bch_fs *c)
unsigned i, clean_passes = 0;
u64 seq = 0;
+ bch2_fs_ec_stop(c);
+ bch2_open_buckets_stop(c, NULL, true);
bch2_rebalance_stop(c);
bch2_copygc_stop(c);
bch2_gc_thread_stop(c);
+ bch2_fs_ec_flush(c);
bch_verbose(c, "flushing journal and stopping allocators, journal seq %llu",
journal_cur_seq(&c->journal));
@@ -700,15 +703,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
INIT_LIST_HEAD(&c->fsck_errors);
mutex_init(&c->fsck_error_lock);
- INIT_LIST_HEAD(&c->ec_stripe_head_list);
- mutex_init(&c->ec_stripe_head_lock);
-
- INIT_LIST_HEAD(&c->ec_stripe_new_list);
- mutex_init(&c->ec_stripe_new_lock);
-
-
- mutex_init(&c->ec_stripes_heap_lock);
-
seqcount_init(&c->gc_pos_lock);
seqcount_init(&c->usage_lock);