diff options
Diffstat (limited to 'libbcachefs/super.c')
-rw-r--r-- | libbcachefs/super.c | 88 |
1 files changed, 52 insertions, 36 deletions
diff --git a/libbcachefs/super.c b/libbcachefs/super.c index d3473897..f14fe55f 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -207,7 +207,7 @@ int bch2_congested(void *data, int bdi_bits) static void __bch2_fs_read_only(struct bch_fs *c) { struct bch_dev *ca; - bool wrote; + bool wrote = false; unsigned i, clean_passes = 0; int ret; @@ -224,48 +224,68 @@ static void __bch2_fs_read_only(struct bch_fs *c) */ bch2_journal_flush_all_pins(&c->journal); + /* + * If the allocator threads didn't all start up, the btree updates to + * write out alloc info aren't going to work: + */ if (!test_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags)) - goto allocator_not_running; + goto nowrote_alloc; - do { - wrote = false; + bch_verbose(c, "writing alloc info"); + /* + * This should normally just be writing the bucket read/write clocks: + */ + ret = bch2_stripes_write(c, BTREE_INSERT_NOCHECK_RW, &wrote) ?: + bch2_alloc_write(c, BTREE_INSERT_NOCHECK_RW, &wrote); + bch_verbose(c, "writing alloc info complete"); - ret = bch2_stripes_write(c, BTREE_INSERT_NOCHECK_RW, &wrote) ?: - bch2_alloc_write(c, BTREE_INSERT_NOCHECK_RW, &wrote); + if (ret && !test_bit(BCH_FS_EMERGENCY_RO, &c->flags)) + bch2_fs_inconsistent(c, "error writing out alloc info %i", ret); - if (ret && !test_bit(BCH_FS_EMERGENCY_RO, &c->flags)) - bch2_fs_inconsistent(c, "error writing out alloc info %i", ret); + if (ret) + goto nowrote_alloc; - if (ret) - break; + bch_verbose(c, "flushing journal and stopping allocators"); - for_each_member_device(ca, c, i) - bch2_dev_allocator_quiesce(c, ca); + bch2_journal_flush_all_pins(&c->journal); + set_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags); - bch2_journal_flush_all_pins(&c->journal); + do { + clean_passes++; + + if (bch2_journal_flush_all_pins(&c->journal)) + clean_passes = 0; /* - * We need to explicitly wait on btree interior updates to complete - * before stopping the journal, flushing all journal pins isn't - * sufficient, because in the BTREE_INTERIOR_UPDATING_ROOT case btree - * interior updates have to drop their journal pin before they're - * fully complete: + * In flight interior btree updates will generate more journal + * updates and btree updates (alloc btree): */ - closure_wait_event(&c->btree_interior_update_wait, - !bch2_btree_interior_updates_nr_pending(c)); + if (bch2_btree_interior_updates_nr_pending(c)) { + closure_wait_event(&c->btree_interior_update_wait, + !bch2_btree_interior_updates_nr_pending(c)); + clean_passes = 0; + } + flush_work(&c->btree_interior_update_work); - clean_passes = wrote ? 0 : clean_passes + 1; + if (bch2_journal_flush_all_pins(&c->journal)) + clean_passes = 0; } while (clean_passes < 2); -allocator_not_running: + bch_verbose(c, "flushing journal and stopping allocators complete"); + + set_bit(BCH_FS_ALLOC_CLEAN, &c->flags); +nowrote_alloc: + closure_wait_event(&c->btree_interior_update_wait, + !bch2_btree_interior_updates_nr_pending(c)); + flush_work(&c->btree_interior_update_work); + for_each_member_device(ca, c, i) bch2_dev_allocator_stop(ca); clear_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags); + clear_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags); bch2_fs_journal_stop(&c->journal); - /* XXX: mark super that alloc info is persistent */ - /* * the journal kicks off btree writes via reclaim - wait for in flight * writes after stopping journal: @@ -338,8 +358,11 @@ void bch2_fs_read_only(struct bch_fs *c) !test_bit(BCH_FS_ERROR, &c->flags) && !test_bit(BCH_FS_EMERGENCY_RO, &c->flags) && test_bit(BCH_FS_STARTED, &c->flags) && - !c->opts.norecovery) + test_bit(BCH_FS_ALLOC_CLEAN, &c->flags) && + !c->opts.norecovery) { + bch_verbose(c, "marking filesystem clean"); bch2_fs_mark_clean(c); + } clear_bit(BCH_FS_RW, &c->flags); } @@ -426,6 +449,8 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) if (ret) goto err; + clear_bit(BCH_FS_ALLOC_CLEAN, &c->flags); + for_each_rw_member(ca, c, i) bch2_dev_allocator_add(c, ca); bch2_recalc_capacity(c); @@ -494,6 +519,7 @@ static void bch2_fs_free(struct bch_fs *c) bch2_fs_ec_exit(c); bch2_fs_encryption_exit(c); bch2_fs_io_exit(c); + bch2_fs_btree_interior_update_exit(c); bch2_fs_btree_iter_exit(c); bch2_fs_btree_cache_exit(c); bch2_fs_journal_exit(&c->journal); @@ -511,8 +537,6 @@ static void bch2_fs_free(struct bch_fs *c) mempool_exit(&c->large_bkey_pool); mempool_exit(&c->btree_bounce_pool); bioset_exit(&c->btree_bio); - mempool_exit(&c->btree_interior_update_pool); - mempool_exit(&c->btree_reserve_pool); mempool_exit(&c->fill_iter); percpu_ref_exit(&c->writes); kfree(c->replicas.entries); @@ -675,11 +699,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) INIT_LIST_HEAD(&c->list); - INIT_LIST_HEAD(&c->btree_interior_update_list); - INIT_LIST_HEAD(&c->btree_interior_updates_unwritten); - mutex_init(&c->btree_reserve_cache_lock); - mutex_init(&c->btree_interior_update_lock); - mutex_init(&c->usage_scratch_lock); mutex_init(&c->bio_bounce_pages_lock); @@ -752,10 +771,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) || percpu_ref_init(&c->writes, bch2_writes_disabled, PERCPU_REF_INIT_DEAD, GFP_KERNEL) || - mempool_init_kmalloc_pool(&c->btree_reserve_pool, 1, - sizeof(struct btree_reserve)) || - mempool_init_kmalloc_pool(&c->btree_interior_update_pool, 1, - sizeof(struct btree_update)) || mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) || bioset_init(&c->btree_bio, 1, max(offsetof(struct btree_read_bio, bio), @@ -771,6 +786,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) bch2_fs_replicas_init(c) || bch2_fs_btree_cache_init(c) || bch2_fs_btree_iter_init(c) || + bch2_fs_btree_interior_update_init(c) || bch2_fs_io_init(c) || bch2_fs_encryption_init(c) || bch2_fs_compress_init(c) || |