author | Kent Overstreet <kent.overstreet@gmail.com> | 2018-01-04 02:04:12 -0500
---|---|---
committer | Kent Overstreet <kent.overstreet@gmail.com> | 2018-01-10 03:01:53 -0500
commit | a3ed95a51794df6f076d796db984f029affbb190 (patch) |
tree | 70e977b9b521543e432865db4a10409c18f6c373 |
parent | 3e5533d02405cbf50f3c99ffaa59ca095b196dfb (diff) |
bcachefs: fix allocator startup, once and for all
This fixes the "ptr with missing gen in alloc btree" bugs: on startup it was possible to write out a pointer to a freshly allocated bucket before the corresponding gen reached the alloc btree.
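The core of the fix is a new, explicitly ordered startup path (`__bch2_fs_allocator_start()` in the diff below): reuse buckets that are already invalidated on disk when possible; otherwise reclaim buckets, and hold all btree node writes until the invalidation has been journalled, since reclaimed buckets may hold live cached data until then. Below is a minimal, self-contained sketch of that ordering — every function here is a logging stand-in for the kernel primitives named in the patch, not a real API:

```c
#include <stdbool.h>
#include <stdio.h>

/* Logging stand-ins for the bcachefs internals used by the patch: */
static bool enough_preinvalidated_buckets(void)
{
	puts("scan alloc btree for buckets already invalidated on disk");
	return false;			/* assume the scan came up short */
}

static bool reclaim_buckets(void)
{
	puts("find_reclaimable_buckets + sort_free_inc, fill RESERVE_BTREE");
	return true;			/* some buckets hold live cached data */
}

static void hold_btree_writes(bool hold)
{
	printf("%s BCH_FS_HOLD_BTREE_WRITES\n", hold ? "set" : "clear");
}

static int invalidate_and_flush_journal(void)
{
	puts("write alloc keys for free_inc; flush the journal");
	return 0;
}

static void discard_buckets(void)
{
	puts("discard the now-invalidated buckets");
}

static void flush_dirty_btree_nodes(void)
{
	puts("rewrite btree nodes dirtied while writes were held");
}

/* The ordering enforced by __bch2_fs_allocator_start(): */
static int fs_allocator_start(void)
{
	bool invalidating_data;

	if (enough_preinvalidated_buckets())
		return 0;		/* fast path: nothing to invalidate */

	invalidating_data = reclaim_buckets();

	/*
	 * Reclaimed buckets go onto freelists _before_ their invalidation
	 * is on disk, so btree writes must be held until the flush:
	 */
	if (invalidating_data)
		hold_btree_writes(true);

	if (invalidate_and_flush_journal())
		return -1;

	discard_buckets();

	if (invalidating_data) {
		hold_btree_writes(false);
		flush_dirty_btree_nodes();
	}

	return 0;
}

int main(void)
{
	return fs_allocator_start();
}
```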
mode | file | lines changed
---|---|---
-rw-r--r-- | fs/bcachefs/alloc.c | 394
-rw-r--r-- | fs/bcachefs/alloc.h | 1
-rw-r--r-- | fs/bcachefs/bcachefs.h | 17
-rw-r--r-- | fs/bcachefs/btree_cache.h | 4
-rw-r--r-- | fs/bcachefs/btree_io.c | 11
-rw-r--r-- | fs/bcachefs/buckets.c | 29
-rw-r--r-- | fs/bcachefs/buckets.h | 7
-rw-r--r-- | fs/bcachefs/buckets_types.h | 3
-rw-r--r-- | fs/bcachefs/fifo.h | 1
-rw-r--r-- | fs/bcachefs/journal.c | 4
-rw-r--r-- | fs/bcachefs/journal.h | 2
-rw-r--r-- | fs/bcachefs/super-io.c | 3
-rw-r--r-- | fs/bcachefs/super.c | 31
13 files changed, 320 insertions, 187 deletions
diff --git a/fs/bcachefs/alloc.c b/fs/bcachefs/alloc.c
index f5b0fdfdddf6..fc492da530e3 100644
--- a/fs/bcachefs/alloc.c
+++ b/fs/bcachefs/alloc.c
@@ -55,6 +55,8 @@
 #include "bcachefs.h"
 #include "alloc.h"
+#include "btree_cache.h"
+#include "btree_io.h"
 #include "btree_update.h"
 #include "btree_gc.h"
 #include "buckets.h"
@@ -401,7 +403,7 @@ int bch2_alloc_replay_key(struct bch_fs *c, struct bpos pos)
 	return ret;
 }
 
-static int bch2_alloc_write(struct bch_fs *c, struct bch_dev *ca, u64 *journal_seq)
+static int bch2_alloc_write(struct bch_fs *c, struct bch_dev *ca)
 {
 	struct btree_iter iter;
 	unsigned long bucket;
@@ -412,7 +414,7 @@ static int bch2_alloc_write(struct bch_fs *c, struct bch_dev *ca, u64 *journal_s
 	down_read(&ca->bucket_lock);
 
 	for_each_set_bit(bucket, ca->buckets_dirty, ca->mi.nbuckets) {
-		ret = __bch2_alloc_write_key(c, ca, bucket, &iter, journal_seq);
+		ret = __bch2_alloc_write_key(c, ca, bucket, &iter, NULL);
 		if (ret)
 			break;
 
@@ -692,7 +694,7 @@ static inline int bucket_alloc_cmp(alloc_heap *h,
 	return (l.key > r.key) - (l.key < r.key);
 }
 
-static void invalidate_buckets_lru(struct bch_fs *c, struct bch_dev *ca)
+static void find_reclaimable_buckets_lru(struct bch_fs *c, struct bch_dev *ca)
 {
 	struct bucket_array *buckets;
 	struct alloc_heap_entry e;
@@ -740,7 +742,7 @@ static void invalidate_buckets_lru(struct bch_fs *c, struct bch_dev *ca)
 		bch2_invalidate_one_bucket(c, ca, e.bucket);
 }
 
-static void invalidate_buckets_fifo(struct bch_fs *c, struct bch_dev *ca)
+static void find_reclaimable_buckets_fifo(struct bch_fs *c, struct bch_dev *ca)
 {
 	struct bucket_array *buckets = bucket_array(ca);
 	struct bucket_mark m;
@@ -762,7 +764,7 @@ static void invalidate_buckets_fifo(struct bch_fs *c, struct bch_dev *ca)
 	}
 }
 
-static void invalidate_buckets_random(struct bch_fs *c, struct bch_dev *ca)
+static void find_reclaimable_buckets_random(struct bch_fs *c, struct bch_dev *ca)
 {
 	struct bucket_array *buckets = bucket_array(ca);
 	struct bucket_mark m;
@@ -782,21 +784,21 @@ static void invalidate_buckets_random(struct bch_fs *c, struct bch_dev *ca)
 	}
 }
 
-static void invalidate_buckets(struct bch_fs *c, struct bch_dev *ca)
+static void find_reclaimable_buckets(struct bch_fs *c, struct bch_dev *ca)
 {
 	ca->inc_gen_needs_gc = 0;
 	ca->inc_gen_really_needs_gc = 0;
 
 	switch (ca->mi.replacement) {
-	case CACHE_REPLACEMENT_LRU:
-		invalidate_buckets_lru(c, ca);
-		break;
-	case CACHE_REPLACEMENT_FIFO:
-		invalidate_buckets_fifo(c, ca);
-		break;
-	case CACHE_REPLACEMENT_RANDOM:
-		invalidate_buckets_random(c, ca);
-		break;
+	case CACHE_REPLACEMENT_LRU:
+		find_reclaimable_buckets_lru(c, ca);
+		break;
+	case CACHE_REPLACEMENT_FIFO:
+		find_reclaimable_buckets_fifo(c, ca);
+		break;
+	case CACHE_REPLACEMENT_RANDOM:
+		find_reclaimable_buckets_random(c, ca);
+		break;
 	}
 }
 
@@ -807,79 +809,119 @@ static int size_t_cmp(const void *_l, const void *_r)
 	return (*l > *r) - (*l < *r);
 }
 
+static void sort_free_inc(struct bch_fs *c, struct bch_dev *ca)
+{
+	BUG_ON(ca->free_inc.front);
+
+	spin_lock(&c->freelist_lock);
+	sort(ca->free_inc.data,
+	     ca->free_inc.back,
+	     sizeof(ca->free_inc.data[0]),
+	     size_t_cmp, NULL);
+	spin_unlock(&c->freelist_lock);
+}
+
 static int bch2_invalidate_free_inc(struct bch_fs *c, struct bch_dev *ca,
-				    u64 *journal_seq)
+				    u64 *journal_seq, size_t nr)
 {
 	struct btree_iter iter;
-	unsigned nr_invalidated = 0;
-	size_t b, i;
 	int ret = 0;
 
 	bch2_btree_iter_init(&iter, c, BTREE_ID_ALLOC, POS(ca->dev_idx, 0),
			     BTREE_ITER_INTENT);
 
-	fifo_for_each_entry(b, &ca->free_inc, i) {
+	/*
+	 * XXX: if ca->nr_invalidated != 0, just return if we'd block doing the
+	 * btree update or journal_res_get
+	 */
+	while (ca->nr_invalidated < min(nr, fifo_used(&ca->free_inc))) {
+		size_t b = fifo_idx_entry(&ca->free_inc, ca->nr_invalidated);
+
 		ret = __bch2_alloc_write_key(c, ca, b, &iter, journal_seq);
 		if (ret)
 			break;
 
-		nr_invalidated++;
+		ca->nr_invalidated++;
 	}
 
 	bch2_btree_iter_unlock(&iter);
-	return nr_invalidated ?: ret;
+	return ret;
 }
 
-/*
- * Given an invalidated, ready to use bucket: issue a discard to it if enabled,
- * then add it to the freelist, waiting until there's room if necessary:
- */
-static void discard_invalidated_bucket(struct bch_fs *c, struct bch_dev *ca,
-				       long bucket)
+static bool __push_invalidated_bucket(struct bch_fs *c, struct bch_dev *ca, size_t bucket)
 {
-	if (ca->mi.discard &&
-	    blk_queue_discard(bdev_get_queue(ca->disk_sb.bdev)))
-		blkdev_issue_discard(ca->disk_sb.bdev,
-				     bucket_to_sector(ca, bucket),
-				     ca->mi.bucket_size, GFP_NOIO, 0);
+	unsigned i;
 
-	while (1) {
-		bool pushed = false;
-		unsigned i;
+	/*
+	 * Don't remove from free_inc until after it's added to
+	 * freelist, so gc can find it:
+	 */
+	spin_lock(&c->freelist_lock);
+	for (i = 0; i < RESERVE_NR; i++)
+		if (fifo_push(&ca->free[i], bucket)) {
+			fifo_pop(&ca->free_inc, bucket);
+			--ca->nr_invalidated;
+			closure_wake_up(&c->freelist_wait);
+			spin_unlock(&c->freelist_lock);
+			return true;
+		}
+	spin_unlock(&c->freelist_lock);
 
-		set_current_state(TASK_INTERRUPTIBLE);
+	return false;
+}
 
-		/*
-		 * Don't remove from free_inc until after it's added to
-		 * freelist, so gc can find it:
-		 */
-		spin_lock(&c->freelist_lock);
-		for (i = 0; i < RESERVE_NR; i++)
-			if (fifo_push(&ca->free[i], bucket)) {
-				fifo_pop(&ca->free_inc, bucket);
-				closure_wake_up(&c->freelist_wait);
-				pushed = true;
-				break;
-			}
-		spin_unlock(&c->freelist_lock);
+static int push_invalidated_bucket(struct bch_fs *c, struct bch_dev *ca, size_t bucket)
+{
+	int ret = 0;
 
-		if (pushed)
+	while (1) {
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		if (__push_invalidated_bucket(c, ca, bucket))
 			break;
 
-		if (kthread_should_stop())
+		if ((current->flags & PF_KTHREAD) &&
+		    kthread_should_stop()) {
+			ret = -1;
 			break;
+		}
 
 		schedule();
 		try_to_freeze();
 	}
 
 	__set_current_state(TASK_RUNNING);
+	return ret;
+}
+
+/*
+ * Given an invalidated, ready to use bucket: issue a discard to it if enabled,
+ * then add it to the freelist, waiting until there's room if necessary:
+ */
+static int discard_invalidated_buckets(struct bch_fs *c, struct bch_dev *ca)
+{
+	while (ca->nr_invalidated) {
+		size_t bucket = fifo_peek(&ca->free_inc);
+
+		BUG_ON(fifo_empty(&ca->free_inc) || !ca->nr_invalidated);
+
+		if (ca->mi.discard &&
+		    blk_queue_discard(bdev_get_queue(ca->disk_sb.bdev)))
+			blkdev_issue_discard(ca->disk_sb.bdev,
+					     bucket_to_sector(ca, bucket),
+					     ca->mi.bucket_size, GFP_NOIO, 0);
+
+		if (push_invalidated_bucket(c, ca, bucket))
+			return -1;
+	}
+
+	return 0;
 }
 
 /**
  * bch_allocator_thread - move buckets from free_inc to reserves
  *
- * The free_inc FIFO is populated by invalidate_buckets(), and
+ * The free_inc FIFO is populated by find_reclaimable_buckets(), and
  * the reserves are depleted by bucket allocation. When we run out
  * of free_inc, try to invalidate some buckets and write out
  * prios and gens.
@@ -889,44 +931,36 @@ static int bch2_allocator_thread(void *arg)
 	struct bch_dev *ca = arg;
 	struct bch_fs *c = ca->fs;
 	u64 journal_seq;
-	size_t bucket;
 	int ret;
 
 	set_freezable();
 
 	while (1) {
 		while (1) {
-			while (ca->nr_invalidated) {
-				BUG_ON(fifo_empty(&ca->free_inc));
-
-				bucket = fifo_peek(&ca->free_inc);
-				discard_invalidated_bucket(c, ca, bucket);
-				--ca->nr_invalidated;
-
-				if (kthread_should_stop())
-					return 0;
-			}
+			ret = discard_invalidated_buckets(c, ca);
+			if (ret)
+				return 0;
 
 			if (fifo_empty(&ca->free_inc))
 				break;
 
 			journal_seq = 0;
-			ret = bch2_invalidate_free_inc(c, ca, &journal_seq);
-			if (ret < 0)
+			ret = bch2_invalidate_free_inc(c, ca, &journal_seq, SIZE_MAX);
+			if (ret)
 				return 0;
 
-			ca->nr_invalidated = ret;
-
-			if (ca->nr_invalidated == fifo_used(&ca->free_inc)) {
-				ca->alloc_thread_started = true;
-				bch2_alloc_write(c, ca, &journal_seq);
-			}
-
 			if (ca->allocator_invalidating_data)
-				bch2_journal_flush_seq(&c->journal, journal_seq);
+				ret = bch2_journal_flush_seq(&c->journal, journal_seq);
 			else if (ca->allocator_journal_seq_flush)
-				bch2_journal_flush_seq(&c->journal,
+				ret = bch2_journal_flush_seq(&c->journal,
						ca->allocator_journal_seq_flush);
+
+			/*
+			 * journal error - buckets haven't actually been
+			 * invalidated, can't discard them:
+			 */
+			if (ret)
+				return 0;
 		}
 
 		/* Reset front/back so we can easily sort fifo entries later: */
@@ -948,7 +982,7 @@ static int bch2_allocator_thread(void *arg)
 		 * another cache tier
 		 */
 
-		invalidate_buckets(c, ca);
+		find_reclaimable_buckets(c, ca);
 
 		trace_alloc_batch(ca, fifo_used(&ca->free_inc),
				  ca->free_inc.size);
@@ -971,14 +1005,7 @@ static int bch2_allocator_thread(void *arg)
 		}
 		up_read(&c->gc_lock);
 
-		BUG_ON(ca->free_inc.front);
-
-		spin_lock(&c->freelist_lock);
-		sort(ca->free_inc.data,
-		     ca->free_inc.back,
-		     sizeof(ca->free_inc.data[0]),
-		     size_t_cmp, NULL);
-		spin_unlock(&c->freelist_lock);
+		sort_free_inc(c, ca);
 
 		/*
 		 * free_inc is now full of newly-invalidated buckets: next,
@@ -1038,47 +1065,27 @@ static struct open_bucket *bch2_open_bucket_alloc(struct bch_fs *c)
 	return ob;
 }
 
-/*
- * XXX: allocation on startup is still sketchy. There is insufficient
- * synchronization for bch2_bucket_alloc_startup() to work correctly after
- * bch2_alloc_write() has been called, and we aren't currently doing anything
- * to guarantee that this won't happen.
- *
- * Even aside from that, it's really difficult to avoid situations where on
- * startup we write out a pointer to a freshly allocated bucket before the
- * corresponding gen - when we're still digging ourself out of the "i need to
- * allocate to write bucket gens, but i need to write bucket gens to allocate"
- * hole.
- *
- * Fortunately, bch2_btree_mark_key_initial() will detect and repair this
- * easily enough...
- */
-static long bch2_bucket_alloc_startup(struct bch_fs *c, struct bch_dev *ca)
+/* _only_ for allocating the journal and btree roots on a brand new fs: */
+int bch2_bucket_alloc_startup(struct bch_fs *c, struct bch_dev *ca)
 {
 	struct bucket_array *buckets;
 	ssize_t b;
 
-	if (!down_read_trylock(&c->gc_lock))
-		return -1;
-
-	if (test_bit(BCH_FS_GC_FAILURE, &c->flags)) {
-		up_read(&c->gc_lock);
-		return -1;
-	}
-
 	rcu_read_lock();
 	buckets = bucket_array(ca);
 
 	for (b = ca->mi.first_bucket; b < ca->mi.nbuckets; b++)
-		if (is_startup_available_bucket(buckets->b[b].mark) &&
-		    bch2_mark_alloc_bucket_startup(c, ca, b)) {
+		if (is_available_bucket(buckets->b[b].mark)) {
+			bch2_mark_alloc_bucket(c, ca, b, true,
					gc_pos_alloc(c, NULL),
					BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
					BCH_BUCKET_MARK_GC_LOCK_HELD);
 			set_bit(b, ca->buckets_dirty);
 			goto success;
 		}
 	b = -1;
 success:
 	rcu_read_unlock();
-	up_read(&c->gc_lock);
 	return b;
 }
 
@@ -1147,8 +1154,7 @@ int bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
 		break;
 	}
 
-	if (unlikely(!ca->alloc_thread_started) &&
-	    (reserve == RESERVE_ALLOC) &&
+	if (unlikely(test_bit(BCH_FS_BRAND_NEW_FS, &c->flags)) &&
 	    (bucket = bch2_bucket_alloc_startup(c, ca)) >= 0)
 		goto out;
 
@@ -1855,6 +1861,170 @@ int bch2_dev_allocator_start(struct bch_dev *ca)
 	return 0;
 }
 
+static int __bch2_fs_allocator_start(struct bch_fs *c)
+{
+	struct bch_dev *ca;
+	size_t bu, i, devs_have_enough = 0;
+	unsigned dev_iter;
+	u64 journal_seq = 0;
+	bool invalidating_data = false;
+	int ret = 0;
+
+	if (test_bit(BCH_FS_GC_FAILURE, &c->flags))
+		return -1;
+
+	/* Scan for buckets that are already invalidated: */
+	for_each_rw_member(ca, c, dev_iter) {
+		struct btree_iter iter;
+		struct bucket_mark m;
+		struct bkey_s_c k;
+
+		for_each_btree_key(&iter, c, BTREE_ID_ALLOC, POS(ca->dev_idx, 0), 0, k) {
+			if (k.k->type != BCH_ALLOC)
+				continue;
+
+			bu = k.k->p.offset;
+			m = READ_ONCE(bucket(ca, bu)->mark);
+
+			if (!is_available_bucket(m) || m.cached_sectors)
+				continue;
+
+			bch2_mark_alloc_bucket(c, ca, bu, true,
					gc_pos_alloc(c, NULL),
					BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
					BCH_BUCKET_MARK_GC_LOCK_HELD);
+
+			fifo_push(&ca->free_inc, bu);
+			ca->nr_invalidated++;
+
+			if (fifo_full(&ca->free_inc))
+				break;
+		}
+		bch2_btree_iter_unlock(&iter);
+	}
+
+	/* did we find enough buckets? */
+	for_each_rw_member(ca, c, dev_iter)
+		devs_have_enough += (fifo_used(&ca->free_inc) >=
+				     ca->free[RESERVE_BTREE].size);
+
+	if (devs_have_enough >= c->opts.metadata_replicas)
+		return 0;
+
+	/* clear out free_inc - find_reclaimable_buckets() assumes it's empty */
+	for_each_rw_member(ca, c, dev_iter)
+		discard_invalidated_buckets(c, ca);
+
+	for_each_rw_member(ca, c, dev_iter) {
+		BUG_ON(!fifo_empty(&ca->free_inc));
+		ca->free_inc.front = ca->free_inc.back = 0;
+
+		find_reclaimable_buckets(c, ca);
+		sort_free_inc(c, ca);
+
+		invalidating_data |= ca->allocator_invalidating_data;
+
+		fifo_for_each_entry(bu, &ca->free_inc, i)
+			if (!fifo_push(&ca->free[RESERVE_BTREE], bu))
+				break;
+	}
+
+	/*
+	 * We're moving buckets to freelists _before_ they've been marked as
+	 * invalidated on disk - we have to so that we can allocate new btree
+	 * nodes to mark them as invalidated on disk.
+	 *
+	 * However, we can't _write_ to any of these buckets yet - they might
+	 * have cached data in them, which is live until they're marked as
+	 * invalidated on disk:
+	 */
+	if (invalidating_data)
+		set_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags);
+
+	/*
+	 * XXX: it's possible for this to deadlock waiting on journal reclaim,
+	 * since we're holding btree writes. What then?
+	 */
+
+	for_each_rw_member(ca, c, dev_iter) {
+		ret = bch2_invalidate_free_inc(c, ca, &journal_seq,
					       ca->free[RESERVE_BTREE].size);
+		if (ret) {
+			percpu_ref_put(&ca->io_ref);
+			return ret;
+		}
+	}
+
+	if (invalidating_data) {
+		ret = bch2_journal_flush_seq(&c->journal, journal_seq);
+		if (ret)
+			return ret;
+	}
+
+	for_each_rw_member(ca, c, dev_iter)
+		while (ca->nr_invalidated) {
+			BUG_ON(!fifo_pop(&ca->free_inc, bu));
+			blkdev_issue_discard(ca->disk_sb.bdev,
					     bucket_to_sector(ca, bu),
					     ca->mi.bucket_size, GFP_NOIO, 0);
+			ca->nr_invalidated--;
+		}
+
+	/* now flush dirty btree nodes: */
+	if (invalidating_data) {
+		struct bucket_table *tbl;
+		struct rhash_head *pos;
+		struct btree *b;
+
+		clear_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags);
+again:
+		rcu_read_lock();
+		for_each_cached_btree(b, c, tbl, i, pos)
+			if (btree_node_dirty(b) && (!b->written || b->level)) {
+				rcu_read_unlock();
+				six_lock_read(&b->lock);
+				bch2_btree_node_write(c, b, NULL, SIX_LOCK_read);
+				six_unlock_read(&b->lock);
+				goto again;
+			}
+		rcu_read_unlock();
+	}
+
+	return 0;
+}
+
+int bch2_fs_allocator_start(struct bch_fs *c)
+{
+	struct bch_dev *ca;
+	unsigned i;
+	int ret;
+
+	down_read(&c->gc_lock);
+	ret = __bch2_fs_allocator_start(c);
+	up_read(&c->gc_lock);
+
+	if (ret)
+		return ret;
+
+	for_each_rw_member(ca, c, i) {
+		ret = bch2_dev_allocator_start(ca);
+		if (ret) {
+			percpu_ref_put(&ca->io_ref);
+			return ret;
+		}
+	}
+
+	for_each_rw_member(ca, c, i) {
+		ret = bch2_alloc_write(c, ca);
+		if (ret) {
+			percpu_ref_put(&ca->io_ref);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
 void bch2_fs_allocator_init(struct bch_fs *c)
 {
 	struct open_bucket *ob;
diff --git a/fs/bcachefs/alloc.h b/fs/bcachefs/alloc.h
index ee771ee1c09a..1b9d960b03f4 100644
--- a/fs/bcachefs/alloc.h
+++ b/fs/bcachefs/alloc.h
@@ -118,6 +118,7 @@ static inline void writepoint_init(struct write_point *wp,
 	wp->type = type;
 }
 
+int bch2_fs_allocator_start(struct bch_fs *);
 void bch2_fs_allocator_init(struct bch_fs *);
 
 extern const struct bkey_ops bch2_bkey_alloc_ops;
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 02e38410f5dc..f161d83160fe 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -283,9 +283,6 @@ do {			\
 #include "keylist_types.h"
 #include "super_types.h"
 
-/* 256k, in sectors */
-#define BTREE_NODE_SIZE_MAX	512
-
 /*
  * Number of nodes we might have to allocate in a worst case btree split
  * operation - we split all the way up to the root, then allocate a new root.
@@ -380,7 +377,6 @@ struct bch_dev {
 	alloc_fifo	free_inc;
 	spinlock_t	freelist_lock;
 	unsigned	nr_invalidated;
-	bool		alloc_thread_started;
 
 	u8		open_buckets_partial[OPEN_BUCKETS_COUNT];
 	unsigned	open_buckets_partial_nr;
@@ -423,18 +419,27 @@ struct bch_dev {
  * won't automatically reattach).
  */
 enum {
+	/* startup: */
+	BCH_FS_BRAND_NEW_FS,
 	BCH_FS_ALLOC_READ_DONE,
 	BCH_FS_INITIAL_GC_DONE,
+	BCH_FS_FSCK_DONE,
+
+	/* shutdown: */
 	BCH_FS_EMERGENCY_RO,
 	BCH_FS_WRITE_DISABLE_COMPLETE,
 	BCH_FS_GC_STOPPING,
+
+	/* errors: */
+	BCH_FS_ERROR,
 	BCH_FS_GC_FAILURE,
+
+	/* misc: */
 	BCH_FS_BDEV_MOUNTED,
-	BCH_FS_ERROR,
 	BCH_FS_FSCK_FIXED_ERRORS,
-	BCH_FS_FSCK_DONE,
 	BCH_FS_FIXED_GENS,
 	BCH_FS_REBUILD_REPLICAS,
+	BCH_FS_HOLD_BTREE_WRITES,
 };
 
 struct btree_debug {
diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h
index 46d536eb36a8..e021d6e9422a 100644
--- a/fs/bcachefs/btree_cache.h
+++ b/fs/bcachefs/btree_cache.h
@@ -45,8 +45,8 @@ static inline bool btree_node_hashed(struct btree *b)
 }
 
 #define for_each_cached_btree(_b, _c, _tbl, _iter, _pos)		\
-	for ((_tbl) = rht_dereference_rcu((_c)->btree_cache_table.tbl,	\
-					  &(_c)->btree_cache_table),	\
+	for ((_tbl) = rht_dereference_rcu((_c)->btree_cache.table.tbl,	\
+					  &(_c)->btree_cache.table),	\
	     _iter = 0; _iter < (_tbl)->size; _iter++)			\
		rht_for_each_entry_rcu((_b), (_pos), _tbl, _iter, hash)
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 87a8ddf9215e..3c7ec1704190 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -1602,6 +1602,9 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
 	unsigned long old, new;
 	void *data;
 
+	if (test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags))
+		return;
+
 	/*
 	 * We may only have a read lock on the btree node - the dirty bit is our
 	 * "lock" against racing with other threads that may be trying to start
@@ -1905,11 +1908,7 @@ void bch2_btree_verify_flushed(struct bch_fs *c)
 	unsigned i;
 
 	rcu_read_lock();
-	tbl = rht_dereference_rcu(c->btree_cache.table.tbl,
-				  &c->btree_cache.table);
-
-	for (i = 0; i < tbl->size; i++)
-		rht_for_each_entry_rcu(b, pos, tbl, i, hash)
-			BUG_ON(btree_node_dirty(b));
+	for_each_cached_btree(b, c, tbl, i, pos)
+		BUG_ON(btree_node_dirty(b));
 	rcu_read_unlock();
 }
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 2dbe7d379126..2e249d66ab00 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -382,7 +382,6 @@ bool bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
 	}
 
 	new.owned_by_allocator	= 1;
-	new.touched_this_mount	= 1;
 	new.data_type		= 0;
 	new.cached_sectors	= 0;
 	new.dirty_sectors	= 0;
@@ -396,29 +395,6 @@ bool bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
 	return true;
 }
 
-bool bch2_mark_alloc_bucket_startup(struct bch_fs *c, struct bch_dev *ca,
-				    size_t b)
-{
-	struct bucket *g;
-	struct bucket_mark new, old;
-
-	lg_local_lock(&c->usage_lock);
-	g = bucket(ca, b);
-
-	old = bucket_data_cmpxchg(c, ca, g, new, ({
-		if (!is_startup_available_bucket(new)) {
-			lg_local_unlock(&c->usage_lock);
-			return false;
-		}
-
-		new.owned_by_allocator	= 1;
-		new.touched_this_mount	= 1;
-	}));
-	lg_local_unlock(&c->usage_lock);
-
-	return true;
-}
-
 void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
			    size_t b, bool owned_by_allocator,
			    struct gc_pos pos, unsigned flags)
@@ -436,7 +412,6 @@ void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
 	}
 
 	old = bucket_data_cmpxchg(c, ca, g, new, ({
-		new.touched_this_mount	= 1;
 		new.owned_by_allocator	= owned_by_allocator;
 	}));
 	lg_local_unlock(&c->usage_lock);
@@ -481,7 +456,6 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
 		saturated_add(ca, new.dirty_sectors, sectors,
			      GC_MAX_SECTORS_USED);
 		new.data_type		= type;
-		new.touched_this_mount	= 1;
 	}));
 	lg_local_unlock(&c->usage_lock);
 
@@ -539,7 +513,6 @@ static void bch2_mark_pointer(struct bch_fs *c,
 	if (flags & BCH_BUCKET_MARK_GC_WILL_VISIT) {
 		if (journal_seq)
 			bucket_cmpxchg(g, new, ({
-				new.touched_this_mount	= 1;
 				new.journal_seq_valid	= 1;
 				new.journal_seq		= journal_seq;
 			}));
@@ -588,8 +561,6 @@ static void bch2_mark_pointer(struct bch_fs *c,
 			new.data_type = data_type;
 		}
 
-		new.touched_this_mount	= 1;
-
 		if (flags & BCH_BUCKET_MARK_NOATOMIC) {
 			g->_mark = new;
 			break;
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index 782431293c0d..8cebc2b31da6 100644
--- a/fs/bcachefs/buckets.h
+++ b/fs/bcachefs/buckets.h
@@ -192,11 +192,6 @@ static inline bool is_available_bucket(struct bucket_mark mark)
 		!mark.nouse);
 }
 
-static inline bool is_startup_available_bucket(struct bucket_mark mark)
-{
-	return !mark.touched_this_mount && is_available_bucket(mark);
-}
-
 static inline bool bucket_needs_journal_commit(struct bucket_mark m,
					       u16 last_seq_ondisk)
 {
@@ -208,8 +203,6 @@ void bch2_bucket_seq_cleanup(struct bch_fs *);
 
 bool bch2_invalidate_bucket(struct bch_fs *, struct bch_dev *,
			    size_t, struct bucket_mark *);
-bool bch2_mark_alloc_bucket_startup(struct bch_fs *, struct bch_dev *,
-				    size_t);
 void bch2_mark_alloc_bucket(struct bch_fs *, struct bch_dev *,
			    size_t, bool, struct gc_pos, unsigned);
 void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *,
diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h
index 7cd8439a0227..6f52a109d102 100644
--- a/fs/bcachefs/buckets_types.h
+++ b/fs/bcachefs/buckets_types.h
@@ -15,8 +15,7 @@ struct bucket_mark {
			gen_valid:1,
			owned_by_allocator:1,
			nouse:1,
-			journal_seq_valid:1,
-			touched_this_mount:1;
+			journal_seq_valid:1;
 
 		u16	dirty_sectors;
 		u16	cached_sectors;
diff --git a/fs/bcachefs/fifo.h b/fs/bcachefs/fifo.h
index 98f22f6a58e4..08739d26bd4d 100644
--- a/fs/bcachefs/fifo.h
+++ b/fs/bcachefs/fifo.h
@@ -57,6 +57,7 @@ do {			\
 #define fifo_peek_back(fifo)	((fifo)->data[((fifo)->back - 1) & (fifo)->mask])
 
 #define fifo_entry_idx(fifo, p)	(((p) - &fifo_peek_front(fifo)) & (fifo)->mask)
+#define fifo_idx_entry(fifo, i)	(fifo)->data[((fifo)->front + (i)) & (fifo)->mask]
 
 #define fifo_push_back_ref(f)						\
	(fifo_full((f)) ? NULL : &(f)->data[(f)->back++ & (f)->mask])
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index fb081b89a950..a65a8f15ca8d 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -1662,7 +1662,7 @@ err:
 	return ret;
 }
 
-int bch2_dev_journal_alloc(struct bch_dev *ca)
+int bch2_dev_journal_alloc(struct bch_fs *c, struct bch_dev *ca)
 {
 	unsigned nr;
 
@@ -1678,7 +1678,7 @@ int bch2_dev_journal_alloc(struct bch_dev *ca)
		     min(1 << 10,
			 (1 << 20) / ca->mi.bucket_size));
 
-	return bch2_set_nr_journal_buckets(ca->fs, ca, nr);
+	return bch2_set_nr_journal_buckets(c, ca, nr);
 }
 
 /* Journalling */
diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
index 61197e578cf2..f2b67ba75dc9 100644
--- a/fs/bcachefs/journal.h
+++ b/fs/bcachefs/journal.h
@@ -390,7 +390,7 @@ static inline void bch2_journal_set_replay_done(struct journal *j)
 ssize_t bch2_journal_print_debug(struct journal *, char *);
 ssize_t bch2_journal_print_pins(struct journal *, char *);
 
-int bch2_dev_journal_alloc(struct bch_dev *);
+int bch2_dev_journal_alloc(struct bch_fs *, struct bch_dev *);
 
 void bch2_dev_journal_stop(struct journal *, struct bch_dev *);
 void bch2_fs_journal_stop(struct journal *);
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index 1472bfec8291..60120e5a95e7 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -330,9 +330,6 @@ const char *bch2_sb_validate(struct bch_sb_handle *disk_sb)
 	if (!is_power_of_2(BCH_SB_BTREE_NODE_SIZE(sb)))
 		return "Btree node size not a power of two";
 
-	if (BCH_SB_BTREE_NODE_SIZE(sb) > BTREE_NODE_SIZE_MAX)
-		return "Btree node size too large";
-
 	if (BCH_SB_GC_RESERVE(sb) < 5)
 		return "gc reserve percentage too small";
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 41b85575528a..c42629563686 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -755,12 +755,9 @@ static const char *__bch2_fs_start(struct bch_fs *c)
 		 */
 		bch2_journal_start(c);
 
-		err = "error starting allocator thread";
-		for_each_rw_member(ca, c, i)
-			if (bch2_dev_allocator_start(ca)) {
-				percpu_ref_put(&ca->io_ref);
-				goto err;
-			}
+		err = "error starting allocator";
+		if (bch2_fs_allocator_start(c))
+			goto err;
 
 		bch_verbose(c, "starting journal replay:");
 		err = "journal replay failed";
@@ -785,6 +782,7 @@ static const char *__bch2_fs_start(struct bch_fs *c)
 		bch_notice(c, "initializing new filesystem");
 
 		set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
+		set_bit(BCH_FS_BRAND_NEW_FS, &c->flags);
 
 		ret = bch2_initial_gc(c, &journal);
 		if (ret)
@@ -792,7 +790,7 @@ static const char *__bch2_fs_start(struct bch_fs *c)
 
 		err = "unable to allocate journal buckets";
 		for_each_rw_member(ca, c, i)
-			if (bch2_dev_journal_alloc(ca)) {
+			if (bch2_dev_journal_alloc(c, ca)) {
				percpu_ref_put(&ca->io_ref);
				goto err;
			}
@@ -802,6 +800,8 @@ static const char *__bch2_fs_start(struct bch_fs *c)
			if (bch2_btree_root_alloc(c, i, &cl))
				goto err;
 
+		clear_bit(BCH_FS_BRAND_NEW_FS, &c->flags);
+
 		/*
 		 * journal_res_get() will crash if called before this has
 		 * set up the journal.pin FIFO and journal.cur pointer:
@@ -809,12 +809,9 @@ static const char *__bch2_fs_start(struct bch_fs *c)
 		bch2_journal_start(c);
 		bch2_journal_set_replay_done(&c->journal);
 
-		err = "error starting allocator thread";
-		for_each_rw_member(ca, c, i)
-			if (bch2_dev_allocator_start(ca)) {
-				percpu_ref_put(&ca->io_ref);
-				goto err;
-			}
+		err = "error starting allocator";
+		if (bch2_fs_allocator_start(c))
+			goto err;
 
 		/* Wait for new btree roots to be written: */
 		closure_sync(&cl);
@@ -1521,13 +1518,13 @@ have_slot:
 	ca = bch_dev_locked(c, dev_idx);
 
 	if (ca->mi.state == BCH_MEMBER_STATE_RW) {
-		err = "journal alloc failed";
-		if (bch2_dev_journal_alloc(ca))
-			goto err;
-
 		err = __bch2_dev_read_write(c, ca);
 		if (err)
			goto err;
+
+		err = "journal alloc failed";
+		if (bch2_dev_journal_alloc(c, ca))
+			goto err;
 	}
 
 	mutex_unlock(&c->state_lock);
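One small but load-bearing piece above is the new `fifo_idx_entry()` macro, which lets `bch2_invalidate_free_inc()` walk only the not-yet-invalidated portion of `free_inc` by indexing relative to the FIFO's front. The following is a self-contained model of the fifo.h conventions it relies on (power-of-two capacity, free-running front/back counters, masked indexing) — simplified from the kernel header, not a verbatim copy:

```c
#include <stdio.h>
#include <stddef.h>

#define FIFO_SIZE 8	/* must be a power of two for mask indexing */

struct fifo {
	size_t front, back;	/* free-running counters, never wrapped */
	size_t mask;		/* FIFO_SIZE - 1 */
	size_t data[FIFO_SIZE];
};

/* Mirrors the macro added by this patch: i-th entry counting from the front. */
#define fifo_idx_entry(f, i)	((f)->data[((f)->front + (i)) & (f)->mask])
#define fifo_used(f)		((f)->back - (f)->front)

int main(void)
{
	struct fifo f = { .front = 0, .back = 0, .mask = FIFO_SIZE - 1 };
	size_t i;

	/* push five "buckets" */
	for (i = 0; i < 5; i++)
		f.data[f.back++ & f.mask] = 100 + i;

	/* pop two, so the live entries no longer start at slot 0 */
	f.front += 2;

	/* walk the remainder the way bch2_invalidate_free_inc() walks free_inc */
	for (i = 0; i < fifo_used(&f); i++)
		printf("entry %zu = %zu\n", i, fifo_idx_entry(&f, i));

	return 0;
}
```

Because front and back are free-running and only masked at access time, resetting `front = back = 0` (as the allocator thread does before sorting) is valid whenever the FIFO is empty, and `sort_free_inc()` can sort `data[0..back)` in place.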