diff options
-rw-r--r-- | fs/bcachefs/alloc.c | 8 | ||||
-rw-r--r-- | fs/bcachefs/bcachefs.h | 1 | ||||
-rw-r--r-- | fs/bcachefs/bset.c | 3 | ||||
-rw-r--r-- | fs/bcachefs/btree_gc.c | 9 | ||||
-rw-r--r-- | fs/bcachefs/btree_io.c | 1 | ||||
-rw-r--r-- | fs/bcachefs/btree_types.h | 2 | ||||
-rw-r--r-- | fs/bcachefs/btree_update_interior.c | 58 | ||||
-rw-r--r-- | fs/bcachefs/btree_update_interior.h | 5 | ||||
-rw-r--r-- | fs/bcachefs/journal.c | 58 | ||||
-rw-r--r-- | fs/bcachefs/super.c | 36 |
10 files changed, 93 insertions, 88 deletions
diff --git a/fs/bcachefs/alloc.c b/fs/bcachefs/alloc.c index fc492da530e3..f7ff8027661c 100644 --- a/fs/bcachefs/alloc.c +++ b/fs/bcachefs/alloc.c @@ -292,9 +292,6 @@ int bch2_alloc_read(struct bch_fs *c, struct list_head *journal_replay_list) unsigned i; int ret; - if (!c->btree_roots[BTREE_ID_ALLOC].b) - return 0; - for_each_btree_key(&iter, c, BTREE_ID_ALLOC, POS_MIN, 0, k) { bch2_alloc_read_key(c, k); bch2_btree_iter_cond_resched(&iter); @@ -539,7 +536,8 @@ static void bch2_prio_timer_init(struct bch_fs *c, int rw) static void verify_not_on_freelist(struct bch_fs *c, struct bch_dev *ca, size_t bucket) { - if (expensive_debug_checks(c)) { + if (expensive_debug_checks(c) && + test_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags)) { size_t iter; long i; unsigned j; @@ -1970,6 +1968,8 @@ static int __bch2_fs_allocator_start(struct bch_fs *c) ca->nr_invalidated--; } + set_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags); + /* now flush dirty btree nodes: */ if (invalidating_data) { struct bucket_table *tbl; diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index e04408f10d5d..78c427fa17a6 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -423,6 +423,7 @@ enum { /* startup: */ BCH_FS_BRAND_NEW_FS, BCH_FS_ALLOC_READ_DONE, + BCH_FS_ALLOCATOR_STARTED, BCH_FS_INITIAL_GC_DONE, BCH_FS_FSCK_DONE, diff --git a/fs/bcachefs/bset.c b/fs/bcachefs/bset.c index 10f3f3f353a6..02be5bb42e4e 100644 --- a/fs/bcachefs/bset.c +++ b/fs/bcachefs/bset.c @@ -1550,9 +1550,6 @@ void bch2_btree_node_iter_init(struct btree_node_iter *iter, __bch2_btree_node_iter_init(iter, is_extents); - //if (bkey_cmp(search, b->curr_max_key) > 0) - // return; - switch (bch2_bkey_pack_pos_lossy(&p, search, b)) { case BKEY_PACK_POS_EXACT: packed_search = &p; diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 01694f9f02e0..9f1071e5ac38 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -285,7 +285,8 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id) mutex_lock(&c->btree_root_lock); b = c->btree_roots[btree_id].b; - bch2_gc_mark_key(c, BKEY_TYPE_BTREE, bkey_i_to_s_c(&b->key), 0); + if (!btree_node_fake(b)) + bch2_gc_mark_key(c, BKEY_TYPE_BTREE, bkey_i_to_s_c(&b->key), 0); gc_pos_set(c, gc_pos_btree_root(b->btree_id)); mutex_unlock(&c->btree_root_lock); @@ -992,8 +993,10 @@ static int bch2_initial_gc_btree(struct bch_fs *c, enum btree_id id) if (!c->btree_roots[id].b) return 0; - ret = bch2_btree_mark_key_initial(c, BKEY_TYPE_BTREE, - bkey_i_to_s_c(&c->btree_roots[id].b->key)); + b = c->btree_roots[id].b; + if (!btree_node_fake(b)) + ret = bch2_btree_mark_key_initial(c, BKEY_TYPE_BTREE, + bkey_i_to_s_c(&b->key)); if (ret) return ret; diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 6e3ad732c032..8b77cc20524d 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1649,6 +1649,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, new ^= (1 << BTREE_NODE_write_idx); } while (cmpxchg_acquire(&b->flags, old, new) != old); + BUG_ON(btree_node_fake(b)); BUG_ON(!list_empty(&b->write_blocked)); BUG_ON((b->will_make_reachable != NULL) != !b->written); diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index f0e6896a8a5e..fb2f7e21d7d7 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -197,6 +197,7 @@ enum btree_flags { BTREE_NODE_write_in_flight, BTREE_NODE_just_written, BTREE_NODE_dying, + BTREE_NODE_fake, }; BTREE_FLAG(read_in_flight); @@ -209,6 +210,7 @@ BTREE_FLAG(accessed); BTREE_FLAG(write_in_flight); BTREE_FLAG(just_written); BTREE_FLAG(dying); +BTREE_FLAG(fake); static inline struct btree_write *btree_current_write(struct btree *b) { diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 62d1faa3f7a2..a0f37c4ceb09 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -915,6 +915,10 @@ void bch2_btree_interior_update_will_free_node(struct btree_update *as, struct bset_tree *t; set_btree_node_dying(b); + + if (btree_node_fake(b)) + return; + btree_interior_update_add_node_reference(as, b); /* @@ -1052,7 +1056,7 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b) gc_pos_btree_root(b->btree_id), &stats, 0, 0); - if (old) + if (old && !btree_node_fake(old)) bch2_btree_node_free_index(as, NULL, bkey_i_to_s_c(&old->key), &stats); @@ -1422,7 +1426,7 @@ bch2_btree_insert_keys_interior(struct btree_update *as, struct btree *b, bch2_btree_node_lock_for_insert(c, b, iter); - if (bch_keylist_u64s(keys) > bch_btree_keys_u64s_remaining(c, b)) { + if (!bch2_btree_node_insert_fits(c, b, bch_keylist_u64s(keys))) { bch2_btree_node_unlock_write(b, iter); return -1; } @@ -1994,45 +1998,43 @@ void bch2_btree_set_root_for_read(struct bch_fs *c, struct btree *b) bch2_btree_set_root_ondisk(c, b, READ); } -int bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id, - struct closure *writes) +void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id) { - struct btree_update *as; struct closure cl; struct btree *b; + int ret; - memset(&as, 0, sizeof(as)); closure_init_stack(&cl); - while (1) { - /* XXX haven't calculated capacity yet :/ */ - as = bch2_btree_update_start(c, id, 1, - BTREE_INSERT_USE_RESERVE| - BTREE_INSERT_USE_ALLOC_RESERVE, - &cl); + do { + ret = bch2_btree_cache_cannibalize_lock(c, &cl); closure_sync(&cl); + } while (ret); - if (!IS_ERR(as)) - break; - - if (PTR_ERR(as) == -ENOSPC) - return PTR_ERR(as); - } + b = bch2_btree_node_mem_alloc(c); + bch2_btree_cache_cannibalize_unlock(c); - b = __btree_root_alloc(as, 0); + set_btree_node_fake(b); + b->level = 0; + b->btree_id = id; - bch2_btree_node_write(c, b, writes, SIX_LOCK_intent); - btree_update_drop_new_node(c, b); + bkey_extent_init(&b->key); + b->key.k.p = POS_MAX; + bkey_i_to_extent(&b->key)->v._data[0] = U64_MAX - id; - BUG_ON(btree_node_root(c, b)); + bch2_bset_init_first(b, &b->data->keys); + bch2_btree_build_aux_trees(b); - bch2_btree_set_root_inmem(as, b); - bch2_btree_set_root_ondisk(c, b, WRITE); + b->data->min_key = POS_MIN; + b->data->max_key = POS_MAX; + b->data->format = bch2_btree_calc_format(b); + btree_node_set_format(b, b->data->format); - bch2_btree_open_bucket_put(c, b); - six_unlock_intent(&b->lock); + ret = bch2_btree_node_hash_insert(&c->btree_cache, b, b->level, b->btree_id); + BUG_ON(ret); - bch2_btree_update_free(as); + __bch2_btree_set_root_inmem(c, b); - return 0; + six_unlock_write(&b->lock); + six_unlock_intent(&b->lock); } diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h index e129b24ece76..23ee3980579e 100644 --- a/fs/bcachefs/btree_update_interior.h +++ b/fs/bcachefs/btree_update_interior.h @@ -150,7 +150,7 @@ int bch2_foreground_maybe_merge(struct bch_fs *, struct btree_iter *, enum btree_node_sibling); void bch2_btree_set_root_for_read(struct bch_fs *, struct btree *); -int bch2_btree_root_alloc(struct bch_fs *, enum btree_id, struct closure *); +void bch2_btree_root_alloc(struct bch_fs *, enum btree_id); static inline unsigned btree_update_reserve_required(struct bch_fs *c, struct btree *b) @@ -280,6 +280,9 @@ static inline size_t bch_btree_keys_u64s_remaining(struct bch_fs *c, static inline bool bch2_btree_node_insert_fits(struct bch_fs *c, struct btree *b, unsigned u64s) { + if (unlikely(btree_node_fake(b))) + return false; + if (btree_node_is_extents(b)) { /* The insert key might split an existing key * (bch2_insert_fixup_extent() -> BCH_EXTENT_OVERLAP_MIDDLE case: diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index a2def9054c14..fa78c0b1826e 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -88,6 +88,9 @@ struct bkey_i *bch2_journal_find_btree_root(struct bch_fs *c, struct jset *j, if (!entry) return NULL; + if (!entry->u64s) + return ERR_PTR(-EINVAL); + k = entry->start; *level = entry->level; *level = entry->level; @@ -415,6 +418,7 @@ static struct nonce journal_nonce(const struct jset *jset) }}; } +/* this fills in a range with empty jset_entries: */ static void journal_entry_null_range(void *start, void *end) { struct jset_entry *entry; @@ -423,7 +427,7 @@ static void journal_entry_null_range(void *start, void *end) memset(entry, 0, sizeof(*entry)); } -static int journal_validate_key(struct bch_fs *c, struct jset *j, +static int journal_validate_key(struct bch_fs *c, struct jset *jset, struct jset_entry *entry, struct bkey_i *k, enum bkey_type key_type, const char *type) @@ -458,7 +462,7 @@ static int journal_validate_key(struct bch_fs *c, struct jset *j, return 0; } - if (JSET_BIG_ENDIAN(j) != CPU_BIG_ENDIAN) + if (JSET_BIG_ENDIAN(jset) != CPU_BIG_ENDIAN) bch2_bkey_swab(key_type, NULL, bkey_to_packed(k)); invalid = bch2_bkey_invalid(c, key_type, bkey_i_to_s_c(k)); @@ -497,26 +501,27 @@ fsck_err: #define journal_entry_err_on(cond, c, msg, ...) \ ((cond) ? journal_entry_err(c, msg, ##__VA_ARGS__) : false) -static int journal_entry_validate_entries(struct bch_fs *c, struct jset *j, +static int journal_entry_validate_entries(struct bch_fs *c, struct jset *jset, int write) { struct jset_entry *entry; int ret = 0; - vstruct_for_each(j, entry) { + vstruct_for_each(jset, entry) { + void *next = vstruct_next(entry); struct bkey_i *k; if (journal_entry_err_on(vstruct_next(entry) > - vstruct_last(j), c, + vstruct_last(jset), c, "journal entry extends past end of jset")) { - j->u64s = cpu_to_le32((u64 *) entry - j->_data); + jset->u64s = cpu_to_le32((u64 *) entry - jset->_data); break; } switch (entry->type) { case JOURNAL_ENTRY_BTREE_KEYS: vstruct_for_each(entry, k) { - ret = journal_validate_key(c, j, entry, k, + ret = journal_validate_key(c, jset, entry, k, bkey_type(entry->level, entry->btree_id), "key"); @@ -531,12 +536,17 @@ static int journal_entry_validate_entries(struct bch_fs *c, struct jset *j, if (journal_entry_err_on(!entry->u64s || le16_to_cpu(entry->u64s) != k->k.u64s, c, "invalid btree root journal entry: wrong number of keys")) { - journal_entry_null_range(entry, - vstruct_next(entry)); + /* + * we don't want to null out this jset_entry, + * just the contents, so that later we can tell + * we were _supposed_ to have a btree root + */ + entry->u64s = 0; + journal_entry_null_range(vstruct_next(entry), next); continue; } - ret = journal_validate_key(c, j, entry, k, + ret = journal_validate_key(c, jset, entry, k, BKEY_TYPE_BTREE, "btree root"); if (ret) goto fsck_err; @@ -566,21 +576,21 @@ fsck_err: } static int journal_entry_validate(struct bch_fs *c, - struct jset *j, u64 sector, + struct jset *jset, u64 sector, unsigned bucket_sectors_left, unsigned sectors_read, int write) { - size_t bytes = vstruct_bytes(j); + size_t bytes = vstruct_bytes(jset); struct bch_csum csum; int ret = 0; - if (le64_to_cpu(j->magic) != jset_magic(c)) + if (le64_to_cpu(jset->magic) != jset_magic(c)) return JOURNAL_ENTRY_NONE; - if (le32_to_cpu(j->version) != BCACHE_JSET_VERSION) { + if (le32_to_cpu(jset->version) != BCACHE_JSET_VERSION) { bch_err(c, "unknown journal entry version %u", - le32_to_cpu(j->version)); + le32_to_cpu(jset->version)); return BCH_FSCK_UNKNOWN_VERSION; } @@ -594,26 +604,26 @@ static int journal_entry_validate(struct bch_fs *c, if (bytes > sectors_read << 9) return JOURNAL_ENTRY_REREAD; - if (fsck_err_on(!bch2_checksum_type_valid(c, JSET_CSUM_TYPE(j)), c, + if (fsck_err_on(!bch2_checksum_type_valid(c, JSET_CSUM_TYPE(jset)), c, "journal entry with unknown csum type %llu sector %lluu", - JSET_CSUM_TYPE(j), sector)) + JSET_CSUM_TYPE(jset), sector)) return JOURNAL_ENTRY_BAD; - csum = csum_vstruct(c, JSET_CSUM_TYPE(j), journal_nonce(j), j); - if (journal_entry_err_on(bch2_crc_cmp(csum, j->csum), c, + csum = csum_vstruct(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), jset); + if (journal_entry_err_on(bch2_crc_cmp(csum, jset->csum), c, "journal checksum bad, sector %llu", sector)) { /* XXX: retry IO, when we start retrying checksum errors */ /* XXX: note we might have missing journal entries */ return JOURNAL_ENTRY_BAD; } - bch2_encrypt(c, JSET_CSUM_TYPE(j), journal_nonce(j), - j->encrypted_start, - vstruct_end(j) - (void *) j->encrypted_start); + bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), + jset->encrypted_start, + vstruct_end(jset) - (void *) jset->encrypted_start); - if (journal_entry_err_on(le64_to_cpu(j->last_seq) > le64_to_cpu(j->seq), c, + if (journal_entry_err_on(le64_to_cpu(jset->last_seq) > le64_to_cpu(jset->seq), c, "invalid journal entry: last_seq > seq")) - j->last_seq = j->seq; + jset->last_seq = jset->seq; return 0; fsck_err: diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index aadfca18c7ba..29ffba65cfc5 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -676,13 +676,10 @@ static const char *__bch2_fs_start(struct bch_fs *c) struct bch_dev *ca; LIST_HEAD(journal); struct jset *j; - struct closure cl; time64_t now; unsigned i; int ret = -EINVAL; - closure_init_stack(&cl); - mutex_lock(&c->state_lock); BUG_ON(c->state != BCH_FS_STARTING); @@ -710,14 +707,14 @@ static const char *__bch2_fs_start(struct bch_fs *c) unsigned level; struct bkey_i *k; - err = "missing btree root"; k = bch2_journal_find_btree_root(c, j, i, &level); - if (!k && i < BTREE_ID_ALLOC) - goto err; - if (!k) continue; + err = "invalid btree root pointer"; + if (IS_ERR(k)) + goto err; + err = "error reading btree root"; if (bch2_btree_root_read(c, i, k, level)) { if (i != BTREE_ID_ALLOC) @@ -727,6 +724,10 @@ static const char *__bch2_fs_start(struct bch_fs *c) } } + for (i = 0; i < BTREE_ID_NR; i++) + if (!c->btree_roots[i].b) + bch2_btree_root_alloc(c, i); + err = "error reading allocation information"; ret = bch2_alloc_read(c, &journal); if (ret) @@ -744,14 +745,6 @@ static const char *__bch2_fs_start(struct bch_fs *c) if (c->opts.noreplay) goto recovery_done; - err = "cannot allocate new btree root"; - for (i = 0; i < BTREE_ID_NR; i++) - if (!c->btree_roots[i].b && - bch2_btree_root_alloc(c, i, &cl)) - goto err; - - closure_sync(&cl); - /* * bch2_journal_start() can't happen sooner, or btree_gc_finish() * will give spurious errors about oldest_gen > bucket_gen - @@ -807,13 +800,11 @@ static const char *__bch2_fs_start(struct bch_fs *c) goto err; } - err = "cannot allocate new btree root"; - for (i = 0; i < BTREE_ID_NR; i++) - if (bch2_btree_root_alloc(c, i, &cl)) - goto err; - clear_bit(BCH_FS_BRAND_NEW_FS, &c->flags); + for (i = 0; i < BTREE_ID_NR; i++) + bch2_btree_root_alloc(c, i); + /* * journal_res_get() will crash if called before this has * set up the journal.pin FIFO and journal.cur pointer: @@ -825,9 +816,6 @@ static const char *__bch2_fs_start(struct bch_fs *c) if (bch2_fs_allocator_start(c)) goto err; - /* Wait for new btree roots to be written: */ - closure_sync(&cl); - bch2_inode_init(c, &inode, 0, 0, S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0, NULL); inode.bi_inum = BCACHEFS_ROOT_INO; @@ -883,8 +871,6 @@ out: return err; err: fsck_err: - closure_sync(&cl); - switch (ret) { case BCH_FSCK_ERRORS_NOT_FIXED: bch_err(c, "filesystem contains errors: please report this to the developers"); |