summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- fs/bcachefs/alloc.c | 8
-rw-r--r-- fs/bcachefs/bcachefs.h | 1
-rw-r--r-- fs/bcachefs/bset.c | 3
-rw-r--r-- fs/bcachefs/btree_gc.c | 9
-rw-r--r-- fs/bcachefs/btree_io.c | 1
-rw-r--r-- fs/bcachefs/btree_types.h | 2
-rw-r--r-- fs/bcachefs/btree_update_interior.c | 58
-rw-r--r-- fs/bcachefs/btree_update_interior.h | 5
-rw-r--r-- fs/bcachefs/journal.c | 58
-rw-r--r-- fs/bcachefs/super.c | 36
10 files changed, 93 insertions, 88 deletions
diff --git a/fs/bcachefs/alloc.c b/fs/bcachefs/alloc.c
index fc492da530e3..f7ff8027661c 100644
--- a/fs/bcachefs/alloc.c
+++ b/fs/bcachefs/alloc.c
@@ -292,9 +292,6 @@ int bch2_alloc_read(struct bch_fs *c, struct list_head *journal_replay_list)
unsigned i;
int ret;
- if (!c->btree_roots[BTREE_ID_ALLOC].b)
- return 0;
-
for_each_btree_key(&iter, c, BTREE_ID_ALLOC, POS_MIN, 0, k) {
bch2_alloc_read_key(c, k);
bch2_btree_iter_cond_resched(&iter);
@@ -539,7 +536,8 @@ static void bch2_prio_timer_init(struct bch_fs *c, int rw)
static void verify_not_on_freelist(struct bch_fs *c, struct bch_dev *ca,
size_t bucket)
{
- if (expensive_debug_checks(c)) {
+ if (expensive_debug_checks(c) &&
+ test_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags)) {
size_t iter;
long i;
unsigned j;
@@ -1970,6 +1968,8 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
ca->nr_invalidated--;
}
+ set_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags);
+
/* now flush dirty btree nodes: */
if (invalidating_data) {
struct bucket_table *tbl;
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index e04408f10d5d..78c427fa17a6 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -423,6 +423,7 @@ enum {
/* startup: */
BCH_FS_BRAND_NEW_FS,
BCH_FS_ALLOC_READ_DONE,
+ BCH_FS_ALLOCATOR_STARTED,
BCH_FS_INITIAL_GC_DONE,
BCH_FS_FSCK_DONE,
diff --git a/fs/bcachefs/bset.c b/fs/bcachefs/bset.c
index 10f3f3f353a6..02be5bb42e4e 100644
--- a/fs/bcachefs/bset.c
+++ b/fs/bcachefs/bset.c
@@ -1550,9 +1550,6 @@ void bch2_btree_node_iter_init(struct btree_node_iter *iter,
__bch2_btree_node_iter_init(iter, is_extents);
- //if (bkey_cmp(search, b->curr_max_key) > 0)
- // return;
-
switch (bch2_bkey_pack_pos_lossy(&p, search, b)) {
case BKEY_PACK_POS_EXACT:
packed_search = &p;
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 01694f9f02e0..9f1071e5ac38 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -285,7 +285,8 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id)
mutex_lock(&c->btree_root_lock);
b = c->btree_roots[btree_id].b;
- bch2_gc_mark_key(c, BKEY_TYPE_BTREE, bkey_i_to_s_c(&b->key), 0);
+ if (!btree_node_fake(b))
+ bch2_gc_mark_key(c, BKEY_TYPE_BTREE, bkey_i_to_s_c(&b->key), 0);
gc_pos_set(c, gc_pos_btree_root(b->btree_id));
mutex_unlock(&c->btree_root_lock);
@@ -992,8 +993,10 @@ static int bch2_initial_gc_btree(struct bch_fs *c, enum btree_id id)
if (!c->btree_roots[id].b)
return 0;
- ret = bch2_btree_mark_key_initial(c, BKEY_TYPE_BTREE,
- bkey_i_to_s_c(&c->btree_roots[id].b->key));
+ b = c->btree_roots[id].b;
+ if (!btree_node_fake(b))
+ ret = bch2_btree_mark_key_initial(c, BKEY_TYPE_BTREE,
+ bkey_i_to_s_c(&b->key));
if (ret)
return ret;
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 6e3ad732c032..8b77cc20524d 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -1649,6 +1649,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
new ^= (1 << BTREE_NODE_write_idx);
} while (cmpxchg_acquire(&b->flags, old, new) != old);
+ BUG_ON(btree_node_fake(b));
BUG_ON(!list_empty(&b->write_blocked));
BUG_ON((b->will_make_reachable != NULL) != !b->written);
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index f0e6896a8a5e..fb2f7e21d7d7 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -197,6 +197,7 @@ enum btree_flags {
BTREE_NODE_write_in_flight,
BTREE_NODE_just_written,
BTREE_NODE_dying,
+ BTREE_NODE_fake,
};
BTREE_FLAG(read_in_flight);
@@ -209,6 +210,7 @@ BTREE_FLAG(accessed);
BTREE_FLAG(write_in_flight);
BTREE_FLAG(just_written);
BTREE_FLAG(dying);
+BTREE_FLAG(fake);
static inline struct btree_write *btree_current_write(struct btree *b)
{
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 62d1faa3f7a2..a0f37c4ceb09 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -915,6 +915,10 @@ void bch2_btree_interior_update_will_free_node(struct btree_update *as,
struct bset_tree *t;
set_btree_node_dying(b);
+
+ if (btree_node_fake(b))
+ return;
+
btree_interior_update_add_node_reference(as, b);
/*
@@ -1052,7 +1056,7 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b)
gc_pos_btree_root(b->btree_id),
&stats, 0, 0);
- if (old)
+ if (old && !btree_node_fake(old))
bch2_btree_node_free_index(as, NULL,
bkey_i_to_s_c(&old->key),
&stats);
@@ -1422,7 +1426,7 @@ bch2_btree_insert_keys_interior(struct btree_update *as, struct btree *b,
bch2_btree_node_lock_for_insert(c, b, iter);
- if (bch_keylist_u64s(keys) > bch_btree_keys_u64s_remaining(c, b)) {
+ if (!bch2_btree_node_insert_fits(c, b, bch_keylist_u64s(keys))) {
bch2_btree_node_unlock_write(b, iter);
return -1;
}
@@ -1994,45 +1998,43 @@ void bch2_btree_set_root_for_read(struct bch_fs *c, struct btree *b)
bch2_btree_set_root_ondisk(c, b, READ);
}
-int bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id,
- struct closure *writes)
+void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id)
{
- struct btree_update *as;
struct closure cl;
struct btree *b;
+ int ret;
- memset(&as, 0, sizeof(as));
closure_init_stack(&cl);
- while (1) {
- /* XXX haven't calculated capacity yet :/ */
- as = bch2_btree_update_start(c, id, 1,
- BTREE_INSERT_USE_RESERVE|
- BTREE_INSERT_USE_ALLOC_RESERVE,
- &cl);
+ do {
+ ret = bch2_btree_cache_cannibalize_lock(c, &cl);
closure_sync(&cl);
+ } while (ret);
- if (!IS_ERR(as))
- break;
-
- if (PTR_ERR(as) == -ENOSPC)
- return PTR_ERR(as);
- }
+ b = bch2_btree_node_mem_alloc(c);
+ bch2_btree_cache_cannibalize_unlock(c);
- b = __btree_root_alloc(as, 0);
+ set_btree_node_fake(b);
+ b->level = 0;
+ b->btree_id = id;
- bch2_btree_node_write(c, b, writes, SIX_LOCK_intent);
- btree_update_drop_new_node(c, b);
+ bkey_extent_init(&b->key);
+ b->key.k.p = POS_MAX;
+ bkey_i_to_extent(&b->key)->v._data[0] = U64_MAX - id;
- BUG_ON(btree_node_root(c, b));
+ bch2_bset_init_first(b, &b->data->keys);
+ bch2_btree_build_aux_trees(b);
- bch2_btree_set_root_inmem(as, b);
- bch2_btree_set_root_ondisk(c, b, WRITE);
+ b->data->min_key = POS_MIN;
+ b->data->max_key = POS_MAX;
+ b->data->format = bch2_btree_calc_format(b);
+ btree_node_set_format(b, b->data->format);
- bch2_btree_open_bucket_put(c, b);
- six_unlock_intent(&b->lock);
+ ret = bch2_btree_node_hash_insert(&c->btree_cache, b, b->level, b->btree_id);
+ BUG_ON(ret);
- bch2_btree_update_free(as);
+ __bch2_btree_set_root_inmem(c, b);
- return 0;
+ six_unlock_write(&b->lock);
+ six_unlock_intent(&b->lock);
}
diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h
index e129b24ece76..23ee3980579e 100644
--- a/fs/bcachefs/btree_update_interior.h
+++ b/fs/bcachefs/btree_update_interior.h
@@ -150,7 +150,7 @@ int bch2_foreground_maybe_merge(struct bch_fs *, struct btree_iter *,
enum btree_node_sibling);
void bch2_btree_set_root_for_read(struct bch_fs *, struct btree *);
-int bch2_btree_root_alloc(struct bch_fs *, enum btree_id, struct closure *);
+void bch2_btree_root_alloc(struct bch_fs *, enum btree_id);
static inline unsigned btree_update_reserve_required(struct bch_fs *c,
struct btree *b)
@@ -280,6 +280,9 @@ static inline size_t bch_btree_keys_u64s_remaining(struct bch_fs *c,
static inline bool bch2_btree_node_insert_fits(struct bch_fs *c,
struct btree *b, unsigned u64s)
{
+ if (unlikely(btree_node_fake(b)))
+ return false;
+
if (btree_node_is_extents(b)) {
/* The insert key might split an existing key
* (bch2_insert_fixup_extent() -> BCH_EXTENT_OVERLAP_MIDDLE case:
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index a2def9054c14..fa78c0b1826e 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -88,6 +88,9 @@ struct bkey_i *bch2_journal_find_btree_root(struct bch_fs *c, struct jset *j,
if (!entry)
return NULL;
+ if (!entry->u64s)
+ return ERR_PTR(-EINVAL);
+
k = entry->start;
*level = entry->level;
*level = entry->level;
@@ -415,6 +418,7 @@ static struct nonce journal_nonce(const struct jset *jset)
}};
}
+/* this fills in a range with empty jset_entries: */
static void journal_entry_null_range(void *start, void *end)
{
struct jset_entry *entry;
@@ -423,7 +427,7 @@ static void journal_entry_null_range(void *start, void *end)
memset(entry, 0, sizeof(*entry));
}
-static int journal_validate_key(struct bch_fs *c, struct jset *j,
+static int journal_validate_key(struct bch_fs *c, struct jset *jset,
struct jset_entry *entry,
struct bkey_i *k, enum bkey_type key_type,
const char *type)
@@ -458,7 +462,7 @@ static int journal_validate_key(struct bch_fs *c, struct jset *j,
return 0;
}
- if (JSET_BIG_ENDIAN(j) != CPU_BIG_ENDIAN)
+ if (JSET_BIG_ENDIAN(jset) != CPU_BIG_ENDIAN)
bch2_bkey_swab(key_type, NULL, bkey_to_packed(k));
invalid = bch2_bkey_invalid(c, key_type, bkey_i_to_s_c(k));
@@ -497,26 +501,27 @@ fsck_err:
#define journal_entry_err_on(cond, c, msg, ...) \
((cond) ? journal_entry_err(c, msg, ##__VA_ARGS__) : false)
-static int journal_entry_validate_entries(struct bch_fs *c, struct jset *j,
+static int journal_entry_validate_entries(struct bch_fs *c, struct jset *jset,
int write)
{
struct jset_entry *entry;
int ret = 0;
- vstruct_for_each(j, entry) {
+ vstruct_for_each(jset, entry) {
+ void *next = vstruct_next(entry);
struct bkey_i *k;
if (journal_entry_err_on(vstruct_next(entry) >
- vstruct_last(j), c,
+ vstruct_last(jset), c,
"journal entry extends past end of jset")) {
- j->u64s = cpu_to_le32((u64 *) entry - j->_data);
+ jset->u64s = cpu_to_le32((u64 *) entry - jset->_data);
break;
}
switch (entry->type) {
case JOURNAL_ENTRY_BTREE_KEYS:
vstruct_for_each(entry, k) {
- ret = journal_validate_key(c, j, entry, k,
+ ret = journal_validate_key(c, jset, entry, k,
bkey_type(entry->level,
entry->btree_id),
"key");
@@ -531,12 +536,17 @@ static int journal_entry_validate_entries(struct bch_fs *c, struct jset *j,
if (journal_entry_err_on(!entry->u64s ||
le16_to_cpu(entry->u64s) != k->k.u64s, c,
"invalid btree root journal entry: wrong number of keys")) {
- journal_entry_null_range(entry,
- vstruct_next(entry));
+ /*
+ * we don't want to null out this jset_entry,
+ * just the contents, so that later we can tell
+ * we were _supposed_ to have a btree root
+ */
+ entry->u64s = 0;
+ journal_entry_null_range(vstruct_next(entry), next);
continue;
}
- ret = journal_validate_key(c, j, entry, k,
+ ret = journal_validate_key(c, jset, entry, k,
BKEY_TYPE_BTREE, "btree root");
if (ret)
goto fsck_err;
@@ -566,21 +576,21 @@ fsck_err:
}
static int journal_entry_validate(struct bch_fs *c,
- struct jset *j, u64 sector,
+ struct jset *jset, u64 sector,
unsigned bucket_sectors_left,
unsigned sectors_read,
int write)
{
- size_t bytes = vstruct_bytes(j);
+ size_t bytes = vstruct_bytes(jset);
struct bch_csum csum;
int ret = 0;
- if (le64_to_cpu(j->magic) != jset_magic(c))
+ if (le64_to_cpu(jset->magic) != jset_magic(c))
return JOURNAL_ENTRY_NONE;
- if (le32_to_cpu(j->version) != BCACHE_JSET_VERSION) {
+ if (le32_to_cpu(jset->version) != BCACHE_JSET_VERSION) {
bch_err(c, "unknown journal entry version %u",
- le32_to_cpu(j->version));
+ le32_to_cpu(jset->version));
return BCH_FSCK_UNKNOWN_VERSION;
}
@@ -594,26 +604,26 @@ static int journal_entry_validate(struct bch_fs *c,
if (bytes > sectors_read << 9)
return JOURNAL_ENTRY_REREAD;
- if (fsck_err_on(!bch2_checksum_type_valid(c, JSET_CSUM_TYPE(j)), c,
+ if (fsck_err_on(!bch2_checksum_type_valid(c, JSET_CSUM_TYPE(jset)), c,
"journal entry with unknown csum type %llu sector %lluu",
- JSET_CSUM_TYPE(j), sector))
+ JSET_CSUM_TYPE(jset), sector))
return JOURNAL_ENTRY_BAD;
- csum = csum_vstruct(c, JSET_CSUM_TYPE(j), journal_nonce(j), j);
- if (journal_entry_err_on(bch2_crc_cmp(csum, j->csum), c,
+ csum = csum_vstruct(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), jset);
+ if (journal_entry_err_on(bch2_crc_cmp(csum, jset->csum), c,
"journal checksum bad, sector %llu", sector)) {
/* XXX: retry IO, when we start retrying checksum errors */
/* XXX: note we might have missing journal entries */
return JOURNAL_ENTRY_BAD;
}
- bch2_encrypt(c, JSET_CSUM_TYPE(j), journal_nonce(j),
- j->encrypted_start,
- vstruct_end(j) - (void *) j->encrypted_start);
+ bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset),
+ jset->encrypted_start,
+ vstruct_end(jset) - (void *) jset->encrypted_start);
- if (journal_entry_err_on(le64_to_cpu(j->last_seq) > le64_to_cpu(j->seq), c,
+ if (journal_entry_err_on(le64_to_cpu(jset->last_seq) > le64_to_cpu(jset->seq), c,
"invalid journal entry: last_seq > seq"))
- j->last_seq = j->seq;
+ jset->last_seq = jset->seq;
return 0;
fsck_err:
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index aadfca18c7ba..29ffba65cfc5 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -676,13 +676,10 @@ static const char *__bch2_fs_start(struct bch_fs *c)
struct bch_dev *ca;
LIST_HEAD(journal);
struct jset *j;
- struct closure cl;
time64_t now;
unsigned i;
int ret = -EINVAL;
- closure_init_stack(&cl);
-
mutex_lock(&c->state_lock);
BUG_ON(c->state != BCH_FS_STARTING);
@@ -710,14 +707,14 @@ static const char *__bch2_fs_start(struct bch_fs *c)
unsigned level;
struct bkey_i *k;
- err = "missing btree root";
k = bch2_journal_find_btree_root(c, j, i, &level);
- if (!k && i < BTREE_ID_ALLOC)
- goto err;
-
if (!k)
continue;
+ err = "invalid btree root pointer";
+ if (IS_ERR(k))
+ goto err;
+
err = "error reading btree root";
if (bch2_btree_root_read(c, i, k, level)) {
if (i != BTREE_ID_ALLOC)
@@ -727,6 +724,10 @@ static const char *__bch2_fs_start(struct bch_fs *c)
}
}
+ for (i = 0; i < BTREE_ID_NR; i++)
+ if (!c->btree_roots[i].b)
+ bch2_btree_root_alloc(c, i);
+
err = "error reading allocation information";
ret = bch2_alloc_read(c, &journal);
if (ret)
@@ -744,14 +745,6 @@ static const char *__bch2_fs_start(struct bch_fs *c)
if (c->opts.noreplay)
goto recovery_done;
- err = "cannot allocate new btree root";
- for (i = 0; i < BTREE_ID_NR; i++)
- if (!c->btree_roots[i].b &&
- bch2_btree_root_alloc(c, i, &cl))
- goto err;
-
- closure_sync(&cl);
-
/*
* bch2_journal_start() can't happen sooner, or btree_gc_finish()
* will give spurious errors about oldest_gen > bucket_gen -
@@ -807,13 +800,11 @@ static const char *__bch2_fs_start(struct bch_fs *c)
goto err;
}
- err = "cannot allocate new btree root";
- for (i = 0; i < BTREE_ID_NR; i++)
- if (bch2_btree_root_alloc(c, i, &cl))
- goto err;
-
clear_bit(BCH_FS_BRAND_NEW_FS, &c->flags);
+ for (i = 0; i < BTREE_ID_NR; i++)
+ bch2_btree_root_alloc(c, i);
+
/*
* journal_res_get() will crash if called before this has
* set up the journal.pin FIFO and journal.cur pointer:
@@ -825,9 +816,6 @@ static const char *__bch2_fs_start(struct bch_fs *c)
if (bch2_fs_allocator_start(c))
goto err;
- /* Wait for new btree roots to be written: */
- closure_sync(&cl);
-
bch2_inode_init(c, &inode, 0, 0,
S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0, NULL);
inode.bi_inum = BCACHEFS_ROOT_INO;
@@ -883,8 +871,6 @@ out:
return err;
err:
fsck_err:
- closure_sync(&cl);
-
switch (ret) {
case BCH_FSCK_ERRORS_NOT_FIXED:
bch_err(c, "filesystem contains errors: please report this to the developers");