diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/bcachefs/backpointers.c | 49 | ||||
-rw-r--r-- | fs/bcachefs/btree_gc.c | 4 | ||||
-rw-r--r-- | fs/bcachefs/btree_io.c | 7 | ||||
-rw-r--r-- | fs/bcachefs/btree_iter.c | 28 | ||||
-rw-r--r-- | fs/bcachefs/btree_journal_iter.c | 139 | ||||
-rw-r--r-- | fs/bcachefs/btree_journal_iter.h | 36 | ||||
-rw-r--r-- | fs/bcachefs/btree_journal_iter_types.h | 29 | ||||
-rw-r--r-- | fs/bcachefs/btree_trans_commit.c | 2 | ||||
-rw-r--r-- | fs/bcachefs/btree_update.c | 2 | ||||
-rw-r--r-- | fs/bcachefs/btree_update_interior.c | 2 | ||||
-rw-r--r-- | fs/bcachefs/darray.h | 5 | ||||
-rw-r--r-- | fs/bcachefs/disk_accounting.c | 72 | ||||
-rw-r--r-- | fs/bcachefs/extent_update.c | 8 | ||||
-rw-r--r-- | fs/bcachefs/journal.c | 8 | ||||
-rw-r--r-- | fs/bcachefs/journal_io.c | 6 | ||||
-rw-r--r-- | fs/bcachefs/journal_io.h | 23 | ||||
-rw-r--r-- | fs/bcachefs/journal_reclaim.c | 14 | ||||
-rw-r--r-- | fs/bcachefs/journal_types.h | 2 | ||||
-rw-r--r-- | fs/bcachefs/opts.h | 7 | ||||
-rw-r--r-- | fs/bcachefs/recovery.c | 39 | ||||
-rw-r--r-- | fs/bcachefs/recovery_passes_format.h | 2 | ||||
-rw-r--r-- | fs/bcachefs/sb-counters_format.h | 11 | ||||
-rw-r--r-- | fs/bcachefs/super-io.c | 51 | ||||
-rw-r--r-- | fs/bcachefs/super-io.h | 3 | ||||
-rw-r--r-- | fs/bcachefs/super.c | 6 |
25 files changed, 334 insertions, 221 deletions
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index c43aaab4c108..cb25cddb759b 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -532,10 +532,6 @@ static int check_bp_exists(struct btree_trans *trans, struct btree_iter other_extent_iter = {}; CLASS(printbuf, buf)(); - if (bpos_lt(bp->k.p, s->bp_start) || - bpos_gt(bp->k.p, s->bp_end)) - return 0; - CLASS(btree_iter, bp_iter)(trans, BTREE_ID_backpointers, bp->k.p, 0); struct bkey_s_c bp_k = bch2_btree_iter_peek_slot(&bp_iter); int ret = bkey_err(bp_k); @@ -690,6 +686,10 @@ static int check_extent_to_backpointers(struct btree_trans *trans, struct bkey_i_backpointer bp; bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bp); + if (bpos_lt(bp.k.p, s->bp_start) || + bpos_gt(bp.k.p, s->bp_end)) + continue; + int ret = !empty ? check_bp_exists(trans, s, &bp, k) : bch2_bucket_backpointer_mod(trans, k, &bp, true); @@ -809,8 +809,6 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, for (enum btree_id btree_id = 0; btree_id < btree_id_nr_alive(c); btree_id++) { - /* btree_type_has_ptrs should probably include BTREE_ID_stripes, - * definitely her... */ int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1; ret = commit_do(trans, NULL, NULL, @@ -899,7 +897,7 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k); - if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_backpointer_bucket_gen && + if (c->sb.version_upgrade_complete < bcachefs_metadata_version_backpointer_bucket_gen && (bp.v->bucket_gen != a->gen || bp.v->pad)) { ret = bch2_backpointer_del(trans, bp_k.k->p); @@ -931,6 +929,14 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b if (sectors[ALLOC_dirty] != a->dirty_sectors || sectors[ALLOC_cached] != a->cached_sectors || sectors[ALLOC_stripe] != a->stripe_sectors) { + /* + * Post 1.14 upgrade, we assume that backpointers are mostly + * correct and a sector count mismatch is probably due to a + * write buffer race + * + * Pre upgrade, we expect all the buckets to be wrong, a write + * buffer flush is pointless: + */ if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_backpointer_bucket_gen) { ret = bch2_backpointers_maybe_flush(trans, alloc_k, last_flushed); if (ret) @@ -978,12 +984,22 @@ static bool backpointer_node_has_missing(struct bch_fs *c, struct bkey_s_c k) goto next; struct bpos bucket = bp_pos_to_bucket(ca, pos); - u64 next = ca->mi.nbuckets; - - unsigned long *bitmap = READ_ONCE(ca->bucket_backpointer_mismatch.buckets); - if (bitmap) - next = min_t(u64, next, - find_next_bit(bitmap, ca->mi.nbuckets, bucket.offset)); + u64 next = min(bucket.offset, ca->mi.nbuckets); + + unsigned long *mismatch = READ_ONCE(ca->bucket_backpointer_mismatch.buckets); + unsigned long *empty = READ_ONCE(ca->bucket_backpointer_empty.buckets); + /* + * Find the first bucket with mismatches - but + * not empty buckets; we don't need to pin those + * because we just recreate all backpointers in + * those buckets + */ + if (mismatch && empty) + next = find_next_andnot_bit(mismatch, empty, ca->mi.nbuckets, next); + else if (mismatch) + next = find_next_bit(mismatch, ca->mi.nbuckets, next); + else + next = ca->mi.nbuckets; bucket.offset = next; if (bucket.offset == ca->mi.nbuckets) @@ -1110,17 +1126,18 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c) if (ret) goto err; - u64 nr_buckets = 0, nr_mismatches = 0; + u64 nr_buckets = 0, nr_mismatches = 0, nr_empty = 0; for_each_member_device(c, ca) { nr_buckets += ca->mi.nbuckets; nr_mismatches += ca->bucket_backpointer_mismatch.nr; + nr_empty += ca->bucket_backpointer_empty.nr; } if (!nr_mismatches) goto err; - bch_info(c, "scanning for missing backpointers in %llu/%llu buckets", - nr_mismatches, nr_buckets); + bch_info(c, "scanning for missing backpointers in %llu/%llu buckets, %llu buckets with no backpointers", + nr_mismatches - nr_empty, nr_buckets, nr_empty); while (1) { ret = bch2_pin_backpointer_nodes_with_missing(trans, s.bp_start, &s.bp_end); diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index ae7d260589d8..43f294284d57 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -356,7 +356,7 @@ again: bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b); iter.prefetch = true; - while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) { + while ((k = bch2_btree_and_journal_iter_peek(c, &iter)).k) { BUG_ON(bpos_lt(k.k->p, b->data->min_key)); BUG_ON(bpos_gt(k.k->p, b->data->max_key)); @@ -470,7 +470,7 @@ again: bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b); iter.prefetch = true; - while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) { + while ((k = bch2_btree_and_journal_iter_peek(c, &iter)).k) { bch2_bkey_buf_reassemble(&cur_k, c, k); bch2_btree_and_journal_iter_advance(&iter); diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 276cf088539e..2e3dd9bacac5 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -131,10 +131,10 @@ static void *btree_bounce_alloc(struct bch_fs *c, size_t size, BUG_ON(size > c->opts.btree_node_size); *used_mempool = false; - p = kvmalloc(size, GFP_NOWAIT); + p = kvmalloc(size, GFP_NOWAIT|__GFP_ACCOUNT|__GFP_RECLAIMABLE); if (!p) { *used_mempool = true; - p = mempool_alloc(&c->btree_bounce_pool, GFP_NOFS); + p = mempool_alloc(&c->btree_bounce_pool, GFP_NOFS|__GFP_ACCOUNT|__GFP_RECLAIMABLE); } memalloc_nofs_restore(flags); return p; @@ -1014,6 +1014,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, k = bkey_p_next(k); continue; drop_this_key: + ret = 0; next_good_key = k->u64s; if (!next_good_key || @@ -1470,7 +1471,7 @@ start: } prt_newline(&buf); - if (failed.nr) + if (ret || failed.nr) bch2_print_str_ratelimited(c, KERN_ERR, buf.buf); async_object_list_del(c, btree_read_bio, rb->list_idx); diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 546b559fe3ce..76f430f93dc1 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -650,7 +650,7 @@ static void bch2_trans_revalidate_updates_in_node(struct btree_trans *trans, str i->old_v = bch2_btree_path_peek_slot(trans->paths + i->path, &i->old_k).v; if (unlikely(trans->journal_replay_not_finished)) { - struct bkey_i *j_k = + const struct bkey_i *j_k = bch2_journal_keys_peek_slot(c, i->btree_id, i->level, i->k->k.p); @@ -848,7 +848,7 @@ static int btree_path_prefetch_j(struct btree_trans *trans, struct btree_path *p break; bch2_btree_and_journal_iter_advance(jiter); - k = bch2_btree_and_journal_iter_peek(jiter); + k = bch2_btree_and_journal_iter_peek(c, jiter); if (!k.k) break; @@ -898,7 +898,7 @@ static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans, __bch2_btree_and_journal_iter_init_node_iter(trans, &jiter, l->b, l->iter, path->pos); - k = bch2_btree_and_journal_iter_peek(&jiter); + k = bch2_btree_and_journal_iter_peek(c, &jiter); if (!k.k) { CLASS(printbuf, buf)(); @@ -2120,10 +2120,10 @@ void bch2_btree_trans_peek_slot_updates(struct btree_trans *trans, struct btree_ } } -static struct bkey_i *bch2_btree_journal_peek(struct btree_trans *trans, - struct btree_iter *iter, - struct bpos search_pos, - struct bpos end_pos) +static const struct bkey_i *bch2_btree_journal_peek(struct btree_trans *trans, + struct btree_iter *iter, + struct bpos search_pos, + struct bpos end_pos) { struct btree_path *path = btree_iter_path(trans, iter); @@ -2139,7 +2139,7 @@ struct bkey_s_c btree_trans_peek_slot_journal(struct btree_trans *trans, struct btree_iter *iter) { struct btree_path *path = btree_iter_path(trans, iter); - struct bkey_i *k = bch2_btree_journal_peek(trans, iter, path->pos, path->pos); + const struct bkey_i *k = bch2_btree_journal_peek(trans, iter, path->pos, path->pos); if (k) { iter->k = k->k; @@ -2156,7 +2156,7 @@ void btree_trans_peek_journal(struct btree_trans *trans, struct bkey_s_c *k) { struct btree_path *path = btree_iter_path(trans, iter); - struct bkey_i *next_journal = + const struct bkey_i *next_journal = bch2_btree_journal_peek(trans, iter, search_key, k->k ? k->k->p : path_l(path)->b->key.k.p); if (next_journal) { @@ -2165,10 +2165,10 @@ void btree_trans_peek_journal(struct btree_trans *trans, } } -static struct bkey_i *bch2_btree_journal_peek_prev(struct btree_trans *trans, - struct btree_iter *iter, - struct bpos search_key, - struct bpos end_pos) +static const struct bkey_i *bch2_btree_journal_peek_prev(struct btree_trans *trans, + struct btree_iter *iter, + struct bpos search_key, + struct bpos end_pos) { struct btree_path *path = btree_iter_path(trans, iter); @@ -2186,7 +2186,7 @@ void btree_trans_peek_prev_journal(struct btree_trans *trans, struct bkey_s_c *k) { struct btree_path *path = btree_iter_path(trans, iter); - struct bkey_i *next_journal = + const struct bkey_i *next_journal = bch2_btree_journal_peek_prev(trans, iter, search_key, k->k ? k->k->p : path_l(path)->b->data->min_key); diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c index 24f2fbe84ad7..f63c349e09da 100644 --- a/fs/bcachefs/btree_journal_iter.c +++ b/fs/bcachefs/btree_journal_iter.c @@ -46,21 +46,22 @@ static size_t __bch2_journal_key_search(struct journal_keys *keys, enum btree_id id, unsigned level, struct bpos pos) { + struct bch_fs *c = container_of(keys, struct bch_fs, journal_keys); size_t l = 0, r = keys->nr, m; while (l < r) { m = l + ((r - l) >> 1); - if (__journal_key_cmp(id, level, pos, idx_to_key(keys, m)) > 0) + if (__journal_key_cmp(c, id, level, pos, idx_to_key(keys, m)) > 0) l = m + 1; else r = m; } BUG_ON(l < keys->nr && - __journal_key_cmp(id, level, pos, idx_to_key(keys, l)) > 0); + __journal_key_cmp(c, id, level, pos, idx_to_key(keys, l)) > 0); BUG_ON(l && - __journal_key_cmp(id, level, pos, idx_to_key(keys, l - 1)) <= 0); + __journal_key_cmp(c, id, level, pos, idx_to_key(keys, l - 1)) <= 0); return l; } @@ -73,9 +74,9 @@ static size_t bch2_journal_key_search(struct journal_keys *keys, } /* Returns first non-overwritten key >= search key: */ -struct bkey_i *bch2_journal_keys_peek_max(struct bch_fs *c, enum btree_id btree_id, - unsigned level, struct bpos pos, - struct bpos end_pos, size_t *idx) +const struct bkey_i *bch2_journal_keys_peek_max(struct bch_fs *c, enum btree_id btree_id, + unsigned level, struct bpos pos, + struct bpos end_pos, size_t *idx) { struct journal_keys *keys = &c->journal_keys; unsigned iters = 0; @@ -87,7 +88,7 @@ search: *idx = __bch2_journal_key_search(keys, btree_id, level, pos); while (*idx && - __journal_key_cmp(btree_id, level, end_pos, idx_to_key(keys, *idx - 1)) <= 0) { + __journal_key_cmp(c, btree_id, level, end_pos, idx_to_key(keys, *idx - 1)) <= 0) { --(*idx); iters++; if (iters == 10) { @@ -96,11 +97,11 @@ search: } } - struct bkey_i *ret = NULL; + const struct bkey_i *ret = NULL; rcu_read_lock(); /* for overwritten_ranges */ while ((k = *idx < keys->nr ? idx_to_key(keys, *idx) : NULL)) { - if (__journal_key_cmp(btree_id, level, end_pos, k) < 0) + if (__journal_key_cmp(c, btree_id, level, end_pos, k) < 0) break; if (k->overwritten) { @@ -111,8 +112,8 @@ search: continue; } - if (__journal_key_cmp(btree_id, level, pos, k) <= 0) { - ret = k->k; + if (__journal_key_cmp(c, btree_id, level, pos, k) <= 0) { + ret = journal_key_k(c, k); break; } @@ -129,9 +130,9 @@ search: return ret; } -struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *c, enum btree_id btree_id, - unsigned level, struct bpos pos, - struct bpos end_pos, size_t *idx) +const struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *c, enum btree_id btree_id, + unsigned level, struct bpos pos, + struct bpos end_pos, size_t *idx) { struct journal_keys *keys = &c->journal_keys; unsigned iters = 0; @@ -146,7 +147,7 @@ search: *idx = __bch2_journal_key_search(keys, btree_id, level, pos); while (*idx < keys->nr && - __journal_key_cmp(btree_id, level, end_pos, idx_to_key(keys, *idx)) >= 0) { + __journal_key_cmp(c, btree_id, level, end_pos, idx_to_key(keys, *idx)) >= 0) { (*idx)++; iters++; if (iters == 10) { @@ -158,12 +159,12 @@ search: if (*idx == keys->nr) --(*idx); - struct bkey_i *ret = NULL; + const struct bkey_i *ret = NULL; rcu_read_lock(); /* for overwritten_ranges */ while (true) { k = idx_to_key(keys, *idx); - if (__journal_key_cmp(btree_id, level, end_pos, k) > 0) + if (__journal_key_cmp(c, btree_id, level, end_pos, k) > 0) break; if (k->overwritten) { @@ -175,8 +176,8 @@ search: continue; } - if (__journal_key_cmp(btree_id, level, pos, k) >= 0) { - ret = k->k; + if (__journal_key_cmp(c, btree_id, level, pos, k) >= 0) { + ret = journal_key_k(c, k); break; } @@ -194,8 +195,8 @@ search: return ret; } -struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *c, enum btree_id btree_id, - unsigned level, struct bpos pos) +const struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *c, enum btree_id btree_id, + unsigned level, struct bpos pos) { size_t idx = 0; @@ -264,13 +265,8 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id, struct journal_key n = { .btree_id = id, .level = level, - .k = k, .allocated = true, - /* - * Ensure these keys are done last by journal replay, to unblock - * journal reclaim: - */ - .journal_seq = U64_MAX, + .allocated_k = k, }; struct journal_keys *keys = &c->journal_keys; size_t idx = bch2_journal_key_search(keys, id, level, k->k.p); @@ -278,8 +274,8 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id, BUG_ON(test_bit(BCH_FS_rw, &c->flags)); if (idx < keys->size && - journal_key_cmp(&n, &keys->data[idx]) == 0) { - struct bkey_i *o = keys->data[idx].k; + journal_key_cmp(c, &n, &keys->data[idx]) == 0) { + struct bkey_i *o = journal_key_k(c, &keys->data[idx]); if (k->k.type == KEY_TYPE_accounting && o->k.type == KEY_TYPE_accounting) { @@ -291,7 +287,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id, } if (keys->data[idx].allocated) - kfree(keys->data[idx].k); + kfree(keys->data[idx].allocated_k); keys->data[idx] = n; return 0; } @@ -376,17 +372,20 @@ int bch2_journal_key_delete(struct bch_fs *c, enum btree_id id, bool bch2_key_deleted_in_journal(struct btree_trans *trans, enum btree_id btree, unsigned level, struct bpos pos) { - struct journal_keys *keys = &trans->c->journal_keys; + if (!trans->journal_replay_not_finished) + return false; + + struct bch_fs *c = trans->c; + struct journal_keys *keys = &c->journal_keys; size_t idx = bch2_journal_key_search(keys, btree, level, pos); - if (!trans->journal_replay_not_finished) + if (idx >= keys->size || + keys->data[idx].btree_id != btree || + keys->data[idx].level != level) return false; - return (idx < keys->size && - keys->data[idx].btree_id == btree && - keys->data[idx].level == level && - bpos_eq(keys->data[idx].k->k.p, pos) && - bkey_deleted(&keys->data[idx].k->k)); + struct bkey_i *k = journal_key_k(c, &keys->data[idx]); + return bpos_eq(k->k.p, pos) && bkey_deleted(&k->k); } static void __bch2_journal_key_overwritten(struct journal_keys *keys, size_t pos) @@ -457,11 +456,15 @@ void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree, struct journal_keys *keys = &c->journal_keys; size_t idx = bch2_journal_key_search(keys, btree, level, pos); - if (idx < keys->size && - keys->data[idx].btree_id == btree && - keys->data[idx].level == level && - bpos_eq(keys->data[idx].k->k.p, pos) && - !keys->data[idx].overwritten) { + if (idx >= keys->size || + keys->data[idx].btree_id != btree || + keys->data[idx].level != level || + keys->data[idx].overwritten) + return; + + struct bkey_i *k = journal_key_k(c, &keys->data[idx]); + + if (bpos_eq(k->k.p, pos)) { guard(mutex)(&keys->overwrite_lock); __bch2_journal_key_overwritten(keys, idx); } @@ -476,7 +479,7 @@ static void bch2_journal_iter_advance(struct journal_iter *iter) } } -static struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter) +static struct bkey_s_c bch2_journal_iter_peek(struct bch_fs *c, struct journal_iter *iter) { journal_iter_verify(iter); @@ -490,7 +493,7 @@ static struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter) BUG_ON(cmp); if (!k->overwritten) - return bkey_i_to_s_c(k->k); + return bkey_i_to_s_c(journal_key_k(c, k)); if (k->overwritten_range) iter->idx = idx_to_pos(iter->keys, rcu_dereference(k->overwritten_range)->end); @@ -554,7 +557,7 @@ static void btree_and_journal_iter_prefetch(struct btree_and_journal_iter *_iter while (nr--) { bch2_btree_and_journal_iter_advance(&iter); - struct bkey_s_c k = bch2_btree_and_journal_iter_peek(&iter); + struct bkey_s_c k = bch2_btree_and_journal_iter_peek(c, &iter); if (!k.k) break; @@ -565,7 +568,7 @@ static void btree_and_journal_iter_prefetch(struct btree_and_journal_iter *_iter bch2_bkey_buf_exit(&tmp, c); } -struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *iter) +struct bkey_s_c bch2_btree_and_journal_iter_peek(struct bch_fs *c, struct btree_and_journal_iter *iter) { struct bkey_s_c btree_k, journal_k = bkey_s_c_null, ret; size_t iters = 0; @@ -586,7 +589,7 @@ again: bch2_journal_iter_advance_btree(iter); if (iter->trans->journal_replay_not_finished) - while ((journal_k = bch2_journal_iter_peek(&iter->journal)).k && + while ((journal_k = bch2_journal_iter_peek(c, &iter->journal)).k && bpos_lt(journal_k.k->p, iter->pos)) bch2_journal_iter_advance(&iter->journal); @@ -658,15 +661,22 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans, /* * When keys compare equal, oldest compares first: */ -static int journal_sort_key_cmp(const void *_l, const void *_r) +static int journal_sort_key_cmp(const void *_l, const void *_r, const void *priv) { + struct bch_fs *c = (void *) priv; const struct journal_key *l = _l; const struct journal_key *r = _r; int rewind = l->rewind && r->rewind ? -1 : 1; - return journal_key_cmp(l, r) ?: - ((cmp_int(l->journal_seq, r->journal_seq) ?: - cmp_int(l->journal_offset, r->journal_offset)) * rewind); + int cmp = journal_key_cmp(c, l, r); + if (cmp) + return cmp; + + if (l->allocated || r->allocated) + return cmp_int(l->allocated, r->allocated); + + return ((cmp_int(l->journal_seq, r->journal_seq) ?: + cmp_int(l->journal_offset, r->journal_offset)) * rewind); } void bch2_journal_keys_put(struct bch_fs *c) @@ -687,7 +697,7 @@ void bch2_journal_keys_put(struct bch_fs *c) kfree(i->overwritten_range); if (i->allocated) - kfree(i->k); + kfree(i->allocated_k); } kvfree(keys->data); @@ -704,8 +714,10 @@ void bch2_journal_keys_put(struct bch_fs *c) static void __journal_keys_sort(struct journal_keys *keys) { - sort_nonatomic(keys->data, keys->nr, sizeof(keys->data[0]), - journal_sort_key_cmp, NULL); + struct bch_fs *c = container_of(keys, struct bch_fs, journal_keys); + + sort_r_nonatomic(keys->data, keys->nr, sizeof(keys->data[0]), + journal_sort_key_cmp, NULL, c); cond_resched(); @@ -717,9 +729,10 @@ static void __journal_keys_sort(struct journal_keys *keys) * compare each individual accounting key against the version in * the btree during replay: */ - if (src->k->k.type != KEY_TYPE_accounting && + struct bkey_i *k = journal_key_k(c, src); + if (k->k.type != KEY_TYPE_accounting && src + 1 < &darray_top(*keys) && - !journal_key_cmp(src, src + 1)) + !journal_key_cmp(c, src, src + 1)) continue; *dst++ = *src; @@ -763,7 +776,6 @@ int bch2_journal_keys_sort(struct bch_fs *c) .btree_id = entry->btree_id, .level = entry->level, .rewind = rewind, - .k = k, .journal_seq = le64_to_cpu(i->j.seq), .journal_offset = k->_data - i->j._data, }; @@ -801,13 +813,18 @@ void bch2_shoot_down_journal_keys(struct bch_fs *c, enum btree_id btree, move_gap(keys, keys->nr); - darray_for_each(*keys, i) + darray_for_each(*keys, i) { + struct bkey_i *k = journal_key_k(c, i); + if (!(i->btree_id == btree && i->level >= level_min && i->level <= level_max && - bpos_ge(i->k->k.p, start) && - bpos_le(i->k->k.p, end))) + bpos_ge(k->k.p, start) && + bpos_le(k->k.p, end))) keys->data[dst++] = *i; + else if (i->allocated) + kfree(i->allocated_k); + } keys->nr = keys->gap = dst; } @@ -825,7 +842,7 @@ void bch2_journal_keys_dump(struct bch_fs *c) prt_printf(&buf, "btree="); bch2_btree_id_to_text(&buf, i->btree_id); prt_printf(&buf, " l=%u ", i->level); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(i->k)); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(journal_key_k(c, i))); pr_err("%s", buf.buf); } } diff --git a/fs/bcachefs/btree_journal_iter.h b/fs/bcachefs/btree_journal_iter.h index 2a3082919b8d..cfd2061bc966 100644 --- a/fs/bcachefs/btree_journal_iter.h +++ b/fs/bcachefs/btree_journal_iter.h @@ -29,6 +29,23 @@ struct btree_and_journal_iter { bool fail_if_too_many_whiteouts; }; +static inline u32 journal_entry_radix_idx(struct bch_fs *c, u64 seq) +{ + return (seq - c->journal_entries_base_seq) & (~0U >> 1); +} + +static inline struct bkey_i *journal_key_k(struct bch_fs *c, + const struct journal_key *k) +{ + if (k->allocated) + return k->allocated_k; + + struct journal_replay *i = + *genradix_ptr(&c->journal_entries, journal_entry_radix_idx(c, k->journal_seq)); + + return (struct bkey_i *) (i->j._data + k->journal_offset); +} + static inline int __journal_key_btree_cmp(enum btree_id l_btree_id, unsigned l_level, const struct journal_key *r) @@ -37,25 +54,28 @@ static inline int __journal_key_btree_cmp(enum btree_id l_btree_id, cmp_int(l_btree_id, r->btree_id); } -static inline int __journal_key_cmp(enum btree_id l_btree_id, +static inline int __journal_key_cmp(struct bch_fs *c, + enum btree_id l_btree_id, unsigned l_level, struct bpos l_pos, const struct journal_key *r) { return __journal_key_btree_cmp(l_btree_id, l_level, r) ?: - bpos_cmp(l_pos, r->k->k.p); + bpos_cmp(l_pos, journal_key_k(c, r)->k.p); } -static inline int journal_key_cmp(const struct journal_key *l, const struct journal_key *r) +static inline int journal_key_cmp(struct bch_fs *c, + const struct journal_key *l, const struct journal_key *r) { - return __journal_key_cmp(l->btree_id, l->level, l->k->k.p, r); + return __journal_key_cmp(c, l->btree_id, l->level, + journal_key_k(c, l)->k.p, r); } -struct bkey_i *bch2_journal_keys_peek_max(struct bch_fs *, enum btree_id, +const struct bkey_i *bch2_journal_keys_peek_max(struct bch_fs *, enum btree_id, unsigned, struct bpos, struct bpos, size_t *); -struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *, enum btree_id, +const struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *, enum btree_id, unsigned, struct bpos, struct bpos, size_t *); -struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *, enum btree_id, +const struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *, enum btree_id, unsigned, struct bpos); int bch2_btree_and_journal_iter_prefetch(struct btree_trans *, struct btree_path *, @@ -71,7 +91,7 @@ bool bch2_key_deleted_in_journal(struct btree_trans *, enum btree_id, unsigned, void bch2_journal_key_overwritten(struct bch_fs *, enum btree_id, unsigned, struct bpos); void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *); -struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *); +struct bkey_s_c bch2_btree_and_journal_iter_peek(struct bch_fs *, struct btree_and_journal_iter *); void bch2_btree_and_journal_iter_exit(struct btree_and_journal_iter *); void __bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *, diff --git a/fs/bcachefs/btree_journal_iter_types.h b/fs/bcachefs/btree_journal_iter_types.h index 86aacb254fb2..e9d8628edec6 100644 --- a/fs/bcachefs/btree_journal_iter_types.h +++ b/fs/bcachefs/btree_journal_iter_types.h @@ -2,12 +2,38 @@ #ifndef _BCACHEFS_BTREE_JOURNAL_ITER_TYPES_H #define _BCACHEFS_BTREE_JOURNAL_ITER_TYPES_H +struct journal_ptr { + bool csum_good; + struct bch_csum csum; + u8 dev; + u32 bucket; + u32 bucket_offset; + u64 sector; +}; + +/* + * Only used for holding the journal entries we read in btree_journal_read() + * during cache_registration + */ +struct journal_replay { + DARRAY_PREALLOCATED(struct journal_ptr, 8) ptrs; + + bool csum_good; + bool ignore_blacklisted; + bool ignore_not_dirty; + /* must be last: */ + struct jset j; +}; + struct journal_key_range_overwritten { size_t start, end; }; struct journal_key { - u64 journal_seq; + union { + u64 journal_seq; + struct bkey_i *allocated_k; + }; u32 journal_offset; enum btree_id btree_id:8; unsigned level:8; @@ -16,7 +42,6 @@ struct journal_key { bool rewind:1; struct journal_key_range_overwritten __rcu * overwritten_range; - struct bkey_i *k; }; struct journal_keys { diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 4d58bdb233e9..5fa7f2f9f1e9 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -54,7 +54,7 @@ static void verify_update_old_key(struct btree_trans *trans, struct btree_insert struct bkey_s_c k = bch2_btree_path_peek_slot_exact(trans->paths + i->path, &u); if (unlikely(trans->journal_replay_not_finished)) { - struct bkey_i *j_k = + const struct bkey_i *j_k = bch2_journal_keys_peek_slot(c, i->btree_id, i->level, i->k->k.p); if (j_k) diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c index 053a837cf241..b70eb095a37e 100644 --- a/fs/bcachefs/btree_update.c +++ b/fs/bcachefs/btree_update.c @@ -403,7 +403,7 @@ __btree_trans_update_by_path(struct btree_trans *trans, i->old_btree_u64s = !bkey_deleted(&i->old_k) ? i->old_k.u64s : 0; if (unlikely(trans->journal_replay_not_finished)) { - struct bkey_i *j_k = + const struct bkey_i *j_k = bch2_journal_keys_peek_slot(c, n.btree_id, n.level, k->k.p); if (j_k) { diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 65ca54c5b0ff..a9877a47bfc6 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -95,7 +95,7 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) if (!b->c.level) goto out; - while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) { + while ((k = bch2_btree_and_journal_iter_peek(c, &iter)).k) { if (k.k->type != KEY_TYPE_btree_ptr_v2) goto out; diff --git a/fs/bcachefs/darray.h b/fs/bcachefs/darray.h index 4080ee99aadd..14c7fc7c8061 100644 --- a/fs/bcachefs/darray.h +++ b/fs/bcachefs/darray.h @@ -107,8 +107,11 @@ int __bch2_darray_resize_noprof(darray_char *, size_t, size_t, gfp_t); #define __darray_for_each(_d, _i) \ for ((_i) = (_d).data; _i < (_d).data + (_d).nr; _i++) +#define darray_for_each_from(_d, _i, _start) \ + for (typeof(&(_d).data[0]) _i = _start; _i < (_d).data + (_d).nr; _i++) + #define darray_for_each(_d, _i) \ - for (typeof(&(_d).data[0]) _i = (_d).data; _i < (_d).data + (_d).nr; _i++) + darray_for_each_from(_d, _i, (_d).data) #define darray_for_each_reverse(_d, _i) \ for (typeof(&(_d).data[0]) _i = (_d).data + (_d).nr - 1; _i >= (_d).data && (_d).nr; --_i) diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index 5944ad6d0f8d..5ec57b710501 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -765,75 +765,77 @@ int bch2_accounting_read(struct bch_fs *c) iter.flags &= ~BTREE_ITER_with_journal; int ret = for_each_btree_key_continue(trans, iter, BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, ({ - struct bkey u; - struct bkey_s_c k = bch2_btree_path_peek_slot_exact(btree_iter_path(trans, &iter), &u); + struct bkey u; + struct bkey_s_c k = bch2_btree_path_peek_slot_exact(btree_iter_path(trans, &iter), &u); - if (k.k->type != KEY_TYPE_accounting) - continue; + if (k.k->type != KEY_TYPE_accounting) + continue; - struct disk_accounting_pos acc_k; - bpos_to_disk_accounting_pos(&acc_k, k.k->p); + struct disk_accounting_pos acc_k; + bpos_to_disk_accounting_pos(&acc_k, k.k->p); - if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) - break; + if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) + break; - if (!bch2_accounting_is_mem(&acc_k)) { - struct disk_accounting_pos next; - memset(&next, 0, sizeof(next)); - next.type = acc_k.type + 1; - bch2_btree_iter_set_pos(&iter, disk_accounting_pos_to_bpos(&next)); - continue; - } + if (!bch2_accounting_is_mem(&acc_k)) { + struct disk_accounting_pos next; + memset(&next, 0, sizeof(next)); + next.type = acc_k.type + 1; + bch2_btree_iter_set_pos(&iter, disk_accounting_pos_to_bpos(&next)); + continue; + } - accounting_read_key(trans, k); - })); + accounting_read_key(trans, k); + })); bch2_trans_iter_exit(&iter); if (ret) return ret; struct journal_keys *keys = &c->journal_keys; - struct journal_key *dst = keys->data; move_gap(keys, keys->nr); darray_for_each(*keys, i) { - if (i->k->k.type == KEY_TYPE_accounting) { + if (i->overwritten) + continue; + + struct bkey_i *k = journal_key_k(c, i); + + if (k->k.type == KEY_TYPE_accounting) { struct disk_accounting_pos acc_k; - bpos_to_disk_accounting_pos(&acc_k, i->k->k.p); + bpos_to_disk_accounting_pos(&acc_k, k->k.p); if (!bch2_accounting_is_mem(&acc_k)) continue; - struct bkey_s_c k = bkey_i_to_s_c(i->k); unsigned idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), - accounting_pos_cmp, &k.k->p); + accounting_pos_cmp, &k->k.p); bool applied = idx < acc->k.nr && - bversion_cmp(acc->k.data[idx].bversion, k.k->bversion) >= 0; + bversion_cmp(acc->k.data[idx].bversion, k->k.bversion) >= 0; if (applied) continue; - if (i + 1 < &darray_top(*keys) && - i[1].k->k.type == KEY_TYPE_accounting && - !journal_key_cmp(i, i + 1)) { - WARN_ON(bversion_cmp(i[0].k->k.bversion, i[1].k->k.bversion) >= 0); + darray_for_each_from(*keys, j, i + 1) { + if (journal_key_cmp(c, i, j)) + break; - i[1].journal_seq = i[0].journal_seq; + struct bkey_i *n = journal_key_k(c, j); + if (n->k.type == KEY_TYPE_accounting) { + WARN_ON(bversion_cmp(k->k.bversion, n->k.bversion) >= 0); - bch2_accounting_accumulate(bkey_i_to_accounting(i[1].k), - bkey_s_c_to_accounting(k)); - continue; + bch2_accounting_accumulate(bkey_i_to_accounting(k), + bkey_i_to_s_c_accounting(n)); + j->overwritten = true; + } } - ret = accounting_read_key(trans, k); + ret = accounting_read_key(trans, bkey_i_to_s_c(k)); if (ret) return ret; } - - *dst++ = *i; } - keys->gap = keys->nr = dst - keys->data; guard(percpu_write)(&c->mark_lock); diff --git a/fs/bcachefs/extent_update.c b/fs/bcachefs/extent_update.c index 7ddb156c765c..73eb28090bc7 100644 --- a/fs/bcachefs/extent_update.c +++ b/fs/bcachefs/extent_update.c @@ -115,9 +115,15 @@ int bch2_extent_trim_atomic(struct btree_trans *trans, copy.flags |= BTREE_ITER_nofilter_whiteouts; + /* + * We're doing our own whiteout filtering, but we still need to pass a + * max key to avoid popping an assert in bch2_snapshot_is_ancestor(): + */ struct bkey_s_c k; unsigned nr_iters = 0; - for_each_btree_key_continue_norestart(copy, 0, k, ret) { + for_each_btree_key_max_continue_norestart(copy, + POS(insert->k.p.inode, U64_MAX), + 0, k, ret) { unsigned offset = 0; if (bkey_gt(iter->pos, bkey_start_pos(k.k))) diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 07869436a964..93ac0faedf7d 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -120,6 +120,7 @@ static void journal_pin_list_init(struct journal_entry_pin_list *p, int count) INIT_LIST_HEAD(&p->flushed[i]); atomic_set(&p->count, count); p->devs.nr = 0; + p->bytes = 0; } /* @@ -264,6 +265,11 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val, bool t /* Close out old buffer: */ buf->data->u64s = cpu_to_le32(old.cur_entry_offset); + struct journal_entry_pin_list *pin_list = + journal_seq_pin(j, journal_cur_seq(j)); + pin_list->bytes = roundup_pow_of_two(vstruct_bytes(buf->data)); + j->dirty_entry_bytes += pin_list->bytes; + if (trace_journal_entry_close_enabled() && trace) { CLASS(printbuf, err)(); guard(printbuf_atomic)(&err); @@ -737,9 +743,9 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res, return ret; CLASS(printbuf, buf)(); + prt_printf(&buf, bch2_fmt(c, "Journal stuck? Waited for 10 seconds, err %s"), bch2_err_str(ret)); bch2_journal_debug_to_text(&buf, j); bch2_print_str(c, KERN_ERR, buf.buf); - prt_printf(&buf, bch2_fmt(c, "Journal stuck? Waited for 10 seconds, err %s"), bch2_err_str(ret)); closure_wait_event(&j->async_wait, !bch2_err_matches(ret = __journal_res_get(j, res, flags), BCH_ERR_operation_blocked) || diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 093e4acad085..6e8a89a0f244 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -3,6 +3,7 @@ #include "alloc_background.h" #include "alloc_foreground.h" #include "btree_io.h" +#include "btree_journal_iter.h" #include "btree_update_interior.h" #include "btree_write_buffer.h" #include "buckets.h" @@ -106,11 +107,6 @@ static bool jset_csum_good(struct bch_fs *c, struct jset *j, struct bch_csum *cs return !bch2_crc_cmp(j->csum, *csum); } -static inline u32 journal_entry_radix_idx(struct bch_fs *c, u64 seq) -{ - return (seq - c->journal_entries_base_seq) & (~0U >> 1); -} - static void __journal_replay_free(struct bch_fs *c, struct journal_replay *i) { diff --git a/fs/bcachefs/journal_io.h b/fs/bcachefs/journal_io.h index f53c5c81d137..f8754bf71264 100644 --- a/fs/bcachefs/journal_io.h +++ b/fs/bcachefs/journal_io.h @@ -7,29 +7,6 @@ void bch2_journal_pos_from_member_info_set(struct bch_fs *); void bch2_journal_pos_from_member_info_resume(struct bch_fs *); -struct journal_ptr { - bool csum_good; - struct bch_csum csum; - u8 dev; - u32 bucket; - u32 bucket_offset; - u64 sector; -}; - -/* - * Only used for holding the journal entries we read in btree_journal_read() - * during cache_registration - */ -struct journal_replay { - DARRAY_PREALLOCATED(struct journal_ptr, 8) ptrs; - - bool csum_good; - bool ignore_blacklisted; - bool ignore_not_dirty; - /* must be last: */ - struct jset j; -}; - static inline bool journal_replay_ignore(struct journal_replay *i) { return !i || i->ignore_blacklisted || i->ignore_not_dirty; diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index f23e5ee9ad75..6400a63ed79b 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -148,6 +148,9 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne BUG_ON(nr_devs_want > ARRAY_SIZE(dev_space)); + ssize_t mem_limit = max_t(ssize_t, 0, + (totalram_pages() * PAGE_SIZE) / 4 - j->dirty_entry_bytes); + for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal]) { if (!ca->journal.nr || !ca->mi.durability) @@ -180,6 +183,7 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne * @nr_devs_want largest devices: */ space = dev_space[nr_devs_want - 1]; + space.total = min(space.total, mem_limit >> 9); space.next_entry = min(space.next_entry, min_bucket_size); return space; } @@ -328,9 +332,17 @@ void bch2_journal_reclaim_fast(struct journal *j) * Unpin journal entries whose reference counts reached zero, meaning * all btree nodes got written out */ + struct journal_entry_pin_list *pin_list; while (!fifo_empty(&j->pin) && j->pin.front <= j->seq_ondisk && - !atomic_read(&fifo_peek_front(&j->pin).count)) { + !atomic_read(&(pin_list = &fifo_peek_front(&j->pin))->count)) { + + if (WARN_ON(j->dirty_entry_bytes < pin_list->bytes)) + pin_list->bytes = j->dirty_entry_bytes; + + j->dirty_entry_bytes -= pin_list->bytes; + pin_list->bytes = 0; + j->pin.front++; popped = true; } diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h index 51104bbb99da..7c9273bd0e15 100644 --- a/fs/bcachefs/journal_types.h +++ b/fs/bcachefs/journal_types.h @@ -71,6 +71,7 @@ struct journal_entry_pin_list { struct list_head flushed[JOURNAL_PIN_TYPE_NR]; atomic_t count; struct bch_devs_list devs; + size_t bytes; }; struct journal; @@ -253,6 +254,7 @@ struct journal { u64 front, back, size, mask; struct journal_entry_pin_list *data; } pin; + size_t dirty_entry_bytes; struct journal_space space[journal_space_nr]; diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index 84ce69a7f131..31a3abcbd83e 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -242,7 +242,7 @@ enum fsck_err_opts { x(inodes_32bit, u8, \ OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ OPT_BOOL(), \ - BCH_SB_INODE_32BIT, true, \ + BCH_SB_INODE_32BIT, false, \ NULL, "Constrain inode numbers to 32 bits") \ x(shard_inode_numbers_bits, u8, \ OPT_FS|OPT_FORMAT, \ @@ -321,6 +321,11 @@ enum fsck_err_opts { OPT_BOOL(), \ BCH2_NO_SB_OPT, false, \ NULL, "Don't kick drives out when splitbrain detected")\ + x(no_version_check, u8, \ + OPT_HIDDEN, \ + OPT_BOOL(), \ + BCH2_NO_SB_OPT, false, \ + NULL, "Don't fail reading the superblock due to incompatible version")\ x(verbose, u8, \ OPT_FS|OPT_MOUNT|OPT_RUNTIME, \ OPT_BOOL(), \ diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 29e81f96db0f..0117405e51ef 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -64,7 +64,6 @@ int bch2_btree_lost_data(struct bch_fs *c, * but in debug mode we want the next fsck run to be clean: */ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_lrus, 0, &write_sb) ?: ret; - ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_backpointers_to_extents, 0, &write_sb) ?: ret; #endif write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_lru_entry_bad, ext->errors_silent); @@ -182,9 +181,12 @@ void bch2_reconstruct_alloc(struct bch_fs *c) */ static void zero_out_btree_mem_ptr(struct journal_keys *keys) { - darray_for_each(*keys, i) - if (i->k->k.type == KEY_TYPE_btree_ptr_v2) - bkey_i_to_btree_ptr_v2(i->k)->v.mem_ptr = 0; + struct bch_fs *c = container_of(keys, struct bch_fs, journal_keys); + darray_for_each(*keys, i) { + struct bkey_i *k = journal_key_k(c, i); + if (k->k.type == KEY_TYPE_btree_ptr_v2) + bkey_i_to_btree_ptr_v2(k)->v.mem_ptr = 0; + } } /* journal replay: */ @@ -202,8 +204,9 @@ static void replay_now_at(struct journal *j, u64 seq) static int bch2_journal_replay_accounting_key(struct btree_trans *trans, struct journal_key *k) { + struct bkey_i *bk = journal_key_k(trans->c, k); struct btree_iter iter; - bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p, + bch2_trans_node_iter_init(trans, &iter, k->btree_id, bk->k.p, BTREE_MAX_DEPTH, k->level, BTREE_ITER_intent); int ret = bch2_btree_iter_traverse(&iter); @@ -214,14 +217,14 @@ static int bch2_journal_replay_accounting_key(struct btree_trans *trans, struct bkey_s_c old = bch2_btree_path_peek_slot(btree_iter_path(trans, &iter), &u); /* Has this delta already been applied to the btree? */ - if (bversion_cmp(old.k->bversion, k->k->k.bversion) >= 0) { + if (bversion_cmp(old.k->bversion, bk->k.bversion) >= 0) { ret = 0; goto out; } - struct bkey_i *new = k->k; + struct bkey_i *new = bk; if (old.k->type == KEY_TYPE_accounting) { - new = bch2_bkey_make_mut_noupdate(trans, bkey_i_to_s_c(k->k)); + new = bch2_bkey_make_mut_noupdate(trans, bkey_i_to_s_c(bk)); ret = PTR_ERR_OR_ZERO(new); if (ret) goto out; @@ -266,7 +269,8 @@ static int bch2_journal_replay_key(struct btree_trans *trans, else update_flags |= BTREE_UPDATE_key_cache_reclaim; - bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p, + struct bkey_i *bk = journal_key_k(trans->c, k); + bch2_trans_node_iter_init(trans, &iter, k->btree_id, bk->k.p, BTREE_MAX_DEPTH, k->level, iter_flags); ret = bch2_btree_iter_traverse(&iter); @@ -281,7 +285,7 @@ static int bch2_journal_replay_key(struct btree_trans *trans, prt_str(&buf, "btree="); bch2_btree_id_to_text(&buf, k->btree_id); prt_printf(&buf, " level=%u ", k->level); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k->k)); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(bk)); if (!(c->recovery.passes_complete & (BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes)| BIT_ULL(BCH_RECOVERY_PASS_check_topology)))) { @@ -298,7 +302,7 @@ static int bch2_journal_replay_key(struct btree_trans *trans, } bch2_trans_iter_exit(&iter); - bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p, + bch2_trans_node_iter_init(trans, &iter, k->btree_id, bk->k.p, BTREE_MAX_DEPTH, 0, iter_flags); ret = bch2_btree_iter_traverse(&iter) ?: bch2_btree_increase_depth(trans, iter.path, 0) ?: @@ -310,17 +314,17 @@ static int bch2_journal_replay_key(struct btree_trans *trans, if (k->overwritten) goto out; - if (k->k->k.type == KEY_TYPE_accounting) { - struct bkey_i *n = bch2_trans_subbuf_alloc(trans, &trans->accounting, k->k->k.u64s); + if (bk->k.type == KEY_TYPE_accounting) { + struct bkey_i *n = bch2_trans_subbuf_alloc(trans, &trans->accounting, bk->k.u64s); ret = PTR_ERR_OR_ZERO(n); if (ret) goto out; - bkey_copy(n, k->k); + bkey_copy(n, bk); goto out; } - ret = bch2_trans_update(trans, &iter, k->k, update_flags); + ret = bch2_trans_update(trans, &iter, bk, update_flags); out: bch2_trans_iter_exit(&iter); return ret; @@ -369,7 +373,9 @@ int bch2_journal_replay(struct bch_fs *c) * flush accounting keys until we're done */ darray_for_each(*keys, k) { - if (!(k->k->k.type == KEY_TYPE_accounting && !k->allocated)) + struct bkey_i *bk = journal_key_k(trans->c, k); + + if (!(bk->k.type == KEY_TYPE_accounting && !k->allocated)) continue; cond_resched(); @@ -412,7 +418,6 @@ int bch2_journal_replay(struct bch_fs *c) BCH_TRANS_COMMIT_skip_accounting_apply| (!k->allocated ? BCH_TRANS_COMMIT_no_journal_res : 0), bch2_journal_replay_key(trans, k)); - BUG_ON(!ret && !k->overwritten && k->k->k.type != KEY_TYPE_accounting); if (ret) { ret = darray_push(&keys_sorted, k); if (ret) diff --git a/fs/bcachefs/recovery_passes_format.h b/fs/bcachefs/recovery_passes_format.h index b63c20558d3d..2696eee00345 100644 --- a/fs/bcachefs/recovery_passes_format.h +++ b/fs/bcachefs/recovery_passes_format.h @@ -37,7 +37,7 @@ x(check_alloc_info, 10, PASS_ONLINE|PASS_FSCK_ALLOC) \ x(check_lrus, 11, PASS_ONLINE|PASS_FSCK_ALLOC) \ x(check_btree_backpointers, 12, PASS_ONLINE|PASS_FSCK_ALLOC) \ - x(check_backpointers_to_extents, 13, PASS_ONLINE|PASS_FSCK_DEBUG) \ + x(check_backpointers_to_extents, 13, PASS_ONLINE) \ x(check_extents_to_backpointers, 14, PASS_ONLINE|PASS_FSCK_ALLOC) \ x(check_alloc_to_lru_refs, 15, PASS_ONLINE|PASS_FSCK_ALLOC) \ x(fs_freespace_init, 16, PASS_ALWAYS|PASS_SILENT) \ diff --git a/fs/bcachefs/sb-counters_format.h b/fs/bcachefs/sb-counters_format.h index 44bc12573a0c..96ad64920810 100644 --- a/fs/bcachefs/sb-counters_format.h +++ b/fs/bcachefs/sb-counters_format.h @@ -22,7 +22,7 @@ enum counters_flags { x(io_read_split, 33, TYPE_COUNTER) \ x(io_read_reuse_race, 34, TYPE_COUNTER) \ x(io_read_retry, 32, TYPE_COUNTER) \ - x(io_read_fail_and_poison, 82, TYPE_COUNTER) \ + x(io_read_fail_and_poison, 95, TYPE_COUNTER) \ x(io_write, 1, TYPE_SECTORS) \ x(io_move, 2, TYPE_SECTORS) \ x(io_move_read, 35, TYPE_SECTORS) \ @@ -124,4 +124,13 @@ struct bch_sb_field_counters { __le64 d[]; }; +static inline void __maybe_unused check_bch_counter_ids_unique(void) { + switch(0){ +#define x(t, n, ...) case (n): + BCH_PERSISTENT_COUNTERS() +#undef x + ; + } +} + #endif /* _BCACHEFS_SB_COUNTERS_FORMAT_H */ diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 369465a4de77..5897380c4c08 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -379,7 +379,7 @@ static int bch2_sb_compatible(struct bch_sb *sb, struct printbuf *out) return 0; } -int bch2_sb_validate(struct bch_sb *sb, u64 read_offset, +int bch2_sb_validate(struct bch_sb *sb, struct bch_opts *opts, u64 read_offset, enum bch_validate_flags flags, struct printbuf *out) { enum bch_opt_id opt_id; @@ -389,28 +389,30 @@ int bch2_sb_validate(struct bch_sb *sb, u64 read_offset, if (ret) return ret; - u64 incompat = le64_to_cpu(sb->features[0]) & (~0ULL << BCH_FEATURE_NR); - unsigned incompat_bit = 0; - if (incompat) - incompat_bit = __ffs64(incompat); - else if (sb->features[1]) - incompat_bit = 64 + __ffs64(le64_to_cpu(sb->features[1])); - - if (incompat_bit) { - prt_printf(out, "Filesystem has incompatible feature bit %u, highest supported %s (%u)", - incompat_bit, - bch2_sb_features[BCH_FEATURE_NR - 1], - BCH_FEATURE_NR - 1); - return -BCH_ERR_invalid_sb_features; - } + if (!opts->no_version_check) { + u64 incompat = le64_to_cpu(sb->features[0]) & (~0ULL << BCH_FEATURE_NR); + unsigned incompat_bit = 0; + if (incompat) + incompat_bit = __ffs64(incompat); + else if (sb->features[1]) + incompat_bit = 64 + __ffs64(le64_to_cpu(sb->features[1])); + + if (incompat_bit) { + prt_printf(out, "Filesystem has incompatible feature bit %u, highest supported %s (%u)", + incompat_bit, + bch2_sb_features[BCH_FEATURE_NR - 1], + BCH_FEATURE_NR - 1); + return -BCH_ERR_invalid_sb_features; + } - if (BCH_VERSION_MAJOR(le16_to_cpu(sb->version)) > BCH_VERSION_MAJOR(bcachefs_metadata_version_current) || - BCH_SB_VERSION_INCOMPAT(sb) > bcachefs_metadata_version_current) { - prt_str(out, "Filesystem has incompatible version "); - bch2_version_to_text(out, le16_to_cpu(sb->version)); - prt_str(out, ", current version "); - bch2_version_to_text(out, bcachefs_metadata_version_current); - return -BCH_ERR_invalid_sb_features; + if (BCH_VERSION_MAJOR(le16_to_cpu(sb->version)) > BCH_VERSION_MAJOR(bcachefs_metadata_version_current) || + BCH_SB_VERSION_INCOMPAT(sb) > bcachefs_metadata_version_current) { + prt_str(out, "Filesystem has incompatible version "); + bch2_version_to_text(out, le16_to_cpu(sb->version)); + prt_str(out, ", current version "); + bch2_version_to_text(out, bcachefs_metadata_version_current); + return -BCH_ERR_invalid_sb_features; + } } if (bch2_is_zero(sb->user_uuid.b, sizeof(sb->user_uuid))) { @@ -915,7 +917,7 @@ got_super: sb->have_layout = true; - ret = bch2_sb_validate(sb->sb, offset, 0, &err); + ret = bch2_sb_validate(sb->sb, opts, offset, 0, &err); if (ret) { bch2_print_opts(opts, KERN_ERR "bcachefs (%s): error validating superblock: %s\n", path, err.buf); @@ -1081,9 +1083,10 @@ int bch2_write_super(struct bch_fs *c) bch2_sb_from_fs(c, (*ca)); darray_for_each(online_devices, ca) { + struct bch_opts opts = bch2_opts_empty(); printbuf_reset(&err); - ret = bch2_sb_validate((*ca)->disk_sb.sb, 0, BCH_VALIDATE_write, &err); + ret = bch2_sb_validate((*ca)->disk_sb.sb, &opts, 0, BCH_VALIDATE_write, &err); if (ret) { bch2_fs_inconsistent(c, "sb invalid before write: %s", err.buf); goto out; diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h index a3b7a90f2533..82cb3a3ceeae 100644 --- a/fs/bcachefs/super-io.h +++ b/fs/bcachefs/super-io.h @@ -92,7 +92,8 @@ int bch2_sb_from_fs(struct bch_fs *, struct bch_dev *); void bch2_free_super(struct bch_sb_handle *); int bch2_sb_realloc(struct bch_sb_handle *, unsigned); -int bch2_sb_validate(struct bch_sb *, u64, enum bch_validate_flags, struct printbuf *); +int bch2_sb_validate(struct bch_sb *, struct bch_opts *, u64, + enum bch_validate_flags, struct printbuf *); int bch2_read_super(const char *, struct bch_opts *, struct bch_sb_handle *); int bch2_read_super_silent(const char *, struct bch_opts *, struct bch_sb_handle *); diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 09e7f8ae9922..ee3b30b1c2b5 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1021,6 +1021,12 @@ static int bch2_fs_opt_version_init(struct bch_fs *c) prt_bitflags(&p, bch2_recovery_passes, sb_passes); } + u64 btrees_lost_data = le64_to_cpu(ext->btrees_lost_data); + if (btrees_lost_data) { + prt_str(&p, "\nsuperblock indicates damage to following btrees:\n "); + prt_bitflags(&p, __bch2_btree_ids, btrees_lost_data); + } + if (bch2_check_version_downgrade(c)) { prt_str(&p, "\nVersion downgrade required:"); |