diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/bcachefs/btree_cache.c | 2 | ||||
-rw-r--r-- | fs/bcachefs/btree_cache.h | 8 | ||||
-rw-r--r-- | fs/bcachefs/btree_gc.c | 38 | ||||
-rw-r--r-- | fs/bcachefs/btree_update.c | 16 | ||||
-rw-r--r-- | fs/bcachefs/btree_update_interior.c | 8 | ||||
-rw-r--r-- | fs/bcachefs/errcode.h | 1 | ||||
-rw-r--r-- | fs/bcachefs/fs.c | 4 | ||||
-rw-r--r-- | fs/bcachefs/lru.h | 10 | ||||
-rw-r--r-- | fs/bcachefs/move.c | 8 | ||||
-rw-r--r-- | fs/bcachefs/movinggc.c | 188 | ||||
-rw-r--r-- | fs/bcachefs/recovery.c | 193 | ||||
-rw-r--r-- | fs/bcachefs/sb-counters_format.h | 2 | ||||
-rw-r--r-- | fs/bcachefs/sb-errors_format.h | 4 | ||||
-rw-r--r-- | fs/bcachefs/sb-members.c | 116 | ||||
-rw-r--r-- | fs/bcachefs/sb-members.h | 25 | ||||
-rw-r--r-- | fs/bcachefs/snapshot.c | 2 | ||||
-rw-r--r-- | fs/bcachefs/super.c | 314 |
17 files changed, 550 insertions, 389 deletions
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 25b01e750880..9261ad043564 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -215,7 +215,7 @@ void bch2_node_pin(struct bch_fs *c, struct btree *b) struct btree_cache *bc = &c->btree_cache; guard(mutex)(&bc->lock); - if (b != btree_node_root(c, b) && !btree_node_pinned(b)) { + if (!btree_node_is_root(c, b) && !btree_node_pinned(b)) { set_btree_node_pinned(b); list_move(&b->list, &bc->live[1].list); bc->live[0].nr--; diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h index 649e9dfd178a..035b2cb25077 100644 --- a/fs/bcachefs/btree_cache.h +++ b/fs/bcachefs/btree_cache.h @@ -144,6 +144,14 @@ static inline struct btree *btree_node_root(struct bch_fs *c, struct btree *b) return r ? r->b : NULL; } +static inline bool btree_node_is_root(struct bch_fs *c, struct btree *b) +{ + struct btree *root = btree_node_root(c, b); + + BUG_ON(b != root && b->c.level >= root->c.level); + return b == root; +} + const char *bch2_btree_id_str(enum btree_id); /* avoid */ void bch2_btree_id_to_text(struct printbuf *, enum btree_id); void bch2_btree_id_level_to_text(struct printbuf *, enum btree_id, unsigned); diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index ce3c7750a922..6b91649688da 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -282,6 +282,41 @@ fsck_err: return ret; } +static int btree_check_root_boundaries(struct btree_trans *trans, struct btree *b) +{ + struct bch_fs *c = trans->c; + struct printbuf buf = PRINTBUF; + int ret = 0; + + BUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 && + !bpos_eq(bkey_i_to_btree_ptr_v2(&b->key)->v.min_key, + b->data->min_key)); + + prt_str(&buf, " at "); + bch2_btree_pos_to_text(&buf, c, b); + + if (mustfix_fsck_err_on(!bpos_eq(b->data->min_key, POS_MIN), + trans, btree_node_topology_bad_root_min_key, + "btree root with incorrect min_key%s", buf.buf)) { + ret = set_node_min(c, b, POS_MIN); + if (ret) + goto 
err; + } + + if (mustfix_fsck_err_on(!bpos_eq(b->data->max_key, SPOS_MAX), + trans, btree_node_topology_bad_root_max_key, + "btree root with incorrect max_key%s", buf.buf)) { + ret = set_node_max(c, b, SPOS_MAX); + if (ret) + goto err; + } + +err: +fsck_err: + printbuf_exit(&buf); + return ret; +} + static int btree_repair_node_end(struct btree_trans *trans, struct btree *b, struct btree *child, struct bpos *pulled_from_scan) { @@ -586,7 +621,8 @@ recover: struct btree *b = r->b; btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read); - ret = bch2_btree_repair_topology_recurse(trans, b, &pulled_from_scan); + ret = btree_check_root_boundaries(trans, b) ?: + bch2_btree_repair_topology_recurse(trans, b, &pulled_from_scan); six_unlock_read(&b->c.lock); if (bch2_err_matches(ret, BCH_ERR_topology_repair_drop_this_node)) { diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c index 4c764369ea7b..f59f018fe0d8 100644 --- a/fs/bcachefs/btree_update.c +++ b/fs/bcachefs/btree_update.c @@ -12,6 +12,7 @@ #include "extents.h" #include "keylist.h" #include "snapshot.h" +#include "super-io.h" #include "trace.h" #include <linux/string_helpers.h> @@ -158,7 +159,7 @@ int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans, return ret; } -static inline enum bch_bkey_type extent_whiteout_type(enum btree_id btree, const struct bkey *k) +static inline enum bch_bkey_type extent_whiteout_type(struct bch_fs *c, enum btree_id btree, const struct bkey *k) { /* * KEY_TYPE_extent_whiteout indicates that there isn't a real extent @@ -166,7 +167,9 @@ static inline enum bch_bkey_type extent_whiteout_type(enum btree_id btree, const * KEY_TYPE_extent_whiteout (but not KEY_TYPE_whiteout) are * monotonically increasing */ - return btree_id_is_extents_snapshots(btree) && bkey_deleted(k) + return btree_id_is_extents_snapshots(btree) && + bkey_deleted(k) && + !bch2_request_incompat_feature(c, bcachefs_metadata_version_extent_snapshot_whiteouts) ? 
KEY_TYPE_extent_whiteout : KEY_TYPE_whiteout; } @@ -238,12 +241,13 @@ int bch2_trans_update_extent_overwrite(struct btree_trans *trans, update->k.p.snapshot = new.k->p.snapshot; if (btree_type_has_snapshots(btree_id)) { - ret = new.k->p.snapshot != old.k->p.snapshot ?: - need_whiteout_for_snapshot(trans, btree_id, update->k.p); + ret = new.k->p.snapshot != old.k->p.snapshot + ? 1 + : need_whiteout_for_snapshot(trans, btree_id, update->k.p); if (ret < 0) return ret; if (ret) - update->k.type = extent_whiteout_type(iter->btree_id, new.k); + update->k.type = extent_whiteout_type(trans->c, iter->btree_id, new.k); } ret = bch2_btree_insert_nonextent(trans, btree_id, update, @@ -309,7 +313,7 @@ static int bch2_trans_update_extent(struct btree_trans *trans, bool done = k.k->type != KEY_TYPE_whiteout && bkey_lt(insert->k.p, k.k->p); if (bkey_extent_whiteout(k.k)) { - enum bch_bkey_type whiteout_type = extent_whiteout_type(btree_id, &insert->k); + enum bch_bkey_type whiteout_type = extent_whiteout_type(trans->c, btree_id, &insert->k); if (bkey_le(k.k->p, insert->k.p) && k.k->type != whiteout_type) { diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 5f4f82967105..76897cf15946 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -66,6 +66,10 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) bkey_init(&prev.k->k); bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b); + /* + * Don't use btree_node_is_root(): we're called by btree split, after + * creating a new root but before setting it + */ if (b == btree_node_root(c, b)) { if (!bpos_eq(b->data->min_key, POS_MIN)) { bch2_log_msg_start(c, &buf); @@ -1655,7 +1659,7 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, int ret = 0; bch2_verify_btree_nr_keys(b); - BUG_ON(!parent && (b != btree_node_root(c, b))); + BUG_ON(!parent && !btree_node_is_root(c, b)); BUG_ON(parent && 
!btree_node_intent_locked(trans->paths + path, b->c.level + 1)); ret = bch2_btree_node_check_topology(trans, b); @@ -2527,7 +2531,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, if (ret) goto err; } else { - BUG_ON(btree_node_root(c, b) != b); + BUG_ON(!btree_node_is_root(c, b)); struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, jset_u64s(new_key->k.u64s)); diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index cec8b0f47d3d..adc1f9315eab 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -119,6 +119,7 @@ x(ENOENT, ENOENT_not_directory) \ x(ENOENT, ENOENT_directory_dead) \ x(ENOENT, ENOENT_subvolume) \ + x(ENOENT, ENOENT_snapshot) \ x(ENOENT, ENOENT_snapshot_tree) \ x(ENOENT, ENOENT_dirent_doesnt_match_inode) \ x(ENOENT, ENOENT_dev_not_found) \ diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 3b289f696612..b5e3090f1cb8 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -511,8 +511,8 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum) struct bch_subvolume subvol; int ret = lockrestart_do(trans, bch2_subvolume_get(trans, inum.subvol, true, &subvol) ?: - bch2_inode_find_by_inum_trans(trans, inum, &inode_u)) ?: - PTR_ERR_OR_ZERO(inode = bch2_inode_hash_init_insert(trans, inum, &inode_u, &subvol)); + bch2_inode_find_by_inum_trans(trans, inum, &inode_u) ?: + PTR_ERR_OR_ZERO(inode = bch2_inode_hash_init_insert(trans, inum, &inode_u, &subvol))); return ret ? 
ERR_PTR(ret) : &inode->v; } diff --git a/fs/bcachefs/lru.h b/fs/bcachefs/lru.h index 8abd0aa2083a..6f1e0a7b5db5 100644 --- a/fs/bcachefs/lru.h +++ b/fs/bcachefs/lru.h @@ -24,6 +24,16 @@ static inline struct bpos lru_pos(u16 lru_id, u64 dev_bucket, u64 time) return pos; } +static inline struct bpos lru_start(u16 lru_id) +{ + return lru_pos(lru_id, 0, 0); +} + +static inline struct bpos lru_end(u16 lru_id) +{ + return lru_pos(lru_id, U64_MAX, LRU_TIME_MAX); +} + static inline enum bch_lru_type lru_type(struct bkey_s_c l) { u16 lru_id = l.k->p.inode >> 48; diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index a38996f5366f..30fe269d531d 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -344,9 +344,13 @@ int bch2_move_extent(struct moving_context *ctxt, if (!data_opts.rewrite_ptrs && !data_opts.extra_replicas && !data_opts.scrub) { - if (data_opts.kill_ptrs) + if (data_opts.kill_ptrs) { + this_cpu_add(c->counters[BCH_COUNTER_io_move_drop_only], k.k->size); return bch2_extent_drop_ptrs(trans, iter, k, &io_opts, &data_opts); - return 0; + } else { + this_cpu_add(c->counters[BCH_COUNTER_io_move_noop], k.k->size); + return 0; + } } struct moving_io *io = allocate_dropping_locks(trans, ret, diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index b0cbe3c1aab6..f36d60b8fb07 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -14,6 +14,7 @@ #include "btree_write_buffer.h" #include "buckets.h" #include "clock.h" +#include "ec.h" #include "errcode.h" #include "error.h" #include "lru.h" @@ -131,72 +132,153 @@ static bool bucket_in_flight(struct buckets_in_flight *list, return rhashtable_lookup_fast(list->table, &k, bch_move_bucket_params); } +static int try_add_copygc_bucket(struct btree_trans *trans, + struct buckets_in_flight *buckets_in_flight, + struct bpos bucket, u64 lru_time) +{ + struct move_bucket b = { .k.bucket = bucket }; + + int ret = bch2_bucket_is_movable(trans, &b, lru_time); + if (ret <= 0) + return ret; + + if 
(bucket_in_flight(buckets_in_flight, b.k)) + return 0; + + struct move_bucket *b_i = kmalloc(sizeof(*b_i), GFP_KERNEL); + if (!b_i) + return -ENOMEM; + + *b_i = b; + + ret = darray_push(&buckets_in_flight->to_evacuate, b_i); + if (ret) { + kfree(b_i); + return ret; + } + + ret = rhashtable_lookup_insert_fast(buckets_in_flight->table, &b_i->hash, + bch_move_bucket_params); + BUG_ON(ret); + + size_t nr_to_get = max_t(size_t, 16U, buckets_in_flight->nr / 4); + return buckets_in_flight->to_evacuate.nr >= nr_to_get; +} + static int bch2_copygc_get_buckets(struct moving_context *ctxt, struct buckets_in_flight *buckets_in_flight) { struct btree_trans *trans = ctxt->trans; - struct bch_fs *c = trans->c; - size_t nr_to_get = max_t(size_t, 16U, buckets_in_flight->nr / 4); - size_t saw = 0, in_flight = 0, not_movable = 0, sectors = 0; - int ret; - move_buckets_wait(ctxt, buckets_in_flight, false); + int ret = for_each_btree_key_max(trans, iter, BTREE_ID_lru, + lru_start(BCH_LRU_BUCKET_FRAGMENTATION), + lru_end(BCH_LRU_BUCKET_FRAGMENTATION), + 0, k, + try_add_copygc_bucket(trans, buckets_in_flight, + u64_to_bucket(k.k->p.offset), + lru_pos_time(k.k->p)) + ); - ret = bch2_btree_write_buffer_tryflush(trans); - if (bch2_err_matches(ret, EROFS)) - return ret; + return ret < 0 ? 
ret : 0; +} - if (bch2_fs_fatal_err_on(ret, c, "%s: from bch2_btree_write_buffer_tryflush()", bch2_err_str(ret))) - return ret; +static int bch2_copygc_get_stripe_buckets(struct moving_context *ctxt, + struct buckets_in_flight *buckets_in_flight) +{ + struct btree_trans *trans = ctxt->trans; - ret = for_each_btree_key_max(trans, iter, BTREE_ID_lru, - lru_pos(BCH_LRU_BUCKET_FRAGMENTATION, 0, 0), - lru_pos(BCH_LRU_BUCKET_FRAGMENTATION, U64_MAX, LRU_TIME_MAX), - 0, k, ({ - struct move_bucket b = { .k.bucket = u64_to_bucket(k.k->p.offset) }; - int ret2 = 0; + int ret = for_each_btree_key_max(trans, iter, BTREE_ID_lru, + lru_start(BCH_LRU_STRIPE_FRAGMENTATION), + lru_end(BCH_LRU_STRIPE_FRAGMENTATION), + 0, lru_k, ({ + CLASS(btree_iter, s_iter)(trans, BTREE_ID_stripes, POS(0, lru_k.k->p.offset), 0); + struct bkey_s_c s_k = bch2_btree_iter_peek_slot(&s_iter); + int ret2 = bkey_err(s_k); + if (ret2) + goto err; - saw++; + if (s_k.k->type != KEY_TYPE_stripe) + continue; - ret2 = bch2_bucket_is_movable(trans, &b, lru_pos_time(k.k->p)); - if (ret2 < 0) - goto err; + const struct bch_stripe *s = bkey_s_c_to_stripe(s_k).v; - if (!ret2) - not_movable++; - else if (bucket_in_flight(buckets_in_flight, b.k)) - in_flight++; - else { - struct move_bucket *b_i = kmalloc(sizeof(*b_i), GFP_KERNEL); - ret2 = b_i ? 0 : -ENOMEM; + /* write buffer race? */ + if (stripe_lru_pos(s) != lru_pos_time(lru_k.k->p)) + continue; + + unsigned nr_data = s->nr_blocks - s->nr_redundant; + for (unsigned i = 0; i < nr_data; i++) { + if (!stripe_blockcount_get(s, i)) + continue; + + const struct bch_extent_ptr *ptr = s->ptrs + i; + CLASS(bch2_dev_tryget, ca)(trans->c, ptr->dev); + if (unlikely(!ca)) + continue; + + ret2 = try_add_copygc_bucket(trans, buckets_in_flight, + PTR_BUCKET_POS(ca, ptr), U64_MAX); if (ret2) - goto err; + break; + } +err: + ret2; + })); - *b_i = b; + return ret < 0 ? 
ret : 0; +} + +static bool should_do_ec_copygc(struct btree_trans *trans) +{ + u64 stripe_frag_ratio = 0; + + for_each_btree_key_max(trans, iter, BTREE_ID_lru, + lru_start(BCH_LRU_STRIPE_FRAGMENTATION), + lru_end(BCH_LRU_STRIPE_FRAGMENTATION), + 0, lru_k, ({ + CLASS(btree_iter, s_iter)(trans, BTREE_ID_stripes, POS(0, lru_k.k->p.offset), 0); + struct bkey_s_c s_k = bch2_btree_iter_peek_slot(&s_iter); + int ret = bkey_err(s_k); + if (ret) + goto err; - ret2 = darray_push(&buckets_in_flight->to_evacuate, b_i); - if (ret2) { - kfree(b_i); - goto err; - } + if (s_k.k->type != KEY_TYPE_stripe) + continue; - ret2 = rhashtable_lookup_insert_fast(buckets_in_flight->table, &b_i->hash, - bch_move_bucket_params); - BUG_ON(ret2); + const struct bch_stripe *s = bkey_s_c_to_stripe(s_k).v; - sectors += b.sectors; - } + /* write buffer race? */ + if (stripe_lru_pos(s) != lru_pos_time(lru_k.k->p)) + continue; - ret2 = buckets_in_flight->to_evacuate.nr >= nr_to_get; + unsigned nr_data = s->nr_blocks - s->nr_redundant, blocks_nonempty = 0; + for (unsigned i = 0; i < nr_data; i++) + blocks_nonempty += !!stripe_blockcount_get(s, i); + + /* stripe is pending delete */ + if (!blocks_nonempty) + continue; + + /* This matches the calculation in alloc_lru_idx_fragmentation, so we can + * directly compare without actually looking up the bucket pointed to by the + * bucket fragmentation lru: + */ + stripe_frag_ratio = div_u64(blocks_nonempty * (1ULL << 31), nr_data); + break; err: - ret2; + ret; })); - pr_debug("have: %zu (%zu) saw %zu in flight %zu not movable %zu got %zu (%zu)/%zu buckets ret %i", - buckets_in_flight->nr, buckets_in_flight->sectors, - saw, in_flight, not_movable, buckets_in_flight->to_evacuate.nr, sectors, nr_to_get, ret); + CLASS(btree_iter, iter)(trans, BTREE_ID_lru, lru_start(BCH_LRU_BUCKET_FRAGMENTATION), 0); + struct bkey_s_c lru_k; - return ret < 0 ? 
ret : 0; + lockrestart_do(trans, bkey_err(lru_k = bch2_btree_iter_peek_max(&iter, + lru_end(BCH_LRU_BUCKET_FRAGMENTATION)))); + + u64 bucket_frag_ratio = lru_k.k && !bkey_err(lru_k) ? lru_pos_time(lru_k.k->p) : 0; + + /* Prefer normal bucket copygc */ + return stripe_frag_ratio && stripe_frag_ratio * 2 < bucket_frag_ratio; } noinline @@ -213,7 +295,18 @@ static int bch2_copygc(struct moving_context *ctxt, u64 sectors_moved = atomic64_read(&ctxt->stats->sectors_moved); int ret = 0; - ret = bch2_copygc_get_buckets(ctxt, buckets_in_flight); + move_buckets_wait(ctxt, buckets_in_flight, false); + + ret = bch2_btree_write_buffer_tryflush(trans); + if (bch2_err_matches(ret, EROFS)) + goto err; + + if (bch2_fs_fatal_err_on(ret, c, "%s: from bch2_btree_write_buffer_tryflush()", bch2_err_str(ret))) + goto err; + + ret = should_do_ec_copygc(trans) + ? bch2_copygc_get_stripe_buckets(ctxt, buckets_in_flight) + : bch2_copygc_get_buckets(ctxt, buckets_in_flight); if (ret) goto err; @@ -265,7 +358,8 @@ static u64 bch2_copygc_dev_wait_amount(struct bch_dev *ca) for (unsigned i = 0; i < BCH_DATA_NR; i++) if (data_type_movable(i)) - fragmented += usage_full.d[i].fragmented; + fragmented += usage_full.d[i].buckets * ca->mi.bucket_size - + usage_full.d[i].sectors; return max(0LL, fragmented_allowed - fragmented); } diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index c57ff235a97a..21aa2edb13ac 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -626,93 +626,6 @@ fsck_err: return ret; } -static bool check_version_upgrade(struct bch_fs *c) -{ - unsigned latest_version = bcachefs_metadata_version_current; - unsigned latest_compatible = min(latest_version, - bch2_latest_compatible_version(c->sb.version)); - unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version; - unsigned new_version = 0; - bool ret = false; - - if (old_version < bcachefs_metadata_required_upgrade_below) { - if (c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible || - 
latest_compatible < bcachefs_metadata_required_upgrade_below) - new_version = latest_version; - else - new_version = latest_compatible; - } else { - switch (c->opts.version_upgrade) { - case BCH_VERSION_UPGRADE_compatible: - new_version = latest_compatible; - break; - case BCH_VERSION_UPGRADE_incompatible: - new_version = latest_version; - break; - case BCH_VERSION_UPGRADE_none: - new_version = min(old_version, latest_version); - break; - } - } - - if (new_version > old_version) { - CLASS(printbuf, buf)(); - - if (old_version < bcachefs_metadata_required_upgrade_below) - prt_str(&buf, "Version upgrade required:\n"); - - if (old_version != c->sb.version) { - prt_str(&buf, "Version upgrade from "); - bch2_version_to_text(&buf, c->sb.version_upgrade_complete); - prt_str(&buf, " to "); - bch2_version_to_text(&buf, c->sb.version); - prt_str(&buf, " incomplete\n"); - } - - prt_printf(&buf, "Doing %s version upgrade from ", - BCH_VERSION_MAJOR(old_version) != BCH_VERSION_MAJOR(new_version) - ? 
"incompatible" : "compatible"); - bch2_version_to_text(&buf, old_version); - prt_str(&buf, " to "); - bch2_version_to_text(&buf, new_version); - prt_newline(&buf); - - struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); - __le64 passes = ext->recovery_passes_required[0]; - bch2_sb_set_upgrade(c, old_version, new_version); - passes = ext->recovery_passes_required[0] & ~passes; - - if (passes) { - prt_str(&buf, " running recovery passes: "); - prt_bitflags(&buf, bch2_recovery_passes, - bch2_recovery_passes_from_stable(le64_to_cpu(passes))); - } - - bch_notice(c, "%s", buf.buf); - ret = true; - } - - if (new_version > c->sb.version_incompat_allowed && - c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible) { - CLASS(printbuf, buf)(); - - prt_str(&buf, "Now allowing incompatible features up to "); - bch2_version_to_text(&buf, new_version); - prt_str(&buf, ", previously allowed up to "); - bch2_version_to_text(&buf, c->sb.version_incompat_allowed); - prt_newline(&buf); - - bch_notice(c, "%s", buf.buf); - ret = true; - } - - if (ret) - bch2_sb_upgrade(c, new_version, - c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible); - - return ret; -} - int bch2_fs_recovery(struct bch_fs *c) { struct bch_sb_field_clean *clean = NULL; @@ -732,108 +645,6 @@ int bch2_fs_recovery(struct bch_fs *c) bch_info(c, "recovering from unclean shutdown"); } - if (!(c->sb.features & (1ULL << BCH_FEATURE_new_extent_overwrite))) { - bch_err(c, "feature new_extent_overwrite not set, filesystem no longer supported"); - ret = -EINVAL; - goto err; - } - - if (!c->sb.clean && - !(c->sb.features & (1ULL << BCH_FEATURE_extents_above_btree_updates))) { - bch_err(c, "filesystem needs recovery from older version; run fsck from older bcachefs-tools to fix"); - ret = -EINVAL; - goto err; - } - - if (c->opts.norecovery) { - c->opts.recovery_pass_last = c->opts.recovery_pass_last - ? 
min(c->opts.recovery_pass_last, BCH_RECOVERY_PASS_snapshots_read) - : BCH_RECOVERY_PASS_snapshots_read; - c->opts.nochanges = true; - } - - if (c->opts.nochanges) - c->opts.read_only = true; - - if (c->opts.journal_rewind) { - bch_info(c, "rewinding journal, fsck required"); - c->opts.fsck = true; - } - - if (go_rw_in_recovery(c)) { - /* - * start workqueues/kworkers early - kthread creation checks for - * pending signals, which is _very_ annoying - */ - ret = bch2_fs_init_rw(c); - if (ret) - goto err; - } - - mutex_lock(&c->sb_lock); - struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); - bool write_sb = false; - - if (BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb)) { - ext->recovery_passes_required[0] |= - cpu_to_le64(bch2_recovery_passes_to_stable(BIT_ULL(BCH_RECOVERY_PASS_check_topology))); - write_sb = true; - } - - u64 sb_passes = bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0])); - if (sb_passes) { - CLASS(printbuf, buf)(); - prt_str(&buf, "superblock requires following recovery passes to be run:\n "); - prt_bitflags(&buf, bch2_recovery_passes, sb_passes); - bch_info(c, "%s", buf.buf); - } - - if (bch2_check_version_downgrade(c)) { - CLASS(printbuf, buf)(); - - prt_str(&buf, "Version downgrade required:"); - - __le64 passes = ext->recovery_passes_required[0]; - bch2_sb_set_downgrade(c, - BCH_VERSION_MINOR(bcachefs_metadata_version_current), - BCH_VERSION_MINOR(c->sb.version)); - passes = ext->recovery_passes_required[0] & ~passes; - if (passes) { - prt_str(&buf, "\n running recovery passes: "); - prt_bitflags(&buf, bch2_recovery_passes, - bch2_recovery_passes_from_stable(le64_to_cpu(passes))); - } - - bch_info(c, "%s", buf.buf); - write_sb = true; - } - - if (check_version_upgrade(c)) - write_sb = true; - - c->opts.recovery_passes |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0])); - - if (c->sb.version_upgrade_complete < bcachefs_metadata_version_autofix_errors) { - 
SET_BCH_SB_ERROR_ACTION(c->disk_sb.sb, BCH_ON_ERROR_fix_safe); - write_sb = true; - } - - if (write_sb) - bch2_write_super(c); - mutex_unlock(&c->sb_lock); - - if (c->sb.clean) - set_bit(BCH_FS_clean_recovery, &c->flags); - if (c->opts.fsck) - set_bit(BCH_FS_in_fsck, &c->flags); - set_bit(BCH_FS_in_recovery, &c->flags); - - ret = bch2_blacklist_table_initialize(c); - if (ret) { - bch_err(c, "error initializing blacklist table"); - goto err; - } - bch2_journal_pos_from_member_info_resume(c); if (!c->sb.clean || c->opts.retain_recovery_info) { @@ -1053,8 +864,8 @@ use_clean: } mutex_lock(&c->sb_lock); - ext = bch2_sb_field_get(c->disk_sb.sb, ext); - write_sb = false; + struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); + bool write_sb = false; if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) != le16_to_cpu(c->disk_sb.sb->version)) { SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, le16_to_cpu(c->disk_sb.sb->version)); diff --git a/fs/bcachefs/sb-counters_format.h b/fs/bcachefs/sb-counters_format.h index 2e3a56bfd085..f3ea53a55384 100644 --- a/fs/bcachefs/sb-counters_format.h +++ b/fs/bcachefs/sb-counters_format.h @@ -31,6 +31,8 @@ enum counters_flags { x(io_move_fail, 38, TYPE_COUNTER) \ x(io_move_write_fail, 82, TYPE_COUNTER) \ x(io_move_start_fail, 39, TYPE_COUNTER) \ + x(io_move_drop_only, 91, TYPE_COUNTER) \ + x(io_move_noop, 92, TYPE_COUNTER) \ x(io_move_created_rebalance, 83, TYPE_COUNTER) \ x(io_move_evacuate_bucket, 84, TYPE_COUNTER) \ x(bucket_invalidate, 3, TYPE_COUNTER) \ diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index dd4ee46606d7..5317b1bfe2e5 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -76,6 +76,8 @@ enum bch_fsck_flags { x(btree_node_read_error, 62, FSCK_AUTOFIX) \ x(btree_node_topology_bad_min_key, 63, FSCK_AUTOFIX) \ x(btree_node_topology_bad_max_key, 64, FSCK_AUTOFIX) \ + x(btree_node_topology_bad_root_min_key, 323, FSCK_AUTOFIX) \ + 
x(btree_node_topology_bad_root_max_key, 324, FSCK_AUTOFIX) \ x(btree_node_topology_overwritten_by_prev_node, 65, FSCK_AUTOFIX) \ x(btree_node_topology_overwritten_by_next_node, 66, FSCK_AUTOFIX) \ x(btree_node_topology_interior_node_empty, 67, FSCK_AUTOFIX) \ @@ -334,7 +336,7 @@ enum bch_fsck_flags { x(dirent_stray_data_after_cf_name, 305, 0) \ x(rebalance_work_incorrectly_set, 309, FSCK_AUTOFIX) \ x(rebalance_work_incorrectly_unset, 310, FSCK_AUTOFIX) \ - x(MAX, 323, 0) + x(MAX, 325, 0) enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index 0573c7b00151..e3c73d903898 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -68,34 +68,13 @@ struct bch_member *bch2_members_v2_get_mut(struct bch_sb *sb, int i) return __bch2_members_v2_get_mut(bch2_sb_field_get(sb, members_v2), i); } -static struct bch_member members_v2_get(struct bch_sb_field_members_v2 *mi, int i) -{ - struct bch_member ret, *p = __bch2_members_v2_get_mut(mi, i); - memset(&ret, 0, sizeof(ret)); - memcpy(&ret, p, min_t(size_t, le16_to_cpu(mi->member_bytes), sizeof(ret))); - return ret; -} - -static struct bch_member *members_v1_get_mut(struct bch_sb_field_members_v1 *mi, int i) -{ - return (void *) mi->_members + (i * BCH_MEMBER_V1_BYTES); -} - -static struct bch_member members_v1_get(struct bch_sb_field_members_v1 *mi, int i) -{ - struct bch_member ret, *p = members_v1_get_mut(mi, i); - memset(&ret, 0, sizeof(ret)); - memcpy(&ret, p, min_t(size_t, BCH_MEMBER_V1_BYTES, sizeof(ret))); - return ret; -} - struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i) { struct bch_sb_field_members_v2 *mi2 = bch2_sb_field_get(sb, members_v2); if (mi2) - return members_v2_get(mi2, i); + return bch2_members_v2_get(mi2, i); struct bch_sb_field_members_v1 *mi1 = bch2_sb_field_get(sb, members_v1); - return members_v1_get(mi1, i); + return bch2_members_v1_get(mi1, i); } static int sb_members_v2_resize_entries(struct 
bch_fs *c) @@ -211,33 +190,25 @@ static int validate_member(struct printbuf *err, return 0; } -static void member_to_text(struct printbuf *out, - struct bch_member m, - struct bch_sb_field_disk_groups *gi, - struct bch_sb *sb, - int i) +void bch2_member_to_text(struct printbuf *out, + struct bch_member *m, + struct bch_sb_field_disk_groups *gi, + struct bch_sb *sb, + unsigned idx) { - unsigned data_have = bch2_sb_dev_has_data(sb, i); - u64 bucket_size = le16_to_cpu(m.bucket_size); - u64 device_size = le64_to_cpu(m.nbuckets) * bucket_size; - - if (!bch2_member_alive(&m)) - return; - - prt_printf(out, "Device:\t%u\n", i); - - printbuf_indent_add(out, 2); + u64 bucket_size = le16_to_cpu(m->bucket_size); + u64 device_size = le64_to_cpu(m->nbuckets) * bucket_size; prt_printf(out, "Label:\t"); - if (BCH_MEMBER_GROUP(&m)) + if (BCH_MEMBER_GROUP(m)) bch2_disk_path_to_text_sb(out, sb, - BCH_MEMBER_GROUP(&m) - 1); + BCH_MEMBER_GROUP(m) - 1); else prt_printf(out, "(none)"); prt_newline(out); prt_printf(out, "UUID:\t"); - pr_uuid(out, m.uuid.b); + pr_uuid(out, m->uuid.b); prt_newline(out); prt_printf(out, "Size:\t"); @@ -245,40 +216,41 @@ static void member_to_text(struct printbuf *out, prt_newline(out); for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) - prt_printf(out, "%s errors:\t%llu\n", bch2_member_error_strs[i], le64_to_cpu(m.errors[i])); + prt_printf(out, "%s errors:\t%llu\n", bch2_member_error_strs[i], le64_to_cpu(m->errors[i])); for (unsigned i = 0; i < BCH_IOPS_NR; i++) - prt_printf(out, "%s iops:\t%u\n", bch2_iops_measurements[i], le32_to_cpu(m.iops[i])); + prt_printf(out, "%s iops:\t%u\n", bch2_iops_measurements[i], le32_to_cpu(m->iops[i])); prt_printf(out, "Bucket size:\t"); prt_units_u64(out, bucket_size << 9); prt_newline(out); - prt_printf(out, "First bucket:\t%u\n", le16_to_cpu(m.first_bucket)); - prt_printf(out, "Buckets:\t%llu\n", le64_to_cpu(m.nbuckets)); + prt_printf(out, "First bucket:\t%u\n", le16_to_cpu(m->first_bucket)); + prt_printf(out, 
"Buckets:\t%llu\n", le64_to_cpu(m->nbuckets)); prt_printf(out, "Last mount:\t"); - if (m.last_mount) - bch2_prt_datetime(out, le64_to_cpu(m.last_mount)); + if (m->last_mount) + bch2_prt_datetime(out, le64_to_cpu(m->last_mount)); else prt_printf(out, "(never)"); prt_newline(out); - prt_printf(out, "Last superblock write:\t%llu\n", le64_to_cpu(m.seq)); + prt_printf(out, "Last superblock write:\t%llu\n", le64_to_cpu(m->seq)); prt_printf(out, "State:\t%s\n", - BCH_MEMBER_STATE(&m) < BCH_MEMBER_STATE_NR - ? bch2_member_states[BCH_MEMBER_STATE(&m)] + BCH_MEMBER_STATE(m) < BCH_MEMBER_STATE_NR + ? bch2_member_states[BCH_MEMBER_STATE(m)] : "unknown"); prt_printf(out, "Data allowed:\t"); - if (BCH_MEMBER_DATA_ALLOWED(&m)) - prt_bitflags(out, __bch2_data_types, BCH_MEMBER_DATA_ALLOWED(&m)); + if (BCH_MEMBER_DATA_ALLOWED(m)) + prt_bitflags(out, __bch2_data_types, BCH_MEMBER_DATA_ALLOWED(m)); else prt_printf(out, "(none)"); prt_newline(out); prt_printf(out, "Has data:\t"); + unsigned data_have = bch2_sb_dev_has_data(sb, idx); if (data_have) prt_bitflags(out, __bch2_data_types, data_have); else @@ -286,22 +258,36 @@ static void member_to_text(struct printbuf *out, prt_newline(out); prt_printf(out, "Btree allocated bitmap blocksize:\t"); - if (m.btree_bitmap_shift < 64) - prt_units_u64(out, 1ULL << m.btree_bitmap_shift); + if (m->btree_bitmap_shift < 64) + prt_units_u64(out, 1ULL << m->btree_bitmap_shift); else - prt_printf(out, "(invalid shift %u)", m.btree_bitmap_shift); + prt_printf(out, "(invalid shift %u)", m->btree_bitmap_shift); prt_newline(out); prt_printf(out, "Btree allocated bitmap:\t"); - bch2_prt_u64_base2_nbits(out, le64_to_cpu(m.btree_allocated_bitmap), 64); + bch2_prt_u64_base2_nbits(out, le64_to_cpu(m->btree_allocated_bitmap), 64); prt_newline(out); - prt_printf(out, "Durability:\t%llu\n", BCH_MEMBER_DURABILITY(&m) ? BCH_MEMBER_DURABILITY(&m) - 1 : 1); + prt_printf(out, "Durability:\t%llu\n", BCH_MEMBER_DURABILITY(m) ? 
BCH_MEMBER_DURABILITY(m) - 1 : 1); + + prt_printf(out, "Discard:\t%llu\n", BCH_MEMBER_DISCARD(m)); + prt_printf(out, "Freespace initialized:\t%llu\n", BCH_MEMBER_FREESPACE_INITIALIZED(m)); + prt_printf(out, "Resize on mount:\t%llu\n", BCH_MEMBER_RESIZE_ON_MOUNT(m)); +} + +static void member_to_text(struct printbuf *out, + struct bch_member m, + struct bch_sb_field_disk_groups *gi, + struct bch_sb *sb, + unsigned idx) +{ + if (!bch2_member_alive(&m)) + return; - prt_printf(out, "Discard:\t%llu\n", BCH_MEMBER_DISCARD(&m)); - prt_printf(out, "Freespace initialized:\t%llu\n", BCH_MEMBER_FREESPACE_INITIALIZED(&m)); - prt_printf(out, "Resize on mount:\t%llu\n", BCH_MEMBER_RESIZE_ON_MOUNT(&m)); + prt_printf(out, "Device:\t%u\n", idx); + printbuf_indent_add(out, 2); + bch2_member_to_text(out, &m, gi, sb, idx); printbuf_indent_sub(out, 2); } @@ -317,7 +303,7 @@ static int bch2_sb_members_v1_validate(struct bch_sb *sb, struct bch_sb_field *f } for (i = 0; i < sb->nr_devices; i++) { - struct bch_member m = members_v1_get(mi, i); + struct bch_member m = bch2_members_v1_get(mi, i); int ret = validate_member(err, m, sb, i); if (ret) @@ -343,7 +329,7 @@ static void bch2_sb_members_v1_to_text(struct printbuf *out, struct bch_sb *sb, prt_printf(out, "nr_devices mismatch: have %i entries, should be %u", nr, sb->nr_devices); for (unsigned i = 0; i < min(sb->nr_devices, nr); i++) - member_to_text(out, members_v1_get(mi, i), gi, sb, i); + member_to_text(out, bch2_members_v1_get(mi, i), gi, sb, i); } const struct bch_sb_field_ops bch_sb_field_ops_members_v1 = { @@ -377,7 +363,7 @@ static void bch2_sb_members_v2_to_text(struct printbuf *out, struct bch_sb *sb, */ for (unsigned i = 0; i < min(sb->nr_devices, nr); i++) - member_to_text(out, members_v2_get(mi, i), gi, sb, i); + member_to_text(out, bch2_members_v2_get(mi, i), gi, sb, i); } static int bch2_sb_members_v2_validate(struct bch_sb *sb, struct bch_sb_field *f, @@ -394,7 +380,7 @@ static int bch2_sb_members_v2_validate(struct bch_sb 
*sb, struct bch_sb_field *f } for (unsigned i = 0; i < sb->nr_devices; i++) { - int ret = validate_member(err, members_v2_get(mi, i), sb, i); + int ret = validate_member(err, bch2_members_v2_get(mi, i), sb, i); if (ret) return ret; } @@ -430,7 +416,7 @@ void bch2_sb_members_to_cpu(struct bch_fs *c) struct bch_sb_field_members_v2 *mi2 = bch2_sb_field_get(c->disk_sb.sb, members_v2); if (mi2) for (unsigned i = 0; i < c->sb.nr_devices; i++) { - struct bch_member m = members_v2_get(mi2, i); + struct bch_member m = bch2_members_v2_get(mi2, i); bool removed = uuid_equal(&m.uuid, &BCH_SB_MEMBER_DELETED_UUID); mod_bit(i, c->devs_removed.d, removed); } diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h index 35d4ab9b6197..6de999cf71cb 100644 --- a/fs/bcachefs/sb-members.h +++ b/fs/bcachefs/sb-members.h @@ -14,11 +14,36 @@ __bch2_members_v2_get_mut(struct bch_sb_field_members_v2 *mi, unsigned i) return (void *) mi->_members + (i * le16_to_cpu(mi->member_bytes)); } +static inline struct bch_member bch2_members_v2_get(struct bch_sb_field_members_v2 *mi, int i) +{ + struct bch_member ret, *p = __bch2_members_v2_get_mut(mi, i); + memset(&ret, 0, sizeof(ret)); + memcpy(&ret, p, min_t(size_t, le16_to_cpu(mi->member_bytes), sizeof(ret))); + return ret; +} + +static inline struct bch_member *members_v1_get_mut(struct bch_sb_field_members_v1 *mi, int i) +{ + return (void *) mi->_members + (i * BCH_MEMBER_V1_BYTES); +} + +static inline struct bch_member bch2_members_v1_get(struct bch_sb_field_members_v1 *mi, int i) +{ + struct bch_member ret, *p = members_v1_get_mut(mi, i); + memset(&ret, 0, sizeof(ret)); + memcpy(&ret, p, min_t(size_t, BCH_MEMBER_V1_BYTES, sizeof(ret))); + return ret; +} + int bch2_sb_members_v2_init(struct bch_fs *c); int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb); struct bch_member *bch2_members_v2_get_mut(struct bch_sb *sb, int i); struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i); +void bch2_member_to_text(struct printbuf 
*, struct bch_member *, + struct bch_sb_field_disk_groups *, + struct bch_sb *, unsigned); + static inline bool bch2_dev_is_online(struct bch_dev *ca) { return !enumerated_ref_is_zero(&ca->io_ref[READ]); diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 5a1f81749661..84f987d3a02a 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -1146,7 +1146,7 @@ static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id) if (bch2_fs_inconsistent_on(i == 2, c, "snapshot %u missing child pointer to %u", parent_id, id)) - return ret; + return bch_err_throw(c, ENOENT_snapshot); parent->v.children[i] = cpu_to_le32(child_id); diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index b0019488f586..ef15e614f4f3 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -55,6 +55,7 @@ #include "replicas.h" #include "sb-clean.h" #include "sb-counters.h" +#include "sb-downgrade.h" #include "sb-errors.h" #include "sb-members.h" #include "snapshot.h" @@ -842,6 +843,233 @@ int bch2_fs_init_rw(struct bch_fs *c) return 0; } +static bool check_version_upgrade(struct bch_fs *c) +{ + unsigned latest_version = bcachefs_metadata_version_current; + unsigned latest_compatible = min(latest_version, + bch2_latest_compatible_version(c->sb.version)); + unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version; + unsigned new_version = 0; + bool ret = false; + + if (old_version < bcachefs_metadata_required_upgrade_below) { + if (c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible || + latest_compatible < bcachefs_metadata_required_upgrade_below) + new_version = latest_version; + else + new_version = latest_compatible; + } else { + switch (c->opts.version_upgrade) { + case BCH_VERSION_UPGRADE_compatible: + new_version = latest_compatible; + break; + case BCH_VERSION_UPGRADE_incompatible: + new_version = latest_version; + break; + case BCH_VERSION_UPGRADE_none: + new_version = min(old_version, latest_version); + break; + } + } + + if 
(new_version > old_version) { + CLASS(printbuf, buf)(); + + if (old_version < bcachefs_metadata_required_upgrade_below) + prt_str(&buf, "Version upgrade required:\n"); + + if (old_version != c->sb.version) { + prt_str(&buf, "Version upgrade from "); + bch2_version_to_text(&buf, c->sb.version_upgrade_complete); + prt_str(&buf, " to "); + bch2_version_to_text(&buf, c->sb.version); + prt_str(&buf, " incomplete\n"); + } + + prt_printf(&buf, "Doing %s version upgrade from ", + BCH_VERSION_MAJOR(old_version) != BCH_VERSION_MAJOR(new_version) + ? "incompatible" : "compatible"); + bch2_version_to_text(&buf, old_version); + prt_str(&buf, " to "); + bch2_version_to_text(&buf, new_version); + prt_newline(&buf); + + struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); + __le64 passes = ext->recovery_passes_required[0]; + bch2_sb_set_upgrade(c, old_version, new_version); + passes = ext->recovery_passes_required[0] & ~passes; + + if (passes) { + prt_str(&buf, " running recovery passes: "); + prt_bitflags(&buf, bch2_recovery_passes, + bch2_recovery_passes_from_stable(le64_to_cpu(passes))); + } + + bch_notice(c, "%s", buf.buf); + ret = true; + } + + if (new_version > c->sb.version_incompat_allowed && + c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible) { + CLASS(printbuf, buf)(); + + prt_str(&buf, "Now allowing incompatible features up to "); + bch2_version_to_text(&buf, new_version); + prt_str(&buf, ", previously allowed up to "); + bch2_version_to_text(&buf, c->sb.version_incompat_allowed); + prt_newline(&buf); + + bch_notice(c, "%s", buf.buf); + ret = true; + } + + if (ret) + bch2_sb_upgrade(c, new_version, + c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible); + + return ret; +} + +noinline_for_stack +static int bch2_fs_opt_version_init(struct bch_fs *c) +{ + int ret = 0; + + if (c->opts.norecovery) { + c->opts.recovery_pass_last = c->opts.recovery_pass_last + ? 
min(c->opts.recovery_pass_last, BCH_RECOVERY_PASS_snapshots_read) + : BCH_RECOVERY_PASS_snapshots_read; + c->opts.nochanges = true; + } + + if (c->opts.nochanges) + c->opts.read_only = true; + + if (c->opts.journal_rewind) + c->opts.fsck = true; + + CLASS(printbuf, p)(); + bch2_log_msg_start(c, &p); + + prt_str(&p, "starting version "); + bch2_version_to_text(&p, c->sb.version); + + bool first = true; + for (enum bch_opt_id i = 0; i < bch2_opts_nr; i++) { + const struct bch_option *opt = &bch2_opt_table[i]; + u64 v = bch2_opt_get_by_id(&c->opts, i); + + if (!(opt->flags & OPT_MOUNT)) + continue; + + if (v == bch2_opt_get_by_id(&bch2_opts_default, i)) + continue; + + prt_str(&p, first ? " opts=" : ","); + first = false; + bch2_opt_to_text(&p, c, c->disk_sb.sb, opt, v, OPT_SHOW_MOUNT_STYLE); + } + + if (c->sb.version_incompat_allowed != c->sb.version) { + prt_printf(&p, "\nallowing incompatible features above "); + bch2_version_to_text(&p, c->sb.version_incompat_allowed); + } + + if (c->opts.verbose) { + prt_printf(&p, "\nfeatures: "); + prt_bitflags(&p, bch2_sb_features, c->sb.features); + } + + if (c->sb.multi_device) { + prt_printf(&p, "\nwith devices"); + for_each_online_member(c, ca, BCH_DEV_READ_REF_bch2_online_devs) { + prt_char(&p, ' '); + prt_str(&p, ca->name); + } + } + + if (c->cf_encoding) + prt_printf(&p, "\nUsing encoding defined by superblock: utf8-%u.%u.%u", + unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING), + unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING), + unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING)); + + if (c->opts.journal_rewind) + prt_printf(&p, "\nrewinding journal, fsck required"); + + scoped_guard(mutex, &c->sb_lock) { + struct bch_sb_field_ext *ext = bch2_sb_field_get_minsize(&c->disk_sb, ext, + sizeof(struct bch_sb_field_ext) / sizeof(u64)); + if (!ext) + return bch_err_throw(c, ENOSPC_sb); + + ret = bch2_sb_members_v2_init(c); + if (ret) + return ret; + + __le64 now = cpu_to_le64(ktime_get_real_seconds()); + for_each_online_member_rcu(c, ca) + 
bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = now; + + if (BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb)) + ext->recovery_passes_required[0] |= + cpu_to_le64(bch2_recovery_passes_to_stable(BIT_ULL(BCH_RECOVERY_PASS_check_topology))); + + u64 sb_passes = bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0])); + if (sb_passes) { + prt_str(&p, "\nsuperblock requires following recovery passes to be run:\n "); + prt_bitflags(&p, bch2_recovery_passes, sb_passes); + } + + if (bch2_check_version_downgrade(c)) { + prt_str(&p, "\nVersion downgrade required:"); + + __le64 passes = ext->recovery_passes_required[0]; + bch2_sb_set_downgrade(c, + BCH_VERSION_MINOR(bcachefs_metadata_version_current), + BCH_VERSION_MINOR(c->sb.version)); + passes = ext->recovery_passes_required[0] & ~passes; + if (passes) { + prt_str(&p, "\nrunning recovery passes: "); + prt_bitflags(&p, bch2_recovery_passes, + bch2_recovery_passes_from_stable(le64_to_cpu(passes))); + } + } + + check_version_upgrade(c); + + c->opts.recovery_passes |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0])); + + if (c->sb.version_upgrade_complete < bcachefs_metadata_version_autofix_errors) + SET_BCH_SB_ERROR_ACTION(c->disk_sb.sb, BCH_ON_ERROR_fix_safe); + + /* Don't write the superblock, defer that until we go rw */ + } + + if (c->sb.clean) + set_bit(BCH_FS_clean_recovery, &c->flags); + if (c->opts.fsck) + set_bit(BCH_FS_in_fsck, &c->flags); + set_bit(BCH_FS_in_recovery, &c->flags); + + bch2_print_str(c, KERN_INFO, p.buf); + + if (BCH_SB_INITIALIZED(c->disk_sb.sb)) { + if (!(c->sb.features & (1ULL << BCH_FEATURE_new_extent_overwrite))) { + bch_err(c, "feature new_extent_overwrite not set, filesystem no longer supported"); + return -EINVAL; + } + + if (!c->sb.clean && + !(c->sb.features & (1ULL << BCH_FEATURE_extents_above_btree_updates))) { + bch_err(c, "filesystem needs recovery from older version; run fsck from older bcachefs-tools to fix"); + return 
-EINVAL; + } + } + + return 0; +} + static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts, bch_sb_handles *sbs) { @@ -1013,6 +1241,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts, ret = bch2_fs_async_obj_init(c) ?: + bch2_blacklist_table_initialize(c) ?: bch2_fs_btree_cache_init(c) ?: bch2_fs_btree_iter_init(c) ?: bch2_fs_btree_key_cache_init(&c->btree_key_cache) ?: @@ -1063,7 +1292,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts, } #endif - for (i = 0; i < c->sb.nr_devices; i++) { + for (unsigned i = 0; i < c->sb.nr_devices; i++) { if (!bch2_member_exists(c->disk_sb.sb, i)) continue; ret = bch2_dev_alloc(c, i); @@ -1078,6 +1307,20 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts, &c->clock_journal_res, (sizeof(struct jset_entry_clock) / sizeof(u64)) * 2); + ret = bch2_fs_opt_version_init(c); + if (ret) + goto err; + + /* + * start workqueues/kworkers early - kthread creation checks for pending + * signals, which is _very_ annoying + */ + if (go_rw_in_recovery(c)) { + ret = bch2_fs_init_rw(c); + if (ret) + goto err; + } + scoped_guard(mutex, &bch_fs_list_lock) ret = bch2_fs_online(c); @@ -1093,53 +1336,6 @@ err: goto out; } -noinline_for_stack -static void print_mount_opts(struct bch_fs *c) -{ - enum bch_opt_id i; - CLASS(printbuf, p)(); - bch2_log_msg_start(c, &p); - - prt_str(&p, "starting version "); - bch2_version_to_text(&p, c->sb.version); - - bool first = true; - for (i = 0; i < bch2_opts_nr; i++) { - const struct bch_option *opt = &bch2_opt_table[i]; - u64 v = bch2_opt_get_by_id(&c->opts, i); - - if (!(opt->flags & OPT_MOUNT)) - continue; - - if (v == bch2_opt_get_by_id(&bch2_opts_default, i)) - continue; - - prt_str(&p, first ? 
" opts=" : ","); - first = false; - bch2_opt_to_text(&p, c, c->disk_sb.sb, opt, v, OPT_SHOW_MOUNT_STYLE); - } - - if (c->sb.version_incompat_allowed != c->sb.version) { - prt_printf(&p, "\nallowing incompatible features above "); - bch2_version_to_text(&p, c->sb.version_incompat_allowed); - } - - if (c->opts.verbose) { - prt_printf(&p, "\nfeatures: "); - prt_bitflags(&p, bch2_sb_features, c->sb.features); - } - - if (c->sb.multi_device) { - prt_printf(&p, "\nwith devices"); - for_each_online_member(c, ca, BCH_DEV_READ_REF_bch2_online_devs) { - prt_char(&p, ' '); - prt_str(&p, ca->name); - } - } - - bch2_print_str(c, KERN_INFO, p.buf); -} - static bool bch2_fs_may_start(struct bch_fs *c) { struct bch_dev *ca; @@ -1174,38 +1370,16 @@ static bool bch2_fs_may_start(struct bch_fs *c) int bch2_fs_start(struct bch_fs *c) { - time64_t now = ktime_get_real_seconds(); int ret = 0; BUG_ON(test_bit(BCH_FS_started, &c->flags)); - print_mount_opts(c); - - if (c->cf_encoding) - bch_info(c, "Using encoding defined by superblock: utf8-%u.%u.%u", - unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING), - unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING), - unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING)); - if (!bch2_fs_may_start(c)) return bch_err_throw(c, insufficient_devices_to_start); scoped_guard(rwsem_write, &c->state_lock) { - guard(mutex)(&c->sb_lock); - if (!bch2_sb_field_get_minsize(&c->disk_sb, ext, - sizeof(struct bch_sb_field_ext) / sizeof(u64))) { - ret = bch_err_throw(c, ENOSPC_sb); - goto err; - } - - ret = bch2_sb_members_v2_init(c); - if (ret) - goto err; - scoped_guard(rcu) for_each_online_member_rcu(c, ca) { - bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = - cpu_to_le64(now); if (ca->mi.state == BCH_MEMBER_STATE_rw) bch2_dev_allocator_add(c, ca); } |