diff options
Diffstat (limited to 'fs/bcachefs/recovery.c')
-rw-r--r-- | fs/bcachefs/recovery.c | 258 |
1 files changed, 36 insertions, 222 deletions
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index c57ff235a97a..6319144a440c 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -64,7 +64,6 @@ int bch2_btree_lost_data(struct bch_fs *c, * but in debug mode we want the next fsck run to be clean: */ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_lrus, 0, &write_sb) ?: ret; - ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_backpointers_to_extents, 0, &write_sb) ?: ret; #endif write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_lru_entry_bad, ext->errors_silent); @@ -182,9 +181,12 @@ void bch2_reconstruct_alloc(struct bch_fs *c) */ static void zero_out_btree_mem_ptr(struct journal_keys *keys) { - darray_for_each(*keys, i) - if (i->k->k.type == KEY_TYPE_btree_ptr_v2) - bkey_i_to_btree_ptr_v2(i->k)->v.mem_ptr = 0; + struct bch_fs *c = container_of(keys, struct bch_fs, journal_keys); + darray_for_each(*keys, i) { + struct bkey_i *k = journal_key_k(c, i); + if (k->k.type == KEY_TYPE_btree_ptr_v2) + bkey_i_to_btree_ptr_v2(k)->v.mem_ptr = 0; + } } /* journal replay: */ @@ -202,8 +204,10 @@ static void replay_now_at(struct journal *j, u64 seq) static int bch2_journal_replay_accounting_key(struct btree_trans *trans, struct journal_key *k) { + struct bch_fs *c = trans->c; + struct bkey_i *bk = journal_key_k(c, k); struct btree_iter iter; - bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p, + bch2_trans_node_iter_init(trans, &iter, k->btree_id, bk->k.p, BTREE_MAX_DEPTH, k->level, BTREE_ITER_intent); int ret = bch2_btree_iter_traverse(&iter); @@ -214,14 +218,14 @@ static int bch2_journal_replay_accounting_key(struct btree_trans *trans, struct bkey_s_c old = bch2_btree_path_peek_slot(btree_iter_path(trans, &iter), &u); /* Has this delta already been applied to the btree? */ - if (bversion_cmp(old.k->bversion, k->k->k.bversion) >= 0) { + if (bversion_cmp(old.k->bversion, bk->k.bversion) >= 0) { ret = 0; goto out; } - struct bkey_i *new = k->k; + struct bkey_i *new = bk; if (old.k->type == KEY_TYPE_accounting) { - new = bch2_bkey_make_mut_noupdate(trans, bkey_i_to_s_c(k->k)); + new = bch2_bkey_make_mut_noupdate(trans, bkey_i_to_s_c(bk)); ret = PTR_ERR_OR_ZERO(new); if (ret) goto out; @@ -230,7 +234,8 @@ static int bch2_journal_replay_accounting_key(struct btree_trans *trans, bkey_s_c_to_accounting(old)); } - trans->journal_res.seq = k->journal_seq; + if (!k->allocated) + trans->journal_res.seq = c->journal_entries_base_seq + k->journal_seq_offset; ret = bch2_trans_update(trans, &iter, new, BTREE_TRIGGER_norun); out: @@ -241,6 +246,7 @@ out: static int bch2_journal_replay_key(struct btree_trans *trans, struct journal_key *k) { + struct bch_fs *c = trans->c; struct btree_iter iter; unsigned iter_flags = BTREE_ITER_intent| @@ -251,7 +257,8 @@ static int bch2_journal_replay_key(struct btree_trans *trans, if (k->overwritten) return 0; - trans->journal_res.seq = k->journal_seq; + if (!k->allocated) + trans->journal_res.seq = c->journal_entries_base_seq + k->journal_seq_offset; /* * BTREE_UPDATE_key_cache_reclaim disables key cache lookup/update to @@ -266,7 +273,8 @@ static int bch2_journal_replay_key(struct btree_trans *trans, else update_flags |= BTREE_UPDATE_key_cache_reclaim; - bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p, + struct bkey_i *bk = journal_key_k(c, k); + bch2_trans_node_iter_init(trans, &iter, k->btree_id, bk->k.p, BTREE_MAX_DEPTH, k->level, iter_flags); ret = bch2_btree_iter_traverse(&iter); @@ -275,13 +283,11 @@ static int bch2_journal_replay_key(struct btree_trans *trans, struct btree_path *path = btree_iter_path(trans, &iter); if (unlikely(!btree_path_node(path, k->level))) { - struct bch_fs *c = trans->c; - CLASS(printbuf, buf)(); prt_str(&buf, "btree="); bch2_btree_id_to_text(&buf, k->btree_id); prt_printf(&buf, " level=%u ", k->level); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k->k)); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(bk)); if (!(c->recovery.passes_complete & (BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes)| BIT_ULL(BCH_RECOVERY_PASS_check_topology)))) { @@ -298,7 +304,7 @@ static int bch2_journal_replay_key(struct btree_trans *trans, } bch2_trans_iter_exit(&iter); - bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p, + bch2_trans_node_iter_init(trans, &iter, k->btree_id, bk->k.p, BTREE_MAX_DEPTH, 0, iter_flags); ret = bch2_btree_iter_traverse(&iter) ?: bch2_btree_increase_depth(trans, iter.path, 0) ?: @@ -310,17 +316,17 @@ static int bch2_journal_replay_key(struct btree_trans *trans, if (k->overwritten) goto out; - if (k->k->k.type == KEY_TYPE_accounting) { - struct bkey_i *n = bch2_trans_subbuf_alloc(trans, &trans->accounting, k->k->k.u64s); + if (bk->k.type == KEY_TYPE_accounting) { + struct bkey_i *n = bch2_trans_subbuf_alloc(trans, &trans->accounting, bk->k.u64s); ret = PTR_ERR_OR_ZERO(n); if (ret) goto out; - bkey_copy(n, k->k); + bkey_copy(n, bk); goto out; } - ret = bch2_trans_update(trans, &iter, k->k, update_flags); + ret = bch2_trans_update(trans, &iter, bk, update_flags); out: bch2_trans_iter_exit(&iter); return ret; @@ -331,13 +337,9 @@ static int journal_sort_seq_cmp(const void *_l, const void *_r) const struct journal_key *l = *((const struct journal_key **)_l); const struct journal_key *r = *((const struct journal_key **)_r); - /* - * Map 0 to U64_MAX, so that keys with journal_seq === 0 come last - * - * journal_seq == 0 means that the key comes from early repair, and - * should be inserted last so as to avoid overflowing the journal - */ - return cmp_int(l->journal_seq - 1, r->journal_seq - 1); + return !l->allocated && !r->allocated + ? cmp_int(l->journal_seq_offset, r->journal_seq_offset) + : cmp_int(l->allocated, r->allocated); } DEFINE_DARRAY_NAMED(darray_journal_keys, struct journal_key *) @@ -369,7 +371,9 @@ int bch2_journal_replay(struct bch_fs *c) * flush accounting keys until we're done */ darray_for_each(*keys, k) { - if (!(k->k->k.type == KEY_TYPE_accounting && !k->allocated)) + struct bkey_i *bk = journal_key_k(trans->c, k); + + if (!(bk->k.type == KEY_TYPE_accounting && !k->allocated)) continue; cond_resched(); @@ -412,7 +416,6 @@ int bch2_journal_replay(struct bch_fs *c) BCH_TRANS_COMMIT_skip_accounting_apply| (!k->allocated ? BCH_TRANS_COMMIT_no_journal_res : 0), bch2_journal_replay_key(trans, k)); - BUG_ON(!ret && !k->overwritten && k->k->k.type != KEY_TYPE_accounting); if (ret) { ret = darray_push(&keys_sorted, k); if (ret) @@ -434,8 +437,8 @@ int bch2_journal_replay(struct bch_fs *c) struct journal_key *k = *kp; - if (k->journal_seq) - replay_now_at(j, k->journal_seq); + if (!k->allocated) + replay_now_at(j, c->journal_entries_base_seq + k->journal_seq_offset); else replay_now_at(j, j->replay_journal_seq_end); @@ -607,7 +610,7 @@ static int read_btree_roots(struct bch_fs *c) c, btree_root_read_error, "error reading btree root %s: %s", buf.buf, bch2_err_str(ret))) { - if (btree_id_is_alloc(i)) + if (btree_id_can_reconstruct(i)) r->error = 0; ret = 0; } @@ -626,93 +629,6 @@ fsck_err: return ret; } -static bool check_version_upgrade(struct bch_fs *c) -{ - unsigned latest_version = bcachefs_metadata_version_current; - unsigned latest_compatible = min(latest_version, - bch2_latest_compatible_version(c->sb.version)); - unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version; - unsigned new_version = 0; - bool ret = false; - - if (old_version < bcachefs_metadata_required_upgrade_below) { - if (c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible || - latest_compatible < bcachefs_metadata_required_upgrade_below) - new_version = latest_version; - else - new_version = latest_compatible; - } else { - switch (c->opts.version_upgrade) { - case BCH_VERSION_UPGRADE_compatible: - new_version = latest_compatible; - break; - case BCH_VERSION_UPGRADE_incompatible: - new_version = latest_version; - break; - case BCH_VERSION_UPGRADE_none: - new_version = min(old_version, latest_version); - break; - } - } - - if (new_version > old_version) { - CLASS(printbuf, buf)(); - - if (old_version < bcachefs_metadata_required_upgrade_below) - prt_str(&buf, "Version upgrade required:\n"); - - if (old_version != c->sb.version) { - prt_str(&buf, "Version upgrade from "); - bch2_version_to_text(&buf, c->sb.version_upgrade_complete); - prt_str(&buf, " to "); - bch2_version_to_text(&buf, c->sb.version); - prt_str(&buf, " incomplete\n"); - } - - prt_printf(&buf, "Doing %s version upgrade from ", - BCH_VERSION_MAJOR(old_version) != BCH_VERSION_MAJOR(new_version) - ? "incompatible" : "compatible"); - bch2_version_to_text(&buf, old_version); - prt_str(&buf, " to "); - bch2_version_to_text(&buf, new_version); - prt_newline(&buf); - - struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); - __le64 passes = ext->recovery_passes_required[0]; - bch2_sb_set_upgrade(c, old_version, new_version); - passes = ext->recovery_passes_required[0] & ~passes; - - if (passes) { - prt_str(&buf, " running recovery passes: "); - prt_bitflags(&buf, bch2_recovery_passes, - bch2_recovery_passes_from_stable(le64_to_cpu(passes))); - } - - bch_notice(c, "%s", buf.buf); - ret = true; - } - - if (new_version > c->sb.version_incompat_allowed && - c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible) { - CLASS(printbuf, buf)(); - - prt_str(&buf, "Now allowing incompatible features up to "); - bch2_version_to_text(&buf, new_version); - prt_str(&buf, ", previously allowed up to "); - bch2_version_to_text(&buf, c->sb.version_incompat_allowed); - prt_newline(&buf); - - bch_notice(c, "%s", buf.buf); - ret = true; - } - - if (ret) - bch2_sb_upgrade(c, new_version, - c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible); - - return ret; -} - int bch2_fs_recovery(struct bch_fs *c) { struct bch_sb_field_clean *clean = NULL; @@ -732,108 +648,6 @@ int bch2_fs_recovery(struct bch_fs *c) bch_info(c, "recovering from unclean shutdown"); } - if (!(c->sb.features & (1ULL << BCH_FEATURE_new_extent_overwrite))) { - bch_err(c, "feature new_extent_overwrite not set, filesystem no longer supported"); - ret = -EINVAL; - goto err; - } - - if (!c->sb.clean && - !(c->sb.features & (1ULL << BCH_FEATURE_extents_above_btree_updates))) { - bch_err(c, "filesystem needs recovery from older version; run fsck from older bcachefs-tools to fix"); - ret = -EINVAL; - goto err; - } - - if (c->opts.norecovery) { - c->opts.recovery_pass_last = c->opts.recovery_pass_last - ? min(c->opts.recovery_pass_last, BCH_RECOVERY_PASS_snapshots_read) - : BCH_RECOVERY_PASS_snapshots_read; - c->opts.nochanges = true; - } - - if (c->opts.nochanges) - c->opts.read_only = true; - - if (c->opts.journal_rewind) { - bch_info(c, "rewinding journal, fsck required"); - c->opts.fsck = true; - } - - if (go_rw_in_recovery(c)) { - /* - * start workqueues/kworkers early - kthread creation checks for - * pending signals, which is _very_ annoying - */ - ret = bch2_fs_init_rw(c); - if (ret) - goto err; - } - - mutex_lock(&c->sb_lock); - struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); - bool write_sb = false; - - if (BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb)) { - ext->recovery_passes_required[0] |= - cpu_to_le64(bch2_recovery_passes_to_stable(BIT_ULL(BCH_RECOVERY_PASS_check_topology))); - write_sb = true; - } - - u64 sb_passes = bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0])); - if (sb_passes) { - CLASS(printbuf, buf)(); - prt_str(&buf, "superblock requires following recovery passes to be run:\n "); - prt_bitflags(&buf, bch2_recovery_passes, sb_passes); - bch_info(c, "%s", buf.buf); - } - - if (bch2_check_version_downgrade(c)) { - CLASS(printbuf, buf)(); - - prt_str(&buf, "Version downgrade required:"); - - __le64 passes = ext->recovery_passes_required[0]; - bch2_sb_set_downgrade(c, - BCH_VERSION_MINOR(bcachefs_metadata_version_current), - BCH_VERSION_MINOR(c->sb.version)); - passes = ext->recovery_passes_required[0] & ~passes; - if (passes) { - prt_str(&buf, "\n running recovery passes: "); - prt_bitflags(&buf, bch2_recovery_passes, - bch2_recovery_passes_from_stable(le64_to_cpu(passes))); - } - - bch_info(c, "%s", buf.buf); - write_sb = true; - } - - if (check_version_upgrade(c)) - write_sb = true; - - c->opts.recovery_passes |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0])); - - if (c->sb.version_upgrade_complete < bcachefs_metadata_version_autofix_errors) { - SET_BCH_SB_ERROR_ACTION(c->disk_sb.sb, BCH_ON_ERROR_fix_safe); - write_sb = true; - } - - if (write_sb) - bch2_write_super(c); - mutex_unlock(&c->sb_lock); - - if (c->sb.clean) - set_bit(BCH_FS_clean_recovery, &c->flags); - if (c->opts.fsck) - set_bit(BCH_FS_in_fsck, &c->flags); - set_bit(BCH_FS_in_recovery, &c->flags); - - ret = bch2_blacklist_table_initialize(c); - if (ret) { - bch_err(c, "error initializing blacklist table"); - goto err; - } - bch2_journal_pos_from_member_info_resume(c); if (!c->sb.clean || c->opts.retain_recovery_info) { @@ -1053,8 +867,8 @@ use_clean: } mutex_lock(&c->sb_lock); - ext = bch2_sb_field_get(c->disk_sb.sb, ext); - write_sb = false; + struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); + bool write_sb = false; if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) != le16_to_cpu(c->disk_sb.sb->version)) { SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, le16_to_cpu(c->disk_sb.sb->version)); |