Diffstat (limited to 'fs/bcachefs')
-rw-r--r--  fs/bcachefs/bcachefs_format.h        |  11
-rw-r--r--  fs/bcachefs/bkey_methods.c           |   6
-rw-r--r--  fs/bcachefs/bkey_types.h             |   5
-rw-r--r--  fs/bcachefs/btree_cache.c            |   2
-rw-r--r--  fs/bcachefs/btree_cache.h            |   8
-rw-r--r--  fs/bcachefs/btree_gc.c               |  38
-rw-r--r--  fs/bcachefs/btree_iter.c             |  72
-rw-r--r--  fs/bcachefs/btree_update.c           |  73
-rw-r--r--  fs/bcachefs/btree_update_interior.c  |   8
-rw-r--r--  fs/bcachefs/errcode.h                |   1
-rw-r--r--  fs/bcachefs/fs.c                     |   4
-rw-r--r--  fs/bcachefs/fsck.c                   |  11
-rw-r--r--  fs/bcachefs/inode.c                  |   7
-rw-r--r--  fs/bcachefs/logged_ops.h             |   2
-rw-r--r--  fs/bcachefs/lru.h                    |  10
-rw-r--r--  fs/bcachefs/move.c                   |   8
-rw-r--r--  fs/bcachefs/movinggc.c               | 188
-rw-r--r--  fs/bcachefs/recovery.c               | 193
-rw-r--r--  fs/bcachefs/sb-counters_format.h     |   2
-rw-r--r--  fs/bcachefs/sb-errors_format.h       |   4
-rw-r--r--  fs/bcachefs/sb-members.c             | 116
-rw-r--r--  fs/bcachefs/sb-members.h             |  25
-rw-r--r--  fs/bcachefs/snapshot.c               |   2
-rw-r--r--  fs/bcachefs/super.c                  | 314
24 files changed, 678 insertions, 432 deletions
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index b4a04df5ea95..a8f59522e258 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -423,7 +423,8 @@ enum bch_bkey_type_flags {
x(logged_op_truncate, 32, BKEY_TYPE_strict_btree_checks) \
x(logged_op_finsert, 33, BKEY_TYPE_strict_btree_checks) \
x(accounting, 34, BKEY_TYPE_strict_btree_checks) \
- x(inode_alloc_cursor, 35, BKEY_TYPE_strict_btree_checks)
+ x(inode_alloc_cursor, 35, BKEY_TYPE_strict_btree_checks) \
+ x(extent_whiteout, 36, BKEY_TYPE_strict_btree_checks)
enum bch_bkey_type {
#define x(name, nr, ...) KEY_TYPE_##name = nr,
@@ -440,6 +441,10 @@ struct bch_whiteout {
struct bch_val v;
};
+struct bch_extent_whiteout {
+ struct bch_val v;
+};
+
struct bch_error {
struct bch_val v;
};
@@ -700,7 +705,8 @@ struct bch_sb_field_ext {
x(extent_flags, BCH_VERSION(1, 25)) \
x(snapshot_deletion_v2, BCH_VERSION(1, 26)) \
x(fast_device_removal, BCH_VERSION(1, 27)) \
- x(inode_has_case_insensitive, BCH_VERSION(1, 28))
+ x(inode_has_case_insensitive, BCH_VERSION(1, 28)) \
+ x(extent_snapshot_whiteouts, BCH_VERSION(1, 29))
enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9,
@@ -1340,6 +1346,7 @@ enum btree_id_flags {
BTREE_IS_snapshots| \
BTREE_IS_data, \
BIT_ULL(KEY_TYPE_whiteout)| \
+ BIT_ULL(KEY_TYPE_extent_whiteout)| \
BIT_ULL(KEY_TYPE_error)| \
BIT_ULL(KEY_TYPE_cookie)| \
BIT_ULL(KEY_TYPE_extent)| \
diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c
index fcd8c82cba4f..75d73677c4d8 100644
--- a/fs/bcachefs/bkey_methods.c
+++ b/fs/bcachefs/bkey_methods.c
@@ -41,6 +41,10 @@ static int deleted_key_validate(struct bch_fs *c, struct bkey_s_c k,
.key_validate = deleted_key_validate, \
})
+#define bch2_bkey_ops_extent_whiteout ((struct bkey_ops) { \
+ .key_validate = deleted_key_validate, \
+})
+
static int empty_val_key_validate(struct bch_fs *c, struct bkey_s_c k,
struct bkey_validate_context from)
{
@@ -203,7 +207,7 @@ int __bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k,
? bch2_bkey_types[k.k->type]
: "(unknown)");
- if (btree_node_type_is_extents(type) && !bkey_whiteout(k.k)) {
+ if (btree_node_type_is_extents(type) && !bkey_extent_whiteout(k.k)) {
bkey_fsck_err_on(k.k->size == 0,
c, bkey_extent_size_zero,
"size == 0");
diff --git a/fs/bcachefs/bkey_types.h b/fs/bcachefs/bkey_types.h
index b4f328f9853c..88a48ce63656 100644
--- a/fs/bcachefs/bkey_types.h
+++ b/fs/bcachefs/bkey_types.h
@@ -44,6 +44,11 @@ static inline void set_bkey_val_bytes(struct bkey *k, unsigned bytes)
#define bkey_whiteout(_k) \
((_k)->type == KEY_TYPE_deleted || (_k)->type == KEY_TYPE_whiteout)
+#define bkey_extent_whiteout(_k) \
+ ((_k)->type == KEY_TYPE_deleted || \
+ (_k)->type == KEY_TYPE_whiteout || \
+ (_k)->type == KEY_TYPE_extent_whiteout)
+
/* bkey with split value, const */
struct bkey_s_c {
const struct bkey *k;
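Note: the new predicate is a strict superset of bkey_whiteout(), additionally matching KEY_TYPE_extent_whiteout. A minimal illustrative helper (hypothetical, not part of the patch) showing the distinction:

	/* Hypothetical helper, for illustration only: matches keys that only the
	 * new predicate accepts, i.e. the new extent whiteout type. */
	static inline bool bkey_is_extent_whiteout_only(const struct bkey *k)
	{
		return bkey_extent_whiteout(k) && !bkey_whiteout(k);
	}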
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index 25b01e750880..9261ad043564 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -215,7 +215,7 @@ void bch2_node_pin(struct bch_fs *c, struct btree *b)
struct btree_cache *bc = &c->btree_cache;
guard(mutex)(&bc->lock);
- if (b != btree_node_root(c, b) && !btree_node_pinned(b)) {
+ if (!btree_node_is_root(c, b) && !btree_node_pinned(b)) {
set_btree_node_pinned(b);
list_move(&b->list, &bc->live[1].list);
bc->live[0].nr--;
diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h
index 649e9dfd178a..035b2cb25077 100644
--- a/fs/bcachefs/btree_cache.h
+++ b/fs/bcachefs/btree_cache.h
@@ -144,6 +144,14 @@ static inline struct btree *btree_node_root(struct bch_fs *c, struct btree *b)
return r ? r->b : NULL;
}
+static inline bool btree_node_is_root(struct bch_fs *c, struct btree *b)
+{
+ struct btree *root = btree_node_root(c, b);
+
+ BUG_ON(b != root && b->c.level >= root->c.level);
+ return b == root;
+}
+
const char *bch2_btree_id_str(enum btree_id); /* avoid */
void bch2_btree_id_to_text(struct printbuf *, enum btree_id);
void bch2_btree_id_level_to_text(struct printbuf *, enum btree_id, unsigned);
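Note: the new helper replaces open-coded root comparisons at BUG_ON sites; the btree_update_interior.c hunks below show the before/after pattern:

	/* before: open-coded comparison at each call site */
	BUG_ON(!parent && (b != btree_node_root(c, b)));
	/* after: the helper also asserts a non-root node sits below the root's level */
	BUG_ON(!parent && !btree_node_is_root(c, b));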
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index ce3c7750a922..6b91649688da 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -282,6 +282,41 @@ fsck_err:
return ret;
}
+static int btree_check_root_boundaries(struct btree_trans *trans, struct btree *b)
+{
+ struct bch_fs *c = trans->c;
+ struct printbuf buf = PRINTBUF;
+ int ret = 0;
+
+ BUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
+ !bpos_eq(bkey_i_to_btree_ptr_v2(&b->key)->v.min_key,
+ b->data->min_key));
+
+ prt_str(&buf, " at ");
+ bch2_btree_pos_to_text(&buf, c, b);
+
+ if (mustfix_fsck_err_on(!bpos_eq(b->data->min_key, POS_MIN),
+ trans, btree_node_topology_bad_root_min_key,
+ "btree root with incorrect min_key%s", buf.buf)) {
+ ret = set_node_min(c, b, POS_MIN);
+ if (ret)
+ goto err;
+ }
+
+ if (mustfix_fsck_err_on(!bpos_eq(b->data->max_key, SPOS_MAX),
+ trans, btree_node_topology_bad_root_max_key,
+ "btree root with incorrect min_key%s", buf.buf)) {
+ ret = set_node_max(c, b, SPOS_MAX);
+ if (ret)
+ goto err;
+ }
+
+err:
+fsck_err:
+ printbuf_exit(&buf);
+ return ret;
+}
+
static int btree_repair_node_end(struct btree_trans *trans, struct btree *b,
struct btree *child, struct bpos *pulled_from_scan)
{
@@ -586,7 +621,8 @@ recover:
struct btree *b = r->b;
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
- ret = bch2_btree_repair_topology_recurse(trans, b, &pulled_from_scan);
+ ret = btree_check_root_boundaries(trans, b) ?:
+ bch2_btree_repair_topology_recurse(trans, b, &pulled_from_scan);
six_unlock_read(&b->c.lock);
if (bch2_err_matches(ret, BCH_ERR_topology_repair_drop_this_node)) {
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index a67babf69d39..8962c481e310 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -2450,10 +2450,27 @@ struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *iter, struct bpos en
continue;
}
- if (bkey_whiteout(k.k) &&
- !(iter->flags & BTREE_ITER_nofilter_whiteouts)) {
- search_key = bkey_successor(iter, k.k->p);
- continue;
+ if (!(iter->flags & BTREE_ITER_nofilter_whiteouts)) {
+ /*
+ * KEY_TYPE_extent_whiteout indicates that there
+ * are no extents that overlap with this
+ * whiteout - meaning bkey_start_pos() is
+ * monotonically increasing when including
+ * KEY_TYPE_extent_whiteout (not
+ * KEY_TYPE_whiteout).
+ *
+ * Without this @end wouldn't be able to
+ * terminate searches and we'd have to scan
+ * through tons of whiteouts:
+ */
+ if (k.k->type == KEY_TYPE_extent_whiteout &&
+ bkey_ge(k.k->p, end))
+ goto end;
+
+ if (bkey_extent_whiteout(k.k)) {
+ search_key = bkey_successor(iter, k.k->p);
+ continue;
+ }
}
}
@@ -2711,7 +2728,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *iter, struct bp
saved_path = 0;
}
- if (!bkey_whiteout(k.k)) {
+ if (!bkey_extent_whiteout(k.k)) {
saved_path = btree_path_clone(trans, iter->path,
iter->flags & BTREE_ITER_intent,
_THIS_IP_);
@@ -2724,7 +2741,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *iter, struct bp
continue;
}
- if (bkey_whiteout(k.k)) {
+ if (bkey_extent_whiteout(k.k)) {
search_key = bkey_predecessor(iter, k.k->p);
search_key.snapshot = U32_MAX;
continue;
@@ -2865,7 +2882,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
iter->k = *k.k;
}
- if (unlikely(k.k->type == KEY_TYPE_whiteout &&
+ if (unlikely(bkey_extent_whiteout(k.k) &&
(iter->flags & BTREE_ITER_filter_snapshots) &&
!(iter->flags & BTREE_ITER_nofilter_whiteouts)))
iter->k.type = KEY_TYPE_deleted;
@@ -2878,31 +2895,40 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
EBUG_ON(btree_iter_path(trans, iter)->level);
- if (iter->flags & BTREE_ITER_intent) {
- struct btree_iter iter2;
+ struct btree_iter iter2;
- bch2_trans_copy_iter(&iter2, iter);
- k = bch2_btree_iter_peek_max(&iter2, end);
+ bch2_trans_copy_iter(&iter2, iter);
+ iter2.flags |= BTREE_ITER_nofilter_whiteouts;
- if (k.k && !bkey_err(k)) {
- swap(iter->key_cache_path, iter2.key_cache_path);
- iter->k = iter2.k;
- k.k = &iter->k;
+ while (1) {
+ k = bch2_btree_iter_peek_max(&iter2, end);
+ if ((iter2.flags & BTREE_ITER_is_extents) &&
+ k.k &&
+ !bkey_err(k) &&
+ k.k->type == KEY_TYPE_whiteout) {
+ bch2_btree_iter_set_pos(&iter2, k.k->p);
+ continue;
}
- bch2_trans_iter_exit(&iter2);
- } else {
- struct bpos pos = iter->pos;
- k = bch2_btree_iter_peek_max(iter, end);
- if (unlikely(bkey_err(k)))
- bch2_btree_iter_set_pos(iter, pos);
- else
- iter->pos = pos;
+ break;
+ }
+
+ if (k.k && !bkey_err(k)) {
+ swap(iter->key_cache_path, iter2.key_cache_path);
+ iter->k = iter2.k;
+ k.k = &iter->k;
}
+ bch2_trans_iter_exit(&iter2);
if (unlikely(bkey_err(k)))
goto out;
+ if (unlikely(k.k &&
+ bkey_extent_whiteout(k.k) &&
+ (iter->flags & BTREE_ITER_filter_snapshots) &&
+ !(iter->flags & BTREE_ITER_nofilter_whiteouts)))
+ iter->k.type = KEY_TYPE_deleted;
+
next = k.k ? bkey_start_pos(k.k) : POS_MAX;
if (bkey_lt(iter->pos, next)) {
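Note: a simplified sketch of the termination rule added above, in isolation (the real code jumps to the function's end label). Because KEY_TYPE_extent_whiteout is only written where no extent overlaps, start positions never move backwards past it:

	/*
	 * Sketch only: once an extent_whiteout at or beyond @end is seen, no live
	 * extent can start before it, so the scan stops instead of stepping past
	 * each whiteout with bkey_successor().
	 */
	if (k.k->type == KEY_TYPE_extent_whiteout && bkey_ge(k.k->p, end))
		goto end;	/* nothing left before @end */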
diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c
index 566478728aa2..f59f018fe0d8 100644
--- a/fs/bcachefs/btree_update.c
+++ b/fs/bcachefs/btree_update.c
@@ -12,6 +12,7 @@
#include "extents.h"
#include "keylist.h"
#include "snapshot.h"
+#include "super-io.h"
#include "trace.h"
#include <linux/string_helpers.h>
@@ -158,6 +159,21 @@ int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans,
return ret;
}
+static inline enum bch_bkey_type extent_whiteout_type(struct bch_fs *c, enum btree_id btree, const struct bkey *k)
+{
+ /*
+ * KEY_TYPE_extent_whiteout indicates that there isn't a real extent
+ * present at that position: key start positions inclusive of
+ * KEY_TYPE_extent_whiteout (but not KEY_TYPE_whiteout) are
+ * monotonically increasing
+ */
+ return btree_id_is_extents_snapshots(btree) &&
+ bkey_deleted(k) &&
+ !bch2_request_incompat_feature(c, bcachefs_metadata_version_extent_snapshot_whiteouts)
+ ? KEY_TYPE_extent_whiteout
+ : KEY_TYPE_whiteout;
+}
+
int bch2_trans_update_extent_overwrite(struct btree_trans *trans,
struct btree_iter *iter,
enum btree_iter_update_trigger_flags flags,
@@ -215,7 +231,7 @@ int bch2_trans_update_extent_overwrite(struct btree_trans *trans,
return ret;
}
- if (bkey_le(old.k->p, new.k->p)) {
+ if (!back_split) {
update = bch2_trans_kmalloc(trans, sizeof(*update));
if ((ret = PTR_ERR_OR_ZERO(update)))
return ret;
@@ -224,23 +240,21 @@ int bch2_trans_update_extent_overwrite(struct btree_trans *trans,
update->k.p = old.k->p;
update->k.p.snapshot = new.k->p.snapshot;
- if (new.k->p.snapshot != old.k->p.snapshot) {
- update->k.type = KEY_TYPE_whiteout;
- } else if (btree_type_has_snapshots(btree_id)) {
- ret = need_whiteout_for_snapshot(trans, btree_id, update->k.p);
+ if (btree_type_has_snapshots(btree_id)) {
+ ret = new.k->p.snapshot != old.k->p.snapshot
+ ? 1
+ : need_whiteout_for_snapshot(trans, btree_id, update->k.p);
if (ret < 0)
return ret;
if (ret)
- update->k.type = KEY_TYPE_whiteout;
+ update->k.type = extent_whiteout_type(trans->c, iter->btree_id, new.k);
}
ret = bch2_btree_insert_nonextent(trans, btree_id, update,
BTREE_UPDATE_internal_snapshot_node|flags);
if (ret)
return ret;
- }
-
- if (back_split) {
+ } else {
update = bch2_bkey_make_mut_noupdate(trans, old);
if ((ret = PTR_ERR_OR_ZERO(update)))
return ret;
@@ -267,7 +281,8 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
CLASS(btree_iter, iter)(trans, btree_id, bkey_start_pos(&insert->k),
BTREE_ITER_intent|
BTREE_ITER_with_updates|
- BTREE_ITER_not_extents);
+ BTREE_ITER_not_extents|
+ BTREE_ITER_nofilter_whiteouts);
struct bkey_s_c k = bch2_btree_iter_peek_max(&iter, POS(insert->k.p.inode, U64_MAX));
int ret = bkey_err(k);
if (ret)
@@ -285,12 +300,40 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
goto next;
}
- while (bkey_gt(insert->k.p, bkey_start_pos(k.k))) {
- bool done = bkey_lt(insert->k.p, k.k->p);
+ while (true) {
+ BUG_ON(bkey_le(k.k->p, bkey_start_pos(&insert->k)));
- ret = bch2_trans_update_extent_overwrite(trans, &iter, flags, k, bkey_i_to_s_c(insert));
- if (ret)
- return ret;
+ /*
+ * When KEY_TYPE_whiteout is included, bkey_start_pos is not
+ * monotonically increasing
+ */
+ if (k.k->type != KEY_TYPE_whiteout && bkey_le(insert->k.p, bkey_start_pos(k.k)))
+ break;
+
+ bool done = k.k->type != KEY_TYPE_whiteout && bkey_lt(insert->k.p, k.k->p);
+
+ if (bkey_extent_whiteout(k.k)) {
+ enum bch_bkey_type whiteout_type = extent_whiteout_type(trans->c, btree_id, &insert->k);
+
+ if (bkey_le(k.k->p, insert->k.p) &&
+ k.k->type != whiteout_type) {
+ struct bkey_i *update = bch2_bkey_make_mut_noupdate(trans, k);
+ ret = PTR_ERR_OR_ZERO(update);
+ if (ret)
+ return ret;
+
+ update->k.p.snapshot = iter.snapshot;
+ update->k.type = whiteout_type;
+
+ ret = bch2_trans_update(trans, &iter, update, 0);
+ if (ret)
+ return ret;
+ }
+ } else {
+ ret = bch2_trans_update_extent_overwrite(trans, &iter, flags, k, bkey_i_to_s_c(insert));
+ if (ret)
+ return ret;
+ }
if (done)
goto out;
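Note: extent_whiteout_type() degrades gracefully; if bch2_request_incompat_feature() refuses the extent_snapshot_whiteouts version (returns nonzero), it falls back to the old KEY_TYPE_whiteout, so no incompatible keys are written on filesystems that have not opted in. A condensed, illustrative restatement of the three conditions:

	/* Illustrative restatement of the selection in extent_whiteout_type() */
	bool use_new_type =
		btree_id_is_extents_snapshots(btree) &&	/* extents btree with snapshots */
		bkey_deleted(k) &&			/* the update is a deletion */
		!bch2_request_incompat_feature(c,	/* on-disk version allows it */
			bcachefs_metadata_version_extent_snapshot_whiteouts);

	enum bch_bkey_type type = use_new_type ? KEY_TYPE_extent_whiteout : KEY_TYPE_whiteout;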
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 5f4f82967105..76897cf15946 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -66,6 +66,10 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b)
bkey_init(&prev.k->k);
bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b);
+ /*
+ * Don't use btree_node_is_root(): we're called by btree split, after
+ * creating a new root but before setting it
+ */
if (b == btree_node_root(c, b)) {
if (!bpos_eq(b->data->min_key, POS_MIN)) {
bch2_log_msg_start(c, &buf);
@@ -1655,7 +1659,7 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans,
int ret = 0;
bch2_verify_btree_nr_keys(b);
- BUG_ON(!parent && (b != btree_node_root(c, b)));
+ BUG_ON(!parent && !btree_node_is_root(c, b));
BUG_ON(parent && !btree_node_intent_locked(trans->paths + path, b->c.level + 1));
ret = bch2_btree_node_check_topology(trans, b);
@@ -2527,7 +2531,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
if (ret)
goto err;
} else {
- BUG_ON(btree_node_root(c, b) != b);
+ BUG_ON(!btree_node_is_root(c, b));
struct jset_entry *e = bch2_trans_jset_entry_alloc(trans,
jset_u64s(new_key->k.u64s));
diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
index cec8b0f47d3d..adc1f9315eab 100644
--- a/fs/bcachefs/errcode.h
+++ b/fs/bcachefs/errcode.h
@@ -119,6 +119,7 @@
x(ENOENT, ENOENT_not_directory) \
x(ENOENT, ENOENT_directory_dead) \
x(ENOENT, ENOENT_subvolume) \
+ x(ENOENT, ENOENT_snapshot) \
x(ENOENT, ENOENT_snapshot_tree) \
x(ENOENT, ENOENT_dirent_doesnt_match_inode) \
x(ENOENT, ENOENT_dev_not_found) \
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 3b289f696612..b5e3090f1cb8 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -511,8 +511,8 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum)
struct bch_subvolume subvol;
int ret = lockrestart_do(trans,
bch2_subvolume_get(trans, inum.subvol, true, &subvol) ?:
- bch2_inode_find_by_inum_trans(trans, inum, &inode_u)) ?:
- PTR_ERR_OR_ZERO(inode = bch2_inode_hash_init_insert(trans, inum, &inode_u, &subvol));
+ bch2_inode_find_by_inum_trans(trans, inum, &inode_u) ?:
+ PTR_ERR_OR_ZERO(inode = bch2_inode_hash_init_insert(trans, inum, &inode_u, &subvol)));
return ret ? ERR_PTR(ret) : &inode->v;
}
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index 8d5ea217798e..01c1c6372229 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -923,9 +923,10 @@ lookup_inode_for_snapshot(struct btree_trans *trans, struct inode_walker *w, str
bkey_init(&whiteout.k);
whiteout.k.type = KEY_TYPE_whiteout;
whiteout.k.p = SPOS(0, i->inode.bi_inum, k.k->p.snapshot);
- ret = bch2_btree_insert_nonextent(trans, BTREE_ID_inodes,
- &whiteout,
- BTREE_UPDATE_internal_snapshot_node);
+ ret = bch2_btree_insert_trans(trans, BTREE_ID_inodes,
+ &whiteout,
+ BTREE_ITER_cached|
+ BTREE_UPDATE_internal_snapshot_node);
}
if (ret)
@@ -1443,7 +1444,7 @@ static int check_key_has_inode(struct btree_trans *trans,
if (ret)
return ret;
- if (k.k->type == KEY_TYPE_whiteout)
+ if (bkey_extent_whiteout(k.k))
return 0;
bool have_inode = i && !i->whiteout;
@@ -1923,7 +1924,9 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
&inode->recalculate_sums);
if (ret)
goto err;
+ }
+ if (!bkey_extent_whiteout(k.k)) {
/*
* Check inodes in reverse order, from oldest snapshots to
* newest, starting from the inode that matches this extent's
diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c
index 85013e8d6166..d5e5190f0663 100644
--- a/fs/bcachefs/inode.c
+++ b/fs/bcachefs/inode.c
@@ -463,9 +463,10 @@ int __bch2_fsck_write_inode(struct btree_trans *trans, struct bch_inode_unpacked
bch2_inode_pack(inode_p, inode);
inode_p->inode.k.p.snapshot = inode->bi_snapshot;
- return bch2_btree_insert_nonextent(trans, BTREE_ID_inodes,
- &inode_p->inode.k_i,
- BTREE_UPDATE_internal_snapshot_node);
+ return bch2_btree_insert_trans(trans, BTREE_ID_inodes,
+ &inode_p->inode.k_i,
+ BTREE_ITER_cached|
+ BTREE_UPDATE_internal_snapshot_node);
}
int bch2_fsck_write_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode)
diff --git a/fs/bcachefs/logged_ops.h b/fs/bcachefs/logged_ops.h
index 30ae9ef737dd..6dea6e2ac7a8 100644
--- a/fs/bcachefs/logged_ops.h
+++ b/fs/bcachefs/logged_ops.h
@@ -10,7 +10,7 @@
static inline int bch2_logged_op_update(struct btree_trans *trans, struct bkey_i *op)
{
- return bch2_btree_insert_nonextent(trans, BTREE_ID_logged_ops, op, 0);
+ return bch2_btree_insert_trans(trans, BTREE_ID_logged_ops, op, BTREE_ITER_cached);
}
int bch2_resume_logged_ops(struct bch_fs *);
diff --git a/fs/bcachefs/lru.h b/fs/bcachefs/lru.h
index 8abd0aa2083a..6f1e0a7b5db5 100644
--- a/fs/bcachefs/lru.h
+++ b/fs/bcachefs/lru.h
@@ -24,6 +24,16 @@ static inline struct bpos lru_pos(u16 lru_id, u64 dev_bucket, u64 time)
return pos;
}
+static inline struct bpos lru_start(u16 lru_id)
+{
+ return lru_pos(lru_id, 0, 0);
+}
+
+static inline struct bpos lru_end(u16 lru_id)
+{
+ return lru_pos(lru_id, U64_MAX, LRU_TIME_MAX);
+}
+
static inline enum bch_lru_type lru_type(struct bkey_s_c l)
{
u16 lru_id = l.k->p.inode >> 48;
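Note: lru_start() and lru_end() simply bound a single LRU id's keyspace (dev_bucket and time at their minimum and maximum). The copygc rework below iterates the fragmentation LRUs with them, e.g. (taken from the movinggc.c hunk further down):

	/* walk the whole bucket-fragmentation LRU */
	int ret = for_each_btree_key_max(trans, iter, BTREE_ID_lru,
				lru_start(BCH_LRU_BUCKET_FRAGMENTATION),
				lru_end(BCH_LRU_BUCKET_FRAGMENTATION),
				0, k,
				try_add_copygc_bucket(trans, buckets_in_flight,
						      u64_to_bucket(k.k->p.offset),
						      lru_pos_time(k.k->p)));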
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index a38996f5366f..30fe269d531d 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -344,9 +344,13 @@ int bch2_move_extent(struct moving_context *ctxt,
if (!data_opts.rewrite_ptrs &&
!data_opts.extra_replicas &&
!data_opts.scrub) {
- if (data_opts.kill_ptrs)
+ if (data_opts.kill_ptrs) {
+ this_cpu_add(c->counters[BCH_COUNTER_io_move_drop_only], k.k->size);
return bch2_extent_drop_ptrs(trans, iter, k, &io_opts, &data_opts);
- return 0;
+ } else {
+ this_cpu_add(c->counters[BCH_COUNTER_io_move_noop], k.k->size);
+ return 0;
+ }
}
struct moving_io *io = allocate_dropping_locks(trans, ret,
diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c
index b0cbe3c1aab6..f36d60b8fb07 100644
--- a/fs/bcachefs/movinggc.c
+++ b/fs/bcachefs/movinggc.c
@@ -14,6 +14,7 @@
#include "btree_write_buffer.h"
#include "buckets.h"
#include "clock.h"
+#include "ec.h"
#include "errcode.h"
#include "error.h"
#include "lru.h"
@@ -131,72 +132,153 @@ static bool bucket_in_flight(struct buckets_in_flight *list,
return rhashtable_lookup_fast(list->table, &k, bch_move_bucket_params);
}
+static int try_add_copygc_bucket(struct btree_trans *trans,
+ struct buckets_in_flight *buckets_in_flight,
+ struct bpos bucket, u64 lru_time)
+{
+ struct move_bucket b = { .k.bucket = bucket };
+
+ int ret = bch2_bucket_is_movable(trans, &b, lru_time);
+ if (ret <= 0)
+ return ret;
+
+ if (bucket_in_flight(buckets_in_flight, b.k))
+ return 0;
+
+ struct move_bucket *b_i = kmalloc(sizeof(*b_i), GFP_KERNEL);
+ if (!b_i)
+ return -ENOMEM;
+
+ *b_i = b;
+
+ ret = darray_push(&buckets_in_flight->to_evacuate, b_i);
+ if (ret) {
+ kfree(b_i);
+ return ret;
+ }
+
+ ret = rhashtable_lookup_insert_fast(buckets_in_flight->table, &b_i->hash,
+ bch_move_bucket_params);
+ BUG_ON(ret);
+
+ size_t nr_to_get = max_t(size_t, 16U, buckets_in_flight->nr / 4);
+ return buckets_in_flight->to_evacuate.nr >= nr_to_get;
+}
+
static int bch2_copygc_get_buckets(struct moving_context *ctxt,
struct buckets_in_flight *buckets_in_flight)
{
struct btree_trans *trans = ctxt->trans;
- struct bch_fs *c = trans->c;
- size_t nr_to_get = max_t(size_t, 16U, buckets_in_flight->nr / 4);
- size_t saw = 0, in_flight = 0, not_movable = 0, sectors = 0;
- int ret;
- move_buckets_wait(ctxt, buckets_in_flight, false);
+ int ret = for_each_btree_key_max(trans, iter, BTREE_ID_lru,
+ lru_start(BCH_LRU_BUCKET_FRAGMENTATION),
+ lru_end(BCH_LRU_BUCKET_FRAGMENTATION),
+ 0, k,
+ try_add_copygc_bucket(trans, buckets_in_flight,
+ u64_to_bucket(k.k->p.offset),
+ lru_pos_time(k.k->p))
+ );
- ret = bch2_btree_write_buffer_tryflush(trans);
- if (bch2_err_matches(ret, EROFS))
- return ret;
+ return ret < 0 ? ret : 0;
+}
- if (bch2_fs_fatal_err_on(ret, c, "%s: from bch2_btree_write_buffer_tryflush()", bch2_err_str(ret)))
- return ret;
+static int bch2_copygc_get_stripe_buckets(struct moving_context *ctxt,
+ struct buckets_in_flight *buckets_in_flight)
+{
+ struct btree_trans *trans = ctxt->trans;
- ret = for_each_btree_key_max(trans, iter, BTREE_ID_lru,
- lru_pos(BCH_LRU_BUCKET_FRAGMENTATION, 0, 0),
- lru_pos(BCH_LRU_BUCKET_FRAGMENTATION, U64_MAX, LRU_TIME_MAX),
- 0, k, ({
- struct move_bucket b = { .k.bucket = u64_to_bucket(k.k->p.offset) };
- int ret2 = 0;
+ int ret = for_each_btree_key_max(trans, iter, BTREE_ID_lru,
+ lru_start(BCH_LRU_STRIPE_FRAGMENTATION),
+ lru_end(BCH_LRU_STRIPE_FRAGMENTATION),
+ 0, lru_k, ({
+ CLASS(btree_iter, s_iter)(trans, BTREE_ID_stripes, POS(0, lru_k.k->p.offset), 0);
+ struct bkey_s_c s_k = bch2_btree_iter_peek_slot(&s_iter);
+ int ret2 = bkey_err(s_k);
+ if (ret2)
+ goto err;
- saw++;
+ if (s_k.k->type != KEY_TYPE_stripe)
+ continue;
- ret2 = bch2_bucket_is_movable(trans, &b, lru_pos_time(k.k->p));
- if (ret2 < 0)
- goto err;
+ const struct bch_stripe *s = bkey_s_c_to_stripe(s_k).v;
- if (!ret2)
- not_movable++;
- else if (bucket_in_flight(buckets_in_flight, b.k))
- in_flight++;
- else {
- struct move_bucket *b_i = kmalloc(sizeof(*b_i), GFP_KERNEL);
- ret2 = b_i ? 0 : -ENOMEM;
+ /* write buffer race? */
+ if (stripe_lru_pos(s) != lru_pos_time(lru_k.k->p))
+ continue;
+
+ unsigned nr_data = s->nr_blocks - s->nr_redundant;
+ for (unsigned i = 0; i < nr_data; i++) {
+ if (!stripe_blockcount_get(s, i))
+ continue;
+
+ const struct bch_extent_ptr *ptr = s->ptrs + i;
+ CLASS(bch2_dev_tryget, ca)(trans->c, ptr->dev);
+ if (unlikely(!ca))
+ continue;
+
+ ret2 = try_add_copygc_bucket(trans, buckets_in_flight,
+ PTR_BUCKET_POS(ca, ptr), U64_MAX);
if (ret2)
- goto err;
+ break;
+ }
+err:
+ ret2;
+ }));
- *b_i = b;
+ return ret < 0 ? ret : 0;
+}
+
+static bool should_do_ec_copygc(struct btree_trans *trans)
+{
+ u64 stripe_frag_ratio = 0;
+
+ for_each_btree_key_max(trans, iter, BTREE_ID_lru,
+ lru_start(BCH_LRU_STRIPE_FRAGMENTATION),
+ lru_end(BCH_LRU_STRIPE_FRAGMENTATION),
+ 0, lru_k, ({
+ CLASS(btree_iter, s_iter)(trans, BTREE_ID_stripes, POS(0, lru_k.k->p.offset), 0);
+ struct bkey_s_c s_k = bch2_btree_iter_peek_slot(&s_iter);
+ int ret = bkey_err(s_k);
+ if (ret)
+ goto err;
- ret2 = darray_push(&buckets_in_flight->to_evacuate, b_i);
- if (ret2) {
- kfree(b_i);
- goto err;
- }
+ if (s_k.k->type != KEY_TYPE_stripe)
+ continue;
- ret2 = rhashtable_lookup_insert_fast(buckets_in_flight->table, &b_i->hash,
- bch_move_bucket_params);
- BUG_ON(ret2);
+ const struct bch_stripe *s = bkey_s_c_to_stripe(s_k).v;
- sectors += b.sectors;
- }
+ /* write buffer race? */
+ if (stripe_lru_pos(s) != lru_pos_time(lru_k.k->p))
+ continue;
- ret2 = buckets_in_flight->to_evacuate.nr >= nr_to_get;
+ unsigned nr_data = s->nr_blocks - s->nr_redundant, blocks_nonempty = 0;
+ for (unsigned i = 0; i < nr_data; i++)
+ blocks_nonempty += !!stripe_blockcount_get(s, i);
+
+ /* stripe is pending delete */
+ if (!blocks_nonempty)
+ continue;
+
+ /* This matches the calculation in alloc_lru_idx_fragmentation, so we can
+ * directly compare without actually looking up the bucket pointed to by the
+ * bucket fragmentation lru:
+ */
+ stripe_frag_ratio = div_u64(blocks_nonempty * (1ULL << 31), nr_data);
+ break;
err:
- ret2;
+ ret;
}));
- pr_debug("have: %zu (%zu) saw %zu in flight %zu not movable %zu got %zu (%zu)/%zu buckets ret %i",
- buckets_in_flight->nr, buckets_in_flight->sectors,
- saw, in_flight, not_movable, buckets_in_flight->to_evacuate.nr, sectors, nr_to_get, ret);
+ CLASS(btree_iter, iter)(trans, BTREE_ID_lru, lru_start(BCH_LRU_BUCKET_FRAGMENTATION), 0);
+ struct bkey_s_c lru_k;
- return ret < 0 ? ret : 0;
+ lockrestart_do(trans, bkey_err(lru_k = bch2_btree_iter_peek_max(&iter,
+ lru_end(BCH_LRU_BUCKET_FRAGMENTATION))));
+
+ u64 bucket_frag_ratio = lru_k.k && !bkey_err(lru_k) ? lru_pos_time(lru_k.k->p) : 0;
+
+ /* Prefer normal bucket copygc */
+ return stripe_frag_ratio && stripe_frag_ratio * 2 < bucket_frag_ratio;
}
noinline
@@ -213,7 +295,18 @@ static int bch2_copygc(struct moving_context *ctxt,
u64 sectors_moved = atomic64_read(&ctxt->stats->sectors_moved);
int ret = 0;
- ret = bch2_copygc_get_buckets(ctxt, buckets_in_flight);
+ move_buckets_wait(ctxt, buckets_in_flight, false);
+
+ ret = bch2_btree_write_buffer_tryflush(trans);
+ if (bch2_err_matches(ret, EROFS))
+ goto err;
+
+ if (bch2_fs_fatal_err_on(ret, c, "%s: from bch2_btree_write_buffer_tryflush()", bch2_err_str(ret)))
+ goto err;
+
+ ret = should_do_ec_copygc(trans)
+ ? bch2_copygc_get_stripe_buckets(ctxt, buckets_in_flight)
+ : bch2_copygc_get_buckets(ctxt, buckets_in_flight);
if (ret)
goto err;
@@ -265,7 +358,8 @@ static u64 bch2_copygc_dev_wait_amount(struct bch_dev *ca)
for (unsigned i = 0; i < BCH_DATA_NR; i++)
if (data_type_movable(i))
- fragmented += usage_full.d[i].fragmented;
+ fragmented += usage_full.d[i].buckets * ca->mi.bucket_size -
+ usage_full.d[i].sectors;
return max(0LL, fragmented_allowed - fragmented);
}
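Note: a worked example of the comparison at the end of should_do_ec_copygc(), assuming (per the comment above) that both ratios are live-data fractions scaled by 1 << 31; the numbers are hypothetical:

	u64 stripe_frag_ratio = div_u64(2 * (1ULL << 31), 8);	/* 2 of 8 data blocks live: 1 << 29 */
	u64 bucket_frag_ratio = div_u64(3 * (1ULL << 31), 5);	/* emptiest movable bucket 60% full */

	/* Same predicate as above: EC copygc only when the best stripe is (more than)
	 * twice as empty as the best bucket candidate */
	bool do_ec = stripe_frag_ratio && stripe_frag_ratio * 2 < bucket_frag_ratio;	/* true here */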
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index c57ff235a97a..21aa2edb13ac 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -626,93 +626,6 @@ fsck_err:
return ret;
}
-static bool check_version_upgrade(struct bch_fs *c)
-{
- unsigned latest_version = bcachefs_metadata_version_current;
- unsigned latest_compatible = min(latest_version,
- bch2_latest_compatible_version(c->sb.version));
- unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version;
- unsigned new_version = 0;
- bool ret = false;
-
- if (old_version < bcachefs_metadata_required_upgrade_below) {
- if (c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible ||
- latest_compatible < bcachefs_metadata_required_upgrade_below)
- new_version = latest_version;
- else
- new_version = latest_compatible;
- } else {
- switch (c->opts.version_upgrade) {
- case BCH_VERSION_UPGRADE_compatible:
- new_version = latest_compatible;
- break;
- case BCH_VERSION_UPGRADE_incompatible:
- new_version = latest_version;
- break;
- case BCH_VERSION_UPGRADE_none:
- new_version = min(old_version, latest_version);
- break;
- }
- }
-
- if (new_version > old_version) {
- CLASS(printbuf, buf)();
-
- if (old_version < bcachefs_metadata_required_upgrade_below)
- prt_str(&buf, "Version upgrade required:\n");
-
- if (old_version != c->sb.version) {
- prt_str(&buf, "Version upgrade from ");
- bch2_version_to_text(&buf, c->sb.version_upgrade_complete);
- prt_str(&buf, " to ");
- bch2_version_to_text(&buf, c->sb.version);
- prt_str(&buf, " incomplete\n");
- }
-
- prt_printf(&buf, "Doing %s version upgrade from ",
- BCH_VERSION_MAJOR(old_version) != BCH_VERSION_MAJOR(new_version)
- ? "incompatible" : "compatible");
- bch2_version_to_text(&buf, old_version);
- prt_str(&buf, " to ");
- bch2_version_to_text(&buf, new_version);
- prt_newline(&buf);
-
- struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
- __le64 passes = ext->recovery_passes_required[0];
- bch2_sb_set_upgrade(c, old_version, new_version);
- passes = ext->recovery_passes_required[0] & ~passes;
-
- if (passes) {
- prt_str(&buf, " running recovery passes: ");
- prt_bitflags(&buf, bch2_recovery_passes,
- bch2_recovery_passes_from_stable(le64_to_cpu(passes)));
- }
-
- bch_notice(c, "%s", buf.buf);
- ret = true;
- }
-
- if (new_version > c->sb.version_incompat_allowed &&
- c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible) {
- CLASS(printbuf, buf)();
-
- prt_str(&buf, "Now allowing incompatible features up to ");
- bch2_version_to_text(&buf, new_version);
- prt_str(&buf, ", previously allowed up to ");
- bch2_version_to_text(&buf, c->sb.version_incompat_allowed);
- prt_newline(&buf);
-
- bch_notice(c, "%s", buf.buf);
- ret = true;
- }
-
- if (ret)
- bch2_sb_upgrade(c, new_version,
- c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible);
-
- return ret;
-}
-
int bch2_fs_recovery(struct bch_fs *c)
{
struct bch_sb_field_clean *clean = NULL;
@@ -732,108 +645,6 @@ int bch2_fs_recovery(struct bch_fs *c)
bch_info(c, "recovering from unclean shutdown");
}
- if (!(c->sb.features & (1ULL << BCH_FEATURE_new_extent_overwrite))) {
- bch_err(c, "feature new_extent_overwrite not set, filesystem no longer supported");
- ret = -EINVAL;
- goto err;
- }
-
- if (!c->sb.clean &&
- !(c->sb.features & (1ULL << BCH_FEATURE_extents_above_btree_updates))) {
- bch_err(c, "filesystem needs recovery from older version; run fsck from older bcachefs-tools to fix");
- ret = -EINVAL;
- goto err;
- }
-
- if (c->opts.norecovery) {
- c->opts.recovery_pass_last = c->opts.recovery_pass_last
- ? min(c->opts.recovery_pass_last, BCH_RECOVERY_PASS_snapshots_read)
- : BCH_RECOVERY_PASS_snapshots_read;
- c->opts.nochanges = true;
- }
-
- if (c->opts.nochanges)
- c->opts.read_only = true;
-
- if (c->opts.journal_rewind) {
- bch_info(c, "rewinding journal, fsck required");
- c->opts.fsck = true;
- }
-
- if (go_rw_in_recovery(c)) {
- /*
- * start workqueues/kworkers early - kthread creation checks for
- * pending signals, which is _very_ annoying
- */
- ret = bch2_fs_init_rw(c);
- if (ret)
- goto err;
- }
-
- mutex_lock(&c->sb_lock);
- struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
- bool write_sb = false;
-
- if (BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb)) {
- ext->recovery_passes_required[0] |=
- cpu_to_le64(bch2_recovery_passes_to_stable(BIT_ULL(BCH_RECOVERY_PASS_check_topology)));
- write_sb = true;
- }
-
- u64 sb_passes = bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
- if (sb_passes) {
- CLASS(printbuf, buf)();
- prt_str(&buf, "superblock requires following recovery passes to be run:\n ");
- prt_bitflags(&buf, bch2_recovery_passes, sb_passes);
- bch_info(c, "%s", buf.buf);
- }
-
- if (bch2_check_version_downgrade(c)) {
- CLASS(printbuf, buf)();
-
- prt_str(&buf, "Version downgrade required:");
-
- __le64 passes = ext->recovery_passes_required[0];
- bch2_sb_set_downgrade(c,
- BCH_VERSION_MINOR(bcachefs_metadata_version_current),
- BCH_VERSION_MINOR(c->sb.version));
- passes = ext->recovery_passes_required[0] & ~passes;
- if (passes) {
- prt_str(&buf, "\n running recovery passes: ");
- prt_bitflags(&buf, bch2_recovery_passes,
- bch2_recovery_passes_from_stable(le64_to_cpu(passes)));
- }
-
- bch_info(c, "%s", buf.buf);
- write_sb = true;
- }
-
- if (check_version_upgrade(c))
- write_sb = true;
-
- c->opts.recovery_passes |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
-
- if (c->sb.version_upgrade_complete < bcachefs_metadata_version_autofix_errors) {
- SET_BCH_SB_ERROR_ACTION(c->disk_sb.sb, BCH_ON_ERROR_fix_safe);
- write_sb = true;
- }
-
- if (write_sb)
- bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
-
- if (c->sb.clean)
- set_bit(BCH_FS_clean_recovery, &c->flags);
- if (c->opts.fsck)
- set_bit(BCH_FS_in_fsck, &c->flags);
- set_bit(BCH_FS_in_recovery, &c->flags);
-
- ret = bch2_blacklist_table_initialize(c);
- if (ret) {
- bch_err(c, "error initializing blacklist table");
- goto err;
- }
-
bch2_journal_pos_from_member_info_resume(c);
if (!c->sb.clean || c->opts.retain_recovery_info) {
@@ -1053,8 +864,8 @@ use_clean:
}
mutex_lock(&c->sb_lock);
- ext = bch2_sb_field_get(c->disk_sb.sb, ext);
- write_sb = false;
+ struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
+ bool write_sb = false;
if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) != le16_to_cpu(c->disk_sb.sb->version)) {
SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, le16_to_cpu(c->disk_sb.sb->version));
diff --git a/fs/bcachefs/sb-counters_format.h b/fs/bcachefs/sb-counters_format.h
index 2e3a56bfd085..f3ea53a55384 100644
--- a/fs/bcachefs/sb-counters_format.h
+++ b/fs/bcachefs/sb-counters_format.h
@@ -31,6 +31,8 @@ enum counters_flags {
x(io_move_fail, 38, TYPE_COUNTER) \
x(io_move_write_fail, 82, TYPE_COUNTER) \
x(io_move_start_fail, 39, TYPE_COUNTER) \
+ x(io_move_drop_only, 91, TYPE_COUNTER) \
+ x(io_move_noop, 92, TYPE_COUNTER) \
x(io_move_created_rebalance, 83, TYPE_COUNTER) \
x(io_move_evacuate_bucket, 84, TYPE_COUNTER) \
x(bucket_invalidate, 3, TYPE_COUNTER) \
diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h
index dd4ee46606d7..5317b1bfe2e5 100644
--- a/fs/bcachefs/sb-errors_format.h
+++ b/fs/bcachefs/sb-errors_format.h
@@ -76,6 +76,8 @@ enum bch_fsck_flags {
x(btree_node_read_error, 62, FSCK_AUTOFIX) \
x(btree_node_topology_bad_min_key, 63, FSCK_AUTOFIX) \
x(btree_node_topology_bad_max_key, 64, FSCK_AUTOFIX) \
+ x(btree_node_topology_bad_root_min_key, 323, FSCK_AUTOFIX) \
+ x(btree_node_topology_bad_root_max_key, 324, FSCK_AUTOFIX) \
x(btree_node_topology_overwritten_by_prev_node, 65, FSCK_AUTOFIX) \
x(btree_node_topology_overwritten_by_next_node, 66, FSCK_AUTOFIX) \
x(btree_node_topology_interior_node_empty, 67, FSCK_AUTOFIX) \
@@ -334,7 +336,7 @@ enum bch_fsck_flags {
x(dirent_stray_data_after_cf_name, 305, 0) \
x(rebalance_work_incorrectly_set, 309, FSCK_AUTOFIX) \
x(rebalance_work_incorrectly_unset, 310, FSCK_AUTOFIX) \
- x(MAX, 323, 0)
+ x(MAX, 325, 0)
enum bch_sb_error_id {
#define x(t, n, ...) BCH_FSCK_ERR_##t = n,
diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c
index 0573c7b00151..e3c73d903898 100644
--- a/fs/bcachefs/sb-members.c
+++ b/fs/bcachefs/sb-members.c
@@ -68,34 +68,13 @@ struct bch_member *bch2_members_v2_get_mut(struct bch_sb *sb, int i)
return __bch2_members_v2_get_mut(bch2_sb_field_get(sb, members_v2), i);
}
-static struct bch_member members_v2_get(struct bch_sb_field_members_v2 *mi, int i)
-{
- struct bch_member ret, *p = __bch2_members_v2_get_mut(mi, i);
- memset(&ret, 0, sizeof(ret));
- memcpy(&ret, p, min_t(size_t, le16_to_cpu(mi->member_bytes), sizeof(ret)));
- return ret;
-}
-
-static struct bch_member *members_v1_get_mut(struct bch_sb_field_members_v1 *mi, int i)
-{
- return (void *) mi->_members + (i * BCH_MEMBER_V1_BYTES);
-}
-
-static struct bch_member members_v1_get(struct bch_sb_field_members_v1 *mi, int i)
-{
- struct bch_member ret, *p = members_v1_get_mut(mi, i);
- memset(&ret, 0, sizeof(ret));
- memcpy(&ret, p, min_t(size_t, BCH_MEMBER_V1_BYTES, sizeof(ret)));
- return ret;
-}
-
struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i)
{
struct bch_sb_field_members_v2 *mi2 = bch2_sb_field_get(sb, members_v2);
if (mi2)
- return members_v2_get(mi2, i);
+ return bch2_members_v2_get(mi2, i);
struct bch_sb_field_members_v1 *mi1 = bch2_sb_field_get(sb, members_v1);
- return members_v1_get(mi1, i);
+ return bch2_members_v1_get(mi1, i);
}
static int sb_members_v2_resize_entries(struct bch_fs *c)
@@ -211,33 +190,25 @@ static int validate_member(struct printbuf *err,
return 0;
}
-static void member_to_text(struct printbuf *out,
- struct bch_member m,
- struct bch_sb_field_disk_groups *gi,
- struct bch_sb *sb,
- int i)
+void bch2_member_to_text(struct printbuf *out,
+ struct bch_member *m,
+ struct bch_sb_field_disk_groups *gi,
+ struct bch_sb *sb,
+ unsigned idx)
{
- unsigned data_have = bch2_sb_dev_has_data(sb, i);
- u64 bucket_size = le16_to_cpu(m.bucket_size);
- u64 device_size = le64_to_cpu(m.nbuckets) * bucket_size;
-
- if (!bch2_member_alive(&m))
- return;
-
- prt_printf(out, "Device:\t%u\n", i);
-
- printbuf_indent_add(out, 2);
+ u64 bucket_size = le16_to_cpu(m->bucket_size);
+ u64 device_size = le64_to_cpu(m->nbuckets) * bucket_size;
prt_printf(out, "Label:\t");
- if (BCH_MEMBER_GROUP(&m))
+ if (BCH_MEMBER_GROUP(m))
bch2_disk_path_to_text_sb(out, sb,
- BCH_MEMBER_GROUP(&m) - 1);
+ BCH_MEMBER_GROUP(m) - 1);
else
prt_printf(out, "(none)");
prt_newline(out);
prt_printf(out, "UUID:\t");
- pr_uuid(out, m.uuid.b);
+ pr_uuid(out, m->uuid.b);
prt_newline(out);
prt_printf(out, "Size:\t");
@@ -245,40 +216,41 @@ static void member_to_text(struct printbuf *out,
prt_newline(out);
for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++)
- prt_printf(out, "%s errors:\t%llu\n", bch2_member_error_strs[i], le64_to_cpu(m.errors[i]));
+ prt_printf(out, "%s errors:\t%llu\n", bch2_member_error_strs[i], le64_to_cpu(m->errors[i]));
for (unsigned i = 0; i < BCH_IOPS_NR; i++)
- prt_printf(out, "%s iops:\t%u\n", bch2_iops_measurements[i], le32_to_cpu(m.iops[i]));
+ prt_printf(out, "%s iops:\t%u\n", bch2_iops_measurements[i], le32_to_cpu(m->iops[i]));
prt_printf(out, "Bucket size:\t");
prt_units_u64(out, bucket_size << 9);
prt_newline(out);
- prt_printf(out, "First bucket:\t%u\n", le16_to_cpu(m.first_bucket));
- prt_printf(out, "Buckets:\t%llu\n", le64_to_cpu(m.nbuckets));
+ prt_printf(out, "First bucket:\t%u\n", le16_to_cpu(m->first_bucket));
+ prt_printf(out, "Buckets:\t%llu\n", le64_to_cpu(m->nbuckets));
prt_printf(out, "Last mount:\t");
- if (m.last_mount)
- bch2_prt_datetime(out, le64_to_cpu(m.last_mount));
+ if (m->last_mount)
+ bch2_prt_datetime(out, le64_to_cpu(m->last_mount));
else
prt_printf(out, "(never)");
prt_newline(out);
- prt_printf(out, "Last superblock write:\t%llu\n", le64_to_cpu(m.seq));
+ prt_printf(out, "Last superblock write:\t%llu\n", le64_to_cpu(m->seq));
prt_printf(out, "State:\t%s\n",
- BCH_MEMBER_STATE(&m) < BCH_MEMBER_STATE_NR
- ? bch2_member_states[BCH_MEMBER_STATE(&m)]
+ BCH_MEMBER_STATE(m) < BCH_MEMBER_STATE_NR
+ ? bch2_member_states[BCH_MEMBER_STATE(m)]
: "unknown");
prt_printf(out, "Data allowed:\t");
- if (BCH_MEMBER_DATA_ALLOWED(&m))
- prt_bitflags(out, __bch2_data_types, BCH_MEMBER_DATA_ALLOWED(&m));
+ if (BCH_MEMBER_DATA_ALLOWED(m))
+ prt_bitflags(out, __bch2_data_types, BCH_MEMBER_DATA_ALLOWED(m));
else
prt_printf(out, "(none)");
prt_newline(out);
prt_printf(out, "Has data:\t");
+ unsigned data_have = bch2_sb_dev_has_data(sb, idx);
if (data_have)
prt_bitflags(out, __bch2_data_types, data_have);
else
@@ -286,22 +258,36 @@ static void member_to_text(struct printbuf *out,
prt_newline(out);
prt_printf(out, "Btree allocated bitmap blocksize:\t");
- if (m.btree_bitmap_shift < 64)
- prt_units_u64(out, 1ULL << m.btree_bitmap_shift);
+ if (m->btree_bitmap_shift < 64)
+ prt_units_u64(out, 1ULL << m->btree_bitmap_shift);
else
- prt_printf(out, "(invalid shift %u)", m.btree_bitmap_shift);
+ prt_printf(out, "(invalid shift %u)", m->btree_bitmap_shift);
prt_newline(out);
prt_printf(out, "Btree allocated bitmap:\t");
- bch2_prt_u64_base2_nbits(out, le64_to_cpu(m.btree_allocated_bitmap), 64);
+ bch2_prt_u64_base2_nbits(out, le64_to_cpu(m->btree_allocated_bitmap), 64);
prt_newline(out);
- prt_printf(out, "Durability:\t%llu\n", BCH_MEMBER_DURABILITY(&m) ? BCH_MEMBER_DURABILITY(&m) - 1 : 1);
+ prt_printf(out, "Durability:\t%llu\n", BCH_MEMBER_DURABILITY(m) ? BCH_MEMBER_DURABILITY(m) - 1 : 1);
+
+ prt_printf(out, "Discard:\t%llu\n", BCH_MEMBER_DISCARD(m));
+ prt_printf(out, "Freespace initialized:\t%llu\n", BCH_MEMBER_FREESPACE_INITIALIZED(m));
+ prt_printf(out, "Resize on mount:\t%llu\n", BCH_MEMBER_RESIZE_ON_MOUNT(m));
+}
+
+static void member_to_text(struct printbuf *out,
+ struct bch_member m,
+ struct bch_sb_field_disk_groups *gi,
+ struct bch_sb *sb,
+ unsigned idx)
+{
+ if (!bch2_member_alive(&m))
+ return;
- prt_printf(out, "Discard:\t%llu\n", BCH_MEMBER_DISCARD(&m));
- prt_printf(out, "Freespace initialized:\t%llu\n", BCH_MEMBER_FREESPACE_INITIALIZED(&m));
- prt_printf(out, "Resize on mount:\t%llu\n", BCH_MEMBER_RESIZE_ON_MOUNT(&m));
+ prt_printf(out, "Device:\t%u\n", idx);
+ printbuf_indent_add(out, 2);
+ bch2_member_to_text(out, &m, gi, sb, idx);
printbuf_indent_sub(out, 2);
}
@@ -317,7 +303,7 @@ static int bch2_sb_members_v1_validate(struct bch_sb *sb, struct bch_sb_field *f
}
for (i = 0; i < sb->nr_devices; i++) {
- struct bch_member m = members_v1_get(mi, i);
+ struct bch_member m = bch2_members_v1_get(mi, i);
int ret = validate_member(err, m, sb, i);
if (ret)
@@ -343,7 +329,7 @@ static void bch2_sb_members_v1_to_text(struct printbuf *out, struct bch_sb *sb,
prt_printf(out, "nr_devices mismatch: have %i entries, should be %u", nr, sb->nr_devices);
for (unsigned i = 0; i < min(sb->nr_devices, nr); i++)
- member_to_text(out, members_v1_get(mi, i), gi, sb, i);
+ member_to_text(out, bch2_members_v1_get(mi, i), gi, sb, i);
}
const struct bch_sb_field_ops bch_sb_field_ops_members_v1 = {
@@ -377,7 +363,7 @@ static void bch2_sb_members_v2_to_text(struct printbuf *out, struct bch_sb *sb,
*/
for (unsigned i = 0; i < min(sb->nr_devices, nr); i++)
- member_to_text(out, members_v2_get(mi, i), gi, sb, i);
+ member_to_text(out, bch2_members_v2_get(mi, i), gi, sb, i);
}
static int bch2_sb_members_v2_validate(struct bch_sb *sb, struct bch_sb_field *f,
@@ -394,7 +380,7 @@ static int bch2_sb_members_v2_validate(struct bch_sb *sb, struct bch_sb_field *f
}
for (unsigned i = 0; i < sb->nr_devices; i++) {
- int ret = validate_member(err, members_v2_get(mi, i), sb, i);
+ int ret = validate_member(err, bch2_members_v2_get(mi, i), sb, i);
if (ret)
return ret;
}
@@ -430,7 +416,7 @@ void bch2_sb_members_to_cpu(struct bch_fs *c)
struct bch_sb_field_members_v2 *mi2 = bch2_sb_field_get(c->disk_sb.sb, members_v2);
if (mi2)
for (unsigned i = 0; i < c->sb.nr_devices; i++) {
- struct bch_member m = members_v2_get(mi2, i);
+ struct bch_member m = bch2_members_v2_get(mi2, i);
bool removed = uuid_equal(&m.uuid, &BCH_SB_MEMBER_DELETED_UUID);
mod_bit(i, c->devs_removed.d, removed);
}
diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h
index 35d4ab9b6197..6de999cf71cb 100644
--- a/fs/bcachefs/sb-members.h
+++ b/fs/bcachefs/sb-members.h
@@ -14,11 +14,36 @@ __bch2_members_v2_get_mut(struct bch_sb_field_members_v2 *mi, unsigned i)
return (void *) mi->_members + (i * le16_to_cpu(mi->member_bytes));
}
+static inline struct bch_member bch2_members_v2_get(struct bch_sb_field_members_v2 *mi, int i)
+{
+ struct bch_member ret, *p = __bch2_members_v2_get_mut(mi, i);
+ memset(&ret, 0, sizeof(ret));
+ memcpy(&ret, p, min_t(size_t, le16_to_cpu(mi->member_bytes), sizeof(ret)));
+ return ret;
+}
+
+static inline struct bch_member *members_v1_get_mut(struct bch_sb_field_members_v1 *mi, int i)
+{
+ return (void *) mi->_members + (i * BCH_MEMBER_V1_BYTES);
+}
+
+static inline struct bch_member bch2_members_v1_get(struct bch_sb_field_members_v1 *mi, int i)
+{
+ struct bch_member ret, *p = members_v1_get_mut(mi, i);
+ memset(&ret, 0, sizeof(ret));
+ memcpy(&ret, p, min_t(size_t, BCH_MEMBER_V1_BYTES, sizeof(ret)));
+ return ret;
+}
+
int bch2_sb_members_v2_init(struct bch_fs *c);
int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb);
struct bch_member *bch2_members_v2_get_mut(struct bch_sb *sb, int i);
struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i);
+void bch2_member_to_text(struct printbuf *, struct bch_member *,
+ struct bch_sb_field_disk_groups *,
+ struct bch_sb *, unsigned);
+
static inline bool bch2_dev_is_online(struct bch_dev *ca)
{
return !enumerated_ref_is_zero(&ca->io_ref[READ]);
diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c
index 5a1f81749661..84f987d3a02a 100644
--- a/fs/bcachefs/snapshot.c
+++ b/fs/bcachefs/snapshot.c
@@ -1146,7 +1146,7 @@ static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
if (bch2_fs_inconsistent_on(i == 2, c,
"snapshot %u missing child pointer to %u",
parent_id, id))
- return ret;
+ return bch_err_throw(c, ENOENT_snapshot);
parent->v.children[i] = cpu_to_le32(child_id);
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index b0019488f586..ef15e614f4f3 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -55,6 +55,7 @@
#include "replicas.h"
#include "sb-clean.h"
#include "sb-counters.h"
+#include "sb-downgrade.h"
#include "sb-errors.h"
#include "sb-members.h"
#include "snapshot.h"
@@ -842,6 +843,233 @@ int bch2_fs_init_rw(struct bch_fs *c)
return 0;
}
+static bool check_version_upgrade(struct bch_fs *c)
+{
+ unsigned latest_version = bcachefs_metadata_version_current;
+ unsigned latest_compatible = min(latest_version,
+ bch2_latest_compatible_version(c->sb.version));
+ unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version;
+ unsigned new_version = 0;
+ bool ret = false;
+
+ if (old_version < bcachefs_metadata_required_upgrade_below) {
+ if (c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible ||
+ latest_compatible < bcachefs_metadata_required_upgrade_below)
+ new_version = latest_version;
+ else
+ new_version = latest_compatible;
+ } else {
+ switch (c->opts.version_upgrade) {
+ case BCH_VERSION_UPGRADE_compatible:
+ new_version = latest_compatible;
+ break;
+ case BCH_VERSION_UPGRADE_incompatible:
+ new_version = latest_version;
+ break;
+ case BCH_VERSION_UPGRADE_none:
+ new_version = min(old_version, latest_version);
+ break;
+ }
+ }
+
+ if (new_version > old_version) {
+ CLASS(printbuf, buf)();
+
+ if (old_version < bcachefs_metadata_required_upgrade_below)
+ prt_str(&buf, "Version upgrade required:\n");
+
+ if (old_version != c->sb.version) {
+ prt_str(&buf, "Version upgrade from ");
+ bch2_version_to_text(&buf, c->sb.version_upgrade_complete);
+ prt_str(&buf, " to ");
+ bch2_version_to_text(&buf, c->sb.version);
+ prt_str(&buf, " incomplete\n");
+ }
+
+ prt_printf(&buf, "Doing %s version upgrade from ",
+ BCH_VERSION_MAJOR(old_version) != BCH_VERSION_MAJOR(new_version)
+ ? "incompatible" : "compatible");
+ bch2_version_to_text(&buf, old_version);
+ prt_str(&buf, " to ");
+ bch2_version_to_text(&buf, new_version);
+ prt_newline(&buf);
+
+ struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
+ __le64 passes = ext->recovery_passes_required[0];
+ bch2_sb_set_upgrade(c, old_version, new_version);
+ passes = ext->recovery_passes_required[0] & ~passes;
+
+ if (passes) {
+ prt_str(&buf, " running recovery passes: ");
+ prt_bitflags(&buf, bch2_recovery_passes,
+ bch2_recovery_passes_from_stable(le64_to_cpu(passes)));
+ }
+
+ bch_notice(c, "%s", buf.buf);
+ ret = true;
+ }
+
+ if (new_version > c->sb.version_incompat_allowed &&
+ c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible) {
+ CLASS(printbuf, buf)();
+
+ prt_str(&buf, "Now allowing incompatible features up to ");
+ bch2_version_to_text(&buf, new_version);
+ prt_str(&buf, ", previously allowed up to ");
+ bch2_version_to_text(&buf, c->sb.version_incompat_allowed);
+ prt_newline(&buf);
+
+ bch_notice(c, "%s", buf.buf);
+ ret = true;
+ }
+
+ if (ret)
+ bch2_sb_upgrade(c, new_version,
+ c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible);
+
+ return ret;
+}
+
+noinline_for_stack
+static int bch2_fs_opt_version_init(struct bch_fs *c)
+{
+ int ret = 0;
+
+ if (c->opts.norecovery) {
+ c->opts.recovery_pass_last = c->opts.recovery_pass_last
+ ? min(c->opts.recovery_pass_last, BCH_RECOVERY_PASS_snapshots_read)
+ : BCH_RECOVERY_PASS_snapshots_read;
+ c->opts.nochanges = true;
+ }
+
+ if (c->opts.nochanges)
+ c->opts.read_only = true;
+
+ if (c->opts.journal_rewind)
+ c->opts.fsck = true;
+
+ CLASS(printbuf, p)();
+ bch2_log_msg_start(c, &p);
+
+ prt_str(&p, "starting version ");
+ bch2_version_to_text(&p, c->sb.version);
+
+ bool first = true;
+ for (enum bch_opt_id i = 0; i < bch2_opts_nr; i++) {
+ const struct bch_option *opt = &bch2_opt_table[i];
+ u64 v = bch2_opt_get_by_id(&c->opts, i);
+
+ if (!(opt->flags & OPT_MOUNT))
+ continue;
+
+ if (v == bch2_opt_get_by_id(&bch2_opts_default, i))
+ continue;
+
+ prt_str(&p, first ? " opts=" : ",");
+ first = false;
+ bch2_opt_to_text(&p, c, c->disk_sb.sb, opt, v, OPT_SHOW_MOUNT_STYLE);
+ }
+
+ if (c->sb.version_incompat_allowed != c->sb.version) {
+ prt_printf(&p, "\nallowing incompatible features above ");
+ bch2_version_to_text(&p, c->sb.version_incompat_allowed);
+ }
+
+ if (c->opts.verbose) {
+ prt_printf(&p, "\nfeatures: ");
+ prt_bitflags(&p, bch2_sb_features, c->sb.features);
+ }
+
+ if (c->sb.multi_device) {
+ prt_printf(&p, "\nwith devices");
+ for_each_online_member(c, ca, BCH_DEV_READ_REF_bch2_online_devs) {
+ prt_char(&p, ' ');
+ prt_str(&p, ca->name);
+ }
+ }
+
+ if (c->cf_encoding)
+ prt_printf(&p, "\nUsing encoding defined by superblock: utf8-%u.%u.%u",
+ unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING),
+ unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING),
+ unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING));
+
+ if (c->opts.journal_rewind)
+ prt_printf(&p, "\nrewinding journal, fsck required");
+
+ scoped_guard(mutex, &c->sb_lock) {
+ struct bch_sb_field_ext *ext = bch2_sb_field_get_minsize(&c->disk_sb, ext,
+ sizeof(struct bch_sb_field_ext) / sizeof(u64));
+ if (!ext)
+ return bch_err_throw(c, ENOSPC_sb);
+
+ ret = bch2_sb_members_v2_init(c);
+ if (ret)
+ return ret;
+
+ __le64 now = cpu_to_le64(ktime_get_real_seconds());
+ for_each_online_member_rcu(c, ca)
+ bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = now;
+
+ if (BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb))
+ ext->recovery_passes_required[0] |=
+ cpu_to_le64(bch2_recovery_passes_to_stable(BIT_ULL(BCH_RECOVERY_PASS_check_topology)));
+
+ u64 sb_passes = bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
+ if (sb_passes) {
+ prt_str(&p, "\nsuperblock requires following recovery passes to be run:\n ");
+ prt_bitflags(&p, bch2_recovery_passes, sb_passes);
+ }
+
+ if (bch2_check_version_downgrade(c)) {
+ prt_str(&p, "\nVersion downgrade required:");
+
+ __le64 passes = ext->recovery_passes_required[0];
+ bch2_sb_set_downgrade(c,
+ BCH_VERSION_MINOR(bcachefs_metadata_version_current),
+ BCH_VERSION_MINOR(c->sb.version));
+ passes = ext->recovery_passes_required[0] & ~passes;
+ if (passes) {
+ prt_str(&p, "\nrunning recovery passes: ");
+ prt_bitflags(&p, bch2_recovery_passes,
+ bch2_recovery_passes_from_stable(le64_to_cpu(passes)));
+ }
+ }
+
+ check_version_upgrade(c);
+
+ c->opts.recovery_passes |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
+
+ if (c->sb.version_upgrade_complete < bcachefs_metadata_version_autofix_errors)
+ SET_BCH_SB_ERROR_ACTION(c->disk_sb.sb, BCH_ON_ERROR_fix_safe);
+
+ /* Don't write the superblock, defer that until we go rw */
+ }
+
+ if (c->sb.clean)
+ set_bit(BCH_FS_clean_recovery, &c->flags);
+ if (c->opts.fsck)
+ set_bit(BCH_FS_in_fsck, &c->flags);
+ set_bit(BCH_FS_in_recovery, &c->flags);
+
+ bch2_print_str(c, KERN_INFO, p.buf);
+
+ if (BCH_SB_INITIALIZED(c->disk_sb.sb)) {
+ if (!(c->sb.features & (1ULL << BCH_FEATURE_new_extent_overwrite))) {
+ bch_err(c, "feature new_extent_overwrite not set, filesystem no longer supported");
+ return -EINVAL;
+ }
+
+ if (!c->sb.clean &&
+ !(c->sb.features & (1ULL << BCH_FEATURE_extents_above_btree_updates))) {
+ bch_err(c, "filesystem needs recovery from older version; run fsck from older bcachefs-tools to fix");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts,
bch_sb_handles *sbs)
{
@@ -1013,6 +1241,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts,
ret =
bch2_fs_async_obj_init(c) ?:
+ bch2_blacklist_table_initialize(c) ?:
bch2_fs_btree_cache_init(c) ?:
bch2_fs_btree_iter_init(c) ?:
bch2_fs_btree_key_cache_init(&c->btree_key_cache) ?:
@@ -1063,7 +1292,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts,
}
#endif
- for (i = 0; i < c->sb.nr_devices; i++) {
+ for (unsigned i = 0; i < c->sb.nr_devices; i++) {
if (!bch2_member_exists(c->disk_sb.sb, i))
continue;
ret = bch2_dev_alloc(c, i);
@@ -1078,6 +1307,20 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts,
&c->clock_journal_res,
(sizeof(struct jset_entry_clock) / sizeof(u64)) * 2);
+ ret = bch2_fs_opt_version_init(c);
+ if (ret)
+ goto err;
+
+ /*
+ * start workqueues/kworkers early - kthread creation checks for pending
+ * signals, which is _very_ annoying
+ */
+ if (go_rw_in_recovery(c)) {
+ ret = bch2_fs_init_rw(c);
+ if (ret)
+ goto err;
+ }
+
scoped_guard(mutex, &bch_fs_list_lock)
ret = bch2_fs_online(c);
@@ -1093,53 +1336,6 @@ err:
goto out;
}
-noinline_for_stack
-static void print_mount_opts(struct bch_fs *c)
-{
- enum bch_opt_id i;
- CLASS(printbuf, p)();
- bch2_log_msg_start(c, &p);
-
- prt_str(&p, "starting version ");
- bch2_version_to_text(&p, c->sb.version);
-
- bool first = true;
- for (i = 0; i < bch2_opts_nr; i++) {
- const struct bch_option *opt = &bch2_opt_table[i];
- u64 v = bch2_opt_get_by_id(&c->opts, i);
-
- if (!(opt->flags & OPT_MOUNT))
- continue;
-
- if (v == bch2_opt_get_by_id(&bch2_opts_default, i))
- continue;
-
- prt_str(&p, first ? " opts=" : ",");
- first = false;
- bch2_opt_to_text(&p, c, c->disk_sb.sb, opt, v, OPT_SHOW_MOUNT_STYLE);
- }
-
- if (c->sb.version_incompat_allowed != c->sb.version) {
- prt_printf(&p, "\nallowing incompatible features above ");
- bch2_version_to_text(&p, c->sb.version_incompat_allowed);
- }
-
- if (c->opts.verbose) {
- prt_printf(&p, "\nfeatures: ");
- prt_bitflags(&p, bch2_sb_features, c->sb.features);
- }
-
- if (c->sb.multi_device) {
- prt_printf(&p, "\nwith devices");
- for_each_online_member(c, ca, BCH_DEV_READ_REF_bch2_online_devs) {
- prt_char(&p, ' ');
- prt_str(&p, ca->name);
- }
- }
-
- bch2_print_str(c, KERN_INFO, p.buf);
-}
-
static bool bch2_fs_may_start(struct bch_fs *c)
{
struct bch_dev *ca;
@@ -1174,38 +1370,16 @@ static bool bch2_fs_may_start(struct bch_fs *c)
int bch2_fs_start(struct bch_fs *c)
{
- time64_t now = ktime_get_real_seconds();
int ret = 0;
BUG_ON(test_bit(BCH_FS_started, &c->flags));
- print_mount_opts(c);
-
- if (c->cf_encoding)
- bch_info(c, "Using encoding defined by superblock: utf8-%u.%u.%u",
- unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING),
- unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING),
- unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING));
-
if (!bch2_fs_may_start(c))
return bch_err_throw(c, insufficient_devices_to_start);
scoped_guard(rwsem_write, &c->state_lock) {
- guard(mutex)(&c->sb_lock);
- if (!bch2_sb_field_get_minsize(&c->disk_sb, ext,
- sizeof(struct bch_sb_field_ext) / sizeof(u64))) {
- ret = bch_err_throw(c, ENOSPC_sb);
- goto err;
- }
-
- ret = bch2_sb_members_v2_init(c);
- if (ret)
- goto err;
-
scoped_guard(rcu)
for_each_online_member_rcu(c, ca) {
- bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount =
- cpu_to_le64(now);
if (ca->mi.state == BCH_MEMBER_STATE_rw)
bch2_dev_allocator_add(c, ca);
}