summary refs log tree commit diff
diff options
context:
space:
mode:
author Kent Overstreet <kent.overstreet@gmail.com> 2019-03-11 14:59:58 -0400
committer Kent Overstreet <kent.overstreet@gmail.com> 2019-04-17 17:11:25 -0400
commit 7fee13da1c9b45e8e92224d3c9b2a9a0ea854ba9 (patch)
tree 3151f6d63a8a664b9b2964872f6d2a2f01180c61
parent 073cdc0311f6be9afef05966a22747036e7da3bc (diff)
bcachefs: bch2_trans_mark_update()
-rw-r--r-- fs/bcachefs/alloc_background.c 6
-rw-r--r-- fs/bcachefs/alloc_background.h 2
-rw-r--r-- fs/bcachefs/bcachefs_format.h 1
-rw-r--r-- fs/bcachefs/btree_iter.c 4
-rw-r--r-- fs/bcachefs/btree_types.h 4
-rw-r--r-- fs/bcachefs/btree_update.h 13
-rw-r--r-- fs/bcachefs/btree_update_leaf.c 119
-rw-r--r-- fs/bcachefs/buckets.c 486
-rw-r--r-- fs/bcachefs/buckets.h 11
-rw-r--r-- fs/bcachefs/buckets_types.h 13
-rw-r--r-- fs/bcachefs/ec.c 23
-rw-r--r-- fs/bcachefs/extents.c 45
-rw-r--r-- fs/bcachefs/migrate.c 3
-rw-r--r-- fs/bcachefs/move.c 2
-rw-r--r-- fs/bcachefs/recovery.c 100
-rw-r--r-- fs/bcachefs/replicas.c 8
-rw-r--r-- fs/bcachefs/replicas.h 1
-rw-r--r-- fs/bcachefs/super-io.c 3
18 files changed, 702 insertions, 142 deletions
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index fabe8d364809..f7b6e8c793c5 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -140,8 +140,8 @@ struct bkey_alloc_unpacked bch2_alloc_unpack(const struct bch_alloc *a)
return ret;
}
-static void bch2_alloc_pack(struct bkey_i_alloc *dst,
- const struct bkey_alloc_unpacked src)
+void bch2_alloc_pack(struct bkey_i_alloc *dst,
+ const struct bkey_alloc_unpacked src)
{
unsigned idx = 0;
void *d = dst->v.data;
@@ -961,7 +961,6 @@ retry:
invalidating_cached_data = m.cached_sectors != 0;
- //BUG_ON(u.dirty_sectors);
u.data_type = 0;
u.dirty_sectors = 0;
u.cached_sectors = 0;
@@ -973,6 +972,7 @@ retry:
* we have to trust the in memory bucket @m, not the version in the
* btree:
*/
+ //BUG_ON(u.dirty_sectors);
u.gen = m.gen + 1;
a = bkey_alloc_init(&alloc_key.k);
diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h
index 057af2fa9bf4..39c31e391fe4 100644
--- a/fs/bcachefs/alloc_background.h
+++ b/fs/bcachefs/alloc_background.h
@@ -13,6 +13,8 @@ struct bkey_alloc_unpacked {
};
struct bkey_alloc_unpacked bch2_alloc_unpack(const struct bch_alloc *);
+void bch2_alloc_pack(struct bkey_i_alloc *,
+ const struct bkey_alloc_unpacked);
#define ALLOC_SCAN_BATCH(ca) max_t(size_t, 1, (ca)->mi.nbuckets >> 9)
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index d390ac860d18..be6acec19671 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -1295,6 +1295,7 @@ enum bch_sb_features {
enum bch_sb_compat {
BCH_COMPAT_FEAT_ALLOC_INFO = 0,
+ BCH_COMPAT_FEAT_ALLOC_METADATA = 1,
};
/* options: */
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 33cbc2ff5c9a..49ddf05cc9a9 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -1002,7 +1002,7 @@ retry_all:
goto retry_all;
}
- ret = btree_trans_has_multiple_iters(trans) ? -EINTR : 0;
+ ret = hweight64(trans->iters_live) > 1 ? -EINTR : 0;
out:
bch2_btree_cache_cannibalize_unlock(c);
return ret;
@@ -1100,8 +1100,6 @@ int __must_check bch2_btree_iter_traverse(struct btree_iter *iter)
if (unlikely(ret))
ret = __btree_iter_traverse_all(iter->trans, iter, ret);
- BUG_ON(ret == -EINTR && !btree_trans_has_multiple_iters(iter->trans));
-
return ret;
}
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index a995efc73fdc..ae273ab7aa1a 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -6,6 +6,7 @@
#include <linux/six.h>
#include "bkey_methods.h"
+#include "buckets_types.h"
#include "journal_types.h"
struct open_bucket;
@@ -260,6 +261,7 @@ struct btree_insert_entry {
};
bool deferred;
+ bool triggered;
};
#define BTREE_ITER_MAX 64
@@ -297,6 +299,8 @@ struct btree_trans {
struct btree_iter iters_onstack[2];
struct btree_insert_entry updates_onstack[6];
+
+ struct replicas_delta_list fs_usage_deltas;
};
#define BTREE_FLAG(flag) \
diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h
index 2c4b8c797974..be11efdcbe04 100644
--- a/fs/bcachefs/btree_update.h
+++ b/fs/bcachefs/btree_update.h
@@ -42,8 +42,11 @@ enum {
__BTREE_INSERT_USE_ALLOC_RESERVE,
__BTREE_INSERT_JOURNAL_REPLAY,
__BTREE_INSERT_JOURNAL_RESERVED,
+ __BTREE_INSERT_NOMARK_INSERT,
__BTREE_INSERT_NOMARK_OVERWRITES,
__BTREE_INSERT_NOMARK,
+ __BTREE_INSERT_MARK_INMEM,
+ __BTREE_INSERT_NO_CLEAR_REPLICAS,
__BTREE_INSERT_NOWAIT,
__BTREE_INSERT_GC_LOCK_HELD,
__BCH_HASH_SET_MUST_CREATE,
@@ -76,12 +79,20 @@ enum {
#define BTREE_INSERT_JOURNAL_RESERVED (1 << __BTREE_INSERT_JOURNAL_RESERVED)
+/* Don't mark new key, just overwrites: */
+#define BTREE_INSERT_NOMARK_INSERT (1 << __BTREE_INSERT_NOMARK_INSERT)
+
/* Don't mark overwrites, just new key: */
#define BTREE_INSERT_NOMARK_OVERWRITES (1 << __BTREE_INSERT_NOMARK_OVERWRITES)
-/* Don't call bch2_mark_key: */
+/* Don't mark at all (neither the new key nor overwrites): */
#define BTREE_INSERT_NOMARK (1 << __BTREE_INSERT_NOMARK)
+/* Don't mark transactionally: */
+#define BTREE_INSERT_MARK_INMEM (1 << __BTREE_INSERT_MARK_INMEM)
+
+#define BTREE_INSERT_NO_CLEAR_REPLICAS (1 << __BTREE_INSERT_NO_CLEAR_REPLICAS)
+
/* Don't block on allocation failure (for new btree nodes): */
#define BTREE_INSERT_NOWAIT (1 << __BTREE_INSERT_NOWAIT)
#define BTREE_INSERT_GC_LOCK_HELD (1 << __BTREE_INSERT_GC_LOCK_HELD)
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index 1a72a584ebef..d052ca541965 100644
--- a/fs/bcachefs/btree_update_leaf.c
+++ b/fs/bcachefs/btree_update_leaf.c
@@ -524,6 +524,22 @@ static inline void do_btree_insert_one(struct btree_trans *trans,
btree_insert_key_deferred(trans, insert);
}
+static inline bool update_triggers_transactional(struct btree_trans *trans,
+ struct btree_insert_entry *i)
+{ /* true unless BTREE_INSERT_MARK_INMEM was passed; extents/inodes btrees only */
+ return likely(!(trans->flags & BTREE_INSERT_MARK_INMEM)) &&
+ (i->iter->btree_id == BTREE_ID_EXTENTS ||
+ i->iter->btree_id == BTREE_ID_INODES);
+}
+/*
+ * True when update @i needs marking at all: the transaction was not given
+ * BTREE_INSERT_NOMARK, the update is not deferred, and
+ * btree_node_type_needs_gc() accepts the iterator's btree id.
+ */
+static inline bool update_has_triggers(struct btree_trans *trans,
+ struct btree_insert_entry *i)
+{
+ return likely(!(trans->flags & BTREE_INSERT_NOMARK)) &&
+ !i->deferred &&
+ btree_node_type_needs_gc(i->iter->btree_id);
+}
+
/*
* Get journal reservation, take write locks, and attempt to do btree update(s):
*/
@@ -536,29 +552,25 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
struct btree_iter *linked;
int ret;
+ if (likely(!(trans->flags & BTREE_INSERT_NO_CLEAR_REPLICAS))) {
+ memset(&trans->fs_usage_deltas.fs_usage, 0,
+ sizeof(trans->fs_usage_deltas.fs_usage));
+ trans->fs_usage_deltas.top = trans->fs_usage_deltas.d;
+ }
+
trans_for_each_update_iter(trans, i)
BUG_ON(i->iter->uptodate >= BTREE_ITER_NEED_RELOCK);
- btree_trans_lock_write(c, trans);
-
- if (likely(!(trans->flags & BTREE_INSERT_NOMARK))) {
- trans_for_each_update_iter(trans, i) {
- if (i->deferred ||
- !btree_node_type_needs_gc(i->iter->btree_id))
- continue;
-
- if (!fs_usage) {
- percpu_down_read(&c->mark_lock);
- fs_usage = bch2_fs_usage_scratch_get(c);
- }
-
- if (!bch2_bkey_replicas_marked_locked(c,
- bkey_i_to_s_c(i->k), true)) {
- ret = BTREE_INSERT_NEED_MARK_REPLICAS;
- goto out;
- }
+ trans_for_each_update_iter(trans, i)
+ if (update_has_triggers(trans, i) &&
+ update_triggers_transactional(trans, i)) {
+ ret = bch2_trans_mark_update(trans, i,
+ &trans->fs_usage_deltas);
+ if (ret)
+ return ret;
}
- }
+
+ btree_trans_lock_write(c, trans);
if (race_fault()) {
ret = -EINTR;
@@ -575,6 +587,23 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
if (ret)
goto out;
+ trans_for_each_update_iter(trans, i) {
+ if (i->deferred ||
+ !btree_node_type_needs_gc(i->iter->btree_id))
+ continue;
+
+ if (!fs_usage) {
+ percpu_down_read(&c->mark_lock);
+ fs_usage = bch2_fs_usage_scratch_get(c);
+ }
+
+ if (!bch2_bkey_replicas_marked_locked(c,
+ bkey_i_to_s_c(i->k), true)) {
+ ret = BTREE_INSERT_NEED_MARK_REPLICAS;
+ goto out;
+ }
+ }
+
/*
* Don't get journal reservation until after we know insert will
* succeed:
@@ -603,20 +632,24 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
linked->flags |= BTREE_ITER_NOUNLOCK;
}
- if (likely(!(trans->flags & BTREE_INSERT_NOMARK))) {
- trans_for_each_update_iter(trans, i)
+ trans_for_each_update_iter(trans, i)
+ if (update_has_triggers(trans, i) &&
+ !update_triggers_transactional(trans, i))
bch2_mark_update(trans, i, fs_usage, 0);
- if (fs_usage)
- bch2_trans_fs_usage_apply(trans, fs_usage);
-
- if (unlikely(c->gc_pos.phase)) {
- trans_for_each_update_iter(trans, i)
- if (gc_visited(c, gc_pos_btree_node(i->iter->l[0].b)))
- bch2_mark_update(trans, i, NULL,
- BCH_BUCKET_MARK_GC);
- }
+
+ if (fs_usage) {
+ bch2_replicas_delta_list_apply(c, fs_usage,
+ &trans->fs_usage_deltas);
+ bch2_trans_fs_usage_apply(trans, fs_usage);
}
+ if (likely(!(trans->flags & BTREE_INSERT_NOMARK)) &&
+ unlikely(c->gc_pos.phase))
+ trans_for_each_update_iter(trans, i)
+ if (gc_visited(c, gc_pos_btree_node(i->iter->l[0].b)))
+ bch2_mark_update(trans, i, NULL,
+ BCH_BUCKET_MARK_GC);
+
trans_for_each_update(trans, i)
do_btree_insert_one(trans, i);
out:
@@ -643,6 +676,19 @@ int bch2_trans_commit_error(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
unsigned flags = trans->flags;
+ struct btree_insert_entry *src, *dst;
+
+ src = dst = trans->updates;
+
+ while (src < trans->updates + trans->nr_updates) {
+ if (!src->triggered) {
+ *dst = *src;
+ dst++;
+ }
+ src++;
+ }
+
+ trans->nr_updates = dst - trans->updates;
/*
* BTREE_INSERT_NOUNLOCK means don't unlock _after_ successful btree
@@ -800,6 +846,7 @@ int bch2_trans_commit(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct btree_insert_entry *i;
+ unsigned orig_mem_top = trans->mem_top;
int ret = 0;
if (!trans->nr_updates)
@@ -877,8 +924,16 @@ out_noupdates:
return ret;
err:
ret = bch2_trans_commit_error(trans, i, ret);
- if (!ret)
+
+ /* can't loop if it was passed in and we changed it: */
+ if (unlikely(trans->flags & BTREE_INSERT_NO_CLEAR_REPLICAS) && !ret)
+ ret = -EINTR;
+
+ if (!ret) {
+ /* free memory used by triggers, they'll be reexecuted: */
+ trans->mem_top = orig_mem_top;
goto retry;
+ }
goto out;
}
@@ -961,6 +1016,7 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
int ret = 0;
bch2_trans_init(&trans, c);
+ bch2_trans_preload_iters(&trans);
iter = bch2_trans_get_iter(&trans, id, start, BTREE_ITER_INTENT);
@@ -1006,5 +1062,6 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
}
bch2_trans_exit(&trans);
+ BUG_ON(ret == -EINTR);
return ret;
}
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 3834b150a0d1..6d452311d924 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -646,19 +646,16 @@ static int bch2_mark_alloc(struct bch_fs *c, struct bkey_s_c k,
ca = bch_dev_bkey_exists(c, k.k->p.inode);
g = __bucket(ca, k.k->p.offset, gc);
- /*
- * this should currently only be getting called from the bucket
- * invalidate path:
- */
- BUG_ON(u.dirty_sectors);
- BUG_ON(u.cached_sectors);
- BUG_ON(!g->mark.owned_by_allocator);
-
old = bucket_data_cmpxchg(c, ca, fs_usage, g, m, ({
m.gen = u.gen;
m.data_type = u.data_type;
m.dirty_sectors = u.dirty_sectors;
m.cached_sectors = u.cached_sectors;
+
+ if (!(flags & BCH_BUCKET_MARK_GC)) {
+ m.journal_seq_valid = 1;
+ m.journal_seq = journal_seq;
+ }
}));
g->io_time[READ] = u.read_time;
@@ -666,6 +663,11 @@ static int bch2_mark_alloc(struct bch_fs *c, struct bkey_s_c k,
g->oldest_gen = u.oldest_gen;
g->gen_valid = 1;
+ /*
+ * need to know if we're getting called from the invalidate path or
+ * not:
+ */
+
if (old.cached_sectors) {
update_cached_sectors(c, fs_usage, ca->dev_idx,
-old.cached_sectors);
@@ -751,11 +753,34 @@ static s64 ptr_disk_sectors_delta(struct extent_ptr_decoded p,
}
}
-/*
- * Checking against gc's position has to be done here, inside the cmpxchg()
- * loop, to avoid racing with the start of gc clearing all the marks - GC does
- * that with the gc pos seqlock held.
- */
+static void bucket_set_stripe(struct bch_fs *c,
+ const struct bch_stripe *v,
+ bool enabled,
+ struct bch_fs_usage *fs_usage,
+ u64 journal_seq,
+ bool gc)
+{
+ unsigned i;
+ /*
+  * For each of the v->nr_blocks pointers in v->ptrs, update the mark of the
+  * bucket it references: set dirty, set the stripe flag to @enabled, and -
+  * only when @journal_seq is nonzero - record it as the bucket's journal
+  * sequence number. A stale pointer is a BUG.
+  */
+ for (i = 0; i < v->nr_blocks; i++) {
+ const struct bch_extent_ptr *ptr = v->ptrs + i;
+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+ struct bucket *g = PTR_BUCKET(ca, ptr, gc);
+ struct bucket_mark new, old;
+
+ BUG_ON(ptr_stale(ca, ptr));
+
+ old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
+ new.dirty = true;
+ new.stripe = enabled;
+ if (journal_seq) {
+ new.journal_seq_valid = 1;
+ new.journal_seq = journal_seq;
+ }
+ }));
+ }
+}
+
static bool bch2_mark_pointer(struct bch_fs *c,
struct extent_ptr_decoded p,
s64 sectors, enum bch_data_type data_type,
@@ -765,8 +790,7 @@ static bool bch2_mark_pointer(struct bch_fs *c,
{
struct bucket_mark old, new;
struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
- size_t b = PTR_BUCKET_NR(ca, &p.ptr);
- struct bucket *g = __bucket(ca, b, gc);
+ struct bucket *g = PTR_BUCKET(ca, &p.ptr, gc);
bool overflow;
u64 v;
@@ -935,35 +959,6 @@ static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
return 0;
}
-static void bucket_set_stripe(struct bch_fs *c,
- const struct bch_stripe *v,
- bool enabled,
- struct bch_fs_usage *fs_usage,
- u64 journal_seq,
- bool gc)
-{
- unsigned i;
-
- for (i = 0; i < v->nr_blocks; i++) {
- const struct bch_extent_ptr *ptr = v->ptrs + i;
- struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
- size_t b = PTR_BUCKET_NR(ca, ptr);
- struct bucket *g = __bucket(ca, b, gc);
- struct bucket_mark new, old;
-
- BUG_ON(ptr_stale(ca, ptr));
-
- old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
- new.dirty = true;
- new.stripe = enabled;
- if (journal_seq) {
- new.journal_seq_valid = 1;
- new.journal_seq = journal_seq;
- }
- }));
- }
-}
-
static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
bool inserting,
struct bch_fs_usage *fs_usage,
@@ -995,14 +990,7 @@ static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
m->nr_blocks = s.v->nr_blocks;
m->nr_redundant = s.v->nr_redundant;
- memset(&m->r, 0, sizeof(m->r));
-
- m->r.e.data_type = BCH_DATA_USER;
- m->r.e.nr_devs = s.v->nr_blocks;
- m->r.e.nr_required = s.v->nr_blocks - s.v->nr_redundant;
-
- for (i = 0; i < s.v->nr_blocks; i++)
- m->r.e.devs[i] = s.v->ptrs[i].dev;
+ bch2_bkey_to_replicas(&m->r.e, k);
/*
* XXX: account for stripes somehow here
@@ -1169,10 +1157,11 @@ int bch2_mark_update(struct btree_trans *trans,
if (!btree_node_type_needs_gc(iter->btree_id))
return 0;
- bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k), true,
- bpos_min(insert->k->k.p, b->key.k.p).offset -
- bkey_start_offset(&insert->k->k),
- fs_usage, trans->journal_res.seq, flags);
+ if (!(trans->flags & BTREE_INSERT_NOMARK_INSERT))
+ bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k), true,
+ bpos_min(insert->k->k.p, b->key.k.p).offset -
+ bkey_start_offset(&insert->k->k),
+ fs_usage, trans->journal_res.seq, flags);
if (unlikely(trans->flags & BTREE_INSERT_NOMARK_OVERWRITES))
return 0;
@@ -1251,6 +1240,391 @@ void bch2_trans_fs_usage_apply(struct btree_trans *trans,
}
}
+/* trans_mark: */
+/*
+ * Append one (replicas entry, sector delta) record at d->top and advance
+ * d->top past it.
+ *
+ * Records are variable length: replicas_entry_bytes(r) bytes of entry plus 8
+ * (presumably the size of the s64 delta that precedes the entry in struct
+ * replicas_delta - TODO confirm). bch2_replicas_delta_list_apply() walks the
+ * list with the same stride.
+ *
+ * NOTE(review): the BUG_ON() bounds check runs after the record has already
+ * been written, so a record that does not fit is stored before it is caught.
+ */
+static inline void update_replicas_list(struct replicas_delta_list *d,
+ struct bch_replicas_entry *r,
+ s64 sectors)
+{
+ d->top->delta = sectors;
+ memcpy(&d->top->r, r, replicas_entry_bytes(r));
+
+ d->top = (void *) d->top + replicas_entry_bytes(r) + 8;
+
+ BUG_ON((void *) d->top > (void *) d->d + sizeof(d->pad));
+}
+/*
+ * Queue a sector delta for cached data on device @dev: fills a replicas entry
+ * with bch2_replicas_entry_cached() and appends it to @d via
+ * update_replicas_list().
+ */
+static inline void update_cached_sectors_list(struct replicas_delta_list *d,
+ unsigned dev, s64 sectors)
+{
+ struct bch_replicas_padded r;
+
+ bch2_replicas_entry_cached(&r.e, dev);
+
+ update_replicas_list(d, &r.e, sectors);
+}
+/*
+ * Fold the deltas accumulated in @r into @fs_usage.
+ *
+ * First r->fs_usage is accumulated into @fs_usage with acc_u64s(), treating
+ * both as arrays of sizeof(*fs_usage) / sizeof(u64) u64s. Then the packed
+ * records from r->d up to r->top are walked, each one being passed to
+ * update_replicas(). Stepping past r->top without landing on it is a BUG.
+ */
+void bch2_replicas_delta_list_apply(struct bch_fs *c,
+ struct bch_fs_usage *fs_usage,
+ struct replicas_delta_list *r)
+{
+ struct replicas_delta *d = r->d;
+
+ acc_u64s((u64 *) fs_usage,
+ (u64 *) &r->fs_usage, sizeof(*fs_usage) / sizeof(u64));
+
+ while (d != r->top) {
+ BUG_ON((void *) d > (void *) r->top);
+
+ update_replicas(c, fs_usage, &d->r, d->delta);
+
+ d = (void *) d + replicas_entry_bytes(&d->r) + 8;
+ }
+}
+/*
+ * Find the key at @pos in @btree_id on behalf of a transactional trigger.
+ *
+ * If the transaction already holds a non-deferred update whose iterator is on
+ * @btree_id at exactly @pos, that update is returned in *@insert, with its
+ * iterator in *@iter and its pending key in *@k. Otherwise *@insert stays
+ * NULL, an iterator is obtained with BTREE_ITER_SLOTS|BTREE_ITER_INTENT, and
+ * *@k is whatever bch2_btree_iter_peek_slot() returns.
+ *
+ * Returns 0 on success, or the error from getting the iterator or from the
+ * peek (in the latter case the iterator is put before returning).
+ */
+static int trans_get_key(struct btree_trans *trans,
+ enum btree_id btree_id, struct bpos pos,
+ struct btree_insert_entry **insert,
+ struct btree_iter **iter,
+ struct bkey_s_c *k)
+{
+ unsigned i;
+ int ret;
+
+ *insert = NULL;
+
+ for (i = 0; i < trans->nr_updates; i++)
+ if (!trans->updates[i].deferred &&
+ trans->updates[i].iter->btree_id == btree_id &&
+ !bkey_cmp(pos, trans->updates[i].iter->pos)) {
+ *insert = &trans->updates[i];
+ *iter = (*insert)->iter;
+ *k = bkey_i_to_s_c((*insert)->k);
+ return 0;
+ }
+
+ *iter = __bch2_trans_get_iter(trans, btree_id, pos,
+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT, 0);
+ if (IS_ERR(*iter))
+ return PTR_ERR(*iter);
+
+ *k = bch2_btree_iter_peek_slot(*iter);
+ ret = bkey_err(*k);
+ if (ret)
+ bch2_trans_iter_put(trans, *iter);
+ return ret;
+}
+/*
+ * Make sure the transaction has a pending update for the key that
+ * trans_get_key() found.
+ *
+ * Nothing to do if *@insert is already set. Otherwise room for @k plus
+ * @extra_u64s additional u64s is taken from bch2_trans_kmalloc(), a new update
+ * on @iter is queued with .triggered set, and @k is copied into it with
+ * bkey_reassemble().
+ *
+ * Returns 0, or the error from bch2_trans_kmalloc().
+ * NOTE(review): the result of bch2_trans_update() is used without a check.
+ */
+static int trans_update_key(struct btree_trans *trans,
+ struct btree_insert_entry **insert,
+ struct btree_iter *iter,
+ struct bkey_s_c k,
+ unsigned extra_u64s)
+{
+ struct bkey_i *new_k;
+
+ if (*insert)
+ return 0;
+
+ new_k = bch2_trans_kmalloc(trans, bkey_bytes(k.k) +
+ extra_u64s * sizeof(u64));
+ if (IS_ERR(new_k))
+ return PTR_ERR(new_k);
+
+ *insert = bch2_trans_update(trans, ((struct btree_insert_entry) {
+ .iter = iter,
+ .k = new_k,
+ .triggered = true,
+ }));
+
+ bkey_reassemble((*insert)->k, k);
+ return 0;
+}
+/*
+ * Apply @sectors to the alloc key of the bucket that @p points into, as an
+ * update within @trans.
+ *
+ * The alloc key at POS(dev, bucket nr) is fetched, unpacked, adjusted
+ * (dirty_sectors for non-cached pointers, cached_sectors for cached ones) and
+ * repacked into a pending update. data_type becomes @data_type if either
+ * sector count is nonzero after the adjustment, else 0. An overflowing add is
+ * reported through bch2_fs_inconsistent_on() but the update is still queued.
+ *
+ * Returns 0 on success; 1 if the bucket's gen is after the pointer's gen (the
+ * caller treats that as a stale pointer; nothing is queued); -1 if there is no
+ * KEY_TYPE_alloc key for the bucket; or an error from trans_get_key() /
+ * trans_update_key().
+ *
+ * @d is not used in the body.
+ */
+static int bch2_trans_mark_pointer(struct btree_trans *trans,
+ struct extent_ptr_decoded p,
+ s64 sectors, enum bch_data_type data_type,
+ struct replicas_delta_list *d)
+{
+ struct bch_fs *c = trans->c;
+ struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
+ struct btree_insert_entry *insert;
+ struct btree_iter *iter;
+ struct bkey_s_c k;
+ struct bkey_alloc_unpacked u;
+ struct bkey_i_alloc *a;
+ bool overflow;
+ int ret;
+
+ ret = trans_get_key(trans, BTREE_ID_ALLOC,
+ POS(p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr)),
+ &insert, &iter, &k);
+ if (ret)
+ return ret;
+
+ if (k.k->type != KEY_TYPE_alloc) {
+ bch_err_ratelimited(c, "pointer to nonexistent bucket %u:%zu",
+ p.ptr.dev,
+ PTR_BUCKET_NR(ca, &p.ptr));
+ ret = -1;
+ goto out;
+ }
+
+ u = bch2_alloc_unpack(bkey_s_c_to_alloc(k).v);
+
+ if (gen_after(u.gen, p.ptr.gen)) {
+ ret = 1;
+ goto out;
+ }
+
+ if (!p.ptr.cached)
+ overflow = checked_add(u.dirty_sectors, sectors);
+ else
+ overflow = checked_add(u.cached_sectors, sectors);
+
+ u.data_type = u.dirty_sectors || u.cached_sectors
+ ? data_type : 0;
+
+ bch2_fs_inconsistent_on(overflow, c,
+ "bucket sector count overflow: %u + %lli > U16_MAX",
+ !p.ptr.cached
+ ? u.dirty_sectors
+ : u.cached_sectors, sectors);
+
+ ret = trans_update_key(trans, &insert, iter, k, 1);
+ if (ret)
+ goto out;
+
+ a = bkey_alloc_init(insert->k);
+ a->k.p = iter->pos;
+ bch2_alloc_pack(a, u);
+out:
+ bch2_trans_iter_put(trans, iter);
+ return ret;
+}
+/*
+ * Account @sectors against block @p.block of stripe @p.idx, within @trans.
+ *
+ * The stripe key at POS(0, p.idx) in BTREE_ID_EC is fetched and a pending
+ * update for it obtained. That block's count is increased by @sectors plus the
+ * matching parity sectors: |sectors| * nr_redundant / nr_data rounded up,
+ * given the sign of @sectors, where nr_data = nr_blocks - nr_redundant.
+ * Finally @sectors is queued in @d against the replicas entry derived from the
+ * stripe key.
+ *
+ * Returns 0 on success, -1 if there is no KEY_TYPE_stripe key at that
+ * position, or an error from trans_get_key() / trans_update_key().
+ * @sectors must be nonzero (BUG_ON). @data_type is not used in the body.
+ */
+static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
+ struct bch_extent_stripe_ptr p,
+ s64 sectors, enum bch_data_type data_type,
+ struct replicas_delta_list *d)
+{
+ struct bch_replicas_padded r;
+ struct btree_insert_entry *insert;
+ struct btree_iter *iter;
+ struct bkey_s_c k;
+ struct bkey_s_stripe s;
+ unsigned nr_data;
+ s64 parity_sectors;
+ int ret = 0;
+
+ BUG_ON(!sectors);
+
+ ret = trans_get_key(trans, BTREE_ID_EC, POS(0, p.idx),
+ &insert, &iter, &k);
+ if (ret)
+ return ret;
+
+ if (k.k->type != KEY_TYPE_stripe) {
+ bch_err_ratelimited(trans->c,
+ "pointer to nonexistent stripe %llu",
+ (u64) p.idx);
+ ret = -1;
+ goto out;
+ }
+
+ ret = trans_update_key(trans, &insert, iter, k, 1);
+ if (ret)
+ goto out;
+
+ s = bkey_i_to_s_stripe(insert->k);
+
+ nr_data = s.v->nr_blocks - s.v->nr_redundant;
+
+ parity_sectors = DIV_ROUND_UP(abs(sectors) * s.v->nr_redundant, nr_data);
+
+ if (sectors < 0)
+ parity_sectors = -parity_sectors;
+
+ stripe_blockcount_set(s.v, p.block,
+ stripe_blockcount_get(s.v, p.block) +
+ sectors + parity_sectors);
+
+ bch2_bkey_to_replicas(&r.e, s.s_c);
+
+ update_replicas_list(d, &r.e, sectors);
+out:
+ bch2_trans_iter_put(trans, iter);
+ return ret;
+}
+/*
+ * Transactionally mark every pointer in @k with @sectors of @data_type.
+ *
+ * Per decoded pointer the on-disk delta is @sectors itself for BCH_DATA_BTREE
+ * and ptr_disk_sectors_delta() otherwise. Each pointer goes through
+ * bch2_trans_mark_pointer() (a positive return means stale), and then:
+ *  - cached pointer: a cached-sectors delta is queued in @d, unless the
+ *    pointer is stale or the delta is zero;
+ *  - dirty pointer without stripe entries (p.ec_nr == 0): the delta is added
+ *    to dirty_sectors and the device is appended to the replicas entry r;
+ *  - pointer with stripe entries: each one is passed to
+ *    bch2_trans_mark_stripe_ptr(), and r.e.nr_required is set to 0.
+ * A nonzero dirty_sectors total is finally queued in @d against r.
+ *
+ * Returns 0, a negative error from bch2_trans_mark_pointer(), or any nonzero
+ * return from bch2_trans_mark_stripe_ptr(). @sectors must be nonzero (BUG_ON).
+ */
+static int bch2_trans_mark_extent(struct btree_trans *trans,
+ struct bkey_s_c k,
+ s64 sectors, enum bch_data_type data_type,
+ struct replicas_delta_list *d)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+ struct bch_replicas_padded r;
+ s64 dirty_sectors = 0;
+ bool stale;
+ unsigned i;
+ int ret;
+
+ r.e.data_type = data_type;
+ r.e.nr_devs = 0;
+ r.e.nr_required = 1;
+
+ BUG_ON(!sectors);
+
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+ s64 disk_sectors = data_type == BCH_DATA_BTREE
+ ? sectors
+ : ptr_disk_sectors_delta(p, sectors);
+
+ ret = bch2_trans_mark_pointer(trans, p, disk_sectors,
+ data_type, d);
+ if (ret < 0)
+ return ret;
+
+ stale = ret > 0;
+
+ if (p.ptr.cached) {
+ if (disk_sectors && !stale)
+ update_cached_sectors_list(d, p.ptr.dev,
+ disk_sectors);
+ } else if (!p.ec_nr) {
+ dirty_sectors += disk_sectors;
+ r.e.devs[r.e.nr_devs++] = p.ptr.dev;
+ } else {
+ for (i = 0; i < p.ec_nr; i++) {
+ ret = bch2_trans_mark_stripe_ptr(trans, p.ec[i],
+ disk_sectors, data_type, d);
+ if (ret)
+ return ret;
+ }
+
+ r.e.nr_required = 0;
+ }
+ }
+
+ if (dirty_sectors)
+ update_replicas_list(d, &r.e, dirty_sectors);
+
+ return 0;
+}
+/*
+ * Dispatch transactional marking of @k on its key type:
+ *  - btree_ptr:   marked as BCH_DATA_BTREE with +c->opts.btree_node_size
+ *                 sectors when @inserting, minus that otherwise (@sectors is
+ *                 not used for this type);
+ *  - extent:      marked as BCH_DATA_USER with @sectors;
+ *  - inode:       d->fs_usage.nr_inodes is incremented when @inserting,
+ *                 decremented otherwise;
+ *  - reservation: @sectors times the key's nr_replicas is added to
+ *                 d->fs_usage.reserved and to persistent_reserved[], indexed
+ *                 by nr_replicas clamped to 1..ARRAY_SIZE, minus one;
+ *  - other types: nothing is done.
+ *
+ * Returns 0 or the error from bch2_trans_mark_extent().
+ */
+int bch2_trans_mark_key(struct btree_trans *trans,
+ struct bkey_s_c k,
+ bool inserting, s64 sectors,
+ struct replicas_delta_list *d)
+{
+ struct bch_fs *c = trans->c;
+
+ switch (k.k->type) {
+ case KEY_TYPE_btree_ptr:
+ return bch2_trans_mark_extent(trans, k, inserting
+ ? c->opts.btree_node_size
+ : -c->opts.btree_node_size,
+ BCH_DATA_BTREE, d);
+ case KEY_TYPE_extent:
+ return bch2_trans_mark_extent(trans, k,
+ sectors, BCH_DATA_USER, d);
+ case KEY_TYPE_inode:
+ if (inserting)
+ d->fs_usage.nr_inodes++;
+ else
+ d->fs_usage.nr_inodes--;
+ return 0;
+ case KEY_TYPE_reservation: {
+ unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
+
+ sectors *= replicas;
+ replicas = clamp_t(unsigned, replicas, 1,
+ ARRAY_SIZE(d->fs_usage.persistent_reserved));
+
+ d->fs_usage.reserved += sectors;
+ d->fs_usage.persistent_reserved[replicas - 1] += sectors;
+ return 0;
+ }
+ default:
+ return 0;
+ }
+}
+/*
+ * Run the transactional marking for one pending update @insert.
+ *
+ * Does nothing for btrees that btree_node_type_needs_gc() rejects. Otherwise
+ * the new key is marked as inserted, with a sector count running from its
+ * start offset to the smaller of its own end and the end of leaf node b.
+ *
+ * Existing keys are then walked using a by-value copy of the leaf node
+ * iterator, so the iterator held by @insert is not advanced. The walk stops at
+ * the first key that, for extents, starts at or after the end of the new key,
+ * or, for other btrees, is not at the new key's position. Each visited key is
+ * marked as overwritten. For extents the (negative) sector count depends on
+ * how the new key overlaps the old one; for a MIDDLE overlap the part of the
+ * old key past the end of the new key is first marked as inserted again, then
+ * everything from the new key's start to the old key's end is marked removed.
+ *
+ * Returns 0 or the first error from bch2_trans_mark_key().
+ */
+int bch2_trans_mark_update(struct btree_trans *trans,
+ struct btree_insert_entry *insert,
+ struct replicas_delta_list *d)
+{
+ struct btree_iter *iter = insert->iter;
+ struct btree *b = iter->l[0].b;
+ struct btree_node_iter node_iter = iter->l[0].iter;
+ struct bkey_packed *_k;
+ int ret;
+
+ if (!btree_node_type_needs_gc(iter->btree_id))
+ return 0;
+
+ ret = bch2_trans_mark_key(trans,
+ bkey_i_to_s_c(insert->k), true,
+ bpos_min(insert->k->k.p, b->key.k.p).offset -
+ bkey_start_offset(&insert->k->k), d);
+ if (ret)
+ return ret;
+
+ while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b,
+ KEY_TYPE_discard))) {
+ struct bkey unpacked;
+ struct bkey_s_c k;
+ s64 sectors = 0;
+
+ k = bkey_disassemble(b, _k, &unpacked);
+
+ if (btree_node_is_extents(b)
+ ? bkey_cmp(insert->k->k.p, bkey_start_pos(k.k)) <= 0
+ : bkey_cmp(insert->k->k.p, k.k->p))
+ break;
+
+ if (btree_node_is_extents(b)) {
+ switch (bch2_extent_overlap(&insert->k->k, k.k)) {
+ case BCH_EXTENT_OVERLAP_ALL:
+ sectors = -((s64) k.k->size);
+ break;
+ case BCH_EXTENT_OVERLAP_BACK:
+ sectors = bkey_start_offset(&insert->k->k) -
+ k.k->p.offset;
+ break;
+ case BCH_EXTENT_OVERLAP_FRONT:
+ sectors = bkey_start_offset(k.k) -
+ insert->k->k.p.offset;
+ break;
+ case BCH_EXTENT_OVERLAP_MIDDLE:
+ sectors = k.k->p.offset - insert->k->k.p.offset;
+ BUG_ON(sectors <= 0);
+
+ ret = bch2_trans_mark_key(trans, k, true,
+ sectors, d);
+ if (ret)
+ return ret;
+
+ sectors = bkey_start_offset(&insert->k->k) -
+ k.k->p.offset;
+ break;
+ }
+
+ BUG_ON(sectors >= 0);
+ }
+
+ ret = bch2_trans_mark_key(trans, k, false, sectors, d);
+ if (ret)
+ return ret;
+
+ bch2_btree_node_iter_advance(&node_iter, b);
+ }
+
+ return 0;
+}
+
/* Disk reservations: */
static u64 bch2_recalc_sectors_available(struct bch_fs *c)
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index 0aada931322f..a32c25d8f298 100644
--- a/fs/bcachefs/buckets.h
+++ b/fs/bcachefs/buckets.h
@@ -99,7 +99,7 @@ static inline struct bucket_mark ptr_bucket_mark(struct bch_dev *ca,
struct bucket_mark m;
rcu_read_lock();
- m = READ_ONCE(bucket(ca, PTR_BUCKET_NR(ca, ptr))->mark);
+ m = READ_ONCE(PTR_BUCKET(ca, ptr, 0)->mark);
rcu_read_unlock();
return m;
@@ -265,6 +265,15 @@ int bch2_mark_overwrite(struct btree_trans *, struct btree_iter *,
struct bch_fs_usage *, unsigned);
int bch2_mark_update(struct btree_trans *, struct btree_insert_entry *,
struct bch_fs_usage *, unsigned);
+
+void bch2_replicas_delta_list_apply(struct bch_fs *,
+ struct bch_fs_usage *,
+ struct replicas_delta_list *);
+int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c,
+ bool, s64, struct replicas_delta_list *);
+int bch2_trans_mark_update(struct btree_trans *,
+ struct btree_insert_entry *,
+ struct replicas_delta_list *);
void bch2_trans_fs_usage_apply(struct btree_trans *, struct bch_fs_usage *);
/* disk reservations: */
diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h
index 2a1fd7a7ec20..974daa7ef2d3 100644
--- a/fs/bcachefs/buckets_types.h
+++ b/fs/bcachefs/buckets_types.h
@@ -94,6 +94,19 @@ struct bch_fs_usage_short {
u64 nr_inodes;
};
+struct replicas_delta { /* one variable-length record in a replicas_delta_list */
+ s64 delta; /* sector delta to apply to replicas entry r */
+ struct bch_replicas_entry r; /* copied in with replicas_entry_bytes() bytes */
+};
+/*
+ * Deltas accumulated by transactional triggers: plain counters in fs_usage,
+ * plus a packed run of variable-length replicas_delta records.
+ *
+ * d[0] marks where the records begin and top points just past the last one
+ * (top == d means the list is empty). update_replicas_list() bounds the
+ * records by sizeof(pad) bytes starting at d.
+ */
+struct replicas_delta_list {
+ struct bch_fs_usage fs_usage;
+
+ struct replicas_delta *top;
+ struct replicas_delta d[0];
+ u8 pad[256];
+};
+
/*
* A reservation for space on disk:
*/
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index 07c0298cad65..748fc559aae3 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -536,14 +536,17 @@ static int ec_stripe_mem_alloc(struct bch_fs *c,
struct btree_iter *iter)
{
size_t idx = iter->pos.offset;
+ int ret = 0;
if (!__ec_stripe_mem_alloc(c, idx, GFP_NOWAIT))
- return 0;
+ return ret;
bch2_btree_trans_unlock(iter->trans);
+ ret = -EINTR;
if (!__ec_stripe_mem_alloc(c, idx, GFP_KERNEL))
- return -EINTR;
+ return ret;
+
return -ENOMEM;
}
@@ -689,23 +692,22 @@ retry:
if (!ret)
ret = -ENOSPC;
- goto out;
+ goto err;
found_slot:
ret = ec_stripe_mem_alloc(c, iter);
-
- if (ret == -EINTR)
- goto retry;
if (ret)
- return ret;
+ goto err;
stripe->k.p = iter->pos;
bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &stripe->k_i));
ret = bch2_trans_commit(&trans, NULL, NULL,
- BTREE_INSERT_NOFAIL|
- BTREE_INSERT_USE_RESERVE);
-out:
+ BTREE_INSERT_ATOMIC|
+ BTREE_INSERT_NOFAIL);
+err:
+ if (ret == -EINTR)
+ goto retry;
bch2_trans_exit(&trans);
return ret;
@@ -742,6 +744,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
int ret = 0, dev, idx;
bch2_trans_init(&trans, c);
+ bch2_trans_preload_iters(&trans);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
bkey_start_pos(pos),
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index d81e590c9124..f8f29251a5e7 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -872,15 +872,54 @@ static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
bch2_btree_iter_verify(iter, l->b);
}
+static unsigned bch2_bkey_nr_alloc_ptrs(struct bkey_s_c k)
+{ /* returns how many ptr and stripe_ptr entries @k contains */
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const union bch_extent_entry *entry;
+ unsigned ret = 0;
+
+ bkey_extent_entry_for_each(ptrs, entry) {
+ switch (__extent_entry_type(entry)) {
+ case BCH_EXTENT_ENTRY_ptr:
+ case BCH_EXTENT_ENTRY_stripe_ptr:
+ ret++;
+ }
+ }
+
+ return ret;
+}
+
static inline struct bpos
-bch2_extent_atomic_end(struct bkey_i *k, struct btree_iter *iter)
+bch2_extent_atomic_end(struct bkey_i *insert, struct btree_iter *iter)
{
struct btree *b = iter->l[0].b;
+ struct btree_node_iter node_iter = iter->l[0].iter;
+ struct bkey_packed *_k;
+ unsigned nr_alloc_ptrs =
+ bch2_bkey_nr_alloc_ptrs(bkey_i_to_s_c(insert));
BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK);
- BUG_ON(bkey_cmp(bkey_start_pos(&k->k), b->data->min_key) < 0);
+ BUG_ON(bkey_cmp(bkey_start_pos(&insert->k), b->data->min_key) < 0);
+
+ while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b,
+ KEY_TYPE_discard))) {
+ struct bkey unpacked;
+ struct bkey_s_c k = bkey_disassemble(b, _k, &unpacked);
+
+ if (bkey_cmp(insert->k.p, bkey_start_pos(k.k)) <= 0)
+ break;
+
+ nr_alloc_ptrs += bch2_bkey_nr_alloc_ptrs(k);
+
+ if (nr_alloc_ptrs > 20) {
+ BUG_ON(bkey_cmp(k.k->p, bkey_start_pos(&insert->k)) <= 0);
+ return bpos_min(insert->k.p, k.k->p);
+ }
+
+ bch2_btree_node_iter_advance(&node_iter, b);
+ }
- return bpos_min(k->k.p, b->key.k.p);
+ return bpos_min(insert->k.p, b->key.k.p);
}
void bch2_extent_trim_atomic(struct bkey_i *k, struct btree_iter *iter)
diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c
index 88761d34dc65..822b3fce09b4 100644
--- a/fs/bcachefs/migrate.c
+++ b/fs/bcachefs/migrate.c
@@ -42,6 +42,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
int ret = 0;
bch2_trans_init(&trans, c);
+ bch2_trans_preload_iters(&trans);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
POS_MIN, BTREE_ITER_PREFETCH);
@@ -95,6 +96,8 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
break;
}
+ BUG_ON(ret == -EINTR);
+
bch2_trans_exit(&trans);
bch2_replicas_gc_end(c, ret);
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 7c69a4a51fba..946e616228c9 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -62,6 +62,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
int ret = 0;
bch2_trans_init(&trans, c);
+ bch2_trans_preload_iters(&trans);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
bkey_start_pos(&bch2_keylist_front(keys)->k),
@@ -184,6 +185,7 @@ nomatch:
}
out:
bch2_trans_exit(&trans);
+ BUG_ON(ret == -EINTR);
return ret;
}
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 3f60b91629b3..75d64b7f2307 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -211,11 +211,6 @@ static int bch2_extent_replay_key(struct bch_fs *c, struct bkey_i *k)
bch2_disk_reservation_init(c, 0);
struct bkey_i *split;
bool split_compressed = false;
- unsigned flags = BTREE_INSERT_ATOMIC|
- BTREE_INSERT_NOFAIL|
- BTREE_INSERT_LAZY_RW|
- BTREE_INSERT_JOURNAL_REPLAY|
- BTREE_INSERT_NOMARK;
int ret;
bch2_trans_init(&trans, c);
@@ -251,9 +246,6 @@ retry:
BCH_DISK_RESERVATION_NOFAIL);
BUG_ON(ret);
- flags &= ~BTREE_INSERT_JOURNAL_REPLAY;
- flags &= ~BTREE_INSERT_NOMARK;
- flags |= BTREE_INSERT_NOMARK_OVERWRITES;
split_compressed = true;
}
@@ -265,24 +257,31 @@ retry:
bch2_btree_iter_set_pos(iter, split->k.p);
} while (bkey_cmp(iter->pos, k->k.p) < 0);
- ret = bch2_trans_commit(&trans, &disk_res, NULL, flags);
- if (ret)
- goto err;
-
if (split_compressed) {
- /*
- * This isn't strictly correct - we should only be relying on
- * the btree node lock for synchronization with gc when we've
- * got a write lock held.
- *
- * but - there are other correctness issues if btree gc were to
- * run before journal replay finishes
- */
- BUG_ON(c->gc_pos.phase);
-
- bch2_mark_key(c, bkey_i_to_s_c(k), false, -((s64) k->k.size),
- NULL, 0, 0);
+ memset(&trans.fs_usage_deltas.fs_usage, 0,
+ sizeof(trans.fs_usage_deltas.fs_usage));
+ trans.fs_usage_deltas.top = trans.fs_usage_deltas.d;
+
+ ret = bch2_trans_mark_key(&trans, bkey_i_to_s_c(k), false,
+ -((s64) k->k.size),
+ &trans.fs_usage_deltas) ?:
+ bch2_trans_commit(&trans, &disk_res, NULL,
+ BTREE_INSERT_ATOMIC|
+ BTREE_INSERT_NOFAIL|
+ BTREE_INSERT_LAZY_RW|
+ BTREE_INSERT_NOMARK_OVERWRITES|
+ BTREE_INSERT_NO_CLEAR_REPLICAS);
+ } else {
+ ret = bch2_trans_commit(&trans, &disk_res, NULL,
+ BTREE_INSERT_ATOMIC|
+ BTREE_INSERT_NOFAIL|
+ BTREE_INSERT_LAZY_RW|
+ BTREE_INSERT_JOURNAL_REPLAY|
+ BTREE_INSERT_NOMARK);
}
+
+ if (ret)
+ goto err;
err:
if (ret == -EINTR)
goto retry;
@@ -526,7 +525,7 @@ static int verify_superblock_clean(struct bch_fs *c,
struct bch_sb_field_clean *clean = *cleanp;
int ret = 0;
- if (!clean || !j)
+ if (!c->sb.clean || !j)
return 0;
if (mustfix_fsck_err_on(j->seq != clean->journal_seq, c,
@@ -652,6 +651,7 @@ int bch2_fs_recovery(struct bch_fs *c)
u64 journal_seq;
LIST_HEAD(journal_entries);
struct journal_keys journal_keys = { NULL };
+ bool wrote = false, write_sb = false;
int ret;
if (c->sb.clean)
@@ -676,8 +676,12 @@ int bch2_fs_recovery(struct bch_fs *c)
if (ret)
goto err;
- fsck_err_on(c->sb.clean && !journal_empty(&journal_entries), c,
- "filesystem marked clean but journal not empty");
+ if (mustfix_fsck_err_on(c->sb.clean && !journal_empty(&journal_entries), c,
+ "filesystem marked clean but journal not empty")) {
+ c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO);
+ SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
+ c->sb.clean = false;
+ }
if (!c->sb.clean && list_empty(&journal_entries)) {
bch_err(c, "no journal entries found");
@@ -735,12 +739,15 @@ int bch2_fs_recovery(struct bch_fs *c)
if (ret)
goto err;
+ bch_verbose(c, "starting alloc read");
err = "error reading allocation information";
ret = bch2_alloc_read(c, &journal_keys);
if (ret)
goto err;
+ bch_verbose(c, "alloc read done");
bch_verbose(c, "starting stripes_read");
+ err = "error reading stripes";
ret = bch2_stripes_read(c, &journal_keys);
if (ret)
goto err;
@@ -748,11 +755,26 @@ int bch2_fs_recovery(struct bch_fs *c)
set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
+ if ((c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)) &&
+ !(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_METADATA))) {
+ /*
+ * interior btree node updates aren't consistent with the
+ * journal; after an unclean shutdown we have to walk all
+ * pointers to metadata:
+ */
+ bch_verbose(c, "starting metadata mark and sweep:");
+ err = "error in mark and sweep";
+ ret = bch2_gc(c, NULL, true, true);
+ if (ret)
+ goto err;
+ bch_verbose(c, "mark and sweep done");
+ }
+
if (c->opts.fsck ||
!(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)) ||
test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags)) {
bch_verbose(c, "starting mark and sweep:");
- err = "error in recovery";
+ err = "error in mark and sweep";
ret = bch2_gc(c, &journal_keys, true, false);
if (ret)
goto err;
@@ -779,6 +801,16 @@ int bch2_fs_recovery(struct bch_fs *c)
goto err;
bch_verbose(c, "journal replay done");
+ bch_verbose(c, "writing allocation info:");
+ err = "error writing out alloc info";
+ ret = bch2_stripes_write(c, BTREE_INSERT_LAZY_RW, &wrote) ?:
+ bch2_alloc_write(c, BTREE_INSERT_LAZY_RW, &wrote);
+ if (ret) {
+ bch_err(c, "error writing alloc info");
+ goto err;
+ }
+ bch_verbose(c, "alloc write done");
+
if (c->opts.norecovery)
goto out;
@@ -801,13 +833,23 @@ int bch2_fs_recovery(struct bch_fs *c)
c->disk_sb.sb->version_min =
le16_to_cpu(bcachefs_metadata_version_min);
c->disk_sb.sb->version = le16_to_cpu(bcachefs_metadata_version_current);
+ write_sb = true;
+ }
+
+ if (!test_bit(BCH_FS_ERROR, &c->flags)) {
+ c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_INFO;
+ write_sb = true;
}
if (c->opts.fsck &&
!test_bit(BCH_FS_ERROR, &c->flags)) {
c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_ATOMIC_NLINK;
SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 0);
+ write_sb = true;
}
+
+ if (write_sb)
+ bch2_write_super(c);
mutex_unlock(&c->sb_lock);
if (c->journal_seq_blacklist_table &&
@@ -820,7 +862,7 @@ out:
return ret;
err:
fsck_err:
- pr_err("Error in recovery: %s (%i)", err, ret);
+ bch_err(c, "Error in recovery: %s (%i)", err, ret);
goto out;
}
diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c
index c2aa330c9932..eb44119466d8 100644
--- a/fs/bcachefs/replicas.c
+++ b/fs/bcachefs/replicas.c
@@ -101,8 +101,8 @@ static void stripe_to_replicas(struct bkey_s_c k,
r->devs[r->nr_devs++] = ptr->dev;
}
-static void bkey_to_replicas(struct bch_replicas_entry *e,
- struct bkey_s_c k)
+void bch2_bkey_to_replicas(struct bch_replicas_entry *e,
+ struct bkey_s_c k)
{
e->nr_devs = 0;
@@ -435,7 +435,7 @@ bool bch2_bkey_replicas_marked_locked(struct bch_fs *c,
return false;
}
- bkey_to_replicas(&search.e, k);
+ bch2_bkey_to_replicas(&search.e, k);
return bch2_replicas_marked_locked(c, &search.e, check_gc_replicas);
}
@@ -468,7 +468,7 @@ int bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
return ret;
}
- bkey_to_replicas(&search.e, k);
+ bch2_bkey_to_replicas(&search.e, k);
return bch2_mark_replicas(c, &search.e);
}
diff --git a/fs/bcachefs/replicas.h b/fs/bcachefs/replicas.h
index ad97e3bc6b93..2ffafad7c631 100644
--- a/fs/bcachefs/replicas.h
+++ b/fs/bcachefs/replicas.h
@@ -27,6 +27,7 @@ int bch2_mark_replicas(struct bch_fs *,
bool bch2_bkey_replicas_marked_locked(struct bch_fs *,
struct bkey_s_c, bool);
+void bch2_bkey_to_replicas(struct bch_replicas_entry *, struct bkey_s_c);
bool bch2_bkey_replicas_marked(struct bch_fs *,
struct bkey_s_c, bool);
int bch2_mark_bkey_replicas(struct bch_fs *, struct bkey_s_c);
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index c675280cfe03..61eefd2dd1d2 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -945,7 +945,7 @@ int bch2_fs_mark_dirty(struct bch_fs *c)
mutex_lock(&c->sb_lock);
SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
- c->disk_sb.sb->compat[0] &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO);
+ c->disk_sb.sb->compat[0] &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_METADATA);
ret = bch2_write_super(c);
mutex_unlock(&c->sb_lock);
@@ -1061,6 +1061,7 @@ void bch2_fs_mark_clean(struct bch_fs *c)
SET_BCH_SB_CLEAN(c->disk_sb.sb, true);
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_INFO;
+ c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_METADATA;
u64s = sizeof(*sb_clean) / sizeof(u64) + c->journal.entry_u64s_reserved;