summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Kent Overstreet <kent.overstreet@gmail.com> 2021-03-12 16:56:43 -0500
committer: Kent Overstreet <kent.overstreet@gmail.com> 2021-03-12 16:56:43 -0500
commit: e7c4380a892297d2f65e1c317a1b6d4c67378299 (patch)
tree: 5663bfdcc1c2d67f6b1a8a9da517941cdf44888e
parent: fb2d506f6f5c582d89596a2d2d1b2cc7d8374a7a (diff)
Update bcachefs sources to 63924135a1 bcachefs: Have fsck check for stripe pointers matching stripe
-rw-r--r-- .bcachefs_revision 2
-rw-r--r-- libbcachefs/alloc_background.c 20
-rw-r--r-- libbcachefs/bcachefs_format.h 1
-rw-r--r-- libbcachefs/bkey.h 31
-rw-r--r-- libbcachefs/bset.c 7
-rw-r--r-- libbcachefs/btree_cache.c 29
-rw-r--r-- libbcachefs/btree_gc.c 68
-rw-r--r-- libbcachefs/btree_io.c 47
-rw-r--r-- libbcachefs/btree_iter.c 259
-rw-r--r-- libbcachefs/btree_iter.h 21
-rw-r--r-- libbcachefs/btree_key_cache.c 9
-rw-r--r-- libbcachefs/btree_types.h 29
-rw-r--r-- libbcachefs/btree_update_interior.c 18
-rw-r--r-- libbcachefs/btree_update_leaf.c 306
-rw-r--r-- libbcachefs/buckets.c 4
-rw-r--r-- libbcachefs/ec.c 3
-rw-r--r-- libbcachefs/ec.h 37
-rw-r--r-- libbcachefs/ec_types.h 1
-rw-r--r-- libbcachefs/fsck.c 18
-rw-r--r-- libbcachefs/inode.c 19
-rw-r--r-- libbcachefs/inode.h 2
-rw-r--r-- libbcachefs/journal_io.c 119
-rw-r--r-- libbcachefs/journal_io.h 3
-rw-r--r-- libbcachefs/journal_reclaim.c 4
-rw-r--r-- libbcachefs/movinggc.c 4
-rw-r--r-- libbcachefs/quota.c 5
-rw-r--r-- libbcachefs/rebalance.c 12
-rw-r--r-- libbcachefs/recovery.c 23
-rw-r--r-- libbcachefs/super-io.c 31
-rw-r--r-- libbcachefs/super-io.h 2
-rw-r--r-- libbcachefs/super.c 13
31 files changed, 657 insertions, 490 deletions
diff --git a/.bcachefs_revision b/.bcachefs_revision
index 3d1ac83e..61666d60 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-e2b8120595b8d82ad51f3b4310deaef1c96b3e26
+63924135a103cbf2411ef73e7ca9b1b6ebe265bd
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
index e1d7d7a8..796a061d 100644
--- a/libbcachefs/alloc_background.c
+++ b/libbcachefs/alloc_background.c
@@ -1068,6 +1068,12 @@ static int discard_invalidated_buckets(struct bch_fs *c, struct bch_dev *ca)
return 0;
}
+static inline bool allocator_thread_running(struct bch_dev *ca)
+{
+ return ca->mi.state == BCH_MEMBER_STATE_rw &&
+ test_bit(BCH_FS_ALLOCATOR_RUNNING, &ca->fs->flags);
+}
+
/**
* bch_allocator_thread - move buckets from free_inc to reserves
*
@@ -1084,9 +1090,16 @@ static int bch2_allocator_thread(void *arg)
int ret;
set_freezable();
- ca->allocator_state = ALLOCATOR_RUNNING;
while (1) {
+ if (!allocator_thread_running(ca)) {
+ ca->allocator_state = ALLOCATOR_STOPPED;
+ if (kthread_wait_freezable(allocator_thread_running(ca)))
+ break;
+ }
+
+ ca->allocator_state = ALLOCATOR_RUNNING;
+
cond_resched();
if (kthread_should_stop())
break;
@@ -1387,8 +1400,11 @@ int bch2_dev_allocator_start(struct bch_dev *ca)
p = kthread_create(bch2_allocator_thread, ca,
"bch-alloc/%s", ca->name);
- if (IS_ERR(p))
+ if (IS_ERR(p)) {
+ bch_err(ca->fs, "error creating allocator thread: %li",
+ PTR_ERR(p));
return PTR_ERR(p);
+ }
get_task_struct(p);
rcu_assign_pointer(ca->alloc_thread, p);
diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h
index be9851c6..3d06547e 100644
--- a/libbcachefs/bcachefs_format.h
+++ b/libbcachefs/bcachefs_format.h
@@ -1310,6 +1310,7 @@ LE64_BITMASK(BCH_SB_PRJQUOTA, struct bch_sb, flags[0], 59, 60);
LE64_BITMASK(BCH_SB_HAS_ERRORS, struct bch_sb, flags[0], 60, 61);
LE64_BITMASK(BCH_SB_REFLINK, struct bch_sb, flags[0], 61, 62);
+LE64_BITMASK(BCH_SB_BIG_ENDIAN, struct bch_sb, flags[0], 62, 63);
/* 61-64 unused */
diff --git a/libbcachefs/bkey.h b/libbcachefs/bkey.h
index a22a1dc6..629288a6 100644
--- a/libbcachefs/bkey.h
+++ b/libbcachefs/bkey.h
@@ -175,6 +175,37 @@ static inline struct bpos bpos_max(struct bpos l, struct bpos r)
return bkey_cmp(l, r) > 0 ? l : r;
}
+#define sbb(a, b, borrow) \
+do { \
+ typeof(a) d1, d2; \
+ \
+ d1 = a - borrow; \
+ borrow = d1 > a; \
+ \
+ d2 = d1 - b; \
+ borrow += d2 > d1; \
+ a = d2; \
+} while (0)
+
+/* returns a - b: */
+static inline struct bpos bpos_sub(struct bpos a, struct bpos b)
+{
+ int borrow = 0;
+
+ sbb(a.snapshot, b.snapshot, borrow);
+ sbb(a.offset, b.offset, borrow);
+ sbb(a.inode, b.inode, borrow);
+ return a;
+}
+
+static inline struct bpos bpos_diff(struct bpos l, struct bpos r)
+{
+ if (bkey_cmp(l, r) > 0)
+ swap(l, r);
+
+ return bpos_sub(r, l);
+}
+
void bch2_bpos_swab(struct bpos *);
void bch2_bkey_swab_key(const struct bkey_format *, struct bkey_packed *);
diff --git a/libbcachefs/bset.c b/libbcachefs/bset.c
index 756cbae6..87f951e1 100644
--- a/libbcachefs/bset.c
+++ b/libbcachefs/bset.c
@@ -1729,9 +1729,10 @@ void bch2_bfloat_to_text(struct printbuf *out, struct btree *b,
uk = bkey_unpack_key(b, k);
pr_buf(out,
" failed unpacked at depth %u\n"
- "\t%llu:%llu\n",
- ilog2(j),
- uk.p.inode, uk.p.offset);
+ "\t",
+ ilog2(j));
+ bch2_bpos_to_text(out, uk.p);
+ pr_buf(out, "\n");
break;
}
}
diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c
index b8e183b7..89b3b509 100644
--- a/libbcachefs/btree_cache.c
+++ b/libbcachefs/btree_cache.c
@@ -836,7 +836,7 @@ retry:
b = btree_cache_find(bc, k);
if (unlikely(!b)) {
if (nofill)
- return NULL;
+ goto out;
b = bch2_btree_node_fill(c, NULL, k, btree_id,
level, SIX_LOCK_read, true);
@@ -845,8 +845,12 @@ retry:
if (!b)
goto retry;
+ if (IS_ERR(b) &&
+ !bch2_btree_cache_cannibalize_lock(c, NULL))
+ goto retry;
+
if (IS_ERR(b))
- return b;
+ goto out;
} else {
lock_node:
ret = six_lock_read(&b->c.lock, lock_node_check_fn, (void *) k);
@@ -881,7 +885,8 @@ lock_node:
if (unlikely(btree_node_read_error(b))) {
six_unlock_read(&b->c.lock);
- return ERR_PTR(-EIO);
+ b = ERR_PTR(-EIO);
+ goto out;
}
EBUG_ON(b->c.btree_id != btree_id);
@@ -890,7 +895,8 @@ lock_node:
EBUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
bkey_cmp(b->data->min_key,
bkey_i_to_btree_ptr_v2(&b->key)->v.min_key));
-
+out:
+ bch2_btree_cache_cannibalize_unlock(c);
return b;
}
@@ -1051,15 +1057,14 @@ void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
bch2_btree_keys_stats(b, &stats);
- pr_buf(out,
- "l %u %llu:%llu - %llu:%llu:\n"
- " ptrs: ",
- b->c.level,
- b->data->min_key.inode,
- b->data->min_key.offset,
- b->data->max_key.inode,
- b->data->max_key.offset);
+ pr_buf(out, "l %u ", b->c.level);
+ bch2_bpos_to_text(out, b->data->min_key);
+ pr_buf(out, " - ");
+ bch2_bpos_to_text(out, b->data->max_key);
+ pr_buf(out, ":\n"
+ " ptrs: ");
bch2_val_to_text(out, c, bkey_i_to_s_c(&b->key));
+
pr_buf(out, "\n"
" format: u64s %u fields %u %u %u %u %u\n"
" unpack fn len: %u\n"
diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c
index b4dd973c..f8da65de 100644
--- a/libbcachefs/btree_gc.c
+++ b/libbcachefs/btree_gc.c
@@ -73,12 +73,13 @@ static int bch2_gc_check_topology(struct bch_fs *c,
if (cur.k->k.type == KEY_TYPE_btree_ptr_v2) {
struct bkey_i_btree_ptr_v2 *bp = bkey_i_to_btree_ptr_v2(cur.k);
- if (bkey_deleted(&prev->k->k))
- scnprintf(buf1, sizeof(buf1), "start of node: %llu:%llu",
- node_start.inode,
- node_start.offset);
- else
+ if (bkey_deleted(&prev->k->k)) {
+ struct printbuf out = PBUF(buf1);
+ pr_buf(&out, "start of node: ");
+ bch2_bpos_to_text(&out, node_start);
+ } else {
bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(prev->k));
+ }
if (fsck_err_on(bkey_cmp(expected_start, bp->v.min_key), c,
"btree node with incorrect min_key at btree %s level %u:\n"
@@ -115,8 +116,10 @@ static int bch2_gc_check_topology(struct bch_fs *c,
}
new = kmalloc(bkey_bytes(&cur.k->k), GFP_KERNEL);
- if (!new)
+ if (!new) {
+ bch_err(c, "%s: error allocating new key", __func__);
return -ENOMEM;
+ }
bkey_copy(new, cur.k);
@@ -220,6 +223,11 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
"pointer to nonexistent stripe %llu",
(u64) p.ec.idx))
do_update = true;
+
+ if (fsck_err_on(!bch2_ptr_matches_stripe_m(m, p), c,
+ "pointer does not match stripe %llu",
+ (u64) p.ec.idx))
+ do_update = true;
}
}
@@ -235,8 +243,10 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
}
new = kmalloc(bkey_bytes(k->k), GFP_KERNEL);
- if (!new)
+ if (!new) {
+ bch_err(c, "%s: error allocating new key", __func__);
return -ENOMEM;
+ }
bkey_reassemble(new, *k);
@@ -256,7 +266,8 @@ again:
struct stripe *m = genradix_ptr(&c->stripes[true],
entry->stripe_ptr.idx);
- if (!m || !m->alive) {
+ if (!m || !m->alive ||
+ !bch2_ptr_matches_stripe_m(m, p)) {
bch2_bkey_extent_entry_drop(new, entry);
goto again;
}
@@ -302,8 +313,10 @@ static int bch2_gc_mark_key(struct bch_fs *c, enum btree_id btree_id,
"superblock not marked as containing replicas (type %u)",
k.k->type)) {
ret = bch2_mark_bkey_replicas(c, k);
- if (ret)
- return ret;
+ if (ret) {
+ bch_err(c, "error marking bkey replicas: %i", ret);
+ goto err;
+ }
}
ret = bch2_check_fix_ptrs(c, btree_id, level, is_root, &k);
@@ -321,6 +334,9 @@ static int bch2_gc_mark_key(struct bch_fs *c, enum btree_id btree_id,
bch2_mark_key(c, k, 0, k.k->size, NULL, 0, flags);
fsck_err:
+err:
+ if (ret)
+ bch_err(c, "%s: ret %i", __func__, ret);
return ret;
}
@@ -448,8 +464,10 @@ static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b,
ret = bch2_gc_mark_key(c, b->c.btree_id, b->c.level, false,
k, &max_stale, true);
- if (ret)
+ if (ret) {
+ bch_err(c, "%s: error %i from bch2_gc_mark_key", __func__, ret);
break;
+ }
if (b->c.level) {
bch2_bkey_buf_reassemble(&cur, c, k);
@@ -493,8 +511,11 @@ static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b,
continue;
}
- if (ret)
+ if (ret) {
+ bch_err(c, "%s: error %i getting btree node",
+ __func__, ret);
break;
+ }
ret = bch2_gc_btree_init_recurse(c, child,
target_depth);
@@ -519,6 +540,7 @@ static int bch2_gc_btree_init(struct bch_fs *c,
: !btree_node_type_needs_gc(btree_id) ? 1
: 0;
u8 max_stale = 0;
+ char buf[100];
int ret = 0;
b = c->btree_roots[btree_id].b;
@@ -528,16 +550,14 @@ static int bch2_gc_btree_init(struct bch_fs *c,
six_lock_read(&b->c.lock, NULL, NULL);
if (fsck_err_on(bkey_cmp(b->data->min_key, POS_MIN), c,
- "btree root with incorrect min_key: %llu:%llu",
- b->data->min_key.inode,
- b->data->min_key.offset)) {
+ "btree root with incorrect min_key: %s",
+ (bch2_bpos_to_text(&PBUF(buf), b->data->min_key), buf))) {
BUG();
}
if (fsck_err_on(bkey_cmp(b->data->max_key, POS_MAX), c,
- "btree root with incorrect min_key: %llu:%llu",
- b->data->max_key.inode,
- b->data->max_key.offset)) {
+ "btree root with incorrect max_key: %s",
+ (bch2_bpos_to_text(&PBUF(buf), b->data->max_key), buf))) {
BUG();
}
@@ -551,6 +571,8 @@ static int bch2_gc_btree_init(struct bch_fs *c,
fsck_err:
six_unlock_read(&b->c.lock);
+ if (ret)
+ bch_err(c, "%s: ret %i", __func__, ret);
return ret;
}
@@ -574,8 +596,10 @@ static int bch2_gc_btrees(struct bch_fs *c, bool initial)
int ret = initial
? bch2_gc_btree_init(c, id)
: bch2_gc_btree(c, id, initial);
- if (ret)
+ if (ret) {
+ bch_err(c, "%s: ret %i", __func__, ret);
return ret;
+ }
}
return 0;
@@ -881,6 +905,8 @@ static int bch2_gc_done(struct bch_fs *c,
#undef copy_stripe_field
#undef copy_field
fsck_err:
+ if (ret)
+ bch_err(c, "%s: ret %i", __func__, ret);
return ret;
}
@@ -1601,8 +1627,10 @@ int bch2_gc_thread_start(struct bch_fs *c)
BUG_ON(c->gc_thread);
p = kthread_create(bch2_gc_thread, c, "bch-gc/%s", c->name);
- if (IS_ERR(p))
+ if (IS_ERR(p)) {
+ bch_err(c, "error creating gc thread: %li", PTR_ERR(p));
return PTR_ERR(p);
+ }
get_task_struct(p);
c->gc_thread = p;
diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c
index b0c9e017..dab3a713 100644
--- a/libbcachefs/btree_io.c
+++ b/libbcachefs/btree_io.c
@@ -488,12 +488,12 @@ enum btree_validate_ret {
({ \
__label__ out; \
char _buf[300]; \
- char *buf2 = _buf; \
+ char *_buf2 = _buf; \
struct printbuf out = PBUF(_buf); \
\
- buf2 = kmalloc(4096, GFP_ATOMIC); \
- if (buf2) \
- out = _PBUF(buf2, 4986); \
+ _buf2 = kmalloc(4096, GFP_ATOMIC) ?: _buf; /* keep the stack buffer if the atomic allocation fails */ \
+ if (_buf2 != _buf) \
+ out = _PBUF(_buf2, 4096); /* printbuf size must equal the allocation size */ \
\
btree_err_msg(&out, c, ca, b, i, b->written, write); \
pr_buf(&out, ": " msg, ##__VA_ARGS__); \
@@ -501,13 +501,13 @@ enum btree_validate_ret {
if (type == BTREE_ERR_FIXABLE && \
write == READ && \
!test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) { \
- mustfix_fsck_err(c, "%s", buf2); \
+ mustfix_fsck_err(c, "%s", _buf2); \
goto out; \
} \
\
switch (write) { \
case READ: \
- bch_err(c, "%s", buf2); \
+ bch_err(c, "%s", _buf2); \
\
switch (type) { \
case BTREE_ERR_FIXABLE: \
@@ -528,7 +528,7 @@ enum btree_validate_ret {
} \
break; \
case WRITE: \
- bch_err(c, "corrupt metadata before write: %s", buf2); \
+ bch_err(c, "corrupt metadata before write: %s", _buf2); \
\
if (bch2_fs_inconsistent(c)) { \
ret = BCH_FSCK_ERRORS_NOT_FIXED; \
@@ -537,8 +537,8 @@ enum btree_validate_ret {
break; \
} \
out: \
- if (buf2 != _buf) \
- kfree(buf2); \
+ if (_buf2 != _buf) \
+ kfree(_buf2); \
true; \
})
@@ -550,6 +550,8 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
{
unsigned version = le16_to_cpu(i->version);
const char *err;
+ char buf1[100];
+ char buf2[100];
int ret = 0;
btree_err_on((version != BCH_BSET_VERSION_OLD &&
@@ -613,37 +615,20 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
btree_err_on(bkey_cmp(b->data->min_key, bp->min_key),
BTREE_ERR_MUST_RETRY, c, ca, b, NULL,
- "incorrect min_key: got %llu:%llu should be %llu:%llu",
- b->data->min_key.inode,
- b->data->min_key.offset,
- bp->min_key.inode,
- bp->min_key.offset);
+ "incorrect min_key: got %s should be %s",
+ (bch2_bpos_to_text(&PBUF(buf1), bn->min_key), buf1),
+ (bch2_bpos_to_text(&PBUF(buf2), bp->min_key), buf2));
}
btree_err_on(bkey_cmp(bn->max_key, b->key.k.p),
BTREE_ERR_MUST_RETRY, c, ca, b, i,
- "incorrect max key %llu:%llu",
- bn->max_key.inode,
- bn->max_key.offset);
+ "incorrect max key %s",
+ (bch2_bpos_to_text(&PBUF(buf1), bn->max_key), buf1));
if (write)
compat_btree_node(b->c.level, b->c.btree_id, version,
BSET_BIG_ENDIAN(i), write, bn);
- /* XXX: ideally we would be validating min_key too */
-#if 0
- /*
- * not correct anymore, due to btree node write error
- * handling
- *
- * need to add bn->seq to btree keys and verify
- * against that
- */
- btree_err_on(!extent_contains_ptr(bkey_i_to_s_c_extent(&b->key),
- bn->ptr),
- BTREE_ERR_FATAL, c, b, i,
- "incorrect backpointer");
-#endif
err = bch2_bkey_format_validate(&bn->format);
btree_err_on(err,
BTREE_ERR_FATAL, c, ca, b, i,
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index 303e6d3a..72e3d6d8 100644
--- a/libbcachefs/btree_iter.c
+++ b/libbcachefs/btree_iter.c
@@ -495,7 +495,7 @@ static void bch2_btree_iter_verify_level(struct btree_iter *iter,
struct btree_node_iter tmp = l->iter;
bool locked = btree_node_locked(iter, level);
struct bkey_packed *p, *k;
- char buf1[100], buf2[100];
+ char buf1[100], buf2[100], buf3[100];
const char *msg;
if (!bch2_debug_check_iterators)
@@ -552,38 +552,50 @@ unlock:
btree_node_unlock(iter, level);
return;
err:
- strcpy(buf1, "(none)");
strcpy(buf2, "(none)");
+ strcpy(buf3, "(none)");
+
+ bch2_bpos_to_text(&PBUF(buf1), iter->real_pos);
if (p) {
struct bkey uk = bkey_unpack_key(l->b, p);
- bch2_bkey_to_text(&PBUF(buf1), &uk);
+ bch2_bkey_to_text(&PBUF(buf2), &uk);
}
if (k) {
struct bkey uk = bkey_unpack_key(l->b, k);
- bch2_bkey_to_text(&PBUF(buf2), &uk);
+ bch2_bkey_to_text(&PBUF(buf3), &uk);
}
panic("iterator should be %s key at level %u:\n"
- "iter pos %llu:%llu\n"
+ "iter pos %s\n"
"prev key %s\n"
"cur key %s\n",
- msg, level,
- iter->real_pos.inode, iter->real_pos.offset,
- buf1, buf2);
+ msg, level, buf1, buf2, buf3);
}
static void bch2_btree_iter_verify(struct btree_iter *iter)
{
unsigned i;
- bch2_btree_trans_verify_locks(iter->trans);
+ EBUG_ON(iter->btree_id >= BTREE_ID_NR);
+
+ bch2_btree_iter_verify_locks(iter);
for (i = 0; i < BTREE_MAX_DEPTH; i++)
bch2_btree_iter_verify_level(iter, i);
}
+static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter)
+{
+ enum btree_iter_type type = btree_iter_type(iter);
+
+ BUG_ON((type == BTREE_ITER_KEYS ||
+ type == BTREE_ITER_CACHED) &&
+ (bkey_cmp(iter->pos, bkey_start_pos(&iter->k)) < 0 ||
+ bkey_cmp(iter->pos, iter->k.p) > 0));
+}
+
void bch2_btree_trans_verify_iters(struct btree_trans *trans, struct btree *b)
{
struct btree_iter *iter;
@@ -599,6 +611,7 @@ void bch2_btree_trans_verify_iters(struct btree_trans *trans, struct btree *b)
static inline void bch2_btree_iter_verify_level(struct btree_iter *iter, unsigned l) {}
static inline void bch2_btree_iter_verify(struct btree_iter *iter) {}
+static inline void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) {}
#endif
@@ -863,22 +876,23 @@ static void btree_iter_verify_new_node(struct btree_iter *iter, struct btree *b)
if (!k ||
bkey_deleted(k) ||
bkey_cmp_left_packed(l->b, k, &b->key.k.p)) {
- char buf[100];
+ char buf1[100];
+ char buf2[100];
+ char buf3[100];
+ char buf4[100];
struct bkey uk = bkey_unpack_key(b, k);
bch2_dump_btree_node(iter->trans->c, l->b);
- bch2_bkey_to_text(&PBUF(buf), &uk);
+ bch2_bpos_to_text(&PBUF(buf1), iter->real_pos);
+ bch2_bkey_to_text(&PBUF(buf2), &uk);
+ bch2_bpos_to_text(&PBUF(buf3), b->data->min_key);
+ bch2_bpos_to_text(&PBUF(buf4), b->data->max_key); /* buf4 is the end of the "new node" range in the panic message */
panic("parent iter doesn't point to new node:\n"
- "iter pos %s %llu:%llu\n"
+ "iter pos %s %s\n"
"iter key %s\n"
- "new node %llu:%llu-%llu:%llu\n",
- bch2_btree_ids[iter->btree_id],
- iter->pos.inode,
- iter->pos.offset,
- buf,
- b->data->min_key.inode,
- b->data->min_key.offset,
- b->key.k.p.inode, b->key.k.p.offset);
+ "new node %s-%s\n",
+ bch2_btree_ids[iter->btree_id], buf1,
+ buf2, buf3, buf4);
}
if (!parent_locked)
@@ -1336,21 +1350,6 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
return ret;
}
-static inline void bch2_btree_iter_checks(struct btree_iter *iter)
-{
- enum btree_iter_type type = btree_iter_type(iter);
-
- EBUG_ON(iter->btree_id >= BTREE_ID_NR);
-
- BUG_ON((type == BTREE_ITER_KEYS ||
- type == BTREE_ITER_CACHED) &&
- (bkey_cmp(iter->pos, bkey_start_pos(&iter->k)) < 0 ||
- bkey_cmp(iter->pos, iter->k.p) > 0));
-
- bch2_btree_iter_verify_locks(iter);
- bch2_btree_iter_verify_level(iter, iter->level);
-}
-
/* Iterate across nodes (leaf and interior nodes) */
struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter)
@@ -1359,7 +1358,7 @@ struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter)
int ret;
EBUG_ON(btree_iter_type(iter) != BTREE_ITER_NODES);
- bch2_btree_iter_checks(iter);
+ bch2_btree_iter_verify(iter);
if (iter->uptodate == BTREE_ITER_UPTODATE)
return iter->l[iter->level].b;
@@ -1388,7 +1387,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
int ret;
EBUG_ON(btree_iter_type(iter) != BTREE_ITER_NODES);
- bch2_btree_iter_checks(iter);
+ bch2_btree_iter_verify(iter);
/* already got to end? */
if (!btree_iter_node(iter, iter->level))
@@ -1491,26 +1490,18 @@ static void btree_iter_set_search_pos(struct btree_iter *iter, struct bpos new_p
iter->real_pos = new_pos;
btree_iter_pos_changed(iter, cmp);
+
+ bch2_btree_iter_verify(iter);
}
-void __bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos,
- bool strictly_greater)
+void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
{
bkey_init(&iter->k);
iter->k.p = iter->pos = new_pos;
- iter->flags &= ~BTREE_ITER_IS_EXTENTS;
- iter->flags |= strictly_greater ? BTREE_ITER_IS_EXTENTS : 0;
-
btree_iter_set_search_pos(iter, btree_iter_search_key(iter));
}
-void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
-{
- __bch2_btree_iter_set_pos(iter, new_pos,
- (iter->flags & BTREE_ITER_IS_EXTENTS) != 0);
-}
-
static inline bool bch2_btree_iter_advance_pos(struct btree_iter *iter)
{
struct bpos pos = iter->k.p;
@@ -1603,7 +1594,10 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
int ret;
EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS);
- bch2_btree_iter_checks(iter);
+ bch2_btree_iter_verify(iter);
+ bch2_btree_iter_verify_entry_exit(iter);
+
+ btree_iter_set_search_pos(iter, btree_iter_search_key(iter));
if (iter->uptodate == BTREE_ITER_UPTODATE &&
!bkey_deleted(&iter->k))
@@ -1633,7 +1627,8 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
iter->uptodate = BTREE_ITER_UPTODATE;
- bch2_btree_iter_verify_level(iter, 0);
+ bch2_btree_iter_verify_entry_exit(iter);
+ bch2_btree_iter_verify(iter);
return k;
}
@@ -1687,7 +1682,7 @@ struct bkey_s_c bch2_btree_iter_peek_with_updates(struct btree_iter *iter)
int ret;
EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS);
- bch2_btree_iter_checks(iter);
+ bch2_btree_iter_verify(iter);
while (1) {
ret = bch2_btree_iter_traverse(iter);
@@ -1697,7 +1692,8 @@ struct bkey_s_c bch2_btree_iter_peek_with_updates(struct btree_iter *iter)
k = __bch2_btree_iter_peek_with_updates(iter);
if (k.k && bkey_deleted(k.k)) {
- bch2_btree_iter_advance_pos(iter);
+ if (!bch2_btree_iter_advance_pos(iter))
+ return bkey_s_c_null;
continue;
}
@@ -1733,13 +1729,15 @@ struct bkey_s_c bch2_btree_iter_next_with_updates(struct btree_iter *iter)
*/
struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
{
- struct bpos pos = iter->pos;
struct btree_iter_level *l = &iter->l[0];
struct bkey_s_c k;
int ret;
EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS);
- bch2_btree_iter_checks(iter);
+ bch2_btree_iter_verify(iter);
+ bch2_btree_iter_verify_entry_exit(iter);
+
+ btree_iter_set_search_pos(iter, iter->pos);
if (iter->uptodate == BTREE_ITER_UPTODATE &&
!bkey_deleted(&iter->k))
@@ -1747,35 +1745,47 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
while (1) {
ret = bch2_btree_iter_traverse(iter);
- if (unlikely(ret))
- return bkey_s_c_err(ret);
+ if (unlikely(ret)) {
+ k = bkey_s_c_err(ret);
+ goto no_key;
+ }
k = __btree_iter_peek(iter, l);
if (!k.k ||
((iter->flags & BTREE_ITER_IS_EXTENTS)
- ? bkey_cmp(bkey_start_pos(k.k), pos) >= 0
- : bkey_cmp(bkey_start_pos(k.k), pos) > 0))
+ ? bkey_cmp(bkey_start_pos(k.k), iter->pos) >= 0
+ : bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0))
k = __btree_iter_prev(iter, l);
if (likely(k.k))
break;
- if (!btree_iter_set_pos_to_prev_leaf(iter))
- return bkey_s_c_null;
+ if (!btree_iter_set_pos_to_prev_leaf(iter)) {
+ k = bkey_s_c_null;
+ goto no_key;
+ }
}
- EBUG_ON(bkey_cmp(bkey_start_pos(k.k), pos) > 0);
+ EBUG_ON(bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0);
/* Extents can straddle iter->pos: */
- if (bkey_cmp(k.k->p, pos) < 0)
+ if (bkey_cmp(k.k->p, iter->pos) < 0)
iter->pos = k.k->p;
-
iter->real_pos = k.k->p;
-
- iter->uptodate = BTREE_ITER_UPTODATE;
-
- bch2_btree_iter_verify_level(iter, 0);
+ iter->uptodate = BTREE_ITER_UPTODATE;
+out:
+ bch2_btree_iter_verify_entry_exit(iter);
+ bch2_btree_iter_verify(iter);
return k;
+no_key:
+ /*
+ * __btree_iter_peek() may have set iter->k to a key we didn't want, and
+ * then we errored going to the previous leaf - make sure it's
+ * consistent with iter->pos:
+ */
+ bkey_init(&iter->k);
+ iter->k.p = iter->pos;
+ goto out;
}
/**
@@ -1829,7 +1839,9 @@ __bch2_btree_iter_peek_slot_extents(struct btree_iter *iter)
iter->uptodate = BTREE_ITER_UPTODATE;
- bch2_btree_iter_verify_level(iter, 0);
+ bch2_btree_iter_verify_entry_exit(iter);
+ bch2_btree_iter_verify(iter);
+
return (struct bkey_s_c) { &iter->k, NULL };
}
@@ -1840,7 +1852,10 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
int ret;
EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS);
- bch2_btree_iter_checks(iter);
+ bch2_btree_iter_verify(iter);
+ bch2_btree_iter_verify_entry_exit(iter);
+
+ btree_iter_set_search_pos(iter, btree_iter_search_key(iter));
if (iter->uptodate == BTREE_ITER_UPTODATE)
return btree_iter_peek_uptodate(iter);
@@ -1864,7 +1879,8 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
}
iter->uptodate = BTREE_ITER_UPTODATE;
- bch2_btree_iter_verify_level(iter, 0);
+ bch2_btree_iter_verify_entry_exit(iter);
+ bch2_btree_iter_verify(iter);
return k;
}
@@ -1876,13 +1892,21 @@ struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter)
return bch2_btree_iter_peek_slot(iter);
}
+struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *iter)
+{
+ if (!bch2_btree_iter_rewind_pos(iter))
+ return bkey_s_c_null;
+
+ return bch2_btree_iter_peek_slot(iter);
+}
+
struct bkey_s_c bch2_btree_iter_peek_cached(struct btree_iter *iter)
{
struct bkey_cached *ck;
int ret;
EBUG_ON(btree_iter_type(iter) != BTREE_ITER_CACHED);
- bch2_btree_iter_checks(iter);
+ bch2_btree_iter_verify(iter);
ret = bch2_btree_iter_traverse(iter);
if (unlikely(ret))
@@ -1898,27 +1922,17 @@ struct bkey_s_c bch2_btree_iter_peek_cached(struct btree_iter *iter)
}
static inline void bch2_btree_iter_init(struct btree_trans *trans,
- struct btree_iter *iter, enum btree_id btree_id,
- struct bpos pos, unsigned flags)
+ struct btree_iter *iter, enum btree_id btree_id)
{
struct bch_fs *c = trans->c;
unsigned i;
- if (btree_node_type_is_extents(btree_id) &&
- !(flags & BTREE_ITER_NODES))
- flags |= BTREE_ITER_IS_EXTENTS;
-
iter->trans = trans;
- iter->pos = pos;
- bkey_init(&iter->k);
- iter->k.p = pos;
- iter->flags = flags;
- iter->real_pos = btree_iter_search_key(iter);
iter->uptodate = BTREE_ITER_NEED_TRAVERSE;
iter->btree_id = btree_id;
iter->level = 0;
iter->min_depth = 0;
- iter->locks_want = flags & BTREE_ITER_INTENT ? 1 : 0;
+ iter->locks_want = 0;
iter->nodes_locked = 0;
iter->nodes_intent_locked = 0;
for (i = 0; i < ARRAY_SIZE(iter->l); i++)
@@ -1975,13 +1989,13 @@ static void btree_trans_iter_alloc_fail(struct btree_trans *trans)
struct btree_iter *iter;
struct btree_insert_entry *i;
+ char buf[100];
trans_for_each_iter(trans, iter)
- printk(KERN_ERR "iter: btree %s pos %llu:%llu%s%s%s %ps\n",
+ printk(KERN_ERR "iter: btree %s pos %s%s%s%s %ps\n",
bch2_btree_ids[iter->btree_id],
- iter->pos.inode,
- iter->pos.offset,
- (trans->iters_live & (1ULL << iter->idx)) ? " live" : "",
+ (bch2_bpos_to_text(&PBUF(buf), iter->pos), buf),
+ btree_iter_live(trans, iter) ? " live" : "",
(trans->iters_touched & (1ULL << iter->idx)) ? " touched" : "",
iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT ? " keep" : "",
(void *) iter->ip_allocated);
@@ -2030,20 +2044,16 @@ static inline void btree_iter_copy(struct btree_iter *dst,
dst->flags &= ~BTREE_ITER_SET_POS_AFTER_COMMIT;
}
-static inline struct bpos bpos_diff(struct bpos l, struct bpos r)
-{
- if (bkey_cmp(l, r) > 0)
- swap(l, r);
-
- return POS(r.inode - l.inode, r.offset - l.offset);
-}
-
-static struct btree_iter *__btree_trans_get_iter(struct btree_trans *trans,
- unsigned btree_id, struct bpos pos,
- unsigned flags)
+struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
+ unsigned btree_id, struct bpos pos,
+ unsigned flags)
{
struct btree_iter *iter, *best = NULL;
+ /* We always want a fresh iterator for node iterators: */
+ if ((flags & BTREE_ITER_TYPE) == BTREE_ITER_NODES)
+ goto alloc_iter;
+
trans_for_each_iter(trans, iter) {
if (btree_iter_type(iter) != (flags & BTREE_ITER_TYPE))
continue;
@@ -2058,51 +2068,34 @@ static struct btree_iter *__btree_trans_get_iter(struct btree_trans *trans,
best = iter;
}
-
+alloc_iter:
if (!best) {
iter = btree_trans_iter_alloc(trans);
- bch2_btree_iter_init(trans, iter, btree_id, pos, flags);
- } else if ((trans->iters_live & (1ULL << best->idx)) ||
- (best->flags & BTREE_ITER_KEEP_UNTIL_COMMIT)) {
+ bch2_btree_iter_init(trans, iter, btree_id);
+ } else if (btree_iter_keep(trans, best)) {
iter = btree_trans_iter_alloc(trans);
btree_iter_copy(iter, best);
} else {
iter = best;
}
- iter->flags &= ~BTREE_ITER_KEEP_UNTIL_COMMIT;
- iter->flags &= ~BTREE_ITER_USER_FLAGS;
- iter->flags |= flags & BTREE_ITER_USER_FLAGS;
-
- if (iter->flags & BTREE_ITER_INTENT) {
- if (!iter->locks_want) {
- __bch2_btree_iter_unlock(iter);
- iter->locks_want = 1;
- }
- } else
- bch2_btree_iter_downgrade(iter);
-
- BUG_ON(iter->btree_id != btree_id);
- BUG_ON((iter->flags ^ flags) & BTREE_ITER_TYPE);
- BUG_ON(iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT);
- BUG_ON(iter->flags & BTREE_ITER_SET_POS_AFTER_COMMIT);
- BUG_ON(trans->iters_live & (1ULL << iter->idx));
-
trans->iters_live |= 1ULL << iter->idx;
trans->iters_touched |= 1ULL << iter->idx;
- return iter;
-}
+ if ((flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES &&
+ btree_node_type_is_extents(btree_id) &&
+ !(flags & BTREE_ITER_NOT_EXTENTS))
+ flags |= BTREE_ITER_IS_EXTENTS;
-struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
- enum btree_id btree_id,
- struct bpos pos, unsigned flags)
-{
- struct btree_iter *iter =
- __btree_trans_get_iter(trans, btree_id, pos, flags);
+ iter->flags = flags;
+
+ if (!(iter->flags & BTREE_ITER_INTENT))
+ bch2_btree_iter_downgrade(iter);
+ else if (!iter->locks_want)
+ __bch2_btree_iter_upgrade_nounlock(iter, 1);
+
+ bch2_btree_iter_set_pos(iter, pos);
- __bch2_btree_iter_set_pos(iter, pos,
- btree_node_type_is_extents(btree_id));
return iter;
}
@@ -2114,8 +2107,10 @@ struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *trans,
unsigned flags)
{
struct btree_iter *iter =
- __btree_trans_get_iter(trans, btree_id, pos,
- flags|BTREE_ITER_NODES);
+ __bch2_trans_get_iter(trans, btree_id, pos,
+ BTREE_ITER_NODES|
+ BTREE_ITER_NOT_EXTENTS|
+ flags);
unsigned i;
BUG_ON(bkey_cmp(iter->pos, pos));
diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h
index 12c519ae..bd0c429b 100644
--- a/libbcachefs/btree_iter.h
+++ b/libbcachefs/btree_iter.h
@@ -171,10 +171,10 @@ struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *);
+struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_peek_cached(struct btree_iter *);
-void __bch2_btree_iter_set_pos(struct btree_iter *, struct bpos, bool);
void bch2_btree_iter_set_pos(struct btree_iter *, struct bpos);
/* Sort order for locking btree iterators: */
@@ -242,11 +242,9 @@ static inline int bkey_err(struct bkey_s_c k)
_start, _flags, _k, _ret) \
for ((_iter) = bch2_trans_get_iter((_trans), (_btree_id), \
(_start), (_flags)), \
- (_ret) = PTR_ERR_OR_ZERO(((_k) = \
- __bch2_btree_iter_peek(_iter, _flags)).k); \
- !_ret && (_k).k; \
- (_ret) = PTR_ERR_OR_ZERO(((_k) = \
- __bch2_btree_iter_next(_iter, _flags)).k))
+ (_k) = __bch2_btree_iter_peek(_iter, _flags); \
+ !((_ret) = bkey_err(_k)) && (_k).k; \
+ (_k) = __bch2_btree_iter_next(_iter, _flags))
#define for_each_btree_key_continue(_iter, _flags, _k, _ret) \
for ((_k) = __bch2_btree_iter_peek(_iter, _flags); \
@@ -289,6 +287,17 @@ struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *,
enum btree_id, struct bpos,
unsigned, unsigned, unsigned);
+static inline bool btree_iter_live(struct btree_trans *trans, struct btree_iter *iter)
+{
+ return (trans->iters_live & (1ULL << iter->idx)) != 0;
+}
+
+static inline bool btree_iter_keep(struct btree_trans *trans, struct btree_iter *iter)
+{
+ return btree_iter_live(trans, iter) ||
+ (iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT);
+}
+
#define TRANS_RESET_NOTRAVERSE (1 << 0)
void bch2_trans_reset(struct btree_trans *, unsigned);
diff --git a/libbcachefs/btree_key_cache.c b/libbcachefs/btree_key_cache.c
index 4357aefd..2230da8b 100644
--- a/libbcachefs/btree_key_cache.c
+++ b/libbcachefs/btree_key_cache.c
@@ -297,7 +297,14 @@ fill:
set_bit(BKEY_CACHED_ACCESSED, &ck->flags);
iter->uptodate = BTREE_ITER_NEED_PEEK;
- bch2_btree_iter_downgrade(iter);
+
+ if (!(iter->flags & BTREE_ITER_INTENT))
+ bch2_btree_iter_downgrade(iter);
+ else if (!iter->locks_want) {
+ if (!__bch2_btree_iter_upgrade(iter, 1))
+ ret = -EINTR;
+ }
+
return ret;
err:
if (ret != -EINTR) {
diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h
index f0b85d5c..c3148079 100644
--- a/libbcachefs/btree_types.h
+++ b/libbcachefs/btree_types.h
@@ -214,13 +214,7 @@ enum btree_iter_type {
#define BTREE_ITER_SET_POS_AFTER_COMMIT (1 << 8)
#define BTREE_ITER_CACHED_NOFILL (1 << 9)
#define BTREE_ITER_CACHED_NOCREATE (1 << 10)
-
-#define BTREE_ITER_USER_FLAGS \
- (BTREE_ITER_SLOTS \
- |BTREE_ITER_INTENT \
- |BTREE_ITER_PREFETCH \
- |BTREE_ITER_CACHED_NOFILL \
- |BTREE_ITER_CACHED_NOCREATE)
+#define BTREE_ITER_NOT_EXTENTS (1 << 11)
enum btree_iter_uptodate {
BTREE_ITER_UPTODATE = 0,
@@ -334,7 +328,11 @@ struct bkey_cached {
struct btree_insert_entry {
unsigned trigger_flags;
+ u8 bkey_type;
+ u8 btree_id;
+ u8 level;
unsigned trans_triggers_run:1;
+ unsigned is_extent:1;
struct bkey_i *k;
struct btree_iter *iter;
};
@@ -586,19 +584,20 @@ static inline bool btree_iter_is_extents(struct btree_iter *iter)
return btree_node_type_is_extents(btree_iter_key_type(iter));
}
-#define BTREE_NODE_TYPE_HAS_TRIGGERS \
+#define BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS \
((1U << BKEY_TYPE_extents)| \
- (1U << BKEY_TYPE_alloc)| \
(1U << BKEY_TYPE_inodes)| \
- (1U << BKEY_TYPE_reflink)| \
(1U << BKEY_TYPE_stripes)| \
+ (1U << BKEY_TYPE_reflink)| \
(1U << BKEY_TYPE_btree))
-#define BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS \
- ((1U << BKEY_TYPE_extents)| \
- (1U << BKEY_TYPE_inodes)| \
- (1U << BKEY_TYPE_stripes)| \
- (1U << BKEY_TYPE_reflink))
+#define BTREE_NODE_TYPE_HAS_MEM_TRIGGERS \
+ ((1U << BKEY_TYPE_alloc)| \
+ (1U << BKEY_TYPE_stripes))
+
+#define BTREE_NODE_TYPE_HAS_TRIGGERS \
+ (BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS| \
+ BTREE_NODE_TYPE_HAS_MEM_TRIGGERS)
enum btree_trigger_flags {
__BTREE_TRIGGER_NORUN, /* Don't run triggers at all */
diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c
index 22c051c7..df06c4a8 100644
--- a/libbcachefs/btree_update_interior.c
+++ b/libbcachefs/btree_update_interior.c
@@ -35,6 +35,7 @@ static void btree_node_interior_verify(struct bch_fs *c, struct btree *b)
struct bkey_s_c k;
struct bkey_s_c_btree_ptr_v2 bp;
struct bkey unpacked;
+ char buf1[100], buf2[100];
BUG_ON(!b->c.level);
@@ -51,24 +52,19 @@ static void btree_node_interior_verify(struct bch_fs *c, struct btree *b)
if (bkey_cmp(next_node, bp.v->min_key)) {
bch2_dump_btree_node(c, b);
- panic("expected next min_key %llu:%llu got %llu:%llu\n",
- next_node.inode,
- next_node.offset,
- bp.v->min_key.inode,
- bp.v->min_key.offset);
+ panic("expected next min_key %s got %s\n",
+ (bch2_bpos_to_text(&PBUF(buf1), next_node), buf1),
+ (bch2_bpos_to_text(&PBUF(buf2), bp.v->min_key), buf2));
}
bch2_btree_node_iter_advance(&iter, b);
if (bch2_btree_node_iter_end(&iter)) {
-
if (bkey_cmp(k.k->p, b->key.k.p)) {
bch2_dump_btree_node(c, b);
- panic("expected end %llu:%llu got %llu:%llu\n",
- b->key.k.p.inode,
- b->key.k.p.offset,
- k.k->p.inode,
- k.k->p.offset);
+ panic("expected end %s got %s\n",
+ (bch2_bpos_to_text(&PBUF(buf1), b->key.k.p), buf1),
+ (bch2_bpos_to_text(&PBUF(buf2), k.k->p), buf2));
}
break;
}
diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c
index 579c60e2..d7937bdf 100644
--- a/libbcachefs/btree_update_leaf.c
+++ b/libbcachefs/btree_update_leaf.c
@@ -21,6 +21,14 @@
#include <linux/sort.h>
#include <trace/events/bcachefs.h>
+static inline int btree_insert_entry_cmp(const struct btree_insert_entry *l,
+ const struct btree_insert_entry *r)
+{
+ return cmp_int(l->btree_id, r->btree_id) ?:
+ -cmp_int(l->level, r->level) ?:
+ bkey_cmp(l->k->k.p, r->k->k.p);
+}
+
static inline bool same_leaf_as_prev(struct btree_trans *trans,
struct btree_insert_entry *i)
{
@@ -211,15 +219,15 @@ static bool btree_insert_key_leaf(struct btree_trans *trans,
/* Normal update interface: */
static inline void btree_insert_entry_checks(struct btree_trans *trans,
- struct btree_iter *iter,
- struct bkey_i *insert)
+ struct btree_insert_entry *i)
{
struct bch_fs *c = trans->c;
- BUG_ON(bkey_cmp(insert->k.p, iter->real_pos));
BUG_ON(bch2_debug_check_bkeys &&
- bch2_bkey_invalid(c, bkey_i_to_s_c(insert),
- __btree_node_type(iter->level, iter->btree_id)));
+ bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), i->bkey_type));
+ BUG_ON(bkey_cmp(i->k->k.p, i->iter->real_pos));
+ BUG_ON(i->level != i->iter->level);
+ BUG_ON(i->btree_id != i->iter->btree_id);
}
static noinline int
@@ -284,7 +292,8 @@ btree_key_can_insert_cached(struct btree_trans *trans,
BUG_ON(iter->level);
if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags) &&
- bch2_btree_key_cache_must_wait(trans->c))
+ bch2_btree_key_cache_must_wait(trans->c) &&
+ !(trans->flags & BTREE_INSERT_JOURNAL_RECLAIM))
return BTREE_INSERT_NEED_JOURNAL_RECLAIM;
if (u64s <= ck->u64s)
@@ -331,19 +340,6 @@ static inline void do_btree_insert_one(struct btree_trans *trans,
}
}
-static inline bool iter_has_trans_triggers(struct btree_iter *iter)
-{
- return BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << iter->btree_id);
-}
-
-static inline bool iter_has_nontrans_triggers(struct btree_iter *iter)
-{
- return (((BTREE_NODE_TYPE_HAS_TRIGGERS &
- ~BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS)) |
- (1U << BTREE_ID_stripes)) &
- (1U << iter->btree_id);
-}
-
static noinline void bch2_btree_iter_unlock_noinline(struct btree_iter *iter)
{
__bch2_btree_iter_unlock(iter);
@@ -404,7 +400,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
return ret;
}
- if (btree_node_type_needs_gc(i->iter->btree_id))
+ if (btree_node_type_needs_gc(i->bkey_type))
marking = true;
}
@@ -458,7 +454,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
}
trans_for_each_update(trans, i)
- if (iter_has_nontrans_triggers(i->iter))
+ if (BTREE_NODE_TYPE_HAS_MEM_TRIGGERS & (1U << i->bkey_type))
bch2_mark_update(trans, i->iter, i->k,
fs_usage, i->trigger_flags);
@@ -516,8 +512,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
*/
trans_for_each_iter(trans, iter) {
if (iter->nodes_locked != iter->nodes_intent_locked) {
- if ((iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT) ||
- (trans->iters_live & (1ULL << iter->idx))) {
+ if (btree_iter_keep(trans, iter)) {
if (!bch2_btree_iter_upgrade(iter, 1)) {
trace_trans_restart_upgrade(trans->ip);
return -EINTR;
@@ -530,7 +525,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
trans_for_each_update2(trans, i)
- btree_insert_entry_checks(trans, i->iter, i->k);
+ btree_insert_entry_checks(trans, i);
bch2_btree_trans_verify_locks(trans);
trans_for_each_update2(trans, i)
@@ -695,69 +690,63 @@ bch2_trans_commit_get_rw_cold(struct btree_trans *trans)
return 0;
}
-static inline int btree_iter_pos_cmp(const struct btree_iter *l,
- const struct btree_iter *r)
-{
- return cmp_int(l->btree_id, r->btree_id) ?:
- bkey_cmp(l->pos, r->pos);
-}
-
-static int bch2_trans_update2(struct btree_trans *trans,
- struct btree_iter *iter,
- struct bkey_i *insert)
+static int __bch2_trans_update2(struct btree_trans *trans,
+ struct btree_insert_entry n)
{
- struct btree_insert_entry *i, n = (struct btree_insert_entry) {
- .iter = iter, .k = insert
- };
- int ret;
+ struct btree_insert_entry *i;
- btree_insert_entry_checks(trans, n.iter, n.k);
+ btree_insert_entry_checks(trans, &n);
EBUG_ON(trans->nr_updates2 >= BTREE_ITER_MAX);
- ret = bch2_btree_iter_traverse(iter);
- if (unlikely(ret))
- return ret;
-
- BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK);
+ n.iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
- iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
-
- trans_for_each_update2(trans, i) {
- if (btree_iter_pos_cmp(n.iter, i->iter) == 0) {
- *i = n;
- return 0;
- }
-
- if (btree_iter_pos_cmp(n.iter, i->iter) <= 0)
+ trans_for_each_update2(trans, i)
+ if (btree_insert_entry_cmp(&n, i) <= 0)
break;
- }
- array_insert_item(trans->updates2, trans->nr_updates2,
- i - trans->updates2, n);
+ if (i < trans->updates2 + trans->nr_updates2 &&
+ !btree_insert_entry_cmp(&n, i))
+ *i = n;
+ else
+ array_insert_item(trans->updates2, trans->nr_updates2,
+ i - trans->updates2, n);
+
return 0;
}
+static int bch2_trans_update2(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_i *insert)
+{
+ return __bch2_trans_update2(trans, (struct btree_insert_entry) {
+ .bkey_type = __btree_node_type(iter->level, iter->btree_id),
+ .btree_id = iter->btree_id,
+ .level = iter->level,
+ .iter = iter,
+ .k = insert,
+ });
+}
+
static int extent_update_to_keys(struct btree_trans *trans,
- struct btree_iter *orig_iter,
- struct bkey_i *insert)
+ struct btree_insert_entry n)
{
- struct btree_iter *iter;
int ret;
- ret = bch2_extent_can_insert(trans, orig_iter, insert);
+ if (bkey_deleted(&n.k->k))
+ return 0;
+
+ ret = bch2_extent_can_insert(trans, n.iter, n.k);
if (ret)
return ret;
- if (bkey_deleted(&insert->k))
- return 0;
+ n.iter = bch2_trans_get_iter(trans, n.iter->btree_id, n.k->k.p,
+ BTREE_ITER_INTENT|
+ BTREE_ITER_NOT_EXTENTS);
+ n.is_extent = false;
- iter = bch2_trans_copy_iter(trans, orig_iter);
-
- iter->flags |= BTREE_ITER_INTENT;
- __bch2_btree_iter_set_pos(iter, insert->k.p, false);
- ret = bch2_trans_update2(trans, iter, insert);
- bch2_trans_iter_put(trans, iter);
+ ret = __bch2_trans_update2(trans, n);
+ bch2_trans_iter_put(trans, n.iter);
return ret;
}
@@ -787,7 +776,8 @@ static int extent_handle_overwrites(struct btree_trans *trans,
bkey_reassemble(update, k);
bch2_cut_back(start, update);
- __bch2_btree_iter_set_pos(update_iter, update->k.p, false);
+ update_iter->flags &= ~BTREE_ITER_IS_EXTENTS;
+ bch2_btree_iter_set_pos(update_iter, update->k.p);
ret = bch2_trans_update2(trans, update_iter, update);
bch2_trans_iter_put(trans, update_iter);
if (ret)
@@ -804,7 +794,8 @@ static int extent_handle_overwrites(struct btree_trans *trans,
bkey_reassemble(update, k);
bch2_cut_front(end, update);
- __bch2_btree_iter_set_pos(update_iter, update->k.p, false);
+ update_iter->flags &= ~BTREE_ITER_IS_EXTENTS;
+ bch2_btree_iter_set_pos(update_iter, update->k.p);
ret = bch2_trans_update2(trans, update_iter, update);
bch2_trans_iter_put(trans, update_iter);
if (ret)
@@ -821,7 +812,8 @@ static int extent_handle_overwrites(struct btree_trans *trans,
update->k.type = KEY_TYPE_deleted;
update->k.size = 0;
- __bch2_btree_iter_set_pos(update_iter, update->k.p, false);
+ update_iter->flags &= ~BTREE_ITER_IS_EXTENTS;
+ bch2_btree_iter_set_pos(update_iter, update->k.p);
ret = bch2_trans_update2(trans, update_iter, update);
bch2_trans_iter_put(trans, update_iter);
if (ret)
@@ -867,7 +859,7 @@ int __bch2_trans_commit(struct btree_trans *trans)
if (btree_iter_type(i->iter) != BTREE_ITER_CACHED &&
!(i->trigger_flags & BTREE_TRIGGER_NORUN))
bch2_btree_key_cache_verify_clean(trans,
- i->iter->btree_id, i->iter->pos);
+ i->btree_id, i->k->k.p);
#endif
/*
@@ -878,24 +870,7 @@ int __bch2_trans_commit(struct btree_trans *trans)
trans_trigger_run = false;
trans_for_each_update(trans, i) {
- ret = bch2_btree_iter_traverse(i->iter);
- if (unlikely(ret)) {
- trace_trans_restart_traverse(trans->ip);
- goto out;
- }
-
- /*
- * We're not using bch2_btree_iter_upgrade here because
- * we know trans->nounlock can't be set:
- */
- if (unlikely(!btree_node_intent_locked(i->iter, i->iter->level) &&
- !__bch2_btree_iter_upgrade(i->iter, i->iter->level + 1))) {
- trace_trans_restart_upgrade(trans->ip);
- ret = -EINTR;
- goto out;
- }
-
- if (iter_has_trans_triggers(i->iter) &&
+ if ((BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << i->bkey_type)) &&
!i->trans_triggers_run) {
i->trans_triggers_run = true;
trans_trigger_run = true;
@@ -913,33 +888,45 @@ int __bch2_trans_commit(struct btree_trans *trans)
/* Turn extents updates into keys: */
trans_for_each_update(trans, i)
- if (i->iter->flags & BTREE_ITER_IS_EXTENTS) {
+ if (i->is_extent) {
struct bpos start = bkey_start_pos(&i->k->k);
while (i + 1 < trans->updates + trans->nr_updates &&
- i[0].iter->btree_id == i[1].iter->btree_id &&
+ i[0].btree_id == i[1].btree_id &&
!bkey_cmp(i[0].k->k.p, bkey_start_pos(&i[1].k->k)))
i++;
- ret = extent_handle_overwrites(trans, i->iter->btree_id,
+ ret = extent_handle_overwrites(trans, i->btree_id,
start, i->k->k.p);
if (ret)
goto out;
}
trans_for_each_update(trans, i) {
- if (i->iter->flags & BTREE_ITER_IS_EXTENTS) {
- ret = extent_update_to_keys(trans, i->iter, i->k);
- } else {
- ret = bch2_trans_update2(trans, i->iter, i->k);
- }
+ ret = i->is_extent
+ ? extent_update_to_keys(trans, *i)
+ : __bch2_trans_update2(trans, *i);
if (ret)
goto out;
}
trans_for_each_update2(trans, i) {
- BUG_ON(i->iter->uptodate > BTREE_ITER_NEED_PEEK);
- BUG_ON(i->iter->locks_want < 1);
+ ret = bch2_btree_iter_traverse(i->iter);
+ if (unlikely(ret)) {
+ trace_trans_restart_traverse(trans->ip);
+ goto out;
+ }
+
+ /*
+ * We're not using bch2_btree_iter_upgrade here because
+ * we know trans->nounlock can't be set:
+ */
+ if (unlikely(!btree_node_intent_locked(i->iter, i->iter->level) &&
+ !__bch2_btree_iter_upgrade(i->iter, i->iter->level + 1))) {
+ trace_trans_restart_upgrade(trans->ip);
+ ret = -EINTR;
+ goto out;
+ }
u64s = jset_u64s(i->k->k.u64s);
if (btree_iter_type(i->iter) == BTREE_ITER_CACHED &&
@@ -959,7 +946,7 @@ retry:
goto err;
trans_for_each_iter(trans, iter)
- if ((trans->iters_live & (1ULL << iter->idx)) &&
+ if (btree_iter_live(trans, iter) &&
(iter->flags & BTREE_ITER_SET_POS_AFTER_COMMIT))
bch2_btree_iter_set_pos(iter, iter->pos_after_commit);
out:
@@ -983,80 +970,101 @@ int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
struct bkey_i *k, enum btree_trigger_flags flags)
{
struct btree_insert_entry *i, n = (struct btree_insert_entry) {
- .trigger_flags = flags, .iter = iter, .k = k
+ .trigger_flags = flags,
+ .bkey_type = __btree_node_type(iter->level, iter->btree_id),
+ .btree_id = iter->btree_id,
+ .level = iter->level,
+ .is_extent = (iter->flags & BTREE_ITER_IS_EXTENTS) != 0,
+ .iter = iter,
+ .k = k
};
+ BUG_ON(trans->nr_updates >= BTREE_ITER_MAX);
+
#ifdef CONFIG_BCACHEFS_DEBUG
BUG_ON(bkey_cmp(iter->pos,
- (iter->flags & BTREE_ITER_IS_EXTENTS)
- ? bkey_start_pos(&k->k)
- : k->k.p));
+ n.is_extent ? bkey_start_pos(&k->k) : k->k.p));
trans_for_each_update(trans, i) {
BUG_ON(bkey_cmp(i->iter->pos,
- (i->iter->flags & BTREE_ITER_IS_EXTENTS)
- ? bkey_start_pos(&i->k->k)
- : i->k->k.p));
+ i->is_extent ? bkey_start_pos(&i->k->k) : i->k->k.p));
BUG_ON(i != trans->updates &&
- btree_iter_pos_cmp(i[-1].iter, i[0].iter) >= 0);
+ btree_insert_entry_cmp(i - 1, i) >= 0);
}
#endif
iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
- if (btree_node_type_is_extents(iter->btree_id)) {
+ if (n.is_extent) {
iter->pos_after_commit = k->k.p;
iter->flags |= BTREE_ITER_SET_POS_AFTER_COMMIT;
}
/*
- * Pending updates are kept sorted: first, find position of new update:
+ * Pending updates are kept sorted: first, find position of new update,
+ * then delete/trim any updates the new update overwrites:
*/
- trans_for_each_update(trans, i)
- if (btree_iter_pos_cmp(iter, i->iter) <= 0)
- break;
+ if (!n.is_extent) {
+ trans_for_each_update(trans, i)
+ if (btree_insert_entry_cmp(&n, i) <= 0)
+ break;
- /*
- * Now delete/trim any updates the new update overwrites:
- */
- if (i > trans->updates &&
- i[-1].iter->btree_id == iter->btree_id &&
- bkey_cmp(iter->pos, i[-1].k->k.p) < 0)
- bch2_cut_back(n.iter->pos, i[-1].k);
-
- while (i < trans->updates + trans->nr_updates &&
- iter->btree_id == i->iter->btree_id &&
- bkey_cmp(n.k->k.p, i->k->k.p) >= 0)
- array_remove_item(trans->updates, trans->nr_updates,
- i - trans->updates);
-
- if (i < trans->updates + trans->nr_updates &&
- iter->btree_id == i->iter->btree_id &&
- bkey_cmp(n.k->k.p, i->iter->pos) > 0) {
- /*
- * When we have an extent that overwrites the start of another
- * update, trimming that extent will mean the iterator's
- * position has to change since the iterator position has to
- * match the extent's start pos - but we don't want to change
- * the iterator pos if some other code is using it, so we may
- * need to clone it:
- */
- if (trans->iters_live & (1ULL << i->iter->idx)) {
- i->iter = bch2_trans_copy_iter(trans, i->iter);
+ if (i < trans->updates + trans->nr_updates &&
+ !btree_insert_entry_cmp(&n, i))
+ *i = n;
+ else
+ array_insert_item(trans->updates, trans->nr_updates,
+ i - trans->updates, n);
+ } else {
+ trans_for_each_update(trans, i)
+ if (btree_insert_entry_cmp(&n, i) < 0)
+ break;
- i->iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
- bch2_trans_iter_put(trans, i->iter);
+ while (i > trans->updates &&
+ i[-1].btree_id == n.btree_id &&
+ bkey_cmp(bkey_start_pos(&n.k->k),
+ bkey_start_pos(&i[-1].k->k)) <= 0) {
+ --i;
+ array_remove_item(trans->updates, trans->nr_updates,
+ i - trans->updates);
}
- bch2_cut_front(n.k->k.p, i->k);
- bch2_btree_iter_set_pos(i->iter, n.k->k.p);
- }
+ if (i > trans->updates &&
+ i[-1].btree_id == n.btree_id &&
+ bkey_cmp(bkey_start_pos(&n.k->k), i[-1].k->k.p) < 0)
+ bch2_cut_back(bkey_start_pos(&n.k->k), i[-1].k);
- EBUG_ON(trans->nr_updates >= BTREE_ITER_MAX);
+ if (i < trans->updates + trans->nr_updates &&
+ i->btree_id == n.btree_id &&
+ bkey_cmp(n.k->k.p, bkey_start_pos(&i->k->k)) > 0) {
+ /* We don't handle splitting extents here: */
+ BUG_ON(bkey_cmp(bkey_start_pos(&n.k->k),
+ bkey_start_pos(&i->k->k)) > 0);
+
+ /*
+ * When we have an extent that overwrites the start of another
+ * update, trimming that extent will mean the iterator's
+ * position has to change since the iterator position has to
+ * match the extent's start pos - but we don't want to change
+ * the iterator pos if some other code is using it, so we may
+ * need to clone it:
+ */
+ if (btree_iter_live(trans, i->iter)) {
+ i->iter = bch2_trans_copy_iter(trans, i->iter);
+
+ i->iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
+ bch2_trans_iter_put(trans, i->iter);
+ }
+
+ bch2_cut_front(n.k->k.p, i->k);
+ bch2_btree_iter_set_pos(i->iter, n.k->k.p);
+ }
+
+ array_insert_item(trans->updates, trans->nr_updates,
+ i - trans->updates, n);
+ }
- array_insert_item(trans->updates, trans->nr_updates,
- i - trans->updates, n);
return 0;
}
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index 4226f3b9..be59e37e 100644
--- a/libbcachefs/buckets.c
+++ b/libbcachefs/buckets.c
@@ -1196,6 +1196,8 @@ static int bch2_mark_stripe(struct bch_fs *c,
m->block_sectors[i] =
stripe_blockcount_get(new_s, i);
m->blocks_nonempty += !!m->block_sectors[i];
+
+ m->ptrs[i] = new_s->ptrs[i];
}
bch2_bkey_to_replicas(&m->r.e, new);
@@ -1847,8 +1849,6 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
}
bch2_btree_iter_set_pos(iter, bkey_start_pos(k.k));
- BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK);
-
bch2_trans_update(trans, iter, n, 0);
out:
ret = sectors;
diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c
index c0538e2f..e36ef095 100644
--- a/libbcachefs/ec.c
+++ b/libbcachefs/ec.c
@@ -151,7 +151,8 @@ static int bkey_matches_stripe(struct bch_stripe *s,
bkey_for_each_ptr(ptrs, ptr)
for (i = 0; i < nr_data; i++)
- if (__bch2_ptr_matches_stripe(s, ptr, i))
+ if (__bch2_ptr_matches_stripe(&s->ptrs[i], ptr,
+ le16_to_cpu(s->sectors)))
return i;
return -1;
diff --git a/libbcachefs/ec.h b/libbcachefs/ec.h
index 765baa9d..744e51ea 100644
--- a/libbcachefs/ec.h
+++ b/libbcachefs/ec.h
@@ -84,27 +84,42 @@ static inline void stripe_csum_set(struct bch_stripe *s,
memcpy(stripe_csum(s, block, csum_idx), &csum, bch_crc_bytes[s->csum_type]);
}
-static inline bool __bch2_ptr_matches_stripe(const struct bch_stripe *s,
- const struct bch_extent_ptr *ptr,
- unsigned block)
+static inline bool __bch2_ptr_matches_stripe(const struct bch_extent_ptr *stripe_ptr,
+ const struct bch_extent_ptr *data_ptr,
+ unsigned sectors)
+{
+ return data_ptr->dev == stripe_ptr->dev &&
+ data_ptr->gen == stripe_ptr->gen &&
+ data_ptr->offset >= stripe_ptr->offset &&
+ data_ptr->offset < stripe_ptr->offset + sectors;
+}
+
+static inline bool bch2_ptr_matches_stripe(const struct bch_stripe *s,
+ struct extent_ptr_decoded p)
{
unsigned nr_data = s->nr_blocks - s->nr_redundant;
- if (block >= nr_data)
+ BUG_ON(!p.has_ec);
+
+ if (p.ec.block >= nr_data)
return false;
- return ptr->dev == s->ptrs[block].dev &&
- ptr->gen == s->ptrs[block].gen &&
- ptr->offset >= s->ptrs[block].offset &&
- ptr->offset < s->ptrs[block].offset + le16_to_cpu(s->sectors);
+ return __bch2_ptr_matches_stripe(&s->ptrs[p.ec.block], &p.ptr,
+ le16_to_cpu(s->sectors));
}
-static inline bool bch2_ptr_matches_stripe(const struct bch_stripe *s,
- struct extent_ptr_decoded p)
+static inline bool bch2_ptr_matches_stripe_m(const struct stripe *m,
+ struct extent_ptr_decoded p)
{
+ unsigned nr_data = m->nr_blocks - m->nr_redundant;
+
BUG_ON(!p.has_ec);
- return __bch2_ptr_matches_stripe(s, &p.ptr, p.ec.block);
+ if (p.ec.block >= nr_data)
+ return false;
+
+ return __bch2_ptr_matches_stripe(&m->ptrs[p.ec.block], &p.ptr,
+ m->sectors);
}
struct bch_read_bio;
diff --git a/libbcachefs/ec_types.h b/libbcachefs/ec_types.h
index 84777016..3fc31222 100644
--- a/libbcachefs/ec_types.h
+++ b/libbcachefs/ec_types.h
@@ -22,6 +22,7 @@ struct stripe {
unsigned on_heap:1;
u8 blocks_nonempty;
u16 block_sectors[BCH_BKEY_PTRS_MAX];
+ struct bch_extent_ptr ptrs[BCH_BKEY_PTRS_MAX];
struct bch_replicas_padded r;
};
diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c
index df94a570..7f6b4ac4 100644
--- a/libbcachefs/fsck.c
+++ b/libbcachefs/fsck.c
@@ -58,7 +58,7 @@ static int __remove_dirent(struct btree_trans *trans,
buf[name.len] = '\0';
name.name = buf;
- ret = bch2_inode_find_by_inum_trans(trans, dir_inum, &dir_inode);
+ ret = __bch2_inode_find_by_inum_trans(trans, dir_inum, &dir_inode, 0);
if (ret && ret != -EINTR)
bch_err(c, "remove_dirent: err %i looking up directory inode", ret);
if (ret)
@@ -126,8 +126,8 @@ static int walk_inode(struct btree_trans *trans,
struct inode_walker *w, u64 inum)
{
if (inum != w->cur_inum) {
- int ret = bch2_inode_find_by_inum_trans(trans, inum,
- &w->inode);
+ int ret = __bch2_inode_find_by_inum_trans(trans, inum,
+ &w->inode, 0);
if (ret && ret != -ENOENT)
return ret;
@@ -442,7 +442,8 @@ static int bch2_fix_overlapping_extent(struct btree_trans *trans,
* We don't want to go through the
* extent_handle_overwrites path:
*/
- __bch2_btree_iter_set_pos(u_iter, u->k.p, false);
+ u_iter->flags &= ~BTREE_ITER_IS_EXTENTS;
+ bch2_btree_iter_set_pos(u_iter, u->k.p);
/*
* XXX: this is going to leave disk space
@@ -673,7 +674,7 @@ retry:
continue;
}
- ret = bch2_inode_find_by_inum_trans(&trans, d_inum, &target);
+ ret = __bch2_inode_find_by_inum_trans(&trans, d_inum, &target, 0);
if (ret && ret != -ENOENT)
break;
@@ -787,7 +788,9 @@ static int check_root(struct bch_fs *c, struct bch_inode_unpacked *root_inode)
bch_verbose(c, "checking root directory");
- ret = bch2_inode_find_by_inum(c, BCACHEFS_ROOT_INO, root_inode);
+ ret = bch2_trans_do(c, NULL, NULL, 0,
+ __bch2_inode_find_by_inum_trans(&trans, BCACHEFS_ROOT_INO,
+ root_inode, 0));
if (ret && ret != -ENOENT)
return ret;
@@ -834,7 +837,8 @@ static int check_lostfound(struct bch_fs *c,
goto create_lostfound;
}
- ret = bch2_inode_find_by_inum(c, inum, lostfound_inode);
+ ret = bch2_trans_do(c, NULL, NULL, 0,
+ __bch2_inode_find_by_inum_trans(&trans, inum, lostfound_inode, 0));
if (ret && ret != -ENOENT)
return ret;
diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c
index 76157e2b..e72c49e1 100644
--- a/libbcachefs/inode.c
+++ b/libbcachefs/inode.c
@@ -628,16 +628,19 @@ err:
return ret;
}
-int bch2_inode_find_by_inum_trans(struct btree_trans *trans, u64 inode_nr,
- struct bch_inode_unpacked *inode)
+int __bch2_inode_find_by_inum_trans(struct btree_trans *trans, u64 inode_nr,
+ struct bch_inode_unpacked *inode,
+ unsigned flags)
{
struct btree_iter *iter;
struct bkey_s_c k;
int ret;
iter = bch2_trans_get_iter(trans, BTREE_ID_inodes,
- POS(0, inode_nr), BTREE_ITER_CACHED);
- k = bch2_btree_iter_peek_cached(iter);
+ POS(0, inode_nr), flags);
+ k = (flags & BTREE_ITER_TYPE) == BTREE_ITER_CACHED
+ ? bch2_btree_iter_peek_cached(iter)
+ : bch2_btree_iter_peek_slot(iter);
ret = bkey_err(k);
if (ret)
goto err;
@@ -650,6 +653,14 @@ err:
return ret;
}
+int bch2_inode_find_by_inum_trans(struct btree_trans *trans, u64 inode_nr,
+ struct bch_inode_unpacked *inode)
+{
+ return __bch2_inode_find_by_inum_trans(trans, inode_nr,
+ inode, BTREE_ITER_CACHED);
+
+}
+
int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr,
struct bch_inode_unpacked *inode)
{
diff --git a/libbcachefs/inode.h b/libbcachefs/inode.h
index dbdfcf63..1caf036a 100644
--- a/libbcachefs/inode.h
+++ b/libbcachefs/inode.h
@@ -73,6 +73,8 @@ int bch2_inode_create(struct btree_trans *, struct bch_inode_unpacked *);
int bch2_inode_rm(struct bch_fs *, u64, bool);
+int __bch2_inode_find_by_inum_trans(struct btree_trans *, u64,
+ struct bch_inode_unpacked *, unsigned);
int bch2_inode_find_by_inum_trans(struct btree_trans *, u64,
struct bch_inode_unpacked *);
int bch2_inode_find_by_inum(struct bch_fs *, u64, struct bch_inode_unpacked *);
diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c
index ba0e9e04..54f2e205 100644
--- a/libbcachefs/journal_io.c
+++ b/libbcachefs/journal_io.c
@@ -202,22 +202,19 @@ static void journal_entry_null_range(void *start, void *end)
#define FSCK_DELETED_KEY 5
-static int journal_validate_key(struct bch_fs *c, struct jset *jset,
+static int journal_validate_key(struct bch_fs *c, const char *where,
struct jset_entry *entry,
unsigned level, enum btree_id btree_id,
- struct bkey_i *k,
- const char *type, int write)
+ struct bkey_i *k, const char *type,
+ unsigned version, int big_endian, int write)
{
void *next = vstruct_next(entry);
const char *invalid;
- unsigned version = le32_to_cpu(jset->version);
int ret = 0;
if (journal_entry_err_on(!k->k.u64s, c,
- "invalid %s in jset %llu offset %zi/%u entry offset %zi/%u: k->u64s 0",
- type, le64_to_cpu(jset->seq),
- (u64 *) entry - jset->_data,
- le32_to_cpu(jset->u64s),
+ "invalid %s in %s entry offset %zi/%u: k->u64s 0",
+ type, where,
(u64 *) k - entry->_data,
le16_to_cpu(entry->u64s))) {
entry->u64s = cpu_to_le16((u64 *) k - entry->_data);
@@ -227,10 +224,8 @@ static int journal_validate_key(struct bch_fs *c, struct jset *jset,
if (journal_entry_err_on((void *) bkey_next(k) >
(void *) vstruct_next(entry), c,
- "invalid %s in jset %llu offset %zi/%u entry offset %zi/%u: extends past end of journal entry",
- type, le64_to_cpu(jset->seq),
- (u64 *) entry - jset->_data,
- le32_to_cpu(jset->u64s),
+ "invalid %s in %s entry offset %zi/%u: extends past end of journal entry",
+ type, where,
(u64 *) k - entry->_data,
le16_to_cpu(entry->u64s))) {
entry->u64s = cpu_to_le16((u64 *) k - entry->_data);
@@ -239,10 +234,8 @@ static int journal_validate_key(struct bch_fs *c, struct jset *jset,
}
if (journal_entry_err_on(k->k.format != KEY_FORMAT_CURRENT, c,
- "invalid %s in jset %llu offset %zi/%u entry offset %zi/%u: bad format %u",
- type, le64_to_cpu(jset->seq),
- (u64 *) entry - jset->_data,
- le32_to_cpu(jset->u64s),
+ "invalid %s in %s entry offset %zi/%u: bad format %u",
+ type, where,
(u64 *) k - entry->_data,
le16_to_cpu(entry->u64s),
k->k.format)) {
@@ -253,9 +246,8 @@ static int journal_validate_key(struct bch_fs *c, struct jset *jset,
}
if (!write)
- bch2_bkey_compat(level, btree_id, version,
- JSET_BIG_ENDIAN(jset), write,
- NULL, bkey_to_packed(k));
+ bch2_bkey_compat(level, btree_id, version, big_endian,
+ write, NULL, bkey_to_packed(k));
invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(k),
__btree_node_type(level, btree_id));
@@ -263,10 +255,8 @@ static int journal_validate_key(struct bch_fs *c, struct jset *jset,
char buf[160];
bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(k));
- mustfix_fsck_err(c, "invalid %s in jset %llu offset %zi/%u entry offset %zi/%u: %s\n%s",
- type, le64_to_cpu(jset->seq),
- (u64 *) entry - jset->_data,
- le32_to_cpu(jset->u64s),
+ mustfix_fsck_err(c, "invalid %s in %s entry offset %zi/%u: %s\n%s",
+ type, where,
(u64 *) k - entry->_data,
le16_to_cpu(entry->u64s),
invalid, buf);
@@ -278,25 +268,24 @@ static int journal_validate_key(struct bch_fs *c, struct jset *jset,
}
if (write)
- bch2_bkey_compat(level, btree_id, version,
- JSET_BIG_ENDIAN(jset), write,
- NULL, bkey_to_packed(k));
+ bch2_bkey_compat(level, btree_id, version, big_endian,
+ write, NULL, bkey_to_packed(k));
fsck_err:
return ret;
}
static int journal_entry_validate_btree_keys(struct bch_fs *c,
- struct jset *jset,
+ const char *where,
struct jset_entry *entry,
- int write)
+ unsigned version, int big_endian, int write)
{
struct bkey_i *k = entry->start;
while (k != vstruct_last(entry)) {
- int ret = journal_validate_key(c, jset, entry,
+ int ret = journal_validate_key(c, where, entry,
entry->level,
entry->btree_id,
- k, "key", write);
+ k, "key", version, big_endian, write);
if (ret == FSCK_DELETED_KEY)
continue;
@@ -307,9 +296,9 @@ static int journal_entry_validate_btree_keys(struct bch_fs *c,
}
static int journal_entry_validate_btree_root(struct bch_fs *c,
- struct jset *jset,
+ const char *where,
struct jset_entry *entry,
- int write)
+ unsigned version, int big_endian, int write)
{
struct bkey_i *k = entry->start;
int ret = 0;
@@ -328,25 +317,25 @@ static int journal_entry_validate_btree_root(struct bch_fs *c,
return 0;
}
- return journal_validate_key(c, jset, entry, 1, entry->btree_id, k,
- "btree root", write);
+ return journal_validate_key(c, where, entry, 1, entry->btree_id, k,
+ "btree root", version, big_endian, write);
fsck_err:
return ret;
}
static int journal_entry_validate_prio_ptrs(struct bch_fs *c,
- struct jset *jset,
+ const char *where,
struct jset_entry *entry,
- int write)
+ unsigned version, int big_endian, int write)
{
/* obsolete, don't care: */
return 0;
}
static int journal_entry_validate_blacklist(struct bch_fs *c,
- struct jset *jset,
+ const char *where,
struct jset_entry *entry,
- int write)
+ unsigned version, int big_endian, int write)
{
int ret = 0;
@@ -359,9 +348,9 @@ fsck_err:
}
static int journal_entry_validate_blacklist_v2(struct bch_fs *c,
- struct jset *jset,
+ const char *where,
struct jset_entry *entry,
- int write)
+ unsigned version, int big_endian, int write)
{
struct jset_entry_blacklist_v2 *bl_entry;
int ret = 0;
@@ -385,9 +374,9 @@ fsck_err:
}
static int journal_entry_validate_usage(struct bch_fs *c,
- struct jset *jset,
+ const char *where,
struct jset_entry *entry,
- int write)
+ unsigned version, int big_endian, int write)
{
struct jset_entry_usage *u =
container_of(entry, struct jset_entry_usage, entry);
@@ -406,9 +395,9 @@ fsck_err:
}
static int journal_entry_validate_data_usage(struct bch_fs *c,
- struct jset *jset,
+ const char *where,
struct jset_entry *entry,
- int write)
+ unsigned version, int big_endian, int write)
{
struct jset_entry_data_usage *u =
container_of(entry, struct jset_entry_data_usage, entry);
@@ -428,9 +417,9 @@ fsck_err:
}
static int journal_entry_validate_clock(struct bch_fs *c,
- struct jset *jset,
+ const char *where,
struct jset_entry *entry,
- int write)
+ unsigned version, int big_endian, int write)
{
struct jset_entry_clock *clock =
container_of(entry, struct jset_entry_clock, entry);
@@ -454,9 +443,9 @@ fsck_err:
}
static int journal_entry_validate_dev_usage(struct bch_fs *c,
- struct jset *jset,
+ const char *where,
struct jset_entry *entry,
- int write)
+ unsigned version, int big_endian, int write)
{
struct jset_entry_dev_usage *u =
container_of(entry, struct jset_entry_dev_usage, entry);
@@ -491,8 +480,8 @@ fsck_err:
}
struct jset_entry_ops {
- int (*validate)(struct bch_fs *, struct jset *,
- struct jset_entry *, int);
+ int (*validate)(struct bch_fs *, const char *,
+ struct jset_entry *, unsigned, int, int);
};
static const struct jset_entry_ops bch2_jset_entry_ops[] = {
@@ -504,22 +493,29 @@ static const struct jset_entry_ops bch2_jset_entry_ops[] = {
#undef x
};
-static int journal_entry_validate(struct bch_fs *c, struct jset *jset,
- struct jset_entry *entry, int write)
+int bch2_journal_entry_validate(struct bch_fs *c, const char *where,
+ struct jset_entry *entry,
+ unsigned version, int big_endian, int write)
{
return entry->type < BCH_JSET_ENTRY_NR
- ? bch2_jset_entry_ops[entry->type].validate(c, jset,
- entry, write)
+ ? bch2_jset_entry_ops[entry->type].validate(c, where, entry,
+ version, big_endian, write)
: 0;
}
static int jset_validate_entries(struct bch_fs *c, struct jset *jset,
int write)
{
+ char buf[100];
struct jset_entry *entry;
int ret = 0;
vstruct_for_each(jset, entry) {
+ scnprintf(buf, sizeof(buf), "jset %llu entry offset %zi/%u",
+ le64_to_cpu(jset->seq),
+ (u64 *) entry - jset->_data,
+ le32_to_cpu(jset->u64s));
+
if (journal_entry_err_on(vstruct_next(entry) >
vstruct_last(jset), c,
"journal entry extends past end of jset")) {
@@ -527,7 +523,9 @@ static int jset_validate_entries(struct bch_fs *c, struct jset *jset,
break;
}
- ret = journal_entry_validate(c, jset, entry, write);
+ ret = bch2_journal_entry_validate(c, buf, entry,
+ le32_to_cpu(jset->version),
+ JSET_BIG_ENDIAN(jset), write);
if (ret)
break;
}
@@ -1386,6 +1384,7 @@ void bch2_journal_write(struct closure *cl)
struct jset_entry *start, *end;
struct jset *jset;
struct bio *bio;
+ char *journal_debug_buf = NULL;
bool validate_before_checksum = false;
unsigned i, sectors, bytes, u64s, nr_rw_members = 0;
int ret;
@@ -1487,6 +1486,12 @@ retry_alloc:
goto retry_alloc;
}
+ if (ret) {
+ journal_debug_buf = kmalloc(4096, GFP_ATOMIC);
+ if (journal_debug_buf)
+ __bch2_journal_debug_to_text(&_PBUF(journal_debug_buf, 4096), j);
+ }
+
/*
* write is allocated, no longer need to account for it in
* bch2_journal_space_available():
@@ -1501,7 +1506,9 @@ retry_alloc:
spin_unlock(&j->lock);
if (ret) {
- bch_err(c, "Unable to allocate journal write");
+ bch_err(c, "Unable to allocate journal write:\n%s",
+ journal_debug_buf);
+ kfree(journal_debug_buf);
bch2_fatal_error(c);
continue_at(cl, journal_write_done, system_highpri_wq);
return;
diff --git a/libbcachefs/journal_io.h b/libbcachefs/journal_io.h
index a4931ab9..f34281a2 100644
--- a/libbcachefs/journal_io.h
+++ b/libbcachefs/journal_io.h
@@ -40,6 +40,9 @@ static inline struct jset_entry *__jset_entry_type_next(struct jset *jset,
for_each_jset_entry_type(entry, jset, BCH_JSET_ENTRY_btree_keys) \
vstruct_for_each_safe(entry, k, _n)
+int bch2_journal_entry_validate(struct bch_fs *, const char *, struct jset_entry *,
+ unsigned, int, int);
+
int bch2_journal_read(struct bch_fs *, struct list_head *, u64 *, u64 *);
void bch2_journal_write(struct closure *);
diff --git a/libbcachefs/journal_reclaim.c b/libbcachefs/journal_reclaim.c
index 4e3cf219..bbf8e5ad 100644
--- a/libbcachefs/journal_reclaim.c
+++ b/libbcachefs/journal_reclaim.c
@@ -691,8 +691,10 @@ int bch2_journal_reclaim_start(struct journal *j)
p = kthread_create(bch2_journal_reclaim_thread, j,
"bch-reclaim/%s", c->name);
- if (IS_ERR(p))
+ if (IS_ERR(p)) {
+ bch_err(c, "error creating journal reclaim thread: %li", PTR_ERR(p));
return PTR_ERR(p);
+ }
get_task_struct(p);
j->reclaim_thread = p;
diff --git a/libbcachefs/movinggc.c b/libbcachefs/movinggc.c
index f915b30a..0b1faee5 100644
--- a/libbcachefs/movinggc.c
+++ b/libbcachefs/movinggc.c
@@ -348,8 +348,10 @@ int bch2_copygc_start(struct bch_fs *c)
return -ENOMEM;
t = kthread_create(bch2_copygc_thread, c, "bch-copygc/%s", c->name);
- if (IS_ERR(t))
+ if (IS_ERR(t)) {
+ bch_err(c, "error creating copygc thread: %li", PTR_ERR(t));
return PTR_ERR(t);
+ }
get_task_struct(t);
diff --git a/libbcachefs/quota.c b/libbcachefs/quota.c
index 041da982..8e272519 100644
--- a/libbcachefs/quota.c
+++ b/libbcachefs/quota.c
@@ -746,7 +746,6 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid,
struct qc_dqblk *qdq)
{
struct bch_fs *c = sb->s_fs_info;
- struct btree_trans trans;
struct bkey_i_quota new_quota;
int ret;
@@ -756,14 +755,10 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid,
bkey_quota_init(&new_quota.k_i);
new_quota.k.p = POS(qid.type, from_kqid(&init_user_ns, qid));
- bch2_trans_init(&trans, c, 0, 0);
-
ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOUNLOCK,
bch2_set_quota_trans(&trans, &new_quota, qdq)) ?:
__bch2_quota_set(c, bkey_i_to_s_c(&new_quota.k_i));
- bch2_trans_exit(&trans);
-
return ret;
}
diff --git a/libbcachefs/rebalance.c b/libbcachefs/rebalance.c
index d89920b8..482aca43 100644
--- a/libbcachefs/rebalance.c
+++ b/libbcachefs/rebalance.c
@@ -280,10 +280,10 @@ void bch2_rebalance_work_to_text(struct printbuf *out, struct bch_fs *c)
h1);
break;
case REBALANCE_RUNNING:
- pr_buf(out, "running\n");
- pr_buf(out, "pos %llu:%llu\n",
- r->move_stats.pos.inode,
- r->move_stats.pos.offset);
+ pr_buf(out, "running\n"
+ "pos ");
+ bch2_bpos_to_text(out, r->move_stats.pos);
+ pr_buf(out, "\n");
break;
}
}
@@ -315,8 +315,10 @@ int bch2_rebalance_start(struct bch_fs *c)
return 0;
p = kthread_create(bch2_rebalance_thread, c, "bch-rebalance/%s", c->name);
- if (IS_ERR(p))
+ if (IS_ERR(p)) {
+ bch_err(c, "error creating rebalance thread: %li", PTR_ERR(p));
return PTR_ERR(p);
+ }
get_task_struct(p);
rcu_assign_pointer(c->rebalance.thread, p);
diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c
index 73746eba..0975cf33 100644
--- a/libbcachefs/recovery.c
+++ b/libbcachefs/recovery.c
@@ -122,8 +122,11 @@ int bch2_journal_key_insert(struct bch_fs *c, enum btree_id id,
};
new_keys.d = kvmalloc(sizeof(new_keys.d[0]) * new_keys.size, GFP_KERNEL);
- if (!new_keys.d)
+ if (!new_keys.d) {
+ bch_err(c, "%s: error allocating new key array (size %zu)",
+ __func__, new_keys.size);
return -ENOMEM;
+ }
memcpy(new_keys.d, keys->d, sizeof(keys->d[0]) * keys->nr);
kvfree(keys->d);
@@ -145,8 +148,10 @@ int bch2_journal_key_delete(struct bch_fs *c, enum btree_id id,
kmalloc(sizeof(struct bkey), GFP_KERNEL);
int ret;
- if (!whiteout)
+ if (!whiteout) {
+ bch_err(c, "%s: error allocating new key", __func__);
return -ENOMEM;
+ }
bkey_init(&whiteout->k);
whiteout->k.p = pos;
@@ -523,7 +528,7 @@ static int __bch2_journal_replay_key(struct btree_trans *trans,
* want that here, journal replay is supposed to treat extents like
* regular keys:
*/
- __bch2_btree_iter_set_pos(iter, k->k.p, false);
+ BUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS);
ret = bch2_btree_iter_traverse(iter) ?:
bch2_trans_update(trans, iter, k, BTREE_TRIGGER_NORUN);
@@ -902,9 +907,11 @@ static struct bch_sb_field_clean *read_superblock_clean(struct bch_fs *c)
return ERR_PTR(-ENOMEM);
}
- if (le16_to_cpu(c->disk_sb.sb->version) <
- bcachefs_metadata_version_bkey_renumber)
- bch2_sb_clean_renumber(clean, READ);
+ ret = bch2_sb_clean_validate(c, clean, READ);
+ if (ret) {
+ mutex_unlock(&c->sb_lock);
+ return ERR_PTR(ret);
+ }
mutex_unlock(&c->sb_lock);
@@ -1336,8 +1343,10 @@ int bch2_fs_initialize(struct bch_fs *c)
&lostfound,
0, 0, S_IFDIR|0700, 0,
NULL, NULL));
- if (ret)
+ if (ret) {
+ bch_err(c, "error creating lost+found");
goto err;
+ }
if (enabled_qtypes(c)) {
ret = bch2_fs_quota_read(c);
diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c
index f5569971..b9ad9c4d 100644
--- a/libbcachefs/super-io.c
+++ b/libbcachefs/super-io.c
@@ -9,6 +9,7 @@
#include "error.h"
#include "io.h"
#include "journal.h"
+#include "journal_io.h"
#include "journal_seq_blacklist.h"
#include "replicas.h"
#include "quota.h"
@@ -709,6 +710,8 @@ int bch2_write_super(struct bch_fs *c)
if (test_bit(BCH_FS_ERROR, &c->flags))
SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 1);
+ SET_BCH_SB_BIG_ENDIAN(c->disk_sb.sb, CPU_BIG_ENDIAN);
+
for_each_online_member(ca, c, i)
bch2_sb_from_fs(c, ca);
@@ -932,14 +935,23 @@ static const struct bch_sb_field_ops bch_sb_field_ops_crypt = {
/* BCH_SB_FIELD_clean: */
-void bch2_sb_clean_renumber(struct bch_sb_field_clean *clean, int write)
+int bch2_sb_clean_validate(struct bch_fs *c, struct bch_sb_field_clean *clean, int write)
{
struct jset_entry *entry;
+ int ret;
for (entry = clean->start;
entry < (struct jset_entry *) vstruct_end(&clean->field);
- entry = vstruct_next(entry))
- bch2_bkey_renumber(BKEY_TYPE_btree, bkey_to_packed(entry->start), write);
+ entry = vstruct_next(entry)) {
+ ret = bch2_journal_entry_validate(c, "superblock", entry,
+ le16_to_cpu(c->disk_sb.sb->version),
+ BCH_SB_BIG_ENDIAN(c->disk_sb.sb),
+ write);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
}
int bch2_fs_mark_dirty(struct bch_fs *c)
@@ -1072,6 +1084,7 @@ void bch2_fs_mark_clean(struct bch_fs *c)
struct bch_sb_field_clean *sb_clean;
struct jset_entry *entry;
unsigned u64s;
+ int ret;
mutex_lock(&c->sb_lock);
if (BCH_SB_CLEAN(c->disk_sb.sb))
@@ -1106,9 +1119,15 @@ void bch2_fs_mark_clean(struct bch_fs *c)
memset(entry, 0,
vstruct_end(&sb_clean->field) - (void *) entry);
- if (le16_to_cpu(c->disk_sb.sb->version) <
- bcachefs_metadata_version_bkey_renumber)
- bch2_sb_clean_renumber(sb_clean, WRITE);
+ /*
+ * this should be in the write path, and we should be validating every
+ * superblock section:
+ */
+ ret = bch2_sb_clean_validate(c, sb_clean, WRITE);
+ if (ret) {
+ bch_err(c, "error writing marking filesystem clean: validate error");
+ goto out;
+ }
bch2_write_super(c);
out:
diff --git a/libbcachefs/super-io.h b/libbcachefs/super-io.h
index 1a35124f..b64ac2fb 100644
--- a/libbcachefs/super-io.h
+++ b/libbcachefs/super-io.h
@@ -125,7 +125,7 @@ static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi)
void bch2_journal_super_entries_add_common(struct bch_fs *,
struct jset_entry **, u64);
-void bch2_sb_clean_renumber(struct bch_sb_field_clean *, int);
+int bch2_sb_clean_validate(struct bch_fs *, struct bch_sb_field_clean *, int);
int bch2_fs_mark_dirty(struct bch_fs *);
void bch2_fs_mark_clean(struct bch_fs *);
diff --git a/libbcachefs/super.c b/libbcachefs/super.c
index eee7d6c0..2096c76e 100644
--- a/libbcachefs/super.c
+++ b/libbcachefs/super.c
@@ -424,6 +424,9 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
set_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags);
+ for_each_rw_member(ca, c, i)
+ bch2_wake_allocator(ca);
+
ret = bch2_journal_reclaim_start(&c->journal);
if (ret) {
bch_err(c, "error starting journal reclaim: %i", ret);
@@ -1001,6 +1004,8 @@ static void bch2_dev_release(struct kobject *kobj)
static void bch2_dev_free(struct bch_dev *ca)
{
+ bch2_dev_allocator_stop(ca);
+
cancel_work_sync(&ca->io_error_work);
if (ca->kobj.state_in_sysfs &&
@@ -1169,6 +1174,14 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)
if (!ca)
goto err;
+ ca->fs = c;
+
+ if (ca->mi.state == BCH_MEMBER_STATE_rw &&
+ bch2_dev_allocator_start(ca)) {
+ bch2_dev_free(ca);
+ goto err;
+ }
+
bch2_dev_attach(c, ca, dev_idx);
out:
pr_verbose_init(c->opts, "ret %i", ret);