author    Kent Overstreet <kent.overstreet@gmail.com>  2021-04-21 18:13:43 -0400
committer Kent Overstreet <kent.overstreet@gmail.com>  2021-04-24 01:37:09 -0400
commit    f87850496ea0ad0e5e8f8feb1c454662d0973530 (patch)
tree      89f0c013f58a51fb3c484638ed2ad5805ea86d13
parent    30f72f75f51bd42ab3ac943745bd7b4cee5eec9d (diff)
Update bcachefs sources to 3c41353bc1 bcachefs: Fix bch2_verify_keylist_sorted
-rw-r--r--  .bcachefs_revision                      2
-rw-r--r--  libbcachefs/bcachefs.h                 16
-rw-r--r--  libbcachefs/bkey_methods.c              1
-rw-r--r--  libbcachefs/btree_cache.c              88
-rw-r--r--  libbcachefs/btree_cache.h               1
-rw-r--r--  libbcachefs/btree_gc.c                  6
-rw-r--r--  libbcachefs/btree_io.c                 20
-rw-r--r--  libbcachefs/btree_io.h                  1
-rw-r--r--  libbcachefs/btree_iter.c               37
-rw-r--r--  libbcachefs/btree_key_cache.c          16
-rw-r--r--  libbcachefs/btree_types.h               2
-rw-r--r--  libbcachefs/btree_update_interior.c    67
-rw-r--r--  libbcachefs/btree_update_leaf.c         6
-rw-r--r--  libbcachefs/buckets.c                  69
-rw-r--r--  libbcachefs/debug.c                   120
-rw-r--r--  libbcachefs/debug.h                     4
-rw-r--r--  libbcachefs/ec.c                        1
-rw-r--r--  libbcachefs/fsck.c                    222
-rw-r--r--  libbcachefs/journal_reclaim.c           2
-rw-r--r--  libbcachefs/keylist.c                   2
-rw-r--r--  libbcachefs/move.c                      6
-rw-r--r--  libbcachefs/movinggc.c                 13
-rw-r--r--  libbcachefs/replicas.c                 18
-rw-r--r--  libbcachefs/replicas.h                  1
-rw-r--r--  libbcachefs/super.c                     7
25 files changed, 458 insertions(+), 270 deletions(-)
diff --git a/.bcachefs_revision b/.bcachefs_revision
index 82c9b19f..feafaff4 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-fe72e70682cd2430a099c08c3135253675030d28
+3c41353bc185e0a0da4c6f63b1203575c41a2da1
diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h
index aade5624..ce058d55 100644
--- a/libbcachefs/bcachefs.h
+++ b/libbcachefs/bcachefs.h
@@ -259,7 +259,11 @@ do { \
BCH_DEBUG_PARAM(btree_gc_rewrite_disabled, \
"Disables rewriting of btree nodes during mark and sweep")\
BCH_DEBUG_PARAM(btree_shrinker_disabled, \
- "Disables the shrinker callback for the btree node cache")
+ "Disables the shrinker callback for the btree node cache")\
+ BCH_DEBUG_PARAM(verify_btree_ondisk, \
+ "Reread btree nodes at various points to verify the " \
+ "mergesort in the read path against modifications " \
+ "done in memory")
/* Parameters that should only be compiled in in debug mode: */
#define BCH_DEBUG_PARAMS_DEBUG() \
@@ -273,10 +277,6 @@ do { \
"information) when iterating over keys") \
BCH_DEBUG_PARAM(debug_check_btree_accounting, \
"Verify btree accounting for keys within a node") \
- BCH_DEBUG_PARAM(verify_btree_ondisk, \
- "Reread btree nodes at various points to verify the " \
- "mergesort in the read path against modifications " \
- "done in memory") \
BCH_DEBUG_PARAM(journal_seq_verify, \
"Store the journal sequence number in the version " \
"number of every btree key, and verify that btree " \
@@ -545,6 +545,8 @@ struct btree_iter_buf {
struct btree_iter *iter;
};
+#define REPLICAS_DELTA_LIST_MAX (1U << 16)
+
struct bch_fs {
struct closure cl;
@@ -572,6 +574,7 @@ struct bch_fs {
struct bch_replicas_cpu replicas;
struct bch_replicas_cpu replicas_gc;
struct mutex replicas_gc_lock;
+ mempool_t replicas_delta_pool;
struct journal_entry_res btree_root_journal_res;
struct journal_entry_res replicas_journal_res;
@@ -644,6 +647,7 @@ struct bch_fs {
struct mutex btree_trans_lock;
struct list_head btree_trans_list;
mempool_t btree_iters_pool;
+ mempool_t btree_trans_mem_pool;
struct btree_iter_buf __percpu *btree_iters_bufs;
struct srcu_struct btree_trans_barrier;
@@ -813,11 +817,9 @@ struct bch_fs {
/* DEBUG JUNK */
struct dentry *debug;
struct btree_debug btree_debug[BTREE_ID_NR];
-#ifdef CONFIG_BCACHEFS_DEBUG
struct btree *verify_data;
struct btree_node *verify_ondisk;
struct mutex verify_lock;
-#endif
u64 *unused_inode_hints;
unsigned inode_shard_bits;
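Note on the bcachefs.h hunks above: verify_btree_ondisk moves from the debug-only BCH_DEBUG_PARAMS_DEBUG() list into the always-compiled list, and the verify_data/verify_ondisk/verify_lock fields lose their CONFIG_BCACHEFS_DEBUG guard, so on-disk verification can now be enabled in release builds too. The BCH_DEBUG_PARAM lists are X-macros, re-expanded at each use site; a minimal sketch of the pattern, with hypothetical names rather than the bcachefs definitions:

	#include <stdio.h>
	#include <stdbool.h>

	/* Hypothetical X-macro list, standing in for BCH_DEBUG_PARAMS: */
	#define PARAMS()						\
		PARAM(verify_btree_ondisk, "reread nodes to verify")	\
		PARAM(btree_shrinker_disabled, "disable the shrinker")

	/* First expansion: one flag per parameter. */
	#define PARAM(name, description) static bool opt_##name;
	PARAMS()
	#undef PARAM

	/* Second expansion: help text, kept in sync automatically. */
	static void print_params(void)
	{
	#define PARAM(name, description) printf("%-28s %s\n", #name, description);
		PARAMS()
	#undef PARAM
	}

Moving a parameter between the two lists therefore only changes which builds define it; every expansion site picks the change up for free.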
diff --git a/libbcachefs/bkey_methods.c b/libbcachefs/bkey_methods.c
index 450b613d..9f869bed 100644
--- a/libbcachefs/bkey_methods.c
+++ b/libbcachefs/bkey_methods.c
@@ -100,7 +100,6 @@ const char *bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k)
static unsigned bch2_key_types_allowed[] = {
[BKEY_TYPE_extents] =
- (1U << KEY_TYPE_discard)|
(1U << KEY_TYPE_error)|
(1U << KEY_TYPE_extent)|
(1U << KEY_TYPE_reservation)|
diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c
index 9f963179..edc3c5ed 100644
--- a/libbcachefs/btree_cache.c
+++ b/libbcachefs/btree_cache.c
@@ -33,21 +33,21 @@ static inline unsigned btree_cache_can_free(struct btree_cache *bc)
return max_t(int, 0, bc->used - bc->reserve);
}
-static void __btree_node_data_free(struct bch_fs *c, struct btree *b)
+static void btree_node_data_free(struct bch_fs *c, struct btree *b)
{
+ struct btree_cache *bc = &c->btree_cache;
+
EBUG_ON(btree_node_write_in_flight(b));
kvpfree(b->data, btree_bytes(c));
b->data = NULL;
+#ifdef __KERNEL__
vfree(b->aux_data);
+#else
+ munmap(b->aux_data, btree_aux_data_bytes(b));
+#endif
b->aux_data = NULL;
-}
-static void btree_node_data_free(struct bch_fs *c, struct btree *b)
-{
- struct btree_cache *bc = &c->btree_cache;
-
- __btree_node_data_free(c, b);
bc->used--;
list_move(&b->list, &bc->freed);
}
@@ -75,8 +75,13 @@ static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
b->data = kvpmalloc(btree_bytes(c), gfp);
if (!b->data)
return -ENOMEM;
-
+#ifdef __KERNEL__
b->aux_data = vmalloc_exec(btree_aux_data_bytes(b), gfp);
+#else
+ b->aux_data = mmap(NULL, btree_aux_data_bytes(b),
+ PROT_READ|PROT_WRITE|PROT_EXEC,
+ MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
+#endif
if (!b->aux_data) {
kvpfree(b->data, btree_bytes(c));
b->data = NULL;
@@ -100,7 +105,7 @@ static struct btree *__btree_node_mem_alloc(struct bch_fs *c)
return b;
}
-static struct btree *btree_node_mem_alloc(struct bch_fs *c)
+struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c)
{
struct btree_cache *bc = &c->btree_cache;
struct btree *b = __btree_node_mem_alloc(c);
@@ -360,12 +365,10 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
flags = memalloc_nofs_save();
mutex_lock(&bc->lock);
-#ifdef CONFIG_BCACHEFS_DEBUG
if (c->verify_data)
list_move(&c->verify_data->list, &bc->live);
kvpfree(c->verify_ondisk, btree_bytes(c));
-#endif
for (i = 0; i < BTREE_ID_NR; i++)
if (c->btree_roots[i].b)
@@ -419,31 +422,15 @@ int bch2_fs_btree_cache_init(struct bch_fs *c)
bch2_recalc_btree_reserve(c);
for (i = 0; i < bc->reserve; i++)
- if (!btree_node_mem_alloc(c)) {
+ if (!__bch2_btree_node_mem_alloc(c)) {
ret = -ENOMEM;
goto out;
}
list_splice_init(&bc->live, &bc->freeable);
-#ifdef CONFIG_BCACHEFS_DEBUG
mutex_init(&c->verify_lock);
- c->verify_ondisk = kvpmalloc(btree_bytes(c), GFP_KERNEL);
- if (!c->verify_ondisk) {
- ret = -ENOMEM;
- goto out;
- }
-
- c->verify_data = btree_node_mem_alloc(c);
- if (!c->verify_data) {
- ret = -ENOMEM;
- goto out;
- }
-
- list_del_init(&c->verify_data->list);
-#endif
-
bc->shrink.count_objects = bch2_btree_cache_count;
bc->shrink.scan_objects = bch2_btree_cache_scan;
bc->shrink.seeks = 4;
@@ -703,6 +690,41 @@ static int lock_node_check_fn(struct six_lock *lock, void *p)
return b->hash_val == btree_ptr_hash_val(k) ? 0 : -1;
}
+static noinline void btree_bad_header(struct bch_fs *c, struct btree *b)
+{
+ char buf1[100], buf2[100], buf3[100], buf4[100];
+
+ if (!test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags))
+ return;
+
+ bch2_bpos_to_text(&PBUF(buf1), b->key.k.type == KEY_TYPE_btree_ptr_v2
+ ? bkey_i_to_btree_ptr_v2(&b->key)->v.min_key
+ : POS_MIN);
+ bch2_bpos_to_text(&PBUF(buf2), b->data->min_key);
+
+ bch2_bpos_to_text(&PBUF(buf3), b->key.k.p);
+ bch2_bpos_to_text(&PBUF(buf4), b->data->max_key);
+ bch2_fs_inconsistent(c, "btree node header doesn't match ptr\n"
+ "btree: ptr %u header %llu\n"
+ "level: ptr %u header %llu\n"
+ "min ptr %s node header %s\n"
+ "max ptr %s node header %s",
+ b->c.btree_id, BTREE_NODE_ID(b->data),
+ b->c.level, BTREE_NODE_LEVEL(b->data),
+ buf1, buf2, buf3, buf4);
+}
+
+static inline void btree_check_header(struct bch_fs *c, struct btree *b)
+{
+ if (b->c.btree_id != BTREE_NODE_ID(b->data) ||
+ b->c.level != BTREE_NODE_LEVEL(b->data) ||
+ bpos_cmp(b->data->max_key, b->key.k.p) ||
+ (b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
+ bpos_cmp(b->data->min_key,
+ bkey_i_to_btree_ptr_v2(&b->key)->v.min_key)))
+ btree_bad_header(c, b);
+}
+
/**
* bch_btree_node_get - find a btree node in the cache and lock it, reading it
* in from disk if necessary.
@@ -833,10 +855,7 @@ lock_node:
EBUG_ON(b->c.btree_id != iter->btree_id);
EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
- EBUG_ON(bpos_cmp(b->data->max_key, k->k.p));
- EBUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
- bpos_cmp(b->data->min_key,
- bkey_i_to_btree_ptr_v2(&b->key)->v.min_key));
+ btree_check_header(c, b);
return b;
}
@@ -916,10 +935,7 @@ lock_node:
EBUG_ON(b->c.btree_id != btree_id);
EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
- EBUG_ON(bpos_cmp(b->data->max_key, k->k.p));
- EBUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
- bpos_cmp(b->data->min_key,
- bkey_i_to_btree_ptr_v2(&b->key)->v.min_key));
+ btree_check_header(c, b);
out:
bch2_btree_cache_cannibalize_unlock(c);
return b;
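The #ifdef __KERNEL__ split above lets the same file build in bcachefs-tools, where vmalloc_exec() has no userspace counterpart, by mapping the btree aux data with mmap() instead. One caveat worth noting: mmap() reports failure as MAP_FAILED rather than NULL, so a shim along these lines (a sketch with assumed names, not the actual bcachefs-tools shim) should normalize the failure value before the `if (!b->aux_data)` check:

	#include <stddef.h>
	#include <sys/mman.h>

	/* Userspace stand-in for vmalloc_exec(): an anonymous
	 * read/write/exec mapping. */
	static void *exec_alloc(size_t size)
	{
		void *p = mmap(NULL, size, PROT_READ|PROT_WRITE|PROT_EXEC,
			       MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);

		return p == MAP_FAILED ? NULL : p;	/* normalize failure */
	}

	static void exec_free(void *p, size_t size)
	{
		if (p)
			munmap(p, size);
	}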
diff --git a/libbcachefs/btree_cache.h b/libbcachefs/btree_cache.h
index 4791c3b6..c517cc02 100644
--- a/libbcachefs/btree_cache.h
+++ b/libbcachefs/btree_cache.h
@@ -17,6 +17,7 @@ int bch2_btree_node_hash_insert(struct btree_cache *, struct btree *,
void bch2_btree_cache_cannibalize_unlock(struct bch_fs *);
int bch2_btree_cache_cannibalize_lock(struct bch_fs *, struct closure *);
+struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *);
struct btree *bch2_btree_node_mem_alloc(struct bch_fs *);
struct btree *bch2_btree_node_get(struct bch_fs *, struct btree_iter *,
diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c
index 536947cc..864931ea 100644
--- a/libbcachefs/btree_gc.c
+++ b/libbcachefs/btree_gc.c
@@ -330,6 +330,10 @@ static int bch2_gc_mark_key(struct bch_fs *c, enum btree_id btree_id,
BUG_ON(bch2_journal_seq_verify &&
k->k->version.lo > journal_cur_seq(&c->journal));
+ ret = bch2_check_fix_ptrs(c, btree_id, level, is_root, k);
+ if (ret)
+ goto err;
+
if (fsck_err_on(k->k->version.lo > atomic64_read(&c->key_version), c,
"key version number higher than recorded: %llu > %llu",
k->k->version.lo,
@@ -346,8 +350,6 @@ static int bch2_gc_mark_key(struct bch_fs *c, enum btree_id btree_id,
goto err;
}
}
-
- ret = bch2_check_fix_ptrs(c, btree_id, level, is_root, k);
}
ptrs = bch2_bkey_ptrs_c(*k);
diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c
index c8d8df96..2de31a6b 100644
--- a/libbcachefs/btree_io.c
+++ b/libbcachefs/btree_io.c
@@ -1340,6 +1340,13 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
return ret;
}
+static void btree_write_submit(struct work_struct *work)
+{
+ struct btree_write_bio *wbio = container_of(work, struct btree_write_bio, work);
+
+ bch2_submit_wbio_replicas(&wbio->wbio, wbio->wbio.c, BCH_DATA_btree, &wbio->key);
+}
+
void __bch2_btree_node_write(struct bch_fs *c, struct btree *b)
{
struct btree_write_bio *wbio;
@@ -1347,7 +1354,6 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b)
struct bset *i;
struct btree_node *bn = NULL;
struct btree_node_entry *bne = NULL;
- struct bkey_buf k;
struct bch_extent_ptr *ptr;
struct sort_iter sort_iter;
struct nonce nonce;
@@ -1358,8 +1364,6 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b)
bool validate_before_checksum = false;
void *data;
- bch2_bkey_buf_init(&k);
-
if (test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags))
return;
@@ -1536,6 +1540,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b)
wbio_init(&wbio->wbio.bio);
wbio->data = data;
wbio->bytes = bytes;
+ wbio->wbio.c = c;
wbio->wbio.used_mempool = used_mempool;
wbio->wbio.bio.bi_opf = REQ_OP_WRITE|REQ_META;
wbio->wbio.bio.bi_end_io = btree_node_write_endio;
@@ -1558,9 +1563,9 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b)
* just make all btree node writes FUA to keep things sane.
*/
- bch2_bkey_buf_copy(&k, c, &b->key);
+ bkey_copy(&wbio->key, &b->key);
- bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(k.k)), ptr)
+ bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&wbio->key)), ptr)
ptr->offset += b->written;
b->written += sectors_to_write;
@@ -1568,9 +1573,8 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b)
atomic64_inc(&c->btree_writes_nr);
atomic64_add(sectors_to_write, &c->btree_writes_sectors);
- /* XXX: submitting IO with btree locks held: */
- bch2_submit_wbio_replicas(&wbio->wbio, c, BCH_DATA_btree, k.k);
- bch2_bkey_buf_exit(&k, c);
+ INIT_WORK(&wbio->work, btree_write_submit);
+ schedule_work(&wbio->work);
return;
err:
set_btree_node_noevict(b);
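This hunk resolves the old "XXX: submitting IO with btree locks held": the submission is packaged into a work item, the key is copied into the write bio itself (the new __BKEY_PADDED field in struct btree_write_bio, see the btree_io.h hunk below) so nothing on the caller's stack has to outlive the function, and a kworker performs the actual submission with no btree locks held. The general shape of the pattern, as a sketch with hypothetical types:

	#include <linux/workqueue.h>

	struct deferred_write {
		struct work_struct	work;
		/* ... bio, a copy of the key, etc ... */
	};

	static void deferred_write_fn(struct work_struct *work)
	{
		struct deferred_write *w =
			container_of(work, struct deferred_write, work);

		/* actual IO submission runs here, outside the locks */
	}

	static void queue_deferred_write(struct deferred_write *w)
	{
		INIT_WORK(&w->work, deferred_write_fn);
		schedule_work(&w->work);
	}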
diff --git a/libbcachefs/btree_io.h b/libbcachefs/btree_io.h
index 95c35161..c8a8b05a 100644
--- a/libbcachefs/btree_io.h
+++ b/libbcachefs/btree_io.h
@@ -42,6 +42,7 @@ struct btree_read_bio {
struct btree_write_bio {
struct work_struct work;
+ __BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
void *data;
unsigned bytes;
struct bch_write_bio wbio;
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index c8f527bc..93194e62 100644
--- a/libbcachefs/btree_iter.c
+++ b/libbcachefs/btree_iter.c
@@ -2145,7 +2145,16 @@ void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
if (new_top > trans->mem_bytes) {
size_t old_bytes = trans->mem_bytes;
size_t new_bytes = roundup_pow_of_two(new_top);
- void *new_mem = krealloc(trans->mem, new_bytes, GFP_NOFS);
+ void *new_mem;
+
+ WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX);
+
+ new_mem = krealloc(trans->mem, new_bytes, GFP_NOFS);
+ if (!new_mem && new_bytes <= BTREE_TRANS_MEM_MAX) {
+ new_mem = mempool_alloc(&trans->c->btree_trans_mem_pool, GFP_KERNEL);
+ new_bytes = BTREE_TRANS_MEM_MAX;
+ kfree(trans->mem);
+ }
if (!new_mem)
return ERR_PTR(-ENOMEM);
@@ -2249,6 +2258,11 @@ void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
if (expected_mem_bytes) {
trans->mem_bytes = roundup_pow_of_two(expected_mem_bytes);
trans->mem = kmalloc(trans->mem_bytes, GFP_KERNEL|__GFP_NOFAIL);
+
+ if (unlikely(!trans->mem)) {
+ trans->mem = mempool_alloc(&c->btree_trans_mem_pool, GFP_KERNEL);
+ trans->mem_bytes = BTREE_TRANS_MEM_MAX;
+ }
}
trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
@@ -2290,8 +2304,19 @@ int bch2_trans_exit(struct btree_trans *trans)
bch2_journal_preres_put(&trans->c->journal, &trans->journal_preres);
- kfree(trans->fs_usage_deltas);
- kfree(trans->mem);
+ if (trans->fs_usage_deltas) {
+ if (trans->fs_usage_deltas->size + sizeof(*trans->fs_usage_deltas) ==
+ REPLICAS_DELTA_LIST_MAX)
+ mempool_free(trans->fs_usage_deltas,
+ &trans->c->replicas_delta_pool);
+ else
+ kfree(trans->fs_usage_deltas);
+ }
+
+ if (trans->mem_bytes == BTREE_TRANS_MEM_MAX)
+ mempool_free(trans->mem, &trans->c->btree_trans_mem_pool);
+ else
+ kfree(trans->mem);
#ifdef __KERNEL__
/*
@@ -2299,6 +2324,7 @@ int bch2_trans_exit(struct btree_trans *trans)
*/
trans->iters = this_cpu_xchg(c->btree_iters_bufs->iter, trans->iters);
#endif
+
if (trans->iters)
mempool_free(trans->iters, &trans->c->btree_iters_pool);
@@ -2392,6 +2418,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c)
void bch2_fs_btree_iter_exit(struct bch_fs *c)
{
+ mempool_exit(&c->btree_trans_mem_pool);
mempool_exit(&c->btree_iters_pool);
cleanup_srcu_struct(&c->btree_trans_barrier);
}
@@ -2407,5 +2434,7 @@ int bch2_fs_btree_iter_init(struct bch_fs *c)
mempool_init_kmalloc_pool(&c->btree_iters_pool, 1,
sizeof(struct btree_iter) * nr +
sizeof(struct btree_insert_entry) * nr +
- sizeof(struct btree_insert_entry) * nr);
+ sizeof(struct btree_insert_entry) * nr) ?:
+ mempool_init_kmalloc_pool(&c->btree_trans_mem_pool, 1,
+ BTREE_TRANS_MEM_MAX);
}
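Transaction memory now has a guaranteed-progress fallback: if krealloc()/kmalloc() fail and the request fits in BTREE_TRANS_MEM_MAX (4 KiB, defined in btree_types.h below), the allocation is served from the new one-element btree_trans_mem_pool. mempool_alloc() with a sleeping gfp mask never returns NULL; it waits for the reserved element to be freed. Note that in bch2_trans_kmalloc() above the old buffer is kfree()d without copying. A condensed sketch of the fallback, with hypothetical names:

	#include <linux/mempool.h>
	#include <linux/slab.h>

	#define POOL_ELEM_BYTES	4096	/* stands in for BTREE_TRANS_MEM_MAX */

	static void *trans_mem_grow(mempool_t *pool, void *old, size_t *bytes)
	{
		void *n = krealloc(old, *bytes, GFP_NOFS);

		if (!n && *bytes <= POOL_ELEM_BYTES) {
			/* Sleeps until the reserved element is free;
			 * cannot fail with GFP_KERNEL: */
			n = mempool_alloc(pool, GFP_KERNEL);
			*bytes = POOL_ELEM_BYTES;
			kfree(old);	/* old contents are discarded */
		}
		return n;
	}

The matching init/exit pairing is visible above in bch2_fs_btree_iter_init()/bch2_fs_btree_iter_exit().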
diff --git a/libbcachefs/btree_key_cache.c b/libbcachefs/btree_key_cache.c
index 53191c99..a5181a96 100644
--- a/libbcachefs/btree_key_cache.c
+++ b/libbcachefs/btree_key_cache.c
@@ -218,8 +218,14 @@ static int btree_key_cache_fill(struct btree_trans *trans,
goto err;
}
- if (k.k->u64s > ck->u64s) {
- new_u64s = roundup_pow_of_two(k.k->u64s);
+ /*
+ * bch2_varint_decode can read past the end of the buffer by at
+ * most 7 bytes (it won't be used):
+ */
+ new_u64s = k.k->u64s + 1;
+
+ if (new_u64s > ck->u64s) {
+ new_u64s = roundup_pow_of_two(new_u64s);
new_k = kmalloc(new_u64s * sizeof(u64), GFP_NOFS);
if (!new_k) {
ret = -ENOMEM;
@@ -385,12 +391,18 @@ retry:
goto evict;
}
+ /*
+ * Since journal reclaim depends on us making progress here, and the
+ * allocator/copygc depend on journal reclaim making progress, we need
+ * to be using alloc reserves:
+ */
ret = bch2_btree_iter_traverse(b_iter) ?:
bch2_trans_update(trans, b_iter, ck->k, BTREE_TRIGGER_NORUN) ?:
bch2_trans_commit(trans, NULL, NULL,
BTREE_INSERT_NOUNLOCK|
BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_NOFAIL|
+ BTREE_INSERT_USE_RESERVE|
(ck->journal.seq == journal_last_seq(j)
? BTREE_INSERT_JOURNAL_RESERVED
: 0)|
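The "+ 1" of slack in btree_key_cache_fill() exists because bch2_varint_decode() works a word at a time: it can read up to 7 bytes past the end of the last key in the buffer. The extra bytes are never interpreted, but the load must not run off the allocation. A sketch (not bch2_varint_decode itself) of why word-at-a-time decoders overread:

	#include <stdint.h>
	#include <string.h>

	/* Loading a whole 8-byte word at the start of an encoded value is
	 * fast, but touches up to 7 bytes past the value's true end, so
	 * the containing buffer needs that much slack: */
	static uint64_t peek_word(const void *p)
	{
		uint64_t v;

		memcpy(&v, p, sizeof(v));	/* may overread by up to 7 bytes */
		return v;
	}

The same u64 of padding is added on the insert side in btree_update_leaf.c below.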
diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h
index f942ccf6..06a2c412 100644
--- a/libbcachefs/btree_types.h
+++ b/libbcachefs/btree_types.h
@@ -352,6 +352,8 @@ struct btree_trans_commit_hook {
struct btree_trans_commit_hook *next;
};
+#define BTREE_TRANS_MEM_MAX 4096
+
struct btree_trans {
struct bch_fs *c;
#ifdef CONFIG_BCACHEFS_DEBUG
diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c
index 07c92534..87426d17 100644
--- a/libbcachefs/btree_update_interior.c
+++ b/libbcachefs/btree_update_interior.c
@@ -887,6 +887,14 @@ void bch2_btree_interior_update_will_free_node(struct btree_update *as,
btree_update_drop_new_node(c, b);
btree_update_will_delete_key(as, &b->key);
+
+ /*
+ * XXX: Waiting on io with btree node locks held, we don't want to be
+ * doing this. We can't have btree writes happening after the space has
+ * been freed, but we really only need to block before
+ * btree_update_nodes_written_trans() happens.
+ */
+ btree_node_wait_on_io(b);
}
void bch2_btree_update_done(struct btree_update *as)
@@ -1146,6 +1154,24 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
set_btree_node_need_write(b);
}
+static void
+__bch2_btree_insert_keys_interior(struct btree_update *as, struct btree *b,
+ struct btree_iter *iter, struct keylist *keys,
+ struct btree_node_iter node_iter)
+{
+ struct bkey_i *insert = bch2_keylist_front(keys);
+ struct bkey_packed *k;
+
+ BUG_ON(btree_node_type(b) != BKEY_TYPE_btree);
+
+ while ((k = bch2_btree_node_iter_prev_all(&node_iter, b)) &&
+ (bkey_cmp_left_packed(b, k, &insert->k.p) >= 0))
+ ;
+
+ for_each_keylist_key(keys, insert)
+ bch2_insert_fixup_btree_ptr(as, b, iter, insert, &node_iter);
+}
+
/*
* Move keys from n1 (original replacement node, now lower node) to n2 (higher
* node)
@@ -1276,16 +1302,9 @@ static void btree_split_insert_keys(struct btree_update *as, struct btree *b,
struct bkey_packed *src, *dst, *n;
struct bset *i;
- BUG_ON(btree_node_type(b) != BKEY_TYPE_btree);
-
bch2_btree_node_iter_init(&node_iter, b, &k->k.p);
- while (!bch2_keylist_empty(keys)) {
- k = bch2_keylist_front(keys);
-
- bch2_insert_fixup_btree_ptr(as, b, iter, k, &node_iter);
- bch2_keylist_pop_front(keys);
- }
+ __bch2_btree_insert_keys_interior(as, b, iter, keys, node_iter);
/*
* We can't tolerate whiteouts here - with whiteouts there can be
@@ -1431,24 +1450,8 @@ bch2_btree_insert_keys_interior(struct btree_update *as, struct btree *b,
struct btree_iter *iter, struct keylist *keys)
{
struct btree_iter *linked;
- struct btree_node_iter node_iter;
- struct bkey_i *insert = bch2_keylist_front(keys);
- struct bkey_packed *k;
-
- /* Don't screw up @iter's position: */
- node_iter = iter->l[b->c.level].iter;
-
- /*
- * btree_split(), btree_gc_coalesce() will insert keys before
- * the iterator's current position - they know the keys go in
- * the node the iterator points to:
- */
- while ((k = bch2_btree_node_iter_prev_all(&node_iter, b)) &&
- (bkey_cmp_left_packed(b, k, &insert->k.p) >= 0))
- ;
- for_each_keylist_key(keys, insert)
- bch2_insert_fixup_btree_ptr(as, b, iter, insert, &node_iter);
+ __bch2_btree_insert_keys_interior(as, b, iter, keys, iter->l[b->c.level].iter);
btree_update_updated_node(as, b);
@@ -1598,7 +1601,19 @@ retry:
next = m;
}
- BUG_ON(bkey_cmp(bpos_successor(prev->data->max_key), next->data->min_key));
+ if (bkey_cmp(bpos_successor(prev->data->max_key), next->data->min_key)) {
+ char buf1[100], buf2[100];
+
+ bch2_bpos_to_text(&PBUF(buf1), prev->data->max_key);
+ bch2_bpos_to_text(&PBUF(buf2), next->data->min_key);
+ bch2_fs_inconsistent(c,
+ "btree topology error in btree merge:\n"
+ "prev ends at %s\n"
+ "next starts at %s\n",
+ buf1, buf2);
+ ret = -EIO;
+ goto err;
+ }
bch2_bkey_format_init(&new_s);
bch2_bkey_format_add_pos(&new_s, prev->data->min_key);
diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c
index afdcc98d..b793ab77 100644
--- a/libbcachefs/btree_update_leaf.c
+++ b/libbcachefs/btree_update_leaf.c
@@ -293,6 +293,12 @@ btree_key_can_insert_cached(struct btree_trans *trans,
!(trans->flags & BTREE_INSERT_JOURNAL_RECLAIM))
return BTREE_INSERT_NEED_JOURNAL_RECLAIM;
+ /*
+ * bch2_varint_decode can read past the end of the buffer by at most 7
+ * bytes (it won't be used):
+ */
+ u64s += 1;
+
if (u64s <= ck->u64s)
return BTREE_INSERT_OK;
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index 6b99f127..c3ad0bc8 100644
--- a/libbcachefs/buckets.c
+++ b/libbcachefs/buckets.c
@@ -396,20 +396,22 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
bch2_wake_allocator(ca);
}
-static inline void update_replicas(struct bch_fs *c,
+static inline int update_replicas(struct bch_fs *c,
struct bch_fs_usage *fs_usage,
struct bch_replicas_entry *r,
s64 sectors)
{
int idx = bch2_replicas_entry_idx(c, r);
- BUG_ON(idx < 0);
+ if (idx < 0)
+ return -1;
fs_usage_data_type_to_base(fs_usage, r->data_type, sectors);
fs_usage->replicas[idx] += sectors;
+ return 0;
}
-static inline void update_cached_sectors(struct bch_fs *c,
+static inline int update_cached_sectors(struct bch_fs *c,
struct bch_fs_usage *fs_usage,
unsigned dev, s64 sectors)
{
@@ -417,7 +419,7 @@ static inline void update_cached_sectors(struct bch_fs *c,
bch2_replicas_entry_cached(&r.e, dev);
- update_replicas(c, fs_usage, &r.e, sectors);
+ return update_replicas(c, fs_usage, &r.e, sectors);
}
static struct replicas_delta_list *
@@ -425,10 +427,26 @@ replicas_deltas_realloc(struct btree_trans *trans, unsigned more)
{
struct replicas_delta_list *d = trans->fs_usage_deltas;
unsigned new_size = d ? (d->size + more) * 2 : 128;
+ unsigned alloc_size = sizeof(*d) + new_size;
+
+ WARN_ON_ONCE(alloc_size > REPLICAS_DELTA_LIST_MAX);
if (!d || d->used + more > d->size) {
- d = krealloc(d, sizeof(*d) + new_size, GFP_NOIO|__GFP_ZERO);
- BUG_ON(!d);
+ d = krealloc(d, alloc_size, GFP_NOIO|__GFP_ZERO);
+
+ BUG_ON(!d && alloc_size > REPLICAS_DELTA_LIST_MAX);
+
+ if (!d) {
+ d = mempool_alloc(&trans->c->replicas_delta_pool, GFP_NOIO);
+ memset(d, 0, REPLICAS_DELTA_LIST_MAX);
+
+ if (trans->fs_usage_deltas)
+ memcpy(d, trans->fs_usage_deltas,
+ trans->fs_usage_deltas->size + sizeof(*d));
+
+ new_size = REPLICAS_DELTA_LIST_MAX - sizeof(*d);
+ kfree(trans->fs_usage_deltas);
+ }
d->size = new_size;
trans->fs_usage_deltas = d;
@@ -553,8 +571,12 @@ static int bch2_mark_alloc(struct bch_fs *c,
if ((flags & BTREE_TRIGGER_BUCKET_INVALIDATE) &&
old_m.cached_sectors) {
- update_cached_sectors(c, fs_usage, ca->dev_idx,
- -old_m.cached_sectors);
+ if (update_cached_sectors(c, fs_usage, ca->dev_idx,
+ -old_m.cached_sectors)) {
+ bch2_fs_fatal_error(c, "bch2_mark_alloc(): no replicas entry while updating cached sectors");
+ return -1;
+ }
+
trace_invalidate(ca, bucket_to_sector(ca, new.k->p.offset),
old_m.cached_sectors);
}
@@ -936,8 +958,12 @@ static int bch2_mark_extent(struct bch_fs *c,
if (p.ptr.cached) {
if (!stale)
- update_cached_sectors(c, fs_usage, p.ptr.dev,
- disk_sectors);
+ if (update_cached_sectors(c, fs_usage, p.ptr.dev,
+ disk_sectors)) {
+ bch2_fs_fatal_error(c, "bch2_mark_extent(): no replicas entry while updating cached sectors");
+ return -1;
+ }
} else if (!p.has_ec) {
dirty_sectors += disk_sectors;
r.e.devs[r.e.nr_devs++] = p.ptr.dev;
@@ -956,8 +982,15 @@ static int bch2_mark_extent(struct bch_fs *c,
}
}
- if (r.e.nr_devs)
- update_replicas(c, fs_usage, &r.e, dirty_sectors);
+ if (r.e.nr_devs) {
+ if (update_replicas(c, fs_usage, &r.e, dirty_sectors)) {
+ char buf[200];
+
+ bch2_bkey_val_to_text(&PBUF(buf), c, k);
+ bch2_fs_fatal_error(c, "no replicas entry for %s", buf);
+ return -1;
+ }
+ }
return 0;
}
@@ -1031,8 +1064,14 @@ static int bch2_mark_stripe(struct bch_fs *c,
return ret;
}
- update_replicas(c, fs_usage, &m->r.e,
- ((s64) m->sectors * m->nr_redundant));
+ if (update_replicas(c, fs_usage, &m->r.e,
+ ((s64) m->sectors * m->nr_redundant))) {
+ char buf[200];
+
+ bch2_bkey_val_to_text(&PBUF(buf), c, new);
+ bch2_fs_fatal_error(c, "no replicas entry for %s", buf);
+ return -1;
+ }
}
return 0;
@@ -1292,7 +1331,7 @@ void bch2_trans_fs_usage_apply(struct btree_trans *trans,
added += d->delta;
}
- update_replicas(c, dst, &d->r, d->delta);
+ BUG_ON(update_replicas(c, dst, &d->r, d->delta));
}
dst->nr_inodes += deltas->nr_inodes;
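Two patterns run through these buckets.c hunks. First, update_replicas() and update_cached_sectors() stop BUG_ON()ing when a replicas entry is missing and instead return an error that each caller escalates with bch2_fs_fatal_error(), taking the filesystem read-only rather than crashing the machine. The shape, sketched with hypothetical names:

	#include <linux/types.h>

	struct fs { s64 *counters; };
	void fatal_error(struct fs *c, const char *msg);	/* goes read-only */

	static int update_counter(struct fs *c, int idx, s64 sectors)
	{
		if (idx < 0)
			return -1;	/* entry missing: report, don't assert */

		c->counters[idx] += sectors;
		return 0;
	}

	static int mark_key(struct fs *c, int idx, s64 sectors)
	{
		if (update_counter(c, idx, sectors)) {
			fatal_error(c, "no replicas entry");
			return -1;
		}
		return 0;
	}

Second, replicas_deltas_realloc() gains the same emergency-mempool fallback as the transaction memory above, except that here the old list's contents are memcpy()d into the pool element before the old allocation is freed, since the accumulated deltas must survive the reallocation.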
diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c
index 90364b55..4215c119 100644
--- a/libbcachefs/debug.c
+++ b/libbcachefs/debug.c
@@ -29,40 +29,19 @@
static struct dentry *bch_debug;
-#ifdef CONFIG_BCACHEFS_DEBUG
-
-void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
+static bool bch2_btree_verify_replica(struct bch_fs *c, struct btree *b,
+ struct extent_ptr_decoded pick)
{
struct btree *v = c->verify_data;
- struct btree_node *n_ondisk, *n_sorted, *n_inmemory;
- struct bset *sorted, *inmemory;
- struct extent_ptr_decoded pick;
- struct bch_dev *ca;
+ struct btree_node *n_ondisk = c->verify_ondisk;
+ struct btree_node *n_sorted = c->verify_data->data;
+ struct bset *sorted, *inmemory = &b->data->keys;
+ struct bch_dev *ca = bch_dev_bkey_exists(c, pick.ptr.dev);
struct bio *bio;
+ bool failed = false;
- if (c->opts.nochanges)
- return;
-
- btree_node_io_lock(b);
- mutex_lock(&c->verify_lock);
-
- n_ondisk = c->verify_ondisk;
- n_sorted = c->verify_data->data;
- n_inmemory = b->data;
-
- bkey_copy(&v->key, &b->key);
- v->written = 0;
- v->c.level = b->c.level;
- v->c.btree_id = b->c.btree_id;
- bch2_btree_keys_init(v);
-
- if (bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key),
- NULL, &pick) <= 0)
- return;
-
- ca = bch_dev_bkey_exists(c, pick.ptr.dev);
if (!bch2_dev_get_ioref(ca, READ))
- return;
+ return false;
bio = bio_alloc_bioset(GFP_NOIO,
buf_pages(n_sorted, btree_bytes(c)),
@@ -79,12 +58,12 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
memcpy(n_ondisk, n_sorted, btree_bytes(c));
+ v->written = 0;
if (bch2_btree_node_read_done(c, ca, v, false))
- goto out;
+ return false;
n_sorted = c->verify_data->data;
sorted = &n_sorted->keys;
- inmemory = &n_inmemory->keys;
if (inmemory->u64s != sorted->u64s ||
memcmp(inmemory->start,
@@ -102,8 +81,8 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
printk(KERN_ERR "*** read back in:\n");
bch2_dump_bset(c, v, sorted, 0);
- while (offset < b->written) {
- if (!offset ) {
+ while (offset < v->written) {
+ if (!offset) {
i = &n_ondisk->keys;
sectors = vstruct_blocks(n_ondisk, c->block_bits) <<
c->block_bits;
@@ -122,25 +101,84 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
offset += sectors;
}
- printk(KERN_ERR "*** block %u/%u not written\n",
- offset >> c->block_bits, btree_blocks(c));
-
for (j = 0; j < le16_to_cpu(inmemory->u64s); j++)
if (inmemory->_data[j] != sorted->_data[j])
break;
- printk(KERN_ERR "b->written %u\n", b->written);
-
console_unlock();
- panic("verify failed at %u\n", j);
+ bch_err(c, "verify failed at key %u", j);
+
+ failed = true;
+ }
+
+ if (v->written != b->written) {
+ bch_err(c, "written wrong: expected %u, got %u",
+ b->written, v->written);
+ failed = true;
+ }
+
+ return failed;
+}
+
+void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
+{
+ struct bkey_ptrs_c ptrs;
+ struct extent_ptr_decoded p;
+ const union bch_extent_entry *entry;
+ struct btree *v;
+ struct bset *inmemory = &b->data->keys;
+ struct bkey_packed *k;
+ bool failed = false;
+
+ if (c->opts.nochanges)
+ return;
+
+ btree_node_io_lock(b);
+ mutex_lock(&c->verify_lock);
+
+ if (!c->verify_ondisk) {
+ c->verify_ondisk = kvpmalloc(btree_bytes(c), GFP_KERNEL);
+ if (!c->verify_ondisk)
+ goto out;
+ }
+
+ if (!c->verify_data) {
+ c->verify_data = __bch2_btree_node_mem_alloc(c);
+ if (!c->verify_data)
+ goto out;
+
+ list_del_init(&c->verify_data->list);
+ }
+
+ BUG_ON(b->nsets != 1);
+
+ for (k = inmemory->start; k != vstruct_last(inmemory); k = bkey_next(k))
+ if (k->type == KEY_TYPE_btree_ptr_v2) {
+ struct bch_btree_ptr_v2 *v = (void *) bkeyp_val(&b->format, k);
+ v->mem_ptr = 0;
+ }
+
+ v = c->verify_data;
+ bkey_copy(&v->key, &b->key);
+ v->c.level = b->c.level;
+ v->c.btree_id = b->c.btree_id;
+ bch2_btree_keys_init(v);
+
+ ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(&b->key));
+ bkey_for_each_ptr_decode(&b->key.k, ptrs, p, entry)
+ failed |= bch2_btree_verify_replica(c, b, p);
+
+ if (failed) {
+ char buf[200];
+
+ bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(&b->key));
+ bch2_fs_fatal_error(c, "btree node verify failed for : %s\n", buf);
}
out:
mutex_unlock(&c->verify_lock);
btree_node_io_unlock(b);
}
-#endif
-
#ifdef CONFIG_DEBUG_FS
/* XXX: bch_fs refcounting */
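__bch2_btree_verify() is restructured around a per-replica helper, so every pointer in the node's key is read back and compared, not just the first readable one. Because the verify machinery is now compiled in unconditionally, the scratch buffers are no longer allocated at mount; they are allocated lazily, on first use, under verify_lock, and a mismatch now reports through bch2_fs_fatal_error() instead of panicking. The lazy-init shape, sketched with hypothetical names:

	#include <linux/mm.h>
	#include <linux/mutex.h>
	#include <linux/slab.h>

	struct verify_ctx {
		struct mutex	verify_lock;
		void		*verify_buf;
	};

	static void verify_node(struct verify_ctx *c, size_t bytes)
	{
		mutex_lock(&c->verify_lock);

		if (!c->verify_buf) {
			c->verify_buf = kvmalloc(bytes, GFP_KERNEL);
			if (!c->verify_buf)
				goto out;	/* skip verify, don't fail */
		}

		/* ... read each replica into verify_buf and compare ... */
	out:
		mutex_unlock(&c->verify_lock);
	}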
diff --git a/libbcachefs/debug.h b/libbcachefs/debug.h
index 7ac1615e..0b86736e 100644
--- a/libbcachefs/debug.h
+++ b/libbcachefs/debug.h
@@ -8,11 +8,7 @@ struct bio;
struct btree;
struct bch_fs;
-#ifdef CONFIG_BCACHEFS_DEBUG
void __bch2_btree_verify(struct bch_fs *, struct btree *);
-#else
-static inline void __bch2_btree_verify(struct bch_fs *c, struct btree *b) {}
-#endif
static inline void bch2_btree_verify(struct bch_fs *c, struct btree *b)
{
diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c
index f712f685..7062ab9c 100644
--- a/libbcachefs/ec.c
+++ b/libbcachefs/ec.c
@@ -1621,6 +1621,7 @@ int bch2_stripes_write(struct bch_fs *c, unsigned flags)
if (ret)
break;
}
+ bch2_trans_iter_put(&trans, iter);
bch2_trans_exit(&trans);
diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c
index eb8ac164..26fbd8c2 100644
--- a/libbcachefs/fsck.c
+++ b/libbcachefs/fsck.c
@@ -38,9 +38,9 @@ static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum)
return ret ?: sectors;
}
-static int lookup_inode(struct btree_trans *trans, u64 inode_nr,
- struct bch_inode_unpacked *inode,
- u32 *snapshot)
+static int __lookup_inode(struct btree_trans *trans, u64 inode_nr,
+ struct bch_inode_unpacked *inode,
+ u32 *snapshot)
{
struct btree_iter *iter;
struct bkey_s_c k;
@@ -63,19 +63,34 @@ err:
return ret;
}
-static int write_inode(struct btree_trans *trans,
- struct bch_inode_unpacked *inode,
- u32 snapshot)
+static int lookup_inode(struct btree_trans *trans, u64 inode_nr,
+ struct bch_inode_unpacked *inode,
+ u32 *snapshot)
+{
+ return lockrestart_do(trans, __lookup_inode(trans, inode_nr, inode, snapshot));
+}
+
+static int __write_inode(struct btree_trans *trans,
+ struct bch_inode_unpacked *inode,
+ u32 snapshot)
{
struct btree_iter *inode_iter =
bch2_trans_get_iter(trans, BTREE_ID_inodes,
SPOS(0, inode->bi_inum, snapshot),
BTREE_ITER_INTENT);
+ int ret = bch2_inode_write(trans, inode_iter, inode);
+ bch2_trans_iter_put(trans, inode_iter);
+ return ret;
+}
+
+static int write_inode(struct btree_trans *trans,
+ struct bch_inode_unpacked *inode,
+ u32 snapshot)
+{
int ret = __bch2_trans_do(trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW,
- bch2_inode_write(trans, inode_iter, inode));
- bch2_trans_iter_put(trans, inode_iter);
+ __write_inode(trans, inode, snapshot));
if (ret)
bch_err(trans->c, "error in fsck: error %i updating inode", ret);
return ret;
@@ -114,57 +129,101 @@ static int remove_dirent(struct btree_trans *trans, struct bpos pos)
return ret;
}
-static int __reattach_inode(struct btree_trans *trans,
- struct bch_inode_unpacked *lostfound,
- u64 inum)
+/* Get lost+found, create if it doesn't exist: */
+static int lookup_lostfound(struct btree_trans *trans,
+ struct bch_inode_unpacked *lostfound)
{
- struct bch_hash_info dir_hash =
- bch2_hash_info_init(trans->c, lostfound);
- struct bch_inode_unpacked inode_u;
+ struct bch_fs *c = trans->c;
+ struct bch_inode_unpacked root;
+ struct bch_hash_info root_hash_info;
+ struct qstr lostfound_str = QSTR("lost+found");
+ u64 inum;
+ u32 snapshot;
+ int ret;
+
+ ret = lookup_inode(trans, BCACHEFS_ROOT_INO, &root, &snapshot);
+ if (ret && ret != -ENOENT)
+ return ret;
+
+ root_hash_info = bch2_hash_info_init(c, &root);
+ inum = bch2_dirent_lookup(c, BCACHEFS_ROOT_INO, &root_hash_info,
+ &lostfound_str);
+ if (!inum) {
+ bch_notice(c, "creating lost+found");
+ goto create_lostfound;
+ }
+
+ ret = lookup_inode(trans, inum, lostfound, &snapshot);
+ if (ret && ret != -ENOENT) {
+ /*
+ * The check_dirents pass has already run, dangling dirents
+ * shouldn't exist here:
+ */
+ bch_err(c, "error looking up lost+found: %i", ret);
+ return ret;
+ }
+
+ if (ret == -ENOENT) {
+create_lostfound:
+ bch2_inode_init_early(c, lostfound);
+
+ ret = __bch2_trans_do(trans, NULL, NULL,
+ BTREE_INSERT_NOFAIL|
+ BTREE_INSERT_LAZY_RW,
+ bch2_create_trans(trans,
+ BCACHEFS_ROOT_INO, &root,
+ lostfound,
+ &lostfound_str,
+ 0, 0, S_IFDIR|0700, 0, NULL, NULL));
+ if (ret)
+ bch_err(c, "error creating lost+found: %i", ret);
+ }
+
+ return ret;
+}
+
+static int reattach_inode(struct btree_trans *trans,
+ struct bch_inode_unpacked *inode)
+{
+ struct bch_hash_info dir_hash;
+ struct bch_inode_unpacked lostfound;
char name_buf[20];
struct qstr name;
u64 dir_offset = 0;
- u32 snapshot;
int ret;
- snprintf(name_buf, sizeof(name_buf), "%llu", inum);
- name = (struct qstr) QSTR(name_buf);
-
- ret = lookup_inode(trans, inum, &inode_u, &snapshot);
+ ret = lookup_lostfound(trans, &lostfound);
if (ret)
return ret;
- if (S_ISDIR(inode_u.bi_mode)) {
- lostfound->bi_nlink++;
+ if (S_ISDIR(inode->bi_mode)) {
+ lostfound.bi_nlink++;
- ret = write_inode(trans, lostfound, U32_MAX);
+ ret = write_inode(trans, &lostfound, U32_MAX);
if (ret)
return ret;
}
- ret = bch2_dirent_create(trans, lostfound->bi_inum, &dir_hash,
- mode_to_type(inode_u.bi_mode),
- &name, inum, &dir_offset,
- BCH_HASH_SET_MUST_CREATE);
- if (ret)
- return ret;
+ dir_hash = bch2_hash_info_init(trans->c, &lostfound);
- inode_u.bi_dir = lostfound->bi_inum;
- inode_u.bi_dir_offset = dir_offset;
+ snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum);
+ name = (struct qstr) QSTR(name_buf);
- return write_inode(trans, &inode_u, U32_MAX);
-}
+ ret = __bch2_trans_do(trans, NULL, NULL, BTREE_INSERT_LAZY_RW,
+ bch2_dirent_create(trans, lostfound.bi_inum, &dir_hash,
+ mode_to_type(inode->bi_mode),
+ &name, inode->bi_inum, &dir_offset,
+ BCH_HASH_SET_MUST_CREATE));
+ if (ret) {
+ bch_err(trans->c, "error %i reattaching inode %llu",
+ ret, inode->bi_inum);
+ return ret;
+ }
-static int reattach_inode(struct btree_trans *trans,
- struct bch_inode_unpacked *lostfound,
- u64 inum)
-{
- int ret = __bch2_trans_do(trans, NULL, NULL, BTREE_INSERT_LAZY_RW,
- __reattach_inode(trans, lostfound, inum));
- if (ret)
- bch_err(trans->c, "error %i reattaching inode %llu", ret, inum);
+ inode->bi_dir = lostfound.bi_inum;
+ inode->bi_dir_offset = dir_offset;
- return ret;
+ return write_inode(trans, inode, U32_MAX);
}
static int remove_backpointer(struct btree_trans *trans,
@@ -931,58 +990,6 @@ create_root:
BTREE_INSERT_LAZY_RW);
}
-/* Get lost+found, create if it doesn't exist: */
-static int check_lostfound(struct bch_fs *c,
- struct bch_inode_unpacked *root_inode,
- struct bch_inode_unpacked *lostfound_inode)
-{
- struct qstr lostfound = QSTR("lost+found");
- struct bch_hash_info root_hash_info =
- bch2_hash_info_init(c, root_inode);
- u64 inum;
- u32 snapshot;
- int ret;
-
- bch_verbose(c, "checking lost+found");
-
- inum = bch2_dirent_lookup(c, BCACHEFS_ROOT_INO, &root_hash_info,
- &lostfound);
- if (!inum) {
- bch_notice(c, "creating lost+found");
- goto create_lostfound;
- }
-
- ret = bch2_trans_do(c, NULL, NULL, 0,
- lookup_inode(&trans, inum, lostfound_inode, &snapshot));
- if (ret && ret != -ENOENT)
- return ret;
-
- if (fsck_err_on(ret, c, "lost+found missing"))
- goto create_lostfound;
-
- if (fsck_err_on(!S_ISDIR(lostfound_inode->bi_mode), c,
- "lost+found inode not a directory"))
- goto create_lostfound;
-
- return 0;
-fsck_err:
- return ret;
-create_lostfound:
- bch2_inode_init_early(c, lostfound_inode);
-
- ret = bch2_trans_do(c, NULL, NULL,
- BTREE_INSERT_NOFAIL|
- BTREE_INSERT_LAZY_RW,
- bch2_create_trans(&trans,
- BCACHEFS_ROOT_INO, root_inode,
- lostfound_inode, &lostfound,
- 0, 0, S_IFDIR|0700, 0, NULL, NULL));
- if (ret)
- bch_err(c, "error creating lost+found: %i", ret);
-
- return ret;
-}
-
struct pathbuf {
size_t nr;
size_t size;
@@ -1014,7 +1021,6 @@ static int path_down(struct pathbuf *p, u64 inum)
}
static int check_path(struct btree_trans *trans,
- struct bch_inode_unpacked *lostfound,
struct pathbuf *p,
struct bch_inode_unpacked *inode)
{
@@ -1038,7 +1044,7 @@ static int check_path(struct btree_trans *trans,
inode->bi_nlink,
inode->bi_dir,
inode->bi_dir_offset))
- ret = reattach_inode(trans, lostfound, inode->bi_inum);
+ ret = reattach_inode(trans, inode);
break;
}
ret = 0;
@@ -1067,12 +1073,11 @@ static int check_path(struct btree_trans *trans,
break;
}
- ret = reattach_inode(trans, lostfound, inode->bi_inum);
+ ret = reattach_inode(trans, inode);
break;
}
- ret = lockrestart_do(trans,
- lookup_inode(trans, inode->bi_dir, inode, &snapshot));
+ ret = lookup_inode(trans, inode->bi_dir, inode, &snapshot);
if (ret) {
/* Should have been caught in dirents pass */
bch_err(c, "error looking up parent directory: %i", ret);
@@ -1090,8 +1095,7 @@ fsck_err:
* After check_dirents(), if an inode backpointer doesn't exist that means it's
* unreachable:
*/
-static int check_directory_structure(struct bch_fs *c,
- struct bch_inode_unpacked *lostfound)
+static int check_directory_structure(struct bch_fs *c)
{
struct btree_trans trans;
struct btree_iter *iter;
@@ -1113,7 +1117,7 @@ static int check_directory_structure(struct bch_fs *c,
break;
}
- ret = check_path(&trans, lostfound, &path, &u);
+ ret = check_path(&trans, &path, &u);
if (ret)
break;
}
@@ -1190,7 +1194,6 @@ static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links,
}
static int check_inode_nlink(struct btree_trans *trans,
- struct bch_inode_unpacked *lostfound_inode,
struct btree_iter *iter,
struct bkey_s_c_inode inode,
unsigned nlink)
@@ -1238,7 +1241,6 @@ fsck_err:
noinline_for_stack
static int bch2_gc_walk_inodes(struct bch_fs *c,
- struct bch_inode_unpacked *lostfound_inode,
nlink_table *links,
u64 range_start, u64 range_end)
{
@@ -1259,7 +1261,7 @@ static int bch2_gc_walk_inodes(struct bch_fs *c,
continue;
link = genradix_ptr(links, k.k->p.offset - range_start);
- ret = check_inode_nlink(&trans, lostfound_inode, iter,
+ ret = check_inode_nlink(&trans, iter,
bkey_s_c_to_inode(k), link ? link->count : 0);
if (ret)
break;
@@ -1275,8 +1277,7 @@ static int bch2_gc_walk_inodes(struct bch_fs *c,
}
noinline_for_stack
-static int check_nlinks(struct bch_fs *c,
- struct bch_inode_unpacked *lostfound_inode)
+static int check_nlinks(struct bch_fs *c)
{
nlink_table links;
u64 this_iter_range_start, next_iter_range_start = 0;
@@ -1296,7 +1297,7 @@ static int check_nlinks(struct bch_fs *c,
if (ret)
break;
- ret = bch2_gc_walk_inodes(c, lostfound_inode, &links,
+ ret = bch2_gc_walk_inodes(c, &links,
this_iter_range_start,
next_iter_range_start);
if (ret)
@@ -1316,16 +1317,15 @@ static int check_nlinks(struct bch_fs *c,
*/
int bch2_fsck_full(struct bch_fs *c)
{
- struct bch_inode_unpacked root_inode, lostfound_inode;
+ struct bch_inode_unpacked root_inode;
return check_inodes(c, true) ?:
check_extents(c) ?:
check_dirents(c) ?:
check_xattrs(c) ?:
check_root(c, &root_inode) ?:
- check_lostfound(c, &root_inode, &lostfound_inode) ?:
- check_directory_structure(c, &lostfound_inode) ?:
- check_nlinks(c, &lostfound_inode);
+ check_directory_structure(c) ?:
+ check_nlinks(c);
}
int bch2_fsck_walk_inodes_only(struct bch_fs *c)
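The fsck refactor removes the lostfound_inode that was threaded through every pass: reattach_inode() now fetches lost+found on demand through lookup_lostfound(), which creates it if missing, replacing the old check_lostfound() pass. Helpers are also split into a __foo() that makes a single attempt and a foo() that wraps it in lockrestart_do(), so callers no longer handle transaction restarts themselves. The restart wrapper is, in essence (a sketch; the real macro may differ in detail):

	/* Rerun the body until it stops asking for a transaction restart: */
	#define lockrestart_do(_trans, _do)				\
	({								\
		int _ret;						\
									\
		do {							\
			bch2_trans_begin(_trans);			\
			_ret = (_do);					\
		} while (_ret == -EINTR);				\
									\
		_ret;							\
	})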
diff --git a/libbcachefs/journal_reclaim.c b/libbcachefs/journal_reclaim.c
index f117d361..24d04e51 100644
--- a/libbcachefs/journal_reclaim.c
+++ b/libbcachefs/journal_reclaim.c
@@ -634,7 +634,7 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct)
msecs_to_jiffies(j->reclaim_delay_ms)))
min_nr = 1;
- if (j->prereserved.reserved * 2 > j->prereserved.remaining)
+ if (j->prereserved.reserved * 4 > j->prereserved.remaining)
min_nr = 1;
if (fifo_free(&j->pin) <= 32)
diff --git a/libbcachefs/keylist.c b/libbcachefs/keylist.c
index 864dfaa6..cda77835 100644
--- a/libbcachefs/keylist.c
+++ b/libbcachefs/keylist.c
@@ -62,6 +62,6 @@ void bch2_verify_keylist_sorted(struct keylist *l)
for_each_keylist_key(l, k)
BUG_ON(bkey_next(k) != l->top &&
- bkey_cmp(k->k.p, bkey_next(k)->k.p) >= 0);
+ bpos_cmp(k->k.p, bkey_next(k)->k.p) >= 0);
}
#endif
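This one-character change is the fix named in the commit subject. With snapshots, struct bpos carries a snapshot field that bkey_cmp() deliberately ignores, while bpos_cmp() compares the full position; keylists are sorted by full position, so the assertion must use bpos_cmp() or it will fire on consecutive keys that differ only in snapshot. Roughly (assumed definitions for illustration, not copied from the source):

	static inline int bpos_cmp(struct bpos l, struct bpos r)
	{
		return  cmp_int(l.inode,    r.inode) ?:
			cmp_int(l.offset,   r.offset) ?:
			cmp_int(l.snapshot, r.snapshot);
	}

	static inline int bkey_cmp(struct bpos l, struct bpos r)
	{
		return  cmp_int(l.inode,  r.inode) ?:
			cmp_int(l.offset, r.offset);	/* snapshot ignored */
	}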
diff --git a/libbcachefs/move.c b/libbcachefs/move.c
index aa8e8c25..778ff72c 100644
--- a/libbcachefs/move.c
+++ b/libbcachefs/move.c
@@ -762,7 +762,7 @@ static int bch2_move_btree(struct bch_fs *c,
id == start_btree_id ? start_pos : POS_MIN,
BTREE_ITER_PREFETCH, b) {
if (kthread && kthread_should_stop())
- goto out;
+ break;
if ((cmp_int(id, end_btree_id) ?:
bkey_cmp(b->key.k.p, end_pos)) > 0)
@@ -789,8 +789,10 @@ next:
}
ret = bch2_trans_iter_free(&trans, iter) ?: ret;
+ if (kthread && kthread_should_stop())
+ break;
}
-out:
+
bch2_trans_exit(&trans);
if (ret)
diff --git a/libbcachefs/movinggc.c b/libbcachefs/movinggc.c
index 80772cff..4ac7e61f 100644
--- a/libbcachefs/movinggc.c
+++ b/libbcachefs/movinggc.c
@@ -87,9 +87,20 @@ static enum data_cmd copygc_pred(struct bch_fs *c, void *arg,
if (i >= 0 &&
p.ptr.offset < h->data[i].offset + ca->mi.bucket_size &&
p.ptr.gen == h->data[i].gen) {
+ /*
+ * We need to use the journal reserve here, because
+ * - journal reclaim depends on btree key cache
+ * flushing to make forward progress,
+ * - which has to make forward progress when the
+ * journal is pre-reservation full,
+ * - and depends on allocation - meaning allocator and
+ * copygc
+ */
+
data_opts->target = io_opts->background_target;
data_opts->nr_replicas = 1;
- data_opts->btree_insert_flags = BTREE_INSERT_USE_RESERVE;
+ data_opts->btree_insert_flags = BTREE_INSERT_USE_RESERVE|
+ BTREE_INSERT_JOURNAL_RESERVED;
data_opts->rewrite_dev = p.ptr.dev;
if (p.has_ec)
diff --git a/libbcachefs/replicas.c b/libbcachefs/replicas.c
index 4128a1b3..8e6cccd3 100644
--- a/libbcachefs/replicas.c
+++ b/libbcachefs/replicas.c
@@ -1063,11 +1063,27 @@ unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)
return ret;
}
+void bch2_fs_replicas_exit(struct bch_fs *c)
+{
+ unsigned i;
+
+ kfree(c->usage_scratch);
+ for (i = 0; i < ARRAY_SIZE(c->usage); i++)
+ free_percpu(c->usage[i]);
+ kfree(c->usage_base);
+ kfree(c->replicas.entries);
+ kfree(c->replicas_gc.entries);
+
+ mempool_exit(&c->replicas_delta_pool);
+}
+
int bch2_fs_replicas_init(struct bch_fs *c)
{
bch2_journal_entry_res_resize(&c->journal,
&c->replicas_journal_res,
reserve_journal_replicas(c, &c->replicas));
- return replicas_table_update(c, &c->replicas);
+ return mempool_init_kmalloc_pool(&c->replicas_delta_pool, 1,
+ REPLICAS_DELTA_LIST_MAX) ?:
+ replicas_table_update(c, &c->replicas);
}
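Freeing of the replicas tables, usage arrays, and the new replicas_delta_pool is collected into bch2_fs_replicas_exit(), pairing with bch2_fs_replicas_init() (the matching removals from __bch2_fs_free() are in the super.c hunks below). The init chain uses the GNU "?:" extension to stop at the first failing step; expanded, it reads:

	int bch2_fs_replicas_init(struct bch_fs *c)
	{
		int ret;

		bch2_journal_entry_res_resize(&c->journal,
					      &c->replicas_journal_res,
					      reserve_journal_replicas(c, &c->replicas));

		ret = mempool_init_kmalloc_pool(&c->replicas_delta_pool, 1,
						REPLICAS_DELTA_LIST_MAX);
		if (ret)
			return ret;

		return replicas_table_update(c, &c->replicas);
	}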
diff --git a/libbcachefs/replicas.h b/libbcachefs/replicas.h
index c77e873e..72ac544f 100644
--- a/libbcachefs/replicas.h
+++ b/libbcachefs/replicas.h
@@ -102,6 +102,7 @@ int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *);
extern const struct bch_sb_field_ops bch_sb_field_ops_replicas;
extern const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0;
+void bch2_fs_replicas_exit(struct bch_fs *);
int bch2_fs_replicas_init(struct bch_fs *);
#endif /* _BCACHEFS_REPLICAS_H */
diff --git a/libbcachefs/super.c b/libbcachefs/super.c
index 61fd1144..b6e449a7 100644
--- a/libbcachefs/super.c
+++ b/libbcachefs/super.c
@@ -477,6 +477,7 @@ static void __bch2_fs_free(struct bch_fs *c)
bch2_fs_btree_iter_exit(c);
bch2_fs_btree_key_cache_exit(&c->btree_key_cache);
bch2_fs_btree_cache_exit(c);
+ bch2_fs_replicas_exit(c);
bch2_fs_journal_exit(&c->journal);
bch2_io_clock_exit(&c->io_clock[WRITE]);
bch2_io_clock_exit(&c->io_clock[READ]);
@@ -484,10 +485,6 @@ static void __bch2_fs_free(struct bch_fs *c)
bch2_journal_keys_free(&c->journal_keys);
bch2_journal_entries_free(&c->journal_entries);
percpu_free_rwsem(&c->mark_lock);
- kfree(c->usage_scratch);
- for (i = 0; i < ARRAY_SIZE(c->usage); i++)
- free_percpu(c->usage[i]);
- kfree(c->usage_base);
if (c->btree_iters_bufs)
for_each_possible_cpu(cpu)
@@ -501,8 +498,6 @@ static void __bch2_fs_free(struct bch_fs *c)
bioset_exit(&c->btree_bio);
mempool_exit(&c->fill_iter);
percpu_ref_exit(&c->writes);
- kfree(c->replicas.entries);
- kfree(c->replicas_gc.entries);
kfree(rcu_dereference_protected(c->disk_groups, 1));
kfree(c->journal_seq_blacklist_table);
kfree(c->unused_inode_hints);