summaryrefslogtreecommitdiff
path: root/libbcachefs
diff options
context:
space:
mode:
Diffstat (limited to 'libbcachefs')
-rw-r--r--libbcachefs/backpointers.c3
-rw-r--r--libbcachefs/btree_cache.c26
-rw-r--r--libbcachefs/btree_cache.h1
-rw-r--r--libbcachefs/btree_io.c20
-rw-r--r--libbcachefs/btree_journal_iter.c17
-rw-r--r--libbcachefs/btree_node_scan.c84
-rw-r--r--libbcachefs/btree_trans_commit.c2
-rw-r--r--libbcachefs/btree_update.c27
-rw-r--r--libbcachefs/btree_update.h21
-rw-r--r--libbcachefs/data_update.c13
-rw-r--r--libbcachefs/debug.c11
-rw-r--r--libbcachefs/errcode.h1
-rw-r--r--libbcachefs/error.c2
-rw-r--r--libbcachefs/extents.c16
-rw-r--r--libbcachefs/fs-io-buffered.c2
-rw-r--r--libbcachefs/fsck.c3
-rw-r--r--libbcachefs/io_read.h2
-rw-r--r--libbcachefs/io_write.c1
-rw-r--r--libbcachefs/journal.c14
-rw-r--r--libbcachefs/journal_io.c2
-rw-r--r--libbcachefs/opts.h11
-rw-r--r--libbcachefs/recovery.c23
-rw-r--r--libbcachefs/sb-members.c1
-rw-r--r--libbcachefs/super.c10
-rw-r--r--libbcachefs/trace.h5
25 files changed, 162 insertions, 156 deletions
diff --git a/libbcachefs/backpointers.c b/libbcachefs/backpointers.c
index 77d93beb..bc277f42 100644
--- a/libbcachefs/backpointers.c
+++ b/libbcachefs/backpointers.c
@@ -144,7 +144,8 @@ static noinline int backpointer_mod_err(struct btree_trans *trans,
if (!will_check && __bch2_inconsistent_error(c, &buf))
ret = bch_err_throw(c, erofs_unfixed_errors);
- bch_err(c, "%s", buf.buf);
+ if (buf.buf)
+ bch_err(c, "%s", buf.buf);
printbuf_exit(&buf);
return ret;
}
diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c
index a3631a90..49505653 100644
--- a/libbcachefs/btree_cache.c
+++ b/libbcachefs/btree_cache.c
@@ -86,7 +86,7 @@ void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b)
six_unlock_intent(&b->c.lock);
}
-static void __btree_node_data_free(struct btree_cache *bc, struct btree *b)
+void __btree_node_data_free(struct btree *b)
{
BUG_ON(!list_empty(&b->list));
BUG_ON(btree_node_hashed(b));
@@ -113,16 +113,17 @@ static void __btree_node_data_free(struct btree_cache *bc, struct btree *b)
munmap(b->aux_data, btree_aux_data_bytes(b));
#endif
b->aux_data = NULL;
-
- btree_node_to_freedlist(bc, b);
}
static void btree_node_data_free(struct btree_cache *bc, struct btree *b)
{
BUG_ON(list_empty(&b->list));
list_del_init(&b->list);
+
+ __btree_node_data_free(b);
+
--bc->nr_freeable;
- __btree_node_data_free(bc, b);
+ btree_node_to_freedlist(bc, b);
}
static int bch2_btree_cache_cmp_fn(struct rhashtable_compare_arg *arg,
@@ -186,10 +187,7 @@ static struct btree *__btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp)
struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c)
{
- struct btree_cache *bc = &c->btree_cache;
- struct btree *b;
-
- b = __btree_node_mem_alloc(c, GFP_KERNEL);
+ struct btree *b = __btree_node_mem_alloc(c, GFP_KERNEL);
if (!b)
return NULL;
@@ -199,8 +197,6 @@ struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c)
}
bch2_btree_lock_init(&b->c, 0, GFP_KERNEL);
-
- __bch2_btree_node_to_freelist(bc, b);
return b;
}
@@ -526,7 +522,8 @@ restart:
--touched;;
} else if (!btree_node_reclaim(c, b)) {
__bch2_btree_node_hash_remove(bc, b);
- __btree_node_data_free(bc, b);
+ __btree_node_data_free(b);
+ btree_node_to_freedlist(bc, b);
freed++;
bc->nr_freed++;
@@ -667,9 +664,12 @@ int bch2_fs_btree_cache_init(struct bch_fs *c)
bch2_recalc_btree_reserve(c);
- for (i = 0; i < bc->nr_reserve; i++)
- if (!__bch2_btree_node_mem_alloc(c))
+ for (i = 0; i < bc->nr_reserve; i++) {
+ struct btree *b = __bch2_btree_node_mem_alloc(c);
+ if (!b)
goto err;
+ __bch2_btree_node_to_freelist(bc, b);
+ }
list_splice_init(&bc->live[0].list, &bc->freeable);
diff --git a/libbcachefs/btree_cache.h b/libbcachefs/btree_cache.h
index 3264801c..649e9dfd 100644
--- a/libbcachefs/btree_cache.h
+++ b/libbcachefs/btree_cache.h
@@ -30,6 +30,7 @@ void bch2_btree_node_update_key_early(struct btree_trans *, enum btree_id, unsig
void bch2_btree_cache_cannibalize_unlock(struct btree_trans *);
int bch2_btree_cache_cannibalize_lock(struct btree_trans *, struct closure *);
+void __btree_node_data_free(struct btree *);
struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *);
struct btree *bch2_btree_node_mem_alloc(struct btree_trans *, bool);
diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c
index 84e302af..8924dae1 100644
--- a/libbcachefs/btree_io.c
+++ b/libbcachefs/btree_io.c
@@ -24,8 +24,15 @@
#include "super-io.h"
#include "trace.h"
+#include <linux/moduleparam.h>
#include <linux/sched/mm.h>
+#ifdef CONFIG_BCACHEFS_DEBUG
+static unsigned bch2_btree_read_corrupt_ratio;
+module_param_named(btree_read_corrupt_ratio, bch2_btree_read_corrupt_ratio, uint, 0644);
+MODULE_PARM_DESC(btree_read_corrupt_ratio, "");
+#endif
+
static void bch2_btree_node_header_to_text(struct printbuf *out, struct btree_node *bn)
{
bch2_btree_id_level_to_text(out, BTREE_NODE_ID(bn), BTREE_NODE_LEVEL(bn));
@@ -568,9 +575,9 @@ static int __btree_err(int ret,
bch2_mark_btree_validate_failure(failed, ca->dev_idx);
struct extent_ptr_decoded pick;
- have_retry = !bch2_bkey_pick_read_device(c,
+ have_retry = bch2_bkey_pick_read_device(c,
bkey_i_to_s_c(&b->key),
- failed, &pick, -1);
+ failed, &pick, -1) == 1;
}
if (!have_retry && ret == -BCH_ERR_btree_node_read_err_want_retry)
@@ -615,7 +622,6 @@ static int __btree_err(int ret,
goto out;
case -BCH_ERR_btree_node_read_err_bad_node:
prt_str(&out, ", ");
- ret = __bch2_topology_error(c, &out);
break;
}
@@ -644,7 +650,6 @@ static int __btree_err(int ret,
goto out;
case -BCH_ERR_btree_node_read_err_bad_node:
prt_str(&out, ", ");
- ret = __bch2_topology_error(c, &out);
break;
}
print:
@@ -1408,7 +1413,7 @@ static void btree_node_read_work(struct work_struct *work)
ret = bch2_bkey_pick_read_device(c,
bkey_i_to_s_c(&b->key),
&failed, &rb->pick, -1);
- if (ret) {
+ if (ret <= 0) {
set_btree_node_read_error(b);
break;
}
@@ -1439,6 +1444,11 @@ start:
continue;
}
+ memset(&bio->bi_iter, 0, sizeof(bio->bi_iter));
+ bio->bi_iter.bi_size = btree_buf_bytes(b);
+
+ bch2_maybe_corrupt_bio(bio, bch2_btree_read_corrupt_ratio);
+
ret = bch2_btree_node_read_done(c, ca, b, &failed, &buf);
if (ret == -BCH_ERR_btree_node_read_err_want_retry ||
ret == -BCH_ERR_btree_node_read_err_must_retry)
diff --git a/libbcachefs/btree_journal_iter.c b/libbcachefs/btree_journal_iter.c
index 341d31b3..ea839560 100644
--- a/libbcachefs/btree_journal_iter.c
+++ b/libbcachefs/btree_journal_iter.c
@@ -717,18 +717,6 @@ static void __journal_keys_sort(struct journal_keys *keys)
keys->nr = dst - keys->data;
}
-static bool should_rewind_entry(struct bch_fs *c, struct jset_entry *entry)
-{
- if (entry->level)
- return false;
- if (btree_id_is_alloc(entry->btree_id))
- return false;
- if (c->opts.journal_rewind_no_extents &&
- entry->btree_id == BTREE_ID_extents)
- return false;
- return true;
-}
-
int bch2_journal_keys_sort(struct bch_fs *c)
{
struct genradix_iter iter;
@@ -747,8 +735,9 @@ int bch2_journal_keys_sort(struct bch_fs *c)
cond_resched();
vstruct_for_each(&i->j, entry) {
- bool rewind = le64_to_cpu(i->j.seq) >= rewind_seq &&
- should_rewind_entry(c, entry);
+ bool rewind = !entry->level &&
+ !btree_id_is_alloc(entry->btree_id) &&
+ le64_to_cpu(i->j.seq) >= rewind_seq;
if (entry->type != (rewind
? BCH_JSET_ENTRY_overwrite
diff --git a/libbcachefs/btree_node_scan.c b/libbcachefs/btree_node_scan.c
index 23d8c62e..42c9eb2c 100644
--- a/libbcachefs/btree_node_scan.c
+++ b/libbcachefs/btree_node_scan.c
@@ -75,39 +75,6 @@ static inline u64 bkey_journal_seq(struct bkey_s_c k)
}
}
-static bool found_btree_node_is_readable(struct btree_trans *trans,
- struct found_btree_node *f)
-{
- struct { __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX); } tmp;
-
- found_btree_node_to_key(&tmp.k, f);
-
- struct btree *b = bch2_btree_node_get_noiter(trans, &tmp.k, f->btree_id, f->level, false);
- bool ret = !IS_ERR_OR_NULL(b);
- if (!ret)
- return ret;
-
- f->sectors_written = b->written;
- f->journal_seq = le64_to_cpu(b->data->keys.journal_seq);
-
- struct bkey_s_c k;
- struct bkey unpacked;
- struct btree_node_iter iter;
- for_each_btree_node_key_unpack(b, k, &iter, &unpacked)
- f->journal_seq = max(f->journal_seq, bkey_journal_seq(k));
-
- six_unlock_read(&b->c.lock);
-
- /*
- * We might update this node's range; if that happens, we need the node
- * to be re-read so the read path can trim keys that are no longer in
- * this node
- */
- if (b != btree_node_root(trans->c, b))
- bch2_btree_node_evict(trans, &tmp.k);
- return ret;
-}
-
static int found_btree_node_cmp_cookie(const void *_l, const void *_r)
{
const struct found_btree_node *l = _l;
@@ -159,17 +126,17 @@ static const struct min_heap_callbacks found_btree_node_heap_cbs = {
};
static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
- struct bio *bio, struct btree_node *bn, u64 offset)
+ struct btree *b, struct bio *bio, u64 offset)
{
struct bch_fs *c = container_of(f, struct bch_fs, found_btree_nodes);
+ struct btree_node *bn = b->data;
bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ);
bio->bi_iter.bi_sector = offset;
- bch2_bio_map(bio, bn, PAGE_SIZE);
+ bch2_bio_map(bio, b->data, c->opts.block_size);
u64 submit_time = local_clock();
submit_bio_wait(bio);
-
bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read, submit_time, !bio->bi_status);
if (bio->bi_status) {
@@ -201,6 +168,14 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
if (BTREE_NODE_ID(bn) >= BTREE_ID_NR_MAX)
return;
+ bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ);
+ bio->bi_iter.bi_sector = offset;
+ bch2_bio_map(bio, b->data, c->opts.btree_node_size);
+
+ submit_time = local_clock();
+ submit_bio_wait(bio);
+ bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read, submit_time, !bio->bi_status);
+
rcu_read_lock();
struct found_btree_node n = {
.btree_id = BTREE_NODE_ID(bn),
@@ -217,7 +192,20 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
};
rcu_read_unlock();
- if (bch2_trans_run(c, found_btree_node_is_readable(trans, &n))) {
+ found_btree_node_to_key(&b->key, &n);
+
+ CLASS(printbuf, buf)();
+ if (!bch2_btree_node_read_done(c, ca, b, NULL, &buf)) {
+ /* read_done will swap out b->data for another buffer */
+ bn = b->data;
+ /*
+ * Grab journal_seq here because we want the max journal_seq of
+ * any bset; read_done sorts down to a single set and picks the
+ * max journal_seq
+ */
+ n.journal_seq = le64_to_cpu(bn->keys.journal_seq),
+ n.sectors_written = b->written;
+
mutex_lock(&f->lock);
if (BSET_BIG_ENDIAN(&bn->keys) != CPU_BIG_ENDIAN) {
bch_err(c, "try_read_btree_node() can't handle endian conversion");
@@ -237,12 +225,20 @@ static int read_btree_nodes_worker(void *p)
struct find_btree_nodes_worker *w = p;
struct bch_fs *c = container_of(w->f, struct bch_fs, found_btree_nodes);
struct bch_dev *ca = w->ca;
- void *buf = (void *) __get_free_page(GFP_KERNEL);
- struct bio *bio = bio_alloc(NULL, 1, 0, GFP_KERNEL);
unsigned long last_print = jiffies;
+ struct btree *b = NULL;
+ struct bio *bio = NULL;
+
+ b = __bch2_btree_node_mem_alloc(c);
+ if (!b) {
+ bch_err(c, "read_btree_nodes_worker: error allocating buf");
+ w->f->ret = -ENOMEM;
+ goto err;
+ }
- if (!buf || !bio) {
- bch_err(c, "read_btree_nodes_worker: error allocating bio/buf");
+ bio = bio_alloc(NULL, buf_pages(b->data, c->opts.btree_node_size), 0, GFP_KERNEL);
+ if (!bio) {
+ bch_err(c, "read_btree_nodes_worker: error allocating bio");
w->f->ret = -ENOMEM;
goto err;
}
@@ -266,11 +262,13 @@ static int read_btree_nodes_worker(void *p)
!bch2_dev_btree_bitmap_marked_sectors(ca, sector, btree_sectors(c)))
continue;
- try_read_btree_node(w->f, ca, bio, buf, sector);
+ try_read_btree_node(w->f, ca, b, bio, sector);
}
err:
+ if (b)
+ __btree_node_data_free(b);
+ kfree(b);
bio_put(bio);
- free_page((unsigned long) buf);
enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scan);
closure_put(w->cl);
kfree(w);
diff --git a/libbcachefs/btree_trans_commit.c b/libbcachefs/btree_trans_commit.c
index 7fcf248a..a7e9d891 100644
--- a/libbcachefs/btree_trans_commit.c
+++ b/libbcachefs/btree_trans_commit.c
@@ -1008,7 +1008,7 @@ do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans)
return 0;
}
-int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
+int __bch2_trans_commit(struct btree_trans *trans, enum bch_trans_commit_flags flags)
{
struct btree_insert_entry *errored_at = NULL;
struct bch_fs *c = trans->c;
diff --git a/libbcachefs/btree_update.c b/libbcachefs/btree_update.c
index 5d9e0237..7983c494 100644
--- a/libbcachefs/btree_update.c
+++ b/libbcachefs/btree_update.c
@@ -661,21 +661,22 @@ int bch2_btree_insert_trans(struct btree_trans *trans, enum btree_id id,
* @k: key to insert
* @disk_res: must be non-NULL whenever inserting or potentially
* splitting data extents
- * @flags: transaction commit flags
+ * @commit_flags: transaction commit flags
* @iter_flags: btree iter update trigger flags
*
* Returns: 0 on success, error code on failure
*/
int bch2_btree_insert(struct bch_fs *c, enum btree_id id, struct bkey_i *k,
- struct disk_reservation *disk_res, int flags,
+ struct disk_reservation *disk_res,
+ enum bch_trans_commit_flags commit_flags,
enum btree_iter_update_trigger_flags iter_flags)
{
- return bch2_trans_commit_do(c, disk_res, NULL, flags,
+ return bch2_trans_commit_do(c, disk_res, NULL, commit_flags,
bch2_btree_insert_trans(trans, id, k, iter_flags));
}
-int bch2_btree_delete_at(struct btree_trans *trans,
- struct btree_iter *iter, unsigned update_flags)
+int bch2_btree_delete_at(struct btree_trans *trans, struct btree_iter *iter,
+ enum btree_iter_update_trigger_flags flags)
{
struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k));
int ret = PTR_ERR_OR_ZERO(k);
@@ -684,12 +685,12 @@ int bch2_btree_delete_at(struct btree_trans *trans,
bkey_init(&k->k);
k->k.p = iter->pos;
- return bch2_trans_update(trans, iter, k, update_flags);
+ return bch2_trans_update(trans, iter, k, flags);
}
int bch2_btree_delete(struct btree_trans *trans,
enum btree_id btree, struct bpos pos,
- unsigned update_flags)
+ enum btree_iter_update_trigger_flags flags)
{
struct btree_iter iter;
int ret;
@@ -698,7 +699,7 @@ int bch2_btree_delete(struct btree_trans *trans,
BTREE_ITER_cached|
BTREE_ITER_intent);
ret = bch2_btree_iter_traverse(trans, &iter) ?:
- bch2_btree_delete_at(trans, &iter, update_flags);
+ bch2_btree_delete_at(trans, &iter, flags);
bch2_trans_iter_exit(trans, &iter);
return ret;
@@ -706,7 +707,7 @@ int bch2_btree_delete(struct btree_trans *trans,
int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
struct bpos start, struct bpos end,
- unsigned update_flags,
+ enum btree_iter_update_trigger_flags flags,
u64 *journal_seq)
{
u32 restart_count = trans->restart_count;
@@ -714,7 +715,7 @@ int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
struct bkey_s_c k;
int ret = 0;
- bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_intent);
+ bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_intent|flags);
while ((k = bch2_btree_iter_peek_max(trans, &iter, end)).k) {
struct disk_reservation disk_res =
bch2_disk_reservation_init(trans->c, 0);
@@ -747,7 +748,7 @@ int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
bpos_min(end, k.k->p).offset -
iter.pos.offset);
- ret = bch2_trans_update(trans, &iter, &delete, update_flags) ?:
+ ret = bch2_trans_update(trans, &iter, &delete, flags) ?:
bch2_trans_commit(trans, &disk_res, journal_seq,
BCH_TRANS_COMMIT_no_enospc);
bch2_disk_reservation_put(trans->c, &disk_res);
@@ -777,12 +778,12 @@ err:
*/
int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
struct bpos start, struct bpos end,
- unsigned update_flags,
+ enum btree_iter_update_trigger_flags flags,
u64 *journal_seq)
{
int ret = bch2_trans_run(c,
bch2_btree_delete_range_trans(trans, id, start, end,
- update_flags, journal_seq));
+ flags, journal_seq));
if (ret == -BCH_ERR_transaction_restart_nested)
ret = 0;
return ret;
diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree_update.h
index 2c6f9b44..222a9f8f 100644
--- a/libbcachefs/btree_update.h
+++ b/libbcachefs/btree_update.h
@@ -47,22 +47,27 @@ enum bch_trans_commit_flags {
void bch2_trans_commit_flags_to_text(struct printbuf *, enum bch_trans_commit_flags);
-int bch2_btree_delete_at(struct btree_trans *, struct btree_iter *, unsigned);
-int bch2_btree_delete(struct btree_trans *, enum btree_id, struct bpos, unsigned);
+int bch2_btree_delete_at(struct btree_trans *, struct btree_iter *,
+ enum btree_iter_update_trigger_flags);
+int bch2_btree_delete(struct btree_trans *, enum btree_id, struct bpos,
+ enum btree_iter_update_trigger_flags);
int bch2_btree_insert_nonextent(struct btree_trans *, enum btree_id,
struct bkey_i *, enum btree_iter_update_trigger_flags);
int bch2_btree_insert_trans(struct btree_trans *, enum btree_id, struct bkey_i *,
enum btree_iter_update_trigger_flags);
-int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *, struct
- disk_reservation *, int flags, enum
- btree_iter_update_trigger_flags iter_flags);
+int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *,
+ struct disk_reservation *,
+ enum bch_trans_commit_flags,
+ enum btree_iter_update_trigger_flags);
int bch2_btree_delete_range_trans(struct btree_trans *, enum btree_id,
- struct bpos, struct bpos, unsigned, u64 *);
+ struct bpos, struct bpos,
+ enum btree_iter_update_trigger_flags, u64 *);
int bch2_btree_delete_range(struct bch_fs *, enum btree_id,
- struct bpos, struct bpos, unsigned, u64 *);
+ struct bpos, struct bpos,
+ enum btree_iter_update_trigger_flags, u64 *);
int bch2_btree_bit_mod_iter(struct btree_trans *, struct btree_iter *, bool);
int bch2_btree_bit_mod(struct btree_trans *, enum btree_id, struct bpos, bool);
@@ -226,7 +231,7 @@ static inline int __must_check bch2_trans_update_buffered(struct btree_trans *tr
void bch2_trans_commit_hook(struct btree_trans *,
struct btree_trans_commit_hook *);
-int __bch2_trans_commit(struct btree_trans *, unsigned);
+int __bch2_trans_commit(struct btree_trans *, enum bch_trans_commit_flags);
int bch2_trans_log_str(struct btree_trans *, const char *);
int bch2_trans_log_msg(struct btree_trans *, struct printbuf *);
diff --git a/libbcachefs/data_update.c b/libbcachefs/data_update.c
index 3968f3be..e848e210 100644
--- a/libbcachefs/data_update.c
+++ b/libbcachefs/data_update.c
@@ -783,9 +783,6 @@ static int can_write_extent(struct bch_fs *c, struct data_update *m)
darray_for_each(m->op.devs_have, i)
__clear_bit(*i, devs.d);
- CLASS(printbuf, buf)();
- buf.atomic++;
-
guard(rcu)();
unsigned nr_replicas = 0, i;
@@ -797,11 +794,7 @@ static int can_write_extent(struct bch_fs *c, struct data_update *m)
struct bch_dev_usage usage;
bch2_dev_usage_read_fast(ca, &usage);
- u64 nr_free = dev_buckets_free(ca, usage, m->op.watermark);
-
- prt_printf(&buf, "%s=%llu ", ca->name, nr_free);
-
- if (!nr_free)
+ if (!dev_buckets_free(ca, usage, m->op.watermark))
continue;
nr_replicas += ca->mi.durability;
@@ -809,10 +802,8 @@ static int can_write_extent(struct bch_fs *c, struct data_update *m)
break;
}
- if (!nr_replicas) {
- trace_data_update_done_no_rw_devs(c, buf.buf);
+ if (!nr_replicas)
return bch_err_throw(c, data_update_done_no_rw_devs);
- }
if (nr_replicas < m->op.nr_replicas)
return bch_err_throw(c, insufficient_devices);
return 0;
diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c
index 901f643e..07c2a0f7 100644
--- a/libbcachefs/debug.c
+++ b/libbcachefs/debug.c
@@ -153,8 +153,6 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
c->verify_data = __bch2_btree_node_mem_alloc(c);
if (!c->verify_data)
goto out;
-
- list_del_init(&c->verify_data->list);
}
BUG_ON(b->nsets != 1);
@@ -586,6 +584,8 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
i->ubuf = buf;
i->size = size;
i->ret = 0;
+
+ int srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
restart:
seqmutex_lock(&c->btree_trans_lock);
list_sort(&c->btree_trans_list, list_ptr_order_cmp);
@@ -599,6 +599,11 @@ restart:
if (!closure_get_not_zero(&trans->ref))
continue;
+ if (!trans->srcu_held) {
+ closure_put(&trans->ref);
+ continue;
+ }
+
u32 seq = seqmutex_unlock(&c->btree_trans_lock);
bch2_btree_trans_to_text(&i->buf, trans);
@@ -620,6 +625,8 @@ restart:
}
seqmutex_unlock(&c->btree_trans_lock);
unlocked:
+ srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
+
if (i->buf.allocation_failure)
ret = -ENOMEM;
diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h
index d27b94a6..2de0dc91 100644
--- a/libbcachefs/errcode.h
+++ b/libbcachefs/errcode.h
@@ -289,7 +289,6 @@
x(EIO, sb_not_downgraded) \
x(EIO, btree_node_write_all_failed) \
x(EIO, btree_node_read_error) \
- x(EIO, btree_node_read_validate_error) \
x(EIO, btree_need_topology_repair) \
x(EIO, bucket_ref_update) \
x(EIO, trigger_alloc) \
diff --git a/libbcachefs/error.c b/libbcachefs/error.c
index a9a9fe19..71649b41 100644
--- a/libbcachefs/error.c
+++ b/libbcachefs/error.c
@@ -103,7 +103,7 @@ int __bch2_topology_error(struct bch_fs *c, struct printbuf *out)
return bch_err_throw(c, btree_need_topology_repair);
} else {
return bch2_run_explicit_recovery_pass(c, out, BCH_RECOVERY_PASS_check_topology, 0) ?:
- bch_err_throw(c, btree_node_read_validate_error);
+ bch_err_throw(c, btree_need_topology_repair);
}
}
diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c
index 036e4ad9..83cbd77d 100644
--- a/libbcachefs/extents.c
+++ b/libbcachefs/extents.c
@@ -50,19 +50,17 @@ void bch2_io_failures_to_text(struct printbuf *out,
struct bch_io_failures *failed)
{
static const char * const error_types[] = {
- "io", "checksum", "ec reconstruct", NULL
+ "btree validate", "io", "checksum", "ec reconstruct", NULL
};
for (struct bch_dev_io_failures *f = failed->devs;
f < failed->devs + failed->nr;
f++) {
unsigned errflags =
- ((!!f->failed_io) << 0) |
- ((!!f->failed_csum_nr) << 1) |
- ((!!f->failed_ec) << 2);
-
- if (!errflags)
- continue;
+ ((!!f->failed_btree_validate) << 0) |
+ ((!!f->failed_io) << 1) |
+ ((!!f->failed_csum_nr) << 2) |
+ ((!!f->failed_ec) << 3);
bch2_printbuf_make_room(out, 1024);
out->atomic++;
@@ -77,7 +75,9 @@ void bch2_io_failures_to_text(struct printbuf *out,
prt_char(out, ' ');
- if (is_power_of_2(errflags)) {
+ if (!errflags) {
+ prt_str(out, "no error - confused");
+ } else if (is_power_of_2(errflags)) {
prt_bitflags(out, error_types, errflags);
prt_str(out, " error");
} else {
diff --git a/libbcachefs/fs-io-buffered.c b/libbcachefs/fs-io-buffered.c
index dad48d44..4e82dfa6 100644
--- a/libbcachefs/fs-io-buffered.c
+++ b/libbcachefs/fs-io-buffered.c
@@ -257,7 +257,7 @@ err:
struct printbuf buf = PRINTBUF;
lockrestart_do(trans,
bch2_inum_offset_err_msg_trans(trans, &buf, inum, iter.pos.offset << 9));
- prt_printf(&buf, "read error %i from btree lookup", ret);
+ prt_printf(&buf, "read error %s from btree lookup", bch2_err_str(ret));
bch_err_ratelimited(c, "%s", buf.buf);
printbuf_exit(&buf);
diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c
index 1ceca63c..471e93a3 100644
--- a/libbcachefs/fsck.c
+++ b/libbcachefs/fsck.c
@@ -1638,7 +1638,8 @@ static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_wal
i->count = count2;
}
- if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_sectors_dirty),
+ if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_sectors_dirty) &&
+ i->inode.bi_sectors != i->count,
trans, inode_i_sectors_wrong,
"inode %llu:%u has incorrect i_sectors: got %llu, should be %llu",
w->last_pos.inode, i->inode.bi_snapshot,
diff --git a/libbcachefs/io_read.h b/libbcachefs/io_read.h
index 9c5ddbf8..cfc8ef35 100644
--- a/libbcachefs/io_read.h
+++ b/libbcachefs/io_read.h
@@ -147,7 +147,7 @@ static inline void bch2_read_extent(struct btree_trans *trans,
int ret = __bch2_read_extent(trans, rbio, rbio->bio.bi_iter, read_pos,
data_btree, k, offset_into_extent, NULL, flags, -1);
/* __bch2_read_extent only returns errors if BCH_READ_in_retry is set */
- WARN(ret, "unhandled error from __bch2_read_extent()");
+ WARN(ret, "unhandled error from __bch2_read_extent(): %s", bch2_err_str(ret));
}
int __bch2_read(struct btree_trans *, struct bch_read_bio *, struct bvec_iter,
diff --git a/libbcachefs/io_write.c b/libbcachefs/io_write.c
index 88b1eec8..fa077341 100644
--- a/libbcachefs/io_write.c
+++ b/libbcachefs/io_write.c
@@ -32,6 +32,7 @@
#include "trace.h"
#include <linux/blkdev.h>
+#include <linux/moduleparam.h>
#include <linux/prefetch.h>
#include <linux/random.h>
#include <linux/sched/mm.h>
diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c
index ce534061..f22b05e0 100644
--- a/libbcachefs/journal.c
+++ b/libbcachefs/journal.c
@@ -1376,6 +1376,7 @@ int bch2_dev_journal_alloc(struct bch_dev *ca, bool new_fs)
return bch_err_throw(c, erofs_filesystem_full);
}
+ unsigned nr;
int ret;
if (dynamic_fault("bcachefs:add:journal_alloc")) {
@@ -1384,19 +1385,16 @@ int bch2_dev_journal_alloc(struct bch_dev *ca, bool new_fs)
}
/* 1/128th of the device by default: */
- unsigned nr = ca->mi.nbuckets >> 7;
+ nr = ca->mi.nbuckets >> 7;
/*
- * clamp journal size to 8GB, or 32GB with large_journal option:
+ * clamp journal size to 8192 buckets or 8GB (in sectors), whichever
+ * is smaller:
*/
- unsigned max_sectors = 1 << 24;
-
- if (c->opts.large_journal)
- max_sectors *= 4;
-
nr = clamp_t(unsigned, nr,
BCH_JOURNAL_BUCKETS_MIN,
- max_sectors / ca->mi.bucket_size);
+ min(1 << 13,
+ (1 << 24) / ca->mi.bucket_size));
ret = bch2_set_nr_journal_buckets_loop(c, ca, nr, new_fs);
err:
diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c
index 3f06c4b2..2d6ce434 100644
--- a/libbcachefs/journal_io.c
+++ b/libbcachefs/journal_io.c
@@ -1245,6 +1245,8 @@ noinline_for_stack
static void bch2_journal_print_checksum_error(struct bch_fs *c, struct journal_replay *j)
{
struct printbuf buf = PRINTBUF;
+ bch2_log_msg_start(c, &buf);
+
enum bch_csum_type csum_type = JSET_CSUM_TYPE(&j->j);
bool have_good = false;
diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h
index 4a7a6058..63f8e254 100644
--- a/libbcachefs/opts.h
+++ b/libbcachefs/opts.h
@@ -343,12 +343,6 @@ enum fsck_err_opts {
OPT_UINT(0, U32_MAX), \
BCH_SB_JOURNAL_RECLAIM_DELAY, 100, \
NULL, "Delay in milliseconds before automatic journal reclaim")\
- x(large_journal, bool, \
- OPT_FS|OPT_MOUNT|OPT_FORMAT, \
- OPT_BOOL(), \
- BCH2_NO_SB_OPT, false, \
- NULL, "Allocate a bigger than normal journal: recovery from unclean "\
- "shutdown will be slower, but more info will be available for debugging")\
x(move_bytes_in_flight, u32, \
OPT_HUMAN_READABLE|OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
OPT_UINT(1024, U32_MAX), \
@@ -395,11 +389,6 @@ enum fsck_err_opts {
OPT_UINT(0, U64_MAX), \
BCH2_NO_SB_OPT, 0, \
NULL, "Rewind journal") \
- x(journal_rewind_no_extents, bool, \
- OPT_FS|OPT_MOUNT, \
- OPT_BOOL(), \
- BCH2_NO_SB_OPT, 0, \
- NULL, "Don't rewind extents when rewinding journal") \
x(recovery_passes, u64, \
OPT_FS|OPT_MOUNT, \
OPT_BITFIELD(bch2_recovery_passes), \
diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c
index 974f8bf9..0def4ecb 100644
--- a/libbcachefs/recovery.c
+++ b/libbcachefs/recovery.c
@@ -273,24 +273,35 @@ static int bch2_journal_replay_key(struct btree_trans *trans,
goto out;
struct btree_path *path = btree_iter_path(trans, &iter);
- if (unlikely(!btree_path_node(path, k->level) &&
- !k->allocated)) {
+ if (unlikely(!btree_path_node(path, k->level))) {
struct bch_fs *c = trans->c;
+ CLASS(printbuf, buf)();
+ prt_str(&buf, "btree=");
+ bch2_btree_id_to_text(&buf, k->btree_id);
+ prt_printf(&buf, " level=%u ", k->level);
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k->k));
+
if (!(c->recovery.passes_complete & (BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes)|
BIT_ULL(BCH_RECOVERY_PASS_check_topology)))) {
- bch_err(c, "have key in journal replay for btree depth that does not exist, confused");
+ bch_err(c, "have key in journal replay for btree depth that does not exist, confused\n%s",
+ buf.buf);
ret = -EINVAL;
}
-#if 0
+
+ if (!k->allocated) {
+ bch_notice(c, "dropping key in journal replay for depth that does not exist because we're recovering from scan\n%s",
+ buf.buf);
+ k->overwritten = true;
+ goto out;
+ }
+
bch2_trans_iter_exit(trans, &iter);
bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p,
BTREE_MAX_DEPTH, 0, iter_flags);
ret = bch2_btree_iter_traverse(trans, &iter) ?:
bch2_btree_increase_depth(trans, iter.path, 0) ?:
-BCH_ERR_transaction_restart_nested;
-#endif
- k->overwritten = true;
goto out;
}
diff --git a/libbcachefs/sb-members.c b/libbcachefs/sb-members.c
index f2abe92c..340d4fb7 100644
--- a/libbcachefs/sb-members.c
+++ b/libbcachefs/sb-members.c
@@ -20,6 +20,7 @@ int bch2_dev_missing_bkey(struct bch_fs *c, struct bkey_s_c k, unsigned dev)
prt_printf(&buf, "pointer to %s device %u in key\n",
removed ? "removed" : "nonexistent", dev);
bch2_bkey_val_to_text(&buf, c, k);
+ prt_newline(&buf);
bool print = removed
? bch2_count_fsck_err(c, ptr_to_removed_device, &buf)
diff --git a/libbcachefs/super.c b/libbcachefs/super.c
index 6980cd5b..a3438b0d 100644
--- a/libbcachefs/super.c
+++ b/libbcachefs/super.c
@@ -1974,11 +1974,15 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
ca->disk_sb.sb->dev_idx = dev_idx;
bch2_dev_attach(c, ca, dev_idx);
+ set_bit(ca->dev_idx, c->online_devs.d);
+
if (BCH_MEMBER_GROUP(&dev_mi)) {
ret = __bch2_dev_group_set(c, ca, label.buf);
bch_err_msg(c, ret, "creating new label");
- if (ret)
- goto err_unlock;
+ if (ret) {
+ mutex_unlock(&c->sb_lock);
+ goto err_late;
+ }
}
bch2_write_super(c);
@@ -2526,6 +2530,8 @@ static int bch2_param_get_static_key_t(char *buffer, const struct kernel_param *
return sprintf(buffer, "%c\n", static_key_enabled(key) ? 'N' : 'Y');
}
+/* this is unused in userspace - silence the warning */
+__maybe_unused
static const struct kernel_param_ops bch2_param_ops_static_key_t = {
.flags = KERNEL_PARAM_OPS_FL_NOARG,
.set = bch2_param_set_static_key_t,
diff --git a/libbcachefs/trace.h b/libbcachefs/trace.h
index 9324ef32..b5dae114 100644
--- a/libbcachefs/trace.h
+++ b/libbcachefs/trace.h
@@ -1330,11 +1330,6 @@ DEFINE_EVENT(fs_str, data_update,
TP_ARGS(c, str)
);
-DEFINE_EVENT(fs_str, data_update_done_no_rw_devs,
- TP_PROTO(struct bch_fs *c, const char *str),
- TP_ARGS(c, str)
-);
-
DEFINE_EVENT(fs_str, io_move_pred,
TP_PROTO(struct bch_fs *c, const char *str),
TP_ARGS(c, str)