diff options
Diffstat (limited to 'libbcachefs')
32 files changed, 547 insertions, 318 deletions
diff --git a/libbcachefs/backpointers.c b/libbcachefs/backpointers.c index c43aaab4..cb25cddb 100644 --- a/libbcachefs/backpointers.c +++ b/libbcachefs/backpointers.c @@ -532,10 +532,6 @@ static int check_bp_exists(struct btree_trans *trans, struct btree_iter other_extent_iter = {}; CLASS(printbuf, buf)(); - if (bpos_lt(bp->k.p, s->bp_start) || - bpos_gt(bp->k.p, s->bp_end)) - return 0; - CLASS(btree_iter, bp_iter)(trans, BTREE_ID_backpointers, bp->k.p, 0); struct bkey_s_c bp_k = bch2_btree_iter_peek_slot(&bp_iter); int ret = bkey_err(bp_k); @@ -690,6 +686,10 @@ static int check_extent_to_backpointers(struct btree_trans *trans, struct bkey_i_backpointer bp; bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bp); + if (bpos_lt(bp.k.p, s->bp_start) || + bpos_gt(bp.k.p, s->bp_end)) + continue; + int ret = !empty ? check_bp_exists(trans, s, &bp, k) : bch2_bucket_backpointer_mod(trans, k, &bp, true); @@ -809,8 +809,6 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, for (enum btree_id btree_id = 0; btree_id < btree_id_nr_alive(c); btree_id++) { - /* btree_type_has_ptrs should probably include BTREE_ID_stripes, - * definitely her... */ int level, depth = btree_type_has_ptrs(btree_id) ? 
0 : 1; ret = commit_do(trans, NULL, NULL, @@ -899,7 +897,7 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k); - if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_backpointer_bucket_gen && + if (c->sb.version_upgrade_complete < bcachefs_metadata_version_backpointer_bucket_gen && (bp.v->bucket_gen != a->gen || bp.v->pad)) { ret = bch2_backpointer_del(trans, bp_k.k->p); @@ -931,6 +929,14 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b if (sectors[ALLOC_dirty] != a->dirty_sectors || sectors[ALLOC_cached] != a->cached_sectors || sectors[ALLOC_stripe] != a->stripe_sectors) { + /* + * Post 1.14 upgrade, we assume that backpointers are mostly + * correct and a sector count mismatch is probably due to a + * write buffer race + * + * Pre upgrade, we expect all the buckets to be wrong, a write + * buffer flush is pointless: + */ if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_backpointer_bucket_gen) { ret = bch2_backpointers_maybe_flush(trans, alloc_k, last_flushed); if (ret) @@ -978,12 +984,22 @@ static bool backpointer_node_has_missing(struct bch_fs *c, struct bkey_s_c k) goto next; struct bpos bucket = bp_pos_to_bucket(ca, pos); - u64 next = ca->mi.nbuckets; + u64 next = min(bucket.offset, ca->mi.nbuckets); - unsigned long *bitmap = READ_ONCE(ca->bucket_backpointer_mismatch.buckets); - if (bitmap) - next = min_t(u64, next, - find_next_bit(bitmap, ca->mi.nbuckets, bucket.offset)); + unsigned long *mismatch = READ_ONCE(ca->bucket_backpointer_mismatch.buckets); + unsigned long *empty = READ_ONCE(ca->bucket_backpointer_empty.buckets); + /* + * Find the first bucket with mismatches - but + * not empty buckets; we don't need to pin those + * because we just recreate all backpointers in + * those buckets + */ + if (mismatch && empty) + next = find_next_andnot_bit(mismatch, empty, ca->mi.nbuckets, next); + else if 
(mismatch) + next = find_next_bit(mismatch, ca->mi.nbuckets, next); + else + next = ca->mi.nbuckets; bucket.offset = next; if (bucket.offset == ca->mi.nbuckets) @@ -1110,17 +1126,18 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c) if (ret) goto err; - u64 nr_buckets = 0, nr_mismatches = 0; + u64 nr_buckets = 0, nr_mismatches = 0, nr_empty = 0; for_each_member_device(c, ca) { nr_buckets += ca->mi.nbuckets; nr_mismatches += ca->bucket_backpointer_mismatch.nr; + nr_empty += ca->bucket_backpointer_empty.nr; } if (!nr_mismatches) goto err; - bch_info(c, "scanning for missing backpointers in %llu/%llu buckets", - nr_mismatches, nr_buckets); + bch_info(c, "scanning for missing backpointers in %llu/%llu buckets, %llu buckets with no backpointers", + nr_mismatches - nr_empty, nr_buckets, nr_empty); while (1) { ret = bch2_pin_backpointer_nodes_with_missing(trans, s.bp_start, &s.bp_end); diff --git a/libbcachefs/bcachefs_ioctl.h b/libbcachefs/bcachefs_ioctl.h index 52594e92..42e2cb5d 100644 --- a/libbcachefs/bcachefs_ioctl.h +++ b/libbcachefs/bcachefs_ioctl.h @@ -71,6 +71,7 @@ struct bch_ioctl_incremental { #define BCH_IOCTL_DISK_ONLINE _IOW(0xbc, 6, struct bch_ioctl_disk) #define BCH_IOCTL_DISK_OFFLINE _IOW(0xbc, 7, struct bch_ioctl_disk) #define BCH_IOCTL_DISK_SET_STATE _IOW(0xbc, 8, struct bch_ioctl_disk_set_state) +#define BCH_IOCTL_DISK_SET_STATE_v2 _IOW(0xbc, 22, struct bch_ioctl_disk_set_state_v2) #define BCH_IOCTL_DATA _IOW(0xbc, 10, struct bch_ioctl_data) #define BCH_IOCTL_FS_USAGE _IOWR(0xbc, 11, struct bch_ioctl_fs_usage) #define BCH_IOCTL_DEV_USAGE _IOWR(0xbc, 11, struct bch_ioctl_dev_usage) @@ -93,6 +94,12 @@ struct bch_ioctl_incremental { #define BCHFS_IOC_REINHERIT_ATTRS _IOR(0xbc, 64, const char __user *) +struct bch_ioctl_err_msg { + __u64 msg_ptr; + __u32 msg_len; + __u32 pad; +}; + /* * BCH_IOCTL_QUERY_UUID: get filesystem UUID * @@ -181,6 +188,14 @@ struct bch_ioctl_disk_set_state { __u64 dev; }; +struct bch_ioctl_disk_set_state_v2 { + __u32 
flags; + __u8 new_state; + __u8 pad[3]; + __u64 dev; + struct bch_ioctl_err_msg err; +}; + #define BCH_DATA_OPS() \ x(scrub, 0) \ x(rereplicate, 1) \ diff --git a/libbcachefs/bkey_buf.h b/libbcachefs/bkey_buf.h index a30c4ae8..0a1fc582 100644 --- a/libbcachefs/bkey_buf.h +++ b/libbcachefs/bkey_buf.h @@ -2,6 +2,8 @@ #ifndef _BCACHEFS_BKEY_BUF_H #define _BCACHEFS_BKEY_BUF_H +#include <linux/mempool.h> + #include "bcachefs.h" #include "bkey.h" @@ -10,41 +12,49 @@ struct bkey_buf { u64 onstack[12]; }; -static inline void bch2_bkey_buf_realloc(struct bkey_buf *s, - struct bch_fs *c, unsigned u64s) +static inline int bch2_bkey_buf_realloc_noprof(struct bkey_buf *s, + struct bch_fs *c, unsigned u64s) { if (s->k == (void *) s->onstack && u64s > ARRAY_SIZE(s->onstack)) { - s->k = mempool_alloc(&c->large_bkey_pool, GFP_NOFS); + s->k = mempool_alloc_noprof(&c->large_bkey_pool, GFP_NOFS); memcpy(s->k, s->onstack, sizeof(s->onstack)); } + + return 0; /* for alloc_hooks() macro */ } +#define bch2_bkey_buf_realloc(...) alloc_hooks(bch2_bkey_buf_realloc_noprof(__VA_ARGS__)) -static inline void bch2_bkey_buf_reassemble(struct bkey_buf *s, - struct bch_fs *c, - struct bkey_s_c k) +static inline int bch2_bkey_buf_reassemble_noprof(struct bkey_buf *s, + struct bch_fs *c, + struct bkey_s_c k) { bch2_bkey_buf_realloc(s, c, k.k->u64s); bkey_reassemble(s->k, k); + return 0; } +#define bch2_bkey_buf_reassemble(...) alloc_hooks(bch2_bkey_buf_reassemble_noprof(__VA_ARGS__)) -static inline void bch2_bkey_buf_copy(struct bkey_buf *s, - struct bch_fs *c, - struct bkey_i *src) +static inline int bch2_bkey_buf_copy_noprof(struct bkey_buf *s, + struct bch_fs *c, + struct bkey_i *src) { bch2_bkey_buf_realloc(s, c, src->k.u64s); bkey_copy(s->k, src); + return 0; } +#define bch2_bkey_buf_copy(...) 
alloc_hooks(bch2_bkey_buf_copy_noprof(__VA_ARGS__)) -static inline void bch2_bkey_buf_unpack(struct bkey_buf *s, - struct bch_fs *c, - struct btree *b, - struct bkey_packed *src) +static inline int bch2_bkey_buf_unpack_noprof(struct bkey_buf *s, + struct bch_fs *c, + struct btree *b, + struct bkey_packed *src) { - bch2_bkey_buf_realloc(s, c, BKEY_U64s + - bkeyp_val_u64s(&b->format, src)); + bch2_bkey_buf_realloc(s, c, BKEY_U64s + bkeyp_val_u64s(&b->format, src)); bch2_bkey_unpack(b, s->k, src); + return 0; } +#define bch2_bkey_buf_unpack(...) alloc_hooks(bch2_bkey_buf_unpack_noprof(__VA_ARGS__)) static inline void bch2_bkey_buf_init(struct bkey_buf *s) { diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index ae7d2605..43f29428 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -356,7 +356,7 @@ again: bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b); iter.prefetch = true; - while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) { + while ((k = bch2_btree_and_journal_iter_peek(c, &iter)).k) { BUG_ON(bpos_lt(k.k->p, b->data->min_key)); BUG_ON(bpos_gt(k.k->p, b->data->max_key)); @@ -470,7 +470,7 @@ again: bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b); iter.prefetch = true; - while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) { + while ((k = bch2_btree_and_journal_iter_peek(c, &iter)).k) { bch2_bkey_buf_reassemble(&cur_k, c, k); bch2_btree_and_journal_iter_advance(&iter); diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index b1f58fb3..2e3dd9ba 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -1014,6 +1014,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, k = bkey_p_next(k); continue; drop_this_key: + ret = 0; next_good_key = k->u64s; if (!next_good_key || diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index 546b559f..76f430f9 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -650,7 +650,7 @@ static void 
bch2_trans_revalidate_updates_in_node(struct btree_trans *trans, str i->old_v = bch2_btree_path_peek_slot(trans->paths + i->path, &i->old_k).v; if (unlikely(trans->journal_replay_not_finished)) { - struct bkey_i *j_k = + const struct bkey_i *j_k = bch2_journal_keys_peek_slot(c, i->btree_id, i->level, i->k->k.p); @@ -848,7 +848,7 @@ static int btree_path_prefetch_j(struct btree_trans *trans, struct btree_path *p break; bch2_btree_and_journal_iter_advance(jiter); - k = bch2_btree_and_journal_iter_peek(jiter); + k = bch2_btree_and_journal_iter_peek(c, jiter); if (!k.k) break; @@ -898,7 +898,7 @@ static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans, __bch2_btree_and_journal_iter_init_node_iter(trans, &jiter, l->b, l->iter, path->pos); - k = bch2_btree_and_journal_iter_peek(&jiter); + k = bch2_btree_and_journal_iter_peek(c, &jiter); if (!k.k) { CLASS(printbuf, buf)(); @@ -2120,10 +2120,10 @@ void bch2_btree_trans_peek_slot_updates(struct btree_trans *trans, struct btree_ } } -static struct bkey_i *bch2_btree_journal_peek(struct btree_trans *trans, - struct btree_iter *iter, - struct bpos search_pos, - struct bpos end_pos) +static const struct bkey_i *bch2_btree_journal_peek(struct btree_trans *trans, + struct btree_iter *iter, + struct bpos search_pos, + struct bpos end_pos) { struct btree_path *path = btree_iter_path(trans, iter); @@ -2139,7 +2139,7 @@ struct bkey_s_c btree_trans_peek_slot_journal(struct btree_trans *trans, struct btree_iter *iter) { struct btree_path *path = btree_iter_path(trans, iter); - struct bkey_i *k = bch2_btree_journal_peek(trans, iter, path->pos, path->pos); + const struct bkey_i *k = bch2_btree_journal_peek(trans, iter, path->pos, path->pos); if (k) { iter->k = k->k; @@ -2156,7 +2156,7 @@ void btree_trans_peek_journal(struct btree_trans *trans, struct bkey_s_c *k) { struct btree_path *path = btree_iter_path(trans, iter); - struct bkey_i *next_journal = + const struct bkey_i *next_journal = 
bch2_btree_journal_peek(trans, iter, search_key, k->k ? k->k->p : path_l(path)->b->key.k.p); if (next_journal) { @@ -2165,10 +2165,10 @@ void btree_trans_peek_journal(struct btree_trans *trans, } } -static struct bkey_i *bch2_btree_journal_peek_prev(struct btree_trans *trans, - struct btree_iter *iter, - struct bpos search_key, - struct bpos end_pos) +static const struct bkey_i *bch2_btree_journal_peek_prev(struct btree_trans *trans, + struct btree_iter *iter, + struct bpos search_key, + struct bpos end_pos) { struct btree_path *path = btree_iter_path(trans, iter); @@ -2186,7 +2186,7 @@ void btree_trans_peek_prev_journal(struct btree_trans *trans, struct bkey_s_c *k) { struct btree_path *path = btree_iter_path(trans, iter); - struct bkey_i *next_journal = + const struct bkey_i *next_journal = bch2_btree_journal_peek_prev(trans, iter, search_key, k->k ? k->k->p : path_l(path)->b->data->min_key); diff --git a/libbcachefs/btree_journal_iter.c b/libbcachefs/btree_journal_iter.c index 24f2fbe8..a6f344fa 100644 --- a/libbcachefs/btree_journal_iter.c +++ b/libbcachefs/btree_journal_iter.c @@ -46,21 +46,22 @@ static size_t __bch2_journal_key_search(struct journal_keys *keys, enum btree_id id, unsigned level, struct bpos pos) { + struct bch_fs *c = container_of(keys, struct bch_fs, journal_keys); size_t l = 0, r = keys->nr, m; while (l < r) { m = l + ((r - l) >> 1); - if (__journal_key_cmp(id, level, pos, idx_to_key(keys, m)) > 0) + if (__journal_key_cmp(c, id, level, pos, idx_to_key(keys, m)) > 0) l = m + 1; else r = m; } BUG_ON(l < keys->nr && - __journal_key_cmp(id, level, pos, idx_to_key(keys, l)) > 0); + __journal_key_cmp(c, id, level, pos, idx_to_key(keys, l)) > 0); BUG_ON(l && - __journal_key_cmp(id, level, pos, idx_to_key(keys, l - 1)) <= 0); + __journal_key_cmp(c, id, level, pos, idx_to_key(keys, l - 1)) <= 0); return l; } @@ -72,10 +73,20 @@ static size_t bch2_journal_key_search(struct journal_keys *keys, return idx_to_pos(keys, __bch2_journal_key_search(keys, id, 
level, pos)); } +static inline struct journal_key_range_overwritten *__overwrite_range(struct journal_keys *keys, u32 idx) +{ + return idx ? keys->overwrites.data + idx : NULL; +} + +static inline struct journal_key_range_overwritten *overwrite_range(struct journal_keys *keys, u32 idx) +{ + return idx ? rcu_dereference(keys->overwrites.data) + idx : NULL; +} + /* Returns first non-overwritten key >= search key: */ -struct bkey_i *bch2_journal_keys_peek_max(struct bch_fs *c, enum btree_id btree_id, - unsigned level, struct bpos pos, - struct bpos end_pos, size_t *idx) +const struct bkey_i *bch2_journal_keys_peek_max(struct bch_fs *c, enum btree_id btree_id, + unsigned level, struct bpos pos, + struct bpos end_pos, size_t *idx) { struct journal_keys *keys = &c->journal_keys; unsigned iters = 0; @@ -87,7 +98,7 @@ search: *idx = __bch2_journal_key_search(keys, btree_id, level, pos); while (*idx && - __journal_key_cmp(btree_id, level, end_pos, idx_to_key(keys, *idx - 1)) <= 0) { + __journal_key_cmp(c, btree_id, level, end_pos, idx_to_key(keys, *idx - 1)) <= 0) { --(*idx); iters++; if (iters == 10) { @@ -96,23 +107,23 @@ search: } } - struct bkey_i *ret = NULL; + const struct bkey_i *ret = NULL; rcu_read_lock(); /* for overwritten_ranges */ while ((k = *idx < keys->nr ? 
idx_to_key(keys, *idx) : NULL)) { - if (__journal_key_cmp(btree_id, level, end_pos, k) < 0) + if (__journal_key_cmp(c, btree_id, level, end_pos, k) < 0) break; if (k->overwritten) { if (k->overwritten_range) - *idx = rcu_dereference(k->overwritten_range)->end; + *idx = overwrite_range(keys, k->overwritten_range)->end; else *idx += 1; continue; } - if (__journal_key_cmp(btree_id, level, pos, k) <= 0) { - ret = k->k; + if (__journal_key_cmp(c, btree_id, level, pos, k) <= 0) { + ret = journal_key_k(c, k); break; } @@ -129,9 +140,9 @@ search: return ret; } -struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *c, enum btree_id btree_id, - unsigned level, struct bpos pos, - struct bpos end_pos, size_t *idx) +const struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *c, enum btree_id btree_id, + unsigned level, struct bpos pos, + struct bpos end_pos, size_t *idx) { struct journal_keys *keys = &c->journal_keys; unsigned iters = 0; @@ -146,7 +157,7 @@ search: *idx = __bch2_journal_key_search(keys, btree_id, level, pos); while (*idx < keys->nr && - __journal_key_cmp(btree_id, level, end_pos, idx_to_key(keys, *idx)) >= 0) { + __journal_key_cmp(c, btree_id, level, end_pos, idx_to_key(keys, *idx)) >= 0) { (*idx)++; iters++; if (iters == 10) { @@ -158,25 +169,25 @@ search: if (*idx == keys->nr) --(*idx); - struct bkey_i *ret = NULL; + const struct bkey_i *ret = NULL; rcu_read_lock(); /* for overwritten_ranges */ while (true) { k = idx_to_key(keys, *idx); - if (__journal_key_cmp(btree_id, level, end_pos, k) > 0) + if (__journal_key_cmp(c, btree_id, level, end_pos, k) > 0) break; if (k->overwritten) { if (k->overwritten_range) - *idx = rcu_dereference(k->overwritten_range)->start; + *idx = overwrite_range(keys, k->overwritten_range)->start; if (!*idx) break; --(*idx); continue; } - if (__journal_key_cmp(btree_id, level, pos, k) >= 0) { - ret = k->k; + if (__journal_key_cmp(c, btree_id, level, pos, k) >= 0) { + ret = journal_key_k(c, k); break; } @@ -194,8 +205,8 
@@ search: return ret; } -struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *c, enum btree_id btree_id, - unsigned level, struct bpos pos) +const struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *c, enum btree_id btree_id, + unsigned level, struct bpos pos) { size_t idx = 0; @@ -264,13 +275,8 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id, struct journal_key n = { .btree_id = id, .level = level, - .k = k, .allocated = true, - /* - * Ensure these keys are done last by journal replay, to unblock - * journal reclaim: - */ - .journal_seq = U64_MAX, + .allocated_k = k, }; struct journal_keys *keys = &c->journal_keys; size_t idx = bch2_journal_key_search(keys, id, level, k->k.p); @@ -278,8 +284,8 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id, BUG_ON(test_bit(BCH_FS_rw, &c->flags)); if (idx < keys->size && - journal_key_cmp(&n, &keys->data[idx]) == 0) { - struct bkey_i *o = keys->data[idx].k; + journal_key_cmp(c, &n, &keys->data[idx]) == 0) { + struct bkey_i *o = journal_key_k(c, &keys->data[idx]); if (k->k.type == KEY_TYPE_accounting && o->k.type == KEY_TYPE_accounting) { @@ -291,7 +297,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id, } if (keys->data[idx].allocated) - kfree(keys->data[idx].k); + kfree(keys->data[idx].allocated_k); keys->data[idx] = n; return 0; } @@ -376,17 +382,20 @@ int bch2_journal_key_delete(struct bch_fs *c, enum btree_id id, bool bch2_key_deleted_in_journal(struct btree_trans *trans, enum btree_id btree, unsigned level, struct bpos pos) { - struct journal_keys *keys = &trans->c->journal_keys; + if (!trans->journal_replay_not_finished) + return false; + + struct bch_fs *c = trans->c; + struct journal_keys *keys = &c->journal_keys; size_t idx = bch2_journal_key_search(keys, btree, level, pos); - if (!trans->journal_replay_not_finished) + if (idx >= keys->size || + keys->data[idx].btree_id != btree || + keys->data[idx].level != level) return false; - return (idx 
< keys->size && - keys->data[idx].btree_id == btree && - keys->data[idx].level == level && - bpos_eq(keys->data[idx].k->k.p, pos) && - bkey_deleted(&keys->data[idx].k->k)); + struct bkey_i *k = journal_key_k(c, &keys->data[idx]); + return bpos_eq(k->k.p, pos) && bkey_deleted(&k->k); } static void __bch2_journal_key_overwritten(struct journal_keys *keys, size_t pos) @@ -403,9 +412,9 @@ static void __bch2_journal_key_overwritten(struct journal_keys *keys, size_t pos bool next_overwritten = next && next->overwritten; struct journal_key_range_overwritten *prev_range = - prev_overwritten ? prev->overwritten_range : NULL; + prev_overwritten ? __overwrite_range(keys, prev->overwritten_range) : NULL; struct journal_key_range_overwritten *next_range = - next_overwritten ? next->overwritten_range : NULL; + next_overwritten ? __overwrite_range(keys, next->overwritten_range) : NULL; BUG_ON(prev_range && prev_range->end != idx); BUG_ON(next_range && next_range->start != idx + 1); @@ -413,37 +422,47 @@ static void __bch2_journal_key_overwritten(struct journal_keys *keys, size_t pos if (prev_range && next_range) { prev_range->end = next_range->end; - keys->data[pos].overwritten_range = prev_range; + keys->data[pos].overwritten_range = prev->overwritten_range; + + u32 old = next->overwritten_range; + for (size_t i = next_range->start; i < next_range->end; i++) { struct journal_key *ip = keys->data + idx_to_pos(keys, i); - BUG_ON(ip->overwritten_range != next_range); - ip->overwritten_range = prev_range; + BUG_ON(ip->overwritten_range != old); + ip->overwritten_range = prev->overwritten_range; } - - kfree_rcu_mightsleep(next_range); } else if (prev_range) { prev_range->end++; - k->overwritten_range = prev_range; + k->overwritten_range = prev->overwritten_range; if (next_overwritten) { prev_range->end++; - next->overwritten_range = prev_range; + next->overwritten_range = prev->overwritten_range; } } else if (next_range) { next_range->start--; - k->overwritten_range = next_range; + 
k->overwritten_range = next->overwritten_range; if (prev_overwritten) { next_range->start--; - prev->overwritten_range = next_range; + prev->overwritten_range = next->overwritten_range; } } else if (prev_overwritten || next_overwritten) { - struct journal_key_range_overwritten *r = kmalloc(sizeof(*r), GFP_KERNEL); - if (!r) + /* 0 is a sentinel value */ + if (darray_resize_rcu(&keys->overwrites, max(keys->overwrites.nr + 1, 2))) return; - r->start = idx - (size_t) prev_overwritten; - r->end = idx + 1 + (size_t) next_overwritten; + if (!keys->overwrites.nr) + darray_push(&keys->overwrites, (struct journal_key_range_overwritten) {}); + + darray_push(&keys->overwrites, ((struct journal_key_range_overwritten) { + .start = idx - (size_t) prev_overwritten, + .end = idx + 1 + (size_t) next_overwritten, + })); + + smp_wmb(); + u32 r = keys->overwrites.nr - 1; + + k->overwritten_range = r; - rcu_assign_pointer(k->overwritten_range, r); if (prev_overwritten) prev->overwritten_range = r; if (next_overwritten) @@ -457,11 +476,15 @@ void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree, struct journal_keys *keys = &c->journal_keys; size_t idx = bch2_journal_key_search(keys, btree, level, pos); - if (idx < keys->size && - keys->data[idx].btree_id == btree && - keys->data[idx].level == level && - bpos_eq(keys->data[idx].k->k.p, pos) && - !keys->data[idx].overwritten) { + if (idx >= keys->size || + keys->data[idx].btree_id != btree || + keys->data[idx].level != level || + keys->data[idx].overwritten) + return; + + struct bkey_i *k = journal_key_k(c, &keys->data[idx]); + + if (bpos_eq(k->k.p, pos)) { guard(mutex)(&keys->overwrite_lock); __bch2_journal_key_overwritten(keys, idx); } @@ -476,7 +499,7 @@ static void bch2_journal_iter_advance(struct journal_iter *iter) } } -static struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter) +static struct bkey_s_c bch2_journal_iter_peek(struct bch_fs *c, struct journal_iter *iter) { journal_iter_verify(iter); @@ 
-490,10 +513,10 @@ static struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter) BUG_ON(cmp); if (!k->overwritten) - return bkey_i_to_s_c(k->k); + return bkey_i_to_s_c(journal_key_k(c, k)); if (k->overwritten_range) - iter->idx = idx_to_pos(iter->keys, rcu_dereference(k->overwritten_range)->end); + iter->idx = idx_to_pos(iter->keys, overwrite_range(iter->keys, k->overwritten_range)->end); else bch2_journal_iter_advance(iter); } @@ -554,7 +577,7 @@ static void btree_and_journal_iter_prefetch(struct btree_and_journal_iter *_iter while (nr--) { bch2_btree_and_journal_iter_advance(&iter); - struct bkey_s_c k = bch2_btree_and_journal_iter_peek(&iter); + struct bkey_s_c k = bch2_btree_and_journal_iter_peek(c, &iter); if (!k.k) break; @@ -565,7 +588,7 @@ static void btree_and_journal_iter_prefetch(struct btree_and_journal_iter *_iter bch2_bkey_buf_exit(&tmp, c); } -struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *iter) +struct bkey_s_c bch2_btree_and_journal_iter_peek(struct bch_fs *c, struct btree_and_journal_iter *iter) { struct bkey_s_c btree_k, journal_k = bkey_s_c_null, ret; size_t iters = 0; @@ -586,7 +609,7 @@ again: bch2_journal_iter_advance_btree(iter); if (iter->trans->journal_replay_not_finished) - while ((journal_k = bch2_journal_iter_peek(&iter->journal)).k && + while ((journal_k = bch2_journal_iter_peek(c, &iter->journal)).k && bpos_lt(journal_k.k->p, iter->pos)) bch2_journal_iter_advance(&iter->journal); @@ -658,15 +681,22 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans, /* * When keys compare equal, oldest compares first: */ -static int journal_sort_key_cmp(const void *_l, const void *_r) +static int journal_sort_key_cmp(const void *_l, const void *_r, const void *priv) { + struct bch_fs *c = (void *) priv; const struct journal_key *l = _l; const struct journal_key *r = _r; int rewind = l->rewind && r->rewind ? 
-1 : 1; - return journal_key_cmp(l, r) ?: - ((cmp_int(l->journal_seq, r->journal_seq) ?: - cmp_int(l->journal_offset, r->journal_offset)) * rewind); + int cmp = journal_key_cmp(c, l, r); + if (cmp) + return cmp; + + if (l->allocated || r->allocated) + return cmp_int(l->allocated, r->allocated); + + return ((cmp_int(l->journal_seq_offset, r->journal_seq_offset) ?: + cmp_int(l->journal_offset, r->journal_offset)) * rewind); } void bch2_journal_keys_put(struct bch_fs *c) @@ -680,20 +710,16 @@ void bch2_journal_keys_put(struct bch_fs *c) move_gap(keys, keys->nr); - darray_for_each(*keys, i) { - if (i->overwritten_range && - (i == &darray_last(*keys) || - i->overwritten_range != i[1].overwritten_range)) - kfree(i->overwritten_range); - + darray_for_each(*keys, i) if (i->allocated) - kfree(i->k); - } + kfree(i->allocated_k); kvfree(keys->data); keys->data = NULL; keys->nr = keys->gap = keys->size = 0; + darray_exit(&keys->overwrites); + struct journal_replay **i; struct genradix_iter iter; @@ -704,8 +730,10 @@ void bch2_journal_keys_put(struct bch_fs *c) static void __journal_keys_sort(struct journal_keys *keys) { - sort_nonatomic(keys->data, keys->nr, sizeof(keys->data[0]), - journal_sort_key_cmp, NULL); + struct bch_fs *c = container_of(keys, struct bch_fs, journal_keys); + + sort_r_nonatomic(keys->data, keys->nr, sizeof(keys->data[0]), + journal_sort_key_cmp, NULL, c); cond_resched(); @@ -717,9 +745,10 @@ static void __journal_keys_sort(struct journal_keys *keys) * compare each individual accounting key against the version in * the btree during replay: */ - if (src->k->k.type != KEY_TYPE_accounting && + struct bkey_i *k = journal_key_k(c, src); + if (k->k.type != KEY_TYPE_accounting && src + 1 < &darray_top(*keys) && - !journal_key_cmp(src, src + 1)) + !journal_key_cmp(c, src, src + 1)) continue; *dst++ = *src; @@ -763,8 +792,7 @@ int bch2_journal_keys_sort(struct bch_fs *c) .btree_id = entry->btree_id, .level = entry->level, .rewind = rewind, - .k = k, - .journal_seq 
= le64_to_cpu(i->j.seq), + .journal_seq_offset = journal_entry_radix_idx(c, le64_to_cpu(i->j.seq)), .journal_offset = k->_data - i->j._data, }; @@ -801,13 +829,18 @@ void bch2_shoot_down_journal_keys(struct bch_fs *c, enum btree_id btree, move_gap(keys, keys->nr); - darray_for_each(*keys, i) + darray_for_each(*keys, i) { + struct bkey_i *k = journal_key_k(c, i); + if (!(i->btree_id == btree && i->level >= level_min && i->level <= level_max && - bpos_ge(i->k->k.p, start) && - bpos_le(i->k->k.p, end))) + bpos_ge(k->k.p, start) && + bpos_le(k->k.p, end))) keys->data[dst++] = *i; + else if (i->allocated) + kfree(i->allocated_k); + } keys->nr = keys->gap = dst; } @@ -825,7 +858,7 @@ void bch2_journal_keys_dump(struct bch_fs *c) prt_printf(&buf, "btree="); bch2_btree_id_to_text(&buf, i->btree_id); prt_printf(&buf, " l=%u ", i->level); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(i->k)); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(journal_key_k(c, i))); pr_err("%s", buf.buf); } } diff --git a/libbcachefs/btree_journal_iter.h b/libbcachefs/btree_journal_iter.h index 2a308291..8dc8e778 100644 --- a/libbcachefs/btree_journal_iter.h +++ b/libbcachefs/btree_journal_iter.h @@ -29,6 +29,22 @@ struct btree_and_journal_iter { bool fail_if_too_many_whiteouts; }; +static inline u32 journal_entry_radix_idx(struct bch_fs *c, u64 seq) +{ + return (seq - c->journal_entries_base_seq) & (~0U >> 1); +} + +static inline struct bkey_i *journal_key_k(struct bch_fs *c, + const struct journal_key *k) +{ + if (k->allocated) + return k->allocated_k; + + struct journal_replay *i = *genradix_ptr(&c->journal_entries, k->journal_seq_offset); + + return (struct bkey_i *) (i->j._data + k->journal_offset); +} + static inline int __journal_key_btree_cmp(enum btree_id l_btree_id, unsigned l_level, const struct journal_key *r) @@ -37,25 +53,28 @@ static inline int __journal_key_btree_cmp(enum btree_id l_btree_id, cmp_int(l_btree_id, r->btree_id); } -static inline int __journal_key_cmp(enum btree_id 
l_btree_id, +static inline int __journal_key_cmp(struct bch_fs *c, + enum btree_id l_btree_id, unsigned l_level, struct bpos l_pos, const struct journal_key *r) { return __journal_key_btree_cmp(l_btree_id, l_level, r) ?: - bpos_cmp(l_pos, r->k->k.p); + bpos_cmp(l_pos, journal_key_k(c, r)->k.p); } -static inline int journal_key_cmp(const struct journal_key *l, const struct journal_key *r) +static inline int journal_key_cmp(struct bch_fs *c, + const struct journal_key *l, const struct journal_key *r) { - return __journal_key_cmp(l->btree_id, l->level, l->k->k.p, r); + return __journal_key_cmp(c, l->btree_id, l->level, + journal_key_k(c, l)->k.p, r); } -struct bkey_i *bch2_journal_keys_peek_max(struct bch_fs *, enum btree_id, +const struct bkey_i *bch2_journal_keys_peek_max(struct bch_fs *, enum btree_id, unsigned, struct bpos, struct bpos, size_t *); -struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *, enum btree_id, +const struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *, enum btree_id, unsigned, struct bpos, struct bpos, size_t *); -struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *, enum btree_id, +const struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *, enum btree_id, unsigned, struct bpos); int bch2_btree_and_journal_iter_prefetch(struct btree_trans *, struct btree_path *, @@ -71,7 +90,7 @@ bool bch2_key_deleted_in_journal(struct btree_trans *, enum btree_id, unsigned, void bch2_journal_key_overwritten(struct bch_fs *, enum btree_id, unsigned, struct bpos); void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *); -struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *); +struct bkey_s_c bch2_btree_and_journal_iter_peek(struct bch_fs *, struct btree_and_journal_iter *); void bch2_btree_and_journal_iter_exit(struct btree_and_journal_iter *); void __bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *, diff --git a/libbcachefs/btree_journal_iter_types.h 
b/libbcachefs/btree_journal_iter_types.h index 86aacb25..4495fc92 100644 --- a/libbcachefs/btree_journal_iter_types.h +++ b/libbcachefs/btree_journal_iter_types.h @@ -2,21 +2,47 @@ #ifndef _BCACHEFS_BTREE_JOURNAL_ITER_TYPES_H #define _BCACHEFS_BTREE_JOURNAL_ITER_TYPES_H +struct journal_ptr { + bool csum_good; + struct bch_csum csum; + u8 dev; + u32 bucket; + u32 bucket_offset; + u64 sector; +}; + +/* + * Only used for holding the journal entries we read in btree_journal_read() + * during cache_registration + */ +struct journal_replay { + DARRAY_PREALLOCATED(struct journal_ptr, 8) ptrs; + + bool csum_good; + bool ignore_blacklisted; + bool ignore_not_dirty; + /* must be last: */ + struct jset j; +}; + struct journal_key_range_overwritten { size_t start, end; }; struct journal_key { - u64 journal_seq; - u32 journal_offset; + union { + struct { + u32 journal_seq_offset; + u32 journal_offset; + }; + struct bkey_i *allocated_k; + }; enum btree_id btree_id:8; unsigned level:8; bool allocated:1; bool overwritten:1; bool rewind:1; - struct journal_key_range_overwritten __rcu * - overwritten_range; - struct bkey_i *k; + u32 overwritten_range; }; struct journal_keys { @@ -31,7 +57,9 @@ struct journal_keys { size_t gap; atomic_t ref; bool initial_ref_held; + struct mutex overwrite_lock; + DARRAY(struct journal_key_range_overwritten) overwrites; }; #endif /* _BCACHEFS_BTREE_JOURNAL_ITER_TYPES_H */ diff --git a/libbcachefs/btree_trans_commit.c b/libbcachefs/btree_trans_commit.c index 4d58bdb2..5fa7f2f9 100644 --- a/libbcachefs/btree_trans_commit.c +++ b/libbcachefs/btree_trans_commit.c @@ -54,7 +54,7 @@ static void verify_update_old_key(struct btree_trans *trans, struct btree_insert struct bkey_s_c k = bch2_btree_path_peek_slot_exact(trans->paths + i->path, &u); if (unlikely(trans->journal_replay_not_finished)) { - struct bkey_i *j_k = + const struct bkey_i *j_k = bch2_journal_keys_peek_slot(c, i->btree_id, i->level, i->k->k.p); if (j_k) diff --git a/libbcachefs/btree_update.c 
b/libbcachefs/btree_update.c index 053a837c..b70eb095 100644 --- a/libbcachefs/btree_update.c +++ b/libbcachefs/btree_update.c @@ -403,7 +403,7 @@ __btree_trans_update_by_path(struct btree_trans *trans, i->old_btree_u64s = !bkey_deleted(&i->old_k) ? i->old_k.u64s : 0; if (unlikely(trans->journal_replay_not_finished)) { - struct bkey_i *j_k = + const struct bkey_i *j_k = bch2_journal_keys_peek_slot(c, n.btree_id, n.level, k->k.p); if (j_k) { diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index 65ca54c5..a9877a47 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -95,7 +95,7 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) if (!b->c.level) goto out; - while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) { + while ((k = bch2_btree_and_journal_iter_peek(c, &iter)).k) { if (k.k->type != KEY_TYPE_btree_ptr_v2) goto out; diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c index 87a6f4dc..280b169e 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -111,7 +111,7 @@ static int bch2_check_fix_ptr(struct btree_trans *trans, CLASS(printbuf, buf)(); int ret = 0; - CLASS(bch2_dev_tryget, ca)(c, p.ptr.dev); + CLASS(bch2_dev_tryget_noerror, ca)(c, p.ptr.dev); if (!ca) { if (fsck_err_on(p.ptr.dev != BCH_SB_MEMBER_INVALID, trans, ptr_to_invalid_device, diff --git a/libbcachefs/chardev.c b/libbcachefs/chardev.c index 467fc45e..3b8c1409 100644 --- a/libbcachefs/chardev.c +++ b/libbcachefs/chardev.c @@ -287,11 +287,44 @@ static long bch2_ioctl_disk_set_state(struct bch_fs *c, if (IS_ERR(ca)) return PTR_ERR(ca); - int ret = bch2_dev_set_state(c, ca, arg.new_state, arg.flags); + CLASS(printbuf, err)(); + int ret = bch2_dev_set_state(c, ca, arg.new_state, arg.flags, &err); bch_err_msg(ca, ret, "setting device state"); return ret; } +static long bch2_ioctl_disk_set_state_v2(struct bch_fs *c, + struct bch_ioctl_disk_set_state_v2 arg) +{ + if 
(!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST| + BCH_FORCE_IF_METADATA_LOST| + BCH_FORCE_IF_DEGRADED| + BCH_BY_INDEX)) || + arg.pad[0] || arg.pad[1] || arg.pad[2] || + arg.new_state >= BCH_MEMBER_STATE_NR) + return -EINVAL; + + CLASS(bch2_device_lookup, ca)(c, arg.dev, arg.flags); + if (IS_ERR(ca)) + return PTR_ERR(ca); + + CLASS(printbuf, err)(); + int ret = bch2_dev_set_state(c, ca, arg.new_state, arg.flags, &err); + if (ret) { + prt_printf(&err, "\nerror=%s", bch2_err_str(ret)); + /* Check the length only after the final append, so we never copy more than msg_len bytes to userspace */ + if (err.pos > arg.err.msg_len) + return -ERANGE; + ret = copy_to_user_errcode((void __user *)(ulong)arg.err.msg_ptr, + err.buf, + err.pos) ?: ret; + } + return ret; +} + struct bch_data_ctx { struct thread_with_file thr; @@ -692,6 +725,8 @@ long bch2_fs_ioctl(struct bch_fs *c, unsigned cmd, void __user *arg) BCH_IOCTL(disk_offline, struct bch_ioctl_disk); case BCH_IOCTL_DISK_SET_STATE: BCH_IOCTL(disk_set_state, struct bch_ioctl_disk_set_state); + case BCH_IOCTL_DISK_SET_STATE_v2: + BCH_IOCTL(disk_set_state_v2, struct bch_ioctl_disk_set_state_v2); case BCH_IOCTL_DATA: BCH_IOCTL(data, struct bch_ioctl_data); case BCH_IOCTL_DISK_RESIZE: diff --git a/libbcachefs/darray.c b/libbcachefs/darray.c index e86d36d2..6940037b 100644 --- a/libbcachefs/darray.c +++ b/libbcachefs/darray.c @@ -1,11 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/log2.h> +#include <linux/rcupdate.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include "darray.h" -int __bch2_darray_resize_noprof(darray_char *d, size_t element_size, size_t new_size, gfp_t gfp) +int __bch2_darray_resize_noprof(darray_char *d, size_t element_size, size_t new_size, gfp_t gfp, + bool rcu) { if (new_size > d->size) { new_size = roundup_pow_of_two(new_size); @@ -20,18 +22,25 @@ int __bch2_darray_resize_noprof(darray_char *d, size_t element_size, size_t new_ if (unlikely(check_mul_overflow(new_size, element_size, &bytes))) return -ENOMEM; - void *data = likely(bytes < INT_MAX) +
void *old = d->data; + void *new = likely(bytes < INT_MAX) ? kvmalloc_noprof(bytes, gfp) : vmalloc_noprof(bytes); - if (!data) + if (!new) return -ENOMEM; if (d->size) - memcpy(data, d->data, d->size * element_size); - if (d->data != d->preallocated) - kvfree(d->data); - d->data = data; + memcpy(new, old, d->size * element_size); + + rcu_assign_pointer(d->data, new); d->size = new_size; + + if (old != d->preallocated) { + if (!rcu) + kvfree(old); + else + kvfree_rcu_mightsleep(old); + } } return 0; diff --git a/libbcachefs/darray.h b/libbcachefs/darray.h index 4080ee99..b4f284fe 100644 --- a/libbcachefs/darray.h +++ b/libbcachefs/darray.h @@ -34,17 +34,17 @@ typedef DARRAY(s16) darray_s16; typedef DARRAY(s32) darray_s32; typedef DARRAY(s64) darray_s64; -int __bch2_darray_resize_noprof(darray_char *, size_t, size_t, gfp_t); +int __bch2_darray_resize_noprof(darray_char *, size_t, size_t, gfp_t, bool); #define __bch2_darray_resize(...) alloc_hooks(__bch2_darray_resize_noprof(__VA_ARGS__)) -#define __darray_resize(_d, _element_size, _new_size, _gfp) \ +#define __darray_resize(_d, _element_size, _new_size, _gfp, _rcu) \ (unlikely((_new_size) > (_d)->size) \ - ? __bch2_darray_resize((_d), (_element_size), (_new_size), (_gfp))\ + ? 
__bch2_darray_resize((_d), (_element_size), (_new_size), (_gfp), _rcu)\ : 0) #define darray_resize_gfp(_d, _new_size, _gfp) \ - __darray_resize((darray_char *) (_d), sizeof((_d)->data[0]), (_new_size), _gfp) + __darray_resize((darray_char *) (_d), sizeof((_d)->data[0]), (_new_size), _gfp, false) #define darray_resize(_d, _new_size) \ darray_resize_gfp(_d, _new_size, GFP_KERNEL) @@ -55,6 +55,12 @@ int __bch2_darray_resize_noprof(darray_char *, size_t, size_t, gfp_t); #define darray_make_room(_d, _more) \ darray_make_room_gfp(_d, _more, GFP_KERNEL) +#define darray_resize_rcu(_d, _new_size) \ + __darray_resize((darray_char *) (_d), sizeof((_d)->data[0]), (_new_size), GFP_KERNEL, true) + +#define darray_make_room_rcu(_d, _more) \ + darray_resize_rcu((_d), (_d)->nr + (_more)) + #define darray_room(_d) ((_d).size - (_d).nr) #define darray_top(_d) ((_d).data[(_d).nr]) @@ -107,8 +113,11 @@ int __bch2_darray_resize_noprof(darray_char *, size_t, size_t, gfp_t); #define __darray_for_each(_d, _i) \ for ((_i) = (_d).data; _i < (_d).data + (_d).nr; _i++) +#define darray_for_each_from(_d, _i, _start) \ + for (typeof(&(_d).data[0]) _i = _start; _i < (_d).data + (_d).nr; _i++) + #define darray_for_each(_d, _i) \ - for (typeof(&(_d).data[0]) _i = (_d).data; _i < (_d).data + (_d).nr; _i++) + darray_for_each_from(_d, _i, (_d).data) #define darray_for_each_reverse(_d, _i) \ for (typeof(&(_d).data[0]) _i = (_d).data + (_d).nr - 1; _i >= (_d).data && (_d).nr; --_i) diff --git a/libbcachefs/disk_accounting.c b/libbcachefs/disk_accounting.c index 5944ad6d..809c76b6 100644 --- a/libbcachefs/disk_accounting.c +++ b/libbcachefs/disk_accounting.c @@ -734,6 +734,37 @@ invalid_device: goto fsck_err; } +static struct journal_key *accumulate_newer_accounting_keys(struct bch_fs *c, struct journal_key *i) +{ + struct journal_keys *keys = &c->journal_keys; + struct bkey_i *k = journal_key_k(c, i); + + darray_for_each_from(*keys, j, i + 1) { + if (journal_key_cmp(c, i, j)) + return j; + + struct 
bkey_i *n = journal_key_k(c, j); + if (n->k.type == KEY_TYPE_accounting) { + WARN_ON(bversion_cmp(k->k.bversion, n->k.bversion) >= 0); + + bch2_accounting_accumulate(bkey_i_to_accounting(k), + bkey_i_to_s_c_accounting(n)); + j->overwritten = true; + } + } + + return &darray_top(*keys); +} + +static struct journal_key *accumulate_and_read_journal_accounting(struct btree_trans *trans, struct journal_key *i) +{ + struct bch_fs *c = trans->c; + struct journal_key *next = accumulate_newer_accounting_keys(c, i); + + int ret = accounting_read_key(trans, bkey_i_to_s_c(journal_key_k(c, i))); + return ret ? ERR_PTR(ret) : next; +} + /* * At startup time, initialize the in memory accounting from the btree (and * journal) @@ -759,80 +790,76 @@ int bch2_accounting_read(struct bch_fs *c) percpu_memset(c->usage, 0, sizeof(*c->usage)); } + struct journal_keys *keys = &c->journal_keys; + struct journal_key *jk = keys->data; + + while (jk < &darray_top(*keys) && + __journal_key_cmp(c, BTREE_ID_accounting, 0, POS_MIN, jk) > 0) + jk++; + + struct journal_key *end = jk; + while (end < &darray_top(*keys) && + __journal_key_cmp(c, BTREE_ID_accounting, 0, SPOS_MAX, end) > 0) + end++; + struct btree_iter iter; bch2_trans_iter_init(trans, &iter, BTREE_ID_accounting, POS_MIN, BTREE_ITER_prefetch|BTREE_ITER_all_snapshots); iter.flags &= ~BTREE_ITER_with_journal; int ret = for_each_btree_key_continue(trans, iter, BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, ({ - struct bkey u; - struct bkey_s_c k = bch2_btree_path_peek_slot_exact(btree_iter_path(trans, &iter), &u); - - if (k.k->type != KEY_TYPE_accounting) - continue; - - struct disk_accounting_pos acc_k; - bpos_to_disk_accounting_pos(&acc_k, k.k->p); - - if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) - break; - - if (!bch2_accounting_is_mem(&acc_k)) { - struct disk_accounting_pos next; - memset(&next, 0, sizeof(next)); - next.type = acc_k.type + 1; - bch2_btree_iter_set_pos(&iter, disk_accounting_pos_to_bpos(&next)); - continue; - } + 
struct bkey u; + struct bkey_s_c k = bch2_btree_path_peek_slot_exact(btree_iter_path(trans, &iter), &u); - accounting_read_key(trans, k); - })); - bch2_trans_iter_exit(&iter); - if (ret) - return ret; - - struct journal_keys *keys = &c->journal_keys; - struct journal_key *dst = keys->data; - move_gap(keys, keys->nr); + if (k.k->type != KEY_TYPE_accounting) + continue; - darray_for_each(*keys, i) { - if (i->k->k.type == KEY_TYPE_accounting) { - struct disk_accounting_pos acc_k; - bpos_to_disk_accounting_pos(&acc_k, i->k->k.p); + while (jk < end && + __journal_key_cmp(c, BTREE_ID_accounting, 0, k.k->p, jk) > 0) + jk = accumulate_and_read_journal_accounting(trans, jk); - if (!bch2_accounting_is_mem(&acc_k)) - continue; + while (jk < end && + __journal_key_cmp(c, BTREE_ID_accounting, 0, k.k->p, jk) == 0 && + bversion_cmp(journal_key_k(c, jk)->k.bversion, k.k->bversion) <= 0) { + jk->overwritten = true; + jk++; + } - struct bkey_s_c k = bkey_i_to_s_c(i->k); - unsigned idx = eytzinger0_find(acc->k.data, acc->k.nr, - sizeof(acc->k.data[0]), - accounting_pos_cmp, &k.k->p); + if (jk < end && + __journal_key_cmp(c, BTREE_ID_accounting, 0, k.k->p, jk) == 0) + jk = accumulate_and_read_journal_accounting(trans, jk); - bool applied = idx < acc->k.nr && - bversion_cmp(acc->k.data[idx].bversion, k.k->bversion) >= 0; + struct disk_accounting_pos acc_k; + bpos_to_disk_accounting_pos(&acc_k, k.k->p); - if (applied) - continue; + if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) + break; - if (i + 1 < &darray_top(*keys) && - i[1].k->k.type == KEY_TYPE_accounting && - !journal_key_cmp(i, i + 1)) { - WARN_ON(bversion_cmp(i[0].k->k.bversion, i[1].k->k.bversion) >= 0); + if (!bch2_accounting_is_mem(&acc_k)) { + struct disk_accounting_pos next_acc; + memset(&next_acc, 0, sizeof(next_acc)); + next_acc.type = acc_k.type + 1; + struct bpos next = disk_accounting_pos_to_bpos(&next_acc); + if (jk < end) + next = bpos_min(next, journal_key_k(c, jk)->k.p); - i[1].journal_seq = i[0].journal_seq; + 
bch2_btree_iter_set_pos(&iter, next); + continue; + } - bch2_accounting_accumulate(bkey_i_to_accounting(i[1].k), - bkey_s_c_to_accounting(k)); - continue; - } + accounting_read_key(trans, k); + })); + bch2_trans_iter_exit(&iter); + if (ret) + return ret; - ret = accounting_read_key(trans, k); - if (ret) - return ret; - } + while (jk < end) + jk = accumulate_and_read_journal_accounting(trans, jk); - *dst++ = *i; - } + struct journal_key *dst = keys->data; + darray_for_each(*keys, i) + if (!i->overwritten) + *dst++ = *i; keys->gap = keys->nr = dst - keys->data; guard(percpu_write)(&c->mark_lock); diff --git a/libbcachefs/error.c b/libbcachefs/error.c index 32a286b3..e33f3166 100644 --- a/libbcachefs/error.c +++ b/libbcachefs/error.c @@ -141,14 +141,16 @@ void bch2_io_error_work(struct work_struct *work) if (ca->mi.state >= BCH_MEMBER_STATE_ro) return; - bool dev = !__bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_ro, - BCH_FORCE_IF_DEGRADED); CLASS(printbuf, buf)(); __bch2_log_msg_start(ca->name, &buf); - prt_printf(&buf, "writes erroring for %u seconds, setting %s ro", - c->opts.write_error_timeout, - dev ? "device" : "filesystem"); + prt_printf(&buf, "writes erroring for %u seconds\n", + c->opts.write_error_timeout); + + bool dev = !__bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_ro, + BCH_FORCE_IF_DEGRADED, &buf); + + prt_printf(&buf, "setting %s ro", dev ? 
"device" : "filesystem"); if (!dev) bch2_fs_emergency_read_only2(c, &buf); diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c index d56959f1..93ac0fae 100644 --- a/libbcachefs/journal.c +++ b/libbcachefs/journal.c @@ -120,6 +120,7 @@ static void journal_pin_list_init(struct journal_entry_pin_list *p, int count) INIT_LIST_HEAD(&p->flushed[i]); atomic_set(&p->count, count); p->devs.nr = 0; + p->bytes = 0; } /* @@ -264,6 +265,11 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val, bool t /* Close out old buffer: */ buf->data->u64s = cpu_to_le32(old.cur_entry_offset); + struct journal_entry_pin_list *pin_list = + journal_seq_pin(j, journal_cur_seq(j)); + pin_list->bytes = roundup_pow_of_two(vstruct_bytes(buf->data)); + j->dirty_entry_bytes += pin_list->bytes; + if (trace_journal_entry_close_enabled() && trace) { CLASS(printbuf, err)(); guard(printbuf_atomic)(&err); diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c index 093e4aca..734ce88b 100644 --- a/libbcachefs/journal_io.c +++ b/libbcachefs/journal_io.c @@ -3,6 +3,7 @@ #include "alloc_background.h" #include "alloc_foreground.h" #include "btree_io.h" +#include "btree_journal_iter.h" #include "btree_update_interior.h" #include "btree_write_buffer.h" #include "buckets.h" @@ -106,11 +107,6 @@ static bool jset_csum_good(struct bch_fs *c, struct jset *j, struct bch_csum *cs return !bch2_crc_cmp(j->csum, *csum); } -static inline u32 journal_entry_radix_idx(struct bch_fs *c, u64 seq) -{ - return (seq - c->journal_entries_base_seq) & (~0U >> 1); -} - static void __journal_replay_free(struct bch_fs *c, struct journal_replay *i) { @@ -195,6 +191,23 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca, } } + /* Drop overwrites, log entries if we don't need them: */ + if (!c->opts.retain_recovery_info && + !c->opts.journal_rewind) { + struct jset_entry *dst = j->start; + vstruct_for_each_safe(j, src) { + if (src->type == BCH_JSET_ENTRY_log || + src->type == 
BCH_JSET_ENTRY_overwrite) + continue; + + memcpy(dst, src, vstruct_bytes(src)); + dst = vstruct_next(dst); + } + + j->u64s = cpu_to_le32((u64 *) dst - j->_data); + bytes = vstruct_bytes(j); + } + jlist->last_seq = max(jlist->last_seq, last_seq); _i = genradix_ptr_alloc(&c->journal_entries, diff --git a/libbcachefs/journal_io.h b/libbcachefs/journal_io.h index f53c5c81..f8754bf7 100644 --- a/libbcachefs/journal_io.h +++ b/libbcachefs/journal_io.h @@ -7,29 +7,6 @@ void bch2_journal_pos_from_member_info_set(struct bch_fs *); void bch2_journal_pos_from_member_info_resume(struct bch_fs *); -struct journal_ptr { - bool csum_good; - struct bch_csum csum; - u8 dev; - u32 bucket; - u32 bucket_offset; - u64 sector; -}; - -/* - * Only used for holding the journal entries we read in btree_journal_read() - * during cache_registration - */ -struct journal_replay { - DARRAY_PREALLOCATED(struct journal_ptr, 8) ptrs; - - bool csum_good; - bool ignore_blacklisted; - bool ignore_not_dirty; - /* must be last: */ - struct jset j; -}; - static inline bool journal_replay_ignore(struct journal_replay *i) { return !i || i->ignore_blacklisted || i->ignore_not_dirty; diff --git a/libbcachefs/journal_reclaim.c b/libbcachefs/journal_reclaim.c index f23e5ee9..bd188560 100644 --- a/libbcachefs/journal_reclaim.c +++ b/libbcachefs/journal_reclaim.c @@ -148,6 +148,9 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne BUG_ON(nr_devs_want > ARRAY_SIZE(dev_space)); + size_t mem_limit = max_t(ssize_t, 0, + (totalram_pages() * PAGE_SIZE) / 4 - j->dirty_entry_bytes); + for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal]) { if (!ca->journal.nr || !ca->mi.durability) @@ -180,6 +183,7 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne * @nr_devs_want largest devices: */ space = dev_space[nr_devs_want - 1]; + space.total = min(space.total, mem_limit >> 9); space.next_entry = min(space.next_entry, min_bucket_size); return space; } @@ 
-328,9 +332,17 @@ void bch2_journal_reclaim_fast(struct journal *j) * Unpin journal entries whose reference counts reached zero, meaning * all btree nodes got written out */ + struct journal_entry_pin_list *pin_list; while (!fifo_empty(&j->pin) && j->pin.front <= j->seq_ondisk && - !atomic_read(&fifo_peek_front(&j->pin).count)) { + !atomic_read(&(pin_list = &fifo_peek_front(&j->pin))->count)) { + + if (WARN_ON(j->dirty_entry_bytes < pin_list->bytes)) + pin_list->bytes = j->dirty_entry_bytes; + + j->dirty_entry_bytes -= pin_list->bytes; + pin_list->bytes = 0; + j->pin.front++; popped = true; } diff --git a/libbcachefs/journal_types.h b/libbcachefs/journal_types.h index 51104bbb..7c9273bd 100644 --- a/libbcachefs/journal_types.h +++ b/libbcachefs/journal_types.h @@ -71,6 +71,7 @@ struct journal_entry_pin_list { struct list_head flushed[JOURNAL_PIN_TYPE_NR]; atomic_t count; struct bch_devs_list devs; + size_t bytes; }; struct journal; @@ -253,6 +254,7 @@ struct journal { u64 front, back, size, mask; struct journal_entry_pin_list *data; } pin; + size_t dirty_entry_bytes; struct journal_space space[journal_space_nr]; diff --git a/libbcachefs/opts.c b/libbcachefs/opts.c index 921f9049..c3ef35dc 100644 --- a/libbcachefs/opts.c +++ b/libbcachefs/opts.c @@ -525,7 +525,7 @@ int bch2_opt_hook_pre_set(struct bch_fs *c, struct bch_dev *ca, enum bch_opt_id switch (id) { case Opt_state: if (ca) - return bch2_dev_set_state(c, ca, v, BCH_FORCE_IF_DEGRADED); + return bch2_dev_set_state(c, ca, v, BCH_FORCE_IF_DEGRADED, NULL); break; case Opt_compression: diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index 8280ca33..6319144a 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -181,9 +181,12 @@ void bch2_reconstruct_alloc(struct bch_fs *c) */ static void zero_out_btree_mem_ptr(struct journal_keys *keys) { - darray_for_each(*keys, i) - if (i->k->k.type == KEY_TYPE_btree_ptr_v2) - bkey_i_to_btree_ptr_v2(i->k)->v.mem_ptr = 0; + struct bch_fs *c = 
container_of(keys, struct bch_fs, journal_keys); + darray_for_each(*keys, i) { + struct bkey_i *k = journal_key_k(c, i); + if (k->k.type == KEY_TYPE_btree_ptr_v2) + bkey_i_to_btree_ptr_v2(k)->v.mem_ptr = 0; + } } /* journal replay: */ @@ -201,8 +204,10 @@ static void replay_now_at(struct journal *j, u64 seq) static int bch2_journal_replay_accounting_key(struct btree_trans *trans, struct journal_key *k) { + struct bch_fs *c = trans->c; + struct bkey_i *bk = journal_key_k(c, k); struct btree_iter iter; - bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p, + bch2_trans_node_iter_init(trans, &iter, k->btree_id, bk->k.p, BTREE_MAX_DEPTH, k->level, BTREE_ITER_intent); int ret = bch2_btree_iter_traverse(&iter); @@ -213,14 +218,14 @@ static int bch2_journal_replay_accounting_key(struct btree_trans *trans, struct bkey_s_c old = bch2_btree_path_peek_slot(btree_iter_path(trans, &iter), &u); /* Has this delta already been applied to the btree? */ - if (bversion_cmp(old.k->bversion, k->k->k.bversion) >= 0) { + if (bversion_cmp(old.k->bversion, bk->k.bversion) >= 0) { ret = 0; goto out; } - struct bkey_i *new = k->k; + struct bkey_i *new = bk; if (old.k->type == KEY_TYPE_accounting) { - new = bch2_bkey_make_mut_noupdate(trans, bkey_i_to_s_c(k->k)); + new = bch2_bkey_make_mut_noupdate(trans, bkey_i_to_s_c(bk)); ret = PTR_ERR_OR_ZERO(new); if (ret) goto out; @@ -229,7 +234,8 @@ static int bch2_journal_replay_accounting_key(struct btree_trans *trans, bkey_s_c_to_accounting(old)); } - trans->journal_res.seq = k->journal_seq; + if (!k->allocated) + trans->journal_res.seq = c->journal_entries_base_seq + k->journal_seq_offset; ret = bch2_trans_update(trans, &iter, new, BTREE_TRIGGER_norun); out: @@ -240,6 +246,7 @@ out: static int bch2_journal_replay_key(struct btree_trans *trans, struct journal_key *k) { + struct bch_fs *c = trans->c; struct btree_iter iter; unsigned iter_flags = BTREE_ITER_intent| @@ -250,7 +257,8 @@ static int bch2_journal_replay_key(struct btree_trans 
*trans, if (k->overwritten) return 0; - trans->journal_res.seq = k->journal_seq; + if (!k->allocated) + trans->journal_res.seq = c->journal_entries_base_seq + k->journal_seq_offset; /* * BTREE_UPDATE_key_cache_reclaim disables key cache lookup/update to @@ -265,7 +273,8 @@ static int bch2_journal_replay_key(struct btree_trans *trans, else update_flags |= BTREE_UPDATE_key_cache_reclaim; - bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p, + struct bkey_i *bk = journal_key_k(c, k); + bch2_trans_node_iter_init(trans, &iter, k->btree_id, bk->k.p, BTREE_MAX_DEPTH, k->level, iter_flags); ret = bch2_btree_iter_traverse(&iter); @@ -274,13 +283,11 @@ static int bch2_journal_replay_key(struct btree_trans *trans, struct btree_path *path = btree_iter_path(trans, &iter); if (unlikely(!btree_path_node(path, k->level))) { - struct bch_fs *c = trans->c; - CLASS(printbuf, buf)(); prt_str(&buf, "btree="); bch2_btree_id_to_text(&buf, k->btree_id); prt_printf(&buf, " level=%u ", k->level); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k->k)); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(bk)); if (!(c->recovery.passes_complete & (BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes)| BIT_ULL(BCH_RECOVERY_PASS_check_topology)))) { @@ -297,7 +304,7 @@ static int bch2_journal_replay_key(struct btree_trans *trans, } bch2_trans_iter_exit(&iter); - bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p, + bch2_trans_node_iter_init(trans, &iter, k->btree_id, bk->k.p, BTREE_MAX_DEPTH, 0, iter_flags); ret = bch2_btree_iter_traverse(&iter) ?: bch2_btree_increase_depth(trans, iter.path, 0) ?: @@ -309,17 +316,17 @@ static int bch2_journal_replay_key(struct btree_trans *trans, if (k->overwritten) goto out; - if (k->k->k.type == KEY_TYPE_accounting) { - struct bkey_i *n = bch2_trans_subbuf_alloc(trans, &trans->accounting, k->k->k.u64s); + if (bk->k.type == KEY_TYPE_accounting) { + struct bkey_i *n = bch2_trans_subbuf_alloc(trans, &trans->accounting, bk->k.u64s); ret = 
PTR_ERR_OR_ZERO(n); if (ret) goto out; - bkey_copy(n, k->k); + bkey_copy(n, bk); goto out; } - ret = bch2_trans_update(trans, &iter, k->k, update_flags); + ret = bch2_trans_update(trans, &iter, bk, update_flags); out: bch2_trans_iter_exit(&iter); return ret; @@ -330,13 +337,9 @@ static int journal_sort_seq_cmp(const void *_l, const void *_r) const struct journal_key *l = *((const struct journal_key **)_l); const struct journal_key *r = *((const struct journal_key **)_r); - /* - * Map 0 to U64_MAX, so that keys with journal_seq === 0 come last - * - * journal_seq == 0 means that the key comes from early repair, and - * should be inserted last so as to avoid overflowing the journal - */ - return cmp_int(l->journal_seq - 1, r->journal_seq - 1); + return !l->allocated && !r->allocated + ? cmp_int(l->journal_seq_offset, r->journal_seq_offset) + : cmp_int(l->allocated, r->allocated); } DEFINE_DARRAY_NAMED(darray_journal_keys, struct journal_key *) @@ -368,7 +371,9 @@ int bch2_journal_replay(struct bch_fs *c) * flush accounting keys until we're done */ darray_for_each(*keys, k) { - if (!(k->k->k.type == KEY_TYPE_accounting && !k->allocated)) + struct bkey_i *bk = journal_key_k(trans->c, k); + + if (!(bk->k.type == KEY_TYPE_accounting && !k->allocated)) continue; cond_resched(); @@ -411,7 +416,6 @@ int bch2_journal_replay(struct bch_fs *c) BCH_TRANS_COMMIT_skip_accounting_apply| (!k->allocated ? 
BCH_TRANS_COMMIT_no_journal_res : 0), bch2_journal_replay_key(trans, k)); - BUG_ON(!ret && !k->overwritten && k->k->k.type != KEY_TYPE_accounting); if (ret) { ret = darray_push(&keys_sorted, k); if (ret) @@ -433,8 +437,8 @@ int bch2_journal_replay(struct bch_fs *c) struct journal_key *k = *kp; - if (k->journal_seq) - replay_now_at(j, k->journal_seq); + if (!k->allocated) + replay_now_at(j, c->journal_entries_base_seq + k->journal_seq_offset); else replay_now_at(j, j->replay_journal_seq_end); diff --git a/libbcachefs/replicas.c b/libbcachefs/replicas.c index 0784283c..3ffd68d2 100644 --- a/libbcachefs/replicas.c +++ b/libbcachefs/replicas.c @@ -784,7 +784,7 @@ const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = { /* Query replicas: */ bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs, - unsigned flags, bool print) + unsigned flags, struct printbuf *err) { struct bch_replicas_entry_v1 *e; @@ -823,16 +823,14 @@ bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs, : BCH_FORCE_IF_DATA_DEGRADED; if (dflags & ~flags) { - if (print) { - CLASS(printbuf, buf)(); - - bch2_replicas_entry_to_text(&buf, e); - bch_err(c, "insufficient devices online (%u) for replicas entry %s", - nr_online, buf.buf); + if (err) { + prt_printf(err, "insufficient devices online (%u) for replicas entry ", + nr_online); + bch2_replicas_entry_to_text(err, e); + prt_newline(err); } return false; } - } return true; diff --git a/libbcachefs/replicas.h b/libbcachefs/replicas.h index 5aba2c1c..15023a9b 100644 --- a/libbcachefs/replicas.h +++ b/libbcachefs/replicas.h @@ -44,7 +44,7 @@ static inline void bch2_replicas_entry_cached(struct bch_replicas_entry_v1 *e, } bool bch2_have_enough_devs(struct bch_fs *, struct bch_devs_mask, - unsigned, bool); + unsigned, struct printbuf *); unsigned bch2_sb_dev_has_data(struct bch_sb *, unsigned); unsigned bch2_dev_has_data(struct bch_fs *, struct bch_dev *); diff --git a/libbcachefs/sb-counters_format.h 
b/libbcachefs/sb-counters_format.h index bfeb713d..96ad6492 100644 --- a/libbcachefs/sb-counters_format.h +++ b/libbcachefs/sb-counters_format.h @@ -129,6 +129,7 @@ static inline void __maybe_unused check_bch_counter_ids_unique(void) { #define x(t, n, ...) case (n): BCH_PERSISTENT_COUNTERS() #undef x + ; } } diff --git a/libbcachefs/sb-members.c b/libbcachefs/sb-members.c index e3c73d90..d26a0ca4 100644 --- a/libbcachefs/sb-members.c +++ b/libbcachefs/sb-members.c @@ -36,10 +36,12 @@ int bch2_dev_missing_bkey(struct bch_fs *c, struct bkey_s_c k, unsigned dev) void bch2_dev_missing_atomic(struct bch_fs *c, unsigned dev) { - if (dev != BCH_SB_MEMBER_INVALID) + if (dev != BCH_SB_MEMBER_INVALID) { bch2_fs_inconsistent(c, "pointer to %s device %u", test_bit(dev, c->devs_removed.d) ? "removed" : "nonexistent", dev); + dump_stack(); + } } void bch2_dev_bucket_missing(struct bch_dev *ca, u64 bucket) diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c index ec295a5e..61eeac67 100644 --- a/libbcachefs/super-io.c +++ b/libbcachefs/super-io.c @@ -90,7 +90,7 @@ int bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version v bch2_version_to_text(&buf, version); prt_str(&buf, " currently not enabled, allowed up to "); bch2_version_to_text(&buf, c->sb.version_incompat_allowed); - prt_printf(&buf, "\n set version_upgrade=incompatible to enable"); + prt_printf(&buf, "\n set version_upgrade=incompat to enable"); bch_notice(c, "%s", buf.buf); } @@ -1189,13 +1189,13 @@ int bch2_write_super(struct bch_fs *c) nr_wrote = dev_mask_nr(&sb_written); can_mount_with_written = - bch2_have_enough_devs(c, sb_written, degraded_flags, false); + bch2_have_enough_devs(c, sb_written, degraded_flags, NULL); for (unsigned i = 0; i < ARRAY_SIZE(sb_written.d); i++) sb_written.d[i] = ~sb_written.d[i]; can_mount_without_written = - bch2_have_enough_devs(c, sb_written, degraded_flags, false); + bch2_have_enough_devs(c, sb_written, degraded_flags, NULL); /* * If we would be able to 
mount _without_ the devices we successfully diff --git a/libbcachefs/super.c b/libbcachefs/super.c index ee3b30b1..4281a20f 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -1368,10 +1368,14 @@ static bool bch2_fs_may_start(struct bch_fs *c) return false; } break; - } + } } - return bch2_have_enough_devs(c, c->online_devs, flags, true); + CLASS(printbuf, err)(); + bool ret = bch2_have_enough_devs(c, c->online_devs, flags, &err); + if (!ret) + bch2_print_str(c, KERN_ERR, err.buf); + return ret; } int bch2_fs_start(struct bch_fs *c) @@ -1833,7 +1837,8 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb) * because we got an error or what have you? */ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca, - enum bch_member_state new_state, int flags) + enum bch_member_state new_state, int flags, + struct printbuf *err) { struct bch_devs_mask new_online_devs; int nr_rw = 0, required; @@ -1870,7 +1875,7 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca, new_online_devs = c->online_devs; __clear_bit(ca->dev_idx, new_online_devs.d); - return bch2_have_enough_devs(c, new_online_devs, flags, false); + return bch2_have_enough_devs(c, new_online_devs, flags, err); default: BUG(); } @@ -1904,14 +1909,15 @@ static void __bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca) } int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, - enum bch_member_state new_state, int flags) + enum bch_member_state new_state, int flags, + struct printbuf *err) { int ret = 0; if (ca->mi.state == new_state) return 0; - if (!bch2_dev_state_allowed(c, ca, new_state, flags)) + if (!bch2_dev_state_allowed(c, ca, new_state, flags, err)) return bch_err_throw(c, device_state_not_allowed); if (new_state != BCH_MEMBER_STATE_rw) @@ -1934,10 +1940,11 @@ int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, } int bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, - enum bch_member_state new_state, int flags) + enum 
bch_member_state new_state, int flags, + struct printbuf *err) { guard(rwsem_write)(&c->state_lock); - return __bch2_dev_set_state(c, ca, new_state, flags); + return __bch2_dev_set_state(c, ca, new_state, flags, err); } /* Device add/removal: */ @@ -1957,7 +1964,7 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) */ bch2_dev_put(ca); - if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_failed, flags)) { + if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_failed, flags, NULL)) { bch_err(ca, "Cannot remove without losing data"); ret = bch_err_throw(c, device_state_not_allowed); goto err; @@ -2278,7 +2285,7 @@ int bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca, int flags) return 0; } - if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_failed, flags)) { + if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_failed, flags, NULL)) { bch_err(ca, "Cannot offline required disk"); return bch_err_throw(c, device_state_not_allowed); } @@ -2455,10 +2462,14 @@ static void bch2_fs_bdev_mark_dead(struct block_device *bdev, bool surprise) struct bch_dev *ca = bdev_to_bch_dev(c, bdev); if (ca) { + CLASS(printbuf, buf)(); + __bch2_log_msg_start(ca->name, &buf); + prt_printf(&buf, "offline from block layer\n"); + bool dev = bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_failed, - BCH_FORCE_IF_DEGRADED); - + BCH_FORCE_IF_DEGRADED, + &buf); if (!dev && sb) { if (!surprise) sync_filesystem(sb); @@ -2466,11 +2477,6 @@ static void bch2_fs_bdev_mark_dead(struct block_device *bdev, bool surprise) evict_inodes(sb); } - CLASS(printbuf, buf)(); - __bch2_log_msg_start(ca->name, &buf); - - prt_printf(&buf, "offline from block layer"); - if (dev) { __bch2_dev_offline(c, ca); } else { diff --git a/libbcachefs/super.h b/libbcachefs/super.h index e90bab9a..de2c4430 100644 --- a/libbcachefs/super.h +++ b/libbcachefs/super.h @@ -17,11 +17,14 @@ struct bch_fs *bch2_dev_to_fs(dev_t); struct bch_fs *bch2_uuid_to_fs(__uuid_t); bool bch2_dev_state_allowed(struct bch_fs *, 
struct bch_dev *, - enum bch_member_state, int); + enum bch_member_state, int, + struct printbuf *); int __bch2_dev_set_state(struct bch_fs *, struct bch_dev *, - enum bch_member_state, int); + enum bch_member_state, int, + struct printbuf *); int bch2_dev_set_state(struct bch_fs *, struct bch_dev *, - enum bch_member_state, int); + enum bch_member_state, int, + struct printbuf *); int bch2_dev_fail(struct bch_dev *, int); int bch2_dev_remove(struct bch_fs *, struct bch_dev *, int); |