diff options
Diffstat (limited to 'fs/bcachefs')
56 files changed, 1090 insertions, 546 deletions
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index 45d3db41225a..cb25cddb759b 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -532,10 +532,6 @@ static int check_bp_exists(struct btree_trans *trans, struct btree_iter other_extent_iter = {}; CLASS(printbuf, buf)(); - if (bpos_lt(bp->k.p, s->bp_start) || - bpos_gt(bp->k.p, s->bp_end)) - return 0; - CLASS(btree_iter, bp_iter)(trans, BTREE_ID_backpointers, bp->k.p, 0); struct bkey_s_c bp_k = bch2_btree_iter_peek_slot(&bp_iter); int ret = bkey_err(bp_k); @@ -690,6 +686,10 @@ static int check_extent_to_backpointers(struct btree_trans *trans, struct bkey_i_backpointer bp; bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bp); + if (bpos_lt(bp.k.p, s->bp_start) || + bpos_gt(bp.k.p, s->bp_end)) + continue; + int ret = !empty ? check_bp_exists(trans, s, &bp, k) : bch2_bucket_backpointer_mod(trans, k, &bp, true); @@ -897,7 +897,7 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k); - if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_backpointer_bucket_gen && + if (c->sb.version_upgrade_complete < bcachefs_metadata_version_backpointer_bucket_gen && (bp.v->bucket_gen != a->gen || bp.v->pad)) { ret = bch2_backpointer_del(trans, bp_k.k->p); @@ -929,6 +929,14 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b if (sectors[ALLOC_dirty] != a->dirty_sectors || sectors[ALLOC_cached] != a->cached_sectors || sectors[ALLOC_stripe] != a->stripe_sectors) { + /* + * Post 1.14 upgrade, we assume that backpointers are mostly + * correct and a sector count mismatch is probably due to a + * write buffer race + * + * Pre upgrade, we expect all the buckets to be wrong, a write + * buffer flush is pointless: + */ if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_backpointer_bucket_gen) { ret = bch2_backpointers_maybe_flush(trans, alloc_k, 
last_flushed); if (ret) @@ -976,12 +984,22 @@ static bool backpointer_node_has_missing(struct bch_fs *c, struct bkey_s_c k) goto next; struct bpos bucket = bp_pos_to_bucket(ca, pos); - u64 next = ca->mi.nbuckets; - - unsigned long *bitmap = READ_ONCE(ca->bucket_backpointer_mismatch.buckets); - if (bitmap) - next = min_t(u64, next, - find_next_bit(bitmap, ca->mi.nbuckets, bucket.offset)); + u64 next = min(bucket.offset, ca->mi.nbuckets); + + unsigned long *mismatch = READ_ONCE(ca->bucket_backpointer_mismatch.buckets); + unsigned long *empty = READ_ONCE(ca->bucket_backpointer_empty.buckets); + /* + * Find the first bucket with mismatches - but + * not empty buckets; we don't need to pin those + * because we just recreate all backpointers in + * those buckets + */ + if (mismatch && empty) + next = find_next_andnot_bit(mismatch, empty, ca->mi.nbuckets, next); + else if (mismatch) + next = find_next_bit(mismatch, ca->mi.nbuckets, next); + else + next = ca->mi.nbuckets; bucket.offset = next; if (bucket.offset == ca->mi.nbuckets) @@ -1108,17 +1126,18 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c) if (ret) goto err; - u64 nr_buckets = 0, nr_mismatches = 0; + u64 nr_buckets = 0, nr_mismatches = 0, nr_empty = 0; for_each_member_device(c, ca) { nr_buckets += ca->mi.nbuckets; nr_mismatches += ca->bucket_backpointer_mismatch.nr; + nr_empty += ca->bucket_backpointer_empty.nr; } if (!nr_mismatches) goto err; - bch_info(c, "scanning for missing backpointers in %llu/%llu buckets", - nr_mismatches, nr_buckets); + bch_info(c, "scanning for missing backpointers in %llu/%llu buckets, %llu buckets with no backpointers", + nr_mismatches - nr_empty, nr_buckets, nr_empty); while (1) { ret = bch2_pin_backpointer_nodes_with_missing(trans, s.bp_start, &s.bp_end); diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 19961b4f30b8..b2de993d802b 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -706,7 +706,8 @@ struct 
bch_sb_field_ext { x(snapshot_deletion_v2, BCH_VERSION(1, 26)) \ x(fast_device_removal, BCH_VERSION(1, 27)) \ x(inode_has_case_insensitive, BCH_VERSION(1, 28)) \ - x(extent_snapshot_whiteouts, BCH_VERSION(1, 29)) + x(extent_snapshot_whiteouts, BCH_VERSION(1, 29)) \ + x(31bit_dirent_offset, BCH_VERSION(1, 30)) enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, @@ -1378,7 +1379,8 @@ enum btree_id_flags { BIT_ULL(KEY_TYPE_alloc_v4)) \ x(quotas, 5, 0, \ BIT_ULL(KEY_TYPE_quota)) \ - x(stripes, 6, 0, \ + x(stripes, 6, \ + BTREE_IS_data, \ BIT_ULL(KEY_TYPE_stripe)) \ x(reflink, 7, \ BTREE_IS_extents| \ @@ -1463,7 +1465,6 @@ static inline bool btree_id_can_reconstruct(enum btree_id btree) switch (btree) { case BTREE_ID_snapshot_trees: case BTREE_ID_deleted_inodes: - case BTREE_ID_logged_ops: case BTREE_ID_rebalance_work: case BTREE_ID_subvolume_children: return true; diff --git a/fs/bcachefs/bcachefs_ioctl.h b/fs/bcachefs/bcachefs_ioctl.h index 52594e925eb7..5dc562f2a881 100644 --- a/fs/bcachefs/bcachefs_ioctl.h +++ b/fs/bcachefs/bcachefs_ioctl.h @@ -66,33 +66,46 @@ struct bch_ioctl_incremental { #define BCH_IOCTL_STOP _IO(0xbc, 3) #endif -#define BCH_IOCTL_DISK_ADD _IOW(0xbc, 4, struct bch_ioctl_disk) -#define BCH_IOCTL_DISK_REMOVE _IOW(0xbc, 5, struct bch_ioctl_disk) -#define BCH_IOCTL_DISK_ONLINE _IOW(0xbc, 6, struct bch_ioctl_disk) -#define BCH_IOCTL_DISK_OFFLINE _IOW(0xbc, 7, struct bch_ioctl_disk) -#define BCH_IOCTL_DISK_SET_STATE _IOW(0xbc, 8, struct bch_ioctl_disk_set_state) -#define BCH_IOCTL_DATA _IOW(0xbc, 10, struct bch_ioctl_data) -#define BCH_IOCTL_FS_USAGE _IOWR(0xbc, 11, struct bch_ioctl_fs_usage) -#define BCH_IOCTL_DEV_USAGE _IOWR(0xbc, 11, struct bch_ioctl_dev_usage) -#define BCH_IOCTL_READ_SUPER _IOW(0xbc, 12, struct bch_ioctl_read_super) -#define BCH_IOCTL_DISK_GET_IDX _IOW(0xbc, 13, struct bch_ioctl_disk_get_idx) -#define BCH_IOCTL_DISK_RESIZE _IOW(0xbc, 14, struct bch_ioctl_disk_resize) -#define BCH_IOCTL_DISK_RESIZE_JOURNAL 
_IOW(0xbc,15, struct bch_ioctl_disk_resize_journal) - -#define BCH_IOCTL_SUBVOLUME_CREATE _IOW(0xbc, 16, struct bch_ioctl_subvolume) -#define BCH_IOCTL_SUBVOLUME_DESTROY _IOW(0xbc, 17, struct bch_ioctl_subvolume) - -#define BCH_IOCTL_DEV_USAGE_V2 _IOWR(0xbc, 18, struct bch_ioctl_dev_usage_v2) - -#define BCH_IOCTL_FSCK_OFFLINE _IOW(0xbc, 19, struct bch_ioctl_fsck_offline) -#define BCH_IOCTL_FSCK_ONLINE _IOW(0xbc, 20, struct bch_ioctl_fsck_online) -#define BCH_IOCTL_QUERY_ACCOUNTING _IOW(0xbc, 21, struct bch_ioctl_query_accounting) -#define BCH_IOCTL_QUERY_COUNTERS _IOW(0xbc, 21, struct bch_ioctl_query_counters) +#define BCH_IOCTL_DISK_ADD _IOW(0xbc, 4, struct bch_ioctl_disk) +#define BCH_IOCTL_DISK_ADD_v2 _IOW(0xbc, 23, struct bch_ioctl_disk_v2) +#define BCH_IOCTL_DISK_REMOVE _IOW(0xbc, 5, struct bch_ioctl_disk) +#define BCH_IOCTL_DISK_REMOVE_v2 _IOW(0xbc, 24, struct bch_ioctl_disk_v2) +#define BCH_IOCTL_DISK_ONLINE _IOW(0xbc, 6, struct bch_ioctl_disk) +#define BCH_IOCTL_DISK_ONLINE_v2 _IOW(0xbc, 25, struct bch_ioctl_disk_v2) +#define BCH_IOCTL_DISK_OFFLINE _IOW(0xbc, 7, struct bch_ioctl_disk) +#define BCH_IOCTL_DISK_OFFLINE_v2 _IOW(0xbc, 26, struct bch_ioctl_disk_v2) +#define BCH_IOCTL_DISK_SET_STATE _IOW(0xbc, 8, struct bch_ioctl_disk_set_state) +#define BCH_IOCTL_DISK_SET_STATE_v2 _IOW(0xbc, 22, struct bch_ioctl_disk_set_state_v2) +#define BCH_IOCTL_DATA _IOW(0xbc, 10, struct bch_ioctl_data) +#define BCH_IOCTL_FS_USAGE _IOWR(0xbc, 11, struct bch_ioctl_fs_usage) +#define BCH_IOCTL_DEV_USAGE _IOWR(0xbc, 11, struct bch_ioctl_dev_usage) +#define BCH_IOCTL_READ_SUPER _IOW(0xbc, 12, struct bch_ioctl_read_super) +#define BCH_IOCTL_DISK_GET_IDX _IOW(0xbc, 13, struct bch_ioctl_disk_get_idx) +#define BCH_IOCTL_DISK_RESIZE _IOW(0xbc, 14, struct bch_ioctl_disk_resize) +#define BCH_IOCTL_DISK_RESIZE_v2 _IOW(0xbc, 27, struct bch_ioctl_disk_resize_v2) +#define BCH_IOCTL_DISK_RESIZE_JOURNAL _IOW(0xbc, 15, struct bch_ioctl_disk_resize_journal) +#define 
BCH_IOCTL_DISK_RESIZE_JOURNAL_v2 _IOW(0xbc, 28, struct bch_ioctl_disk_resize_journal_v2) + +#define BCH_IOCTL_SUBVOLUME_CREATE _IOW(0xbc, 16, struct bch_ioctl_subvolume) +#define BCH_IOCTL_SUBVOLUME_DESTROY _IOW(0xbc, 17, struct bch_ioctl_subvolume) + +#define BCH_IOCTL_DEV_USAGE_V2 _IOWR(0xbc, 18, struct bch_ioctl_dev_usage_v2) + +#define BCH_IOCTL_FSCK_OFFLINE _IOW(0xbc, 19, struct bch_ioctl_fsck_offline) +#define BCH_IOCTL_FSCK_ONLINE _IOW(0xbc, 20, struct bch_ioctl_fsck_online) +#define BCH_IOCTL_QUERY_ACCOUNTING _IOW(0xbc, 21, struct bch_ioctl_query_accounting) +#define BCH_IOCTL_QUERY_COUNTERS _IOW(0xbc, 21, struct bch_ioctl_query_counters) /* ioctl below act on a particular file, not the filesystem as a whole: */ #define BCHFS_IOC_REINHERIT_ATTRS _IOR(0xbc, 64, const char __user *) +struct bch_ioctl_err_msg { + __u64 msg_ptr; + __u32 msg_len; + __u32 pad; +}; + /* * BCH_IOCTL_QUERY_UUID: get filesystem UUID * @@ -104,13 +117,6 @@ struct bch_ioctl_query_uuid { __uuid_t uuid; }; -#if 0 -struct bch_ioctl_start { - __u32 flags; - __u32 pad; -}; -#endif - /* * BCH_IOCTL_DISK_ADD: add a new device to an existing filesystem * @@ -164,6 +170,13 @@ struct bch_ioctl_disk { __u64 dev; }; +struct bch_ioctl_disk_v2 { + __u32 flags; + __u32 pad; + __u64 dev; + struct bch_ioctl_err_msg err; +}; + /* * BCH_IOCTL_DISK_SET_STATE: modify state of a member device of a filesystem * @@ -181,6 +194,14 @@ struct bch_ioctl_disk_set_state { __u64 dev; }; +struct bch_ioctl_disk_set_state_v2 { + __u32 flags; + __u8 new_state; + __u8 pad[3]; + __u64 dev; + struct bch_ioctl_err_msg err; +}; + #define BCH_DATA_OPS() \ x(scrub, 0) \ x(rereplicate, 1) \ @@ -392,6 +413,14 @@ struct bch_ioctl_disk_resize { __u64 nbuckets; }; +struct bch_ioctl_disk_resize_v2 { + __u32 flags; + __u32 pad; + __u64 dev; + __u64 nbuckets; + struct bch_ioctl_err_msg err; +}; + /* * BCH_IOCTL_DISK_RESIZE_JOURNAL: resize journal on a device * @@ -405,6 +434,14 @@ struct bch_ioctl_disk_resize_journal { __u64 nbuckets; 
}; +struct bch_ioctl_disk_resize_journal_v2 { + __u32 flags; + __u32 pad; + __u64 dev; + __u64 nbuckets; + struct bch_ioctl_err_msg err; +}; + struct bch_ioctl_subvolume { __u32 flags; __u32 dirfd; diff --git a/fs/bcachefs/bkey_buf.h b/fs/bcachefs/bkey_buf.h index a30c4ae8eb36..0a1fc582f53a 100644 --- a/fs/bcachefs/bkey_buf.h +++ b/fs/bcachefs/bkey_buf.h @@ -2,6 +2,8 @@ #ifndef _BCACHEFS_BKEY_BUF_H #define _BCACHEFS_BKEY_BUF_H +#include <linux/mempool.h> + #include "bcachefs.h" #include "bkey.h" @@ -10,41 +12,49 @@ struct bkey_buf { u64 onstack[12]; }; -static inline void bch2_bkey_buf_realloc(struct bkey_buf *s, - struct bch_fs *c, unsigned u64s) +static inline int bch2_bkey_buf_realloc_noprof(struct bkey_buf *s, + struct bch_fs *c, unsigned u64s) { if (s->k == (void *) s->onstack && u64s > ARRAY_SIZE(s->onstack)) { - s->k = mempool_alloc(&c->large_bkey_pool, GFP_NOFS); + s->k = mempool_alloc_noprof(&c->large_bkey_pool, GFP_NOFS); memcpy(s->k, s->onstack, sizeof(s->onstack)); } + + return 0; /* for alloc_hooks() macro */ } +#define bch2_bkey_buf_realloc(...) alloc_hooks(bch2_bkey_buf_realloc_noprof(__VA_ARGS__)) -static inline void bch2_bkey_buf_reassemble(struct bkey_buf *s, - struct bch_fs *c, - struct bkey_s_c k) +static inline int bch2_bkey_buf_reassemble_noprof(struct bkey_buf *s, + struct bch_fs *c, + struct bkey_s_c k) { bch2_bkey_buf_realloc(s, c, k.k->u64s); bkey_reassemble(s->k, k); + return 0; } +#define bch2_bkey_buf_reassemble(...) alloc_hooks(bch2_bkey_buf_reassemble_noprof(__VA_ARGS__)) -static inline void bch2_bkey_buf_copy(struct bkey_buf *s, - struct bch_fs *c, - struct bkey_i *src) +static inline int bch2_bkey_buf_copy_noprof(struct bkey_buf *s, + struct bch_fs *c, + struct bkey_i *src) { bch2_bkey_buf_realloc(s, c, src->k.u64s); bkey_copy(s->k, src); + return 0; } +#define bch2_bkey_buf_copy(...) 
alloc_hooks(bch2_bkey_buf_copy_noprof(__VA_ARGS__)) -static inline void bch2_bkey_buf_unpack(struct bkey_buf *s, - struct bch_fs *c, - struct btree *b, - struct bkey_packed *src) +static inline int bch2_bkey_buf_unpack_noprof(struct bkey_buf *s, + struct bch_fs *c, + struct btree *b, + struct bkey_packed *src) { - bch2_bkey_buf_realloc(s, c, BKEY_U64s + - bkeyp_val_u64s(&b->format, src)); + bch2_bkey_buf_realloc(s, c, BKEY_U64s + bkeyp_val_u64s(&b->format, src)); bch2_bkey_unpack(b, s->k, src); + return 0; } +#define bch2_bkey_buf_unpack(...) alloc_hooks(bch2_bkey_buf_unpack_noprof(__VA_ARGS__)) static inline void bch2_bkey_buf_init(struct bkey_buf *s) { diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index ae7d260589d8..43f294284d57 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -356,7 +356,7 @@ again: bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b); iter.prefetch = true; - while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) { + while ((k = bch2_btree_and_journal_iter_peek(c, &iter)).k) { BUG_ON(bpos_lt(k.k->p, b->data->min_key)); BUG_ON(bpos_gt(k.k->p, b->data->max_key)); @@ -470,7 +470,7 @@ again: bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b); iter.prefetch = true; - while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) { + while ((k = bch2_btree_and_journal_iter_peek(c, &iter)).k) { bch2_bkey_buf_reassemble(&cur_k, c, k); bch2_btree_and_journal_iter_advance(&iter); diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 276cf088539e..2e3dd9bacac5 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -131,10 +131,10 @@ static void *btree_bounce_alloc(struct bch_fs *c, size_t size, BUG_ON(size > c->opts.btree_node_size); *used_mempool = false; - p = kvmalloc(size, GFP_NOWAIT); + p = kvmalloc(size, GFP_NOWAIT|__GFP_ACCOUNT|__GFP_RECLAIMABLE); if (!p) { *used_mempool = true; - p = mempool_alloc(&c->btree_bounce_pool, GFP_NOFS); + p = mempool_alloc(&c->btree_bounce_pool, 
GFP_NOFS|__GFP_ACCOUNT|__GFP_RECLAIMABLE); } memalloc_nofs_restore(flags); return p; @@ -1014,6 +1014,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, k = bkey_p_next(k); continue; drop_this_key: + ret = 0; next_good_key = k->u64s; if (!next_good_key || @@ -1470,7 +1471,7 @@ start: } prt_newline(&buf); - if (failed.nr) + if (ret || failed.nr) bch2_print_str_ratelimited(c, KERN_ERR, buf.buf); async_object_list_del(c, btree_read_bio, rb->list_idx); diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 8962c481e310..76f430f93dc1 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -650,7 +650,7 @@ static void bch2_trans_revalidate_updates_in_node(struct btree_trans *trans, str i->old_v = bch2_btree_path_peek_slot(trans->paths + i->path, &i->old_k).v; if (unlikely(trans->journal_replay_not_finished)) { - struct bkey_i *j_k = + const struct bkey_i *j_k = bch2_journal_keys_peek_slot(c, i->btree_id, i->level, i->k->k.p); @@ -848,7 +848,7 @@ static int btree_path_prefetch_j(struct btree_trans *trans, struct btree_path *p break; bch2_btree_and_journal_iter_advance(jiter); - k = bch2_btree_and_journal_iter_peek(jiter); + k = bch2_btree_and_journal_iter_peek(c, jiter); if (!k.k) break; @@ -898,7 +898,7 @@ static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans, __bch2_btree_and_journal_iter_init_node_iter(trans, &jiter, l->b, l->iter, path->pos); - k = bch2_btree_and_journal_iter_peek(&jiter); + k = bch2_btree_and_journal_iter_peek(c, &jiter); if (!k.k) { CLASS(printbuf, buf)(); @@ -2120,10 +2120,10 @@ void bch2_btree_trans_peek_slot_updates(struct btree_trans *trans, struct btree_ } } -static struct bkey_i *bch2_btree_journal_peek(struct btree_trans *trans, - struct btree_iter *iter, - struct bpos search_pos, - struct bpos end_pos) +static const struct bkey_i *bch2_btree_journal_peek(struct btree_trans *trans, + struct btree_iter *iter, + struct bpos search_pos, + struct bpos end_pos) { struct 
btree_path *path = btree_iter_path(trans, iter); @@ -2139,7 +2139,7 @@ struct bkey_s_c btree_trans_peek_slot_journal(struct btree_trans *trans, struct btree_iter *iter) { struct btree_path *path = btree_iter_path(trans, iter); - struct bkey_i *k = bch2_btree_journal_peek(trans, iter, path->pos, path->pos); + const struct bkey_i *k = bch2_btree_journal_peek(trans, iter, path->pos, path->pos); if (k) { iter->k = k->k; @@ -2156,7 +2156,7 @@ void btree_trans_peek_journal(struct btree_trans *trans, struct bkey_s_c *k) { struct btree_path *path = btree_iter_path(trans, iter); - struct bkey_i *next_journal = + const struct bkey_i *next_journal = bch2_btree_journal_peek(trans, iter, search_key, k->k ? k->k->p : path_l(path)->b->key.k.p); if (next_journal) { @@ -2165,10 +2165,10 @@ void btree_trans_peek_journal(struct btree_trans *trans, } } -static struct bkey_i *bch2_btree_journal_peek_prev(struct btree_trans *trans, - struct btree_iter *iter, - struct bpos search_key, - struct bpos end_pos) +static const struct bkey_i *bch2_btree_journal_peek_prev(struct btree_trans *trans, + struct btree_iter *iter, + struct bpos search_key, + struct bpos end_pos) { struct btree_path *path = btree_iter_path(trans, iter); @@ -2186,7 +2186,7 @@ void btree_trans_peek_prev_journal(struct btree_trans *trans, struct bkey_s_c *k) { struct btree_path *path = btree_iter_path(trans, iter); - struct bkey_i *next_journal = + const struct bkey_i *next_journal = bch2_btree_journal_peek_prev(trans, iter, search_key, k->k ? 
k->k->p : path_l(path)->b->data->min_key); @@ -2366,7 +2366,9 @@ struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *iter, struct bpos en bch2_trans_verify_not_unlocked_or_in_restart(trans); bch2_btree_iter_verify_entry_exit(iter); - EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bkey_eq(end, POS_MAX)); + EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && + !(iter->flags & BTREE_ITER_nofilter_whiteouts) && + bkey_eq(end, POS_MAX)); ret = trans_maybe_inject_restart(trans, _RET_IP_); if (unlikely(ret)) { diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c index 24f2fbe84ad7..a6f344faf751 100644 --- a/fs/bcachefs/btree_journal_iter.c +++ b/fs/bcachefs/btree_journal_iter.c @@ -46,21 +46,22 @@ static size_t __bch2_journal_key_search(struct journal_keys *keys, enum btree_id id, unsigned level, struct bpos pos) { + struct bch_fs *c = container_of(keys, struct bch_fs, journal_keys); size_t l = 0, r = keys->nr, m; while (l < r) { m = l + ((r - l) >> 1); - if (__journal_key_cmp(id, level, pos, idx_to_key(keys, m)) > 0) + if (__journal_key_cmp(c, id, level, pos, idx_to_key(keys, m)) > 0) l = m + 1; else r = m; } BUG_ON(l < keys->nr && - __journal_key_cmp(id, level, pos, idx_to_key(keys, l)) > 0); + __journal_key_cmp(c, id, level, pos, idx_to_key(keys, l)) > 0); BUG_ON(l && - __journal_key_cmp(id, level, pos, idx_to_key(keys, l - 1)) <= 0); + __journal_key_cmp(c, id, level, pos, idx_to_key(keys, l - 1)) <= 0); return l; } @@ -72,10 +73,20 @@ static size_t bch2_journal_key_search(struct journal_keys *keys, return idx_to_pos(keys, __bch2_journal_key_search(keys, id, level, pos)); } +static inline struct journal_key_range_overwritten *__overwrite_range(struct journal_keys *keys, u32 idx) +{ + return idx ? keys->overwrites.data + idx : NULL; +} + +static inline struct journal_key_range_overwritten *overwrite_range(struct journal_keys *keys, u32 idx) +{ + return idx ? 
rcu_dereference(keys->overwrites.data) + idx : NULL; +} + /* Returns first non-overwritten key >= search key: */ -struct bkey_i *bch2_journal_keys_peek_max(struct bch_fs *c, enum btree_id btree_id, - unsigned level, struct bpos pos, - struct bpos end_pos, size_t *idx) +const struct bkey_i *bch2_journal_keys_peek_max(struct bch_fs *c, enum btree_id btree_id, + unsigned level, struct bpos pos, + struct bpos end_pos, size_t *idx) { struct journal_keys *keys = &c->journal_keys; unsigned iters = 0; @@ -87,7 +98,7 @@ search: *idx = __bch2_journal_key_search(keys, btree_id, level, pos); while (*idx && - __journal_key_cmp(btree_id, level, end_pos, idx_to_key(keys, *idx - 1)) <= 0) { + __journal_key_cmp(c, btree_id, level, end_pos, idx_to_key(keys, *idx - 1)) <= 0) { --(*idx); iters++; if (iters == 10) { @@ -96,23 +107,23 @@ search: } } - struct bkey_i *ret = NULL; + const struct bkey_i *ret = NULL; rcu_read_lock(); /* for overwritten_ranges */ while ((k = *idx < keys->nr ? idx_to_key(keys, *idx) : NULL)) { - if (__journal_key_cmp(btree_id, level, end_pos, k) < 0) + if (__journal_key_cmp(c, btree_id, level, end_pos, k) < 0) break; if (k->overwritten) { if (k->overwritten_range) - *idx = rcu_dereference(k->overwritten_range)->end; + *idx = overwrite_range(keys, k->overwritten_range)->end; else *idx += 1; continue; } - if (__journal_key_cmp(btree_id, level, pos, k) <= 0) { - ret = k->k; + if (__journal_key_cmp(c, btree_id, level, pos, k) <= 0) { + ret = journal_key_k(c, k); break; } @@ -129,9 +140,9 @@ search: return ret; } -struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *c, enum btree_id btree_id, - unsigned level, struct bpos pos, - struct bpos end_pos, size_t *idx) +const struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *c, enum btree_id btree_id, + unsigned level, struct bpos pos, + struct bpos end_pos, size_t *idx) { struct journal_keys *keys = &c->journal_keys; unsigned iters = 0; @@ -146,7 +157,7 @@ search: *idx = 
__bch2_journal_key_search(keys, btree_id, level, pos); while (*idx < keys->nr && - __journal_key_cmp(btree_id, level, end_pos, idx_to_key(keys, *idx)) >= 0) { + __journal_key_cmp(c, btree_id, level, end_pos, idx_to_key(keys, *idx)) >= 0) { (*idx)++; iters++; if (iters == 10) { @@ -158,25 +169,25 @@ search: if (*idx == keys->nr) --(*idx); - struct bkey_i *ret = NULL; + const struct bkey_i *ret = NULL; rcu_read_lock(); /* for overwritten_ranges */ while (true) { k = idx_to_key(keys, *idx); - if (__journal_key_cmp(btree_id, level, end_pos, k) > 0) + if (__journal_key_cmp(c, btree_id, level, end_pos, k) > 0) break; if (k->overwritten) { if (k->overwritten_range) - *idx = rcu_dereference(k->overwritten_range)->start; + *idx = overwrite_range(keys, k->overwritten_range)->start; if (!*idx) break; --(*idx); continue; } - if (__journal_key_cmp(btree_id, level, pos, k) >= 0) { - ret = k->k; + if (__journal_key_cmp(c, btree_id, level, pos, k) >= 0) { + ret = journal_key_k(c, k); break; } @@ -194,8 +205,8 @@ search: return ret; } -struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *c, enum btree_id btree_id, - unsigned level, struct bpos pos) +const struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *c, enum btree_id btree_id, + unsigned level, struct bpos pos) { size_t idx = 0; @@ -264,13 +275,8 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id, struct journal_key n = { .btree_id = id, .level = level, - .k = k, .allocated = true, - /* - * Ensure these keys are done last by journal replay, to unblock - * journal reclaim: - */ - .journal_seq = U64_MAX, + .allocated_k = k, }; struct journal_keys *keys = &c->journal_keys; size_t idx = bch2_journal_key_search(keys, id, level, k->k.p); @@ -278,8 +284,8 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id, BUG_ON(test_bit(BCH_FS_rw, &c->flags)); if (idx < keys->size && - journal_key_cmp(&n, &keys->data[idx]) == 0) { - struct bkey_i *o = keys->data[idx].k; + journal_key_cmp(c, &n, 
&keys->data[idx]) == 0) { + struct bkey_i *o = journal_key_k(c, &keys->data[idx]); if (k->k.type == KEY_TYPE_accounting && o->k.type == KEY_TYPE_accounting) { @@ -291,7 +297,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id, } if (keys->data[idx].allocated) - kfree(keys->data[idx].k); + kfree(keys->data[idx].allocated_k); keys->data[idx] = n; return 0; } @@ -376,17 +382,20 @@ int bch2_journal_key_delete(struct bch_fs *c, enum btree_id id, bool bch2_key_deleted_in_journal(struct btree_trans *trans, enum btree_id btree, unsigned level, struct bpos pos) { - struct journal_keys *keys = &trans->c->journal_keys; + if (!trans->journal_replay_not_finished) + return false; + + struct bch_fs *c = trans->c; + struct journal_keys *keys = &c->journal_keys; size_t idx = bch2_journal_key_search(keys, btree, level, pos); - if (!trans->journal_replay_not_finished) + if (idx >= keys->size || + keys->data[idx].btree_id != btree || + keys->data[idx].level != level) return false; - return (idx < keys->size && - keys->data[idx].btree_id == btree && - keys->data[idx].level == level && - bpos_eq(keys->data[idx].k->k.p, pos) && - bkey_deleted(&keys->data[idx].k->k)); + struct bkey_i *k = journal_key_k(c, &keys->data[idx]); + return bpos_eq(k->k.p, pos) && bkey_deleted(&k->k); } static void __bch2_journal_key_overwritten(struct journal_keys *keys, size_t pos) @@ -403,9 +412,9 @@ static void __bch2_journal_key_overwritten(struct journal_keys *keys, size_t pos bool next_overwritten = next && next->overwritten; struct journal_key_range_overwritten *prev_range = - prev_overwritten ? prev->overwritten_range : NULL; + prev_overwritten ? __overwrite_range(keys, prev->overwritten_range) : NULL; struct journal_key_range_overwritten *next_range = - next_overwritten ? next->overwritten_range : NULL; + next_overwritten ? 
__overwrite_range(keys, next->overwritten_range) : NULL; BUG_ON(prev_range && prev_range->end != idx); BUG_ON(next_range && next_range->start != idx + 1); @@ -413,37 +422,47 @@ static void __bch2_journal_key_overwritten(struct journal_keys *keys, size_t pos if (prev_range && next_range) { prev_range->end = next_range->end; - keys->data[pos].overwritten_range = prev_range; + keys->data[pos].overwritten_range = prev->overwritten_range; + + u32 old = next->overwritten_range; + for (size_t i = next_range->start; i < next_range->end; i++) { struct journal_key *ip = keys->data + idx_to_pos(keys, i); - BUG_ON(ip->overwritten_range != next_range); - ip->overwritten_range = prev_range; + BUG_ON(ip->overwritten_range != old); + ip->overwritten_range = prev->overwritten_range; } - - kfree_rcu_mightsleep(next_range); } else if (prev_range) { prev_range->end++; - k->overwritten_range = prev_range; + k->overwritten_range = prev->overwritten_range; if (next_overwritten) { prev_range->end++; - next->overwritten_range = prev_range; + next->overwritten_range = prev->overwritten_range; } } else if (next_range) { next_range->start--; - k->overwritten_range = next_range; + k->overwritten_range = next->overwritten_range; if (prev_overwritten) { next_range->start--; - prev->overwritten_range = next_range; + prev->overwritten_range = next->overwritten_range; } } else if (prev_overwritten || next_overwritten) { - struct journal_key_range_overwritten *r = kmalloc(sizeof(*r), GFP_KERNEL); - if (!r) + /* 0 is a sentinel value */ + if (darray_resize_rcu(&keys->overwrites, max(keys->overwrites.nr + 1, 2))) return; - r->start = idx - (size_t) prev_overwritten; - r->end = idx + 1 + (size_t) next_overwritten; + if (!keys->overwrites.nr) + darray_push(&keys->overwrites, (struct journal_key_range_overwritten) {}); + + darray_push(&keys->overwrites, ((struct journal_key_range_overwritten) { + .start = idx - (size_t) prev_overwritten, + .end = idx + 1 + (size_t) next_overwritten, + })); + + smp_wmb(); 
+ u32 r = keys->overwrites.nr - 1; + + k->overwritten_range = r; - rcu_assign_pointer(k->overwritten_range, r); if (prev_overwritten) prev->overwritten_range = r; if (next_overwritten) @@ -457,11 +476,15 @@ void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree, struct journal_keys *keys = &c->journal_keys; size_t idx = bch2_journal_key_search(keys, btree, level, pos); - if (idx < keys->size && - keys->data[idx].btree_id == btree && - keys->data[idx].level == level && - bpos_eq(keys->data[idx].k->k.p, pos) && - !keys->data[idx].overwritten) { + if (idx >= keys->size || + keys->data[idx].btree_id != btree || + keys->data[idx].level != level || + keys->data[idx].overwritten) + return; + + struct bkey_i *k = journal_key_k(c, &keys->data[idx]); + + if (bpos_eq(k->k.p, pos)) { guard(mutex)(&keys->overwrite_lock); __bch2_journal_key_overwritten(keys, idx); } @@ -476,7 +499,7 @@ static void bch2_journal_iter_advance(struct journal_iter *iter) } } -static struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter) +static struct bkey_s_c bch2_journal_iter_peek(struct bch_fs *c, struct journal_iter *iter) { journal_iter_verify(iter); @@ -490,10 +513,10 @@ static struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter) BUG_ON(cmp); if (!k->overwritten) - return bkey_i_to_s_c(k->k); + return bkey_i_to_s_c(journal_key_k(c, k)); if (k->overwritten_range) - iter->idx = idx_to_pos(iter->keys, rcu_dereference(k->overwritten_range)->end); + iter->idx = idx_to_pos(iter->keys, overwrite_range(iter->keys, k->overwritten_range)->end); else bch2_journal_iter_advance(iter); } @@ -554,7 +577,7 @@ static void btree_and_journal_iter_prefetch(struct btree_and_journal_iter *_iter while (nr--) { bch2_btree_and_journal_iter_advance(&iter); - struct bkey_s_c k = bch2_btree_and_journal_iter_peek(&iter); + struct bkey_s_c k = bch2_btree_and_journal_iter_peek(c, &iter); if (!k.k) break; @@ -565,7 +588,7 @@ static void btree_and_journal_iter_prefetch(struct 
btree_and_journal_iter *_iter bch2_bkey_buf_exit(&tmp, c); } -struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *iter) +struct bkey_s_c bch2_btree_and_journal_iter_peek(struct bch_fs *c, struct btree_and_journal_iter *iter) { struct bkey_s_c btree_k, journal_k = bkey_s_c_null, ret; size_t iters = 0; @@ -586,7 +609,7 @@ again: bch2_journal_iter_advance_btree(iter); if (iter->trans->journal_replay_not_finished) - while ((journal_k = bch2_journal_iter_peek(&iter->journal)).k && + while ((journal_k = bch2_journal_iter_peek(c, &iter->journal)).k && bpos_lt(journal_k.k->p, iter->pos)) bch2_journal_iter_advance(&iter->journal); @@ -658,15 +681,22 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans, /* * When keys compare equal, oldest compares first: */ -static int journal_sort_key_cmp(const void *_l, const void *_r) +static int journal_sort_key_cmp(const void *_l, const void *_r, const void *priv) { + struct bch_fs *c = (void *) priv; const struct journal_key *l = _l; const struct journal_key *r = _r; int rewind = l->rewind && r->rewind ? 
-1 : 1; - return journal_key_cmp(l, r) ?: - ((cmp_int(l->journal_seq, r->journal_seq) ?: - cmp_int(l->journal_offset, r->journal_offset)) * rewind); + int cmp = journal_key_cmp(c, l, r); + if (cmp) + return cmp; + + if (l->allocated || r->allocated) + return cmp_int(l->allocated, r->allocated); + + return ((cmp_int(l->journal_seq_offset, r->journal_seq_offset) ?: + cmp_int(l->journal_offset, r->journal_offset)) * rewind); } void bch2_journal_keys_put(struct bch_fs *c) @@ -680,20 +710,16 @@ void bch2_journal_keys_put(struct bch_fs *c) move_gap(keys, keys->nr); - darray_for_each(*keys, i) { - if (i->overwritten_range && - (i == &darray_last(*keys) || - i->overwritten_range != i[1].overwritten_range)) - kfree(i->overwritten_range); - + darray_for_each(*keys, i) if (i->allocated) - kfree(i->k); - } + kfree(i->allocated_k); kvfree(keys->data); keys->data = NULL; keys->nr = keys->gap = keys->size = 0; + darray_exit(&keys->overwrites); + struct journal_replay **i; struct genradix_iter iter; @@ -704,8 +730,10 @@ void bch2_journal_keys_put(struct bch_fs *c) static void __journal_keys_sort(struct journal_keys *keys) { - sort_nonatomic(keys->data, keys->nr, sizeof(keys->data[0]), - journal_sort_key_cmp, NULL); + struct bch_fs *c = container_of(keys, struct bch_fs, journal_keys); + + sort_r_nonatomic(keys->data, keys->nr, sizeof(keys->data[0]), + journal_sort_key_cmp, NULL, c); cond_resched(); @@ -717,9 +745,10 @@ static void __journal_keys_sort(struct journal_keys *keys) * compare each individual accounting key against the version in * the btree during replay: */ - if (src->k->k.type != KEY_TYPE_accounting && + struct bkey_i *k = journal_key_k(c, src); + if (k->k.type != KEY_TYPE_accounting && src + 1 < &darray_top(*keys) && - !journal_key_cmp(src, src + 1)) + !journal_key_cmp(c, src, src + 1)) continue; *dst++ = *src; @@ -763,8 +792,7 @@ int bch2_journal_keys_sort(struct bch_fs *c) .btree_id = entry->btree_id, .level = entry->level, .rewind = rewind, - .k = k, - .journal_seq 
= le64_to_cpu(i->j.seq), + .journal_seq_offset = journal_entry_radix_idx(c, le64_to_cpu(i->j.seq)), .journal_offset = k->_data - i->j._data, }; @@ -801,13 +829,18 @@ void bch2_shoot_down_journal_keys(struct bch_fs *c, enum btree_id btree, move_gap(keys, keys->nr); - darray_for_each(*keys, i) + darray_for_each(*keys, i) { + struct bkey_i *k = journal_key_k(c, i); + if (!(i->btree_id == btree && i->level >= level_min && i->level <= level_max && - bpos_ge(i->k->k.p, start) && - bpos_le(i->k->k.p, end))) + bpos_ge(k->k.p, start) && + bpos_le(k->k.p, end))) keys->data[dst++] = *i; + else if (i->allocated) + kfree(i->allocated_k); + } keys->nr = keys->gap = dst; } @@ -825,7 +858,7 @@ void bch2_journal_keys_dump(struct bch_fs *c) prt_printf(&buf, "btree="); bch2_btree_id_to_text(&buf, i->btree_id); prt_printf(&buf, " l=%u ", i->level); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(i->k)); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(journal_key_k(c, i))); pr_err("%s", buf.buf); } } diff --git a/fs/bcachefs/btree_journal_iter.h b/fs/bcachefs/btree_journal_iter.h index 2a3082919b8d..8dc8e778be6c 100644 --- a/fs/bcachefs/btree_journal_iter.h +++ b/fs/bcachefs/btree_journal_iter.h @@ -29,6 +29,22 @@ struct btree_and_journal_iter { bool fail_if_too_many_whiteouts; }; +static inline u32 journal_entry_radix_idx(struct bch_fs *c, u64 seq) +{ + return (seq - c->journal_entries_base_seq) & (~0U >> 1); +} + +static inline struct bkey_i *journal_key_k(struct bch_fs *c, + const struct journal_key *k) +{ + if (k->allocated) + return k->allocated_k; + + struct journal_replay *i = *genradix_ptr(&c->journal_entries, k->journal_seq_offset); + + return (struct bkey_i *) (i->j._data + k->journal_offset); +} + static inline int __journal_key_btree_cmp(enum btree_id l_btree_id, unsigned l_level, const struct journal_key *r) @@ -37,25 +53,28 @@ static inline int __journal_key_btree_cmp(enum btree_id l_btree_id, cmp_int(l_btree_id, r->btree_id); } -static inline int __journal_key_cmp(enum 
btree_id l_btree_id, +static inline int __journal_key_cmp(struct bch_fs *c, + enum btree_id l_btree_id, unsigned l_level, struct bpos l_pos, const struct journal_key *r) { return __journal_key_btree_cmp(l_btree_id, l_level, r) ?: - bpos_cmp(l_pos, r->k->k.p); + bpos_cmp(l_pos, journal_key_k(c, r)->k.p); } -static inline int journal_key_cmp(const struct journal_key *l, const struct journal_key *r) +static inline int journal_key_cmp(struct bch_fs *c, + const struct journal_key *l, const struct journal_key *r) { - return __journal_key_cmp(l->btree_id, l->level, l->k->k.p, r); + return __journal_key_cmp(c, l->btree_id, l->level, + journal_key_k(c, l)->k.p, r); } -struct bkey_i *bch2_journal_keys_peek_max(struct bch_fs *, enum btree_id, +const struct bkey_i *bch2_journal_keys_peek_max(struct bch_fs *, enum btree_id, unsigned, struct bpos, struct bpos, size_t *); -struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *, enum btree_id, +const struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *, enum btree_id, unsigned, struct bpos, struct bpos, size_t *); -struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *, enum btree_id, +const struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *, enum btree_id, unsigned, struct bpos); int bch2_btree_and_journal_iter_prefetch(struct btree_trans *, struct btree_path *, @@ -71,7 +90,7 @@ bool bch2_key_deleted_in_journal(struct btree_trans *, enum btree_id, unsigned, void bch2_journal_key_overwritten(struct bch_fs *, enum btree_id, unsigned, struct bpos); void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *); -struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *); +struct bkey_s_c bch2_btree_and_journal_iter_peek(struct bch_fs *, struct btree_and_journal_iter *); void bch2_btree_and_journal_iter_exit(struct btree_and_journal_iter *); void __bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *, diff --git a/fs/bcachefs/btree_journal_iter_types.h 
b/fs/bcachefs/btree_journal_iter_types.h index 86aacb254fb2..4495fc92f848 100644 --- a/fs/bcachefs/btree_journal_iter_types.h +++ b/fs/bcachefs/btree_journal_iter_types.h @@ -2,21 +2,47 @@ #ifndef _BCACHEFS_BTREE_JOURNAL_ITER_TYPES_H #define _BCACHEFS_BTREE_JOURNAL_ITER_TYPES_H +struct journal_ptr { + bool csum_good; + struct bch_csum csum; + u8 dev; + u32 bucket; + u32 bucket_offset; + u64 sector; +}; + +/* + * Only used for holding the journal entries we read in btree_journal_read() + * during cache_registration + */ +struct journal_replay { + DARRAY_PREALLOCATED(struct journal_ptr, 8) ptrs; + + bool csum_good; + bool ignore_blacklisted; + bool ignore_not_dirty; + /* must be last: */ + struct jset j; +}; + struct journal_key_range_overwritten { size_t start, end; }; struct journal_key { - u64 journal_seq; - u32 journal_offset; + union { + struct { + u32 journal_seq_offset; + u32 journal_offset; + }; + struct bkey_i *allocated_k; + }; enum btree_id btree_id:8; unsigned level:8; bool allocated:1; bool overwritten:1; bool rewind:1; - struct journal_key_range_overwritten __rcu * - overwritten_range; - struct bkey_i *k; + u32 overwritten_range; }; struct journal_keys { @@ -31,7 +57,9 @@ struct journal_keys { size_t gap; atomic_t ref; bool initial_ref_held; + struct mutex overwrite_lock; + DARRAY(struct journal_key_range_overwritten) overwrites; }; #endif /* _BCACHEFS_BTREE_JOURNAL_ITER_TYPES_H */ diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c index 6b747c053e91..b618a0bd1186 100644 --- a/fs/bcachefs/btree_node_scan.c +++ b/fs/bcachefs/btree_node_scan.c @@ -534,7 +534,7 @@ int bch2_btree_has_scanned_nodes(struct bch_fs *c, enum btree_id btree) int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree, unsigned level, struct bpos node_min, struct bpos node_max) { - if (btree_id_recovers_from_scan(btree)) + if (!btree_id_recovers_from_scan(btree)) return 0; struct find_btree_nodes *f = &c->found_btree_nodes; diff --git 
a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 4d58bdb233e9..5fa7f2f9f1e9 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -54,7 +54,7 @@ static void verify_update_old_key(struct btree_trans *trans, struct btree_insert struct bkey_s_c k = bch2_btree_path_peek_slot_exact(trans->paths + i->path, &u); if (unlikely(trans->journal_replay_not_finished)) { - struct bkey_i *j_k = + const struct bkey_i *j_k = bch2_journal_keys_peek_slot(c, i->btree_id, i->level, i->k->k.p); if (j_k) diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c index f59f018fe0d8..b70eb095a37e 100644 --- a/fs/bcachefs/btree_update.c +++ b/fs/bcachefs/btree_update.c @@ -12,7 +12,6 @@ #include "extents.h" #include "keylist.h" #include "snapshot.h" -#include "super-io.h" #include "trace.h" #include <linux/string_helpers.h> @@ -159,21 +158,6 @@ int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans, return ret; } -static inline enum bch_bkey_type extent_whiteout_type(struct bch_fs *c, enum btree_id btree, const struct bkey *k) -{ - /* - * KEY_TYPE_extent_whiteout indicates that there isn't a real extent - * present at that position: key start positions inclusive of - * KEY_TYPE_extent_whiteout (but not KEY_TYPE_whiteout) are - * monotonically increasing - */ - return btree_id_is_extents_snapshots(btree) && - bkey_deleted(k) && - !bch2_request_incompat_feature(c, bcachefs_metadata_version_extent_snapshot_whiteouts) - ? KEY_TYPE_extent_whiteout - : KEY_TYPE_whiteout; -} - int bch2_trans_update_extent_overwrite(struct btree_trans *trans, struct btree_iter *iter, enum btree_iter_update_trigger_flags flags, @@ -419,7 +403,7 @@ __btree_trans_update_by_path(struct btree_trans *trans, i->old_btree_u64s = !bkey_deleted(&i->old_k) ? 
i->old_k.u64s : 0; if (unlikely(trans->journal_replay_not_finished)) { - struct bkey_i *j_k = + const struct bkey_i *j_k = bch2_journal_keys_peek_slot(c, n.btree_id, n.level, k->k.p); if (j_k) { diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index 663739db82b1..18560ca80057 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -5,6 +5,7 @@ #include "btree_iter.h" #include "journal.h" #include "snapshot.h" +#include "super-io.h" struct bch_fs; struct btree; @@ -110,6 +111,22 @@ static inline int bch2_insert_snapshot_whiteouts(struct btree_trans *trans, : 0; } +static inline enum bch_bkey_type extent_whiteout_type(struct bch_fs *c, enum btree_id btree, + const struct bkey *k) +{ + /* + * KEY_TYPE_extent_whiteout indicates that there isn't a real extent + * present at that position: key start positions inclusive of + * KEY_TYPE_extent_whiteout (but not KEY_TYPE_whiteout) are + * monotonically increasing + */ + return btree_id_is_extents_snapshots(btree) && + bkey_deleted(k) && + !bch2_request_incompat_feature(c, bcachefs_metadata_version_extent_snapshot_whiteouts) + ? 
KEY_TYPE_extent_whiteout + : KEY_TYPE_whiteout; +} + int bch2_trans_update_extent_overwrite(struct btree_trans *, struct btree_iter *, enum btree_iter_update_trigger_flags, struct bkey_s_c, struct bkey_s_c); diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 65ca54c5b0ff..a9877a47bfc6 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -95,7 +95,7 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) if (!b->c.level) goto out; - while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) { + while ((k = bch2_btree_and_journal_iter_peek(c, &iter)).k) { if (k.k->type != KEY_TYPE_btree_ptr_v2) goto out; diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 87a6f4dce296..280b169efb62 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -111,7 +111,7 @@ static int bch2_check_fix_ptr(struct btree_trans *trans, CLASS(printbuf, buf)(); int ret = 0; - CLASS(bch2_dev_tryget, ca)(c, p.ptr.dev); + CLASS(bch2_dev_tryget_noerror, ca)(c, p.ptr.dev); if (!ca) { if (fsck_err_on(p.ptr.dev != BCH_SB_MEMBER_INVALID, trans, ptr_to_invalid_device, diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index 467fc45e84fe..f6f90d421f27 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -187,6 +187,18 @@ static long bch2_ioctl_stop(struct bch_fs *c) } #endif +static int copy_ioctl_err_msg(struct bch_ioctl_err_msg *dst, struct printbuf *src, int ret) +{ + if (ret) { + prt_printf(src, "error=%s", bch2_err_str(ret)); + ret = copy_to_user_errcode((void __user *)(ulong)dst->msg_ptr, + src->buf, + min(src->pos, dst->msg_len)) ?: ret; + } + + return ret; +} + static long bch2_ioctl_disk_add(struct bch_fs *c, struct bch_ioctl_disk arg) { char *path; @@ -203,13 +215,37 @@ static long bch2_ioctl_disk_add(struct bch_fs *c, struct bch_ioctl_disk arg) if (ret) return ret; - ret = bch2_dev_add(c, path); - if (!IS_ERR(path)) - kfree(path); + 
CLASS(printbuf, err)(); + ret = bch2_dev_add(c, path, &err); + if (ret) + bch_err(c, "%s", err.buf); + kfree(path); return ret; } +static long bch2_ioctl_disk_add_v2(struct bch_fs *c, struct bch_ioctl_disk_v2 arg) +{ + char *path = NULL; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (arg.flags || arg.pad) + return -EINVAL; + + path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX); + ret = PTR_ERR_OR_ZERO(path); + if (ret) + return ret; + + CLASS(printbuf, err)(); + ret = bch2_dev_add(c, path, &err); + kfree(path); + return copy_ioctl_err_msg(&arg.err, &err, ret); +} + static long bch2_ioctl_disk_remove(struct bch_fs *c, struct bch_ioctl_disk arg) { if (!capable(CAP_SYS_ADMIN)) @@ -226,7 +262,32 @@ static long bch2_ioctl_disk_remove(struct bch_fs *c, struct bch_ioctl_disk arg) if (IS_ERR(ca)) return PTR_ERR(ca); - return bch2_dev_remove(c, ca, arg.flags); + CLASS(printbuf, err)(); + int ret = bch2_dev_remove(c, ca, arg.flags, &err); + if (ret) + bch_err(ca, "%s", err.buf); + return ret; +} + +static long bch2_ioctl_disk_remove_v2(struct bch_fs *c, struct bch_ioctl_disk_v2 arg) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST| + BCH_FORCE_IF_METADATA_LOST| + BCH_FORCE_IF_DEGRADED| + BCH_BY_INDEX)) || + arg.pad) + return -EINVAL; + + struct bch_dev *ca = bch2_device_lookup(c, arg.dev, arg.flags); + if (IS_ERR(ca)) + return PTR_ERR(ca); + + CLASS(printbuf, err)(); + int ret = bch2_dev_remove(c, ca, arg.flags, &err); + return copy_ioctl_err_msg(&arg.err, &err, ret); } static long bch2_ioctl_disk_online(struct bch_fs *c, struct bch_ioctl_disk arg) @@ -245,11 +306,36 @@ static long bch2_ioctl_disk_online(struct bch_fs *c, struct bch_ioctl_disk arg) if (ret) return ret; - ret = bch2_dev_online(c, path); + CLASS(printbuf, err)(); + ret = bch2_dev_online(c, path, &err); + if (ret) + bch_err(c, "%s", err.buf); kfree(path); return ret; } +static long bch2_ioctl_disk_online_v2(struct 
bch_fs *c, struct bch_ioctl_disk_v2 arg) +{ + char *path; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (arg.flags || arg.pad) + return -EINVAL; + + path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX); + ret = PTR_ERR_OR_ZERO(path); + if (ret) + return ret; + + CLASS(printbuf, err)(); + ret = bch2_dev_online(c, path, &err); + kfree(path); + return copy_ioctl_err_msg(&arg.err, &err, ret); +} + static long bch2_ioctl_disk_offline(struct bch_fs *c, struct bch_ioctl_disk arg) { if (!capable(CAP_SYS_ADMIN)) @@ -266,7 +352,32 @@ static long bch2_ioctl_disk_offline(struct bch_fs *c, struct bch_ioctl_disk arg) if (IS_ERR(ca)) return PTR_ERR(ca); - return bch2_dev_offline(c, ca, arg.flags); + CLASS(printbuf, err)(); + int ret = bch2_dev_offline(c, ca, arg.flags, &err); + if (ret) + bch_err(ca, "%s", err.buf); + return ret; +} + +static long bch2_ioctl_disk_offline_v2(struct bch_fs *c, struct bch_ioctl_disk_v2 arg) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST| + BCH_FORCE_IF_METADATA_LOST| + BCH_FORCE_IF_DEGRADED| + BCH_BY_INDEX)) || + arg.pad) + return -EINVAL; + + CLASS(bch2_device_lookup, ca)(c, arg.dev, arg.flags); + if (IS_ERR(ca)) + return PTR_ERR(ca); + + CLASS(printbuf, err)(); + int ret = bch2_dev_offline(c, ca, arg.flags, &err); + return copy_ioctl_err_msg(&arg.err, &err, ret); } static long bch2_ioctl_disk_set_state(struct bch_fs *c, @@ -287,11 +398,40 @@ static long bch2_ioctl_disk_set_state(struct bch_fs *c, if (IS_ERR(ca)) return PTR_ERR(ca); - int ret = bch2_dev_set_state(c, ca, arg.new_state, arg.flags); + CLASS(printbuf, err)(); + int ret = bch2_dev_set_state(c, ca, arg.new_state, arg.flags, &err); bch_err_msg(ca, ret, "setting device state"); return ret; } +static long bch2_ioctl_disk_set_state_v2(struct bch_fs *c, + struct bch_ioctl_disk_set_state_v2 arg) +{ + CLASS(printbuf, err)(); + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if ((arg.flags & 
~(BCH_FORCE_IF_DATA_LOST| + BCH_FORCE_IF_METADATA_LOST| + BCH_FORCE_IF_DEGRADED| + BCH_BY_INDEX)) || + arg.pad[0] || arg.pad[1] || arg.pad[2] || + arg.new_state >= BCH_MEMBER_STATE_NR) + return -EINVAL; + + CLASS(bch2_device_lookup, ca)(c, arg.dev, arg.flags); + int ret = PTR_ERR_OR_ZERO(ca); + if (ret) { + prt_printf(&err, "device %llu not found\n", arg.dev); + goto err; + } + + ret = bch2_dev_set_state(c, ca, arg.new_state, arg.flags, &err); +err: + return copy_ioctl_err_msg(&arg.err, &err, ret); +} + struct bch_data_ctx { struct thread_with_file thr; @@ -620,7 +760,30 @@ static long bch2_ioctl_disk_resize(struct bch_fs *c, if (IS_ERR(ca)) return PTR_ERR(ca); - return bch2_dev_resize(c, ca, arg.nbuckets); + CLASS(printbuf, err)(); + int ret = bch2_dev_resize(c, ca, arg.nbuckets, &err); + if (ret) + bch_err(ca, "%s", err.buf); + return ret; +} + +static long bch2_ioctl_disk_resize_v2(struct bch_fs *c, + struct bch_ioctl_disk_resize_v2 arg) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if ((arg.flags & ~BCH_BY_INDEX) || + arg.pad) + return -EINVAL; + + CLASS(bch2_device_lookup, ca)(c, arg.dev, arg.flags); + if (IS_ERR(ca)) + return PTR_ERR(ca); + + CLASS(printbuf, err)(); + int ret = bch2_dev_resize(c, ca, arg.nbuckets, &err); + return copy_ioctl_err_msg(&arg.err, &err, ret); } static long bch2_ioctl_disk_resize_journal(struct bch_fs *c, @@ -643,6 +806,28 @@ static long bch2_ioctl_disk_resize_journal(struct bch_fs *c, return bch2_set_nr_journal_buckets(c, ca, arg.nbuckets); } +static long bch2_ioctl_disk_resize_journal_v2(struct bch_fs *c, + struct bch_ioctl_disk_resize_journal_v2 arg) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if ((arg.flags & ~BCH_BY_INDEX) || + arg.pad) + return -EINVAL; + + if (arg.nbuckets > U32_MAX) + return -EINVAL; + + CLASS(bch2_device_lookup, ca)(c, arg.dev, arg.flags); + if (IS_ERR(ca)) + return PTR_ERR(ca); + + CLASS(printbuf, err)(); + int ret = bch2_set_nr_journal_buckets(c, ca, arg.nbuckets); + return 
copy_ioctl_err_msg(&arg.err, &err, ret); +} + #define BCH_IOCTL(_name, _argtype) \ do { \ _argtype i; \ @@ -684,20 +869,34 @@ long bch2_fs_ioctl(struct bch_fs *c, unsigned cmd, void __user *arg) switch (cmd) { case BCH_IOCTL_DISK_ADD: BCH_IOCTL(disk_add, struct bch_ioctl_disk); + case BCH_IOCTL_DISK_ADD_v2: + BCH_IOCTL(disk_add_v2, struct bch_ioctl_disk_v2); case BCH_IOCTL_DISK_REMOVE: BCH_IOCTL(disk_remove, struct bch_ioctl_disk); + case BCH_IOCTL_DISK_REMOVE_v2: + BCH_IOCTL(disk_remove_v2, struct bch_ioctl_disk_v2); case BCH_IOCTL_DISK_ONLINE: BCH_IOCTL(disk_online, struct bch_ioctl_disk); + case BCH_IOCTL_DISK_ONLINE_v2: + BCH_IOCTL(disk_online_v2, struct bch_ioctl_disk_v2); case BCH_IOCTL_DISK_OFFLINE: BCH_IOCTL(disk_offline, struct bch_ioctl_disk); + case BCH_IOCTL_DISK_OFFLINE_v2: + BCH_IOCTL(disk_offline_v2, struct bch_ioctl_disk_v2); case BCH_IOCTL_DISK_SET_STATE: BCH_IOCTL(disk_set_state, struct bch_ioctl_disk_set_state); + case BCH_IOCTL_DISK_SET_STATE_v2: + BCH_IOCTL(disk_set_state_v2, struct bch_ioctl_disk_set_state_v2); case BCH_IOCTL_DATA: BCH_IOCTL(data, struct bch_ioctl_data); case BCH_IOCTL_DISK_RESIZE: BCH_IOCTL(disk_resize, struct bch_ioctl_disk_resize); + case BCH_IOCTL_DISK_RESIZE_v2: + BCH_IOCTL(disk_resize_v2, struct bch_ioctl_disk_resize_v2); case BCH_IOCTL_DISK_RESIZE_JOURNAL: BCH_IOCTL(disk_resize_journal, struct bch_ioctl_disk_resize_journal); + case BCH_IOCTL_DISK_RESIZE_JOURNAL_v2: + BCH_IOCTL(disk_resize_journal_v2, struct bch_ioctl_disk_resize_journal_v2); case BCH_IOCTL_FSCK_ONLINE: BCH_IOCTL(fsck_online, struct bch_ioctl_fsck_online); case BCH_IOCTL_QUERY_ACCOUNTING: diff --git a/fs/bcachefs/darray.c b/fs/bcachefs/darray.c index e86d36d23e9e..6940037bd19e 100644 --- a/fs/bcachefs/darray.c +++ b/fs/bcachefs/darray.c @@ -1,11 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/log2.h> +#include <linux/rcupdate.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include "darray.h" -int 
__bch2_darray_resize_noprof(darray_char *d, size_t element_size, size_t new_size, gfp_t gfp) +int __bch2_darray_resize_noprof(darray_char *d, size_t element_size, size_t new_size, gfp_t gfp, + bool rcu) { if (new_size > d->size) { new_size = roundup_pow_of_two(new_size); @@ -20,18 +22,25 @@ int __bch2_darray_resize_noprof(darray_char *d, size_t element_size, size_t new_ if (unlikely(check_mul_overflow(new_size, element_size, &bytes))) return -ENOMEM; - void *data = likely(bytes < INT_MAX) + void *old = d->data; + void *new = likely(bytes < INT_MAX) ? kvmalloc_noprof(bytes, gfp) : vmalloc_noprof(bytes); - if (!data) + if (!new) return -ENOMEM; if (d->size) - memcpy(data, d->data, d->size * element_size); - if (d->data != d->preallocated) - kvfree(d->data); - d->data = data; + memcpy(new, old, d->size * element_size); + + rcu_assign_pointer(d->data, new); d->size = new_size; + + if (old != d->preallocated) { + if (!rcu) + kvfree(old); + else + kvfree_rcu_mightsleep(old); + } } return 0; diff --git a/fs/bcachefs/darray.h b/fs/bcachefs/darray.h index 4080ee99aadd..b4f284fe9652 100644 --- a/fs/bcachefs/darray.h +++ b/fs/bcachefs/darray.h @@ -34,17 +34,17 @@ typedef DARRAY(s16) darray_s16; typedef DARRAY(s32) darray_s32; typedef DARRAY(s64) darray_s64; -int __bch2_darray_resize_noprof(darray_char *, size_t, size_t, gfp_t); +int __bch2_darray_resize_noprof(darray_char *, size_t, size_t, gfp_t, bool); #define __bch2_darray_resize(...) alloc_hooks(__bch2_darray_resize_noprof(__VA_ARGS__)) -#define __darray_resize(_d, _element_size, _new_size, _gfp) \ +#define __darray_resize(_d, _element_size, _new_size, _gfp, _rcu) \ (unlikely((_new_size) > (_d)->size) \ - ? __bch2_darray_resize((_d), (_element_size), (_new_size), (_gfp))\ + ? 
__bch2_darray_resize((_d), (_element_size), (_new_size), (_gfp), _rcu)\ : 0) #define darray_resize_gfp(_d, _new_size, _gfp) \ - __darray_resize((darray_char *) (_d), sizeof((_d)->data[0]), (_new_size), _gfp) + __darray_resize((darray_char *) (_d), sizeof((_d)->data[0]), (_new_size), _gfp, false) #define darray_resize(_d, _new_size) \ darray_resize_gfp(_d, _new_size, GFP_KERNEL) @@ -55,6 +55,12 @@ int __bch2_darray_resize_noprof(darray_char *, size_t, size_t, gfp_t); #define darray_make_room(_d, _more) \ darray_make_room_gfp(_d, _more, GFP_KERNEL) +#define darray_resize_rcu(_d, _new_size) \ + __darray_resize((darray_char *) (_d), sizeof((_d)->data[0]), (_new_size), GFP_KERNEL, true) + +#define darray_make_room_rcu(_d, _more) \ + darray_resize_rcu((_d), (_d)->nr + (_more)) + #define darray_room(_d) ((_d).size - (_d).nr) #define darray_top(_d) ((_d).data[(_d).nr]) @@ -107,8 +113,11 @@ int __bch2_darray_resize_noprof(darray_char *, size_t, size_t, gfp_t); #define __darray_for_each(_d, _i) \ for ((_i) = (_d).data; _i < (_d).data + (_d).nr; _i++) +#define darray_for_each_from(_d, _i, _start) \ + for (typeof(&(_d).data[0]) _i = _start; _i < (_d).data + (_d).nr; _i++) + #define darray_for_each(_d, _i) \ - for (typeof(&(_d).data[0]) _i = (_d).data; _i < (_d).data + (_d).nr; _i++) + darray_for_each_from(_d, _i, (_d).data) #define darray_for_each_reverse(_d, _i) \ for (typeof(&(_d).data[0]) _i = (_d).data + (_d).nr - 1; _i >= (_d).data && (_d).nr; --_i) diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index cb44b35e0f1d..fe6f3d874a47 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -95,7 +95,7 @@ static u64 bch2_dirent_hash(const struct bch_hash_info *info, bch2_str_hash_update(&ctx, info, name->name, name->len); /* [0,2) reserved for dots */ - return max_t(u64, bch2_str_hash_end(&ctx, info), 2); + return max_t(u64, bch2_str_hash_end(&ctx, info, true), 2); } static u64 dirent_hash_key(const struct bch_hash_info *info, const void *key) diff --git 
a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index 5944ad6d0f8d..809c76b68ba8 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -734,6 +734,37 @@ invalid_device: goto fsck_err; } +static struct journal_key *accumulate_newer_accounting_keys(struct bch_fs *c, struct journal_key *i) +{ + struct journal_keys *keys = &c->journal_keys; + struct bkey_i *k = journal_key_k(c, i); + + darray_for_each_from(*keys, j, i + 1) { + if (journal_key_cmp(c, i, j)) + return j; + + struct bkey_i *n = journal_key_k(c, j); + if (n->k.type == KEY_TYPE_accounting) { + WARN_ON(bversion_cmp(k->k.bversion, n->k.bversion) >= 0); + + bch2_accounting_accumulate(bkey_i_to_accounting(k), + bkey_i_to_s_c_accounting(n)); + j->overwritten = true; + } + } + + return &darray_top(*keys); +} + +static struct journal_key *accumulate_and_read_journal_accounting(struct btree_trans *trans, struct journal_key *i) +{ + struct bch_fs *c = trans->c; + struct journal_key *next = accumulate_newer_accounting_keys(c, i); + + int ret = accounting_read_key(trans, bkey_i_to_s_c(journal_key_k(c, i))); + return ret ? 
ERR_PTR(ret) : next; +} + /* * At startup time, initialize the in memory accounting from the btree (and * journal) @@ -759,80 +790,76 @@ int bch2_accounting_read(struct bch_fs *c) percpu_memset(c->usage, 0, sizeof(*c->usage)); } + struct journal_keys *keys = &c->journal_keys; + struct journal_key *jk = keys->data; + + while (jk < &darray_top(*keys) && + __journal_key_cmp(c, BTREE_ID_accounting, 0, POS_MIN, jk) > 0) + jk++; + + struct journal_key *end = jk; + while (end < &darray_top(*keys) && + __journal_key_cmp(c, BTREE_ID_accounting, 0, SPOS_MAX, end) > 0) + end++; + struct btree_iter iter; bch2_trans_iter_init(trans, &iter, BTREE_ID_accounting, POS_MIN, BTREE_ITER_prefetch|BTREE_ITER_all_snapshots); iter.flags &= ~BTREE_ITER_with_journal; int ret = for_each_btree_key_continue(trans, iter, BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, ({ - struct bkey u; - struct bkey_s_c k = bch2_btree_path_peek_slot_exact(btree_iter_path(trans, &iter), &u); - - if (k.k->type != KEY_TYPE_accounting) - continue; - - struct disk_accounting_pos acc_k; - bpos_to_disk_accounting_pos(&acc_k, k.k->p); - - if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) - break; - - if (!bch2_accounting_is_mem(&acc_k)) { - struct disk_accounting_pos next; - memset(&next, 0, sizeof(next)); - next.type = acc_k.type + 1; - bch2_btree_iter_set_pos(&iter, disk_accounting_pos_to_bpos(&next)); - continue; - } + struct bkey u; + struct bkey_s_c k = bch2_btree_path_peek_slot_exact(btree_iter_path(trans, &iter), &u); - accounting_read_key(trans, k); - })); - bch2_trans_iter_exit(&iter); - if (ret) - return ret; - - struct journal_keys *keys = &c->journal_keys; - struct journal_key *dst = keys->data; - move_gap(keys, keys->nr); - - darray_for_each(*keys, i) { - if (i->k->k.type == KEY_TYPE_accounting) { - struct disk_accounting_pos acc_k; - bpos_to_disk_accounting_pos(&acc_k, i->k->k.p); + if (k.k->type != KEY_TYPE_accounting) + continue; - if (!bch2_accounting_is_mem(&acc_k)) - continue; + while (jk < end && + 
__journal_key_cmp(c, BTREE_ID_accounting, 0, k.k->p, jk) > 0) + jk = accumulate_and_read_journal_accounting(trans, jk); - struct bkey_s_c k = bkey_i_to_s_c(i->k); - unsigned idx = eytzinger0_find(acc->k.data, acc->k.nr, - sizeof(acc->k.data[0]), - accounting_pos_cmp, &k.k->p); + while (jk < end && + __journal_key_cmp(c, BTREE_ID_accounting, 0, k.k->p, jk) == 0 && + bversion_cmp(journal_key_k(c, jk)->k.bversion, k.k->bversion) <= 0) { + jk->overwritten = true; + jk++; + } - bool applied = idx < acc->k.nr && - bversion_cmp(acc->k.data[idx].bversion, k.k->bversion) >= 0; + if (jk < end && + __journal_key_cmp(c, BTREE_ID_accounting, 0, k.k->p, jk) == 0) + jk = accumulate_and_read_journal_accounting(trans, jk); - if (applied) - continue; + struct disk_accounting_pos acc_k; + bpos_to_disk_accounting_pos(&acc_k, k.k->p); - if (i + 1 < &darray_top(*keys) && - i[1].k->k.type == KEY_TYPE_accounting && - !journal_key_cmp(i, i + 1)) { - WARN_ON(bversion_cmp(i[0].k->k.bversion, i[1].k->k.bversion) >= 0); + if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) + break; - i[1].journal_seq = i[0].journal_seq; + if (!bch2_accounting_is_mem(&acc_k)) { + struct disk_accounting_pos next_acc; + memset(&next_acc, 0, sizeof(next_acc)); + next_acc.type = acc_k.type + 1; + struct bpos next = disk_accounting_pos_to_bpos(&next_acc); + if (jk < end) + next = bpos_min(next, journal_key_k(c, jk)->k.p); + + bch2_btree_iter_set_pos(&iter, next); + continue; + } - bch2_accounting_accumulate(bkey_i_to_accounting(i[1].k), - bkey_s_c_to_accounting(k)); - continue; - } + accounting_read_key(trans, k); + })); + bch2_trans_iter_exit(&iter); + if (ret) + return ret; - ret = accounting_read_key(trans, k); - if (ret) - return ret; - } + while (jk < end) + jk = accumulate_and_read_journal_accounting(trans, jk); - *dst++ = *i; - } + struct journal_key *dst = keys->data; + darray_for_each(*keys, i) + if (!i->overwritten) + *dst++ = *i; keys->gap = keys->nr = dst - keys->data; guard(percpu_write)(&c->mark_lock); diff 
--git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 85ec9f877c18..c2840cb674b2 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -35,8 +35,6 @@ #include <linux/raid/pq.h> #include <linux/raid/xor.h> -static bool bch2_stripe_is_open(struct bch_fs *, u64); - static void raid5_recov(unsigned disks, unsigned failed_idx, size_t size, void **data) { @@ -388,20 +386,11 @@ int bch2_trigger_stripe(struct btree_trans *trans, new_s->nr_redundant != old_s->nr_redundant)); if (flags & BTREE_TRIGGER_transactional) { - u64 old_lru_pos = stripe_lru_pos(old_s); - u64 new_lru_pos = stripe_lru_pos(new_s); - - if (new_lru_pos == STRIPE_LRU_POS_EMPTY && - !bch2_stripe_is_open(c, idx)) { - _new.k->type = KEY_TYPE_deleted; - set_bkey_val_u64s(_new.k, 0); - new_s = NULL; - new_lru_pos = 0; - } - int ret = bch2_lru_change(trans, - BCH_LRU_STRIPE_FRAGMENTATION, idx, - old_lru_pos, new_lru_pos); + BCH_LRU_STRIPE_FRAGMENTATION, + idx, + stripe_lru_pos(old_s), + stripe_lru_pos(new_s)); if (ret) return ret; } diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index 32a286b3a74e..e33f3166c48a 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -141,14 +141,16 @@ void bch2_io_error_work(struct work_struct *work) if (ca->mi.state >= BCH_MEMBER_STATE_ro) return; - bool dev = !__bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_ro, - BCH_FORCE_IF_DEGRADED); CLASS(printbuf, buf)(); __bch2_log_msg_start(ca->name, &buf); - prt_printf(&buf, "writes erroring for %u seconds, setting %s ro", - c->opts.write_error_timeout, - dev ? "device" : "filesystem"); + prt_printf(&buf, "writes erroring for %u seconds\n", + c->opts.write_error_timeout); + + bool dev = !__bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_ro, + BCH_FORCE_IF_DEGRADED, &buf); + + prt_printf(&buf, "setting %s ro", dev ?
"device" : "filesystem"); if (!dev) bch2_fs_emergency_read_only2(c, &buf); diff --git a/fs/bcachefs/extent_update.c b/fs/bcachefs/extent_update.c index c4b0ea1adaa8..73eb28090bc7 100644 --- a/fs/bcachefs/extent_update.c +++ b/fs/bcachefs/extent_update.c @@ -98,11 +98,13 @@ static int count_iters_for_insert(struct btree_trans *trans, return ret2 ?: ret; } -int bch2_extent_atomic_end(struct btree_trans *trans, - struct btree_iter *iter, - struct bpos *end) +int bch2_extent_trim_atomic(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_i *insert) { - unsigned nr_iters = 0; + enum bch_bkey_type whiteout_type = + extent_whiteout_type(trans->c, iter->btree_id, &insert->k); + struct bpos end = insert->k.p; struct btree_iter copy; bch2_trans_copy_iter(&copy, iter); @@ -111,42 +113,60 @@ int bch2_extent_atomic_end(struct btree_trans *trans, if (ret) goto err; + copy.flags |= BTREE_ITER_nofilter_whiteouts; + + /* + * We're doing our own whiteout filtering, but we still need to pass a + * max key to avoid popping an assert in bch2_snapshot_is_ancestor(): + */ struct bkey_s_c k; - for_each_btree_key_max_continue_norestart(copy, *end, 0, k, ret) { + unsigned nr_iters = 0; + for_each_btree_key_max_continue_norestart(copy, + POS(insert->k.p.inode, U64_MAX), + 0, k, ret) { unsigned offset = 0; if (bkey_gt(iter->pos, bkey_start_pos(k.k))) offset = iter->pos.offset - bkey_start_offset(k.k); - ret = count_iters_for_insert(trans, k, offset, end, &nr_iters); - if (ret) - break; + if (bkey_extent_whiteout(k.k)) { + if (bpos_gt(k.k->p, insert->k.p)) { + if (k.k->type == KEY_TYPE_extent_whiteout) + break; + else + continue; + } else if (k.k->type != whiteout_type) { + nr_iters += 1; + if (nr_iters >= EXTENT_ITERS_MAX) { + end = bpos_min(end, k.k->p); + break; + } + } + } else { + if (bpos_ge(bkey_start_pos(k.k), end)) + break; + + ret = count_iters_for_insert(trans, k, offset, &end, &nr_iters); + if (ret) + break; + } } err: bch2_trans_iter_exit(&copy); - return ret < 0 ?
ret : 0; -} - -int bch2_extent_trim_atomic(struct btree_trans *trans, - struct btree_iter *iter, - struct bkey_i *k) -{ - struct bpos end = k->k.p; - int ret = bch2_extent_atomic_end(trans, iter, &end); - if (ret) + if (ret < 0) return ret; /* tracepoint */ - if (bpos_lt(end, k->k.p)) { + if (bpos_lt(end, insert->k.p)) { if (trace_extent_trim_atomic_enabled()) { CLASS(printbuf, buf)(); bch2_bpos_to_text(&buf, end); prt_newline(&buf); - bch2_bkey_val_to_text(&buf, trans->c, bkey_i_to_s_c(k)); + bch2_bkey_val_to_text(&buf, trans->c, bkey_i_to_s_c(insert)); trace_extent_trim_atomic(trans->c, buf.buf); } - bch2_cut_back(end, k); + bch2_cut_back(end, insert); } return 0; } diff --git a/fs/bcachefs/extent_update.h b/fs/bcachefs/extent_update.h index 34467db53f45..2d956d971b11 100644 --- a/fs/bcachefs/extent_update.h +++ b/fs/bcachefs/extent_update.h @@ -4,8 +4,6 @@ #include "bcachefs.h" -int bch2_extent_atomic_end(struct btree_trans *, struct btree_iter *, - struct bpos *); int bch2_extent_trim_atomic(struct btree_trans *, struct btree_iter *, struct bkey_i *); diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c index 0005569ecace..fd8beb5167ee 100644 --- a/fs/bcachefs/fs-io-buffered.c +++ b/fs/bcachefs/fs-io-buffered.c @@ -42,6 +42,14 @@ struct readpages_iter { folios folios; }; +static inline void readpages_iter_folio_revert(struct readahead_control *ractl, + struct folio *folio) +{ + bch2_folio_release(folio); + ractl->_nr_pages += folio_nr_pages(folio); + ractl->_index -= folio_nr_pages(folio); +} + static int readpages_iter_init(struct readpages_iter *iter, struct readahead_control *ractl) { @@ -52,9 +60,7 @@ static int readpages_iter_init(struct readpages_iter *iter, while ((folio = __readahead_folio(ractl))) { if (!bch2_folio_create(folio, GFP_KERNEL) || darray_push(&iter->folios, folio)) { - bch2_folio_release(folio); - ractl->_nr_pages += folio_nr_pages(folio); - ractl->_index -= folio_nr_pages(folio); + readpages_iter_folio_revert(ractl, 
folio); return iter->folios.nr ? 0 : -ENOMEM; } @@ -64,6 +70,15 @@ static int readpages_iter_init(struct readpages_iter *iter, return 0; } +static void readpages_iter_exit(struct readpages_iter *iter, + struct readahead_control *ractl) +{ + darray_for_each_reverse(iter->folios, folio) { + readpages_iter_folio_revert(ractl, *folio); + folio_get(*folio); + } +} + static inline struct folio *readpage_iter_peek(struct readpages_iter *iter) { if (iter->idx >= iter->folios.nr) @@ -290,7 +305,10 @@ void bch2_readahead(struct readahead_control *ractl) * scheduling. */ blk_start_plug(&plug); - bch2_pagecache_add_get(inode); + if (!bch2_pagecache_add_tryget(inode)) { + readpages_iter_exit(&readpages_iter, ractl); + goto out; + } struct btree_trans *trans = bch2_trans_get(c); while ((folio = readpage_iter_peek(&readpages_iter))) { @@ -317,6 +335,7 @@ void bch2_readahead(struct readahead_control *ractl) bch2_trans_put(trans); bch2_pagecache_add_put(inode); +out: blk_finish_plug(&plug); darray_exit(&readpages_iter.folios); } @@ -759,7 +778,6 @@ int bch2_write_end(struct file *file, struct address_space *mapping, struct bch2_folio_reservation *res = fsdata; unsigned offset = pos - folio_pos(folio); - lockdep_assert_held(&inode->v.i_rwsem); BUG_ON(offset + copied > folio_size(folio)); if (unlikely(copied < len && !folio_test_uptodate(folio))) { diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 52722a5e8526..0425238a83ee 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -857,9 +857,7 @@ static int bch2_symlink(struct mnt_idmap *idmap, if (IS_ERR(inode)) return bch2_err_class(PTR_ERR(inode)); - inode_lock(&inode->v); ret = page_symlink(&inode->v, symname, strlen(symname) + 1); - inode_unlock(&inode->v); if (unlikely(ret)) goto err; diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 01c1c6372229..ccc44b1fc178 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -266,7 +266,8 @@ create_lostfound: root_inode.bi_nlink++; - ret = bch2_inode_create(trans, 
&lostfound_iter, lostfound, snapshot, cpu); + ret = bch2_inode_create(trans, &lostfound_iter, lostfound, snapshot, cpu, + inode_opt_get(c, &root_inode, inodes_32bit)); if (ret) goto err; @@ -573,7 +574,7 @@ static int reconstruct_subvol(struct btree_trans *trans, u32 snapshotid, u32 sub new_inode.bi_subvol = subvolid; - int ret = bch2_inode_create(trans, &inode_iter, &new_inode, snapshotid, cpu) ?: + int ret = bch2_inode_create(trans, &inode_iter, &new_inode, snapshotid, cpu, false) ?: bch2_btree_iter_traverse(&inode_iter) ?: bch2_inode_write(trans, &inode_iter, &new_inode); bch2_trans_iter_exit(&inode_iter); diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index d5e5190f0663..4aa130ff7cf6 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -944,11 +944,12 @@ void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u, } static struct bkey_i_inode_alloc_cursor * -bch2_inode_alloc_cursor_get(struct btree_trans *trans, u64 cpu, u64 *min, u64 *max) +bch2_inode_alloc_cursor_get(struct btree_trans *trans, u64 cpu, u64 *min, u64 *max, + bool is_32bit) { struct bch_fs *c = trans->c; - u64 cursor_idx = c->opts.inodes_32bit ? 0 : cpu + 1; + u64 cursor_idx = is_32bit ? 
0 : cpu + 1; cursor_idx &= ~(~0ULL << c->opts.shard_inode_numbers_bits); @@ -967,7 +968,7 @@ bch2_inode_alloc_cursor_get(struct btree_trans *trans, u64 cpu, u64 *min, u64 *m if (IS_ERR(cursor)) return cursor; - if (c->opts.inodes_32bit) { + if (is_32bit) { *min = BLOCKDEV_INODE_MAX; *max = INT_MAX; } else { @@ -996,11 +997,11 @@ bch2_inode_alloc_cursor_get(struct btree_trans *trans, u64 cpu, u64 *min, u64 *m int bch2_inode_create(struct btree_trans *trans, struct btree_iter *iter, struct bch_inode_unpacked *inode_u, - u32 snapshot, u64 cpu) + u32 snapshot, u64 cpu, bool is_32bit) { u64 min, max; struct bkey_i_inode_alloc_cursor *cursor = - bch2_inode_alloc_cursor_get(trans, cpu, &min, &max); + bch2_inode_alloc_cursor_get(trans, cpu, &min, &max, is_32bit); int ret = PTR_ERR_OR_ZERO(cursor); if (ret) return ret; diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index b8ec3e628d90..79092ea74844 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -172,7 +172,7 @@ void bch2_inode_init(struct bch_fs *, struct bch_inode_unpacked *, struct bch_inode_unpacked *); int bch2_inode_create(struct btree_trans *, struct btree_iter *, - struct bch_inode_unpacked *, u32, u64); + struct bch_inode_unpacked *, u32, u64, bool); int bch2_inode_rm(struct bch_fs *, subvol_inum); diff --git a/fs/bcachefs/inode_format.h b/fs/bcachefs/inode_format.h index 1f00938b1bdc..e07fa6cc99bd 100644 --- a/fs/bcachefs/inode_format.h +++ b/fs/bcachefs/inode_format.h @@ -144,7 +144,8 @@ enum inode_opt_id { x(unlinked, 7) \ x(backptr_untrusted, 8) \ x(has_child_snapshot, 9) \ - x(has_case_insensitive, 10) + x(has_case_insensitive, 10) \ + x(31bit_dirent_offset, 11) /* bits 20+ reserved for packed fields below: */ diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 07869436a964..93ac0faedf7d 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -120,6 +120,7 @@ static void journal_pin_list_init(struct journal_entry_pin_list *p, int count) 
INIT_LIST_HEAD(&p->flushed[i]); atomic_set(&p->count, count); p->devs.nr = 0; + p->bytes = 0; } /* @@ -264,6 +265,11 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val, bool t /* Close out old buffer: */ buf->data->u64s = cpu_to_le32(old.cur_entry_offset); + struct journal_entry_pin_list *pin_list = + journal_seq_pin(j, journal_cur_seq(j)); + pin_list->bytes = roundup_pow_of_two(vstruct_bytes(buf->data)); + j->dirty_entry_bytes += pin_list->bytes; + if (trace_journal_entry_close_enabled() && trace) { CLASS(printbuf, err)(); guard(printbuf_atomic)(&err); @@ -737,9 +743,9 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res, return ret; CLASS(printbuf, buf)(); + prt_printf(&buf, bch2_fmt(c, "Journal stuck? Waited for 10 seconds, err %s"), bch2_err_str(ret)); bch2_journal_debug_to_text(&buf, j); bch2_print_str(c, KERN_ERR, buf.buf); - prt_printf(&buf, bch2_fmt(c, "Journal stuck? Waited for 10 seconds, err %s"), bch2_err_str(ret)); closure_wait_event(&j->async_wait, !bch2_err_matches(ret = __journal_res_get(j, res, flags), BCH_ERR_operation_blocked) || diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 093e4acad085..c5458c61f49a 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -3,6 +3,7 @@ #include "alloc_background.h" #include "alloc_foreground.h" #include "btree_io.h" +#include "btree_journal_iter.h" #include "btree_update_interior.h" #include "btree_write_buffer.h" #include "buckets.h" @@ -106,11 +107,6 @@ static bool jset_csum_good(struct bch_fs *c, struct jset *j, struct bch_csum *cs return !bch2_crc_cmp(j->csum, *csum); } -static inline u32 journal_entry_radix_idx(struct bch_fs *c, u64 seq) -{ - return (seq - c->journal_entries_base_seq) & (~0U >> 1); -} - static void __journal_replay_free(struct bch_fs *c, struct journal_replay *i) { @@ -195,6 +191,23 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca, } } + /* Drop overwrites, log entries if we don't 
need them: */ + if (!c->opts.retain_recovery_info && + !c->opts.journal_rewind) { + struct jset_entry *dst = j->start; + vstruct_for_each_safe(j, src) { + if (src->type == BCH_JSET_ENTRY_log || + src->type == BCH_JSET_ENTRY_overwrite) + continue; + + memcpy(dst, src, vstruct_bytes(src)); + dst = vstruct_next(dst); + } + + j->u64s = cpu_to_le32((u64 *) dst - j->_data); + bytes = vstruct_bytes(j); + } + jlist->last_seq = max(jlist->last_seq, last_seq); _i = genradix_ptr_alloc(&c->journal_entries, @@ -209,6 +222,8 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca, */ dup = *_i; if (dup) { + BUG_ON(dup->j.seq != j->seq); + bool identical = bytes == vstruct_bytes(&dup->j) && !memcmp(j, &dup->j, bytes); bool not_identical = !identical && @@ -239,6 +254,7 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca, if (entry_ptr.csum_good && !identical) goto replace; + BUG_ON(dup->j.seq != j->seq); return ret; } replace: diff --git a/fs/bcachefs/journal_io.h b/fs/bcachefs/journal_io.h index f53c5c81d137..f8754bf71264 100644 --- a/fs/bcachefs/journal_io.h +++ b/fs/bcachefs/journal_io.h @@ -7,29 +7,6 @@ void bch2_journal_pos_from_member_info_set(struct bch_fs *); void bch2_journal_pos_from_member_info_resume(struct bch_fs *); -struct journal_ptr { - bool csum_good; - struct bch_csum csum; - u8 dev; - u32 bucket; - u32 bucket_offset; - u64 sector; -}; - -/* - * Only used for holding the journal entries we read in btree_journal_read() - * during cache_registration - */ -struct journal_replay { - DARRAY_PREALLOCATED(struct journal_ptr, 8) ptrs; - - bool csum_good; - bool ignore_blacklisted; - bool ignore_not_dirty; - /* must be last: */ - struct jset j; -}; - static inline bool journal_replay_ignore(struct journal_replay *i) { return !i || i->ignore_blacklisted || i->ignore_not_dirty; diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index f23e5ee9ad75..bd1885607d3e 100644 --- a/fs/bcachefs/journal_reclaim.c +++ 
b/fs/bcachefs/journal_reclaim.c @@ -148,6 +148,9 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne BUG_ON(nr_devs_want > ARRAY_SIZE(dev_space)); + size_t mem_limit = max_t(ssize_t, 0, + (totalram_pages() * PAGE_SIZE) / 4 - j->dirty_entry_bytes); + for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal]) { if (!ca->journal.nr || !ca->mi.durability) @@ -180,6 +183,7 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne * @nr_devs_want largest devices: */ space = dev_space[nr_devs_want - 1]; + space.total = min(space.total, mem_limit >> 9); space.next_entry = min(space.next_entry, min_bucket_size); return space; } @@ -328,9 +332,17 @@ void bch2_journal_reclaim_fast(struct journal *j) * Unpin journal entries whose reference counts reached zero, meaning * all btree nodes got written out */ + struct journal_entry_pin_list *pin_list; while (!fifo_empty(&j->pin) && j->pin.front <= j->seq_ondisk && - !atomic_read(&fifo_peek_front(&j->pin).count)) { + !atomic_read(&(pin_list = &fifo_peek_front(&j->pin))->count)) { + + if (WARN_ON(j->dirty_entry_bytes < pin_list->bytes)) + pin_list->bytes = j->dirty_entry_bytes; + + j->dirty_entry_bytes -= pin_list->bytes; + pin_list->bytes = 0; + j->pin.front++; popped = true; } diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h index 51104bbb99da..7c9273bd0e15 100644 --- a/fs/bcachefs/journal_types.h +++ b/fs/bcachefs/journal_types.h @@ -71,6 +71,7 @@ struct journal_entry_pin_list { struct list_head flushed[JOURNAL_PIN_TYPE_NR]; atomic_t count; struct bch_devs_list devs; + size_t bytes; }; struct journal; @@ -253,6 +254,7 @@ struct journal { u64 front, back, size, mask; struct journal_entry_pin_list *data; } pin; + size_t dirty_entry_bytes; struct journal_space space[journal_space_nr]; diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c index a66d01d04e57..892990b4a6a6 100644 --- a/fs/bcachefs/migrate.c +++ b/fs/bcachefs/migrate.c @@ -125,6 
+125,10 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, if (!btree_type_has_ptrs(id)) continue; + /* Stripe keys have pointers, but are handled separately */ + if (id == BTREE_ID_stripes) + continue; + int ret = for_each_btree_key_commit(trans, iter, id, POS_MIN, BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index df6833416855..4f41f1f6ec6c 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -819,7 +819,9 @@ static int bch2_move_data(struct bch_fs *c, unsigned min_depth_this_btree = min_depth; - if (!btree_type_has_ptrs(id)) + /* Stripe keys have pointers, but are handled separately */ + if (!btree_type_has_ptrs(id) || + id == BTREE_ID_stripes) min_depth_this_btree = max(min_depth_this_btree, 1); for (unsigned level = min_depth_this_btree; diff --git a/fs/bcachefs/namei.c b/fs/bcachefs/namei.c index d1019052f182..5c321a0d1f89 100644 --- a/fs/bcachefs/namei.c +++ b/fs/bcachefs/namei.c @@ -62,7 +62,8 @@ int bch2_create_trans(struct btree_trans *trans, if (flags & BCH_CREATE_TMPFILE) new_inode->bi_flags |= BCH_INODE_unlinked; - ret = bch2_inode_create(trans, &inode_iter, new_inode, snapshot, cpu); + ret = bch2_inode_create(trans, &inode_iter, new_inode, snapshot, cpu, + inode_opt_get(c, dir_u, inodes_32bit)); if (ret) goto err; diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c index 921f9049912d..c3ef35dc01e2 100644 --- a/fs/bcachefs/opts.c +++ b/fs/bcachefs/opts.c @@ -525,7 +525,7 @@ int bch2_opt_hook_pre_set(struct bch_fs *c, struct bch_dev *ca, enum bch_opt_id switch (id) { case Opt_state: if (ca) - return bch2_dev_set_state(c, ca, v, BCH_FORCE_IF_DEGRADED); + return bch2_dev_set_state(c, ca, v, BCH_FORCE_IF_DEGRADED, NULL); break; case Opt_compression: diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index 84ce69a7f131..31a3abcbd83e 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -242,7 +242,7 @@ enum fsck_err_opts { x(inodes_32bit, 
u8, \ OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ OPT_BOOL(), \ - BCH_SB_INODE_32BIT, true, \ + BCH_SB_INODE_32BIT, false, \ NULL, "Constrain inode numbers to 32 bits") \ x(shard_inode_numbers_bits, u8, \ OPT_FS|OPT_FORMAT, \ @@ -321,6 +321,11 @@ enum fsck_err_opts { OPT_BOOL(), \ BCH2_NO_SB_OPT, false, \ NULL, "Don't kick drives out when splitbrain detected")\ + x(no_version_check, u8, \ + OPT_HIDDEN, \ + OPT_BOOL(), \ + BCH2_NO_SB_OPT, false, \ + NULL, "Don't fail reading the superblock due to incompatible version")\ x(verbose, u8, \ OPT_FS|OPT_MOUNT|OPT_RUNTIME, \ OPT_BOOL(), \ diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index 17ca56b0e2ac..e1db63d75a99 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -444,8 +444,9 @@ static int do_rebalance_extent(struct moving_context *ctxt, bch2_bkey_buf_init(&sk); - ret = bkey_err(k = next_rebalance_extent(trans, work_pos, - extent_iter, &io_opts, &data_opts)); + ret = lockrestart_do(trans, + bkey_err(k = next_rebalance_extent(trans, work_pos, + extent_iter, &io_opts, &data_opts))); if (ret || !k.k) goto out; @@ -587,7 +588,7 @@ static int do_rebalance(struct moving_context *ctxt) ret = k->k.type == KEY_TYPE_cookie ? 
do_rebalance_scan(ctxt, k->k.p.inode, le64_to_cpu(bkey_i_to_cookie(k)->v.cookie)) - : lockrestart_do(trans, do_rebalance_extent(ctxt, k->k.p, &extent_iter)); + : do_rebalance_extent(ctxt, k->k.p, &extent_iter); if (ret) break; } diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 29e81f96db0f..6319144a440c 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -64,7 +64,6 @@ int bch2_btree_lost_data(struct bch_fs *c, * but in debug mode we want the next fsck run to be clean: */ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_lrus, 0, &write_sb) ?: ret; - ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_backpointers_to_extents, 0, &write_sb) ?: ret; #endif write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_lru_entry_bad, ext->errors_silent); @@ -182,9 +181,12 @@ void bch2_reconstruct_alloc(struct bch_fs *c) */ static void zero_out_btree_mem_ptr(struct journal_keys *keys) { - darray_for_each(*keys, i) - if (i->k->k.type == KEY_TYPE_btree_ptr_v2) - bkey_i_to_btree_ptr_v2(i->k)->v.mem_ptr = 0; + struct bch_fs *c = container_of(keys, struct bch_fs, journal_keys); + darray_for_each(*keys, i) { + struct bkey_i *k = journal_key_k(c, i); + if (k->k.type == KEY_TYPE_btree_ptr_v2) + bkey_i_to_btree_ptr_v2(k)->v.mem_ptr = 0; + } } /* journal replay: */ @@ -202,8 +204,10 @@ static void replay_now_at(struct journal *j, u64 seq) static int bch2_journal_replay_accounting_key(struct btree_trans *trans, struct journal_key *k) { + struct bch_fs *c = trans->c; + struct bkey_i *bk = journal_key_k(c, k); struct btree_iter iter; - bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p, + bch2_trans_node_iter_init(trans, &iter, k->btree_id, bk->k.p, BTREE_MAX_DEPTH, k->level, BTREE_ITER_intent); int ret = bch2_btree_iter_traverse(&iter); @@ -214,14 +218,14 @@ static int bch2_journal_replay_accounting_key(struct btree_trans *trans, struct bkey_s_c old = bch2_btree_path_peek_slot(btree_iter_path(trans, &iter), 
&u); /* Has this delta already been applied to the btree? */ - if (bversion_cmp(old.k->bversion, k->k->k.bversion) >= 0) { + if (bversion_cmp(old.k->bversion, bk->k.bversion) >= 0) { ret = 0; goto out; } - struct bkey_i *new = k->k; + struct bkey_i *new = bk; if (old.k->type == KEY_TYPE_accounting) { - new = bch2_bkey_make_mut_noupdate(trans, bkey_i_to_s_c(k->k)); + new = bch2_bkey_make_mut_noupdate(trans, bkey_i_to_s_c(bk)); ret = PTR_ERR_OR_ZERO(new); if (ret) goto out; @@ -230,7 +234,8 @@ static int bch2_journal_replay_accounting_key(struct btree_trans *trans, bkey_s_c_to_accounting(old)); } - trans->journal_res.seq = k->journal_seq; + if (!k->allocated) + trans->journal_res.seq = c->journal_entries_base_seq + k->journal_seq_offset; ret = bch2_trans_update(trans, &iter, new, BTREE_TRIGGER_norun); out: @@ -241,6 +246,7 @@ out: static int bch2_journal_replay_key(struct btree_trans *trans, struct journal_key *k) { + struct bch_fs *c = trans->c; struct btree_iter iter; unsigned iter_flags = BTREE_ITER_intent| @@ -251,7 +257,8 @@ static int bch2_journal_replay_key(struct btree_trans *trans, if (k->overwritten) return 0; - trans->journal_res.seq = k->journal_seq; + if (!k->allocated) + trans->journal_res.seq = c->journal_entries_base_seq + k->journal_seq_offset; /* * BTREE_UPDATE_key_cache_reclaim disables key cache lookup/update to @@ -266,7 +273,8 @@ static int bch2_journal_replay_key(struct btree_trans *trans, else update_flags |= BTREE_UPDATE_key_cache_reclaim; - bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p, + struct bkey_i *bk = journal_key_k(c, k); + bch2_trans_node_iter_init(trans, &iter, k->btree_id, bk->k.p, BTREE_MAX_DEPTH, k->level, iter_flags); ret = bch2_btree_iter_traverse(&iter); @@ -275,13 +283,11 @@ static int bch2_journal_replay_key(struct btree_trans *trans, struct btree_path *path = btree_iter_path(trans, &iter); if (unlikely(!btree_path_node(path, k->level))) { - struct bch_fs *c = trans->c; - CLASS(printbuf, buf)(); 
prt_str(&buf, "btree="); bch2_btree_id_to_text(&buf, k->btree_id); prt_printf(&buf, " level=%u ", k->level); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k->k)); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(bk)); if (!(c->recovery.passes_complete & (BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes)| BIT_ULL(BCH_RECOVERY_PASS_check_topology)))) { @@ -298,7 +304,7 @@ static int bch2_journal_replay_key(struct btree_trans *trans, } bch2_trans_iter_exit(&iter); - bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p, + bch2_trans_node_iter_init(trans, &iter, k->btree_id, bk->k.p, BTREE_MAX_DEPTH, 0, iter_flags); ret = bch2_btree_iter_traverse(&iter) ?: bch2_btree_increase_depth(trans, iter.path, 0) ?: @@ -310,17 +316,17 @@ static int bch2_journal_replay_key(struct btree_trans *trans, if (k->overwritten) goto out; - if (k->k->k.type == KEY_TYPE_accounting) { - struct bkey_i *n = bch2_trans_subbuf_alloc(trans, &trans->accounting, k->k->k.u64s); + if (bk->k.type == KEY_TYPE_accounting) { + struct bkey_i *n = bch2_trans_subbuf_alloc(trans, &trans->accounting, bk->k.u64s); ret = PTR_ERR_OR_ZERO(n); if (ret) goto out; - bkey_copy(n, k->k); + bkey_copy(n, bk); goto out; } - ret = bch2_trans_update(trans, &iter, k->k, update_flags); + ret = bch2_trans_update(trans, &iter, bk, update_flags); out: bch2_trans_iter_exit(&iter); return ret; @@ -331,13 +337,9 @@ static int journal_sort_seq_cmp(const void *_l, const void *_r) const struct journal_key *l = *((const struct journal_key **)_l); const struct journal_key *r = *((const struct journal_key **)_r); - /* - * Map 0 to U64_MAX, so that keys with journal_seq === 0 come last - * - * journal_seq == 0 means that the key comes from early repair, and - * should be inserted last so as to avoid overflowing the journal - */ - return cmp_int(l->journal_seq - 1, r->journal_seq - 1); + return !l->allocated && !r->allocated + ? 
cmp_int(l->journal_seq_offset, r->journal_seq_offset) + : cmp_int(l->allocated, r->allocated); } DEFINE_DARRAY_NAMED(darray_journal_keys, struct journal_key *) @@ -369,7 +371,9 @@ int bch2_journal_replay(struct bch_fs *c) * flush accounting keys until we're done */ darray_for_each(*keys, k) { - if (!(k->k->k.type == KEY_TYPE_accounting && !k->allocated)) + struct bkey_i *bk = journal_key_k(trans->c, k); + + if (!(bk->k.type == KEY_TYPE_accounting && !k->allocated)) continue; cond_resched(); @@ -412,7 +416,6 @@ int bch2_journal_replay(struct bch_fs *c) BCH_TRANS_COMMIT_skip_accounting_apply| (!k->allocated ? BCH_TRANS_COMMIT_no_journal_res : 0), bch2_journal_replay_key(trans, k)); - BUG_ON(!ret && !k->overwritten && k->k->k.type != KEY_TYPE_accounting); if (ret) { ret = darray_push(&keys_sorted, k); if (ret) @@ -434,8 +437,8 @@ int bch2_journal_replay(struct bch_fs *c) struct journal_key *k = *kp; - if (k->journal_seq) - replay_now_at(j, k->journal_seq); + if (!k->allocated) + replay_now_at(j, c->journal_entries_base_seq + k->journal_seq_offset); else replay_now_at(j, j->replay_journal_seq_end); diff --git a/fs/bcachefs/recovery_passes_format.h b/fs/bcachefs/recovery_passes_format.h index b63c20558d3d..2696eee00345 100644 --- a/fs/bcachefs/recovery_passes_format.h +++ b/fs/bcachefs/recovery_passes_format.h @@ -37,7 +37,7 @@ x(check_alloc_info, 10, PASS_ONLINE|PASS_FSCK_ALLOC) \ x(check_lrus, 11, PASS_ONLINE|PASS_FSCK_ALLOC) \ x(check_btree_backpointers, 12, PASS_ONLINE|PASS_FSCK_ALLOC) \ - x(check_backpointers_to_extents, 13, PASS_ONLINE|PASS_FSCK_DEBUG) \ + x(check_backpointers_to_extents, 13, PASS_ONLINE) \ x(check_extents_to_backpointers, 14, PASS_ONLINE|PASS_FSCK_ALLOC) \ x(check_alloc_to_lru_refs, 15, PASS_ONLINE|PASS_FSCK_ALLOC) \ x(fs_freespace_init, 16, PASS_ALWAYS|PASS_SILENT) \ diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c index 0784283ce78c..3ffd68d2608d 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -784,7 +784,7 @@ 
const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = { /* Query replicas: */ bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs, - unsigned flags, bool print) + unsigned flags, struct printbuf *err) { struct bch_replicas_entry_v1 *e; @@ -823,16 +823,14 @@ bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs, : BCH_FORCE_IF_DATA_DEGRADED; if (dflags & ~flags) { - if (print) { - CLASS(printbuf, buf)(); - - bch2_replicas_entry_to_text(&buf, e); - bch_err(c, "insufficient devices online (%u) for replicas entry %s", - nr_online, buf.buf); + if (err) { + prt_printf(err, "insufficient devices online (%u) for replicas entry ", + nr_online); + bch2_replicas_entry_to_text(err, e); + prt_newline(err); } return false; } - } return true; diff --git a/fs/bcachefs/replicas.h b/fs/bcachefs/replicas.h index 5aba2c1ce133..15023a9b0b1e 100644 --- a/fs/bcachefs/replicas.h +++ b/fs/bcachefs/replicas.h @@ -44,7 +44,7 @@ static inline void bch2_replicas_entry_cached(struct bch_replicas_entry_v1 *e, } bool bch2_have_enough_devs(struct bch_fs *, struct bch_devs_mask, - unsigned, bool); + unsigned, struct printbuf *); unsigned bch2_sb_dev_has_data(struct bch_sb *, unsigned); unsigned bch2_dev_has_data(struct bch_fs *, struct bch_dev *); diff --git a/fs/bcachefs/sb-counters_format.h b/fs/bcachefs/sb-counters_format.h index 44bc12573a0c..96ad64920810 100644 --- a/fs/bcachefs/sb-counters_format.h +++ b/fs/bcachefs/sb-counters_format.h @@ -22,7 +22,7 @@ enum counters_flags { x(io_read_split, 33, TYPE_COUNTER) \ x(io_read_reuse_race, 34, TYPE_COUNTER) \ x(io_read_retry, 32, TYPE_COUNTER) \ - x(io_read_fail_and_poison, 82, TYPE_COUNTER) \ + x(io_read_fail_and_poison, 95, TYPE_COUNTER) \ x(io_write, 1, TYPE_SECTORS) \ x(io_move, 2, TYPE_SECTORS) \ x(io_move_read, 35, TYPE_SECTORS) \ @@ -124,4 +124,13 @@ struct bch_sb_field_counters { __le64 d[]; }; +static inline void __maybe_unused check_bch_counter_ids_unique(void) { + switch(0){ +#define x(t, n, 
...) case (n): + BCH_PERSISTENT_COUNTERS() +#undef x + ; + } +} + #endif /* _BCACHEFS_SB_COUNTERS_FORMAT_H */ diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index e3c73d903898..d26a0ca4a59d 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -36,10 +36,12 @@ int bch2_dev_missing_bkey(struct bch_fs *c, struct bkey_s_c k, unsigned dev) void bch2_dev_missing_atomic(struct bch_fs *c, unsigned dev) { - if (dev != BCH_SB_MEMBER_INVALID) + if (dev != BCH_SB_MEMBER_INVALID) { bch2_fs_inconsistent(c, "pointer to %s device %u", test_bit(dev, c->devs_removed.d) ? "removed" : "nonexistent", dev); + dump_stack(); + } } void bch2_dev_bucket_missing(struct bch_dev *ca, u64 bucket) diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 84f987d3a02a..eab0c1e3ff56 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -1673,7 +1673,8 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans, return ret; darray_for_each(*deleted, i) - nr_deleted_ancestors += bch2_snapshot_is_ancestor(c, s->k.p.offset, i->id); + nr_deleted_ancestors += bch2_snapshots_same_tree(c, s->k.p.offset, i->id) && + bch2_snapshot_is_ancestor(c, s->k.p.offset, i->id); if (!nr_deleted_ancestors) return 0; diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h index fef32a0118c4..28d9a29a1fd0 100644 --- a/fs/bcachefs/snapshot.h +++ b/fs/bcachefs/snapshot.h @@ -51,6 +51,17 @@ static inline u32 bch2_snapshot_tree(struct bch_fs *c, u32 id) return s ? 
s->tree : 0; } +static inline bool bch2_snapshots_same_tree(struct bch_fs *c, u32 id1, u32 id2) +{ + if (id1 == id2) + return true; + + guard(rcu)(); + const struct snapshot_t *s1 = snapshot_t(c, id1); + const struct snapshot_t *s2 = snapshot_t(c, id2); + return s1 && s2 && s1->tree == s2->tree; +} + static inline u32 __bch2_snapshot_parent_early(struct bch_fs *c, u32 id) { const struct snapshot_t *s = snapshot_t(c, id); @@ -157,6 +168,10 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *, u32, u32); static inline bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor) { + EBUG_ON(!id); + EBUG_ON(!ancestor); + EBUG_ON(!bch2_snapshots_same_tree(c, id, ancestor)); + return id == ancestor ? true : __bch2_snapshot_is_ancestor(c, id, ancestor); diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h index 8c0fb44929cc..2a61cc36ddbf 100644 --- a/fs/bcachefs/str_hash.h +++ b/fs/bcachefs/str_hash.h @@ -34,6 +34,7 @@ bch2_str_hash_opt_to_type(struct bch_fs *c, enum bch_str_hash_opts opt) struct bch_hash_info { u32 inum_snapshot; u8 type; + bool is_31bit; struct unicode_map *cf_encoding; /* * For crc32 or crc64 string hashes the first key value of @@ -48,6 +49,7 @@ bch2_hash_info_init(struct bch_fs *c, const struct bch_inode_unpacked *bi) struct bch_hash_info info = { .inum_snapshot = bi->bi_snapshot, .type = INODE_STR_HASH(bi), + .is_31bit = bi->bi_flags & BCH_INODE_31bit_dirent_offset, .cf_encoding = bch2_inode_casefold(c, bi) ? 
c->cf_encoding : NULL, .siphash_key = { .k0 = bi->bi_hash_seed } }; @@ -112,8 +114,8 @@ static inline void bch2_str_hash_update(struct bch_str_hash_ctx *ctx, } } -static inline u64 bch2_str_hash_end(struct bch_str_hash_ctx *ctx, - const struct bch_hash_info *info) +static inline u64 __bch2_str_hash_end(struct bch_str_hash_ctx *ctx, + const struct bch_hash_info *info) { switch (info->type) { case BCH_STR_HASH_crc32c: @@ -128,6 +130,14 @@ static inline u64 bch2_str_hash_end(struct bch_str_hash_ctx *ctx, } } +static inline u64 bch2_str_hash_end(struct bch_str_hash_ctx *ctx, + const struct bch_hash_info *info, + bool maybe_31bit) +{ + return __bch2_str_hash_end(ctx, info) & + (maybe_31bit && info->is_31bit ? INT_MAX : U64_MAX); +} + struct bch_hash_desc { enum btree_id btree_id; u8 key_type; diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index be7ed612d28f..61eeac671283 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -89,7 +89,7 @@ int bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version v prt_str(&buf, "requested incompat feature "); bch2_version_to_text(&buf, version); prt_str(&buf, " currently not enabled, allowed up to "); - bch2_version_to_text(&buf, version); + bch2_version_to_text(&buf, c->sb.version_incompat_allowed); prt_printf(&buf, "\n set version_upgrade=incompat to enable"); bch_notice(c, "%s", buf.buf); @@ -379,7 +379,7 @@ static int bch2_sb_compatible(struct bch_sb *sb, struct printbuf *out) return 0; } -int bch2_sb_validate(struct bch_sb *sb, u64 read_offset, +int bch2_sb_validate(struct bch_sb *sb, struct bch_opts *opts, u64 read_offset, enum bch_validate_flags flags, struct printbuf *out) { enum bch_opt_id opt_id; @@ -389,28 +389,30 @@ int bch2_sb_validate(struct bch_sb *sb, u64 read_offset, if (ret) return ret; - u64 incompat = le64_to_cpu(sb->features[0]) & (~0ULL << BCH_FEATURE_NR); - unsigned incompat_bit = 0; - if (incompat) - incompat_bit = __ffs64(incompat); - else if (sb->features[1]) - 
incompat_bit = 64 + __ffs64(le64_to_cpu(sb->features[1])); - - if (incompat_bit) { - prt_printf(out, "Filesystem has incompatible feature bit %u, highest supported %s (%u)", - incompat_bit, - bch2_sb_features[BCH_FEATURE_NR - 1], - BCH_FEATURE_NR - 1); - return -BCH_ERR_invalid_sb_features; - } + if (!opts->no_version_check) { + u64 incompat = le64_to_cpu(sb->features[0]) & (~0ULL << BCH_FEATURE_NR); + unsigned incompat_bit = 0; + if (incompat) + incompat_bit = __ffs64(incompat); + else if (sb->features[1]) + incompat_bit = 64 + __ffs64(le64_to_cpu(sb->features[1])); + + if (incompat_bit) { + prt_printf(out, "Filesystem has incompatible feature bit %u, highest supported %s (%u)", + incompat_bit, + bch2_sb_features[BCH_FEATURE_NR - 1], + BCH_FEATURE_NR - 1); + return -BCH_ERR_invalid_sb_features; + } - if (BCH_VERSION_MAJOR(le16_to_cpu(sb->version)) > BCH_VERSION_MAJOR(bcachefs_metadata_version_current) || - BCH_SB_VERSION_INCOMPAT(sb) > bcachefs_metadata_version_current) { - prt_str(out, "Filesystem has incompatible version "); - bch2_version_to_text(out, le16_to_cpu(sb->version)); - prt_str(out, ", current version "); - bch2_version_to_text(out, bcachefs_metadata_version_current); - return -BCH_ERR_invalid_sb_features; + if (BCH_VERSION_MAJOR(le16_to_cpu(sb->version)) > BCH_VERSION_MAJOR(bcachefs_metadata_version_current) || + BCH_SB_VERSION_INCOMPAT(sb) > bcachefs_metadata_version_current) { + prt_str(out, "Filesystem has incompatible version "); + bch2_version_to_text(out, le16_to_cpu(sb->version)); + prt_str(out, ", current version "); + bch2_version_to_text(out, bcachefs_metadata_version_current); + return -BCH_ERR_invalid_sb_features; + } } if (bch2_is_zero(sb->user_uuid.b, sizeof(sb->user_uuid))) { @@ -915,7 +917,7 @@ got_super: sb->have_layout = true; - ret = bch2_sb_validate(sb->sb, offset, 0, &err); + ret = bch2_sb_validate(sb->sb, opts, offset, 0, &err); if (ret) { bch2_print_opts(opts, KERN_ERR "bcachefs (%s): error validating superblock: %s\n", path, 
err.buf); @@ -1081,9 +1083,10 @@ int bch2_write_super(struct bch_fs *c) bch2_sb_from_fs(c, (*ca)); darray_for_each(online_devices, ca) { + struct bch_opts opts = bch2_opts_empty(); printbuf_reset(&err); - ret = bch2_sb_validate((*ca)->disk_sb.sb, 0, BCH_VALIDATE_write, &err); + ret = bch2_sb_validate((*ca)->disk_sb.sb, &opts, 0, BCH_VALIDATE_write, &err); if (ret) { bch2_fs_inconsistent(c, "sb invalid before write: %s", err.buf); goto out; @@ -1186,13 +1189,13 @@ int bch2_write_super(struct bch_fs *c) nr_wrote = dev_mask_nr(&sb_written); can_mount_with_written = - bch2_have_enough_devs(c, sb_written, degraded_flags, false); + bch2_have_enough_devs(c, sb_written, degraded_flags, NULL); for (unsigned i = 0; i < ARRAY_SIZE(sb_written.d); i++) sb_written.d[i] = ~sb_written.d[i]; can_mount_without_written = - bch2_have_enough_devs(c, sb_written, degraded_flags, false); + bch2_have_enough_devs(c, sb_written, degraded_flags, NULL); /* * If we would be able to mount _without_ the devices we successfully diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h index a3b7a90f2533..82cb3a3ceeae 100644 --- a/fs/bcachefs/super-io.h +++ b/fs/bcachefs/super-io.h @@ -92,7 +92,8 @@ int bch2_sb_from_fs(struct bch_fs *, struct bch_dev *); void bch2_free_super(struct bch_sb_handle *); int bch2_sb_realloc(struct bch_sb_handle *, unsigned); -int bch2_sb_validate(struct bch_sb *, u64, enum bch_validate_flags, struct printbuf *); +int bch2_sb_validate(struct bch_sb *, struct bch_opts *, u64, + enum bch_validate_flags, struct printbuf *); int bch2_read_super(const char *, struct bch_opts *, struct bch_sb_handle *); int bch2_read_super_silent(const char *, struct bch_opts *, struct bch_sb_handle *); diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 70d5aff38723..cc9d00e1afd5 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1021,6 +1021,12 @@ static int bch2_fs_opt_version_init(struct bch_fs *c) prt_bitflags(&p, bch2_recovery_passes, sb_passes); } + u64 
btrees_lost_data = le64_to_cpu(ext->btrees_lost_data); + if (btrees_lost_data) { + prt_str(&p, "\nsuperblock indicates damage to following btrees:\n "); + prt_bitflags(&p, __bch2_btree_ids, btrees_lost_data); + } + if (bch2_check_version_downgrade(c)) { prt_str(&p, "\nVersion downgrade required:"); @@ -1362,10 +1368,14 @@ static bool bch2_fs_may_start(struct bch_fs *c) return false; } break; - } + } } - return bch2_have_enough_devs(c, c->online_devs, flags, true); + CLASS(printbuf, err)(); + bool ret = bch2_have_enough_devs(c, c->online_devs, flags, &err); + if (!ret) + bch2_print_str(c, KERN_ERR, err.buf); + return ret; } int bch2_fs_start(struct bch_fs *c) @@ -1557,7 +1567,6 @@ static void bch2_dev_free(struct bch_dev *ca) bch2_bucket_bitmap_free(&ca->bucket_backpointer_mismatch); bch2_bucket_bitmap_free(&ca->bucket_backpointer_empty); - genradix_free(&ca->buckets_gc); bch2_free_super(&ca->disk_sb); bch2_dev_allocator_background_exit(ca); @@ -1741,19 +1750,20 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx) return 0; } -static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb) +static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb, + struct printbuf *err) { unsigned ret; if (bch2_dev_is_online(ca)) { - bch_err(ca, "already have device online in slot %u", - sb->sb->dev_idx); + prt_printf(err, "already have device online in slot %u\n", + sb->sb->dev_idx); return bch_err_throw(ca->fs, device_already_online); } if (get_capacity(sb->bdev->bd_disk) < ca->mi.bucket_size * ca->mi.nbuckets) { - bch_err(ca, "cannot online: device too small (capacity %llu filesystem size %llu nbuckets %llu)", + prt_printf(err, "cannot online: device too small (capacity %llu filesystem size %llu nbuckets %llu)\n", get_capacity(sb->bdev->bd_disk), ca->mi.bucket_size * ca->mi.nbuckets, ca->mi.nbuckets); @@ -1789,7 +1799,8 @@ static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb) return 0; } -static int 
bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb) +static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb, + struct printbuf *err) { struct bch_dev *ca; int ret; @@ -1804,7 +1815,7 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb) ca = bch2_dev_locked(c, sb->sb->dev_idx); - ret = __bch2_dev_attach_bdev(ca, sb); + ret = __bch2_dev_attach_bdev(ca, sb, err); if (ret) return ret; @@ -1828,7 +1839,8 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb) * because we got an error or what have you? */ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca, - enum bch_member_state new_state, int flags) + enum bch_member_state new_state, int flags, + struct printbuf *err) { struct bch_devs_mask new_online_devs; int nr_rw = 0, required; @@ -1865,7 +1877,7 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca, new_online_devs = c->online_devs; __clear_bit(ca->dev_idx, new_online_devs.d); - return bch2_have_enough_devs(c, new_online_devs, flags, false); + return bch2_have_enough_devs(c, new_online_devs, flags, err); default: BUG(); } @@ -1899,14 +1911,15 @@ static void __bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca) } int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, - enum bch_member_state new_state, int flags) + enum bch_member_state new_state, int flags, + struct printbuf *err) { int ret = 0; if (ca->mi.state == new_state) return 0; - if (!bch2_dev_state_allowed(c, ca, new_state, flags)) + if (!bch2_dev_state_allowed(c, ca, new_state, flags, err)) return bch_err_throw(c, device_state_not_allowed); if (new_state != BCH_MEMBER_STATE_rw) @@ -1929,15 +1942,17 @@ int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, } int bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, - enum bch_member_state new_state, int flags) + enum bch_member_state new_state, int flags, + struct printbuf *err) { guard(rwsem_write)(&c->state_lock); - 
return __bch2_dev_set_state(c, ca, new_state, flags); + return __bch2_dev_set_state(c, ca, new_state, flags, err); } /* Device add/removal: */ -int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) +int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags, + struct printbuf *err) { unsigned dev_idx = ca->dev_idx, data; bool fast_device_removal = !bch2_request_incompat_feature(c, @@ -1952,8 +1967,8 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) */ bch2_dev_put(ca); - if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_failed, flags)) { - bch_err(ca, "Cannot remove without losing data"); + if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_failed, flags, NULL)) { + prt_printf(err, "Cannot remove without losing data\n"); ret = bch_err_throw(c, device_state_not_allowed); goto err; } @@ -1973,16 +1988,17 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) if (!data_type_is_empty(i) && !data_type_is_hidden(i) && usage.buckets[i]) { - bch_err(ca, "Remove failed: still has data (%s, %llu buckets)", - __bch2_data_types[i], usage.buckets[i]); + prt_printf(err, "Remove failed: still has data (%s, %llu buckets)\n", + __bch2_data_types[i], usage.buckets[i]); ret = -EBUSY; goto err; } ret = bch2_dev_remove_alloc(c, ca); - bch_err_msg(ca, ret, "bch2_dev_remove_alloc()"); - if (ret) + if (ret) { + prt_printf(err, "bch2_dev_remove_alloc() error: %s\n", bch2_err_str(ret)); goto err; + } /* * We need to flush the entire journal to get rid of keys that reference @@ -1995,25 +2011,28 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) * calls, and could be cleaned up: */ ret = bch2_journal_flush_device_pins(&c->journal, ca->dev_idx); - bch_err_msg(ca, ret, "bch2_journal_flush_device_pins()"); - if (ret) + if (ret) { + prt_printf(err, "bch2_journal_flush_device_pins() error: %s\n", bch2_err_str(ret)); goto err; + } ret = bch2_journal_flush(&c->journal); - bch_err_msg(ca, ret, 
"bch2_journal_flush()"); - if (ret) + if (ret) { + prt_printf(err, "bch2_journal_flush() error: %s\n", bch2_err_str(ret)); goto err; + } ret = bch2_replicas_gc2(c); - bch_err_msg(ca, ret, "bch2_replicas_gc2()"); - if (ret) + if (ret) { + prt_printf(err, "bch2_replicas_gc2() error: %s\n", bch2_err_str(ret)); goto err; + } data = bch2_dev_has_data(c, ca); if (data) { - CLASS(printbuf, data_has)(); - prt_bitflags(&data_has, __bch2_data_types, data); - bch_err(ca, "Remove failed, still has data (%s)", data_has.buf); + prt_str(err, "Remove failed, still has data ("); + prt_bitflags(err, __bch2_data_types, data); + prt_str(err, ")\n"); ret = -EBUSY; goto err; } @@ -2058,7 +2077,7 @@ err: } /* Add new device to running filesystem: */ -int bch2_dev_add(struct bch_fs *c, const char *path) +int bch2_dev_add(struct bch_fs *c, const char *path, struct printbuf *err) { struct bch_opts opts = bch2_opts_empty(); struct bch_sb_handle sb = {}; @@ -2067,9 +2086,10 @@ int bch2_dev_add(struct bch_fs *c, const char *path) int ret = 0; ret = bch2_read_super(path, &opts, &sb); - bch_err_msg(c, ret, "reading super"); - if (ret) + if (ret) { + prt_printf(err, "error reading superblock: %s\n", bch2_err_str(ret)); goto err; + } struct bch_member dev_mi = bch2_sb_member_get(sb.sb, sb.sb->dev_idx); @@ -2090,7 +2110,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path) } if (ret) { - bch_err(c, "filesystem UUID already open"); + prt_printf(err, "cannot go multidevice: filesystem UUID already open\n"); goto err; } } @@ -2105,7 +2125,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path) goto err; } - ret = __bch2_dev_attach_bdev(ca, &sb); + ret = __bch2_dev_attach_bdev(ca, &sb, err); if (ret) goto err; @@ -2114,16 +2134,17 @@ int bch2_dev_add(struct bch_fs *c, const char *path) SET_BCH_SB_MULTI_DEVICE(c->disk_sb.sb, true); ret = bch2_sb_from_fs(c, ca); - bch_err_msg(c, ret, "setting up new superblock"); - if (ret) + if (ret) { + prt_printf(err, "error setting up new superblock: %s\n", 
bch2_err_str(ret)); goto err; + } if (dynamic_fault("bcachefs:add:no_slot")) goto err; ret = bch2_sb_member_alloc(c); if (ret < 0) { - bch_err_msg(c, ret, "setting up new superblock"); + prt_printf(err, "error allocating superblock member slot: %s\n", bch2_err_str(ret)); goto err; } unsigned dev_idx = ret; @@ -2141,7 +2162,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path) if (BCH_MEMBER_GROUP(&dev_mi)) { ret = __bch2_dev_group_set(c, ca, label.buf); - bch_err_msg(c, ret, "creating new label"); + prt_printf(err, "error creating new label: %s\n", bch2_err_str(ret)); if (ret) goto err_late; } @@ -2155,22 +2176,25 @@ int bch2_dev_add(struct bch_fs *c, const char *path) if (test_bit(BCH_FS_started, &c->flags)) { ret = bch2_trans_mark_dev_sb(c, ca, BTREE_TRIGGER_transactional); - bch_err_msg(ca, ret, "marking new superblock"); - if (ret) + if (ret) { + prt_printf(err, "error marking new superblock: %s\n", bch2_err_str(ret)); goto err_late; + } ret = bch2_fs_freespace_init(c); - bch_err_msg(ca, ret, "initializing free space"); - if (ret) + if (ret) { + prt_printf(err, "error initializing free space: %s\n", bch2_err_str(ret)); goto err_late; + } if (ca->mi.state == BCH_MEMBER_STATE_rw) __bch2_dev_read_write(c, ca); ret = bch2_dev_journal_alloc(ca, false); - bch_err_msg(c, ret, "allocating journal"); - if (ret) + if (ret) { + prt_printf(err, "error allocating journal: %s\n", bch2_err_str(ret)); goto err_late; + } } /* @@ -2203,7 +2227,7 @@ err_late: } /* Hot add existing device to running filesystem: */ -int bch2_dev_online(struct bch_fs *c, const char *path) +int bch2_dev_online(struct bch_fs *c, const char *path, struct printbuf *err) { struct bch_opts opts = bch2_opts_empty(); struct bch_sb_handle sb = { NULL }; @@ -2214,42 +2238,48 @@ int bch2_dev_online(struct bch_fs *c, const char *path) guard(rwsem_write)(&c->state_lock); ret = bch2_read_super(path, &opts, &sb); - if (ret) + if (ret) { + prt_printf(err, "error reading superblock: %s\n", bch2_err_str(ret)); 
return ret; + } dev_idx = sb.sb->dev_idx; ret = bch2_dev_in_fs(&c->disk_sb, &sb, &c->opts); - bch_err_msg(c, ret, "bringing %s online", path); - if (ret) + if (ret) { + prt_printf(err, "device not a member of fs: %s\n", bch2_err_str(ret)); goto err; + } - ret = bch2_dev_attach_bdev(c, &sb); + ret = bch2_dev_attach_bdev(c, &sb, err); if (ret) goto err; ca = bch2_dev_locked(c, dev_idx); ret = bch2_trans_mark_dev_sb(c, ca, BTREE_TRIGGER_transactional); - bch_err_msg(c, ret, "bringing %s online: error from bch2_trans_mark_dev_sb", path); - if (ret) + if (ret) { + prt_printf(err, "bch2_trans_mark_dev_sb() error: %s\n", bch2_err_str(ret)); goto err; + } if (ca->mi.state == BCH_MEMBER_STATE_rw) __bch2_dev_read_write(c, ca); if (!ca->mi.freespace_initialized) { ret = bch2_dev_freespace_init(c, ca, 0, ca->mi.nbuckets); - bch_err_msg(ca, ret, "initializing free space"); - if (ret) + if (ret) { + prt_printf(err, "bch2_dev_freespace_init() error: %s\n", bch2_err_str(ret)); goto err; + } } if (!ca->journal.nr) { ret = bch2_dev_journal_alloc(ca, false); - bch_err_msg(ca, ret, "allocating journal"); - if (ret) + if (ret) { + prt_printf(err, "bch2_dev_journal_alloc() error: %s\n", bch2_err_str(ret)); goto err; + } } scoped_guard(mutex, &c->sb_lock) { @@ -2264,17 +2294,17 @@ err: return ret; } -int bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca, int flags) +int bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca, int flags, struct printbuf *err) { guard(rwsem_write)(&c->state_lock); if (!bch2_dev_is_online(ca)) { - bch_err(ca, "Already offline"); + prt_printf(err, "Already offline\n"); return 0; } - if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_failed, flags)) { - bch_err(ca, "Cannot offline required disk"); + if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_failed, flags, NULL)) { + prt_printf(err, "Cannot offline required disk\n"); return bch_err_throw(c, device_state_not_allowed); } @@ -2294,7 +2324,7 @@ static int __bch2_dev_resize_alloc(struct bch_dev *ca, 
u64 old_nbuckets, u64 new bch2_dev_freespace_init(c, ca, old_nbuckets, new_nbuckets); } -int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) +int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets, struct printbuf *err) { u64 old_nbuckets; int ret = 0; @@ -2303,31 +2333,36 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) old_nbuckets = ca->mi.nbuckets; if (nbuckets < ca->mi.nbuckets) { - bch_err(ca, "Cannot shrink yet"); + prt_printf(err, "Cannot shrink yet\n"); return -EINVAL; } if (nbuckets > BCH_MEMBER_NBUCKETS_MAX) { - bch_err(ca, "New device size too big (%llu greater than max %u)", - nbuckets, BCH_MEMBER_NBUCKETS_MAX); + prt_printf(err, "New device size too big (%llu greater than max %u)\n", + nbuckets, BCH_MEMBER_NBUCKETS_MAX); return bch_err_throw(c, device_size_too_big); } if (bch2_dev_is_online(ca) && get_capacity(ca->disk_sb.bdev->bd_disk) < ca->mi.bucket_size * nbuckets) { - bch_err(ca, "New size larger than device"); + prt_printf(err, "New size %llu larger than device size %llu\n", + ca->mi.bucket_size * nbuckets, + get_capacity(ca->disk_sb.bdev->bd_disk)); return bch_err_throw(c, device_size_too_small); } ret = bch2_dev_buckets_resize(c, ca, nbuckets); - bch_err_msg(ca, ret, "resizing buckets"); - if (ret) + if (ret) { + prt_printf(err, "bch2_dev_buckets_resize() error: %s\n", bch2_err_str(ret)); return ret; + } ret = bch2_trans_mark_dev_sb(c, ca, BTREE_TRIGGER_transactional); - if (ret) + if (ret) { + prt_printf(err, "bch2_trans_mark_dev_sb() error: %s\n", bch2_err_str(ret)); return ret; + } scoped_guard(mutex, &c->sb_lock) { struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); @@ -2338,8 +2373,10 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) if (ca->mi.freespace_initialized) { ret = __bch2_dev_resize_alloc(ca, old_nbuckets, nbuckets); - if (ret) + if (ret) { + prt_printf(err, "__bch2_dev_resize_alloc() error: %s\n", bch2_err_str(ret)); 
return ret; + } } bch2_recalc_capacity(c); @@ -2450,10 +2487,14 @@ static void bch2_fs_bdev_mark_dead(struct block_device *bdev, bool surprise) struct bch_dev *ca = bdev_to_bch_dev(c, bdev); if (ca) { + CLASS(printbuf, buf)(); + __bch2_log_msg_start(ca->name, &buf); + prt_printf(&buf, "offline from block layer\n"); + bool dev = bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_failed, - BCH_FORCE_IF_DEGRADED); - + BCH_FORCE_IF_DEGRADED, + &buf); if (!dev && sb) { if (!surprise) sync_filesystem(sb); @@ -2461,11 +2502,6 @@ static void bch2_fs_bdev_mark_dead(struct block_device *bdev, bool surprise) evict_inodes(sb); } - CLASS(printbuf, buf)(); - __bch2_log_msg_start(ca->name, &buf); - - prt_printf(&buf, "offline from block layer"); - if (dev) { __bch2_dev_offline(c, ca); } else { @@ -2543,11 +2579,6 @@ struct bch_fs *bch2_fs_open(darray_const_str *devices, BUG_ON(darray_push(&sbs, sb)); } - if (opts->nochanges && !opts->read_only) { - ret = bch_err_throw(c, erofs_nochanges); - goto err_print; - } - darray_for_each(sbs, sb) if (!best || sb_cmp(sb->sb, best->sb) > 0) best = sb; @@ -2575,9 +2606,12 @@ struct bch_fs *bch2_fs_open(darray_const_str *devices, scoped_guard(rwsem_write, &c->state_lock) darray_for_each(sbs, sb) { - ret = bch2_dev_attach_bdev(c, sb); - if (ret) + CLASS(printbuf, err)(); + ret = bch2_dev_attach_bdev(c, sb, &err); + if (ret) { + bch_err(bch2_dev_locked(c, sb->sb->dev_idx), "%s", err.buf); goto err; + } } if (!c->opts.nostart) { diff --git a/fs/bcachefs/super.h b/fs/bcachefs/super.h index e90bab9afe78..d13dbf2b8227 100644 --- a/fs/bcachefs/super.h +++ b/fs/bcachefs/super.h @@ -17,18 +17,20 @@ struct bch_fs *bch2_dev_to_fs(dev_t); struct bch_fs *bch2_uuid_to_fs(__uuid_t); bool bch2_dev_state_allowed(struct bch_fs *, struct bch_dev *, - enum bch_member_state, int); + enum bch_member_state, int, + struct printbuf *); int __bch2_dev_set_state(struct bch_fs *, struct bch_dev *, - enum bch_member_state, int); + enum bch_member_state, int, + struct printbuf 
*); int bch2_dev_set_state(struct bch_fs *, struct bch_dev *, - enum bch_member_state, int); - -int bch2_dev_fail(struct bch_dev *, int); -int bch2_dev_remove(struct bch_fs *, struct bch_dev *, int); -int bch2_dev_add(struct bch_fs *, const char *); -int bch2_dev_online(struct bch_fs *, const char *); -int bch2_dev_offline(struct bch_fs *, struct bch_dev *, int); -int bch2_dev_resize(struct bch_fs *, struct bch_dev *, u64); + enum bch_member_state, int, + struct printbuf *); + +int bch2_dev_remove(struct bch_fs *, struct bch_dev *, int, struct printbuf *); +int bch2_dev_add(struct bch_fs *, const char *, struct printbuf *); +int bch2_dev_online(struct bch_fs *, const char *, struct printbuf *); +int bch2_dev_offline(struct bch_fs *, struct bch_dev *, int, struct printbuf *); +int bch2_dev_resize(struct bch_fs *, struct bch_dev *, u64, struct printbuf *); struct bch_dev *bch2_dev_lookup(struct bch_fs *, const char *); bool bch2_fs_emergency_read_only(struct bch_fs *); diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index 6094b568dd33..6d7303008b19 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -4,6 +4,7 @@ #include "acl.h" #include "bkey_methods.h" #include "btree_update.h" +#include "dirent.h" #include "extents.h" #include "fs.h" #include "rebalance.h" @@ -25,7 +26,7 @@ static u64 bch2_xattr_hash(const struct bch_hash_info *info, bch2_str_hash_update(&ctx, info, &key->type, sizeof(key->type)); bch2_str_hash_update(&ctx, info, key->name.name, key->name.len); - return bch2_str_hash_end(&ctx, info); + return bch2_str_hash_end(&ctx, info, false); } static u64 xattr_hash_key(const struct bch_hash_info *info, const void *key) @@ -484,6 +485,22 @@ static int inode_opt_set_fn(struct btree_trans *trans, return ret; } + if (s->id == Inode_opt_inodes_32bit && + !bch2_request_incompat_feature(trans->c, bcachefs_metadata_version_31bit_dirent_offset)) { + /* + * Make sure the dir is empty, as otherwise we'd need to + * rehash everything and update the 
dirent keys. + */ + int ret = bch2_empty_dir_trans(trans, inode_inum(inode)); + if (ret < 0) + return ret; + + if (s->defined) + bi->bi_flags |= BCH_INODE_31bit_dirent_offset; + else + bi->bi_flags &= ~BCH_INODE_31bit_dirent_offset; + } + if (s->defined) bi->bi_fields_set |= 1U << s->id; else |