diff options
Diffstat (limited to 'fs')
42 files changed, 402 insertions, 165 deletions
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index 45d3db41225a..c43aaab4c108 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -809,6 +809,8 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, for (enum btree_id btree_id = 0; btree_id < btree_id_nr_alive(c); btree_id++) { + /* btree_type_has_ptrs should probably include BTREE_ID_stripes, + * definitely her... */ int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1; ret = commit_do(trans, NULL, NULL, diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index cdf593c59922..16d08dfb5f19 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -386,14 +386,6 @@ do { \ ##__VA_ARGS__, bch2_err_str(_ret)); \ } while (0) -static inline int __bch2_err_trace(struct bch_fs *c, int err) -{ - trace_error_throw(c, err, _THIS_IP_); - return err; -} - -#define bch_err_throw(_c, _err) __bch2_err_trace(_c, -BCH_ERR_##_err) - /* Parameters that are useful for debugging, but should always be compiled in: */ #define BCH_DEBUG_PARAMS_ALWAYS() \ BCH_DEBUG_PARAM(key_merging_disabled, \ @@ -1153,6 +1145,15 @@ struct bch_fs { struct mutex fsck_error_counts_lock; }; +static inline int __bch2_err_trace(struct bch_fs *c, int err) +{ + this_cpu_inc(c->counters[BCH_COUNTER_error_throw]); + trace_error_throw(c, err, _THIS_IP_); + return err; +} + +#define bch_err_throw(_c, _err) __bch2_err_trace(_c, -BCH_ERR_##_err) + extern struct wait_queue_head bch2_read_only_wait; static inline bool bch2_ro_ref_tryget(struct bch_fs *c) diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index a8f59522e258..b2de993d802b 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -706,7 +706,8 @@ struct bch_sb_field_ext { x(snapshot_deletion_v2, BCH_VERSION(1, 26)) \ x(fast_device_removal, BCH_VERSION(1, 27)) \ x(inode_has_case_insensitive, BCH_VERSION(1, 28)) \ - x(extent_snapshot_whiteouts, BCH_VERSION(1, 29)) + 
x(extent_snapshot_whiteouts, BCH_VERSION(1, 29)) \ + x(31bit_dirent_offset, BCH_VERSION(1, 30)) enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, @@ -1378,7 +1379,8 @@ enum btree_id_flags { BIT_ULL(KEY_TYPE_alloc_v4)) \ x(quotas, 5, 0, \ BIT_ULL(KEY_TYPE_quota)) \ - x(stripes, 6, 0, \ + x(stripes, 6, \ + BTREE_IS_data, \ BIT_ULL(KEY_TYPE_stripe)) \ x(reflink, 7, \ BTREE_IS_extents| \ @@ -1438,9 +1440,9 @@ enum btree_id { */ #define BTREE_ID_NR_MAX 63 -static inline bool btree_id_is_alloc(enum btree_id id) +static inline bool btree_id_is_alloc(enum btree_id btree) { - switch (id) { + switch (btree) { case BTREE_ID_alloc: case BTREE_ID_backpointers: case BTREE_ID_need_discard: @@ -1454,6 +1456,33 @@ static inline bool btree_id_is_alloc(enum btree_id id) } } +/* We can reconstruct these btrees from information in other btrees */ +static inline bool btree_id_can_reconstruct(enum btree_id btree) +{ + if (btree_id_is_alloc(btree)) + return true; + + switch (btree) { + case BTREE_ID_snapshot_trees: + case BTREE_ID_deleted_inodes: + case BTREE_ID_rebalance_work: + case BTREE_ID_subvolume_children: + return true; + default: + return false; + } +} + +/* + * We can reconstruct BTREE_ID_alloc, but reconstucting it from scratch is not + * so cheap and OOMs on huge filesystems (until we have online + * check_allocations) + */ +static inline bool btree_id_recovers_from_scan(enum btree_id btree) +{ + return btree == BTREE_ID_alloc || !btree_id_can_reconstruct(btree); +} + #define BTREE_MAX_DEPTH 4U /* Btree nodes */ diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 6b91649688da..ae7d260589d8 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -44,27 +44,6 @@ #include <linux/rcupdate.h> #include <linux/sched/task.h> -/* - * Returns true if it's a btree we can easily reconstruct, or otherwise won't - * cause data loss if it's missing: - */ -static bool btree_id_important(enum btree_id btree) -{ - if (btree_id_is_alloc(btree)) - 
return false; - - switch (btree) { - case BTREE_ID_quotas: - case BTREE_ID_snapshot_trees: - case BTREE_ID_logged_ops: - case BTREE_ID_rebalance_work: - case BTREE_ID_subvolume_children: - return false; - default: - return true; - } -} - static const char * const bch2_gc_phase_strs[] = { #define x(n) #n, GC_PHASES() @@ -557,45 +536,55 @@ fsck_err: return ret; } -static int bch2_check_root(struct btree_trans *trans, enum btree_id btree, +static int bch2_topology_check_root(struct btree_trans *trans, enum btree_id btree, bool *reconstructed_root) { struct bch_fs *c = trans->c; struct btree_root *r = bch2_btree_id_root(c, btree); - CLASS(printbuf, buf)(); - int ret = 0; - - bch2_btree_id_to_text(&buf, btree); - if (r->error) { - bch_info(c, "btree root %s unreadable, must recover from scan", buf.buf); + if (!r->error) + return 0; - ret = bch2_btree_has_scanned_nodes(c, btree); - if (ret < 0) - goto err; + CLASS(printbuf, buf)(); + int ret = 0; - if (!ret) { - __fsck_err(trans, - FSCK_CAN_FIX|(!btree_id_important(btree) ? 
FSCK_AUTOFIX : 0), - btree_root_unreadable_and_scan_found_nothing, - "no nodes found for btree %s, continue?", buf.buf); + if (!btree_id_recovers_from_scan(btree)) { + r->alive = false; + r->error = 0; + bch2_btree_root_alloc_fake_trans(trans, btree, 0); + ret = bch2_btree_lost_data(c, &buf, btree); + bch2_print_str(c, KERN_NOTICE, buf.buf); + goto out; + } - r->alive = false; - r->error = 0; - bch2_btree_root_alloc_fake_trans(trans, btree, 0); - } else { - r->alive = false; - r->error = 0; - bch2_btree_root_alloc_fake_trans(trans, btree, 1); + bch2_btree_id_to_text(&buf, btree); + bch_info(c, "btree root %s unreadable, must recover from scan", buf.buf); - bch2_shoot_down_journal_keys(c, btree, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); - ret = bch2_get_scanned_nodes(c, btree, 0, POS_MIN, SPOS_MAX); - if (ret) - return ret; - } + ret = bch2_btree_has_scanned_nodes(c, btree); + if (ret < 0) + goto err; - *reconstructed_root = true; + if (!ret) { + __fsck_err(trans, + FSCK_CAN_FIX|(btree_id_can_reconstruct(btree) ? 
FSCK_AUTOFIX : 0), + btree_root_unreadable_and_scan_found_nothing, + "no nodes found for btree %s, continue?", buf.buf); + + r->alive = false; + r->error = 0; + bch2_btree_root_alloc_fake_trans(trans, btree, 0); + } else { + r->alive = false; + r->error = 0; + bch2_btree_root_alloc_fake_trans(trans, btree, 1); + + bch2_shoot_down_journal_keys(c, btree, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); + ret = bch2_get_scanned_nodes(c, btree, 0, POS_MIN, SPOS_MAX); + if (ret) + return ret; } +out: + *reconstructed_root = true; err: fsck_err: bch_err_fn(c, ret); @@ -613,7 +602,7 @@ int bch2_check_topology(struct bch_fs *c) for (unsigned i = 0; i < btree_id_nr_alive(c) && !ret; i++) { bool reconstructed_root = false; recover: - ret = lockrestart_do(trans, bch2_check_root(trans, i, &reconstructed_root)); + ret = lockrestart_do(trans, bch2_topology_check_root(trans, i, &reconstructed_root)); if (ret) break; diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 8962c481e310..546b559fe3ce 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -2366,7 +2366,9 @@ struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *iter, struct bpos en bch2_trans_verify_not_unlocked_or_in_restart(trans); bch2_btree_iter_verify_entry_exit(iter); - EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bkey_eq(end, POS_MAX)); + EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && + !(iter->flags & BTREE_ITER_nofilter_whiteouts) && + bkey_eq(end, POS_MAX)); ret = trans_maybe_inject_restart(trans, _RET_IP_); if (unlikely(ret)) { diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c index 4b7b5ca74ba1..b618a0bd1186 100644 --- a/fs/bcachefs/btree_node_scan.c +++ b/fs/bcachefs/btree_node_scan.c @@ -149,7 +149,7 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca, bch2_encrypt(c, BSET_CSUM_TYPE(&bn->keys), nonce, &bn->flags, bytes); } - if (btree_id_is_alloc(BTREE_NODE_ID(bn))) + if 
(btree_id_can_reconstruct(BTREE_NODE_ID(bn))) return; if (BTREE_NODE_LEVEL(bn) >= BTREE_MAX_DEPTH) @@ -534,7 +534,7 @@ int bch2_btree_has_scanned_nodes(struct bch_fs *c, enum btree_id btree) int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree, unsigned level, struct bpos node_min, struct bpos node_max) { - if (btree_id_is_alloc(btree)) + if (!btree_id_recovers_from_scan(btree)) return 0; struct find_btree_nodes *f = &c->found_btree_nodes; diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c index f59f018fe0d8..053a837cf241 100644 --- a/fs/bcachefs/btree_update.c +++ b/fs/bcachefs/btree_update.c @@ -12,7 +12,6 @@ #include "extents.h" #include "keylist.h" #include "snapshot.h" -#include "super-io.h" #include "trace.h" #include <linux/string_helpers.h> @@ -159,21 +158,6 @@ int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans, return ret; } -static inline enum bch_bkey_type extent_whiteout_type(struct bch_fs *c, enum btree_id btree, const struct bkey *k) -{ - /* - * KEY_TYPE_extent_whiteout indicates that there isn't a real extent - * present at that position: key start positions inclusive of - * KEY_TYPE_extent_whiteout (but not KEY_TYPE_whiteout) are - * monotonically increasing - */ - return btree_id_is_extents_snapshots(btree) && - bkey_deleted(k) && - !bch2_request_incompat_feature(c, bcachefs_metadata_version_extent_snapshot_whiteouts) - ? 
KEY_TYPE_extent_whiteout - : KEY_TYPE_whiteout; -} - int bch2_trans_update_extent_overwrite(struct btree_trans *trans, struct btree_iter *iter, enum btree_iter_update_trigger_flags flags, diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index 663739db82b1..18560ca80057 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -5,6 +5,7 @@ #include "btree_iter.h" #include "journal.h" #include "snapshot.h" +#include "super-io.h" struct bch_fs; struct btree; @@ -110,6 +111,22 @@ static inline int bch2_insert_snapshot_whiteouts(struct btree_trans *trans, : 0; } +static inline enum bch_bkey_type extent_whiteout_type(struct bch_fs *c, enum btree_id btree, + const struct bkey *k) +{ + /* + * KEY_TYPE_extent_whiteout indicates that there isn't a real extent + * present at that position: key start positions inclusive of + * KEY_TYPE_extent_whiteout (but not KEY_TYPE_whiteout) are + * monotonically increasing + */ + return btree_id_is_extents_snapshots(btree) && + bkey_deleted(k) && + !bch2_request_incompat_feature(c, bcachefs_metadata_version_extent_snapshot_whiteouts) + ? KEY_TYPE_extent_whiteout + : KEY_TYPE_whiteout; +} + int bch2_trans_update_extent_overwrite(struct btree_trans *, struct btree_iter *, enum btree_iter_update_trigger_flags, struct bkey_s_c, struct bkey_s_c); diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 76897cf15946..65ca54c5b0ff 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -336,6 +336,20 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, BUG_ON(b->ob.nr); mutex_lock(&c->btree_reserve_cache_lock); + if (unlikely(c->open_buckets_nr_free <= bch2_open_buckets_reserved(watermark))) { + guard(spinlock)(&c->freelist_lock); + if (c->open_buckets_nr_free <= bch2_open_buckets_reserved(watermark)) { + if (cl) + closure_wait(&c->open_buckets_wait, cl); + + ret = cl + ? 
bch_err_throw(c, bucket_alloc_blocked) + : bch_err_throw(c, open_buckets_empty); + mutex_unlock(&c->btree_reserve_cache_lock); + goto err; + } + } + if (c->btree_reserve_cache_nr > nr_reserve) { for (struct btree_alloc *a = c->btree_reserve_cache; a < c->btree_reserve_cache + c->btree_reserve_cache_nr;) { diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index afad11831e1d..755fb25a8eba 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -701,8 +701,16 @@ int bch2_accounting_key_to_wb_slowpath(struct bch_fs *c, enum btree_id btree, struct bkey_i_accounting *k) { struct btree_write_buffer *wb = &c->btree_write_buffer; - struct btree_write_buffered_key new = { .btree = btree }; + if (trace_accounting_key_to_wb_slowpath_enabled()) { + CLASS(printbuf, buf)(); + prt_printf(&buf, "have: %zu\n", wb->accounting.nr); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&k->k_i)); + trace_accounting_key_to_wb_slowpath(c, buf.buf); + } + count_event(c, accounting_key_to_wb_slowpath); + + struct btree_write_buffered_key new = { .btree = btree }; bkey_copy(&new.k, &k->k_i); int ret = darray_push(&wb->accounting, new); diff --git a/fs/bcachefs/btree_write_buffer.h b/fs/bcachefs/btree_write_buffer.h index e484cd6b90b0..b862bdf67f58 100644 --- a/fs/bcachefs/btree_write_buffer.h +++ b/fs/bcachefs/btree_write_buffer.h @@ -95,7 +95,7 @@ static inline int bch2_journal_key_to_wb(struct bch_fs *c, EBUG_ON(!dst->seq); - return k->k.type == KEY_TYPE_accounting + return bch2_bkey_is_accounting_mem(&k->k) ? 
bch2_accounting_key_to_wb(c, btree, bkey_i_to_accounting(k)) : __bch2_journal_key_to_wb(c, dst, btree, k); } diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 01838a3a189d..a314d70c6b8e 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -225,7 +225,7 @@ static void trace_io_move_created_rebalance2(struct data_update *m, trace_io_move_created_rebalance(c, buf.buf); - this_cpu_inc(c->counters[BCH_COUNTER_io_move_created_rebalance]); + count_event(c, io_move_created_rebalance); } noinline_for_stack @@ -693,6 +693,15 @@ int bch2_extent_drop_ptrs(struct btree_trans *trans, if (ret) return ret; + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; + unsigned i = 0; + bkey_for_each_ptr_decode(k.k, bch2_bkey_ptrs_c(k), p, entry) { + if (data_opts->kill_ec_ptrs & BIT(i)) + bch2_bkey_drop_ec(n, p.ptr.dev); + i++; + } + while (data_opts->kill_ptrs) { unsigned i = 0, drop = __fls(data_opts->kill_ptrs); diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h index 5e14d13568de..fc12aa65366f 100644 --- a/fs/bcachefs/data_update.h +++ b/fs/bcachefs/data_update.h @@ -12,6 +12,7 @@ struct moving_context; struct data_update_opts { unsigned rewrite_ptrs; unsigned kill_ptrs; + unsigned kill_ec_ptrs; u16 target; u8 extra_replicas; unsigned btree_insert_flags; diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index cb44b35e0f1d..fe6f3d874a47 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -95,7 +95,7 @@ static u64 bch2_dirent_hash(const struct bch_hash_info *info, bch2_str_hash_update(&ctx, info, name->name, name->len); /* [0,2) reserved for dots */ - return max_t(u64, bch2_str_hash_end(&ctx, info), 2); + return max_t(u64, bch2_str_hash_end(&ctx, info, true), 2); } static u64 dirent_hash_key(const struct bch_hash_info *info, const void *key) diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index f96530c70262..5944ad6d0f8d 100644 --- a/fs/bcachefs/disk_accounting.c 
+++ b/fs/bcachefs/disk_accounting.c @@ -184,6 +184,9 @@ int bch2_accounting_validate(struct bch_fs *c, struct bkey_s_c k, void *end = &acc_k + 1; int ret = 0; + if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) + return 0; + bkey_fsck_err_on((from.flags & BCH_VALIDATE_commit) && bversion_zero(k.k->bversion), c, accounting_key_version_0, diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h index 43f4b21d0aab..cc73cce98a44 100644 --- a/fs/bcachefs/disk_accounting.h +++ b/fs/bcachefs/disk_accounting.h @@ -145,6 +145,16 @@ static inline bool bch2_accounting_is_mem(struct disk_accounting_pos *acc) acc->type != BCH_DISK_ACCOUNTING_inum; } +static inline bool bch2_bkey_is_accounting_mem(struct bkey *k) +{ + if (k->type != KEY_TYPE_accounting) + return false; + + struct disk_accounting_pos acc_k; + bpos_to_disk_accounting_pos(&acc_k, k->p); + return bch2_accounting_is_mem(&acc_k); +} + /* * Update in memory counters so they match the btree update we're doing; called * from transaction commit path diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index c2840cb674b2..15c7d8ff5dea 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -35,6 +35,8 @@ #include <linux/raid/pq.h> #include <linux/raid/xor.h> +static bool bch2_stripe_is_open(struct bch_fs *, u64); + static void raid5_recov(unsigned disks, unsigned failed_idx, size_t size, void **data) { @@ -386,11 +388,20 @@ int bch2_trigger_stripe(struct btree_trans *trans, new_s->nr_redundant != old_s->nr_redundant)); if (flags & BTREE_TRIGGER_transactional) { + u64 old_lru_pos = stripe_lru_pos(old_s); + u64 new_lru_pos = stripe_lru_pos(new_s); + + if (new_lru_pos == STRIPE_LRU_POS_EMPTY && + !bch2_stripe_is_open(c, idx)) { + _new.k->type = KEY_TYPE_deleted; + set_bkey_val_u64s(_new.k, 0); + new_s = NULL; + new_lru_pos = 0; + } + int ret = bch2_lru_change(trans, - BCH_LRU_STRIPE_FRAGMENTATION, - idx, - stripe_lru_pos(old_s), - stripe_lru_pos(new_s)); + BCH_LRU_STRIPE_FRAGMENTATION, idx, + old_lru_pos, 
new_lru_pos); if (ret) return ret; } @@ -954,7 +965,7 @@ static int ec_stripe_delete(struct btree_trans *trans, u64 idx) */ if (k.k->type == KEY_TYPE_stripe && !bch2_stripe_is_open(trans->c, idx) && - stripe_lru_pos(bkey_s_c_to_stripe(k).v) == 1) + stripe_lru_pos(bkey_s_c_to_stripe(k).v) == STRIPE_LRU_POS_EMPTY) return bch2_btree_delete_at(trans, &iter, 0); return 0; @@ -1767,7 +1778,14 @@ static int __get_existing_stripe(struct btree_trans *trans, { struct bch_fs *c = trans->c; - CLASS(btree_iter, iter)(trans, BTREE_ID_stripes, POS(0, idx), BTREE_ITER_nopreserve); + /* + * We require an intent lock here until we have the stripe open, for + * exclusion with bch2_trigger_stripe() - which will delete empty + * stripes if they're not open, but it can't actually open them: + */ + CLASS(btree_iter, iter)(trans, BTREE_ID_stripes, POS(0, idx), + BTREE_ITER_intent| + BTREE_ITER_nopreserve); struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); int ret = bkey_err(k); if (ret) @@ -1778,8 +1796,19 @@ static int __get_existing_stripe(struct btree_trans *trans, return 0; struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); - if (stripe_lru_pos(s.v) <= 1) - return 0; + + if (stripe_lru_pos(s.v) == STRIPE_LRU_POS_EMPTY) { + /* + * We can't guarantee that the trigger will always delete + * stripes - the stripe might still be open when the last data + * in it was deleted + */ + return !bch2_stripe_is_open(c, idx) + ? 
bch2_btree_delete_at(trans, &iter, 0) ?: + bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: + bch_err_throw(c, transaction_restart_commit) + : 0; + } if (s.v->disk_label == head->disk_label && s.v->algorithm == head->algo && diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h index 0c3c3a24fc6f..213814787dd6 100644 --- a/fs/bcachefs/error.h +++ b/fs/bcachefs/error.h @@ -173,7 +173,8 @@ do { \ if (!bch2_err_matches(_ret, BCH_ERR_fsck_fix) && \ !bch2_err_matches(_ret, BCH_ERR_fsck_ignore)) \ ret = _ret; \ - ret = bch_err_throw(c, fsck_delete_bkey); \ + else \ + ret = bch_err_throw(c, fsck_delete_bkey); \ goto fsck_err; \ } while (0) diff --git a/fs/bcachefs/extent_update.c b/fs/bcachefs/extent_update.c index c4b0ea1adaa8..7ddb156c765c 100644 --- a/fs/bcachefs/extent_update.c +++ b/fs/bcachefs/extent_update.c @@ -98,11 +98,13 @@ static int count_iters_for_insert(struct btree_trans *trans, return ret2 ?: ret; } -int bch2_extent_atomic_end(struct btree_trans *trans, - struct btree_iter *iter, - struct bpos *end) +int bch2_extent_trim_atomic(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_i *insert) { - unsigned nr_iters = 0; + enum bch_bkey_type whiteout_type = + extent_whiteout_type(trans->c, iter->btree_id, &insert->k); + struct bpos end = insert->k.p; struct btree_iter copy; bch2_trans_copy_iter(&copy, iter); @@ -111,42 +113,54 @@ int bch2_extent_atomic_end(struct btree_trans *trans, if (ret) goto err; + copy.flags |= BTREE_ITER_nofilter_whiteouts; + struct bkey_s_c k; - for_each_btree_key_max_continue_norestart(copy, *end, 0, k, ret) { + unsigned nr_iters = 0; + for_each_btree_key_continue_norestart(copy, 0, k, ret) { unsigned offset = 0; if (bkey_gt(iter->pos, bkey_start_pos(k.k))) offset = iter->pos.offset - bkey_start_offset(k.k); - ret = count_iters_for_insert(trans, k, offset, end, &nr_iters); - if (ret) - break; + if (bkey_extent_whiteout(k.k)) { + if (bpos_gt(k.k->p, insert->k.p)) { + if (k.k->type == 
KEY_TYPE_extent_whiteout) + break; + else + continue; + } else if (k.k->type != whiteout_type) { + nr_iters += 1; + if (nr_iters >= EXTENT_ITERS_MAX) { + end = bpos_min(end, k.k->p); + break; + } + } + } else { + if (bpos_ge(bkey_start_pos(k.k), end)) + break; + + ret = count_iters_for_insert(trans, k, offset, &end, &nr_iters); + if (ret) + break; + } } err: bch2_trans_iter_exit(&copy); - return ret < 0 ? ret : 0; -} - -int bch2_extent_trim_atomic(struct btree_trans *trans, - struct btree_iter *iter, - struct bkey_i *k) -{ - struct bpos end = k->k.p; - int ret = bch2_extent_atomic_end(trans, iter, &end); - if (ret) + if (ret < 0) return ret; /* tracepoint */ - if (bpos_lt(end, k->k.p)) { + if (bpos_lt(end, insert->k.p)) { if (trace_extent_trim_atomic_enabled()) { CLASS(printbuf, buf)(); bch2_bpos_to_text(&buf, end); prt_newline(&buf); - bch2_bkey_val_to_text(&buf, trans->c, bkey_i_to_s_c(k)); + bch2_bkey_val_to_text(&buf, trans->c, bkey_i_to_s_c(insert)); trace_extent_trim_atomic(trans->c, buf.buf); } - bch2_cut_back(end, k); + bch2_cut_back(end, insert); } return 0; } diff --git a/fs/bcachefs/extent_update.h b/fs/bcachefs/extent_update.h index 34467db53f45..2d956d971b11 100644 --- a/fs/bcachefs/extent_update.h +++ b/fs/bcachefs/extent_update.h @@ -4,8 +4,6 @@ #include "bcachefs.h" -int bch2_extent_atomic_end(struct btree_trans *, struct btree_iter *, - struct bpos *); int bch2_extent_trim_atomic(struct btree_trans *, struct btree_iter *, struct bkey_i *); diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index b879a586b7f6..7ab0398707d8 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -995,6 +995,22 @@ void bch2_bkey_drop_device_noerror(struct bkey_s k, unsigned dev) bch2_bkey_drop_ptrs_noerror(k, ptr, ptr->dev == dev); } +void bch2_bkey_drop_ec(struct bkey_i *k, unsigned dev) +{ + struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(k)); + union bch_extent_entry *entry, *ec = NULL; + + bkey_extent_entry_for_each(ptrs, entry) { + if 
(extent_entry_type(entry) == BCH_EXTENT_ENTRY_stripe_ptr) + ec = entry; + else if (extent_entry_type(entry) == BCH_EXTENT_ENTRY_ptr && + entry->ptr.dev == dev) { + bch2_bkey_extent_entry_drop(k, ec); + return; + } + } +} + const struct bch_extent_ptr *bch2_bkey_has_device_c(struct bkey_s_c k, unsigned dev) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); @@ -1757,3 +1773,4 @@ int bch2_cut_back_s(struct bpos where, struct bkey_s k) memset(bkey_val_end(k), 0, val_u64s_delta * sizeof(u64)); return -val_u64s_delta; } + diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index 35ee03cd5065..f6dcb17108cd 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -650,6 +650,7 @@ void bch2_bkey_drop_ptr(struct bkey_s, struct bch_extent_ptr *); void bch2_bkey_drop_device_noerror(struct bkey_s, unsigned); void bch2_bkey_drop_device(struct bkey_s, unsigned); +void bch2_bkey_drop_ec(struct bkey_i *k, unsigned); #define bch2_bkey_drop_ptrs_noerror(_k, _ptr, _cond) \ do { \ diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c index 0005569ecace..ab542cef96fe 100644 --- a/fs/bcachefs/fs-io-buffered.c +++ b/fs/bcachefs/fs-io-buffered.c @@ -42,6 +42,14 @@ struct readpages_iter { folios folios; }; +static inline void readpages_iter_folio_revert(struct readahead_control *ractl, + struct folio *folio) +{ + bch2_folio_release(folio); + ractl->_nr_pages += folio_nr_pages(folio); + ractl->_index -= folio_nr_pages(folio); +} + static int readpages_iter_init(struct readpages_iter *iter, struct readahead_control *ractl) { @@ -52,9 +60,7 @@ static int readpages_iter_init(struct readpages_iter *iter, while ((folio = __readahead_folio(ractl))) { if (!bch2_folio_create(folio, GFP_KERNEL) || darray_push(&iter->folios, folio)) { - bch2_folio_release(folio); - ractl->_nr_pages += folio_nr_pages(folio); - ractl->_index -= folio_nr_pages(folio); + readpages_iter_folio_revert(ractl, folio); return iter->folios.nr ? 
0 : -ENOMEM; } @@ -64,6 +70,15 @@ static int readpages_iter_init(struct readpages_iter *iter, return 0; } +static void readpages_iter_exit(struct readpages_iter *iter, + struct readahead_control *ractl) +{ + darray_for_each_reverse(iter->folios, folio) { + readpages_iter_folio_revert(ractl, *folio); + folio_get(*folio); + } +} + static inline struct folio *readpage_iter_peek(struct readpages_iter *iter) { if (iter->idx >= iter->folios.nr) @@ -274,6 +289,8 @@ void bch2_readahead(struct readahead_control *ractl) struct readpages_iter readpages_iter; struct blk_plug plug; + lockdep_assert_held(&inode->ei_pagecache_lock); + bch2_inode_opts_get(&opts, c, &inode->ei_inode); int ret = readpages_iter_init(&readpages_iter, ractl); @@ -290,7 +307,10 @@ void bch2_readahead(struct readahead_control *ractl) * scheduling. */ blk_start_plug(&plug); - bch2_pagecache_add_get(inode); + if (!bch2_pagecache_add_tryget(inode)) { + readpages_iter_exit(&readpages_iter, ractl); + goto out; + } struct btree_trans *trans = bch2_trans_get(c); while ((folio = readpage_iter_peek(&readpages_iter))) { @@ -317,6 +337,7 @@ void bch2_readahead(struct readahead_control *ractl) bch2_trans_put(trans); bch2_pagecache_add_put(inode); +out: blk_finish_plug(&plug); darray_exit(&readpages_iter.folios); } @@ -759,7 +780,6 @@ int bch2_write_end(struct file *file, struct address_space *mapping, struct bch2_folio_reservation *res = fsdata; unsigned offset = pos - folio_pos(folio); - lockdep_assert_held(&inode->v.i_rwsem); BUG_ON(offset + copied > folio_size(folio)); if (unlikely(copied < len && !folio_test_uptodate(folio))) { diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 76d2647d9500..c79b1f6f7db3 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -441,6 +441,12 @@ static struct inode *bch2_alloc_inode(struct super_block *sb) BUG(); } +static __maybe_unused int ptrcmp_fn(const struct lockdep_map *l, + const struct lockdep_map *r) +{ + return cmp_int(l, r); +} + static struct bch_inode_info 
*__bch2_new_inode(struct bch_fs *c, gfp_t gfp) { struct bch_inode_info *inode = alloc_inode_sb(c->vfs_sb, @@ -452,6 +458,7 @@ static struct bch_inode_info *__bch2_new_inode(struct bch_fs *c, gfp_t gfp) mutex_init(&inode->ei_update_lock); two_state_lock_init(&inode->ei_pagecache_lock); INIT_LIST_HEAD(&inode->ei_vfs_inode_list); + lock_set_cmp_fn(&inode->ei_pagecache_lock, ptrcmp_fn, NULL); inode->ei_flags = 0; mutex_init(&inode->ei_quota_lock); memset(&inode->ei_devs_need_flush, 0, sizeof(inode->ei_devs_need_flush)); @@ -826,14 +833,6 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry, bch2_inode_update_after_write(trans, inode, &inode_u, ATTR_MTIME); - if (inode_u.bi_subvol) { - /* - * Subvolume deletion is asynchronous, but we still want to tell - * the VFS that it's been deleted here: - */ - set_nlink(&inode->v, 0); - } - if (IS_CASEFOLDED(vdir)) d_invalidate(dentry); err: @@ -865,9 +864,7 @@ static int bch2_symlink(struct mnt_idmap *idmap, if (IS_ERR(inode)) return bch2_err_class(PTR_ERR(inode)); - inode_lock(&inode->v); ret = page_symlink(&inode->v, symname, strlen(symname) + 1); - inode_unlock(&inode->v); if (unlikely(ret)) goto err; diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 01c1c6372229..ccc44b1fc178 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -266,7 +266,8 @@ create_lostfound: root_inode.bi_nlink++; - ret = bch2_inode_create(trans, &lostfound_iter, lostfound, snapshot, cpu); + ret = bch2_inode_create(trans, &lostfound_iter, lostfound, snapshot, cpu, + inode_opt_get(c, &root_inode, inodes_32bit)); if (ret) goto err; @@ -573,7 +574,7 @@ static int reconstruct_subvol(struct btree_trans *trans, u32 snapshotid, u32 sub new_inode.bi_subvol = subvolid; - int ret = bch2_inode_create(trans, &inode_iter, &new_inode, snapshotid, cpu) ?: + int ret = bch2_inode_create(trans, &inode_iter, &new_inode, snapshotid, cpu, false) ?: bch2_btree_iter_traverse(&inode_iter) ?: bch2_inode_write(trans, &inode_iter, &new_inode); 
bch2_trans_iter_exit(&inode_iter); diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index d5e5190f0663..4aa130ff7cf6 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -944,11 +944,12 @@ void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u, } static struct bkey_i_inode_alloc_cursor * -bch2_inode_alloc_cursor_get(struct btree_trans *trans, u64 cpu, u64 *min, u64 *max) +bch2_inode_alloc_cursor_get(struct btree_trans *trans, u64 cpu, u64 *min, u64 *max, + bool is_32bit) { struct bch_fs *c = trans->c; - u64 cursor_idx = c->opts.inodes_32bit ? 0 : cpu + 1; + u64 cursor_idx = is_32bit ? 0 : cpu + 1; cursor_idx &= ~(~0ULL << c->opts.shard_inode_numbers_bits); @@ -967,7 +968,7 @@ bch2_inode_alloc_cursor_get(struct btree_trans *trans, u64 cpu, u64 *min, u64 *m if (IS_ERR(cursor)) return cursor; - if (c->opts.inodes_32bit) { + if (is_32bit) { *min = BLOCKDEV_INODE_MAX; *max = INT_MAX; } else { @@ -996,11 +997,11 @@ bch2_inode_alloc_cursor_get(struct btree_trans *trans, u64 cpu, u64 *min, u64 *m int bch2_inode_create(struct btree_trans *trans, struct btree_iter *iter, struct bch_inode_unpacked *inode_u, - u32 snapshot, u64 cpu) + u32 snapshot, u64 cpu, bool is_32bit) { u64 min, max; struct bkey_i_inode_alloc_cursor *cursor = - bch2_inode_alloc_cursor_get(trans, cpu, &min, &max); + bch2_inode_alloc_cursor_get(trans, cpu, &min, &max, is_32bit); int ret = PTR_ERR_OR_ZERO(cursor); if (ret) return ret; diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index b8ec3e628d90..79092ea74844 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -172,7 +172,7 @@ void bch2_inode_init(struct bch_fs *, struct bch_inode_unpacked *, struct bch_inode_unpacked *); int bch2_inode_create(struct btree_trans *, struct btree_iter *, - struct bch_inode_unpacked *, u32, u64); + struct bch_inode_unpacked *, u32, u64, bool); int bch2_inode_rm(struct bch_fs *, subvol_inum); diff --git a/fs/bcachefs/inode_format.h b/fs/bcachefs/inode_format.h index 
1f00938b1bdc..e07fa6cc99bd 100644 --- a/fs/bcachefs/inode_format.h +++ b/fs/bcachefs/inode_format.h @@ -144,7 +144,8 @@ enum inode_opt_id { x(unlinked, 7) \ x(backptr_untrusted, 8) \ x(has_child_snapshot, 9) \ - x(has_case_insensitive, 10) + x(has_case_insensitive, 10) \ + x(31bit_dirent_offset, 11) /* bits 20+ reserved for packed fields below: */ diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c index a66d01d04e57..892990b4a6a6 100644 --- a/fs/bcachefs/migrate.c +++ b/fs/bcachefs/migrate.c @@ -125,6 +125,10 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, if (!btree_type_has_ptrs(id)) continue; + /* Stripe keys have pointers, but are handled separately */ + if (id == BTREE_ID_stripes) + continue; + int ret = for_each_btree_key_commit(trans, iter, id, POS_MIN, BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 30fe269d531d..4f41f1f6ec6c 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -150,7 +150,7 @@ static void move_write_done(struct bch_write_op *op) bch2_write_op_to_text(&buf, op); trace_io_move_write_fail(c, buf.buf); } - this_cpu_inc(c->counters[BCH_COUNTER_io_move_write_fail]); + count_event(c, io_move_write_fail); ctxt->write_error = true; } @@ -344,7 +344,7 @@ int bch2_move_extent(struct moving_context *ctxt, if (!data_opts.rewrite_ptrs && !data_opts.extra_replicas && !data_opts.scrub) { - if (data_opts.kill_ptrs) { + if (data_opts.kill_ptrs|data_opts.kill_ec_ptrs) { this_cpu_add(c->counters[BCH_COUNTER_io_move_drop_only], k.k->size); return bch2_extent_drop_ptrs(trans, iter, k, &io_opts, &data_opts); } else { @@ -542,7 +542,7 @@ int bch2_move_ratelimit(struct moving_context *ctxt) if (ctxt->wait_on_copygc && c->copygc_running) { bch2_moving_ctxt_flush_all(ctxt); - wait_event_killable(c->copygc_running_wq, + wait_event_freezable(c->copygc_running_wq, !c->copygc_running || (is_kthread && kthread_should_stop())); } @@ -819,7 +819,9 @@ 
static int bch2_move_data(struct bch_fs *c, unsigned min_depth_this_btree = min_depth; - if (!btree_type_has_ptrs(id)) + /* Stripe keys have pointers, but are handled separately */ + if (!btree_type_has_ptrs(id) || + id == BTREE_ID_stripes) min_depth_this_btree = max(min_depth_this_btree, 1); for (unsigned level = min_depth_this_btree; @@ -1280,7 +1282,17 @@ static bool drop_extra_replicas_pred(struct bch_fs *c, void *arg, i++; } - return data_opts->kill_ptrs != 0; + i = 0; + bkey_for_each_ptr_decode(k.k, bch2_bkey_ptrs_c(k), p, entry) { + if (p.has_ec && durability - p.ec.redundancy >= replicas) { + data_opts->kill_ec_ptrs |= BIT(i); + durability -= p.ec.redundancy; + } + + i++; + } + + return (data_opts->kill_ptrs|data_opts->kill_ec_ptrs) != 0; } static bool scrub_pred(struct bch_fs *c, void *_arg, diff --git a/fs/bcachefs/namei.c b/fs/bcachefs/namei.c index d1019052f182..5c321a0d1f89 100644 --- a/fs/bcachefs/namei.c +++ b/fs/bcachefs/namei.c @@ -62,7 +62,8 @@ int bch2_create_trans(struct btree_trans *trans, if (flags & BCH_CREATE_TMPFILE) new_inode->bi_flags |= BCH_INODE_unlinked; - ret = bch2_inode_create(trans, &inode_iter, new_inode, snapshot, cpu); + ret = bch2_inode_create(trans, &inode_iter, new_inode, snapshot, cpu, + inode_opt_get(c, dir_u, inodes_32bit)); if (ret) goto err; diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index 17ca56b0e2ac..e1db63d75a99 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -444,8 +444,9 @@ static int do_rebalance_extent(struct moving_context *ctxt, bch2_bkey_buf_init(&sk); - ret = bkey_err(k = next_rebalance_extent(trans, work_pos, - extent_iter, &io_opts, &data_opts)); + ret = lockrestart_do(trans, + bkey_err(k = next_rebalance_extent(trans, work_pos, + extent_iter, &io_opts, &data_opts))); if (ret || !k.k) goto out; @@ -587,7 +588,7 @@ static int do_rebalance(struct moving_context *ctxt) ret = k->k.type == KEY_TYPE_cookie ? 
do_rebalance_scan(ctxt, k->k.p.inode, le64_to_cpu(bkey_i_to_cookie(k)->v.cookie)) - : lockrestart_do(trans, do_rebalance_extent(ctxt, k->k.p, &extent_iter)); + : do_rebalance_extent(ctxt, k->k.p, &extent_iter); if (ret) break; } diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 21aa2edb13ac..29e81f96db0f 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -607,7 +607,7 @@ static int read_btree_roots(struct bch_fs *c) c, btree_root_read_error, "error reading btree root %s: %s", buf.buf, bch2_err_str(ret))) { - if (btree_id_is_alloc(i)) + if (btree_id_can_reconstruct(i)) r->error = 0; ret = 0; } diff --git a/fs/bcachefs/sb-counters_format.h b/fs/bcachefs/sb-counters_format.h index f3ea53a55384..44bc12573a0c 100644 --- a/fs/bcachefs/sb-counters_format.h +++ b/fs/bcachefs/sb-counters_format.h @@ -101,7 +101,9 @@ enum counters_flags { x(trans_restart_write_buffer_flush, 75, TYPE_COUNTER) \ x(trans_restart_split_race, 76, TYPE_COUNTER) \ x(write_buffer_flush_slowpath, 77, TYPE_COUNTER) \ - x(write_buffer_flush_sync, 78, TYPE_COUNTER) + x(write_buffer_flush_sync, 78, TYPE_COUNTER) \ + x(accounting_key_to_wb_slowpath, 94, TYPE_COUNTER) \ + x(error_throw, 93, TYPE_COUNTER) enum bch_persistent_counters { #define x(t, n, ...) 
BCH_COUNTER_##t, diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 84f987d3a02a..eab0c1e3ff56 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -1673,7 +1673,8 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans, return ret; darray_for_each(*deleted, i) - nr_deleted_ancestors += bch2_snapshot_is_ancestor(c, s->k.p.offset, i->id); + nr_deleted_ancestors += bch2_snapshots_same_tree(c, s->k.p.offset, i->id) && + bch2_snapshot_is_ancestor(c, s->k.p.offset, i->id); if (!nr_deleted_ancestors) return 0; diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h index fef32a0118c4..28d9a29a1fd0 100644 --- a/fs/bcachefs/snapshot.h +++ b/fs/bcachefs/snapshot.h @@ -51,6 +51,17 @@ static inline u32 bch2_snapshot_tree(struct bch_fs *c, u32 id) return s ? s->tree : 0; } +static inline bool bch2_snapshots_same_tree(struct bch_fs *c, u32 id1, u32 id2) +{ + if (id1 == id2) + return true; + + guard(rcu)(); + const struct snapshot_t *s1 = snapshot_t(c, id1); + const struct snapshot_t *s2 = snapshot_t(c, id2); + return s1 && s2 && s1->tree == s2->tree; +} + static inline u32 __bch2_snapshot_parent_early(struct bch_fs *c, u32 id) { const struct snapshot_t *s = snapshot_t(c, id); @@ -157,6 +168,10 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *, u32, u32); static inline bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor) { + EBUG_ON(!id); + EBUG_ON(!ancestor); + EBUG_ON(!bch2_snapshots_same_tree(c, id, ancestor)); + return id == ancestor ? 
true : __bch2_snapshot_is_ancestor(c, id, ancestor); diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h index 8c0fb44929cc..2a61cc36ddbf 100644 --- a/fs/bcachefs/str_hash.h +++ b/fs/bcachefs/str_hash.h @@ -34,6 +34,7 @@ bch2_str_hash_opt_to_type(struct bch_fs *c, enum bch_str_hash_opts opt) struct bch_hash_info { u32 inum_snapshot; u8 type; + bool is_31bit; struct unicode_map *cf_encoding; /* * For crc32 or crc64 string hashes the first key value of @@ -48,6 +49,7 @@ bch2_hash_info_init(struct bch_fs *c, const struct bch_inode_unpacked *bi) struct bch_hash_info info = { .inum_snapshot = bi->bi_snapshot, .type = INODE_STR_HASH(bi), + .is_31bit = bi->bi_flags & BCH_INODE_31bit_dirent_offset, .cf_encoding = bch2_inode_casefold(c, bi) ? c->cf_encoding : NULL, .siphash_key = { .k0 = bi->bi_hash_seed } }; @@ -112,8 +114,8 @@ static inline void bch2_str_hash_update(struct bch_str_hash_ctx *ctx, } } -static inline u64 bch2_str_hash_end(struct bch_str_hash_ctx *ctx, - const struct bch_hash_info *info) +static inline u64 __bch2_str_hash_end(struct bch_str_hash_ctx *ctx, + const struct bch_hash_info *info) { switch (info->type) { case BCH_STR_HASH_crc32c: @@ -128,6 +130,14 @@ static inline u64 bch2_str_hash_end(struct bch_str_hash_ctx *ctx, } } +static inline u64 bch2_str_hash_end(struct bch_str_hash_ctx *ctx, + const struct bch_hash_info *info, + bool maybe_31bit) +{ + return __bch2_str_hash_end(ctx, info) & + (maybe_31bit && info->is_31bit ? 
INT_MAX : U64_MAX); +} + struct bch_hash_desc { enum btree_id btree_id; u8 key_type; diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index be7ed612d28f..369465a4de77 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -89,7 +89,7 @@ int bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version v prt_str(&buf, "requested incompat feature "); bch2_version_to_text(&buf, version); prt_str(&buf, " currently not enabled, allowed up to "); - bch2_version_to_text(&buf, version); + bch2_version_to_text(&buf, c->sb.version_incompat_allowed); prt_printf(&buf, "\n set version_upgrade=incompat to enable"); bch_notice(c, "%s", buf.buf); diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index ef15e614f4f3..09e7f8ae9922 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -2542,11 +2542,6 @@ struct bch_fs *bch2_fs_open(darray_const_str *devices, BUG_ON(darray_push(&sbs, sb)); } - if (opts->nochanges && !opts->read_only) { - ret = bch_err_throw(c, erofs_nochanges); - goto err_print; - } - darray_for_each(sbs, sb) if (!best || sb_cmp(sb->sb, best->sb) > 0) best = sb; diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index 3776a1403104..269cdf1a87a4 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -1179,6 +1179,11 @@ DEFINE_EVENT(transaction_event, trans_restart_write_buffer_flush, TP_ARGS(trans, caller_ip) ); +DEFINE_EVENT(fs_str, accounting_key_to_wb_slowpath, + TP_PROTO(struct bch_fs *c, const char *str), + TP_ARGS(c, str) +); + TRACE_EVENT(path_downgrade, TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, diff --git a/fs/bcachefs/two_state_shared_lock.h b/fs/bcachefs/two_state_shared_lock.h index 7f647846b511..95986f5ef894 100644 --- a/fs/bcachefs/two_state_shared_lock.h +++ b/fs/bcachefs/two_state_shared_lock.h @@ -15,14 +15,28 @@ typedef struct { atomic_long_t v; wait_queue_head_t wait; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif } two_state_lock_t; -static inline 
void two_state_lock_init(two_state_lock_t *lock) +static inline void __two_state_lock_init(two_state_lock_t *lock, + const char *name, struct lock_class_key *key) { atomic_long_set(&lock->v, 0); init_waitqueue_head(&lock->wait); +#ifdef CONFIG_DEBUG_LOCK_ALLOC + debug_check_no_locks_freed((void *) lock, sizeof(*lock)); + lockdep_init_map(&lock->dep_map, name, key, 0); +#endif } +#define two_state_lock_init(_lock) \ +do { \ + static struct lock_class_key __key; \ + __two_state_lock_init((_lock), #_lock, &__key); \ +} while (0) + static inline void bch2_two_state_unlock(two_state_lock_t *lock, int s) { long i = s ? 1 : -1; @@ -31,9 +45,11 @@ static inline void bch2_two_state_unlock(two_state_lock_t *lock, int s) if (atomic_long_sub_return_release(i, &lock->v) == 0) wake_up_all(&lock->wait); + + lock_release(&lock->dep_map, _THIS_IP_); } -static inline bool bch2_two_state_trylock(two_state_lock_t *lock, int s) +static inline bool __bch2_two_state_trylock(two_state_lock_t *lock, int s) { long i = s ? 
1 : -1; long old; @@ -47,11 +63,20 @@ static inline bool bch2_two_state_trylock(two_state_lock_t *lock, int s) return true; } +static inline bool bch2_two_state_trylock(two_state_lock_t *lock, int s) +{ + bool ret = __bch2_two_state_trylock(lock, s); + if (ret) + lock_acquire_exclusive(&lock->dep_map, 0, true, NULL, _THIS_IP_); + return ret; +} + void __bch2_two_state_lock(two_state_lock_t *, int); static inline void bch2_two_state_lock(two_state_lock_t *lock, int s) { - if (!bch2_two_state_trylock(lock, s)) + lock_acquire_exclusive(&lock->dep_map, 0, 0, NULL, _THIS_IP_); + if (!__bch2_two_state_trylock(lock, s)) __bch2_two_state_lock(lock, s); } diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index 6094b568dd33..6d7303008b19 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -4,6 +4,7 @@ #include "acl.h" #include "bkey_methods.h" #include "btree_update.h" +#include "dirent.h" #include "extents.h" #include "fs.h" #include "rebalance.h" @@ -25,7 +26,7 @@ static u64 bch2_xattr_hash(const struct bch_hash_info *info, bch2_str_hash_update(&ctx, info, &key->type, sizeof(key->type)); bch2_str_hash_update(&ctx, info, key->name.name, key->name.len); - return bch2_str_hash_end(&ctx, info); + return bch2_str_hash_end(&ctx, info, false); } static u64 xattr_hash_key(const struct bch_hash_info *info, const void *key) @@ -484,6 +485,22 @@ static int inode_opt_set_fn(struct btree_trans *trans, return ret; } + if (s->id == Inode_opt_inodes_32bit && + !bch2_request_incompat_feature(trans->c, bcachefs_metadata_version_31bit_dirent_offset)) { + /* + * Make sure the dir is empty, as otherwise we'd need to + * rehash everything and update the dirent keys. + */ + int ret = bch2_empty_dir_trans(trans, inode_inum(inode)); + if (ret < 0) + return ret; + + if (s->defined) + bi->bi_flags |= BCH_INODE_31bit_dirent_offset; + else + bi->bi_flags &= ~BCH_INODE_31bit_dirent_offset; + } + if (s->defined) bi->bi_fields_set |= 1U << s->id; else |