diff options
27 files changed, 241 insertions, 143 deletions
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index cdf593c59922..16d08dfb5f19 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -386,14 +386,6 @@ do { \ ##__VA_ARGS__, bch2_err_str(_ret)); \ } while (0) -static inline int __bch2_err_trace(struct bch_fs *c, int err) -{ - trace_error_throw(c, err, _THIS_IP_); - return err; -} - -#define bch_err_throw(_c, _err) __bch2_err_trace(_c, -BCH_ERR_##_err) - /* Parameters that are useful for debugging, but should always be compiled in: */ #define BCH_DEBUG_PARAMS_ALWAYS() \ BCH_DEBUG_PARAM(key_merging_disabled, \ @@ -1153,6 +1145,15 @@ struct bch_fs { struct mutex fsck_error_counts_lock; }; +static inline int __bch2_err_trace(struct bch_fs *c, int err) +{ + this_cpu_inc(c->counters[BCH_COUNTER_error_throw]); + trace_error_throw(c, err, _THIS_IP_); + return err; +} + +#define bch_err_throw(_c, _err) __bch2_err_trace(_c, -BCH_ERR_##_err) + extern struct wait_queue_head bch2_read_only_wait; static inline bool bch2_ro_ref_tryget(struct bch_fs *c) diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index a8f59522e258..7a0b602c1b27 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -1438,9 +1438,9 @@ enum btree_id { */ #define BTREE_ID_NR_MAX 63 -static inline bool btree_id_is_alloc(enum btree_id id) +static inline bool btree_id_is_alloc(enum btree_id btree) { - switch (id) { + switch (btree) { case BTREE_ID_alloc: case BTREE_ID_backpointers: case BTREE_ID_need_discard: @@ -1454,6 +1454,33 @@ static inline bool btree_id_is_alloc(enum btree_id id) } } +/* We can reconstruct these btrees from information in other btrees */ +static inline bool btree_id_can_reconstruct(enum btree_id btree) +{ + if (btree_id_is_alloc(btree)) + return true; + + switch (btree) { + case BTREE_ID_snapshot_trees: + case BTREE_ID_deleted_inodes: + case BTREE_ID_rebalance_work: + case BTREE_ID_subvolume_children: + return true; + default: + return false; + } +} + +/* + * We 
can reconstruct BTREE_ID_alloc, but reconstructing it from scratch is not + * so cheap and OOMs on huge filesystems (until we have online + * check_allocations) + */ +static inline bool btree_id_recovers_from_scan(enum btree_id btree) +{ + return btree == BTREE_ID_alloc || !btree_id_can_reconstruct(btree); +} + #define BTREE_MAX_DEPTH 4U /* Btree nodes */ diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 6b91649688da..ae7d260589d8 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -44,27 +44,6 @@ #include <linux/rcupdate.h> #include <linux/sched/task.h> -/* - * Returns true if it's a btree we can easily reconstruct, or otherwise won't - * cause data loss if it's missing: - */ -static bool btree_id_important(enum btree_id btree) -{ - if (btree_id_is_alloc(btree)) - return false; - - switch (btree) { - case BTREE_ID_quotas: - case BTREE_ID_snapshot_trees: - case BTREE_ID_logged_ops: - case BTREE_ID_rebalance_work: - case BTREE_ID_subvolume_children: - return false; - default: - return true; - } -} - static const char * const bch2_gc_phase_strs[] = { #define x(n) #n, GC_PHASES() @@ -557,45 +536,55 @@ fsck_err: return ret; } -static int bch2_check_root(struct btree_trans *trans, enum btree_id btree, +static int bch2_topology_check_root(struct btree_trans *trans, enum btree_id btree, bool *reconstructed_root) { struct bch_fs *c = trans->c; struct btree_root *r = bch2_btree_id_root(c, btree); - CLASS(printbuf, buf)(); - int ret = 0; - - bch2_btree_id_to_text(&buf, btree); - if (r->error) { - bch_info(c, "btree root %s unreadable, must recover from scan", buf.buf); + if (!r->error) + return 0; - ret = bch2_btree_has_scanned_nodes(c, btree); - if (ret < 0) - goto err; + CLASS(printbuf, buf)(); + int ret = 0; - if (!ret) { - __fsck_err(trans, - FSCK_CAN_FIX|(!btree_id_important(btree) ? 
FSCK_AUTOFIX : 0), - btree_root_unreadable_and_scan_found_nothing, - "no nodes found for btree %s, continue?", buf.buf); + if (!btree_id_recovers_from_scan(btree)) { + r->alive = false; + r->error = 0; + bch2_btree_root_alloc_fake_trans(trans, btree, 0); + ret = bch2_btree_lost_data(c, &buf, btree); + bch2_print_str(c, KERN_NOTICE, buf.buf); + goto out; + } - r->alive = false; - r->error = 0; - bch2_btree_root_alloc_fake_trans(trans, btree, 0); - } else { - r->alive = false; - r->error = 0; - bch2_btree_root_alloc_fake_trans(trans, btree, 1); + bch2_btree_id_to_text(&buf, btree); + bch_info(c, "btree root %s unreadable, must recover from scan", buf.buf); - bch2_shoot_down_journal_keys(c, btree, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); - ret = bch2_get_scanned_nodes(c, btree, 0, POS_MIN, SPOS_MAX); - if (ret) - return ret; - } + ret = bch2_btree_has_scanned_nodes(c, btree); + if (ret < 0) + goto err; - *reconstructed_root = true; + if (!ret) { + __fsck_err(trans, + FSCK_CAN_FIX|(btree_id_can_reconstruct(btree) ? 
FSCK_AUTOFIX : 0), + btree_root_unreadable_and_scan_found_nothing, + "no nodes found for btree %s, continue?", buf.buf); + + r->alive = false; + r->error = 0; + bch2_btree_root_alloc_fake_trans(trans, btree, 0); + } else { + r->alive = false; + r->error = 0; + bch2_btree_root_alloc_fake_trans(trans, btree, 1); + + bch2_shoot_down_journal_keys(c, btree, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); + ret = bch2_get_scanned_nodes(c, btree, 0, POS_MIN, SPOS_MAX); + if (ret) + return ret; } +out: + *reconstructed_root = true; err: fsck_err: bch_err_fn(c, ret); @@ -613,7 +602,7 @@ int bch2_check_topology(struct bch_fs *c) for (unsigned i = 0; i < btree_id_nr_alive(c) && !ret; i++) { bool reconstructed_root = false; recover: - ret = lockrestart_do(trans, bch2_check_root(trans, i, &reconstructed_root)); + ret = lockrestart_do(trans, bch2_topology_check_root(trans, i, &reconstructed_root)); if (ret) break; diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 8962c481e310..546b559fe3ce 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -2366,7 +2366,9 @@ struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *iter, struct bpos en bch2_trans_verify_not_unlocked_or_in_restart(trans); bch2_btree_iter_verify_entry_exit(iter); - EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bkey_eq(end, POS_MAX)); + EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && + !(iter->flags & BTREE_ITER_nofilter_whiteouts) && + bkey_eq(end, POS_MAX)); ret = trans_maybe_inject_restart(trans, _RET_IP_); if (unlikely(ret)) { diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c index 4b7b5ca74ba1..b618a0bd1186 100644 --- a/fs/bcachefs/btree_node_scan.c +++ b/fs/bcachefs/btree_node_scan.c @@ -149,7 +149,7 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca, bch2_encrypt(c, BSET_CSUM_TYPE(&bn->keys), nonce, &bn->flags, bytes); } - if (btree_id_is_alloc(BTREE_NODE_ID(bn))) + if 
(btree_id_can_reconstruct(BTREE_NODE_ID(bn))) return; if (BTREE_NODE_LEVEL(bn) >= BTREE_MAX_DEPTH) @@ -534,7 +534,7 @@ int bch2_btree_has_scanned_nodes(struct bch_fs *c, enum btree_id btree) int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree, unsigned level, struct bpos node_min, struct bpos node_max) { - if (btree_id_is_alloc(btree)) + if (!btree_id_recovers_from_scan(btree)) return 0; struct find_btree_nodes *f = &c->found_btree_nodes; diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c index f59f018fe0d8..053a837cf241 100644 --- a/fs/bcachefs/btree_update.c +++ b/fs/bcachefs/btree_update.c @@ -12,7 +12,6 @@ #include "extents.h" #include "keylist.h" #include "snapshot.h" -#include "super-io.h" #include "trace.h" #include <linux/string_helpers.h> @@ -159,21 +158,6 @@ int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans, return ret; } -static inline enum bch_bkey_type extent_whiteout_type(struct bch_fs *c, enum btree_id btree, const struct bkey *k) -{ - /* - * KEY_TYPE_extent_whiteout indicates that there isn't a real extent - * present at that position: key start positions inclusive of - * KEY_TYPE_extent_whiteout (but not KEY_TYPE_whiteout) are - * monotonically increasing - */ - return btree_id_is_extents_snapshots(btree) && - bkey_deleted(k) && - !bch2_request_incompat_feature(c, bcachefs_metadata_version_extent_snapshot_whiteouts) - ? 
KEY_TYPE_extent_whiteout - : KEY_TYPE_whiteout; -} - int bch2_trans_update_extent_overwrite(struct btree_trans *trans, struct btree_iter *iter, enum btree_iter_update_trigger_flags flags, diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index 663739db82b1..18560ca80057 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -5,6 +5,7 @@ #include "btree_iter.h" #include "journal.h" #include "snapshot.h" +#include "super-io.h" struct bch_fs; struct btree; @@ -110,6 +111,22 @@ static inline int bch2_insert_snapshot_whiteouts(struct btree_trans *trans, : 0; } +static inline enum bch_bkey_type extent_whiteout_type(struct bch_fs *c, enum btree_id btree, + const struct bkey *k) +{ + /* + * KEY_TYPE_extent_whiteout indicates that there isn't a real extent + * present at that position: key start positions inclusive of + * KEY_TYPE_extent_whiteout (but not KEY_TYPE_whiteout) are + * monotonically increasing + */ + return btree_id_is_extents_snapshots(btree) && + bkey_deleted(k) && + !bch2_request_incompat_feature(c, bcachefs_metadata_version_extent_snapshot_whiteouts) + ? KEY_TYPE_extent_whiteout + : KEY_TYPE_whiteout; +} + int bch2_trans_update_extent_overwrite(struct btree_trans *, struct btree_iter *, enum btree_iter_update_trigger_flags, struct bkey_s_c, struct bkey_s_c); diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 76897cf15946..65ca54c5b0ff 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -336,6 +336,20 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, BUG_ON(b->ob.nr); mutex_lock(&c->btree_reserve_cache_lock); + if (unlikely(c->open_buckets_nr_free <= bch2_open_buckets_reserved(watermark))) { + guard(spinlock)(&c->freelist_lock); + if (c->open_buckets_nr_free <= bch2_open_buckets_reserved(watermark)) { + if (cl) + closure_wait(&c->open_buckets_wait, cl); + + ret = cl + ? 
bch_err_throw(c, bucket_alloc_blocked) + : bch_err_throw(c, open_buckets_empty); + mutex_unlock(&c->btree_reserve_cache_lock); + goto err; + } + } + if (c->btree_reserve_cache_nr > nr_reserve) { for (struct btree_alloc *a = c->btree_reserve_cache; a < c->btree_reserve_cache + c->btree_reserve_cache_nr;) { diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index afad11831e1d..755fb25a8eba 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -701,8 +701,16 @@ int bch2_accounting_key_to_wb_slowpath(struct bch_fs *c, enum btree_id btree, struct bkey_i_accounting *k) { struct btree_write_buffer *wb = &c->btree_write_buffer; - struct btree_write_buffered_key new = { .btree = btree }; + if (trace_accounting_key_to_wb_slowpath_enabled()) { + CLASS(printbuf, buf)(); + prt_printf(&buf, "have: %zu\n", wb->accounting.nr); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&k->k_i)); + trace_accounting_key_to_wb_slowpath(c, buf.buf); + } + count_event(c, accounting_key_to_wb_slowpath); + + struct btree_write_buffered_key new = { .btree = btree }; bkey_copy(&new.k, &k->k_i); int ret = darray_push(&wb->accounting, new); diff --git a/fs/bcachefs/btree_write_buffer.h b/fs/bcachefs/btree_write_buffer.h index e484cd6b90b0..b862bdf67f58 100644 --- a/fs/bcachefs/btree_write_buffer.h +++ b/fs/bcachefs/btree_write_buffer.h @@ -95,7 +95,7 @@ static inline int bch2_journal_key_to_wb(struct bch_fs *c, EBUG_ON(!dst->seq); - return k->k.type == KEY_TYPE_accounting + return bch2_bkey_is_accounting_mem(&k->k) ? 
bch2_accounting_key_to_wb(c, btree, bkey_i_to_accounting(k)) : __bch2_journal_key_to_wb(c, dst, btree, k); } diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 01838a3a189d..b7e0e31407bf 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -225,7 +225,7 @@ static void trace_io_move_created_rebalance2(struct data_update *m, trace_io_move_created_rebalance(c, buf.buf); - this_cpu_inc(c->counters[BCH_COUNTER_io_move_created_rebalance]); + count_event(c, io_move_created_rebalance); } noinline_for_stack @@ -460,17 +460,11 @@ restart_drop_extra_replicas: this_cpu_add(c->counters[BCH_COUNTER_io_move_finish], new->k.size); if (trace_io_move_finish_enabled()) trace_io_move_finish2(m, &new->k_i, insert); + goto next; err: - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - ret = 0; - if (ret) + if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) break; -next: - while (bkey_ge(iter.pos, bch2_keylist_front(&op->insert_keys)->k.p)) { - bch2_keylist_pop_front(&op->insert_keys); - if (bch2_keylist_empty(&op->insert_keys)) - goto out; - } + continue; nowork: if (m->stats) { @@ -479,11 +473,15 @@ nowork: atomic64_add(k.k->p.offset - iter.pos.offset, &m->stats->sectors_raced); } - count_event(c, io_move_fail); bch2_btree_iter_advance(&iter); - goto next; +next: + while (bkey_ge(iter.pos, bch2_keylist_front(&op->insert_keys)->k.p)) { + bch2_keylist_pop_front(&op->insert_keys); + if (bch2_keylist_empty(&op->insert_keys)) + goto out; + } } out: BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart)); @@ -693,6 +691,15 @@ int bch2_extent_drop_ptrs(struct btree_trans *trans, if (ret) return ret; + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; + unsigned i = 0; + bkey_for_each_ptr_decode(k.k, bch2_bkey_ptrs_c(k), p, entry) { + if (data_opts->kill_ec_ptrs & BIT(i)) + bch2_bkey_drop_ec(n, p.ptr.dev); + i++; + } + while (data_opts->kill_ptrs) { unsigned i = 0, drop = __fls(data_opts->kill_ptrs); diff --git 
a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h index 5e14d13568de..fc12aa65366f 100644 --- a/fs/bcachefs/data_update.h +++ b/fs/bcachefs/data_update.h @@ -12,6 +12,7 @@ struct moving_context; struct data_update_opts { unsigned rewrite_ptrs; unsigned kill_ptrs; + unsigned kill_ec_ptrs; u16 target; u8 extra_replicas; unsigned btree_insert_flags; diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index f96530c70262..5944ad6d0f8d 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -184,6 +184,9 @@ int bch2_accounting_validate(struct bch_fs *c, struct bkey_s_c k, void *end = &acc_k + 1; int ret = 0; + if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) + return 0; + bkey_fsck_err_on((from.flags & BCH_VALIDATE_commit) && bversion_zero(k.k->bversion), c, accounting_key_version_0, diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h index 43f4b21d0aab..cc73cce98a44 100644 --- a/fs/bcachefs/disk_accounting.h +++ b/fs/bcachefs/disk_accounting.h @@ -145,6 +145,16 @@ static inline bool bch2_accounting_is_mem(struct disk_accounting_pos *acc) acc->type != BCH_DISK_ACCOUNTING_inum; } +static inline bool bch2_bkey_is_accounting_mem(struct bkey *k) +{ + if (k->type != KEY_TYPE_accounting) + return false; + + struct disk_accounting_pos acc_k; + bpos_to_disk_accounting_pos(&acc_k, k->p); + return bch2_accounting_is_mem(&acc_k); +} + /* * Update in memory counters so they match the btree update we're doing; called * from transaction commit path diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h index 0c3c3a24fc6f..213814787dd6 100644 --- a/fs/bcachefs/error.h +++ b/fs/bcachefs/error.h @@ -173,7 +173,8 @@ do { \ if (!bch2_err_matches(_ret, BCH_ERR_fsck_fix) && \ !bch2_err_matches(_ret, BCH_ERR_fsck_ignore)) \ ret = _ret; \ - ret = bch_err_throw(c, fsck_delete_bkey); \ + else \ + ret = bch_err_throw(c, fsck_delete_bkey); \ goto fsck_err; \ } while (0) diff --git a/fs/bcachefs/extent_update.c 
b/fs/bcachefs/extent_update.c index c4b0ea1adaa8..7ddb156c765c 100644 --- a/fs/bcachefs/extent_update.c +++ b/fs/bcachefs/extent_update.c @@ -98,11 +98,13 @@ static int count_iters_for_insert(struct btree_trans *trans, return ret2 ?: ret; } -int bch2_extent_atomic_end(struct btree_trans *trans, - struct btree_iter *iter, - struct bpos *end) +int bch2_extent_trim_atomic(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_i *insert) { - unsigned nr_iters = 0; + enum bch_bkey_type whiteout_type = + extent_whiteout_type(trans->c, iter->btree_id, &insert->k); + struct bpos end = insert->k.p; struct btree_iter copy; bch2_trans_copy_iter(©, iter); @@ -111,42 +113,54 @@ int bch2_extent_atomic_end(struct btree_trans *trans, if (ret) goto err; + copy.flags |= BTREE_ITER_nofilter_whiteouts; + struct bkey_s_c k; - for_each_btree_key_max_continue_norestart(copy, *end, 0, k, ret) { + unsigned nr_iters = 0; + for_each_btree_key_continue_norestart(copy, 0, k, ret) { unsigned offset = 0; if (bkey_gt(iter->pos, bkey_start_pos(k.k))) offset = iter->pos.offset - bkey_start_offset(k.k); - ret = count_iters_for_insert(trans, k, offset, end, &nr_iters); - if (ret) - break; + if (bkey_extent_whiteout(k.k)) { + if (bpos_gt(k.k->p, insert->k.p)) { + if (k.k->type == KEY_TYPE_extent_whiteout) + break; + else + continue; + } else if (k.k->type != whiteout_type) { + nr_iters += 1; + if (nr_iters >= EXTENT_ITERS_MAX) { + end = bpos_min(end, k.k->p); + break; + } + } + } else { + if (bpos_ge(bkey_start_pos(k.k), end)) + break; + + ret = count_iters_for_insert(trans, k, offset, &end, &nr_iters); + if (ret) + break; + } } err: bch2_trans_iter_exit(©); - return ret < 0 ? 
ret : 0; -} - -int bch2_extent_trim_atomic(struct btree_trans *trans, - struct btree_iter *iter, - struct bkey_i *k) -{ - struct bpos end = k->k.p; - int ret = bch2_extent_atomic_end(trans, iter, &end); - if (ret) + if (ret < 0) return ret; /* tracepoint */ - if (bpos_lt(end, k->k.p)) { + if (bpos_lt(end, insert->k.p)) { if (trace_extent_trim_atomic_enabled()) { CLASS(printbuf, buf)(); bch2_bpos_to_text(&buf, end); prt_newline(&buf); - bch2_bkey_val_to_text(&buf, trans->c, bkey_i_to_s_c(k)); + bch2_bkey_val_to_text(&buf, trans->c, bkey_i_to_s_c(insert)); trace_extent_trim_atomic(trans->c, buf.buf); } - bch2_cut_back(end, k); + bch2_cut_back(end, insert); } return 0; } diff --git a/fs/bcachefs/extent_update.h b/fs/bcachefs/extent_update.h index 34467db53f45..2d956d971b11 100644 --- a/fs/bcachefs/extent_update.h +++ b/fs/bcachefs/extent_update.h @@ -4,8 +4,6 @@ #include "bcachefs.h" -int bch2_extent_atomic_end(struct btree_trans *, struct btree_iter *, - struct bpos *); int bch2_extent_trim_atomic(struct btree_trans *, struct btree_iter *, struct bkey_i *); diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index b879a586b7f6..7ab0398707d8 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -995,6 +995,22 @@ void bch2_bkey_drop_device_noerror(struct bkey_s k, unsigned dev) bch2_bkey_drop_ptrs_noerror(k, ptr, ptr->dev == dev); } +void bch2_bkey_drop_ec(struct bkey_i *k, unsigned dev) +{ + struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(k)); + union bch_extent_entry *entry, *ec = NULL; + + bkey_extent_entry_for_each(ptrs, entry) { + if (extent_entry_type(entry) == BCH_EXTENT_ENTRY_stripe_ptr) + ec = entry; + else if (extent_entry_type(entry) == BCH_EXTENT_ENTRY_ptr && + entry->ptr.dev == dev) { + bch2_bkey_extent_entry_drop(k, ec); + return; + } + } +} + const struct bch_extent_ptr *bch2_bkey_has_device_c(struct bkey_s_c k, unsigned dev) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); @@ -1757,3 +1773,4 @@ int bch2_cut_back_s(struct bpos 
where, struct bkey_s k) memset(bkey_val_end(k), 0, val_u64s_delta * sizeof(u64)); return -val_u64s_delta; } + diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index 35ee03cd5065..f6dcb17108cd 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -650,6 +650,7 @@ void bch2_bkey_drop_ptr(struct bkey_s, struct bch_extent_ptr *); void bch2_bkey_drop_device_noerror(struct bkey_s, unsigned); void bch2_bkey_drop_device(struct bkey_s, unsigned); +void bch2_bkey_drop_ec(struct bkey_i *k, unsigned); #define bch2_bkey_drop_ptrs_noerror(_k, _ptr, _cond) \ do { \ diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c index 0005569ecace..ed8329c6c00d 100644 --- a/fs/bcachefs/fs-io-buffered.c +++ b/fs/bcachefs/fs-io-buffered.c @@ -290,7 +290,8 @@ void bch2_readahead(struct readahead_control *ractl) * scheduling. */ blk_start_plug(&plug); - bch2_pagecache_add_get(inode); + if (!bch2_pagecache_add_tryget(inode)) + goto out; struct btree_trans *trans = bch2_trans_get(c); while ((folio = readpage_iter_peek(&readpages_iter))) { @@ -317,6 +318,7 @@ void bch2_readahead(struct readahead_control *ractl) bch2_trans_put(trans); bch2_pagecache_add_put(inode); +out: blk_finish_plug(&plug); darray_exit(&readpages_iter.folios); } @@ -759,7 +761,6 @@ int bch2_write_end(struct file *file, struct address_space *mapping, struct bch2_folio_reservation *res = fsdata; unsigned offset = pos - folio_pos(folio); - lockdep_assert_held(&inode->v.i_rwsem); BUG_ON(offset + copied > folio_size(folio)); if (unlikely(copied < len && !folio_test_uptodate(folio))) { diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 76d2647d9500..0425238a83ee 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -826,14 +826,6 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry, bch2_inode_update_after_write(trans, inode, &inode_u, ATTR_MTIME); - if (inode_u.bi_subvol) { - /* - * Subvolume deletion is asynchronous, but we still want to tell - * the VFS that it's been 
deleted here: - */ - set_nlink(&inode->v, 0); - } - if (IS_CASEFOLDED(vdir)) d_invalidate(dentry); err: @@ -865,9 +857,7 @@ static int bch2_symlink(struct mnt_idmap *idmap, if (IS_ERR(inode)) return bch2_err_class(PTR_ERR(inode)); - inode_lock(&inode->v); ret = page_symlink(&inode->v, symname, strlen(symname) + 1); - inode_unlock(&inode->v); if (unlikely(ret)) goto err; diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 30fe269d531d..df6833416855 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -150,7 +150,7 @@ static void move_write_done(struct bch_write_op *op) bch2_write_op_to_text(&buf, op); trace_io_move_write_fail(c, buf.buf); } - this_cpu_inc(c->counters[BCH_COUNTER_io_move_write_fail]); + count_event(c, io_move_write_fail); ctxt->write_error = true; } @@ -344,7 +344,7 @@ int bch2_move_extent(struct moving_context *ctxt, if (!data_opts.rewrite_ptrs && !data_opts.extra_replicas && !data_opts.scrub) { - if (data_opts.kill_ptrs) { + if (data_opts.kill_ptrs|data_opts.kill_ec_ptrs) { this_cpu_add(c->counters[BCH_COUNTER_io_move_drop_only], k.k->size); return bch2_extent_drop_ptrs(trans, iter, k, &io_opts, &data_opts); } else { @@ -542,7 +542,7 @@ int bch2_move_ratelimit(struct moving_context *ctxt) if (ctxt->wait_on_copygc && c->copygc_running) { bch2_moving_ctxt_flush_all(ctxt); - wait_event_killable(c->copygc_running_wq, + wait_event_freezable(c->copygc_running_wq, !c->copygc_running || (is_kthread && kthread_should_stop())); } @@ -1280,7 +1280,17 @@ static bool drop_extra_replicas_pred(struct bch_fs *c, void *arg, i++; } - return data_opts->kill_ptrs != 0; + i = 0; + bkey_for_each_ptr_decode(k.k, bch2_bkey_ptrs_c(k), p, entry) { + if (p.has_ec && durability - p.ec.redundancy >= replicas) { + data_opts->kill_ec_ptrs |= BIT(i); + durability -= p.ec.redundancy; + } + + i++; + } + + return (data_opts->kill_ptrs|data_opts->kill_ec_ptrs) != 0; } static bool scrub_pred(struct bch_fs *c, void *_arg, diff --git a/fs/bcachefs/rebalance.c 
b/fs/bcachefs/rebalance.c index 17ca56b0e2ac..e1db63d75a99 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -444,8 +444,9 @@ static int do_rebalance_extent(struct moving_context *ctxt, bch2_bkey_buf_init(&sk); - ret = bkey_err(k = next_rebalance_extent(trans, work_pos, - extent_iter, &io_opts, &data_opts)); + ret = lockrestart_do(trans, + bkey_err(k = next_rebalance_extent(trans, work_pos, + extent_iter, &io_opts, &data_opts))); if (ret || !k.k) goto out; @@ -587,7 +588,7 @@ static int do_rebalance(struct moving_context *ctxt) ret = k->k.type == KEY_TYPE_cookie ? do_rebalance_scan(ctxt, k->k.p.inode, le64_to_cpu(bkey_i_to_cookie(k)->v.cookie)) - : lockrestart_do(trans, do_rebalance_extent(ctxt, k->k.p, &extent_iter)); + : do_rebalance_extent(ctxt, k->k.p, &extent_iter); if (ret) break; } diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 21aa2edb13ac..29e81f96db0f 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -607,7 +607,7 @@ static int read_btree_roots(struct bch_fs *c) c, btree_root_read_error, "error reading btree root %s: %s", buf.buf, bch2_err_str(ret))) { - if (btree_id_is_alloc(i)) + if (btree_id_can_reconstruct(i)) r->error = 0; ret = 0; } diff --git a/fs/bcachefs/sb-counters_format.h b/fs/bcachefs/sb-counters_format.h index f3ea53a55384..44bc12573a0c 100644 --- a/fs/bcachefs/sb-counters_format.h +++ b/fs/bcachefs/sb-counters_format.h @@ -101,7 +101,9 @@ enum counters_flags { x(trans_restart_write_buffer_flush, 75, TYPE_COUNTER) \ x(trans_restart_split_race, 76, TYPE_COUNTER) \ x(write_buffer_flush_slowpath, 77, TYPE_COUNTER) \ - x(write_buffer_flush_sync, 78, TYPE_COUNTER) + x(write_buffer_flush_sync, 78, TYPE_COUNTER) \ + x(accounting_key_to_wb_slowpath, 94, TYPE_COUNTER) \ + x(error_throw, 93, TYPE_COUNTER) enum bch_persistent_counters { #define x(t, n, ...) 
BCH_COUNTER_##t, diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index ef15e614f4f3..09e7f8ae9922 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -2542,11 +2542,6 @@ struct bch_fs *bch2_fs_open(darray_const_str *devices, BUG_ON(darray_push(&sbs, sb)); } - if (opts->nochanges && !opts->read_only) { - ret = bch_err_throw(c, erofs_nochanges); - goto err_print; - } - darray_for_each(sbs, sb) if (!best || sb_cmp(sb->sb, best->sb) > 0) best = sb; diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index 3776a1403104..269cdf1a87a4 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -1179,6 +1179,11 @@ DEFINE_EVENT(transaction_event, trans_restart_write_buffer_flush, TP_ARGS(trans, caller_ip) ); +DEFINE_EVENT(fs_str, accounting_key_to_wb_slowpath, + TP_PROTO(struct bch_fs *c, const char *str), + TP_ARGS(c, str) +); + TRACE_EVENT(path_downgrade, TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, |