diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/bcachefs/alloc_background.c | 44 | ||||
-rw-r--r-- | fs/bcachefs/alloc_background.h | 1 | ||||
-rw-r--r-- | fs/bcachefs/alloc_foreground.c | 1 | ||||
-rw-r--r-- | fs/bcachefs/alloc_types.h | 10 | ||||
-rw-r--r-- | fs/bcachefs/bcachefs.h | 6 | ||||
-rw-r--r-- | fs/bcachefs/bcachefs_format.h | 3 | ||||
-rw-r--r-- | fs/bcachefs/bkey_methods.c | 9 | ||||
-rw-r--r-- | fs/bcachefs/bkey_methods.h | 3 | ||||
-rw-r--r-- | fs/bcachefs/btree_gc.c | 40 | ||||
-rw-r--r-- | fs/bcachefs/btree_update_interior.c | 4 | ||||
-rw-r--r-- | fs/bcachefs/buckets.c | 15 | ||||
-rw-r--r-- | fs/bcachefs/data_update.c | 4 | ||||
-rw-r--r-- | fs/bcachefs/errcode.h | 1 | ||||
-rw-r--r-- | fs/bcachefs/extent_update.c | 1 | ||||
-rw-r--r-- | fs/bcachefs/extents.c | 47 | ||||
-rw-r--r-- | fs/bcachefs/extents.h | 5 | ||||
-rw-r--r-- | fs/bcachefs/fs-io-buffered.c | 56 | ||||
-rw-r--r-- | fs/bcachefs/fs.c | 17 | ||||
-rw-r--r-- | fs/bcachefs/fsck.c | 6 | ||||
-rw-r--r-- | fs/bcachefs/inode.c | 5 | ||||
-rw-r--r-- | fs/bcachefs/io_read.c | 55 | ||||
-rw-r--r-- | fs/bcachefs/migrate.c | 7 | ||||
-rw-r--r-- | fs/bcachefs/recovery.c | 12 | ||||
-rw-r--r-- | fs/bcachefs/sb-counters_format.h | 2 | ||||
-rw-r--r-- | fs/bcachefs/sb-errors_format.h | 7 | ||||
-rw-r--r-- | fs/bcachefs/super.c | 9 | ||||
-rw-r--r-- | fs/bcachefs/trace.h | 36 |
27 files changed, 246 insertions, 160 deletions
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index cab4d6798dd7..21cdc42eff46 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -1771,13 +1771,6 @@ static void discard_in_flight_remove(struct bch_dev *ca, u64 bucket) darray_remove_item(&ca->discard_buckets_in_flight, i); } -struct discard_buckets_state { - u64 seen; - u64 open; - u64 need_journal_commit; - u64 discarded; -}; - static int bch2_discard_one_bucket(struct btree_trans *trans, struct bch_dev *ca, struct btree_iter *need_discard_iter, @@ -1790,6 +1783,8 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, bool discard_locked = false; int ret = 0; + s->seen++; + if (bch2_bucket_is_open_safe(c, pos.inode, pos.offset)) { s->open++; return 0; @@ -1800,6 +1795,8 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, if (seq_ready > c->journal.flushed_seq_ondisk) { if (seq_ready > c->journal.flushing_seq) s->need_journal_commit++; + else + s->commit_in_flight++; return 0; } @@ -1815,6 +1812,8 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, return ret; if (a->v.data_type != BCH_DATA_need_discard) { + s->bad_data_type++; + if (need_discard_or_freespace_err(trans, k, true, true, true)) { ret = bch2_btree_bit_mod_iter(trans, need_discard_iter, false); if (ret) @@ -1826,8 +1825,10 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, } if (!fastpath) { - if (discard_in_flight_add(ca, iter.pos.offset, true)) + if (discard_in_flight_add(ca, iter.pos.offset, true)) { + s->already_discarding++; goto out; + } discard_locked = true; } @@ -1861,6 +1862,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, commit: ret = bch2_trans_commit(trans, NULL, NULL, BCH_WATERMARK_btree| + BCH_TRANS_COMMIT_no_check_rw| BCH_TRANS_COMMIT_no_enospc); if (ret) goto out; @@ -1873,14 +1875,11 @@ out: fsck_err: if (discard_locked) discard_in_flight_remove(ca, iter.pos.offset); - if (!ret) - s->seen++; return ret; } 
-static void bch2_do_discards_work(struct work_struct *work) +static void __bch2_dev_do_discards(struct bch_dev *ca) { - struct bch_dev *ca = container_of(work, struct bch_dev, discard_work); struct bch_fs *c = ca->fs; struct discard_buckets_state s = {}; struct bpos discard_pos_done = POS_MAX; @@ -1901,10 +1900,25 @@ static void bch2_do_discards_work(struct work_struct *work) if (s.need_journal_commit > dev_buckets_available(ca, BCH_WATERMARK_normal)) bch2_journal_flush_async(&c->journal, NULL); - trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded, - bch2_err_str(ret)); + trace_discard_buckets(c, &s, bch2_err_str(ret)); enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_dev_do_discards); +} + +void bch2_do_discards_going_ro(struct bch_fs *c) +{ + for_each_member_device(c, ca) + if (bch2_dev_get_ioref(c, ca->dev_idx, WRITE, BCH_DEV_WRITE_REF_dev_do_discards)) + __bch2_dev_do_discards(ca); +} + +static void bch2_do_discards_work(struct work_struct *work) +{ + struct bch_dev *ca = container_of(work, struct bch_dev, discard_work); + struct bch_fs *c = ca->fs; + + __bch2_dev_do_discards(ca); + enumerated_ref_put(&c->writes, BCH_WRITE_REF_discard); } @@ -1992,7 +2006,7 @@ static void bch2_do_discards_fast_work(struct work_struct *work) break; } - trace_discard_buckets_fast(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret)); + trace_discard_buckets_fast(c, &s, bch2_err_str(ret)); bch2_trans_put(trans); enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_discard_one_bucket_fast); diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index c2e8482fbbe6..a602507fef19 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -320,6 +320,7 @@ static inline int bch2_check_discard_freespace_key_async(struct btree_trans *tra int bch2_check_alloc_info(struct bch_fs *); int bch2_check_alloc_to_lru_refs(struct bch_fs *); void bch2_dev_do_discards(struct bch_dev *); +void 
bch2_do_discards_going_ro(struct bch_fs *); void bch2_do_discards(struct bch_fs *); static inline u64 should_invalidate_buckets(struct bch_dev *ca, diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 3d125ee81663..97b627ed3b22 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -1529,6 +1529,7 @@ void bch2_fs_alloc_debug_to_text(struct printbuf *out, struct bch_fs *c) printbuf_tabstop_push(out, 24); prt_printf(out, "capacity\t%llu\n", c->capacity); + prt_printf(out, "used\t%llu\n", bch2_fs_usage_read_short(c).used); prt_printf(out, "reserved\t%llu\n", c->reserved); prt_printf(out, "hidden\t%llu\n", percpu_u64_get(&c->usage->hidden)); prt_printf(out, "btree\t%llu\n", percpu_u64_get(&c->usage->btree)); diff --git a/fs/bcachefs/alloc_types.h b/fs/bcachefs/alloc_types.h index e7becdf22cba..ee52b66dc5d7 100644 --- a/fs/bcachefs/alloc_types.h +++ b/fs/bcachefs/alloc_types.h @@ -118,4 +118,14 @@ struct write_point_specifier { unsigned long v; }; +struct discard_buckets_state { + u64 seen; + u64 open; + u64 need_journal_commit; + u64 commit_in_flight; + u64 bad_data_type; + u64 already_discarding; + u64 discarded; +}; + #endif /* _BCACHEFS_ALLOC_TYPES_H */ diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 553031a3b06a..83d6ab9c1a91 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -458,7 +458,6 @@ BCH_DEBUG_PARAMS_ALL() x(btree_node_compact) \ x(btree_node_merge) \ x(btree_node_sort) \ - x(btree_node_get) \ x(btree_node_read) \ x(btree_node_read_done) \ x(btree_node_write) \ @@ -466,10 +465,6 @@ BCH_DEBUG_PARAMS_ALL() x(btree_interior_update_total) \ x(btree_gc) \ x(data_write) \ - x(data_write_to_submit) \ - x(data_write_to_queue) \ - x(data_write_to_btree_update) \ - x(data_write_btree_update) \ x(data_read) \ x(data_promote) \ x(journal_flush_write) \ @@ -483,6 +478,7 @@ BCH_DEBUG_PARAMS_ALL() x(blocked_allocate) \ x(blocked_allocate_open_bucket) \ 
x(blocked_write_buffer_full) \ + x(blocked_writeback_throttle) \ x(nocow_lock_contended) enum bch_time_stats { diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 0839397105a9..269a373f3e80 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -965,7 +965,8 @@ enum bch_sb_feature { x(alloc_info, 0) \ x(alloc_metadata, 1) \ x(extents_above_btree_updates_done, 2) \ - x(bformat_overflow_done, 3) + x(bformat_overflow_done, 3) \ + x(no_stale_ptrs, 4) enum bch_sb_compat { #define x(f, n) BCH_COMPAT_##f, diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index 75d73677c4d8..da1a1a21586e 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -344,15 +344,6 @@ void bch2_bkey_swab_val(struct bkey_s k) ops->swab(k); } -bool bch2_bkey_normalize(struct bch_fs *c, struct bkey_s k) -{ - const struct bkey_ops *ops = bch2_bkey_type_ops(k.k->type); - - return ops->key_normalize - ? ops->key_normalize(c, k) - : false; -} - bool bch2_bkey_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r) { const struct bkey_ops *ops = bch2_bkey_type_ops(l.k->type); diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h index bf34111cdf00..5adce4e9294b 100644 --- a/fs/bcachefs/bkey_methods.h +++ b/fs/bcachefs/bkey_methods.h @@ -26,7 +26,6 @@ struct bkey_ops { void (*val_to_text)(struct printbuf *, struct bch_fs *, struct bkey_s_c); void (*swab)(struct bkey_s); - bool (*key_normalize)(struct bch_fs *, struct bkey_s); bool (*key_merge)(struct bch_fs *, struct bkey_s, struct bkey_s_c); int (*trigger)(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, @@ -66,8 +65,6 @@ void bch2_bkey_val_to_text(struct printbuf *, struct bch_fs *, void bch2_bkey_swab_val(struct bkey_s); -bool bch2_bkey_normalize(struct bch_fs *, struct bkey_s); - static inline bool bch2_bkey_maybe_mergable(const struct bkey *l, const struct bkey *r) { return l->type == r->type && diff --git 
a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 2338feb8d8ed..f45aa34d22de 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -1140,43 +1140,11 @@ static int gc_btree_gens_key(struct btree_trans *trans, struct bkey_s_c k) { struct bch_fs *c = trans->c; - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); if (unlikely(test_bit(BCH_FS_going_ro, &c->flags))) return -EROFS; - bool too_stale = false; - scoped_guard(rcu) { - bkey_for_each_ptr(ptrs, ptr) { - struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev); - if (!ca) - continue; - - too_stale |= dev_ptr_stale(ca, ptr) > 16; - } - - if (!too_stale) - bkey_for_each_ptr(ptrs, ptr) { - struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev); - if (!ca) - continue; - - u8 *gen = &ca->oldest_gen[PTR_BUCKET_NR(ca, ptr)]; - if (gen_after(*gen, ptr->gen)) - *gen = ptr->gen; - } - } - - if (too_stale) { - struct bkey_i *u = bch2_bkey_make_mut(trans, iter, &k, 0); - int ret = PTR_ERR_OR_ZERO(u); - if (ret) - return ret; - - bch2_extent_normalize(c, bkey_i_to_s(u)); - } - - return 0; + return bch2_bkey_drop_stale_ptrs(trans, iter, k); } static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct bch_dev *ca, @@ -1281,6 +1249,12 @@ int bch2_gc_gens(struct bch_fs *c) bch2_time_stats_update(&c->times[BCH_TIME_btree_gc], start_time); trace_and_count(c, gc_gens_end, c); + + if (!(c->sb.compat & BIT_ULL(BCH_COMPAT_no_stale_ptrs))) { + guard(mutex)(&c->sb_lock); + c->disk_sb.sb->compat[0] |= cpu_to_le64(BIT_ULL(BCH_COMPAT_no_stale_ptrs)); + bch2_write_super(c); + } err: for_each_member_device(c, ca) { kvfree(ca->oldest_gen); diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index a8cd7a5a6e7d..ce86d158aa8e 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -702,8 +702,10 @@ static void btree_update_nodes_written(struct btree_update *as) if (ret) goto err; - if (!btree_update_new_nodes_marked_sb(as)) + if 
(!btree_update_new_nodes_marked_sb(as)) { + bch2_trans_unlock_long(trans); btree_update_new_nodes_mark_sb(as); + } /* * Wait for any in flight writes to finish before we free the old nodes diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 021f5cb7998d..00b95841b243 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -462,6 +462,7 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, size_t bucket_nr = PTR_BUCKET_NR(ca, ptr); CLASS(printbuf, buf)(); bool inserting = sectors > 0; + int ret = 0; BUG_ON(!sectors); @@ -489,8 +490,17 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, BCH_FSCK_ERR_ptr_too_stale); } - if (b_gen != ptr->gen && ptr->cached) + if (b_gen != ptr->gen && ptr->cached) { + if (fsck_err_on(c->sb.compat & BIT_ULL(BCH_COMPAT_no_stale_ptrs), + trans, stale_ptr_with_no_stale_ptrs_feature, + "stale cached ptr, but have no_stale_ptrs feature\n%s", + (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { + guard(mutex)(&c->sb_lock); + c->disk_sb.sb->compat[0] &= ~cpu_to_le64(BIT_ULL(BCH_COMPAT_no_stale_ptrs)); + bch2_write_super(c); + } return 1; + } if (unlikely(b_gen != ptr->gen)) { bch2_log_msg_start(c, &buf); @@ -530,7 +540,8 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, } *bucket_sectors += sectors; - return 0; +fsck_err: + return ret; } void bch2_trans_account_disk_usage_change(struct btree_trans *trans) diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 7a0da6cdf78c..ca925c5d1a48 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -393,7 +393,7 @@ restart_drop_extra_replicas: bch2_extent_ptr_decoded_append(insert, &p); bch2_bkey_narrow_crcs(insert, (struct bch_extent_crc_unpacked) { 0 }); - bch2_extent_normalize_by_opts(c, &m->op.opts, bkey_i_to_s(insert)); + bch2_bkey_drop_extra_cached_ptrs(c, &m->op.opts, bkey_i_to_s(insert)); ret = bch2_sum_sector_overwrites(trans, &iter, insert, &should_check_enospc, 
@@ -721,7 +721,7 @@ int bch2_extent_drop_ptrs(struct btree_trans *trans, * will do the appropriate thing with it (turning it into a * KEY_TYPE_error key, or just a discard if it was a cached extent) */ - bch2_extent_normalize_by_opts(c, io_opts, bkey_i_to_s(n)); + bch2_bkey_drop_extra_cached_ptrs(c, io_opts, bkey_i_to_s(n)); /* * Since we're not inserting through an extent iterator diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index adc1f9315eab..420f6922dacb 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -345,6 +345,7 @@ x(BCH_ERR_data_read, data_read_no_encryption_key) \ x(BCH_ERR_data_read, data_read_buffer_too_small) \ x(BCH_ERR_data_read, data_read_key_overwritten) \ + x(0, rbio_narrow_crcs_fail) \ x(BCH_ERR_btree_node_read_err, btree_node_read_err_fixable) \ x(BCH_ERR_btree_node_read_err, btree_node_read_err_want_retry) \ x(BCH_ERR_btree_node_read_err, btree_node_read_err_must_retry) \ diff --git a/fs/bcachefs/extent_update.c b/fs/bcachefs/extent_update.c index 73eb28090bc7..1279026b4c1e 100644 --- a/fs/bcachefs/extent_update.c +++ b/fs/bcachefs/extent_update.c @@ -146,6 +146,7 @@ int bch2_extent_trim_atomic(struct btree_trans *trans, if (bpos_ge(bkey_start_pos(k.k), end)) break; + nr_iters += 1; ret = count_iters_for_insert(trans, k, offset, &end, &nr_iters); if (ret) break; diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 86aa93ea2345..43367d4e671a 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -12,6 +12,7 @@ #include "btree_gc.h" #include "btree_io.h" #include "btree_iter.h" +#include "btree_update.h" #include "buckets.h" #include "checksum.h" #include "compress.h" @@ -1213,6 +1214,21 @@ drop: bch2_bkey_drop_ptr_noerror(k, ptr); } +static bool bch2_bkey_has_stale_ptrs(struct bch_fs *c, struct bkey_s_c k) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + struct bch_dev *ca; + + guard(rcu)(); + bkey_for_each_ptr(ptrs, ptr) + if (ptr->cached && + (ca = bch2_dev_rcu_noerror(c, ptr->dev)) && 
+ dev_ptr_stale_rcu(ca, ptr) > 0) + return true; + + return false; +} + /* * bch2_extent_normalize - clean up an extent, dropping stale pointers etc. * @@ -1221,7 +1237,7 @@ drop: * For existing keys, only called when btree nodes are being rewritten, not when * they're merely being compacted/resorted in memory. */ -bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k) +static void __bch2_bkey_drop_stale_ptrs(struct bch_fs *c, struct bkey_s k) { struct bch_dev *ca; @@ -1230,19 +1246,26 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k) ptr->cached && (!(ca = bch2_dev_rcu_noerror(c, ptr->dev)) || dev_ptr_stale_rcu(ca, ptr) > 0)); +} + +int bch2_bkey_drop_stale_ptrs(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k) +{ + if (bch2_bkey_has_stale_ptrs(trans->c, k)) { + struct bkey_i *u = bch2_bkey_make_mut(trans, iter, &k, + BTREE_UPDATE_internal_snapshot_node); + int ret = PTR_ERR_OR_ZERO(u); + if (ret) + return ret; + + __bch2_bkey_drop_stale_ptrs(trans->c, bkey_i_to_s(u)); + } - return bkey_deleted(k.k); + return 0; } -/* - * bch2_extent_normalize_by_opts - clean up an extent, dropping stale pointers etc. - * - * Like bch2_extent_normalize(), but also only keeps a single cached pointer on - * the promote target. 
- */ -bool bch2_extent_normalize_by_opts(struct bch_fs *c, - struct bch_inode_opts *opts, - struct bkey_s k) +void bch2_bkey_drop_extra_cached_ptrs(struct bch_fs *c, + struct bch_inode_opts *opts, + struct bkey_s k) { struct bkey_ptrs ptrs; bool have_cached_ptr; @@ -1260,8 +1283,6 @@ restart_drop_ptrs: } have_cached_ptr = true; } - - return bkey_deleted(k.k); } void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struct bch_extent_ptr *ptr) diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index 03ea7c689d9a..1ea9752bfe95 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -440,7 +440,6 @@ bool bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); .key_validate = bch2_bkey_ptrs_validate, \ .val_to_text = bch2_bkey_ptrs_to_text, \ .swab = bch2_ptr_swab, \ - .key_normalize = bch2_extent_normalize, \ .key_merge = bch2_extent_merge, \ .trigger = bch2_trigger_extent, \ }) @@ -689,8 +688,8 @@ bch2_extent_has_ptr(struct bkey_s_c, struct extent_ptr_decoded, struct bkey_s); void bch2_extent_ptr_set_cached(struct bch_fs *, struct bch_inode_opts *, struct bkey_s, struct bch_extent_ptr *); -bool bch2_extent_normalize_by_opts(struct bch_fs *, struct bch_inode_opts *, struct bkey_s); -bool bch2_extent_normalize(struct bch_fs *, struct bkey_s); +int bch2_bkey_drop_stale_ptrs(struct btree_trans *, struct btree_iter *, struct bkey_s_c); +void bch2_bkey_drop_extra_cached_ptrs(struct bch_fs *, struct bch_inode_opts *, struct bkey_s); void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *, const struct bch_extent_ptr *); void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *, diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c index aab30571b056..fe684adca370 100644 --- a/fs/bcachefs/fs-io-buffered.c +++ b/fs/bcachefs/fs-io-buffered.c @@ -532,6 +532,39 @@ static void bch2_writepage_io_alloc(struct bch_fs *c, op->wbio.bio.bi_opf = wbc_to_write_flags(wbc); } +static bool 
can_write_now(struct bch_fs *c, unsigned replicas_want, struct closure *cl) +{ + unsigned reserved = OPEN_BUCKETS_COUNT - + (OPEN_BUCKETS_COUNT - bch2_open_buckets_reserved(BCH_WATERMARK_normal)) / 2; + + if (unlikely(c->open_buckets_nr_free <= reserved)) { + closure_wait(&c->open_buckets_wait, cl); + return false; + } + + if (BCH_WATERMARK_normal < c->journal.watermark && !bch2_journal_error(&c->journal)) { + closure_wait(&c->journal.async_wait, cl); + return false; + } + + return true; +} + +static void throttle_writes(struct bch_fs *c, unsigned replicas_want, struct closure *cl) +{ + u64 start = 0; + while (!can_write_now(c, replicas_want, cl)) { + if (!start) + start = local_clock(); + closure_sync(cl); + } + + BUG_ON(closure_nr_remaining(cl) > 1); + + if (start) + bch2_time_stats_update(&c->times[BCH_TIME_blocked_writeback_throttle], start); +} + static int __bch2_writepage(struct folio *folio, struct writeback_control *wbc, void *data) @@ -667,17 +700,6 @@ do_io: return 0; } -static int bch2_write_cache_pages(struct address_space *mapping, - struct writeback_control *wbc, void *data) -{ - struct folio *folio = NULL; - int error; - - while ((folio = writeback_iter(mapping, wbc, folio, &error))) - error = __bch2_writepage(folio, wbc, data); - return error; -} - int bch2_writepages(struct address_space *mapping, struct writeback_control *wbc) { struct bch_fs *c = mapping->host->i_sb->s_fs_info; @@ -686,7 +708,17 @@ int bch2_writepages(struct address_space *mapping, struct writeback_control *wbc bch2_inode_opts_get_inode(c, &to_bch_ei(mapping->host)->ei_inode, &w->opts); blk_start_plug(&w->plug); - int ret = bch2_write_cache_pages(mapping, wbc, w); + + struct closure cl; + closure_init_stack(&cl); + + struct folio *folio = NULL; + int ret = 0; + + while (throttle_writes(c, w->opts.data_replicas, &cl), + (folio = writeback_iter(mapping, wbc, folio, &ret))) + ret = __bch2_writepage(folio, wbc, w); + if (w->io) bch2_writepage_do_io(w); blk_finish_plug(&w->plug); 
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index c7bb5b108e2f..d6a2031e17e8 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -2147,9 +2147,11 @@ static void bch2_evict_inode(struct inode *vinode) KEY_TYPE_QUOTA_WARN); int ret = bch2_inode_rm(c, inode_inum(inode)); if (ret && !bch2_err_matches(ret, EROFS)) { - bch_err_msg(c, ret, "VFS incorrectly tried to delete inode %llu:%llu", - inode->ei_inum.subvol, - inode->ei_inum.inum); + CLASS(printbuf, buf)(); + bch2_trans_do(c, bch2_inum_to_path(trans, inode->ei_inum, &buf)); + + bch_err_msg(c, ret, "VFS incorrectly tried to delete inode %llu:%llu\n%s", + inode->ei_inum.subvol, inode->ei_inum.inum, buf.buf); bch2_sb_error_count(c, BCH_FSCK_ERR_vfs_bad_inode_rm); } @@ -2236,11 +2238,16 @@ static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf) struct bch_fs *c = sb->s_fs_info; struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c); unsigned shift = sb->s_blocksize_bits - 9; + /* - * this assumes inodes take up 64 bytes, which is a decent average + * This assumes inodes take up 64 bytes, which is a decent average * number: + * + * Not anymore - bi_dir, bi_dir_offset came later and shouldn't have + * been varint fields: seeing 144-160 byte inodes, so let's call it 256 + * bytes: */ - u64 avail_inodes = ((usage.capacity - usage.used) << 3); + u64 avail_inodes = ((usage.capacity - usage.used) << 1); buf->f_type = BCACHEFS_STATFS_MAGIC; buf->f_bsize = sb->s_blocksize; diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index ccc44b1fc178..3bde5c07b528 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -1963,7 +1963,8 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, } } - ret = check_extent_overbig(trans, iter, k); + ret = check_extent_overbig(trans, iter, k) ?: + bch2_bkey_drop_stale_ptrs(trans, iter, k); if (ret) goto err; @@ -2040,7 +2041,8 @@ int bch2_check_indirect_extents(struct bch_fs *c) BCH_TRANS_COMMIT_no_enospc, ({ progress_update_iter(trans, 
&progress, &iter); bch2_disk_reservation_put(c, &res); - check_extent_overbig(trans, &iter, k); + check_extent_overbig(trans, &iter, k) ?: + bch2_bkey_drop_stale_ptrs(trans, &iter, k); })); bch2_disk_reservation_put(c, &res); diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 655ed90b2a39..543627fb58be 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -1359,7 +1359,7 @@ err: if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; - return ret ?: bch_err_throw(c, transaction_restart_nested); + return ret; } /* @@ -1398,7 +1398,8 @@ next_parent: int bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot) { return __bch2_inode_rm_snapshot(trans, inum, snapshot) ?: - delete_ancestor_snapshot_inodes(trans, SPOS(0, inum, snapshot)); + delete_ancestor_snapshot_inodes(trans, SPOS(0, inum, snapshot)) ?: + bch_err_throw(trans->c, transaction_restart_nested); } static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos, diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index 7066be2701c0..e7ba0d0bf5ef 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -740,15 +740,13 @@ static void bch2_rbio_error(struct bch_read_bio *rbio, } static int __bch2_rbio_narrow_crcs(struct btree_trans *trans, - struct bch_read_bio *rbio) + struct bch_read_bio *rbio, + struct bch_extent_crc_unpacked *new_crc) { struct bch_fs *c = rbio->c; u64 data_offset = rbio->data_pos.offset - rbio->pick.crc.offset; int ret = 0; - if (crc_is_compressed(rbio->pick.crc)) - return 0; - CLASS(btree_iter, iter)(trans, rbio->data_btree, rbio->data_pos, BTREE_ITER_intent); struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); if ((ret = bkey_err(k))) @@ -756,21 +754,12 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans, if (bversion_cmp(k.k->bversion, rbio->version) || !bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, data_offset)) - return 0; + return bch_err_throw(c, rbio_narrow_crcs_fail); - /* Extent was merged? 
*/ - if (bkey_start_offset(k.k) < data_offset || - k.k->p.offset > data_offset + rbio->pick.crc.uncompressed_size) - return 0; - - struct bch_extent_crc_unpacked new_crc; - if (bch2_rechecksum_bio(c, &rbio->bio, rbio->version, - rbio->pick.crc, NULL, &new_crc, - bkey_start_offset(k.k) - data_offset, k.k->size, - rbio->pick.crc.csum_type)) { - bch_err(c, "error verifying existing checksum while narrowing checksum (memory corruption?)"); - return 0; - } + /* Extent was trimmed/merged? */ + if (!bpos_eq(bkey_start_pos(k.k), rbio->data_pos) || + k.k->p.offset != rbio->data_pos.offset + rbio->pick.crc.live_size) + return bch_err_throw(c, rbio_narrow_crcs_fail); /* * going to be temporarily appending another checksum entry: @@ -782,17 +771,37 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans, bkey_reassemble(new, k); - if (!bch2_bkey_narrow_crcs(new, new_crc)) - return 0; + if (!bch2_bkey_narrow_crcs(new, *new_crc)) + return bch_err_throw(c, rbio_narrow_crcs_fail); return bch2_trans_update(trans, &iter, new, BTREE_UPDATE_internal_snapshot_node); } static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio) { - CLASS(btree_trans, trans)(rbio->c); - commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - __bch2_rbio_narrow_crcs(trans, rbio)); + struct bch_fs *c = rbio->c; + + if (crc_is_compressed(rbio->pick.crc)) + return; + + u64 data_offset = rbio->data_pos.offset - rbio->pick.crc.offset; + + struct bch_extent_crc_unpacked new_crc; + if (bch2_rechecksum_bio(c, &rbio->bio, rbio->version, + rbio->pick.crc, NULL, &new_crc, + rbio->data_pos.offset - data_offset, rbio->pick.crc.live_size, + rbio->pick.crc.csum_type)) { + bch_err(c, "error verifying existing checksum while narrowing checksum (memory corruption?)"); + return; + } + + CLASS(btree_trans, trans)(c); + int ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, + __bch2_rbio_narrow_crcs(trans, rbio, &new_crc)); + if (!ret) + count_event(c, io_read_narrow_crcs); + else if (ret == 
-BCH_ERR_rbio_narrow_crcs_fail) + count_event(c, io_read_narrow_crcs_fail); } static void bch2_read_decompress_err(struct work_struct *work) diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c index 8a3981e1016e..519ef16669e4 100644 --- a/fs/bcachefs/migrate.c +++ b/fs/bcachefs/migrate.c @@ -84,13 +84,6 @@ static int bch2_dev_usrdata_drop_key(struct btree_trans *trans, return ret; /* - * If the new extent no longer has any pointers, bch2_extent_normalize() - * will do the appropriate thing with it (turning it into a - * KEY_TYPE_error key, or just a discard if it was a cached extent) - */ - bch2_extent_normalize(c, bkey_i_to_s(n)); - - /* * Since we're not inserting through an extent iterator * (BTREE_ITER_all_snapshots iterators aren't extent iterators), * we aren't using the extent overwrite path to delete, we're diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 531c2ef128ae..6942d3cfcba3 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -920,6 +920,13 @@ use_clean: if (bch2_blacklist_entries_gc(c)) write_sb = true; + if (!(c->sb.compat & BIT_ULL(BCH_COMPAT_no_stale_ptrs)) && + (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_check_extents)) && + (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_check_indirect_extents))) { + c->disk_sb.sb->compat[0] |= cpu_to_le64(BIT_ULL(BCH_COMPAT_no_stale_ptrs)); + write_sb = true; + } + if (write_sb) bch2_write_super(c); mutex_unlock(&c->sb_lock); @@ -982,8 +989,9 @@ int bch2_fs_initialize(struct bch_fs *c) set_bit(BCH_FS_new_fs, &c->flags); scoped_guard(mutex, &c->sb_lock) { - c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done); - c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done); + c->disk_sb.sb->compat[0] |= cpu_to_le64(BIT_ULL(BCH_COMPAT_extents_above_btree_updates_done)); + c->disk_sb.sb->compat[0] |= cpu_to_le64(BIT_ULL(BCH_COMPAT_bformat_overflow_done)); + c->disk_sb.sb->compat[0] |= 
cpu_to_le64(BIT_ULL(BCH_COMPAT_no_stale_ptrs)); bch2_check_version_downgrade(c); diff --git a/fs/bcachefs/sb-counters_format.h b/fs/bcachefs/sb-counters_format.h index 17cd617664d9..3907ba7edff2 100644 --- a/fs/bcachefs/sb-counters_format.h +++ b/fs/bcachefs/sb-counters_format.h @@ -23,6 +23,8 @@ enum counters_flags { x(io_read_reuse_race, 34, TYPE_COUNTER) \ x(io_read_retry, 32, TYPE_COUNTER) \ x(io_read_fail_and_poison, 95, TYPE_COUNTER) \ + x(io_read_narrow_crcs, 97, TYPE_COUNTER) \ + x(io_read_narrow_crcs_fail, 98, TYPE_COUNTER) \ x(io_write, 1, TYPE_SECTORS) \ x(io_move, 2, TYPE_SECTORS) \ x(io_move_read, 35, TYPE_SECTORS) \ diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 7c6f18a1ee2a..77e3fc92e39b 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -160,7 +160,7 @@ enum bch_fsck_flags { x(extent_ptrs_unwritten, 140, 0) \ x(extent_ptrs_written_and_unwritten, 141, 0) \ x(ptr_to_invalid_device, 142, 0) \ - x(ptr_to_removed_device, 322, 0) \ + x(ptr_to_removed_device, 322, FSCK_AUTOFIX) \ x(ptr_to_duplicate_device, 143, 0) \ x(ptr_after_last_bucket, 144, 0) \ x(ptr_before_first_bucket, 145, 0) \ @@ -170,9 +170,10 @@ enum bch_fsck_flags { x(ptr_to_missing_replicas_entry, 149, FSCK_AUTOFIX) \ x(ptr_to_missing_stripe, 150, 0) \ x(ptr_to_incorrect_stripe, 151, 0) \ - x(ptr_gen_newer_than_bucket_gen, 152, FSCK_AUTOFIX) \ + x(ptr_gen_newer_than_bucket_gen, 152, FSCK_AUTOFIX) \ x(ptr_too_stale, 153, 0) \ x(stale_dirty_ptr, 154, FSCK_AUTOFIX) \ + x(stale_ptr_with_no_stale_ptrs_feature, 327, FSCK_AUTOFIX) \ x(ptr_bucket_data_type_mismatch, 155, 0) \ x(ptr_cached_and_erasure_coded, 156, 0) \ x(ptr_crc_uncompressed_size_too_small, 157, 0) \ @@ -338,7 +339,7 @@ enum bch_fsck_flags { x(dirent_stray_data_after_cf_name, 305, 0) \ x(rebalance_work_incorrectly_set, 309, FSCK_AUTOFIX) \ x(rebalance_work_incorrectly_unset, 310, FSCK_AUTOFIX) \ - x(MAX, 327, 0) + x(MAX, 328, 0) enum bch_sb_error_id { #define x(t, n, 
...) BCH_FSCK_ERR_##t = n, diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index de1e8912975c..c442d7507f83 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -322,6 +322,8 @@ static void __bch2_fs_read_only(struct bch_fs *c) do { clean_passes++; + bch2_do_discards_going_ro(c); + if (bch2_btree_interior_updates_flush(c) || bch2_btree_write_buffer_flush_going_ro(c) || bch2_journal_flush_all_pins(&c->journal) || @@ -833,8 +835,6 @@ int bch2_fs_init_rw(struct bch_fs *c) if (test_bit(BCH_FS_rw_init_done, &c->flags)) return 0; - bch_verbose(c, "doing rw allocations"); - if (!(c->btree_update_wq = alloc_workqueue("bcachefs", WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_UNBOUND, 512)) || !(c->btree_write_complete_wq = alloc_workqueue("bcachefs_btree_write_complete", @@ -1211,12 +1211,14 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts, bch2_opts_apply(&c->opts, *opts); +#ifdef __KERNEL__ if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && c->opts.block_size > PAGE_SIZE) { bch_err(c, "cannot mount bs > ps filesystem without CONFIG_TRANSPARENT_HUGEPAGE"); ret = -EINVAL; goto err; } +#endif c->btree_key_cache_btrees |= 1U << BTREE_ID_alloc; if (c->opts.inodes_use_key_cache) @@ -1991,7 +1993,8 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags, struct printbuf *err) { unsigned dev_idx = ca->dev_idx, data; - bool fast_device_removal = !bch2_request_incompat_feature(c, + bool fast_device_removal = (c->sb.compat & BIT_ULL(BCH_COMPAT_no_stale_ptrs)) && + !bch2_request_incompat_feature(c, bcachefs_metadata_version_fast_device_removal); int ret; diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index 269cdf1a87a4..6c312fd9a447 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -720,47 +720,55 @@ DEFINE_EVENT(fs_str, bucket_alloc_fail, ); DECLARE_EVENT_CLASS(discard_buckets_class, - TP_PROTO(struct bch_fs *c, u64 seen, u64 open, - u64 need_journal_commit, u64 discarded, const char *err), - TP_ARGS(c, seen, 
open, need_journal_commit, discarded, err), + TP_PROTO(struct bch_fs *c, struct discard_buckets_state *s, const char *err), + TP_ARGS(c, s, err), TP_STRUCT__entry( __field(dev_t, dev ) __field(u64, seen ) __field(u64, open ) __field(u64, need_journal_commit ) + __field(u64, commit_in_flight ) + __field(u64, bad_data_type ) + __field(u64, already_discarding ) __field(u64, discarded ) __array(char, err, 16 ) ), TP_fast_assign( __entry->dev = c->dev; - __entry->seen = seen; - __entry->open = open; - __entry->need_journal_commit = need_journal_commit; - __entry->discarded = discarded; + __entry->seen = s->seen; + __entry->open = s->open; + __entry->need_journal_commit = s->need_journal_commit; + __entry->commit_in_flight = s->commit_in_flight; + __entry->bad_data_type = s->bad_data_type; + __entry->already_discarding = s->already_discarding; + __entry->discarded = s->discarded; strscpy(__entry->err, err, sizeof(__entry->err)); ), - TP_printk("%d%d seen %llu open %llu need_journal_commit %llu discarded %llu err %s", + TP_printk("%d%d seen %llu open %llu\n" + "need_commit %llu committing %llu bad_data_type %llu\n" + "already_discarding %llu discarded %llu err %s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->seen, __entry->open, __entry->need_journal_commit, + __entry->commit_in_flight, + __entry->bad_data_type, + __entry->already_discarding, __entry->discarded, __entry->err) ); DEFINE_EVENT(discard_buckets_class, discard_buckets, - TP_PROTO(struct bch_fs *c, u64 seen, u64 open, - u64 need_journal_commit, u64 discarded, const char *err), - TP_ARGS(c, seen, open, need_journal_commit, discarded, err) + TP_PROTO(struct bch_fs *c, struct discard_buckets_state *s, const char *err), + TP_ARGS(c, s, err) ); DEFINE_EVENT(discard_buckets_class, discard_buckets_fast, - TP_PROTO(struct bch_fs *c, u64 seen, u64 open, - u64 need_journal_commit, u64 discarded, const char *err), - TP_ARGS(c, seen, open, need_journal_commit, discarded, err) + TP_PROTO(struct bch_fs *c, struct 
discard_buckets_state *s, const char *err), + TP_ARGS(c, s, err) ); TRACE_EVENT(bucket_invalidate, |