summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/bcachefs/alloc_background.c44
-rw-r--r--fs/bcachefs/alloc_background.h1
-rw-r--r--fs/bcachefs/alloc_foreground.c1
-rw-r--r--fs/bcachefs/alloc_types.h10
-rw-r--r--fs/bcachefs/bcachefs.h6
-rw-r--r--fs/bcachefs/bcachefs_format.h3
-rw-r--r--fs/bcachefs/bkey_methods.c9
-rw-r--r--fs/bcachefs/bkey_methods.h3
-rw-r--r--fs/bcachefs/btree_gc.c40
-rw-r--r--fs/bcachefs/btree_update_interior.c4
-rw-r--r--fs/bcachefs/buckets.c15
-rw-r--r--fs/bcachefs/data_update.c4
-rw-r--r--fs/bcachefs/errcode.h1
-rw-r--r--fs/bcachefs/extent_update.c1
-rw-r--r--fs/bcachefs/extents.c47
-rw-r--r--fs/bcachefs/extents.h5
-rw-r--r--fs/bcachefs/fs-io-buffered.c56
-rw-r--r--fs/bcachefs/fs.c17
-rw-r--r--fs/bcachefs/fsck.c6
-rw-r--r--fs/bcachefs/inode.c5
-rw-r--r--fs/bcachefs/io_read.c55
-rw-r--r--fs/bcachefs/migrate.c7
-rw-r--r--fs/bcachefs/recovery.c12
-rw-r--r--fs/bcachefs/sb-counters_format.h2
-rw-r--r--fs/bcachefs/sb-errors_format.h7
-rw-r--r--fs/bcachefs/super.c9
-rw-r--r--fs/bcachefs/trace.h36
27 files changed, 246 insertions, 160 deletions
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index cab4d6798dd7..21cdc42eff46 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -1771,13 +1771,6 @@ static void discard_in_flight_remove(struct bch_dev *ca, u64 bucket)
darray_remove_item(&ca->discard_buckets_in_flight, i);
}
-struct discard_buckets_state {
- u64 seen;
- u64 open;
- u64 need_journal_commit;
- u64 discarded;
-};
-
static int bch2_discard_one_bucket(struct btree_trans *trans,
struct bch_dev *ca,
struct btree_iter *need_discard_iter,
@@ -1790,6 +1783,8 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
bool discard_locked = false;
int ret = 0;
+ s->seen++;
+
if (bch2_bucket_is_open_safe(c, pos.inode, pos.offset)) {
s->open++;
return 0;
@@ -1800,6 +1795,8 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
if (seq_ready > c->journal.flushed_seq_ondisk) {
if (seq_ready > c->journal.flushing_seq)
s->need_journal_commit++;
+ else
+ s->commit_in_flight++;
return 0;
}
@@ -1815,6 +1812,8 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
return ret;
if (a->v.data_type != BCH_DATA_need_discard) {
+ s->bad_data_type++;
+
if (need_discard_or_freespace_err(trans, k, true, true, true)) {
ret = bch2_btree_bit_mod_iter(trans, need_discard_iter, false);
if (ret)
@@ -1826,8 +1825,10 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
}
if (!fastpath) {
- if (discard_in_flight_add(ca, iter.pos.offset, true))
+ if (discard_in_flight_add(ca, iter.pos.offset, true)) {
+ s->already_discarding++;
goto out;
+ }
discard_locked = true;
}
@@ -1861,6 +1862,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
commit:
ret = bch2_trans_commit(trans, NULL, NULL,
BCH_WATERMARK_btree|
+ BCH_TRANS_COMMIT_no_check_rw|
BCH_TRANS_COMMIT_no_enospc);
if (ret)
goto out;
@@ -1873,14 +1875,11 @@ out:
fsck_err:
if (discard_locked)
discard_in_flight_remove(ca, iter.pos.offset);
- if (!ret)
- s->seen++;
return ret;
}
-static void bch2_do_discards_work(struct work_struct *work)
+static void __bch2_dev_do_discards(struct bch_dev *ca)
{
- struct bch_dev *ca = container_of(work, struct bch_dev, discard_work);
struct bch_fs *c = ca->fs;
struct discard_buckets_state s = {};
struct bpos discard_pos_done = POS_MAX;
@@ -1901,10 +1900,25 @@ static void bch2_do_discards_work(struct work_struct *work)
if (s.need_journal_commit > dev_buckets_available(ca, BCH_WATERMARK_normal))
bch2_journal_flush_async(&c->journal, NULL);
- trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded,
- bch2_err_str(ret));
+ trace_discard_buckets(c, &s, bch2_err_str(ret));
enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_dev_do_discards);
+}
+
+/*
+ * Run the discard path for every member device, from the caller's context.
+ *
+ * A device is only processed if a WRITE io ref could be taken on it;
+ * __bch2_dev_do_discards() drops that ref again when it is done.
+ */
+void bch2_do_discards_going_ro(struct bch_fs *c)
+{
+	for_each_member_device(c, ca) {
+		if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE, BCH_DEV_WRITE_REF_dev_do_discards))
+			continue;
+
+		__bch2_dev_do_discards(ca);
+	}
+}
+
+static void bch2_do_discards_work(struct work_struct *work)
+{
+ struct bch_dev *ca = container_of(work, struct bch_dev, discard_work);
+ struct bch_fs *c = ca->fs;
+
+ __bch2_dev_do_discards(ca);
+
enumerated_ref_put(&c->writes, BCH_WRITE_REF_discard);
}
@@ -1992,7 +2006,7 @@ static void bch2_do_discards_fast_work(struct work_struct *work)
break;
}
- trace_discard_buckets_fast(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret));
+ trace_discard_buckets_fast(c, &s, bch2_err_str(ret));
bch2_trans_put(trans);
enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_discard_one_bucket_fast);
diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h
index c2e8482fbbe6..a602507fef19 100644
--- a/fs/bcachefs/alloc_background.h
+++ b/fs/bcachefs/alloc_background.h
@@ -320,6 +320,7 @@ static inline int bch2_check_discard_freespace_key_async(struct btree_trans *tra
int bch2_check_alloc_info(struct bch_fs *);
int bch2_check_alloc_to_lru_refs(struct bch_fs *);
void bch2_dev_do_discards(struct bch_dev *);
+void bch2_do_discards_going_ro(struct bch_fs *);
void bch2_do_discards(struct bch_fs *);
static inline u64 should_invalidate_buckets(struct bch_dev *ca,
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index 3d125ee81663..97b627ed3b22 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -1529,6 +1529,7 @@ void bch2_fs_alloc_debug_to_text(struct printbuf *out, struct bch_fs *c)
printbuf_tabstop_push(out, 24);
prt_printf(out, "capacity\t%llu\n", c->capacity);
+ prt_printf(out, "used\t%llu\n", bch2_fs_usage_read_short(c).used);
prt_printf(out, "reserved\t%llu\n", c->reserved);
prt_printf(out, "hidden\t%llu\n", percpu_u64_get(&c->usage->hidden));
prt_printf(out, "btree\t%llu\n", percpu_u64_get(&c->usage->btree));
diff --git a/fs/bcachefs/alloc_types.h b/fs/bcachefs/alloc_types.h
index e7becdf22cba..ee52b66dc5d7 100644
--- a/fs/bcachefs/alloc_types.h
+++ b/fs/bcachefs/alloc_types.h
@@ -118,4 +118,14 @@ struct write_point_specifier {
unsigned long v;
};
+struct discard_buckets_state { /* per-run counters for the discard paths, passed to the discard_buckets tracepoints */
+ u64 seen; /* bumped once on every entry to bch2_discard_one_bucket() */
+ u64 open; /* skipped: bch2_bucket_is_open_safe() reported the bucket as open */
+ u64 need_journal_commit; /* skipped: seq_ready is past both flushed_seq_ondisk and flushing_seq */
+ u64 commit_in_flight; /* skipped: seq_ready is past flushed_seq_ondisk but not past flushing_seq */
+ u64 bad_data_type; /* alloc key's data_type was not BCH_DATA_need_discard */
+ u64 already_discarding; /* non-fastpath: discard_in_flight_add() returned nonzero */
+ u64 discarded; /* presumably buckets actually discarded - increment is not in the hunks shown, confirm */
+};
+
#endif /* _BCACHEFS_ALLOC_TYPES_H */
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 553031a3b06a..83d6ab9c1a91 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -458,7 +458,6 @@ BCH_DEBUG_PARAMS_ALL()
x(btree_node_compact) \
x(btree_node_merge) \
x(btree_node_sort) \
- x(btree_node_get) \
x(btree_node_read) \
x(btree_node_read_done) \
x(btree_node_write) \
@@ -466,10 +465,6 @@ BCH_DEBUG_PARAMS_ALL()
x(btree_interior_update_total) \
x(btree_gc) \
x(data_write) \
- x(data_write_to_submit) \
- x(data_write_to_queue) \
- x(data_write_to_btree_update) \
- x(data_write_btree_update) \
x(data_read) \
x(data_promote) \
x(journal_flush_write) \
@@ -483,6 +478,7 @@ BCH_DEBUG_PARAMS_ALL()
x(blocked_allocate) \
x(blocked_allocate_open_bucket) \
x(blocked_write_buffer_full) \
+ x(blocked_writeback_throttle) \
x(nocow_lock_contended)
enum bch_time_stats {
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index 0839397105a9..269a373f3e80 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -965,7 +965,8 @@ enum bch_sb_feature {
x(alloc_info, 0) \
x(alloc_metadata, 1) \
x(extents_above_btree_updates_done, 2) \
- x(bformat_overflow_done, 3)
+ x(bformat_overflow_done, 3) \
+ x(no_stale_ptrs, 4)
enum bch_sb_compat {
#define x(f, n) BCH_COMPAT_##f,
diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c
index 75d73677c4d8..da1a1a21586e 100644
--- a/fs/bcachefs/bkey_methods.c
+++ b/fs/bcachefs/bkey_methods.c
@@ -344,15 +344,6 @@ void bch2_bkey_swab_val(struct bkey_s k)
ops->swab(k);
}
-bool bch2_bkey_normalize(struct bch_fs *c, struct bkey_s k)
-{
- const struct bkey_ops *ops = bch2_bkey_type_ops(k.k->type);
-
- return ops->key_normalize
- ? ops->key_normalize(c, k)
- : false;
-}
-
bool bch2_bkey_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r)
{
const struct bkey_ops *ops = bch2_bkey_type_ops(l.k->type);
diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h
index bf34111cdf00..5adce4e9294b 100644
--- a/fs/bcachefs/bkey_methods.h
+++ b/fs/bcachefs/bkey_methods.h
@@ -26,7 +26,6 @@ struct bkey_ops {
void (*val_to_text)(struct printbuf *, struct bch_fs *,
struct bkey_s_c);
void (*swab)(struct bkey_s);
- bool (*key_normalize)(struct bch_fs *, struct bkey_s);
bool (*key_merge)(struct bch_fs *, struct bkey_s, struct bkey_s_c);
int (*trigger)(struct btree_trans *, enum btree_id, unsigned,
struct bkey_s_c, struct bkey_s,
@@ -66,8 +65,6 @@ void bch2_bkey_val_to_text(struct printbuf *, struct bch_fs *,
void bch2_bkey_swab_val(struct bkey_s);
-bool bch2_bkey_normalize(struct bch_fs *, struct bkey_s);
-
static inline bool bch2_bkey_maybe_mergable(const struct bkey *l, const struct bkey *r)
{
return l->type == r->type &&
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 2338feb8d8ed..f45aa34d22de 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -1140,43 +1140,11 @@ static int gc_btree_gens_key(struct btree_trans *trans,
struct bkey_s_c k)
{
struct bch_fs *c = trans->c;
- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
if (unlikely(test_bit(BCH_FS_going_ro, &c->flags)))
return -EROFS;
- bool too_stale = false;
- scoped_guard(rcu) {
- bkey_for_each_ptr(ptrs, ptr) {
- struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev);
- if (!ca)
- continue;
-
- too_stale |= dev_ptr_stale(ca, ptr) > 16;
- }
-
- if (!too_stale)
- bkey_for_each_ptr(ptrs, ptr) {
- struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev);
- if (!ca)
- continue;
-
- u8 *gen = &ca->oldest_gen[PTR_BUCKET_NR(ca, ptr)];
- if (gen_after(*gen, ptr->gen))
- *gen = ptr->gen;
- }
- }
-
- if (too_stale) {
- struct bkey_i *u = bch2_bkey_make_mut(trans, iter, &k, 0);
- int ret = PTR_ERR_OR_ZERO(u);
- if (ret)
- return ret;
-
- bch2_extent_normalize(c, bkey_i_to_s(u));
- }
-
- return 0;
+ return bch2_bkey_drop_stale_ptrs(trans, iter, k);
}
static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct bch_dev *ca,
@@ -1281,6 +1249,12 @@ int bch2_gc_gens(struct bch_fs *c)
bch2_time_stats_update(&c->times[BCH_TIME_btree_gc], start_time);
trace_and_count(c, gc_gens_end, c);
+
+ if (!(c->sb.compat & BIT_ULL(BCH_COMPAT_no_stale_ptrs))) {
+ guard(mutex)(&c->sb_lock);
+ c->disk_sb.sb->compat[0] |= cpu_to_le64(BIT_ULL(BCH_COMPAT_no_stale_ptrs));
+ bch2_write_super(c);
+ }
err:
for_each_member_device(c, ca) {
kvfree(ca->oldest_gen);
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index a8cd7a5a6e7d..ce86d158aa8e 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -702,8 +702,10 @@ static void btree_update_nodes_written(struct btree_update *as)
if (ret)
goto err;
- if (!btree_update_new_nodes_marked_sb(as))
+ if (!btree_update_new_nodes_marked_sb(as)) {
+ bch2_trans_unlock_long(trans);
btree_update_new_nodes_mark_sb(as);
+ }
/*
* Wait for any in flight writes to finish before we free the old nodes
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 021f5cb7998d..00b95841b243 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -462,6 +462,7 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca,
size_t bucket_nr = PTR_BUCKET_NR(ca, ptr);
CLASS(printbuf, buf)();
bool inserting = sectors > 0;
+ int ret = 0;
BUG_ON(!sectors);
@@ -489,8 +490,17 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca,
BCH_FSCK_ERR_ptr_too_stale);
}
- if (b_gen != ptr->gen && ptr->cached)
+ if (b_gen != ptr->gen && ptr->cached) {
+ if (fsck_err_on(c->sb.compat & BIT_ULL(BCH_COMPAT_no_stale_ptrs),
+ trans, stale_ptr_with_no_stale_ptrs_feature,
+ "stale cached ptr, but have no_stale_ptrs feature\n%s",
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
+ guard(mutex)(&c->sb_lock);
+ c->disk_sb.sb->compat[0] &= ~cpu_to_le64(BIT_ULL(BCH_COMPAT_no_stale_ptrs));
+ bch2_write_super(c);
+ }
return 1;
+ }
if (unlikely(b_gen != ptr->gen)) {
bch2_log_msg_start(c, &buf);
@@ -530,7 +540,8 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca,
}
*bucket_sectors += sectors;
- return 0;
+fsck_err:
+ return ret;
}
void bch2_trans_account_disk_usage_change(struct btree_trans *trans)
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index 7a0da6cdf78c..ca925c5d1a48 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -393,7 +393,7 @@ restart_drop_extra_replicas:
bch2_extent_ptr_decoded_append(insert, &p);
bch2_bkey_narrow_crcs(insert, (struct bch_extent_crc_unpacked) { 0 });
- bch2_extent_normalize_by_opts(c, &m->op.opts, bkey_i_to_s(insert));
+ bch2_bkey_drop_extra_cached_ptrs(c, &m->op.opts, bkey_i_to_s(insert));
ret = bch2_sum_sector_overwrites(trans, &iter, insert,
&should_check_enospc,
@@ -721,7 +721,7 @@ int bch2_extent_drop_ptrs(struct btree_trans *trans,
* will do the appropriate thing with it (turning it into a
* KEY_TYPE_error key, or just a discard if it was a cached extent)
*/
- bch2_extent_normalize_by_opts(c, io_opts, bkey_i_to_s(n));
+ bch2_bkey_drop_extra_cached_ptrs(c, io_opts, bkey_i_to_s(n));
/*
* Since we're not inserting through an extent iterator
diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
index adc1f9315eab..420f6922dacb 100644
--- a/fs/bcachefs/errcode.h
+++ b/fs/bcachefs/errcode.h
@@ -345,6 +345,7 @@
x(BCH_ERR_data_read, data_read_no_encryption_key) \
x(BCH_ERR_data_read, data_read_buffer_too_small) \
x(BCH_ERR_data_read, data_read_key_overwritten) \
+ x(0, rbio_narrow_crcs_fail) \
x(BCH_ERR_btree_node_read_err, btree_node_read_err_fixable) \
x(BCH_ERR_btree_node_read_err, btree_node_read_err_want_retry) \
x(BCH_ERR_btree_node_read_err, btree_node_read_err_must_retry) \
diff --git a/fs/bcachefs/extent_update.c b/fs/bcachefs/extent_update.c
index 73eb28090bc7..1279026b4c1e 100644
--- a/fs/bcachefs/extent_update.c
+++ b/fs/bcachefs/extent_update.c
@@ -146,6 +146,7 @@ int bch2_extent_trim_atomic(struct btree_trans *trans,
if (bpos_ge(bkey_start_pos(k.k), end))
break;
+ nr_iters += 1;
ret = count_iters_for_insert(trans, k, offset, &end, &nr_iters);
if (ret)
break;
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index 86aa93ea2345..43367d4e671a 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -12,6 +12,7 @@
#include "btree_gc.h"
#include "btree_io.h"
#include "btree_iter.h"
+#include "btree_update.h"
#include "buckets.h"
#include "checksum.h"
#include "compress.h"
@@ -1213,6 +1214,21 @@ drop:
bch2_bkey_drop_ptr_noerror(k, ptr);
}
+/*
+ * Returns true if @k has at least one cached pointer, to a device that can
+ * still be looked up, for which dev_ptr_stale_rcu() is > 0.
+ *
+ * Non-cached pointers and pointers whose device lookup fails are ignored.
+ */
+static bool bch2_bkey_has_stale_ptrs(struct bch_fs *c, struct bkey_s_c k)
+{
+	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+
+	guard(rcu)();
+	bkey_for_each_ptr(ptrs, ptr) {
+		if (!ptr->cached)
+			continue;
+
+		struct bch_dev *ca = bch2_dev_rcu_noerror(c, ptr->dev);
+		if (!ca)
+			continue;
+
+		if (dev_ptr_stale_rcu(ca, ptr) > 0)
+			return true;
+	}
+
+	return false;
+}
+
/*
* bch2_extent_normalize - clean up an extent, dropping stale pointers etc.
*
@@ -1221,7 +1237,7 @@ drop:
* For existing keys, only called when btree nodes are being rewritten, not when
* they're merely being compacted/resorted in memory.
*/
-bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k)
+static void __bch2_bkey_drop_stale_ptrs(struct bch_fs *c, struct bkey_s k)
{
struct bch_dev *ca;
@@ -1230,19 +1246,26 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k)
ptr->cached &&
(!(ca = bch2_dev_rcu_noerror(c, ptr->dev)) ||
dev_ptr_stale_rcu(ca, ptr) > 0));
+}
+
+/*
+ * bch2_bkey_drop_stale_ptrs - drop stale cached pointers from an existing key
+ *
+ * Only keys for which bch2_bkey_has_stale_ptrs() reports a stale cached
+ * pointer are made mutable (bch2_bkey_make_mut()) and have those pointers
+ * dropped; every other key is left untouched.
+ *
+ * Returns 0 on success, including when there was nothing to drop, or the
+ * error from bch2_bkey_make_mut().
+ */
+int bch2_bkey_drop_stale_ptrs(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k)
+{
+	/* Rewrite only when there is something to drop (was inverted) */
+	if (bch2_bkey_has_stale_ptrs(trans->c, k)) {
+		struct bkey_i *u = bch2_bkey_make_mut(trans, iter, &k,
+					BTREE_UPDATE_internal_snapshot_node);
+		int ret = PTR_ERR_OR_ZERO(u);
+		if (ret)
+			return ret;
+
+		__bch2_bkey_drop_stale_ptrs(trans->c, bkey_i_to_s(u));
+	}
-	return bkey_deleted(k.k);
+	return 0;
}
-/*
- * bch2_extent_normalize_by_opts - clean up an extent, dropping stale pointers etc.
- *
- * Like bch2_extent_normalize(), but also only keeps a single cached pointer on
- * the promote target.
- */
-bool bch2_extent_normalize_by_opts(struct bch_fs *c,
- struct bch_inode_opts *opts,
- struct bkey_s k)
+void bch2_bkey_drop_extra_cached_ptrs(struct bch_fs *c,
+ struct bch_inode_opts *opts,
+ struct bkey_s k)
{
struct bkey_ptrs ptrs;
bool have_cached_ptr;
@@ -1260,8 +1283,6 @@ restart_drop_ptrs:
}
have_cached_ptr = true;
}
-
- return bkey_deleted(k.k);
}
void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struct bch_extent_ptr *ptr)
diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h
index 03ea7c689d9a..1ea9752bfe95 100644
--- a/fs/bcachefs/extents.h
+++ b/fs/bcachefs/extents.h
@@ -440,7 +440,6 @@ bool bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
.key_validate = bch2_bkey_ptrs_validate, \
.val_to_text = bch2_bkey_ptrs_to_text, \
.swab = bch2_ptr_swab, \
- .key_normalize = bch2_extent_normalize, \
.key_merge = bch2_extent_merge, \
.trigger = bch2_trigger_extent, \
})
@@ -689,8 +688,8 @@ bch2_extent_has_ptr(struct bkey_s_c, struct extent_ptr_decoded, struct bkey_s);
void bch2_extent_ptr_set_cached(struct bch_fs *, struct bch_inode_opts *,
struct bkey_s, struct bch_extent_ptr *);
-bool bch2_extent_normalize_by_opts(struct bch_fs *, struct bch_inode_opts *, struct bkey_s);
-bool bch2_extent_normalize(struct bch_fs *, struct bkey_s);
+int bch2_bkey_drop_stale_ptrs(struct btree_trans *, struct btree_iter *, struct bkey_s_c);
+void bch2_bkey_drop_extra_cached_ptrs(struct bch_fs *, struct bch_inode_opts *, struct bkey_s);
void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *, const struct bch_extent_ptr *);
void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *,
diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c
index aab30571b056..fe684adca370 100644
--- a/fs/bcachefs/fs-io-buffered.c
+++ b/fs/bcachefs/fs-io-buffered.c
@@ -532,6 +532,39 @@ static void bch2_writepage_io_alloc(struct bch_fs *c,
op->wbio.bio.bi_opf = wbc_to_write_flags(wbc);
}
+/*
+ * Decide whether writeback may issue more writes right now.
+ *
+ * Returns true if so. Otherwise @cl is put on the waitlist of whichever
+ * resource is short (open buckets first, then the journal) and false is
+ * returned. @replicas_want is not used here.
+ */
+static bool can_write_now(struct bch_fs *c, unsigned replicas_want, struct closure *cl)
+{
+	/* Threshold: the normal-watermark reserve plus half of what is left over */
+	unsigned unreserved = OPEN_BUCKETS_COUNT -
+		bch2_open_buckets_reserved(BCH_WATERMARK_normal);
+	unsigned reserved = OPEN_BUCKETS_COUNT - unreserved / 2;
+
+	if (unlikely(c->open_buckets_nr_free <= reserved)) {
+		closure_wait(&c->open_buckets_wait, cl);
+		return false;
+	}
+
+	/* Journal watermark is above normal and the journal is not in an error state */
+	if (c->journal.watermark > BCH_WATERMARK_normal &&
+	    !bch2_journal_error(&c->journal)) {
+		closure_wait(&c->journal.async_wait, cl);
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Hold the caller back until can_write_now() says writeback may proceed.
+ *
+ * If at least one check failed, the time since the first failed check is
+ * recorded under BCH_TIME_blocked_writeback_throttle.
+ */
+static void throttle_writes(struct bch_fs *c, unsigned replicas_want, struct closure *cl)
+{
+	u64 blocked_since = 0;
+
+	for (;;) {
+		if (can_write_now(c, replicas_want, cl))
+			break;
+
+		/* Remember when we first had to wait */
+		if (!blocked_since)
+			blocked_since = local_clock();
+
+		/* can_write_now() queued @cl on a waitlist; wait on it, then re-check */
+		closure_sync(cl);
+	}
+
+	BUG_ON(closure_nr_remaining(cl) > 1);
+
+	if (!blocked_since)
+		return;
+
+	bch2_time_stats_update(&c->times[BCH_TIME_blocked_writeback_throttle], blocked_since);
+}
+
static int __bch2_writepage(struct folio *folio,
struct writeback_control *wbc,
void *data)
@@ -667,17 +700,6 @@ do_io:
return 0;
}
-static int bch2_write_cache_pages(struct address_space *mapping,
- struct writeback_control *wbc, void *data)
-{
- struct folio *folio = NULL;
- int error;
-
- while ((folio = writeback_iter(mapping, wbc, folio, &error)))
- error = __bch2_writepage(folio, wbc, data);
- return error;
-}
-
int bch2_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
struct bch_fs *c = mapping->host->i_sb->s_fs_info;
@@ -686,7 +708,17 @@ int bch2_writepages(struct address_space *mapping, struct writeback_control *wbc
bch2_inode_opts_get_inode(c, &to_bch_ei(mapping->host)->ei_inode, &w->opts);
blk_start_plug(&w->plug);
- int ret = bch2_write_cache_pages(mapping, wbc, w);
+
+ struct closure cl;
+ closure_init_stack(&cl);
+
+ struct folio *folio = NULL;
+ int ret = 0;
+
+ while (throttle_writes(c, w->opts.data_replicas, &cl),
+ (folio = writeback_iter(mapping, wbc, folio, &ret)))
+ ret = __bch2_writepage(folio, wbc, w);
+
if (w->io)
bch2_writepage_do_io(w);
blk_finish_plug(&w->plug);
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index c7bb5b108e2f..d6a2031e17e8 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -2147,9 +2147,11 @@ static void bch2_evict_inode(struct inode *vinode)
KEY_TYPE_QUOTA_WARN);
int ret = bch2_inode_rm(c, inode_inum(inode));
if (ret && !bch2_err_matches(ret, EROFS)) {
- bch_err_msg(c, ret, "VFS incorrectly tried to delete inode %llu:%llu",
- inode->ei_inum.subvol,
- inode->ei_inum.inum);
+ CLASS(printbuf, buf)();
+ bch2_trans_do(c, bch2_inum_to_path(trans, inode->ei_inum, &buf));
+
+ bch_err_msg(c, ret, "VFS incorrectly tried to delete inode %llu:%llu\n%s",
+ inode->ei_inum.subvol, inode->ei_inum.inum, buf.buf);
bch2_sb_error_count(c, BCH_FSCK_ERR_vfs_bad_inode_rm);
}
@@ -2236,11 +2238,16 @@ static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf)
struct bch_fs *c = sb->s_fs_info;
struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c);
unsigned shift = sb->s_blocksize_bits - 9;
+
/*
- * this assumes inodes take up 64 bytes, which is a decent average
+ * This assumes inodes take up 64 bytes, which is a decent average
* number:
+ *
+ * Not anymore - bi_dir, bi_dir_offset came later and shouldn't have
+ * been varint fields: seeing 144-160 byte inodes, so let's call it 256
+ * bytes:
*/
- u64 avail_inodes = ((usage.capacity - usage.used) << 3);
+ u64 avail_inodes = ((usage.capacity - usage.used) << 1);
buf->f_type = BCACHEFS_STATFS_MAGIC;
buf->f_bsize = sb->s_blocksize;
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index ccc44b1fc178..3bde5c07b528 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -1963,7 +1963,8 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
}
}
- ret = check_extent_overbig(trans, iter, k);
+ ret = check_extent_overbig(trans, iter, k) ?:
+ bch2_bkey_drop_stale_ptrs(trans, iter, k);
if (ret)
goto err;
@@ -2040,7 +2041,8 @@ int bch2_check_indirect_extents(struct bch_fs *c)
BCH_TRANS_COMMIT_no_enospc, ({
progress_update_iter(trans, &progress, &iter);
bch2_disk_reservation_put(c, &res);
- check_extent_overbig(trans, &iter, k);
+ check_extent_overbig(trans, &iter, k) ?:
+ bch2_bkey_drop_stale_ptrs(trans, &iter, k);
}));
bch2_disk_reservation_put(c, &res);
diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c
index 655ed90b2a39..543627fb58be 100644
--- a/fs/bcachefs/inode.c
+++ b/fs/bcachefs/inode.c
@@ -1359,7 +1359,7 @@ err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
- return ret ?: bch_err_throw(c, transaction_restart_nested);
+ return ret;
}
/*
@@ -1398,7 +1398,8 @@ next_parent:
int bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot)
{
return __bch2_inode_rm_snapshot(trans, inum, snapshot) ?:
- delete_ancestor_snapshot_inodes(trans, SPOS(0, inum, snapshot));
+ delete_ancestor_snapshot_inodes(trans, SPOS(0, inum, snapshot)) ?:
+ bch_err_throw(trans->c, transaction_restart_nested);
}
static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos,
diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index 7066be2701c0..e7ba0d0bf5ef 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -740,15 +740,13 @@ static void bch2_rbio_error(struct bch_read_bio *rbio,
}
static int __bch2_rbio_narrow_crcs(struct btree_trans *trans,
- struct bch_read_bio *rbio)
+ struct bch_read_bio *rbio,
+ struct bch_extent_crc_unpacked *new_crc)
{
struct bch_fs *c = rbio->c;
u64 data_offset = rbio->data_pos.offset - rbio->pick.crc.offset;
int ret = 0;
- if (crc_is_compressed(rbio->pick.crc))
- return 0;
-
CLASS(btree_iter, iter)(trans, rbio->data_btree, rbio->data_pos, BTREE_ITER_intent);
struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter);
if ((ret = bkey_err(k)))
@@ -756,21 +754,12 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans,
if (bversion_cmp(k.k->bversion, rbio->version) ||
!bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, data_offset))
- return 0;
+ return bch_err_throw(c, rbio_narrow_crcs_fail);
- /* Extent was merged? */
- if (bkey_start_offset(k.k) < data_offset ||
- k.k->p.offset > data_offset + rbio->pick.crc.uncompressed_size)
- return 0;
-
- struct bch_extent_crc_unpacked new_crc;
- if (bch2_rechecksum_bio(c, &rbio->bio, rbio->version,
- rbio->pick.crc, NULL, &new_crc,
- bkey_start_offset(k.k) - data_offset, k.k->size,
- rbio->pick.crc.csum_type)) {
- bch_err(c, "error verifying existing checksum while narrowing checksum (memory corruption?)");
- return 0;
- }
+ /* Extent was trimmed/merged? */
+ if (!bpos_eq(bkey_start_pos(k.k), rbio->data_pos) ||
+ k.k->p.offset != rbio->data_pos.offset + rbio->pick.crc.live_size)
+ return bch_err_throw(c, rbio_narrow_crcs_fail);
/*
* going to be temporarily appending another checksum entry:
@@ -782,17 +771,37 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans,
bkey_reassemble(new, k);
- if (!bch2_bkey_narrow_crcs(new, new_crc))
- return 0;
+ if (!bch2_bkey_narrow_crcs(new, *new_crc))
+ return bch_err_throw(c, rbio_narrow_crcs_fail);
return bch2_trans_update(trans, &iter, new, BTREE_UPDATE_internal_snapshot_node);
}
static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio)
{
- CLASS(btree_trans, trans)(rbio->c);
- commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- __bch2_rbio_narrow_crcs(trans, rbio));
+ struct bch_fs *c = rbio->c;
+
+ if (crc_is_compressed(rbio->pick.crc))
+ return;
+
+ u64 data_offset = rbio->data_pos.offset - rbio->pick.crc.offset;
+
+ struct bch_extent_crc_unpacked new_crc;
+ if (bch2_rechecksum_bio(c, &rbio->bio, rbio->version,
+ rbio->pick.crc, NULL, &new_crc,
+ rbio->data_pos.offset - data_offset, rbio->pick.crc.live_size,
+ rbio->pick.crc.csum_type)) {
+ bch_err(c, "error verifying existing checksum while narrowing checksum (memory corruption?)");
+ return;
+ }
+
+ CLASS(btree_trans, trans)(c);
+ int ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ __bch2_rbio_narrow_crcs(trans, rbio, &new_crc));
+ if (!ret)
+ count_event(c, io_read_narrow_crcs);
+ else if (ret == -BCH_ERR_rbio_narrow_crcs_fail)
+ count_event(c, io_read_narrow_crcs_fail);
}
static void bch2_read_decompress_err(struct work_struct *work)
diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c
index 8a3981e1016e..519ef16669e4 100644
--- a/fs/bcachefs/migrate.c
+++ b/fs/bcachefs/migrate.c
@@ -84,13 +84,6 @@ static int bch2_dev_usrdata_drop_key(struct btree_trans *trans,
return ret;
/*
- * If the new extent no longer has any pointers, bch2_extent_normalize()
- * will do the appropriate thing with it (turning it into a
- * KEY_TYPE_error key, or just a discard if it was a cached extent)
- */
- bch2_extent_normalize(c, bkey_i_to_s(n));
-
- /*
* Since we're not inserting through an extent iterator
* (BTREE_ITER_all_snapshots iterators aren't extent iterators),
* we aren't using the extent overwrite path to delete, we're
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 531c2ef128ae..6942d3cfcba3 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -920,6 +920,13 @@ use_clean:
if (bch2_blacklist_entries_gc(c))
write_sb = true;
+ if (!(c->sb.compat & BIT_ULL(BCH_COMPAT_no_stale_ptrs)) &&
+ (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_check_extents)) &&
+ (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_check_indirect_extents))) {
+ c->disk_sb.sb->compat[0] |= cpu_to_le64(BIT_ULL(BCH_COMPAT_no_stale_ptrs));
+ write_sb = true;
+ }
+
if (write_sb)
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
@@ -982,8 +989,9 @@ int bch2_fs_initialize(struct bch_fs *c)
set_bit(BCH_FS_new_fs, &c->flags);
scoped_guard(mutex, &c->sb_lock) {
- c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
- c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done);
+ c->disk_sb.sb->compat[0] |= cpu_to_le64(BIT_ULL(BCH_COMPAT_extents_above_btree_updates_done));
+ c->disk_sb.sb->compat[0] |= cpu_to_le64(BIT_ULL(BCH_COMPAT_bformat_overflow_done));
+ c->disk_sb.sb->compat[0] |= cpu_to_le64(BIT_ULL(BCH_COMPAT_no_stale_ptrs));
bch2_check_version_downgrade(c);
diff --git a/fs/bcachefs/sb-counters_format.h b/fs/bcachefs/sb-counters_format.h
index 17cd617664d9..3907ba7edff2 100644
--- a/fs/bcachefs/sb-counters_format.h
+++ b/fs/bcachefs/sb-counters_format.h
@@ -23,6 +23,8 @@ enum counters_flags {
x(io_read_reuse_race, 34, TYPE_COUNTER) \
x(io_read_retry, 32, TYPE_COUNTER) \
x(io_read_fail_and_poison, 95, TYPE_COUNTER) \
+ x(io_read_narrow_crcs, 97, TYPE_COUNTER) \
+ x(io_read_narrow_crcs_fail, 98, TYPE_COUNTER) \
x(io_write, 1, TYPE_SECTORS) \
x(io_move, 2, TYPE_SECTORS) \
x(io_move_read, 35, TYPE_SECTORS) \
diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h
index 7c6f18a1ee2a..77e3fc92e39b 100644
--- a/fs/bcachefs/sb-errors_format.h
+++ b/fs/bcachefs/sb-errors_format.h
@@ -160,7 +160,7 @@ enum bch_fsck_flags {
x(extent_ptrs_unwritten, 140, 0) \
x(extent_ptrs_written_and_unwritten, 141, 0) \
x(ptr_to_invalid_device, 142, 0) \
- x(ptr_to_removed_device, 322, 0) \
+ x(ptr_to_removed_device, 322, FSCK_AUTOFIX) \
x(ptr_to_duplicate_device, 143, 0) \
x(ptr_after_last_bucket, 144, 0) \
x(ptr_before_first_bucket, 145, 0) \
@@ -170,9 +170,10 @@ enum bch_fsck_flags {
x(ptr_to_missing_replicas_entry, 149, FSCK_AUTOFIX) \
x(ptr_to_missing_stripe, 150, 0) \
x(ptr_to_incorrect_stripe, 151, 0) \
- x(ptr_gen_newer_than_bucket_gen, 152, FSCK_AUTOFIX) \
+ x(ptr_gen_newer_than_bucket_gen, 152, FSCK_AUTOFIX) \
x(ptr_too_stale, 153, 0) \
x(stale_dirty_ptr, 154, FSCK_AUTOFIX) \
+ x(stale_ptr_with_no_stale_ptrs_feature, 327, FSCK_AUTOFIX) \
x(ptr_bucket_data_type_mismatch, 155, 0) \
x(ptr_cached_and_erasure_coded, 156, 0) \
x(ptr_crc_uncompressed_size_too_small, 157, 0) \
@@ -338,7 +339,7 @@ enum bch_fsck_flags {
x(dirent_stray_data_after_cf_name, 305, 0) \
x(rebalance_work_incorrectly_set, 309, FSCK_AUTOFIX) \
x(rebalance_work_incorrectly_unset, 310, FSCK_AUTOFIX) \
- x(MAX, 327, 0)
+ x(MAX, 328, 0)
enum bch_sb_error_id {
#define x(t, n, ...) BCH_FSCK_ERR_##t = n,
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index de1e8912975c..c442d7507f83 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -322,6 +322,8 @@ static void __bch2_fs_read_only(struct bch_fs *c)
do {
clean_passes++;
+ bch2_do_discards_going_ro(c);
+
if (bch2_btree_interior_updates_flush(c) ||
bch2_btree_write_buffer_flush_going_ro(c) ||
bch2_journal_flush_all_pins(&c->journal) ||
@@ -833,8 +835,6 @@ int bch2_fs_init_rw(struct bch_fs *c)
if (test_bit(BCH_FS_rw_init_done, &c->flags))
return 0;
- bch_verbose(c, "doing rw allocations");
-
if (!(c->btree_update_wq = alloc_workqueue("bcachefs",
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_UNBOUND, 512)) ||
!(c->btree_write_complete_wq = alloc_workqueue("bcachefs_btree_write_complete",
@@ -1211,12 +1211,14 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts,
bch2_opts_apply(&c->opts, *opts);
+#ifdef __KERNEL__
if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
c->opts.block_size > PAGE_SIZE) {
bch_err(c, "cannot mount bs > ps filesystem without CONFIG_TRANSPARENT_HUGEPAGE");
ret = -EINVAL;
goto err;
}
+#endif
c->btree_key_cache_btrees |= 1U << BTREE_ID_alloc;
if (c->opts.inodes_use_key_cache)
@@ -1991,7 +1993,8 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags,
struct printbuf *err)
{
unsigned dev_idx = ca->dev_idx, data;
- bool fast_device_removal = !bch2_request_incompat_feature(c,
+ bool fast_device_removal = (c->sb.compat & BIT_ULL(BCH_COMPAT_no_stale_ptrs)) &&
+ !bch2_request_incompat_feature(c,
bcachefs_metadata_version_fast_device_removal);
int ret;
diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h
index 269cdf1a87a4..6c312fd9a447 100644
--- a/fs/bcachefs/trace.h
+++ b/fs/bcachefs/trace.h
@@ -720,47 +720,55 @@ DEFINE_EVENT(fs_str, bucket_alloc_fail,
);
DECLARE_EVENT_CLASS(discard_buckets_class,
- TP_PROTO(struct bch_fs *c, u64 seen, u64 open,
- u64 need_journal_commit, u64 discarded, const char *err),
- TP_ARGS(c, seen, open, need_journal_commit, discarded, err),
+ TP_PROTO(struct bch_fs *c, struct discard_buckets_state *s, const char *err),
+ TP_ARGS(c, s, err),
TP_STRUCT__entry(
__field(dev_t, dev )
__field(u64, seen )
__field(u64, open )
__field(u64, need_journal_commit )
+ __field(u64, commit_in_flight )
+ __field(u64, bad_data_type )
+ __field(u64, already_discarding )
__field(u64, discarded )
__array(char, err, 16 )
),
TP_fast_assign(
__entry->dev = c->dev;
- __entry->seen = seen;
- __entry->open = open;
- __entry->need_journal_commit = need_journal_commit;
- __entry->discarded = discarded;
+ __entry->seen = s->seen;
+ __entry->open = s->open;
+ __entry->need_journal_commit = s->need_journal_commit;
+ __entry->commit_in_flight = s->commit_in_flight;
+ __entry->bad_data_type = s->bad_data_type;
+ __entry->already_discarding = s->already_discarding;
+ __entry->discarded = s->discarded;
strscpy(__entry->err, err, sizeof(__entry->err));
),
- TP_printk("%d%d seen %llu open %llu need_journal_commit %llu discarded %llu err %s",
+ TP_printk("%d%d seen %llu open %llu\n"
+ "need_commit %llu committing %llu bad_data_type %llu\n"
+ "already_discarding %llu discarded %llu err %s",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->seen,
__entry->open,
__entry->need_journal_commit,
+ __entry->commit_in_flight,
+ __entry->bad_data_type,
+ __entry->already_discarding,
__entry->discarded,
__entry->err)
);
DEFINE_EVENT(discard_buckets_class, discard_buckets,
- TP_PROTO(struct bch_fs *c, u64 seen, u64 open,
- u64 need_journal_commit, u64 discarded, const char *err),
- TP_ARGS(c, seen, open, need_journal_commit, discarded, err)
+ TP_PROTO(struct bch_fs *c, struct discard_buckets_state *s, const char *err),
+ TP_ARGS(c, s, err)
);
DEFINE_EVENT(discard_buckets_class, discard_buckets_fast,
- TP_PROTO(struct bch_fs *c, u64 seen, u64 open,
- u64 need_journal_commit, u64 discarded, const char *err),
- TP_ARGS(c, seen, open, need_journal_commit, discarded, err)
+ TP_PROTO(struct bch_fs *c, struct discard_buckets_state *s, const char *err),
+ TP_ARGS(c, s, err)
);
TRACE_EVENT(bucket_invalidate,