summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/bcachefs/alloc_background.c44
-rw-r--r--fs/bcachefs/alloc_background.h1
-rw-r--r--fs/bcachefs/alloc_foreground.c1
-rw-r--r--fs/bcachefs/alloc_types.h10
-rw-r--r--fs/bcachefs/backpointers.c14
-rw-r--r--fs/bcachefs/bcachefs.h5
-rw-r--r--fs/bcachefs/bcachefs_format.h8
-rw-r--r--fs/bcachefs/bkey_methods.c9
-rw-r--r--fs/bcachefs/bkey_methods.h3
-rw-r--r--fs/bcachefs/btree_gc.c42
-rw-r--r--fs/bcachefs/btree_locking.c8
-rw-r--r--fs/bcachefs/btree_update_interior.c4
-rw-r--r--fs/bcachefs/buckets.c31
-rw-r--r--fs/bcachefs/data_update.c4
-rw-r--r--fs/bcachefs/disk_accounting_format.h10
-rw-r--r--fs/bcachefs/errcode.h1
-rw-r--r--fs/bcachefs/extent_update.c1
-rw-r--r--fs/bcachefs/extents.c47
-rw-r--r--fs/bcachefs/extents.h5
-rw-r--r--fs/bcachefs/fs-io-buffered.c2
-rw-r--r--fs/bcachefs/fs.c17
-rw-r--r--fs/bcachefs/fsck.c6
-rw-r--r--fs/bcachefs/inode.c5
-rw-r--r--fs/bcachefs/io_read.c55
-rw-r--r--fs/bcachefs/migrate.c20
-rw-r--r--fs/bcachefs/progress.c39
-rw-r--r--fs/bcachefs/progress.h12
-rw-r--r--fs/bcachefs/recovery.c12
-rw-r--r--fs/bcachefs/sb-counters_format.h2
-rw-r--r--fs/bcachefs/sb-downgrade.c11
-rw-r--r--fs/bcachefs/sb-errors_format.h7
-rw-r--r--fs/bcachefs/super.c65
-rw-r--r--fs/bcachefs/trace.h36
-rw-r--r--fs/bcachefs/util.c8
-rw-r--r--include/linux/closure.h33
-rw-r--r--lib/closure.c190
36 files changed, 454 insertions, 314 deletions
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index cab4d6798dd7..21cdc42eff46 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -1771,13 +1771,6 @@ static void discard_in_flight_remove(struct bch_dev *ca, u64 bucket)
darray_remove_item(&ca->discard_buckets_in_flight, i);
}
-struct discard_buckets_state {
- u64 seen;
- u64 open;
- u64 need_journal_commit;
- u64 discarded;
-};
-
static int bch2_discard_one_bucket(struct btree_trans *trans,
struct bch_dev *ca,
struct btree_iter *need_discard_iter,
@@ -1790,6 +1783,8 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
bool discard_locked = false;
int ret = 0;
+ s->seen++;
+
if (bch2_bucket_is_open_safe(c, pos.inode, pos.offset)) {
s->open++;
return 0;
@@ -1800,6 +1795,8 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
if (seq_ready > c->journal.flushed_seq_ondisk) {
if (seq_ready > c->journal.flushing_seq)
s->need_journal_commit++;
+ else
+ s->commit_in_flight++;
return 0;
}
@@ -1815,6 +1812,8 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
return ret;
if (a->v.data_type != BCH_DATA_need_discard) {
+ s->bad_data_type++;
+
if (need_discard_or_freespace_err(trans, k, true, true, true)) {
ret = bch2_btree_bit_mod_iter(trans, need_discard_iter, false);
if (ret)
@@ -1826,8 +1825,10 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
}
if (!fastpath) {
- if (discard_in_flight_add(ca, iter.pos.offset, true))
+ if (discard_in_flight_add(ca, iter.pos.offset, true)) {
+ s->already_discarding++;
goto out;
+ }
discard_locked = true;
}
@@ -1861,6 +1862,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
commit:
ret = bch2_trans_commit(trans, NULL, NULL,
BCH_WATERMARK_btree|
+ BCH_TRANS_COMMIT_no_check_rw|
BCH_TRANS_COMMIT_no_enospc);
if (ret)
goto out;
@@ -1873,14 +1875,11 @@ out:
fsck_err:
if (discard_locked)
discard_in_flight_remove(ca, iter.pos.offset);
- if (!ret)
- s->seen++;
return ret;
}
-static void bch2_do_discards_work(struct work_struct *work)
+static void __bch2_dev_do_discards(struct bch_dev *ca)
{
- struct bch_dev *ca = container_of(work, struct bch_dev, discard_work);
struct bch_fs *c = ca->fs;
struct discard_buckets_state s = {};
struct bpos discard_pos_done = POS_MAX;
@@ -1901,10 +1900,25 @@ static void bch2_do_discards_work(struct work_struct *work)
if (s.need_journal_commit > dev_buckets_available(ca, BCH_WATERMARK_normal))
bch2_journal_flush_async(&c->journal, NULL);
- trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded,
- bch2_err_str(ret));
+ trace_discard_buckets(c, &s, bch2_err_str(ret));
enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_dev_do_discards);
+}
+
+void bch2_do_discards_going_ro(struct bch_fs *c)
+{
+ for_each_member_device(c, ca)
+ if (bch2_dev_get_ioref(c, ca->dev_idx, WRITE, BCH_DEV_WRITE_REF_dev_do_discards))
+ __bch2_dev_do_discards(ca);
+}
+
+static void bch2_do_discards_work(struct work_struct *work)
+{
+ struct bch_dev *ca = container_of(work, struct bch_dev, discard_work);
+ struct bch_fs *c = ca->fs;
+
+ __bch2_dev_do_discards(ca);
+
enumerated_ref_put(&c->writes, BCH_WRITE_REF_discard);
}
@@ -1992,7 +2006,7 @@ static void bch2_do_discards_fast_work(struct work_struct *work)
break;
}
- trace_discard_buckets_fast(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret));
+ trace_discard_buckets_fast(c, &s, bch2_err_str(ret));
bch2_trans_put(trans);
enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_discard_one_bucket_fast);
diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h
index c2e8482fbbe6..a602507fef19 100644
--- a/fs/bcachefs/alloc_background.h
+++ b/fs/bcachefs/alloc_background.h
@@ -320,6 +320,7 @@ static inline int bch2_check_discard_freespace_key_async(struct btree_trans *tra
int bch2_check_alloc_info(struct bch_fs *);
int bch2_check_alloc_to_lru_refs(struct bch_fs *);
void bch2_dev_do_discards(struct bch_dev *);
+void bch2_do_discards_going_ro(struct bch_fs *);
void bch2_do_discards(struct bch_fs *);
static inline u64 should_invalidate_buckets(struct bch_dev *ca,
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index 3d125ee81663..97b627ed3b22 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -1529,6 +1529,7 @@ void bch2_fs_alloc_debug_to_text(struct printbuf *out, struct bch_fs *c)
printbuf_tabstop_push(out, 24);
prt_printf(out, "capacity\t%llu\n", c->capacity);
+ prt_printf(out, "used\t%llu\n", bch2_fs_usage_read_short(c).used);
prt_printf(out, "reserved\t%llu\n", c->reserved);
prt_printf(out, "hidden\t%llu\n", percpu_u64_get(&c->usage->hidden));
prt_printf(out, "btree\t%llu\n", percpu_u64_get(&c->usage->btree));
diff --git a/fs/bcachefs/alloc_types.h b/fs/bcachefs/alloc_types.h
index e7becdf22cba..ee52b66dc5d7 100644
--- a/fs/bcachefs/alloc_types.h
+++ b/fs/bcachefs/alloc_types.h
@@ -118,4 +118,14 @@ struct write_point_specifier {
unsigned long v;
};
+struct discard_buckets_state {
+ u64 seen;
+ u64 open;
+ u64 need_journal_commit;
+ u64 commit_in_flight;
+ u64 bad_data_type;
+ u64 already_discarding;
+ u64 discarded;
+};
+
#endif /* _BCACHEFS_ALLOC_TYPES_H */
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
index c662eeba66ab..3193dbcfc3c6 100644
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -432,6 +432,10 @@ fsck_err:
/* verify that every backpointer has a corresponding alloc key */
int bch2_check_btree_backpointers(struct bch_fs *c)
{
+ struct progress_indicator_state progress;
+
+ bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_backpointers));
+
struct bkey_buf last_flushed;
bch2_bkey_buf_init(&last_flushed);
bkey_init(&last_flushed.k->k);
@@ -439,8 +443,10 @@ int bch2_check_btree_backpointers(struct bch_fs *c)
CLASS(btree_trans, trans)(c);
int ret = for_each_btree_key_commit(trans, iter,
BTREE_ID_backpointers, POS_MIN, 0, k,
- NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- bch2_check_backpointer_has_valid_bucket(trans, k, &last_flushed));
+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
+ progress_update_iter(trans, &progress, &iter);
+ bch2_check_backpointer_has_valid_bucket(trans, k, &last_flushed);
+ }));
bch2_bkey_buf_exit(&last_flushed, c);
return ret;
@@ -815,7 +821,9 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
struct progress_indicator_state progress;
int ret = 0;
- bch2_progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_extents)|BIT_ULL(BTREE_ID_reflink));
+ bch2_progress_init_inner(&progress, trans->c,
+ btree_has_data_ptrs_mask,
+ ~0ULL);
for (enum btree_id btree_id = 0;
btree_id < btree_id_nr_alive(c);
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index c62d2e4ab50b..83d6ab9c1a91 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -458,7 +458,6 @@ BCH_DEBUG_PARAMS_ALL()
x(btree_node_compact) \
x(btree_node_merge) \
x(btree_node_sort) \
- x(btree_node_get) \
x(btree_node_read) \
x(btree_node_read_done) \
x(btree_node_write) \
@@ -466,10 +465,6 @@ BCH_DEBUG_PARAMS_ALL()
x(btree_interior_update_total) \
x(btree_gc) \
x(data_write) \
- x(data_write_to_submit) \
- x(data_write_to_queue) \
- x(data_write_to_btree_update) \
- x(data_write_btree_update) \
x(data_read) \
x(data_promote) \
x(journal_flush_write) \
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index 0839397105a9..d29bd684b137 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -706,7 +706,8 @@ struct bch_sb_field_ext {
x(fast_device_removal, BCH_VERSION(1, 27)) \
x(inode_has_case_insensitive, BCH_VERSION(1, 28)) \
x(extent_snapshot_whiteouts, BCH_VERSION(1, 29)) \
- x(31bit_dirent_offset, BCH_VERSION(1, 30))
+ x(31bit_dirent_offset, BCH_VERSION(1, 30)) \
+ x(btree_node_accounting, BCH_VERSION(1, 31))
enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9,
@@ -717,7 +718,7 @@ enum bcachefs_metadata_version {
};
static const __maybe_unused
-unsigned bcachefs_metadata_required_upgrade_below = bcachefs_metadata_version_rebalance_work;
+unsigned bcachefs_metadata_required_upgrade_below = bcachefs_metadata_version_btree_node_accounting;
#define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1)
@@ -965,7 +966,8 @@ enum bch_sb_feature {
x(alloc_info, 0) \
x(alloc_metadata, 1) \
x(extents_above_btree_updates_done, 2) \
- x(bformat_overflow_done, 3)
+ x(bformat_overflow_done, 3) \
+ x(no_stale_ptrs, 4)
enum bch_sb_compat {
#define x(f, n) BCH_COMPAT_##f,
diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c
index 75d73677c4d8..da1a1a21586e 100644
--- a/fs/bcachefs/bkey_methods.c
+++ b/fs/bcachefs/bkey_methods.c
@@ -344,15 +344,6 @@ void bch2_bkey_swab_val(struct bkey_s k)
ops->swab(k);
}
-bool bch2_bkey_normalize(struct bch_fs *c, struct bkey_s k)
-{
- const struct bkey_ops *ops = bch2_bkey_type_ops(k.k->type);
-
- return ops->key_normalize
- ? ops->key_normalize(c, k)
- : false;
-}
-
bool bch2_bkey_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r)
{
const struct bkey_ops *ops = bch2_bkey_type_ops(l.k->type);
diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h
index bf34111cdf00..5adce4e9294b 100644
--- a/fs/bcachefs/bkey_methods.h
+++ b/fs/bcachefs/bkey_methods.h
@@ -26,7 +26,6 @@ struct bkey_ops {
void (*val_to_text)(struct printbuf *, struct bch_fs *,
struct bkey_s_c);
void (*swab)(struct bkey_s);
- bool (*key_normalize)(struct bch_fs *, struct bkey_s);
bool (*key_merge)(struct bch_fs *, struct bkey_s, struct bkey_s_c);
int (*trigger)(struct btree_trans *, enum btree_id, unsigned,
struct bkey_s_c, struct bkey_s,
@@ -66,8 +65,6 @@ void bch2_bkey_val_to_text(struct printbuf *, struct bch_fs *,
void bch2_bkey_swab_val(struct bkey_s);
-bool bch2_bkey_normalize(struct bch_fs *, struct bkey_s);
-
static inline bool bch2_bkey_maybe_mergable(const struct bkey *l, const struct bkey *r)
{
return l->type == r->type &&
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 2338feb8d8ed..63dc0836bf08 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -780,7 +780,7 @@ static int bch2_gc_btrees(struct bch_fs *c)
int ret = 0;
struct progress_indicator_state progress;
- bch2_progress_init(&progress, c, ~0ULL);
+ bch2_progress_init_inner(&progress, c, ~0ULL, ~0ULL);
enum btree_id ids[BTREE_ID_NR];
for (unsigned i = 0; i < BTREE_ID_NR; i++)
@@ -1140,43 +1140,11 @@ static int gc_btree_gens_key(struct btree_trans *trans,
struct bkey_s_c k)
{
struct bch_fs *c = trans->c;
- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
if (unlikely(test_bit(BCH_FS_going_ro, &c->flags)))
return -EROFS;
- bool too_stale = false;
- scoped_guard(rcu) {
- bkey_for_each_ptr(ptrs, ptr) {
- struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev);
- if (!ca)
- continue;
-
- too_stale |= dev_ptr_stale(ca, ptr) > 16;
- }
-
- if (!too_stale)
- bkey_for_each_ptr(ptrs, ptr) {
- struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev);
- if (!ca)
- continue;
-
- u8 *gen = &ca->oldest_gen[PTR_BUCKET_NR(ca, ptr)];
- if (gen_after(*gen, ptr->gen))
- *gen = ptr->gen;
- }
- }
-
- if (too_stale) {
- struct bkey_i *u = bch2_bkey_make_mut(trans, iter, &k, 0);
- int ret = PTR_ERR_OR_ZERO(u);
- if (ret)
- return ret;
-
- bch2_extent_normalize(c, bkey_i_to_s(u));
- }
-
- return 0;
+ return bch2_bkey_drop_stale_ptrs(trans, iter, k);
}
static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct bch_dev *ca,
@@ -1281,6 +1249,12 @@ int bch2_gc_gens(struct bch_fs *c)
bch2_time_stats_update(&c->times[BCH_TIME_btree_gc], start_time);
trace_and_count(c, gc_gens_end, c);
+
+ if (!(c->sb.compat & BIT_ULL(BCH_COMPAT_no_stale_ptrs))) {
+ guard(mutex)(&c->sb_lock);
+ c->disk_sb.sb->compat[0] |= cpu_to_le64(BIT_ULL(BCH_COMPAT_no_stale_ptrs));
+ bch2_write_super(c);
+ }
err:
for_each_member_device(c, ca) {
kvfree(ca->oldest_gen);
diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c
index a4f8aac448c0..52ed0a46dff0 100644
--- a/fs/bcachefs/btree_locking.c
+++ b/fs/bcachefs/btree_locking.c
@@ -69,6 +69,7 @@ struct trans_waiting_for_lock {
struct lock_graph {
struct trans_waiting_for_lock g[8];
unsigned nr;
+ bool printed_chain;
};
static noinline void print_cycle(struct printbuf *out, struct lock_graph *g)
@@ -89,6 +90,10 @@ static noinline void print_cycle(struct printbuf *out, struct lock_graph *g)
static noinline void print_chain(struct printbuf *out, struct lock_graph *g)
{
+ if (g->printed_chain || g->nr <= 1)
+ return;
+ g->printed_chain = true;
+
struct trans_waiting_for_lock *i;
for (i = g->g; i != g->g + g->nr; i++) {
@@ -124,6 +129,7 @@ static void __lock_graph_down(struct lock_graph *g, struct btree_trans *trans)
.node_want = trans->locking,
.lock_want = trans->locking_wait.lock_want,
};
+ g->printed_chain = false;
}
static void lock_graph_down(struct lock_graph *g, struct btree_trans *trans)
@@ -398,7 +404,7 @@ next:
}
}
up:
- if (g.nr > 1 && cycle)
+ if (cycle)
print_chain(cycle, &g);
lock_graph_up(&g);
goto next;
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index a8cd7a5a6e7d..ce86d158aa8e 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -702,8 +702,10 @@ static void btree_update_nodes_written(struct btree_update *as)
if (ret)
goto err;
- if (!btree_update_new_nodes_marked_sb(as))
+ if (!btree_update_new_nodes_marked_sb(as)) {
+ bch2_trans_unlock_long(trans);
btree_update_new_nodes_mark_sb(as);
+ }
/*
* Wait for any in flight writes to finish before we free the old nodes
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 021f5cb7998d..1b999b8b0921 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -462,6 +462,7 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca,
size_t bucket_nr = PTR_BUCKET_NR(ca, ptr);
CLASS(printbuf, buf)();
bool inserting = sectors > 0;
+ int ret = 0;
BUG_ON(!sectors);
@@ -489,8 +490,17 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca,
BCH_FSCK_ERR_ptr_too_stale);
}
- if (b_gen != ptr->gen && ptr->cached)
+ if (b_gen != ptr->gen && ptr->cached) {
+ if (fsck_err_on(c->sb.compat & BIT_ULL(BCH_COMPAT_no_stale_ptrs),
+ trans, stale_ptr_with_no_stale_ptrs_feature,
+ "stale cached ptr, but have no_stale_ptrs feature\n%s",
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
+ guard(mutex)(&c->sb_lock);
+ c->disk_sb.sb->compat[0] &= ~cpu_to_le64(BIT_ULL(BCH_COMPAT_no_stale_ptrs));
+ bch2_write_super(c);
+ }
return 1;
+ }
if (unlikely(b_gen != ptr->gen)) {
bch2_log_msg_start(c, &buf);
@@ -530,7 +540,8 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca,
}
*bucket_sectors += sectors;
- return 0;
+fsck_err:
+ return ret;
}
void bch2_trans_account_disk_usage_change(struct btree_trans *trans)
@@ -749,6 +760,7 @@ static int __trigger_extent(struct btree_trans *trans,
enum btree_iter_update_trigger_flags flags)
{
bool gc = flags & BTREE_TRIGGER_gc;
+ bool insert = !(flags & BTREE_TRIGGER_overwrite);
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
@@ -802,7 +814,7 @@ static int __trigger_extent(struct btree_trans *trans,
if (cur_compression_type &&
cur_compression_type != p.crc.compression_type) {
- if (flags & BTREE_TRIGGER_overwrite)
+ if (!insert)
bch2_u64s_neg(compression_acct, ARRAY_SIZE(compression_acct));
ret = bch2_disk_accounting_mod2(trans, gc, compression_acct,
@@ -835,7 +847,7 @@ static int __trigger_extent(struct btree_trans *trans,
}
if (cur_compression_type) {
- if (flags & BTREE_TRIGGER_overwrite)
+ if (!insert)
bch2_u64s_neg(compression_acct, ARRAY_SIZE(compression_acct));
ret = bch2_disk_accounting_mod2(trans, gc, compression_acct,
@@ -845,12 +857,17 @@ static int __trigger_extent(struct btree_trans *trans,
}
if (level) {
- ret = bch2_disk_accounting_mod2_nr(trans, gc, &replicas_sectors, 1, btree, btree_id);
+ const bool leaf_node = level == 1;
+ s64 v[3] = {
+ replicas_sectors,
+ insert ? 1 : -1,
+ !leaf_node ? (insert ? 1 : -1) : 0,
+ };
+
+ ret = bch2_disk_accounting_mod2(trans, gc, v, btree, btree_id);
if (ret)
return ret;
} else {
- bool insert = !(flags & BTREE_TRIGGER_overwrite);
-
s64 v[3] = {
insert ? 1 : -1,
insert ? k.k->size : -((s64) k.k->size),
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index 7a0da6cdf78c..ca925c5d1a48 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -393,7 +393,7 @@ restart_drop_extra_replicas:
bch2_extent_ptr_decoded_append(insert, &p);
bch2_bkey_narrow_crcs(insert, (struct bch_extent_crc_unpacked) { 0 });
- bch2_extent_normalize_by_opts(c, &m->op.opts, bkey_i_to_s(insert));
+ bch2_bkey_drop_extra_cached_ptrs(c, &m->op.opts, bkey_i_to_s(insert));
ret = bch2_sum_sector_overwrites(trans, &iter, insert,
&should_check_enospc,
@@ -721,7 +721,7 @@ int bch2_extent_drop_ptrs(struct btree_trans *trans,
* will do the appropriate thing with it (turning it into a
* KEY_TYPE_error key, or just a discard if it was a cached extent)
*/
- bch2_extent_normalize_by_opts(c, io_opts, bkey_i_to_s(n));
+ bch2_bkey_drop_extra_cached_ptrs(c, io_opts, bkey_i_to_s(n));
/*
* Since we're not inserting through an extent iterator
diff --git a/fs/bcachefs/disk_accounting_format.h b/fs/bcachefs/disk_accounting_format.h
index 8269af1dbe2a..730a17ea4243 100644
--- a/fs/bcachefs/disk_accounting_format.h
+++ b/fs/bcachefs/disk_accounting_format.h
@@ -108,7 +108,7 @@ static inline bool data_type_is_hidden(enum bch_data_type type)
x(dev_data_type, 3, 3) \
x(compression, 4, 3) \
x(snapshot, 5, 1) \
- x(btree, 6, 1) \
+ x(btree, 6, 3) \
x(rebalance_work, 7, 1) \
x(inum, 8, 3)
@@ -174,6 +174,14 @@ struct bch_acct_snapshot {
__u32 id;
} __packed;
+/*
+ * Metadata accounting per btree id:
+ * [
+ * total btree disk usage in sectors
+ * total number of btree nodes
+ * number of non-leaf btree nodes
+ * ]
+ */
struct bch_acct_btree {
__u32 id;
} __packed;
diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
index adc1f9315eab..420f6922dacb 100644
--- a/fs/bcachefs/errcode.h
+++ b/fs/bcachefs/errcode.h
@@ -345,6 +345,7 @@
x(BCH_ERR_data_read, data_read_no_encryption_key) \
x(BCH_ERR_data_read, data_read_buffer_too_small) \
x(BCH_ERR_data_read, data_read_key_overwritten) \
+ x(0, rbio_narrow_crcs_fail) \
x(BCH_ERR_btree_node_read_err, btree_node_read_err_fixable) \
x(BCH_ERR_btree_node_read_err, btree_node_read_err_want_retry) \
x(BCH_ERR_btree_node_read_err, btree_node_read_err_must_retry) \
diff --git a/fs/bcachefs/extent_update.c b/fs/bcachefs/extent_update.c
index 73eb28090bc7..1279026b4c1e 100644
--- a/fs/bcachefs/extent_update.c
+++ b/fs/bcachefs/extent_update.c
@@ -146,6 +146,7 @@ int bch2_extent_trim_atomic(struct btree_trans *trans,
if (bpos_ge(bkey_start_pos(k.k), end))
break;
+ nr_iters += 1;
ret = count_iters_for_insert(trans, k, offset, &end, &nr_iters);
if (ret)
break;
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index 86aa93ea2345..3274ba42c995 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -12,6 +12,7 @@
#include "btree_gc.h"
#include "btree_io.h"
#include "btree_iter.h"
+#include "btree_update.h"
#include "buckets.h"
#include "checksum.h"
#include "compress.h"
@@ -1213,6 +1214,21 @@ drop:
bch2_bkey_drop_ptr_noerror(k, ptr);
}
+static bool bch2_bkey_has_stale_ptrs(struct bch_fs *c, struct bkey_s_c k)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ struct bch_dev *ca;
+
+ guard(rcu)();
+ bkey_for_each_ptr(ptrs, ptr)
+ if (ptr->cached &&
+ (ca = bch2_dev_rcu_noerror(c, ptr->dev)) &&
+ dev_ptr_stale_rcu(ca, ptr) > 0)
+ return true;
+
+ return false;
+}
+
/*
* bch2_extent_normalize - clean up an extent, dropping stale pointers etc.
*
@@ -1221,7 +1237,7 @@ drop:
* For existing keys, only called when btree nodes are being rewritten, not when
* they're merely being compacted/resorted in memory.
*/
-bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k)
+static void __bch2_bkey_drop_stale_ptrs(struct bch_fs *c, struct bkey_s k)
{
struct bch_dev *ca;
@@ -1230,19 +1246,26 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k)
ptr->cached &&
(!(ca = bch2_dev_rcu_noerror(c, ptr->dev)) ||
dev_ptr_stale_rcu(ca, ptr) > 0));
+}
+
+int bch2_bkey_drop_stale_ptrs(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k)
+{
+ if (bch2_bkey_has_stale_ptrs(trans->c, k)) {
+ struct bkey_i *u = bch2_bkey_make_mut(trans, iter, &k,
+ BTREE_UPDATE_internal_snapshot_node);
+ int ret = PTR_ERR_OR_ZERO(u);
+ if (ret)
+ return ret;
+
+ __bch2_bkey_drop_stale_ptrs(trans->c, bkey_i_to_s(u));
+ }
- return bkey_deleted(k.k);
+ return 0;
}
-/*
- * bch2_extent_normalize_by_opts - clean up an extent, dropping stale pointers etc.
- *
- * Like bch2_extent_normalize(), but also only keeps a single cached pointer on
- * the promote target.
- */
-bool bch2_extent_normalize_by_opts(struct bch_fs *c,
- struct bch_inode_opts *opts,
- struct bkey_s k)
+void bch2_bkey_drop_extra_cached_ptrs(struct bch_fs *c,
+ struct bch_inode_opts *opts,
+ struct bkey_s k)
{
struct bkey_ptrs ptrs;
bool have_cached_ptr;
@@ -1260,8 +1283,6 @@ restart_drop_ptrs:
}
have_cached_ptr = true;
}
-
- return bkey_deleted(k.k);
}
void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struct bch_extent_ptr *ptr)
diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h
index 03ea7c689d9a..1ea9752bfe95 100644
--- a/fs/bcachefs/extents.h
+++ b/fs/bcachefs/extents.h
@@ -440,7 +440,6 @@ bool bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
.key_validate = bch2_bkey_ptrs_validate, \
.val_to_text = bch2_bkey_ptrs_to_text, \
.swab = bch2_ptr_swab, \
- .key_normalize = bch2_extent_normalize, \
.key_merge = bch2_extent_merge, \
.trigger = bch2_trigger_extent, \
})
@@ -689,8 +688,8 @@ bch2_extent_has_ptr(struct bkey_s_c, struct extent_ptr_decoded, struct bkey_s);
void bch2_extent_ptr_set_cached(struct bch_fs *, struct bch_inode_opts *,
struct bkey_s, struct bch_extent_ptr *);
-bool bch2_extent_normalize_by_opts(struct bch_fs *, struct bch_inode_opts *, struct bkey_s);
-bool bch2_extent_normalize(struct bch_fs *, struct bkey_s);
+int bch2_bkey_drop_stale_ptrs(struct btree_trans *, struct btree_iter *, struct bkey_s_c);
+void bch2_bkey_drop_extra_cached_ptrs(struct bch_fs *, struct bch_inode_opts *, struct bkey_s);
void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *, const struct bch_extent_ptr *);
void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *,
diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c
index d9db9f1082d3..fe684adca370 100644
--- a/fs/bcachefs/fs-io-buffered.c
+++ b/fs/bcachefs/fs-io-buffered.c
@@ -542,7 +542,7 @@ static bool can_write_now(struct bch_fs *c, unsigned replicas_want, struct closu
return false;
}
- if (BCH_WATERMARK_normal < c->journal.watermark) {
+ if (BCH_WATERMARK_normal < c->journal.watermark && !bch2_journal_error(&c->journal)) {
closure_wait(&c->journal.async_wait, cl);
return false;
}
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index c7bb5b108e2f..d6a2031e17e8 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -2147,9 +2147,11 @@ static void bch2_evict_inode(struct inode *vinode)
KEY_TYPE_QUOTA_WARN);
int ret = bch2_inode_rm(c, inode_inum(inode));
if (ret && !bch2_err_matches(ret, EROFS)) {
- bch_err_msg(c, ret, "VFS incorrectly tried to delete inode %llu:%llu",
- inode->ei_inum.subvol,
- inode->ei_inum.inum);
+ CLASS(printbuf, buf)();
+ bch2_trans_do(c, bch2_inum_to_path(trans, inode->ei_inum, &buf));
+
+ bch_err_msg(c, ret, "VFS incorrectly tried to delete inode %llu:%llu\n%s",
+ inode->ei_inum.subvol, inode->ei_inum.inum, buf.buf);
bch2_sb_error_count(c, BCH_FSCK_ERR_vfs_bad_inode_rm);
}
@@ -2236,11 +2238,16 @@ static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf)
struct bch_fs *c = sb->s_fs_info;
struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c);
unsigned shift = sb->s_blocksize_bits - 9;
+
/*
- * this assumes inodes take up 64 bytes, which is a decent average
+ * This assumes inodes take up 64 bytes, which is a decent average
* number:
+ *
+ * Not anymore - bi_dir, bi_dir_offset came later and shouldn't have
+ * been varint fields: seeing 144-160 byte inodes, so let's call it 256
+ * bytes:
*/
- u64 avail_inodes = ((usage.capacity - usage.used) << 3);
+ u64 avail_inodes = ((usage.capacity - usage.used) << 1);
buf->f_type = BCACHEFS_STATFS_MAGIC;
buf->f_bsize = sb->s_blocksize;
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index ccc44b1fc178..3bde5c07b528 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -1963,7 +1963,8 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
}
}
- ret = check_extent_overbig(trans, iter, k);
+ ret = check_extent_overbig(trans, iter, k) ?:
+ bch2_bkey_drop_stale_ptrs(trans, iter, k);
if (ret)
goto err;
@@ -2040,7 +2041,8 @@ int bch2_check_indirect_extents(struct bch_fs *c)
BCH_TRANS_COMMIT_no_enospc, ({
progress_update_iter(trans, &progress, &iter);
bch2_disk_reservation_put(c, &res);
- check_extent_overbig(trans, &iter, k);
+ check_extent_overbig(trans, &iter, k) ?:
+ bch2_bkey_drop_stale_ptrs(trans, &iter, k);
}));
bch2_disk_reservation_put(c, &res);
diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c
index 655ed90b2a39..543627fb58be 100644
--- a/fs/bcachefs/inode.c
+++ b/fs/bcachefs/inode.c
@@ -1359,7 +1359,7 @@ err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
- return ret ?: bch_err_throw(c, transaction_restart_nested);
+ return ret;
}
/*
@@ -1398,7 +1398,8 @@ next_parent:
int bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot)
{
return __bch2_inode_rm_snapshot(trans, inum, snapshot) ?:
- delete_ancestor_snapshot_inodes(trans, SPOS(0, inum, snapshot));
+ delete_ancestor_snapshot_inodes(trans, SPOS(0, inum, snapshot)) ?:
+ bch_err_throw(trans->c, transaction_restart_nested);
}
static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos,
diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index 7066be2701c0..e7ba0d0bf5ef 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -740,15 +740,13 @@ static void bch2_rbio_error(struct bch_read_bio *rbio,
}
static int __bch2_rbio_narrow_crcs(struct btree_trans *trans,
- struct bch_read_bio *rbio)
+ struct bch_read_bio *rbio,
+ struct bch_extent_crc_unpacked *new_crc)
{
struct bch_fs *c = rbio->c;
u64 data_offset = rbio->data_pos.offset - rbio->pick.crc.offset;
int ret = 0;
- if (crc_is_compressed(rbio->pick.crc))
- return 0;
-
CLASS(btree_iter, iter)(trans, rbio->data_btree, rbio->data_pos, BTREE_ITER_intent);
struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter);
if ((ret = bkey_err(k)))
@@ -756,21 +754,12 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans,
if (bversion_cmp(k.k->bversion, rbio->version) ||
!bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, data_offset))
- return 0;
+ return bch_err_throw(c, rbio_narrow_crcs_fail);
- /* Extent was merged? */
- if (bkey_start_offset(k.k) < data_offset ||
- k.k->p.offset > data_offset + rbio->pick.crc.uncompressed_size)
- return 0;
-
- struct bch_extent_crc_unpacked new_crc;
- if (bch2_rechecksum_bio(c, &rbio->bio, rbio->version,
- rbio->pick.crc, NULL, &new_crc,
- bkey_start_offset(k.k) - data_offset, k.k->size,
- rbio->pick.crc.csum_type)) {
- bch_err(c, "error verifying existing checksum while narrowing checksum (memory corruption?)");
- return 0;
- }
+ /* Extent was trimmed/merged? */
+ if (!bpos_eq(bkey_start_pos(k.k), rbio->data_pos) ||
+ k.k->p.offset != rbio->data_pos.offset + rbio->pick.crc.live_size)
+ return bch_err_throw(c, rbio_narrow_crcs_fail);
/*
* going to be temporarily appending another checksum entry:
@@ -782,17 +771,37 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans,
bkey_reassemble(new, k);
- if (!bch2_bkey_narrow_crcs(new, new_crc))
- return 0;
+ if (!bch2_bkey_narrow_crcs(new, *new_crc))
+ return bch_err_throw(c, rbio_narrow_crcs_fail);
return bch2_trans_update(trans, &iter, new, BTREE_UPDATE_internal_snapshot_node);
}
static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio)
{
- CLASS(btree_trans, trans)(rbio->c);
- commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- __bch2_rbio_narrow_crcs(trans, rbio));
+ struct bch_fs *c = rbio->c;
+
+ if (crc_is_compressed(rbio->pick.crc))
+ return;
+
+ u64 data_offset = rbio->data_pos.offset - rbio->pick.crc.offset;
+
+ struct bch_extent_crc_unpacked new_crc;
+ if (bch2_rechecksum_bio(c, &rbio->bio, rbio->version,
+ rbio->pick.crc, NULL, &new_crc,
+ rbio->data_pos.offset - data_offset, rbio->pick.crc.live_size,
+ rbio->pick.crc.csum_type)) {
+ bch_err(c, "error verifying existing checksum while narrowing checksum (memory corruption?)");
+ return;
+ }
+
+ CLASS(btree_trans, trans)(c);
+ int ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ __bch2_rbio_narrow_crcs(trans, rbio, &new_crc));
+ if (!ret)
+ count_event(c, io_read_narrow_crcs);
+ else if (ret == -BCH_ERR_rbio_narrow_crcs_fail)
+ count_event(c, io_read_narrow_crcs_fail);
}
static void bch2_read_decompress_err(struct work_struct *work)
diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c
index 8a3981e1016e..92edff50b655 100644
--- a/fs/bcachefs/migrate.c
+++ b/fs/bcachefs/migrate.c
@@ -84,13 +84,6 @@ static int bch2_dev_usrdata_drop_key(struct btree_trans *trans,
return ret;
/*
- * If the new extent no longer has any pointers, bch2_extent_normalize()
- * will do the appropriate thing with it (turning it into a
- * KEY_TYPE_error key, or just a discard if it was a cached extent)
- */
- bch2_extent_normalize(c, bkey_i_to_s(n));
-
- /*
* Since we're not inserting through an extent iterator
* (BTREE_ITER_all_snapshots iterators aren't extent iterators),
* we aren't using the extent overwrite path to delete, we're
@@ -273,10 +266,15 @@ int bch2_dev_data_drop(struct bch_fs *c, unsigned dev_idx,
unsigned flags, struct printbuf *err)
{
struct progress_indicator_state progress;
+ int ret;
+
bch2_progress_init(&progress, c,
- BIT_ULL(BTREE_ID_extents)|
- BIT_ULL(BTREE_ID_reflink));
+ btree_has_data_ptrs_mask & ~BIT_ULL(BTREE_ID_stripes));
+
+ if ((ret = bch2_dev_usrdata_drop(c, &progress, dev_idx, flags, err)))
+ return ret;
+
+ bch2_progress_init_inner(&progress, c, 0, ~0ULL);
- return bch2_dev_usrdata_drop(c, &progress, dev_idx, flags, err) ?:
- bch2_dev_metadata_drop(c, &progress, dev_idx, flags, err);
+ return bch2_dev_metadata_drop(c, &progress, dev_idx, flags, err);
}
diff --git a/fs/bcachefs/progress.c b/fs/bcachefs/progress.c
index 541ee951d1c9..7cc16490ffa9 100644
--- a/fs/bcachefs/progress.c
+++ b/fs/bcachefs/progress.c
@@ -4,14 +4,21 @@
#include "disk_accounting.h"
#include "progress.h"
-void bch2_progress_init(struct progress_indicator_state *s,
- struct bch_fs *c,
- u64 btree_id_mask)
+void bch2_progress_init_inner(struct progress_indicator_state *s,
+ struct bch_fs *c,
+ u64 leaf_btree_id_mask,
+ u64 inner_btree_id_mask)
{
memset(s, 0, sizeof(*s));
s->next_print = jiffies + HZ * 10;
+	/* This is only an estimate: nodes can have different replica counts */
+ const u32 expected_node_disk_sectors =
+ READ_ONCE(c->opts.metadata_replicas) * btree_sectors(c);
+
+ const u64 btree_id_mask = leaf_btree_id_mask | inner_btree_id_mask;
+
for (unsigned i = 0; i < btree_id_nr_alive(c); i++) {
if (!(btree_id_mask & BIT_ULL(i)))
continue;
@@ -19,9 +26,29 @@ void bch2_progress_init(struct progress_indicator_state *s,
struct disk_accounting_pos acc;
disk_accounting_key_init(acc, btree, .id = i);
- u64 v;
- bch2_accounting_mem_read(c, disk_accounting_pos_to_bpos(&acc), &v, 1);
- s->nodes_total += div64_ul(v, btree_sectors(c));
+ struct {
+ u64 disk_sectors;
+ u64 total_nodes;
+ u64 inner_nodes;
+ } v = {0};
+ bch2_accounting_mem_read(c, disk_accounting_pos_to_bpos(&acc),
+ (u64 *)&v, sizeof(v) / sizeof(u64));
+
+ /* Better to estimate as 0 than the total node count */
+ if (inner_btree_id_mask & BIT_ULL(i))
+ s->nodes_total += v.inner_nodes;
+
+ if (!(leaf_btree_id_mask & BIT_ULL(i)))
+ continue;
+
+ /*
+ * We check for zeros to degrade gracefully when run
+ * with un-upgraded accounting info (missing some counters).
+ */
+ if (v.total_nodes != 0)
+ s->nodes_total += v.total_nodes - v.inner_nodes;
+ else
+ s->nodes_total += div_u64(v.disk_sectors, expected_node_disk_sectors);
}
}
diff --git a/fs/bcachefs/progress.h b/fs/bcachefs/progress.h
index 972a73087ffe..91f345337709 100644
--- a/fs/bcachefs/progress.h
+++ b/fs/bcachefs/progress.h
@@ -20,7 +20,17 @@ struct progress_indicator_state {
struct btree *last_node;
};
-void bch2_progress_init(struct progress_indicator_state *, struct bch_fs *, u64);
+void bch2_progress_init_inner(struct progress_indicator_state *s,
+ struct bch_fs *c,
+ u64 leaf_btree_id_mask,
+ u64 inner_btree_id_mask);
+
+static inline void bch2_progress_init(struct progress_indicator_state *s,
+ struct bch_fs *c, u64 btree_id_mask)
+{
+ bch2_progress_init_inner(s, c, btree_id_mask, 0);
+}
+
void bch2_progress_update_iter(struct btree_trans *,
struct progress_indicator_state *,
struct btree_iter *,
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 531c2ef128ae..6942d3cfcba3 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -920,6 +920,13 @@ use_clean:
if (bch2_blacklist_entries_gc(c))
write_sb = true;
+ if (!(c->sb.compat & BIT_ULL(BCH_COMPAT_no_stale_ptrs)) &&
+ (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_check_extents)) &&
+ (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_check_indirect_extents))) {
+ c->disk_sb.sb->compat[0] |= cpu_to_le64(BIT_ULL(BCH_COMPAT_no_stale_ptrs));
+ write_sb = true;
+ }
+
if (write_sb)
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
@@ -982,8 +989,9 @@ int bch2_fs_initialize(struct bch_fs *c)
set_bit(BCH_FS_new_fs, &c->flags);
scoped_guard(mutex, &c->sb_lock) {
- c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
- c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done);
+ c->disk_sb.sb->compat[0] |= cpu_to_le64(BIT_ULL(BCH_COMPAT_extents_above_btree_updates_done));
+ c->disk_sb.sb->compat[0] |= cpu_to_le64(BIT_ULL(BCH_COMPAT_bformat_overflow_done));
+ c->disk_sb.sb->compat[0] |= cpu_to_le64(BIT_ULL(BCH_COMPAT_no_stale_ptrs));
bch2_check_version_downgrade(c);
diff --git a/fs/bcachefs/sb-counters_format.h b/fs/bcachefs/sb-counters_format.h
index 17cd617664d9..3907ba7edff2 100644
--- a/fs/bcachefs/sb-counters_format.h
+++ b/fs/bcachefs/sb-counters_format.h
@@ -23,6 +23,8 @@ enum counters_flags {
x(io_read_reuse_race, 34, TYPE_COUNTER) \
x(io_read_retry, 32, TYPE_COUNTER) \
x(io_read_fail_and_poison, 95, TYPE_COUNTER) \
+ x(io_read_narrow_crcs, 97, TYPE_COUNTER) \
+ x(io_read_narrow_crcs_fail, 98, TYPE_COUNTER) \
x(io_write, 1, TYPE_SECTORS) \
x(io_move, 2, TYPE_SECTORS) \
x(io_move_read, 35, TYPE_SECTORS) \
diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c
index de56a1ee79db..bfd06fd5d506 100644
--- a/fs/bcachefs/sb-downgrade.c
+++ b/fs/bcachefs/sb-downgrade.c
@@ -104,7 +104,10 @@
x(inode_has_case_insensitive, \
BIT_ULL(BCH_RECOVERY_PASS_check_inodes), \
BCH_FSCK_ERR_inode_has_case_insensitive_not_set, \
- BCH_FSCK_ERR_inode_parent_has_case_insensitive_not_set)
+ BCH_FSCK_ERR_inode_parent_has_case_insensitive_not_set)\
+ x(btree_node_accounting, \
+ BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \
+ BCH_FSCK_ERR_accounting_mismatch)
#define DOWNGRADE_TABLE() \
x(bucket_stripe_sectors, \
@@ -152,7 +155,11 @@
BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \
BCH_FSCK_ERR_accounting_mismatch, \
BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0, \
- BCH_FSCK_ERR_accounting_key_junk_at_end)
+ BCH_FSCK_ERR_accounting_key_junk_at_end) \
+ x(btree_node_accounting, \
+ BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \
+ BCH_FSCK_ERR_accounting_mismatch, \
+ BCH_FSCK_ERR_accounting_key_nr_counters_wrong)
struct upgrade_downgrade_entry {
u64 recovery_passes;
diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h
index 7c6f18a1ee2a..77e3fc92e39b 100644
--- a/fs/bcachefs/sb-errors_format.h
+++ b/fs/bcachefs/sb-errors_format.h
@@ -160,7 +160,7 @@ enum bch_fsck_flags {
x(extent_ptrs_unwritten, 140, 0) \
x(extent_ptrs_written_and_unwritten, 141, 0) \
x(ptr_to_invalid_device, 142, 0) \
- x(ptr_to_removed_device, 322, 0) \
+ x(ptr_to_removed_device, 322, FSCK_AUTOFIX) \
x(ptr_to_duplicate_device, 143, 0) \
x(ptr_after_last_bucket, 144, 0) \
x(ptr_before_first_bucket, 145, 0) \
@@ -170,9 +170,10 @@ enum bch_fsck_flags {
x(ptr_to_missing_replicas_entry, 149, FSCK_AUTOFIX) \
x(ptr_to_missing_stripe, 150, 0) \
x(ptr_to_incorrect_stripe, 151, 0) \
- x(ptr_gen_newer_than_bucket_gen, 152, FSCK_AUTOFIX) \
+ x(ptr_gen_newer_than_bucket_gen, 152, FSCK_AUTOFIX) \
x(ptr_too_stale, 153, 0) \
x(stale_dirty_ptr, 154, FSCK_AUTOFIX) \
+ x(stale_ptr_with_no_stale_ptrs_feature, 327, FSCK_AUTOFIX) \
x(ptr_bucket_data_type_mismatch, 155, 0) \
x(ptr_cached_and_erasure_coded, 156, 0) \
x(ptr_crc_uncompressed_size_too_small, 157, 0) \
@@ -338,7 +339,7 @@ enum bch_fsck_flags {
x(dirent_stray_data_after_cf_name, 305, 0) \
x(rebalance_work_incorrectly_set, 309, FSCK_AUTOFIX) \
x(rebalance_work_incorrectly_unset, 310, FSCK_AUTOFIX) \
- x(MAX, 327, 0)
+ x(MAX, 328, 0)
enum bch_sb_error_id {
#define x(t, n, ...) BCH_FSCK_ERR_##t = n,
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index ed504ce75169..03b12c2da097 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -238,6 +238,7 @@ static int bch2_dev_sysfs_online(struct bch_fs *, struct bch_dev *);
static void bch2_dev_io_ref_stop(struct bch_dev *, int);
static void __bch2_dev_read_only(struct bch_fs *, struct bch_dev *);
static int bch2_dev_attach_bdev(struct bch_fs *, struct bch_sb_handle *, struct printbuf *);
+static bool bch2_fs_will_resize_on_mount(struct bch_fs *);
struct bch_fs *bch2_dev_to_fs(dev_t dev)
{
@@ -322,6 +323,8 @@ static void __bch2_fs_read_only(struct bch_fs *c)
do {
clean_passes++;
+ bch2_do_discards_going_ro(c);
+
if (bch2_btree_interior_updates_flush(c) ||
bch2_btree_write_buffer_flush_going_ro(c) ||
bch2_journal_flush_all_pins(&c->journal) ||
@@ -962,6 +965,9 @@ static int bch2_fs_opt_version_init(struct bch_fs *c)
if (c->opts.journal_rewind)
c->opts.fsck = true;
+ bool may_upgrade_downgrade = !(c->sb.features & BIT_ULL(BCH_FEATURE_small_image)) ||
+ bch2_fs_will_resize_on_mount(c);
+
CLASS(printbuf, p)();
bch2_log_msg_start(c, &p);
@@ -1038,22 +1044,24 @@ static int bch2_fs_opt_version_init(struct bch_fs *c)
prt_bitflags(&p, __bch2_btree_ids, btrees_lost_data);
}
- if (bch2_check_version_downgrade(c)) {
- prt_str(&p, "\nVersion downgrade required:");
-
- __le64 passes = ext->recovery_passes_required[0];
- bch2_sb_set_downgrade(c,
- BCH_VERSION_MINOR(bcachefs_metadata_version_current),
- BCH_VERSION_MINOR(c->sb.version));
- passes = ext->recovery_passes_required[0] & ~passes;
- if (passes) {
- prt_str(&p, "\nrunning recovery passes: ");
- prt_bitflags(&p, bch2_recovery_passes,
- bch2_recovery_passes_from_stable(le64_to_cpu(passes)));
+ if (may_upgrade_downgrade) {
+ if (bch2_check_version_downgrade(c)) {
+ prt_str(&p, "\nVersion downgrade required:");
+
+ __le64 passes = ext->recovery_passes_required[0];
+ bch2_sb_set_downgrade(c,
+ BCH_VERSION_MINOR(bcachefs_metadata_version_current),
+ BCH_VERSION_MINOR(c->sb.version));
+ passes = ext->recovery_passes_required[0] & ~passes;
+ if (passes) {
+ prt_str(&p, "\nrunning recovery passes: ");
+ prt_bitflags(&p, bch2_recovery_passes,
+ bch2_recovery_passes_from_stable(le64_to_cpu(passes)));
+ }
}
- }
- check_version_upgrade(c);
+ check_version_upgrade(c);
+ }
c->opts.recovery_passes |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
@@ -1209,12 +1217,14 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts,
bch2_opts_apply(&c->opts, *opts);
+#ifdef __KERNEL__
if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
c->opts.block_size > PAGE_SIZE) {
bch_err(c, "cannot mount bs > ps filesystem without CONFIG_TRANSPARENT_HUGEPAGE");
ret = -EINVAL;
goto err;
}
+#endif
c->btree_key_cache_btrees |= 1U << BTREE_ID_alloc;
if (c->opts.inodes_use_key_cache)
@@ -1989,7 +1999,8 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags,
struct printbuf *err)
{
unsigned dev_idx = ca->dev_idx, data;
- bool fast_device_removal = !bch2_request_incompat_feature(c,
+ bool fast_device_removal = (c->sb.compat & BIT_ULL(BCH_COMPAT_no_stale_ptrs)) &&
+ !bch2_request_incompat_feature(c,
bcachefs_metadata_version_fast_device_removal);
int ret;
@@ -2417,15 +2428,29 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets, struct p
return 0;
}
+static bool bch2_dev_will_resize_on_mount(struct bch_dev *ca)
+{
+ return ca->mi.resize_on_mount &&
+ ca->mi.nbuckets < div64_u64(get_capacity(ca->disk_sb.bdev->bd_disk),
+ ca->mi.bucket_size);
+}
+
+static bool bch2_fs_will_resize_on_mount(struct bch_fs *c)
+{
+ for_each_online_member(c, ca, BCH_DEV_READ_REF_fs_resize_on_mount)
+ if (bch2_dev_will_resize_on_mount(ca))
+ return true;
+ return false;
+}
+
int bch2_fs_resize_on_mount(struct bch_fs *c)
{
for_each_online_member(c, ca, BCH_DEV_READ_REF_fs_resize_on_mount) {
- u64 old_nbuckets = ca->mi.nbuckets;
- u64 new_nbuckets = div64_u64(get_capacity(ca->disk_sb.bdev->bd_disk),
- ca->mi.bucket_size);
+ if (bch2_dev_will_resize_on_mount(ca)) {
+ u64 old_nbuckets = ca->mi.nbuckets;
+ u64 new_nbuckets = div64_u64(get_capacity(ca->disk_sb.bdev->bd_disk),
+ ca->mi.bucket_size);
- if (ca->mi.resize_on_mount &&
- new_nbuckets > ca->mi.nbuckets) {
bch_info(ca, "resizing to size %llu", new_nbuckets * ca->mi.bucket_size);
int ret = bch2_dev_buckets_resize(c, ca, new_nbuckets);
bch_err_fn(ca, ret);
diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h
index 269cdf1a87a4..6c312fd9a447 100644
--- a/fs/bcachefs/trace.h
+++ b/fs/bcachefs/trace.h
@@ -720,47 +720,55 @@ DEFINE_EVENT(fs_str, bucket_alloc_fail,
);
DECLARE_EVENT_CLASS(discard_buckets_class,
- TP_PROTO(struct bch_fs *c, u64 seen, u64 open,
- u64 need_journal_commit, u64 discarded, const char *err),
- TP_ARGS(c, seen, open, need_journal_commit, discarded, err),
+ TP_PROTO(struct bch_fs *c, struct discard_buckets_state *s, const char *err),
+ TP_ARGS(c, s, err),
TP_STRUCT__entry(
__field(dev_t, dev )
__field(u64, seen )
__field(u64, open )
__field(u64, need_journal_commit )
+ __field(u64, commit_in_flight )
+ __field(u64, bad_data_type )
+ __field(u64, already_discarding )
__field(u64, discarded )
__array(char, err, 16 )
),
TP_fast_assign(
__entry->dev = c->dev;
- __entry->seen = seen;
- __entry->open = open;
- __entry->need_journal_commit = need_journal_commit;
- __entry->discarded = discarded;
+ __entry->seen = s->seen;
+ __entry->open = s->open;
+ __entry->need_journal_commit = s->need_journal_commit;
+ __entry->commit_in_flight = s->commit_in_flight;
+ __entry->bad_data_type = s->bad_data_type;
+ __entry->already_discarding = s->already_discarding;
+ __entry->discarded = s->discarded;
strscpy(__entry->err, err, sizeof(__entry->err));
),
- TP_printk("%d%d seen %llu open %llu need_journal_commit %llu discarded %llu err %s",
+ TP_printk("%d%d seen %llu open %llu\n"
+ "need_commit %llu committing %llu bad_data_type %llu\n"
+ "already_discarding %llu discarded %llu err %s",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->seen,
__entry->open,
__entry->need_journal_commit,
+ __entry->commit_in_flight,
+ __entry->bad_data_type,
+ __entry->already_discarding,
__entry->discarded,
__entry->err)
);
DEFINE_EVENT(discard_buckets_class, discard_buckets,
- TP_PROTO(struct bch_fs *c, u64 seen, u64 open,
- u64 need_journal_commit, u64 discarded, const char *err),
- TP_ARGS(c, seen, open, need_journal_commit, discarded, err)
+ TP_PROTO(struct bch_fs *c, struct discard_buckets_state *s, const char *err),
+ TP_ARGS(c, s, err)
);
DEFINE_EVENT(discard_buckets_class, discard_buckets_fast,
- TP_PROTO(struct bch_fs *c, u64 seen, u64 open,
- u64 need_journal_commit, u64 discarded, const char *err),
- TP_ARGS(c, seen, open, need_journal_commit, discarded, err)
+ TP_PROTO(struct bch_fs *c, struct discard_buckets_state *s, const char *err),
+ TP_ARGS(c, s, err)
);
TRACE_EVENT(bucket_invalidate,
diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c
index 2a9462275f92..16d746f1d7e9 100644
--- a/fs/bcachefs/util.c
+++ b/fs/bcachefs/util.c
@@ -299,8 +299,10 @@ int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task, unsigne
if (ret)
return ret;
+ skipnr += task == current;
+
do {
- nr_entries = stack_trace_save_tsk(task, stack->data, stack->size, skipnr + 1);
+ nr_entries = stack_trace_save_tsk(task, stack->data, stack->size, skipnr);
} while (nr_entries == stack->size &&
!(ret = darray_make_room_gfp(stack, stack->size * 2, gfp)));
@@ -321,8 +323,10 @@ void bch2_prt_backtrace(struct printbuf *out, bch_stacktrace *stack)
int bch2_prt_task_backtrace(struct printbuf *out, struct task_struct *task, unsigned skipnr, gfp_t gfp)
{
+ skipnr += task == current;
+
CLASS(bch_stacktrace, stack)();
- int ret = bch2_save_backtrace(&stack, task, skipnr + 1, gfp);
+ int ret = bch2_save_backtrace(&stack, task, skipnr, gfp);
bch2_prt_backtrace(out, &stack);
return ret;
diff --git a/include/linux/closure.h b/include/linux/closure.h
index 880fe85e35e9..2b488a012559 100644
--- a/include/linux/closure.h
+++ b/include/linux/closure.h
@@ -128,14 +128,15 @@ enum closure_state {
* annotate where references are being transferred.
*/
- CLOSURE_BITS_START = (1U << 26),
- CLOSURE_DESTRUCTOR = (1U << 26),
+ CLOSURE_BITS_START = (1U << 24),
+ CLOSURE_DESTRUCTOR = (1U << 24),
+ CLOSURE_SLEEPING = (1U << 26),
CLOSURE_WAITING = (1U << 28),
CLOSURE_RUNNING = (1U << 30),
};
#define CLOSURE_GUARD_MASK \
- ((CLOSURE_DESTRUCTOR|CLOSURE_WAITING|CLOSURE_RUNNING) << 1)
+ (((CLOSURE_DESTRUCTOR|CLOSURE_WAITING|CLOSURE_RUNNING) << 1)|(CLOSURE_BITS_START >> 1))
#define CLOSURE_REMAINING_MASK (CLOSURE_BITS_START - 1)
#define CLOSURE_REMAINING_INITIALIZER (1|CLOSURE_RUNNING)
@@ -144,7 +145,7 @@ struct closure {
union {
struct {
struct workqueue_struct *wq;
- struct closure_syncer *s;
+ struct task_struct *sleeper;
struct llist_node list;
closure_fn *fn;
};
@@ -154,7 +155,6 @@ struct closure {
struct closure *parent;
atomic_t remaining;
- bool closure_get_happened;
#ifdef CONFIG_DEBUG_CLOSURES
#define CLOSURE_MAGIC_DEAD 0xc054dead
@@ -169,11 +169,18 @@ struct closure {
};
void closure_sub(struct closure *cl, int v);
-void closure_put(struct closure *cl);
void __closure_wake_up(struct closure_waitlist *list);
bool closure_wait(struct closure_waitlist *list, struct closure *cl);
void __closure_sync(struct closure *cl);
+/*
+ * closure_put - decrement a closure's refcount
+ */
+static inline void closure_put(struct closure *cl)
+{
+ closure_sub(cl, 1);
+}
+
static inline unsigned closure_nr_remaining(struct closure *cl)
{
return atomic_read(&cl->remaining) & CLOSURE_REMAINING_MASK;
@@ -187,11 +194,7 @@ static inline unsigned closure_nr_remaining(struct closure *cl)
*/
static inline void closure_sync(struct closure *cl)
{
-#ifdef CONFIG_DEBUG_CLOSURES
- BUG_ON(closure_nr_remaining(cl) != 1 && !cl->closure_get_happened);
-#endif
-
- if (cl->closure_get_happened)
+ if (closure_nr_remaining(cl) > 1)
__closure_sync(cl);
}
@@ -199,10 +202,7 @@ int __closure_sync_timeout(struct closure *cl, unsigned long timeout);
static inline int closure_sync_timeout(struct closure *cl, unsigned long timeout)
{
-#ifdef CONFIG_DEBUG_CLOSURES
- BUG_ON(closure_nr_remaining(cl) != 1 && !cl->closure_get_happened);
-#endif
- return cl->closure_get_happened
+ return closure_nr_remaining(cl) > 1
? __closure_sync_timeout(cl, timeout)
: 0;
}
@@ -275,8 +275,6 @@ static inline void closure_queue(struct closure *cl)
*/
static inline void closure_get(struct closure *cl)
{
- cl->closure_get_happened = true;
-
#ifdef CONFIG_DEBUG_CLOSURES
BUG_ON((atomic_inc_return(&cl->remaining) &
CLOSURE_REMAINING_MASK) <= 1);
@@ -314,7 +312,6 @@ static inline void closure_init(struct closure *cl, struct closure *parent)
closure_get(parent);
atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
- cl->closure_get_happened = false;
closure_debug_create(cl);
closure_set_ip(cl);
diff --git a/lib/closure.c b/lib/closure.c
index 4fb78d18ee1b..dc31cd5ed625 100644
--- a/lib/closure.c
+++ b/lib/closure.c
@@ -13,65 +13,75 @@
#include <linux/seq_file.h>
#include <linux/sched/debug.h>
-static inline void closure_put_after_sub_checks(struct closure *cl, int flags)
+static inline void closure_put_after_sub_checks(struct closure *cl, unsigned r)
{
- int r = flags & CLOSURE_REMAINING_MASK;
+ unsigned count = r & CLOSURE_REMAINING_MASK;
- if (WARN(flags & CLOSURE_GUARD_MASK,
+ if (WARN(r & CLOSURE_GUARD_MASK,
"closure %ps has guard bits set: %x (%u)",
cl->fn,
- flags & CLOSURE_GUARD_MASK, (unsigned) __fls(r)))
+ r, (unsigned) __fls(r & CLOSURE_GUARD_MASK)))
r &= ~CLOSURE_GUARD_MASK;
- WARN(!r && (flags & ~CLOSURE_DESTRUCTOR),
+ WARN(!count && (r & ~CLOSURE_DESTRUCTOR),
"closure %ps ref hit 0 with incorrect flags set: %x (%u)",
cl->fn,
- flags & ~CLOSURE_DESTRUCTOR, (unsigned) __fls(flags));
+ r, (unsigned) __fls(r));
}
-static inline void closure_put_after_sub(struct closure *cl, int flags)
+enum new_closure_state {
+ CLOSURE_normal_put,
+ CLOSURE_requeue,
+ CLOSURE_done,
+};
+
+/* For clearing flags with the same atomic op as a put */
+void closure_sub(struct closure *cl, int v)
{
- closure_put_after_sub_checks(cl, flags);
+ enum new_closure_state s;
+ struct task_struct *sleeper;
- if (!(flags & CLOSURE_REMAINING_MASK)) {
- smp_acquire__after_ctrl_dep();
+ guard(rcu)();
- cl->closure_get_happened = false;
+ int old = atomic_read(&cl->remaining), new;
+ do {
+ new = old - v;
+ closure_put_after_sub_checks(cl, new);
- if (cl->fn && !(flags & CLOSURE_DESTRUCTOR)) {
- atomic_set(&cl->remaining,
- CLOSURE_REMAINING_INITIALIZER);
- closure_queue(cl);
- } else {
- struct closure *parent = cl->parent;
- closure_fn *destructor = cl->fn;
+ sleeper = new & CLOSURE_SLEEPING ? cl->sleeper : NULL;
- closure_debug_destroy(cl);
+ if (new & CLOSURE_REMAINING_MASK)
+ s = CLOSURE_normal_put;
+ else if (cl->fn && !(new & CLOSURE_DESTRUCTOR))
+ s = CLOSURE_requeue;
+ else
+ s = CLOSURE_done;
+ } while (!atomic_try_cmpxchg_release(&cl->remaining, &old, new));
- if (destructor)
- destructor(&cl->work);
+ if (s == CLOSURE_normal_put)
+ return;
- if (parent)
- closure_put(parent);
- }
+ if (sleeper) {
+ wake_up_process(sleeper);
+ return;
}
-}
-/* For clearing flags with the same atomic op as a put */
-void closure_sub(struct closure *cl, int v)
-{
- closure_put_after_sub(cl, atomic_sub_return_release(v, &cl->remaining));
-}
-EXPORT_SYMBOL(closure_sub);
+ if (s == CLOSURE_requeue) {
+ closure_queue(cl);
+ } else {
+ struct closure *parent = cl->parent;
+ closure_fn *destructor = cl->fn;
-/*
- * closure_put - decrement a closure's refcount
- */
-void closure_put(struct closure *cl)
-{
- closure_put_after_sub(cl, atomic_dec_return_release(&cl->remaining));
+ closure_debug_destroy(cl);
+
+ if (destructor)
+ destructor(&cl->work);
+
+ if (parent)
+ closure_put(parent);
+ }
}
-EXPORT_SYMBOL(closure_put);
+EXPORT_SYMBOL(closure_sub);
/*
* closure_wake_up - wake up all closures on a wait list, without memory barrier
@@ -107,7 +117,6 @@ bool closure_wait(struct closure_waitlist *waitlist, struct closure *cl)
if (atomic_read(&cl->remaining) & CLOSURE_WAITING)
return false;
- cl->closure_get_happened = true;
closure_set_waiting(cl, _RET_IP_);
atomic_add(CLOSURE_WAITING + 1, &cl->remaining);
llist_add(&cl->list, &waitlist->list);
@@ -116,34 +125,16 @@ bool closure_wait(struct closure_waitlist *waitlist, struct closure *cl)
}
EXPORT_SYMBOL(closure_wait);
-struct closure_syncer {
- struct task_struct *task;
- int done;
-};
-
-static CLOSURE_CALLBACK(closure_sync_fn)
-{
- struct closure *cl = container_of(ws, struct closure, work);
- struct closure_syncer *s = cl->s;
- struct task_struct *p;
-
- rcu_read_lock();
- p = READ_ONCE(s->task);
- s->done = 1;
- wake_up_process(p);
- rcu_read_unlock();
-}
-
void __sched __closure_sync(struct closure *cl)
{
- struct closure_syncer s = { .task = current };
-
- cl->s = &s;
- continue_at(cl, closure_sync_fn, NULL);
+ cl->sleeper = current;
+ closure_sub(cl,
+ CLOSURE_REMAINING_INITIALIZER -
+ CLOSURE_SLEEPING);
while (1) {
set_current_state(TASK_UNINTERRUPTIBLE);
- if (s.done)
+ if (atomic_read(&cl->remaining) & CLOSURE_RUNNING)
break;
schedule();
}
@@ -157,31 +148,25 @@ EXPORT_SYMBOL(__closure_sync);
* for outstanding get()s to finish) and returning once closure refcount is 0.
*
* Unlike closure_sync() this doesn't reinit the ref to 1; subsequent
- * closure_get_not_zero() calls waill fail.
+ * closure_get_not_zero() calls will fail.
*/
void __sched closure_return_sync(struct closure *cl)
{
- struct closure_syncer s = { .task = current };
-
- cl->s = &s;
- set_closure_fn(cl, closure_sync_fn, NULL);
-
- unsigned flags = atomic_sub_return_release(1 + CLOSURE_RUNNING - CLOSURE_DESTRUCTOR,
- &cl->remaining);
+ cl->sleeper = current;
+ closure_sub(cl,
+ CLOSURE_REMAINING_INITIALIZER -
+ CLOSURE_DESTRUCTOR -
+ CLOSURE_SLEEPING);
- closure_put_after_sub_checks(cl, flags);
-
- if (unlikely(flags & CLOSURE_REMAINING_MASK)) {
- while (1) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- if (s.done)
- break;
- schedule();
- }
-
- __set_current_state(TASK_RUNNING);
+ while (1) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ if (atomic_read(&cl->remaining) & CLOSURE_RUNNING)
+ break;
+ schedule();
}
+ __set_current_state(TASK_RUNNING);
+
if (cl->parent)
closure_put(cl->parent);
}
@@ -189,32 +174,33 @@ EXPORT_SYMBOL(closure_return_sync);
int __sched __closure_sync_timeout(struct closure *cl, unsigned long timeout)
{
- struct closure_syncer s = { .task = current };
int ret = 0;
- cl->s = &s;
- continue_at(cl, closure_sync_fn, NULL);
+ cl->sleeper = current;
+ closure_sub(cl,
+ CLOSURE_REMAINING_INITIALIZER -
+ CLOSURE_SLEEPING);
while (1) {
set_current_state(TASK_UNINTERRUPTIBLE);
- if (s.done)
- break;
- if (!timeout) {
- /*
- * Carefully undo the continue_at() - but only if it
- * hasn't completed, i.e. the final closure_put() hasn't
- * happened yet:
- */
- unsigned old, new, v = atomic_read(&cl->remaining);
- do {
- old = v;
- if (!old || (old & CLOSURE_RUNNING))
- goto success;
-
- new = old + CLOSURE_REMAINING_INITIALIZER;
- } while ((v = atomic_cmpxchg(&cl->remaining, old, new)) != old);
- ret = -ETIME;
- }
+ /*
+			 * Carefully undo the closure_sub() above - but only if it
+ * hasn't completed, i.e. the final closure_put() hasn't
+ * happened yet:
+ */
+ unsigned old = atomic_read(&cl->remaining), new;
+ do {
+ if (old & CLOSURE_RUNNING)
+ goto success;
+
+ if (timeout) {
+ timeout = schedule_timeout(timeout);
+ continue;
+ }
+
+ new = old + CLOSURE_REMAINING_INITIALIZER;
+ } while (!atomic_try_cmpxchg(&cl->remaining, &old, new));
+ ret = -ETIME;
timeout = schedule_timeout(timeout);
}