-rw-r--r--  fs/bcachefs/backpointers.c              14
-rw-r--r--  fs/bcachefs/bcachefs_format.h            8
-rw-r--r--  fs/bcachefs/btree_gc.c                   8
-rw-r--r--  fs/bcachefs/btree_locking.c             14
-rw-r--r--  fs/bcachefs/buckets.c                   31
-rw-r--r--  fs/bcachefs/disk_accounting_format.h    10
-rw-r--r--  fs/bcachefs/ec.c                         9
-rw-r--r--  fs/bcachefs/extents.c                    2
-rw-r--r--  fs/bcachefs/fsck.c                       6
-rw-r--r--  fs/bcachefs/migrate.c                   13
-rw-r--r--  fs/bcachefs/opts.c                      34
-rw-r--r--  fs/bcachefs/opts.h                       2
-rw-r--r--  fs/bcachefs/progress.c                  39
-rw-r--r--  fs/bcachefs/progress.h                  12
-rw-r--r--  fs/bcachefs/rebalance.c                258
-rw-r--r--  fs/bcachefs/recovery.c                  12
-rw-r--r--  fs/bcachefs/sb-downgrade.c              11
-rw-r--r--  fs/bcachefs/sb-errors_format.h           5
-rw-r--r--  fs/bcachefs/super.c                     61
-rw-r--r--  fs/bcachefs/sysfs.c                      2
-rw-r--r--  fs/bcachefs/util.c                       8
-rw-r--r--  fs/bcachefs/xattr.c                      2
-rw-r--r--  include/linux/closure.h                 11
-rw-r--r--  lib/closure.c                           89
24 files changed, 431 insertions(+), 230 deletions(-)
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
index c662eeba66ab..3193dbcfc3c6 100644
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -432,6 +432,10 @@ fsck_err:
/* verify that every backpointer has a corresponding alloc key */
int bch2_check_btree_backpointers(struct bch_fs *c)
{
+ struct progress_indicator_state progress;
+
+ bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_backpointers));
+
struct bkey_buf last_flushed;
bch2_bkey_buf_init(&last_flushed);
bkey_init(&last_flushed.k->k);
@@ -439,8 +443,10 @@ int bch2_check_btree_backpointers(struct bch_fs *c)
CLASS(btree_trans, trans)(c);
int ret = for_each_btree_key_commit(trans, iter,
BTREE_ID_backpointers, POS_MIN, 0, k,
- NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- bch2_check_backpointer_has_valid_bucket(trans, k, &last_flushed));
+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
+ progress_update_iter(trans, &progress, &iter);
+ bch2_check_backpointer_has_valid_bucket(trans, k, &last_flushed);
+ }));
bch2_bkey_buf_exit(&last_flushed, c);
return ret;
@@ -815,7 +821,9 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
struct progress_indicator_state progress;
int ret = 0;
- bch2_progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_extents)|BIT_ULL(BTREE_ID_reflink));
+ bch2_progress_init_inner(&progress, trans->c,
+ btree_has_data_ptrs_mask,
+ ~0ULL);
for (enum btree_id btree_id = 0;
btree_id < btree_id_nr_alive(c);
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index 0839397105a9..d29bd684b137 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -706,7 +706,8 @@ struct bch_sb_field_ext {
x(fast_device_removal, BCH_VERSION(1, 27)) \
x(inode_has_case_insensitive, BCH_VERSION(1, 28)) \
x(extent_snapshot_whiteouts, BCH_VERSION(1, 29)) \
- x(31bit_dirent_offset, BCH_VERSION(1, 30))
+ x(31bit_dirent_offset, BCH_VERSION(1, 30)) \
+ x(btree_node_accounting, BCH_VERSION(1, 31))
enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9,
@@ -717,7 +718,7 @@ enum bcachefs_metadata_version {
};
static const __maybe_unused
-unsigned bcachefs_metadata_required_upgrade_below = bcachefs_metadata_version_rebalance_work;
+unsigned bcachefs_metadata_required_upgrade_below = bcachefs_metadata_version_btree_node_accounting;
#define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1)
@@ -965,7 +966,8 @@ enum bch_sb_feature {
x(alloc_info, 0) \
x(alloc_metadata, 1) \
x(extents_above_btree_updates_done, 2) \
- x(bformat_overflow_done, 3)
+ x(bformat_overflow_done, 3) \
+ x(no_stale_ptrs, 4)
enum bch_sb_compat {
#define x(f, n) BCH_COMPAT_##f,
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index f5f960326f4e..63dc0836bf08 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -780,7 +780,7 @@ static int bch2_gc_btrees(struct bch_fs *c)
int ret = 0;
struct progress_indicator_state progress;
- bch2_progress_init(&progress, c, ~0ULL);
+ bch2_progress_init_inner(&progress, c, ~0ULL, ~0ULL);
enum btree_id ids[BTREE_ID_NR];
for (unsigned i = 0; i < BTREE_ID_NR; i++)
@@ -1249,6 +1249,12 @@ int bch2_gc_gens(struct bch_fs *c)
bch2_time_stats_update(&c->times[BCH_TIME_btree_gc], start_time);
trace_and_count(c, gc_gens_end, c);
+
+ if (!(c->sb.compat & BIT_ULL(BCH_COMPAT_no_stale_ptrs))) {
+ guard(mutex)(&c->sb_lock);
+ c->disk_sb.sb->compat[0] |= cpu_to_le64(BIT_ULL(BCH_COMPAT_no_stale_ptrs));
+ bch2_write_super(c);
+ }
err:
for_each_member_device(c, ca) {
kvfree(ca->oldest_gen);
diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c
index a4f8aac448c0..0047746405ab 100644
--- a/fs/bcachefs/btree_locking.c
+++ b/fs/bcachefs/btree_locking.c
@@ -69,6 +69,7 @@ struct trans_waiting_for_lock {
struct lock_graph {
struct trans_waiting_for_lock g[8];
unsigned nr;
+ bool printed_chain;
};
static noinline void print_cycle(struct printbuf *out, struct lock_graph *g)
@@ -89,6 +90,10 @@ static noinline void print_cycle(struct printbuf *out, struct lock_graph *g)
static noinline void print_chain(struct printbuf *out, struct lock_graph *g)
{
+ if (g->printed_chain || g->nr <= 1)
+ return;
+ g->printed_chain = true;
+
struct trans_waiting_for_lock *i;
for (i = g->g; i != g->g + g->nr; i++) {
@@ -124,6 +129,7 @@ static void __lock_graph_down(struct lock_graph *g, struct btree_trans *trans)
.node_want = trans->locking,
.lock_want = trans->locking_wait.lock_want,
};
+ g->printed_chain = false;
}
static void lock_graph_down(struct lock_graph *g, struct btree_trans *trans)
@@ -265,8 +271,12 @@ static int lock_graph_descend(struct lock_graph *g, struct btree_trans *trans,
if (unlikely(g->nr == ARRAY_SIZE(g->g))) {
closure_put(&trans->ref);
- if (orig_trans->lock_may_not_fail)
+ if (orig_trans->lock_may_not_fail) {
+ /* Other threads will have to rerun the cycle detector: */
+ for (struct trans_waiting_for_lock *i = g->g + 1; i < g->g + g->nr; i++)
+ wake_up_process(i->trans->locking_wait.task);
return 0;
+ }
lock_graph_pop_all(g);
@@ -398,7 +408,7 @@ next:
}
}
up:
- if (g.nr > 1 && cycle)
+ if (cycle)
print_chain(cycle, &g);
lock_graph_up(&g);
goto next;
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 021f5cb7998d..1b999b8b0921 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -462,6 +462,7 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca,
size_t bucket_nr = PTR_BUCKET_NR(ca, ptr);
CLASS(printbuf, buf)();
bool inserting = sectors > 0;
+ int ret = 0;
BUG_ON(!sectors);
@@ -489,8 +490,17 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca,
BCH_FSCK_ERR_ptr_too_stale);
}
- if (b_gen != ptr->gen && ptr->cached)
+ if (b_gen != ptr->gen && ptr->cached) {
+ if (fsck_err_on(c->sb.compat & BIT_ULL(BCH_COMPAT_no_stale_ptrs),
+ trans, stale_ptr_with_no_stale_ptrs_feature,
+ "stale cached ptr, but have no_stale_ptrs feature\n%s",
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
+ guard(mutex)(&c->sb_lock);
+ c->disk_sb.sb->compat[0] &= ~cpu_to_le64(BIT_ULL(BCH_COMPAT_no_stale_ptrs));
+ bch2_write_super(c);
+ }
return 1;
+ }
if (unlikely(b_gen != ptr->gen)) {
bch2_log_msg_start(c, &buf);
@@ -530,7 +540,8 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca,
}
*bucket_sectors += sectors;
- return 0;
+fsck_err:
+ return ret;
}
void bch2_trans_account_disk_usage_change(struct btree_trans *trans)
@@ -749,6 +760,7 @@ static int __trigger_extent(struct btree_trans *trans,
enum btree_iter_update_trigger_flags flags)
{
bool gc = flags & BTREE_TRIGGER_gc;
+ bool insert = !(flags & BTREE_TRIGGER_overwrite);
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
@@ -802,7 +814,7 @@ static int __trigger_extent(struct btree_trans *trans,
if (cur_compression_type &&
cur_compression_type != p.crc.compression_type) {
- if (flags & BTREE_TRIGGER_overwrite)
+ if (!insert)
bch2_u64s_neg(compression_acct, ARRAY_SIZE(compression_acct));
ret = bch2_disk_accounting_mod2(trans, gc, compression_acct,
@@ -835,7 +847,7 @@ static int __trigger_extent(struct btree_trans *trans,
}
if (cur_compression_type) {
- if (flags & BTREE_TRIGGER_overwrite)
+ if (!insert)
bch2_u64s_neg(compression_acct, ARRAY_SIZE(compression_acct));
ret = bch2_disk_accounting_mod2(trans, gc, compression_acct,
@@ -845,12 +857,17 @@ static int __trigger_extent(struct btree_trans *trans,
}
if (level) {
- ret = bch2_disk_accounting_mod2_nr(trans, gc, &replicas_sectors, 1, btree, btree_id);
+ const bool leaf_node = level == 1;
+ s64 v[3] = {
+ replicas_sectors,
+ insert ? 1 : -1,
+ !leaf_node ? (insert ? 1 : -1) : 0,
+ };
+
+ ret = bch2_disk_accounting_mod2(trans, gc, v, btree, btree_id);
if (ret)
return ret;
} else {
- bool insert = !(flags & BTREE_TRIGGER_overwrite);
-
s64 v[3] = {
insert ? 1 : -1,
insert ? k.k->size : -((s64) k.k->size),
diff --git a/fs/bcachefs/disk_accounting_format.h b/fs/bcachefs/disk_accounting_format.h
index 8269af1dbe2a..730a17ea4243 100644
--- a/fs/bcachefs/disk_accounting_format.h
+++ b/fs/bcachefs/disk_accounting_format.h
@@ -108,7 +108,7 @@ static inline bool data_type_is_hidden(enum bch_data_type type)
x(dev_data_type, 3, 3) \
x(compression, 4, 3) \
x(snapshot, 5, 1) \
- x(btree, 6, 1) \
+ x(btree, 6, 3) \
x(rebalance_work, 7, 1) \
x(inum, 8, 3)
@@ -174,6 +174,14 @@ struct bch_acct_snapshot {
__u32 id;
} __packed;
+/*
+ * Metadata accounting per btree id:
+ * [
+ * total btree disk usage in sectors
+ * total number of btree nodes
+ * number of non-leaf btree nodes
+ * ]
+ */
struct bch_acct_btree {
__u32 id;
} __packed;
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index 271e252152da..89621a43c51f 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -22,6 +22,7 @@
#include "io_write.h"
#include "keylist.h"
#include "lru.h"
+#include "rebalance.h"
#include "recovery.h"
#include "replicas.h"
#include "super-io.h"
@@ -1129,7 +1130,13 @@ static int ec_stripe_update_extent(struct btree_trans *trans,
(union bch_extent_entry *) ec_ptr,
(union bch_extent_entry *) &stripe_ptr);
- ret = bch2_trans_update(trans, &iter, n, 0);
+ struct bch_inode_opts opts;
+
+ ret = bch2_extent_get_io_opts_one(trans, &opts, &iter, bkey_i_to_s_c(n),
+ SET_NEEDS_REBALANCE_other) ?:
+ bch2_bkey_set_needs_rebalance(trans->c, &opts, n,
+ SET_NEEDS_REBALANCE_other, 0) ?:
+ bch2_trans_update(trans, &iter, n, 0);
out:
bch2_trans_iter_exit(&iter);
return ret;
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index 43367d4e671a..3274ba42c995 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -1250,7 +1250,7 @@ static void __bch2_bkey_drop_stale_ptrs(struct bch_fs *c, struct bkey_s k)
int bch2_bkey_drop_stale_ptrs(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k)
{
- if (!bch2_bkey_has_stale_ptrs(trans->c, k)) {
+ if (bch2_bkey_has_stale_ptrs(trans->c, k)) {
struct bkey_i *u = bch2_bkey_make_mut(trans, iter, &k,
BTREE_UPDATE_internal_snapshot_node);
int ret = PTR_ERR_OR_ZERO(u);
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index ccc44b1fc178..3bde5c07b528 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -1963,7 +1963,8 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
}
}
- ret = check_extent_overbig(trans, iter, k);
+ ret = check_extent_overbig(trans, iter, k) ?:
+ bch2_bkey_drop_stale_ptrs(trans, iter, k);
if (ret)
goto err;
@@ -2040,7 +2041,8 @@ int bch2_check_indirect_extents(struct bch_fs *c)
BCH_TRANS_COMMIT_no_enospc, ({
progress_update_iter(trans, &progress, &iter);
bch2_disk_reservation_put(c, &res);
- check_extent_overbig(trans, &iter, k);
+ check_extent_overbig(trans, &iter, k) ?:
+ bch2_bkey_drop_stale_ptrs(trans, &iter, k);
}));
bch2_disk_reservation_put(c, &res);
diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c
index 519ef16669e4..92edff50b655 100644
--- a/fs/bcachefs/migrate.c
+++ b/fs/bcachefs/migrate.c
@@ -266,10 +266,15 @@ int bch2_dev_data_drop(struct bch_fs *c, unsigned dev_idx,
unsigned flags, struct printbuf *err)
{
struct progress_indicator_state progress;
+ int ret;
+
bch2_progress_init(&progress, c,
- BIT_ULL(BTREE_ID_extents)|
- BIT_ULL(BTREE_ID_reflink));
+ btree_has_data_ptrs_mask & ~BIT_ULL(BTREE_ID_stripes));
+
+ if ((ret = bch2_dev_usrdata_drop(c, &progress, dev_idx, flags, err)))
+ return ret;
+
+ bch2_progress_init_inner(&progress, c, 0, ~0ULL);
- return bch2_dev_usrdata_drop(c, &progress, dev_idx, flags, err) ?:
- bch2_dev_metadata_drop(c, &progress, dev_idx, flags, err);
+ return bch2_dev_metadata_drop(c, &progress, dev_idx, flags, err);
}
diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c
index 122bc98e4cbb..bd5faafc9aa7 100644
--- a/fs/bcachefs/opts.c
+++ b/fs/bcachefs/opts.c
@@ -518,7 +518,8 @@ void bch2_opts_to_text(struct printbuf *out,
}
}
-int bch2_opt_hook_pre_set(struct bch_fs *c, struct bch_dev *ca, u64 inum, enum bch_opt_id id, u64 v)
+int bch2_opt_hook_pre_set(struct bch_fs *c, struct bch_dev *ca, u64 inum, enum bch_opt_id id, u64 v,
+ bool change)
{
int ret = 0;
@@ -542,13 +543,26 @@ int bch2_opt_hook_pre_set(struct bch_fs *c, struct bch_dev *ca, u64 inum, enum b
break;
}
+ if (change &&
+ (id == Opt_foreground_target ||
+ id == Opt_background_target ||
+ id == Opt_promote_target ||
+ id == Opt_compression ||
+ id == Opt_background_compression ||
+ id == Opt_data_checksum ||
+ id == Opt_data_replicas)) {
+ ret = bch2_set_rebalance_needs_scan(c, inum);
+ if (ret)
+ return ret;
+ }
+
return ret;
}
int bch2_opts_hooks_pre_set(struct bch_fs *c)
{
for (unsigned i = 0; i < bch2_opts_nr; i++) {
- int ret = bch2_opt_hook_pre_set(c, NULL, 0, i, bch2_opt_get_by_id(&c->opts, i));
+ int ret = bch2_opt_hook_pre_set(c, NULL, 0, i, bch2_opt_get_by_id(&c->opts, i), false);
if (ret)
return ret;
}
@@ -559,14 +573,18 @@ int bch2_opts_hooks_pre_set(struct bch_fs *c)
void bch2_opt_hook_post_set(struct bch_fs *c, struct bch_dev *ca, u64 inum,
enum bch_opt_id id, u64 v)
{
- switch (id) {
- case Opt_foreground_target:
- case Opt_compression:
- case Opt_background_target:
- case Opt_background_compression:
+ if (id == Opt_foreground_target ||
+ id == Opt_background_target ||
+ id == Opt_promote_target ||
+ id == Opt_compression ||
+ id == Opt_background_compression ||
+ id == Opt_data_checksum ||
+ id == Opt_data_replicas) {
bch2_set_rebalance_needs_scan(c, inum);
bch2_rebalance_wakeup(c);
- break;
+ }
+
+ switch (id) {
case Opt_rebalance_enabled:
bch2_rebalance_wakeup(c);
break;
diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h
index 22cf109fb9c9..8b38f27afea4 100644
--- a/fs/bcachefs/opts.h
+++ b/fs/bcachefs/opts.h
@@ -658,7 +658,7 @@ void bch2_opts_to_text(struct printbuf *,
struct bch_fs *, struct bch_sb *,
unsigned, unsigned, unsigned);
-int bch2_opt_hook_pre_set(struct bch_fs *, struct bch_dev *, u64, enum bch_opt_id, u64);
+int bch2_opt_hook_pre_set(struct bch_fs *, struct bch_dev *, u64, enum bch_opt_id, u64, bool);
int bch2_opts_hooks_pre_set(struct bch_fs *);
void bch2_opt_hook_post_set(struct bch_fs *, struct bch_dev *, u64, enum bch_opt_id, u64);
diff --git a/fs/bcachefs/progress.c b/fs/bcachefs/progress.c
index 541ee951d1c9..7cc16490ffa9 100644
--- a/fs/bcachefs/progress.c
+++ b/fs/bcachefs/progress.c
@@ -4,14 +4,21 @@
#include "disk_accounting.h"
#include "progress.h"
-void bch2_progress_init(struct progress_indicator_state *s,
- struct bch_fs *c,
- u64 btree_id_mask)
+void bch2_progress_init_inner(struct progress_indicator_state *s,
+ struct bch_fs *c,
+ u64 leaf_btree_id_mask,
+ u64 inner_btree_id_mask)
{
memset(s, 0, sizeof(*s));
s->next_print = jiffies + HZ * 10;
+ /* This is only an estimation: nodes can have different replica counts */
+ const u32 expected_node_disk_sectors =
+ READ_ONCE(c->opts.metadata_replicas) * btree_sectors(c);
+
+ const u64 btree_id_mask = leaf_btree_id_mask | inner_btree_id_mask;
+
for (unsigned i = 0; i < btree_id_nr_alive(c); i++) {
if (!(btree_id_mask & BIT_ULL(i)))
continue;
@@ -19,9 +26,29 @@ void bch2_progress_init(struct progress_indicator_state *s,
struct disk_accounting_pos acc;
disk_accounting_key_init(acc, btree, .id = i);
- u64 v;
- bch2_accounting_mem_read(c, disk_accounting_pos_to_bpos(&acc), &v, 1);
- s->nodes_total += div64_ul(v, btree_sectors(c));
+ struct {
+ u64 disk_sectors;
+ u64 total_nodes;
+ u64 inner_nodes;
+ } v = {0};
+ bch2_accounting_mem_read(c, disk_accounting_pos_to_bpos(&acc),
+ (u64 *)&v, sizeof(v) / sizeof(u64));
+
+ /* Better to estimate as 0 than the total node count */
+ if (inner_btree_id_mask & BIT_ULL(i))
+ s->nodes_total += v.inner_nodes;
+
+ if (!(leaf_btree_id_mask & BIT_ULL(i)))
+ continue;
+
+ /*
+ * We check for zeros to degrade gracefully when run
+ * with un-upgraded accounting info (missing some counters).
+ */
+ if (v.total_nodes != 0)
+ s->nodes_total += v.total_nodes - v.inner_nodes;
+ else
+ s->nodes_total += div_u64(v.disk_sectors, expected_node_disk_sectors);
}
}
diff --git a/fs/bcachefs/progress.h b/fs/bcachefs/progress.h
index 972a73087ffe..91f345337709 100644
--- a/fs/bcachefs/progress.h
+++ b/fs/bcachefs/progress.h
@@ -20,7 +20,17 @@ struct progress_indicator_state {
struct btree *last_node;
};
-void bch2_progress_init(struct progress_indicator_state *, struct bch_fs *, u64);
+void bch2_progress_init_inner(struct progress_indicator_state *s,
+ struct bch_fs *c,
+ u64 leaf_btree_id_mask,
+ u64 inner_btree_id_mask);
+
+static inline void bch2_progress_init(struct progress_indicator_state *s,
+ struct bch_fs *c, u64 btree_id_mask)
+{
+ bch2_progress_init_inner(s, c, btree_id_mask, 0);
+}
+
void bch2_progress_update_iter(struct btree_trans *,
struct progress_indicator_state *,
struct btree_iter *,
diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c
index fa73de7890da..59593e6420d1 100644
--- a/fs/bcachefs/rebalance.c
+++ b/fs/bcachefs/rebalance.c
@@ -92,122 +92,107 @@ void bch2_extent_rebalance_to_text(struct printbuf *out, struct bch_fs *c,
}
}
-static inline unsigned bch2_bkey_ptrs_need_compress(struct bch_fs *c,
- struct bch_inode_opts *opts,
- struct bkey_s_c k,
- struct bkey_ptrs_c ptrs)
+static void bch2_bkey_needs_rebalance(struct bch_fs *c, struct bkey_s_c k,
+ struct bch_inode_opts *io_opts,
+ unsigned *move_ptrs,
+ unsigned *compress_ptrs,
+ u64 *sectors)
{
- if (!opts->background_compression)
- return 0;
+ *move_ptrs = 0;
+ *compress_ptrs = 0;
+ *sectors = 0;
+
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+
+ const struct bch_extent_rebalance *rb_opts = bch2_bkey_ptrs_rebalance_opts(ptrs);
+ if (!io_opts && !rb_opts)
+ return;
+
+ if (bch2_bkey_extent_ptrs_flags(ptrs) & BIT_ULL(BCH_EXTENT_FLAG_poisoned))
+ return;
+
+ unsigned compression_type =
+ bch2_compression_opt_to_type(io_opts
+ ? io_opts->background_compression
+ : rb_opts->background_compression);
+ unsigned target = io_opts
+ ? io_opts->background_target
+ : rb_opts->background_target;
+ if (target && !bch2_target_accepts_data(c, BCH_DATA_user, target))
+ target = 0;
- unsigned compression_type = bch2_compression_opt_to_type(opts->background_compression);
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
- unsigned ptr_bit = 1;
- unsigned rewrite_ptrs = 0;
+ bool incompressible = false, unwritten = false;
- bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
- if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible ||
- p.ptr.unwritten)
- return 0;
+ unsigned ptr_idx = 1;
- if (!p.ptr.cached && p.crc.compression_type != compression_type)
- rewrite_ptrs |= ptr_bit;
- ptr_bit <<= 1;
- }
-
- return rewrite_ptrs;
-}
+ guard(rcu)();
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+ incompressible |= p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible;
+ unwritten |= p.ptr.unwritten;
-static inline unsigned bch2_bkey_ptrs_need_move(struct bch_fs *c,
- struct bch_inode_opts *opts,
- struct bkey_ptrs_c ptrs)
-{
- if (!opts->background_target ||
- !bch2_target_accepts_data(c, BCH_DATA_user, opts->background_target))
- return 0;
+ if (!p.ptr.cached) {
+ if (p.crc.compression_type != compression_type)
+ *compress_ptrs |= ptr_idx;
- unsigned ptr_bit = 1;
- unsigned rewrite_ptrs = 0;
+ if (target && !bch2_dev_in_target(c, p.ptr.dev, target))
+ *move_ptrs |= ptr_idx;
+ }
- guard(rcu)();
- bkey_for_each_ptr(ptrs, ptr) {
- if (!ptr->cached && !bch2_dev_in_target(c, ptr->dev, opts->background_target))
- rewrite_ptrs |= ptr_bit;
- ptr_bit <<= 1;
+ ptr_idx <<= 1;
}
- return rewrite_ptrs;
-}
+ if (unwritten)
+ *compress_ptrs = 0;
+ if (incompressible)
+ *compress_ptrs = 0;
-static unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c,
- struct bch_inode_opts *opts,
- struct bkey_s_c k)
-{
- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ unsigned rb_ptrs = *move_ptrs | *compress_ptrs;
- if (bch2_bkey_extent_ptrs_flags(ptrs) & BIT_ULL(BCH_EXTENT_FLAG_poisoned))
- return 0;
+ if (!rb_ptrs)
+ return;
- return bch2_bkey_ptrs_need_compress(c, opts, k, ptrs) |
- bch2_bkey_ptrs_need_move(c, opts, ptrs);
+ ptr_idx = 1;
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+ if (rb_ptrs & ptr_idx)
+ *sectors += p.crc.compressed_size;
+ ptr_idx <<= 1;
+ }
}
u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k)
{
- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-
- const struct bch_extent_rebalance *opts = bch2_bkey_ptrs_rebalance_opts(ptrs);
- if (!opts)
- return 0;
-
- if (bch2_bkey_extent_ptrs_flags(ptrs) & BIT_ULL(BCH_EXTENT_FLAG_poisoned))
- return 0;
-
- const union bch_extent_entry *entry;
- struct extent_ptr_decoded p;
- u64 sectors = 0;
-
- if (opts->background_compression) {
- unsigned compression_type = bch2_compression_opt_to_type(opts->background_compression);
-
- bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
- if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible ||
- p.ptr.unwritten) {
- sectors = 0;
- goto incompressible;
- }
-
- if (!p.ptr.cached && p.crc.compression_type != compression_type)
- sectors += p.crc.compressed_size;
- }
- }
-incompressible:
- if (opts->background_target) {
- guard(rcu)();
- bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
- if (!p.ptr.cached &&
- !bch2_dev_in_target(c, p.ptr.dev, opts->background_target))
- sectors += p.crc.compressed_size;
- }
+ unsigned move_ptrs = 0;
+ unsigned compress_ptrs = 0;
+ u64 sectors = 0;
+ bch2_bkey_needs_rebalance(c, k, NULL, &move_ptrs, &compress_ptrs, &sectors);
return sectors;
}
-static bool bch2_bkey_rebalance_needs_update(struct bch_fs *c, struct bch_inode_opts *opts,
- struct bkey_s_c k)
+static unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c,
+ struct bch_inode_opts *opts,
+ struct bkey_s_c k)
{
- if (!bkey_extent_is_direct_data(k.k))
- return 0;
+ unsigned move_ptrs = 0;
+ unsigned compress_ptrs = 0;
+ u64 sectors = 0;
- const struct bch_extent_rebalance *old = bch2_bkey_rebalance_opts(k);
+ bch2_bkey_needs_rebalance(c, k, opts, &move_ptrs, &compress_ptrs, &sectors);
+ return move_ptrs|compress_ptrs;
+}
- if (k.k->type == KEY_TYPE_reflink_v || bch2_bkey_ptrs_need_rebalance(c, opts, k)) {
- struct bch_extent_rebalance new = io_opts_to_rebalance_opts(c, opts);
- return old == NULL || memcmp(old, &new, sizeof(new));
- } else {
- return old != NULL;
+static inline bool bkey_should_have_rb_opts(struct bch_fs *c,
+ struct bch_inode_opts *opts,
+ struct bkey_s_c k)
+{
+ if (k.k->type == KEY_TYPE_reflink_v) {
+#define x(n) if (opts->n##_from_inode) return true;
+ BCH_REBALANCE_OPTS()
+#undef x
}
+ return bch2_bkey_ptrs_need_rebalance(c, opts, k);
}
int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bch_inode_opts *opts,
@@ -222,7 +207,7 @@ int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bch_inode_opts *opts,
struct bch_extent_rebalance *old =
(struct bch_extent_rebalance *) bch2_bkey_rebalance_opts(k.s_c);
- if (k.k->type == KEY_TYPE_reflink_v || bch2_bkey_ptrs_need_rebalance(c, opts, k.s_c)) {
+ if (bkey_should_have_rb_opts(c, opts, k.s_c)) {
if (!old) {
old = bkey_val_end(k);
k.k->u64s += sizeof(*old) / sizeof(u64);
@@ -243,22 +228,40 @@ static int bch2_get_update_rebalance_opts(struct btree_trans *trans,
struct bkey_s_c k,
enum set_needs_rebalance_ctx ctx)
{
+ struct bch_fs *c = trans->c;
+
BUG_ON(iter->flags & BTREE_ITER_is_extents);
BUG_ON(iter->flags & BTREE_ITER_filter_snapshots);
- const struct bch_extent_rebalance *r = k.k->type == KEY_TYPE_reflink_v
- ? bch2_bkey_rebalance_opts(k) : NULL;
- if (r) {
-#define x(_name) \
- if (r->_name##_from_inode) { \
- io_opts->_name = r->_name; \
- io_opts->_name##_from_inode = true; \
+ if (!bkey_extent_is_direct_data(k.k))
+ return 0;
+
+ bool may_update_indirect = ctx == SET_NEEDS_REBALANCE_opt_change_indirect;
+
+ /*
+ * If it's an indirect extent, and we walked to it directly, we won't
+ * have the options from the inode that were directly applied: options
+ * from the extent take precedence - unless the io_opts option came from
+ * the inode and may_update_indirect is true (walked from a
+ * REFLINK_P_MAY_UPDATE_OPTIONS pointer).
+ */
+ const struct bch_extent_rebalance *old = bch2_bkey_rebalance_opts(k);
+ if (old && k.k->type == KEY_TYPE_reflink_v) {
+#define x(_name) \
+ if (old->_name##_from_inode && \
+ !(may_update_indirect && io_opts->_name##_from_inode)) { \
+ io_opts->_name = old->_name; \
+ io_opts->_name##_from_inode = true; \
}
BCH_REBALANCE_OPTS()
#undef x
}
- if (!bch2_bkey_rebalance_needs_update(trans->c, io_opts, k))
+ struct bch_extent_rebalance new = io_opts_to_rebalance_opts(c, io_opts);
+
+ if (bkey_should_have_rb_opts(c, io_opts, k)
+ ? old && !memcmp(old, &new, sizeof(new))
+ : !old)
return 0;
struct bkey_i *n = bch2_trans_kmalloc(trans, bkey_bytes(k.k) + 8);
@@ -270,10 +273,10 @@ static int bch2_get_update_rebalance_opts(struct btree_trans *trans,
/* On successful transaction commit, @k was invalidated: */
- return bch2_bkey_set_needs_rebalance(trans->c, io_opts, n, ctx, 0) ?:
+ return bch2_bkey_set_needs_rebalance(c, io_opts, n, ctx, 0) ?:
bch2_trans_update(trans, iter, n, BTREE_UPDATE_internal_snapshot_node) ?:
bch2_trans_commit(trans, NULL, NULL, 0) ?:
- bch_err_throw(trans->c, transaction_restart_commit);
+ bch_err_throw(c, transaction_restart_commit);
}
static struct bch_inode_opts *bch2_extent_get_io_opts(struct btree_trans *trans,
@@ -569,23 +572,25 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans,
bch2_bkey_val_to_text(&buf, c, k);
prt_newline(&buf);
- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ unsigned move_ptrs = 0;
+ unsigned compress_ptrs = 0;
+ u64 sectors = 0;
- unsigned p = bch2_bkey_ptrs_need_compress(c, opts, k, ptrs);
- if (p) {
- prt_str(&buf, "compression=");
- bch2_compression_opt_to_text(&buf, opts->background_compression);
+ bch2_bkey_needs_rebalance(c, k, opts, &move_ptrs, &compress_ptrs, &sectors);
+
+ if (move_ptrs) {
+ prt_str(&buf, "move=");
+ bch2_target_to_text(&buf, c, opts->background_target);
prt_str(&buf, " ");
- bch2_prt_u64_base2(&buf, p);
+ bch2_prt_u64_base2(&buf, move_ptrs);
prt_newline(&buf);
}
- p = bch2_bkey_ptrs_need_move(c, opts, ptrs);
- if (p) {
- prt_str(&buf, "move=");
- bch2_target_to_text(&buf, c, opts->background_target);
+ if (compress_ptrs) {
+ prt_str(&buf, "compression=");
+ bch2_compression_opt_to_text(&buf, opts->background_compression);
prt_str(&buf, " ");
- bch2_prt_u64_base2(&buf, p);
+ bch2_prt_u64_base2(&buf, compress_ptrs);
prt_newline(&buf);
}
@@ -700,6 +705,8 @@ static int do_rebalance_scan(struct moving_context *ctxt,
BTREE_ITER_prefetch, k, ({
ctxt->stats->pos = BBPOS(iter.btree_id, iter.pos);
+ atomic64_add(k.k->size, &r->scan_stats.sectors_seen);
+
struct bch_inode_opts *opts = bch2_extent_get_apply_io_opts(trans,
snapshot_io_opts, iter.pos, &iter, k,
SET_NEEDS_REBALANCE_opt_change);
@@ -709,10 +716,31 @@ static int do_rebalance_scan(struct moving_context *ctxt,
REFLINK_P_MAY_UPDATE_OPTIONS(bkey_s_c_to_reflink_p(k).v)
? do_rebalance_scan_indirect(trans, bkey_s_c_to_reflink_p(k), opts)
: 0);
- })) ?:
- commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- bch2_clear_rebalance_needs_scan(trans, inum, cookie));
+ }));
+ if (ret)
+ goto out;
+
+ if (!inum) {
+ ret = for_each_btree_key_max(trans, iter, BTREE_ID_reflink,
+ POS_MIN, POS_MAX,
+ BTREE_ITER_all_snapshots|
+ BTREE_ITER_prefetch, k, ({
+ ctxt->stats->pos = BBPOS(iter.btree_id, iter.pos);
+
+ atomic64_add(k.k->size, &r->scan_stats.sectors_seen);
+
+ struct bch_inode_opts *opts = bch2_extent_get_apply_io_opts(trans,
+ snapshot_io_opts, iter.pos, &iter, k,
+ SET_NEEDS_REBALANCE_opt_change);
+ PTR_ERR_OR_ZERO(opts);
+ }));
+ if (ret)
+ goto out;
+ }
+ ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ bch2_clear_rebalance_needs_scan(trans, inum, cookie));
+out:
*sectors_scanned += atomic64_read(&r->scan_stats.sectors_seen);
/*
* Ensure that the rebalance_work entries we created are seen by the
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 531c2ef128ae..6942d3cfcba3 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -920,6 +920,13 @@ use_clean:
if (bch2_blacklist_entries_gc(c))
write_sb = true;
+ if (!(c->sb.compat & BIT_ULL(BCH_COMPAT_no_stale_ptrs)) &&
+ (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_check_extents)) &&
+ (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_check_indirect_extents))) {
+ c->disk_sb.sb->compat[0] |= cpu_to_le64(BIT_ULL(BCH_COMPAT_no_stale_ptrs));
+ write_sb = true;
+ }
+
if (write_sb)
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
@@ -982,8 +989,9 @@ int bch2_fs_initialize(struct bch_fs *c)
set_bit(BCH_FS_new_fs, &c->flags);
scoped_guard(mutex, &c->sb_lock) {
- c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
- c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done);
+ c->disk_sb.sb->compat[0] |= cpu_to_le64(BIT_ULL(BCH_COMPAT_extents_above_btree_updates_done));
+ c->disk_sb.sb->compat[0] |= cpu_to_le64(BIT_ULL(BCH_COMPAT_bformat_overflow_done));
+ c->disk_sb.sb->compat[0] |= cpu_to_le64(BIT_ULL(BCH_COMPAT_no_stale_ptrs));
bch2_check_version_downgrade(c);
diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c
index de56a1ee79db..bfd06fd5d506 100644
--- a/fs/bcachefs/sb-downgrade.c
+++ b/fs/bcachefs/sb-downgrade.c
@@ -104,7 +104,10 @@
x(inode_has_case_insensitive, \
BIT_ULL(BCH_RECOVERY_PASS_check_inodes), \
BCH_FSCK_ERR_inode_has_case_insensitive_not_set, \
- BCH_FSCK_ERR_inode_parent_has_case_insensitive_not_set)
+ BCH_FSCK_ERR_inode_parent_has_case_insensitive_not_set)\
+ x(btree_node_accounting, \
+ BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \
+ BCH_FSCK_ERR_accounting_mismatch)
#define DOWNGRADE_TABLE() \
x(bucket_stripe_sectors, \
@@ -152,7 +155,11 @@
BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \
BCH_FSCK_ERR_accounting_mismatch, \
BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0, \
- BCH_FSCK_ERR_accounting_key_junk_at_end)
+ BCH_FSCK_ERR_accounting_key_junk_at_end) \
+ x(btree_node_accounting, \
+ BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \
+ BCH_FSCK_ERR_accounting_mismatch, \
+ BCH_FSCK_ERR_accounting_key_nr_counters_wrong)
struct upgrade_downgrade_entry {
u64 recovery_passes;
diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h
index 728d878057af..77e3fc92e39b 100644
--- a/fs/bcachefs/sb-errors_format.h
+++ b/fs/bcachefs/sb-errors_format.h
@@ -170,9 +170,10 @@ enum bch_fsck_flags {
x(ptr_to_missing_replicas_entry, 149, FSCK_AUTOFIX) \
x(ptr_to_missing_stripe, 150, 0) \
x(ptr_to_incorrect_stripe, 151, 0) \
- x(ptr_gen_newer_than_bucket_gen, 152, FSCK_AUTOFIX) \
+ x(ptr_gen_newer_than_bucket_gen, 152, FSCK_AUTOFIX) \
x(ptr_too_stale, 153, 0) \
x(stale_dirty_ptr, 154, FSCK_AUTOFIX) \
+ x(stale_ptr_with_no_stale_ptrs_feature, 327, FSCK_AUTOFIX) \
x(ptr_bucket_data_type_mismatch, 155, 0) \
x(ptr_cached_and_erasure_coded, 156, 0) \
x(ptr_crc_uncompressed_size_too_small, 157, 0) \
@@ -338,7 +339,7 @@ enum bch_fsck_flags {
x(dirent_stray_data_after_cf_name, 305, 0) \
x(rebalance_work_incorrectly_set, 309, FSCK_AUTOFIX) \
x(rebalance_work_incorrectly_unset, 310, FSCK_AUTOFIX) \
- x(MAX, 327, 0)
+ x(MAX, 328, 0)
enum bch_sb_error_id {
#define x(t, n, ...) BCH_FSCK_ERR_##t = n,
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 473ad4b51180..03b12c2da097 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -238,6 +238,7 @@ static int bch2_dev_sysfs_online(struct bch_fs *, struct bch_dev *);
static void bch2_dev_io_ref_stop(struct bch_dev *, int);
static void __bch2_dev_read_only(struct bch_fs *, struct bch_dev *);
static int bch2_dev_attach_bdev(struct bch_fs *, struct bch_sb_handle *, struct printbuf *);
+static bool bch2_fs_will_resize_on_mount(struct bch_fs *);
struct bch_fs *bch2_dev_to_fs(dev_t dev)
{
@@ -964,6 +965,9 @@ static int bch2_fs_opt_version_init(struct bch_fs *c)
if (c->opts.journal_rewind)
c->opts.fsck = true;
+ bool may_upgrade_downgrade = !(c->sb.features & BIT_ULL(BCH_FEATURE_small_image)) ||
+ bch2_fs_will_resize_on_mount(c);
+
CLASS(printbuf, p)();
bch2_log_msg_start(c, &p);
@@ -1040,22 +1044,24 @@ static int bch2_fs_opt_version_init(struct bch_fs *c)
prt_bitflags(&p, __bch2_btree_ids, btrees_lost_data);
}
- if (bch2_check_version_downgrade(c)) {
- prt_str(&p, "\nVersion downgrade required:");
-
- __le64 passes = ext->recovery_passes_required[0];
- bch2_sb_set_downgrade(c,
- BCH_VERSION_MINOR(bcachefs_metadata_version_current),
- BCH_VERSION_MINOR(c->sb.version));
- passes = ext->recovery_passes_required[0] & ~passes;
- if (passes) {
- prt_str(&p, "\nrunning recovery passes: ");
- prt_bitflags(&p, bch2_recovery_passes,
- bch2_recovery_passes_from_stable(le64_to_cpu(passes)));
+ if (may_upgrade_downgrade) {
+ if (bch2_check_version_downgrade(c)) {
+ prt_str(&p, "\nVersion downgrade required:");
+
+ __le64 passes = ext->recovery_passes_required[0];
+ bch2_sb_set_downgrade(c,
+ BCH_VERSION_MINOR(bcachefs_metadata_version_current),
+ BCH_VERSION_MINOR(c->sb.version));
+ passes = ext->recovery_passes_required[0] & ~passes;
+ if (passes) {
+ prt_str(&p, "\nrunning recovery passes: ");
+ prt_bitflags(&p, bch2_recovery_passes,
+ bch2_recovery_passes_from_stable(le64_to_cpu(passes)));
+ }
}
- }
- check_version_upgrade(c);
+ check_version_upgrade(c);
+ }
c->opts.recovery_passes |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
@@ -1993,7 +1999,8 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags,
struct printbuf *err)
{
unsigned dev_idx = ca->dev_idx, data;
- bool fast_device_removal = !bch2_request_incompat_feature(c,
+ bool fast_device_removal = (c->sb.compat & BIT_ULL(BCH_COMPAT_no_stale_ptrs)) &&
+ !bch2_request_incompat_feature(c,
bcachefs_metadata_version_fast_device_removal);
int ret;
@@ -2421,15 +2428,29 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets, struct p
return 0;
}
+static bool bch2_dev_will_resize_on_mount(struct bch_dev *ca)
+{
+ return ca->mi.resize_on_mount &&
+ ca->mi.nbuckets < div64_u64(get_capacity(ca->disk_sb.bdev->bd_disk),
+ ca->mi.bucket_size);
+}
+
+static bool bch2_fs_will_resize_on_mount(struct bch_fs *c)
+{
+ for_each_online_member(c, ca, BCH_DEV_READ_REF_fs_resize_on_mount)
+ if (bch2_dev_will_resize_on_mount(ca))
+ return true;
+ return false;
+}
+
int bch2_fs_resize_on_mount(struct bch_fs *c)
{
for_each_online_member(c, ca, BCH_DEV_READ_REF_fs_resize_on_mount) {
- u64 old_nbuckets = ca->mi.nbuckets;
- u64 new_nbuckets = div64_u64(get_capacity(ca->disk_sb.bdev->bd_disk),
- ca->mi.bucket_size);
+ if (bch2_dev_will_resize_on_mount(ca)) {
+ u64 old_nbuckets = ca->mi.nbuckets;
+ u64 new_nbuckets = div64_u64(get_capacity(ca->disk_sb.bdev->bd_disk),
+ ca->mi.bucket_size);
- if (ca->mi.resize_on_mount &&
- new_nbuckets > ca->mi.nbuckets) {
bch_info(ca, "resizing to size %llu", new_nbuckets * ca->mi.bucket_size);
int ret = bch2_dev_buckets_resize(c, ca, new_nbuckets);
bch_err_fn(ca, ret);
diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c
index 4c6e6c46d18a..ef6312c50f88 100644
--- a/fs/bcachefs/sysfs.c
+++ b/fs/bcachefs/sysfs.c
@@ -784,7 +784,7 @@ static ssize_t sysfs_opt_store(struct bch_fs *c,
u64 v;
ret = bch2_opt_parse(c, opt, strim(tmp), &v, NULL) ?:
- bch2_opt_hook_pre_set(c, ca, 0, id, v);
+ bch2_opt_hook_pre_set(c, ca, 0, id, v, true);
kfree(tmp);
if (ret < 0)
diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c
index 2a9462275f92..16d746f1d7e9 100644
--- a/fs/bcachefs/util.c
+++ b/fs/bcachefs/util.c
@@ -299,8 +299,10 @@ int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task, unsigne
if (ret)
return ret;
+ skipnr += task == current;
+
do {
- nr_entries = stack_trace_save_tsk(task, stack->data, stack->size, skipnr + 1);
+ nr_entries = stack_trace_save_tsk(task, stack->data, stack->size, skipnr);
} while (nr_entries == stack->size &&
!(ret = darray_make_room_gfp(stack, stack->size * 2, gfp)));
@@ -321,8 +323,10 @@ void bch2_prt_backtrace(struct printbuf *out, bch_stacktrace *stack)
int bch2_prt_task_backtrace(struct printbuf *out, struct task_struct *task, unsigned skipnr, gfp_t gfp)
{
+ skipnr += task == current;
+
CLASS(bch_stacktrace, stack)();
- int ret = bch2_save_backtrace(&stack, task, skipnr + 1, gfp);
+ int ret = bch2_save_backtrace(&stack, task, skipnr, gfp);
bch2_prt_backtrace(out, &stack);
return ret;
diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c
index 784e75a21132..2b8d0502db1e 100644
--- a/fs/bcachefs/xattr.c
+++ b/fs/bcachefs/xattr.c
@@ -550,7 +550,7 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler,
if (ret < 0)
goto err;
- ret = bch2_opt_hook_pre_set(c, NULL, inode->ei_inode.bi_inum, opt_id, v);
+ ret = bch2_opt_hook_pre_set(c, NULL, inode->ei_inode.bi_inum, opt_id, v, true);
if (ret < 0)
goto err;
diff --git a/include/linux/closure.h b/include/linux/closure.h
index 880fe85e35e9..f626044d6ca2 100644
--- a/include/linux/closure.h
+++ b/include/linux/closure.h
@@ -135,7 +135,7 @@ enum closure_state {
};
#define CLOSURE_GUARD_MASK \
- ((CLOSURE_DESTRUCTOR|CLOSURE_WAITING|CLOSURE_RUNNING) << 1)
+ (((CLOSURE_DESTRUCTOR|CLOSURE_WAITING|CLOSURE_RUNNING) << 1)|(CLOSURE_BITS_START >> 1))
#define CLOSURE_REMAINING_MASK (CLOSURE_BITS_START - 1)
#define CLOSURE_REMAINING_INITIALIZER (1|CLOSURE_RUNNING)
@@ -169,11 +169,18 @@ struct closure {
};
void closure_sub(struct closure *cl, int v);
-void closure_put(struct closure *cl);
void __closure_wake_up(struct closure_waitlist *list);
bool closure_wait(struct closure_waitlist *list, struct closure *cl);
void __closure_sync(struct closure *cl);
+/*
+ * closure_put - decrement a closure's refcount
+ */
+static inline void closure_put(struct closure *cl)
+{
+ closure_sub(cl, 1);
+}
+
static inline unsigned closure_nr_remaining(struct closure *cl)
{
return atomic_read(&cl->remaining) & CLOSURE_REMAINING_MASK;
diff --git a/lib/closure.c b/lib/closure.c
index 4fb78d18ee1b..21fadd12093c 100644
--- a/lib/closure.c
+++ b/lib/closure.c
@@ -13,65 +13,70 @@
#include <linux/seq_file.h>
#include <linux/sched/debug.h>
-static inline void closure_put_after_sub_checks(struct closure *cl, int flags)
+static void closure_val_checks(struct closure *cl, unsigned new, int d)
{
- int r = flags & CLOSURE_REMAINING_MASK;
+ unsigned count = new & CLOSURE_REMAINING_MASK;
- if (WARN(flags & CLOSURE_GUARD_MASK,
- "closure %ps has guard bits set: %x (%u)",
+ if (WARN(new & CLOSURE_GUARD_MASK,
+ "closure %ps has guard bits set: %x (%u), delta %i",
cl->fn,
- flags & CLOSURE_GUARD_MASK, (unsigned) __fls(r)))
- r &= ~CLOSURE_GUARD_MASK;
+ new, (unsigned) __fls(new & CLOSURE_GUARD_MASK), d))
+ new &= ~CLOSURE_GUARD_MASK;
- WARN(!r && (flags & ~CLOSURE_DESTRUCTOR),
+ WARN(!count && (new & ~CLOSURE_DESTRUCTOR),
"closure %ps ref hit 0 with incorrect flags set: %x (%u)",
cl->fn,
- flags & ~CLOSURE_DESTRUCTOR, (unsigned) __fls(flags));
+ new, (unsigned) __fls(new));
}
-static inline void closure_put_after_sub(struct closure *cl, int flags)
-{
- closure_put_after_sub_checks(cl, flags);
+enum new_closure_state {
+ CLOSURE_normal_put,
+ CLOSURE_requeue,
+ CLOSURE_done,
+};
- if (!(flags & CLOSURE_REMAINING_MASK)) {
- smp_acquire__after_ctrl_dep();
+/* For clearing flags with the same atomic op as a put */
+void closure_sub(struct closure *cl, int v)
+{
+ enum new_closure_state s;
- cl->closure_get_happened = false;
+ int old = atomic_read(&cl->remaining), new;
+ do {
+ new = old - v;
- if (cl->fn && !(flags & CLOSURE_DESTRUCTOR)) {
- atomic_set(&cl->remaining,
- CLOSURE_REMAINING_INITIALIZER);
- closure_queue(cl);
+ if (new & CLOSURE_REMAINING_MASK) {
+ s = CLOSURE_normal_put;
} else {
- struct closure *parent = cl->parent;
- closure_fn *destructor = cl->fn;
+ if (cl->fn && !(new & CLOSURE_DESTRUCTOR)) {
+ s = CLOSURE_requeue;
+ new += CLOSURE_REMAINING_INITIALIZER;
+ } else
+ s = CLOSURE_done;
+ }
- closure_debug_destroy(cl);
+ closure_val_checks(cl, new, -v);
+ } while (!atomic_try_cmpxchg_release(&cl->remaining, &old, new));
- if (destructor)
- destructor(&cl->work);
+ if (s == CLOSURE_normal_put)
+ return;
- if (parent)
- closure_put(parent);
- }
- }
-}
+ if (s == CLOSURE_requeue) {
+ cl->closure_get_happened = false;
+ closure_queue(cl);
+ } else {
+ struct closure *parent = cl->parent;
+ closure_fn *destructor = cl->fn;
-/* For clearing flags with the same atomic op as a put */
-void closure_sub(struct closure *cl, int v)
-{
- closure_put_after_sub(cl, atomic_sub_return_release(v, &cl->remaining));
-}
-EXPORT_SYMBOL(closure_sub);
+ closure_debug_destroy(cl);
-/*
- * closure_put - decrement a closure's refcount
- */
-void closure_put(struct closure *cl)
-{
- closure_put_after_sub(cl, atomic_dec_return_release(&cl->remaining));
+ if (destructor)
+ destructor(&cl->work);
+
+ if (parent)
+ closure_put(parent);
+ }
}
-EXPORT_SYMBOL(closure_put);
+EXPORT_SYMBOL(closure_sub);
/*
* closure_wake_up - wake up all closures on a wait list, without memory barrier
@@ -169,7 +174,7 @@ void __sched closure_return_sync(struct closure *cl)
unsigned flags = atomic_sub_return_release(1 + CLOSURE_RUNNING - CLOSURE_DESTRUCTOR,
&cl->remaining);
- closure_put_after_sub_checks(cl, flags);
+ closure_val_checks(cl, flags, 1 + CLOSURE_RUNNING - CLOSURE_DESTRUCTOR);
if (unlikely(flags & CLOSURE_REMAINING_MASK)) {
while (1) {