Diffstat (limited to 'fs/bcachefs')
-rw-r--r--  fs/bcachefs/alloc_background.c        |   3
-rw-r--r--  fs/bcachefs/bcachefs.h                |   1
-rw-r--r--  fs/bcachefs/bcachefs_format.h         |   3
-rw-r--r--  fs/bcachefs/btree_io.c                |  11
-rw-r--r--  fs/bcachefs/buckets.c                 |  16
-rw-r--r--  fs/bcachefs/data_update.c             |   8
-rw-r--r--  fs/bcachefs/disk_accounting.c         | 185
-rw-r--r--  fs/bcachefs/disk_accounting_format.h  |  10
-rw-r--r--  fs/bcachefs/error.c                   |   4
-rw-r--r--  fs/bcachefs/inode.c                   |  30
-rw-r--r--  fs/bcachefs/inode.h                   |   2
-rw-r--r--  fs/bcachefs/io_misc.c                 |  12
-rw-r--r--  fs/bcachefs/io_write.c                |  50
-rw-r--r--  fs/bcachefs/io_write.h                |   2
-rw-r--r--  fs/bcachefs/lru.c                     |  45
-rw-r--r--  fs/bcachefs/lru.h                     |   5
-rw-r--r--  fs/bcachefs/move.c                    | 162
-rw-r--r--  fs/bcachefs/move.h                    |  30
-rw-r--r--  fs/bcachefs/opts.c                    |   2
-rw-r--r--  fs/bcachefs/opts.h                    |   2
-rw-r--r--  fs/bcachefs/progress.c                |  39
-rw-r--r--  fs/bcachefs/progress.h                |  12
-rw-r--r--  fs/bcachefs/rebalance.c               | 213
-rw-r--r--  fs/bcachefs/rebalance.h               |  50
-rw-r--r--  fs/bcachefs/recovery.c                |  14
-rw-r--r--  fs/bcachefs/reflink.c                 |  16
-rw-r--r--  fs/bcachefs/sb-downgrade.c            |  11
-rw-r--r--  fs/bcachefs/super.c                   |   9
28 files changed, 483 insertions(+), 464 deletions(-)
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index 22e689436316..cab4d6798dd7 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -2384,8 +2384,7 @@ int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca)
* We clear the LRU and need_discard btrees first so that we don't race
* with bch2_do_invalidates() and bch2_do_discards()
*/
- ret = bch2_btree_delete_range(c, BTREE_ID_lru, start, end,
- BTREE_TRIGGER_norun, NULL) ?:
+ ret = bch2_dev_remove_lrus(c, ca) ?:
bch2_btree_delete_range(c, BTREE_ID_need_discard, start, end,
BTREE_TRIGGER_norun, NULL) ?:
bch2_btree_delete_range(c, BTREE_ID_freespace, start, end,
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 6f25e2687cd2..553031a3b06a 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -675,6 +675,7 @@ struct bch_dev {
x(error) \
x(topology_error) \
x(errors_fixed) \
+ x(errors_fixed_silent) \
x(errors_not_fixed) \
x(no_invalid_checks) \
x(discard_mount_opt_set) \
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index 76a2ae7f8d2d..0839397105a9 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -706,8 +706,7 @@ struct bch_sb_field_ext {
x(fast_device_removal, BCH_VERSION(1, 27)) \
x(inode_has_case_insensitive, BCH_VERSION(1, 28)) \
x(extent_snapshot_whiteouts, BCH_VERSION(1, 29)) \
- x(31bit_dirent_offset, BCH_VERSION(1, 30)) \
- x(btree_node_accounting, BCH_VERSION(1, 31))
+ x(31bit_dirent_offset, BCH_VERSION(1, 30))
enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9,
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 34ec1a90980d..52d21259ed6f 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -27,10 +27,15 @@
#include <linux/moduleparam.h>
#include <linux/sched/mm.h>
+static __maybe_unused unsigned bch2_btree_read_corrupt_ratio;
+static __maybe_unused int bch2_btree_read_corrupt_device;
+
#ifdef CONFIG_BCACHEFS_DEBUG
-static unsigned bch2_btree_read_corrupt_ratio;
module_param_named(btree_read_corrupt_ratio, bch2_btree_read_corrupt_ratio, uint, 0644);
MODULE_PARM_DESC(btree_read_corrupt_ratio, "");
+
+module_param_named(btree_read_corrupt_device, bch2_btree_read_corrupt_device, int, 0644);
MODULE_PARM_DESC(btree_read_corrupt_device, "");
#endif
static void bch2_btree_node_header_to_text(struct printbuf *out, struct btree_node *bn)
@@ -1438,7 +1443,9 @@ start:
memset(&bio->bi_iter, 0, sizeof(bio->bi_iter));
bio->bi_iter.bi_size = btree_buf_bytes(b);
- bch2_maybe_corrupt_bio(bio, bch2_btree_read_corrupt_ratio);
+ if (bch2_btree_read_corrupt_device == rb->pick.ptr.dev ||
+ bch2_btree_read_corrupt_device < 0)
+ bch2_maybe_corrupt_bio(bio, bch2_btree_read_corrupt_ratio);
ret = bch2_btree_node_read_done(c, ca, b, &failed, &buf);
if (ret != -BCH_ERR_btree_node_read_err_want_retry &&
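The new btree_read_corrupt_device parameter narrows the existing fault-injection knob to a single device; a negative value preserves the old corrupt-everything behaviour. A sketch of the gating, same logic as the hunk above (the "-1 means all devices" reading is an assumption based on the < 0 check):

	/* Corrupt reads from the selected device only, or from every
	 * device when the filter is negative: */
	if (bch2_btree_read_corrupt_device < 0 ||
	    bch2_btree_read_corrupt_device == rb->pick.ptr.dev)
		bch2_maybe_corrupt_bio(bio, bch2_btree_read_corrupt_ratio);

With 0644 permissions, both knobs should be adjustable at runtime under /sys/module/bcachefs/parameters/.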
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 99e928f77999..021f5cb7998d 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -749,7 +749,6 @@ static int __trigger_extent(struct btree_trans *trans,
enum btree_iter_update_trigger_flags flags)
{
bool gc = flags & BTREE_TRIGGER_gc;
- bool insert = !(flags & BTREE_TRIGGER_overwrite);
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
@@ -803,7 +802,7 @@ static int __trigger_extent(struct btree_trans *trans,
if (cur_compression_type &&
cur_compression_type != p.crc.compression_type) {
- if (!insert)
+ if (flags & BTREE_TRIGGER_overwrite)
bch2_u64s_neg(compression_acct, ARRAY_SIZE(compression_acct));
ret = bch2_disk_accounting_mod2(trans, gc, compression_acct,
@@ -836,7 +835,7 @@ static int __trigger_extent(struct btree_trans *trans,
}
if (cur_compression_type) {
- if (!insert)
+ if (flags & BTREE_TRIGGER_overwrite)
bch2_u64s_neg(compression_acct, ARRAY_SIZE(compression_acct));
ret = bch2_disk_accounting_mod2(trans, gc, compression_acct,
@@ -846,17 +845,12 @@ static int __trigger_extent(struct btree_trans *trans,
}
if (level) {
- const bool leaf_node = level == 1;
- s64 v[3] = {
- replicas_sectors,
- insert ? 1 : -1,
- !leaf_node ? (insert ? 1 : -1) : 0,
- };
-
- ret = bch2_disk_accounting_mod2(trans, gc, v, btree, btree_id);
+ ret = bch2_disk_accounting_mod2_nr(trans, gc, &replicas_sectors, 1, btree, btree_id);
if (ret)
return ret;
} else {
+ bool insert = !(flags & BTREE_TRIGGER_overwrite);
+
s64 v[3] = {
insert ? 1 : -1,
insert ? k.k->size : -((s64) k.k->size),
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index 5d2f536986c8..7a0da6cdf78c 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -11,6 +11,7 @@
#include "ec.h"
#include "error.h"
#include "extents.h"
+#include "inode.h"
#include "io_write.h"
#include "keylist.h"
#include "move.h"
@@ -428,13 +429,18 @@ restart_drop_extra_replicas:
goto out;
}
+ struct bch_inode_opts opts;
+
ret = bch2_trans_log_str(trans, bch2_data_update_type_strs[m->type]) ?:
bch2_trans_log_bkey(trans, m->btree_id, 0, m->k.k) ?:
bch2_insert_snapshot_whiteouts(trans, m->btree_id,
k.k->p, bkey_start_pos(&insert->k)) ?:
bch2_insert_snapshot_whiteouts(trans, m->btree_id,
k.k->p, insert->k.p) ?:
- bch2_bkey_set_needs_rebalance(c, &op->opts, insert) ?:
+ bch2_inum_snapshot_opts_get(trans, k.k->p.inode, k.k->p.snapshot, &opts) ?:
+ bch2_bkey_set_needs_rebalance(c, &opts, insert,
+ SET_NEEDS_REBALANCE_foreground,
+ m->op.opts.change_cookie) ?:
bch2_trans_update(trans, &iter, insert,
BTREE_UPDATE_internal_snapshot_node);
if (ret)
diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c
index 831b4c10b856..a99f821c6a1c 100644
--- a/fs/bcachefs/disk_accounting.c
+++ b/fs/bcachefs/disk_accounting.c
@@ -883,113 +883,118 @@ int bch2_accounting_read(struct bch_fs *c)
*dst++ = *i;
keys->gap = keys->nr = dst - keys->data;
- guard(percpu_write)(&c->mark_lock);
-
- darray_for_each_reverse(acc->k, i) {
- struct disk_accounting_pos acc_k;
- bpos_to_disk_accounting_pos(&acc_k, i->pos);
-
- u64 v[BCH_ACCOUNTING_MAX_COUNTERS];
- memset(v, 0, sizeof(v));
-
- for (unsigned j = 0; j < i->nr_counters; j++)
- v[j] = percpu_u64_get(i->v[0] + j);
+ CLASS(printbuf, underflow_err)();
- /*
- * If the entry counters are zeroed, it should be treated as
- * nonexistent - it might point to an invalid device.
- *
- * Remove it, so that if it's re-added it gets re-marked in the
- * superblock:
- */
- ret = bch2_is_zero(v, sizeof(v[0]) * i->nr_counters)
- ? -BCH_ERR_remove_disk_accounting_entry
- : bch2_disk_accounting_validate_late(trans, &acc_k, v, i->nr_counters);
-
- if (ret == -BCH_ERR_remove_disk_accounting_entry) {
- free_percpu(i->v[0]);
- free_percpu(i->v[1]);
- darray_remove_item(&acc->k, i);
- ret = 0;
- continue;
- }
+ scoped_guard(percpu_write, &c->mark_lock) {
+ darray_for_each_reverse(acc->k, i) {
+ struct disk_accounting_pos acc_k;
+ bpos_to_disk_accounting_pos(&acc_k, i->pos);
- if (ret)
- return ret;
- }
+ u64 v[BCH_ACCOUNTING_MAX_COUNTERS];
+ memset(v, 0, sizeof(v));
- eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
- accounting_pos_cmp, NULL);
+ for (unsigned j = 0; j < i->nr_counters; j++)
+ v[j] = percpu_u64_get(i->v[0] + j);
- for (unsigned i = 0; i < acc->k.nr; i++) {
- struct disk_accounting_pos k;
- bpos_to_disk_accounting_pos(&k, acc->k.data[i].pos);
+ /*
+ * If the entry counters are zeroed, it should be treated as
+ * nonexistent - it might point to an invalid device.
+ *
+ * Remove it, so that if it's re-added it gets re-marked in the
+ * superblock:
+ */
+ ret = bch2_is_zero(v, sizeof(v[0]) * i->nr_counters)
+ ? -BCH_ERR_remove_disk_accounting_entry
+ : bch2_disk_accounting_validate_late(trans, &acc_k, v, i->nr_counters);
+
+ if (ret == -BCH_ERR_remove_disk_accounting_entry) {
+ free_percpu(i->v[0]);
+ free_percpu(i->v[1]);
+ darray_remove_item(&acc->k, i);
+ ret = 0;
+ continue;
+ }
- u64 v[BCH_ACCOUNTING_MAX_COUNTERS];
- bch2_accounting_mem_read_counters(acc, i, v, ARRAY_SIZE(v), false);
+ if (ret)
+ return ret;
+ }
- /*
- * Check for underflow, schedule check_allocations
- * necessary:
- *
- * XXX - see if we can factor this out to run on a bkey
- * so we can check everything lazily, right now we don't
- * check the non in-mem counters at all
- */
- bool underflow = false;
- for (unsigned j = 0; j < acc->k.data[i].nr_counters; j++)
- underflow |= (s64) v[j] < 0;
+ eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
+ accounting_pos_cmp, NULL);
- if (underflow) {
- CLASS(printbuf, buf)();
- bch2_log_msg_start(c, &buf);
+ for (unsigned i = 0; i < acc->k.nr; i++) {
+ struct disk_accounting_pos k;
+ bpos_to_disk_accounting_pos(&k, acc->k.data[i].pos);
- prt_printf(&buf, "Accounting underflow for\n");
- bch2_accounting_key_to_text(&buf, &k);
+ u64 v[BCH_ACCOUNTING_MAX_COUNTERS];
+ bch2_accounting_mem_read_counters(acc, i, v, ARRAY_SIZE(v), false);
+ /*
+ * Check for underflow, schedule check_allocations if
+ * necessary:
+ *
+ * XXX - see if we can factor this out to run on a bkey
+ * so we can check everything lazily, right now we don't
+ * check the non in-mem counters at all
+ */
+ bool underflow = false;
for (unsigned j = 0; j < acc->k.data[i].nr_counters; j++)
- prt_printf(&buf, " %lli", v[j]);
-
- bool print = bch2_count_fsck_err(c, accounting_key_underflow, &buf);
- unsigned pos = buf.pos;
- ret = bch2_run_explicit_recovery_pass(c, &buf,
- BCH_RECOVERY_PASS_check_allocations, 0);
- print |= buf.pos != pos;
+ underflow |= (s64) v[j] < 0;
- if (print)
- bch2_print_str(c, KERN_ERR, buf.buf);
- if (ret)
- return ret;
- }
+ if (underflow) {
+ if (!underflow_err.pos) {
+ bch2_log_msg_start(c, &underflow_err);
+ prt_printf(&underflow_err, "Accounting underflow for\n");
+ }
+ bch2_accounting_key_to_text(&underflow_err, &k);
- guard(preempt)();
- struct bch_fs_usage_base *usage = this_cpu_ptr(c->usage);
+ for (unsigned j = 0; j < acc->k.data[i].nr_counters; j++)
+ prt_printf(&underflow_err, " %lli", v[j]);
+ prt_newline(&underflow_err);
+ }
- switch (k.type) {
- case BCH_DISK_ACCOUNTING_persistent_reserved:
- usage->reserved += v[0] * k.persistent_reserved.nr_replicas;
- break;
- case BCH_DISK_ACCOUNTING_replicas:
- fs_usage_data_type_to_base(usage, k.replicas.data_type, v[0]);
- break;
- case BCH_DISK_ACCOUNTING_dev_data_type: {
- guard(rcu)();
- struct bch_dev *ca = bch2_dev_rcu_noerror(c, k.dev_data_type.dev);
- if (ca) {
- struct bch_dev_usage_type __percpu *d = &ca->usage->d[k.dev_data_type.data_type];
- percpu_u64_set(&d->buckets, v[0]);
- percpu_u64_set(&d->sectors, v[1]);
- percpu_u64_set(&d->fragmented, v[2]);
-
- if (k.dev_data_type.data_type == BCH_DATA_sb ||
- k.dev_data_type.data_type == BCH_DATA_journal)
- usage->hidden += v[0] * ca->mi.bucket_size;
+ guard(preempt)();
+ struct bch_fs_usage_base *usage = this_cpu_ptr(c->usage);
+
+ switch (k.type) {
+ case BCH_DISK_ACCOUNTING_persistent_reserved:
+ usage->reserved += v[0] * k.persistent_reserved.nr_replicas;
+ break;
+ case BCH_DISK_ACCOUNTING_replicas:
+ fs_usage_data_type_to_base(usage, k.replicas.data_type, v[0]);
+ break;
+ case BCH_DISK_ACCOUNTING_dev_data_type: {
+ guard(rcu)();
+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, k.dev_data_type.dev);
+ if (ca) {
+ struct bch_dev_usage_type __percpu *d = &ca->usage->d[k.dev_data_type.data_type];
+ percpu_u64_set(&d->buckets, v[0]);
+ percpu_u64_set(&d->sectors, v[1]);
+ percpu_u64_set(&d->fragmented, v[2]);
+
+ if (k.dev_data_type.data_type == BCH_DATA_sb ||
+ k.dev_data_type.data_type == BCH_DATA_journal)
+ usage->hidden += v[0] * ca->mi.bucket_size;
+ }
+ break;
+ }
}
- break;
- }
}
}
+ if (underflow_err.pos) {
+ bool print = bch2_count_fsck_err(c, accounting_key_underflow, &underflow_err);
+ unsigned pos = underflow_err.pos;
+ ret = bch2_run_explicit_recovery_pass(c, &underflow_err,
+ BCH_RECOVERY_PASS_check_allocations, 0);
+ print |= underflow_err.pos != pos;
+
+ if (print)
+ bch2_print_str(c, KERN_ERR, underflow_err.buf);
+ if (ret)
+ return ret;
+ }
+
return ret;
}
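Net effect of the restructuring: instead of logging one message per underflowing key, the loop accumulates every report into underflow_err, and the recovery pass is scheduled once after the mark_lock scope ends. The shape of the pattern (sketch with hypothetical helpers, not compilable standalone):

	CLASS(printbuf, err)();

	scoped_guard(percpu_write, &c->mark_lock)
		darray_for_each(acc->k, i)
			if (entry_underflowed(i)) {	/* hypothetical predicate */
				if (!err.pos)		/* header only on first hit */
					prt_printf(&err, "Accounting underflow for\n");
				report_entry(&err, i);	/* hypothetical formatter */
			}

	if (err.pos)	/* one message covering every underflow */
		bch2_print_str(c, KERN_ERR, err.buf);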
diff --git a/fs/bcachefs/disk_accounting_format.h b/fs/bcachefs/disk_accounting_format.h
index 730a17ea4243..8269af1dbe2a 100644
--- a/fs/bcachefs/disk_accounting_format.h
+++ b/fs/bcachefs/disk_accounting_format.h
@@ -108,7 +108,7 @@ static inline bool data_type_is_hidden(enum bch_data_type type)
x(dev_data_type, 3, 3) \
x(compression, 4, 3) \
x(snapshot, 5, 1) \
- x(btree, 6, 3) \
+ x(btree, 6, 1) \
x(rebalance_work, 7, 1) \
x(inum, 8, 3)
@@ -174,14 +174,6 @@ struct bch_acct_snapshot {
__u32 id;
} __packed;
-/*
- * Metadata accounting per btree id:
- * [
- * total btree disk usage in sectors
- * total number of btree nodes
- * number of non-leaf btree nodes
- * ]
- */
struct bch_acct_btree {
__u32 id;
} __packed;
diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c
index 9e69263eb796..a16f55d98d97 100644
--- a/fs/bcachefs/error.c
+++ b/fs/bcachefs/error.c
@@ -468,10 +468,10 @@ int __bch2_fsck_err(struct bch_fs *c,
if ((flags & FSCK_ERR_SILENT) ||
test_bit(err, c->sb.errors_silent)) {
- ret = flags & FSCK_CAN_FIX
+ set_bit(BCH_FS_errors_fixed_silent, &c->flags);
+ return flags & FSCK_CAN_FIX
? bch_err_throw(c, fsck_fix)
: bch_err_throw(c, fsck_ignore);
- goto err;
}
printbuf_indent_add_nextline(out, 2);
diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c
index 193c8ec0bdcd..655ed90b2a39 100644
--- a/fs/bcachefs/inode.c
+++ b/fs/bcachefs/inode.c
@@ -369,9 +369,9 @@ err:
}
int bch2_inode_find_by_inum_snapshot(struct btree_trans *trans,
- u64 inode_nr, u32 snapshot,
- struct bch_inode_unpacked *inode,
- unsigned flags)
+ u64 inode_nr, u32 snapshot,
+ struct bch_inode_unpacked *inode,
+ unsigned flags)
{
CLASS(btree_iter, iter)(trans, BTREE_ID_inodes, SPOS(0, inode_nr, snapshot), flags);
struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter);
@@ -1238,20 +1238,30 @@ void bch2_inode_opts_get_inode(struct bch_fs *c,
BCH_INODE_OPTS()
#undef x
- ret->opt_change_cookie = atomic_read(&c->opt_change_cookie);
+ ret->change_cookie = atomic_read(&c->opt_change_cookie);
bch2_io_opts_fixups(ret);
}
-int bch2_inum_opts_get(struct btree_trans *trans, subvol_inum inum, struct bch_inode_opts *opts)
+int bch2_inum_snapshot_opts_get(struct btree_trans *trans,
+ u64 inum, u32 snapshot,
+ struct bch_inode_opts *opts)
{
- struct bch_inode_unpacked inode;
- int ret = lockrestart_do(trans, bch2_inode_find_by_inum_trans(trans, inum, &inode));
+ if (inum) {
+ struct bch_inode_unpacked inode;
+ int ret = bch2_inode_find_by_inum_snapshot(trans, inum, snapshot, &inode, 0);
+ if (ret)
+ return ret;
- if (ret)
- return ret;
+ bch2_inode_opts_get_inode(trans->c, &inode, opts);
+ } else {
+ /*
+ * data_update_index_update may call us for reflink btree extent
+ * updates, inum will be 0
+ */
- bch2_inode_opts_get_inode(trans->c, &inode, opts);
+ bch2_inode_opts_get(trans->c, opts);
+ }
return 0;
}
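Callers now resolve options straight from an extent's (inode, snapshot) position; the data_update.c hunk above is the canonical shape, with inum == 0 (reflink btree) falling back to filesystem-wide options:

	/* Caller sketch: io opts for the key's inode + snapshot; reflink
	 * btree extents (inode == 0) get the filesystem-wide options. */
	struct bch_inode_opts opts;
	int ret = bch2_inum_snapshot_opts_get(trans, k.k->p.inode,
					      k.k->p.snapshot, &opts);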
diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h
index 12e0a104c196..63b7088811fb 100644
--- a/fs/bcachefs/inode.h
+++ b/fs/bcachefs/inode.h
@@ -290,7 +290,7 @@ void bch2_inode_nlink_dec(struct btree_trans *, struct bch_inode_unpacked *);
struct bch_opts bch2_inode_opts_to_opts(struct bch_inode_unpacked *);
void bch2_inode_opts_get_inode(struct bch_fs *, struct bch_inode_unpacked *, struct bch_inode_opts *);
-int bch2_inum_opts_get(struct btree_trans *, subvol_inum, struct bch_inode_opts *);
+int bch2_inum_snapshot_opts_get(struct btree_trans *, u64, u32, struct bch_inode_opts *);
int bch2_inode_set_casefold(struct btree_trans *, subvol_inum,
struct bch_inode_unpacked *, unsigned);
diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c
index 5e03574059e0..04eb5ecd102b 100644
--- a/fs/bcachefs/io_misc.c
+++ b/fs/bcachefs/io_misc.c
@@ -109,7 +109,7 @@ int bch2_extent_fallocate(struct btree_trans *trans,
}
ret = bch2_extent_update(trans, inum, iter, new.k, &disk_res,
- 0, i_sectors_delta, true);
+ 0, i_sectors_delta, true, 0);
err:
if (!ret && sectors_allocated)
bch2_increment_clock(c, sectors_allocated, WRITE);
@@ -211,7 +211,7 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter,
bch2_cut_back(end_pos, &delete);
ret = bch2_extent_update(trans, inum, iter, &delete,
- &disk_res, 0, i_sectors_delta, false);
+ &disk_res, 0, i_sectors_delta, false, 0);
bch2_disk_reservation_put(c, &disk_res);
}
@@ -373,7 +373,6 @@ static int __bch2_resume_logged_op_finsert(struct btree_trans *trans,
struct btree_iter iter;
struct bkey_i_logged_op_finsert *op = bkey_i_to_logged_op_finsert(op_k);
subvol_inum inum = { le32_to_cpu(op->v.subvol), le64_to_cpu(op->v.inum) };
- struct bch_inode_opts opts;
u64 dst_offset = le64_to_cpu(op->v.dst_offset);
u64 src_offset = le64_to_cpu(op->v.src_offset);
s64 shift = dst_offset - src_offset;
@@ -384,10 +383,6 @@ static int __bch2_resume_logged_op_finsert(struct btree_trans *trans,
bool warn_errors = i_sectors_delta != NULL;
int ret = 0;
- ret = bch2_inum_opts_get(trans, inum, &opts);
- if (ret)
- return ret;
-
/*
* check for missing subvolume before fpunch, as in resume we don't want
* it to be a fatal error
@@ -476,8 +471,7 @@ case LOGGED_OP_FINSERT_shift_extents:
op->v.pos = cpu_to_le64(insert ? bkey_start_offset(&delete.k) : delete.k.p.offset);
- ret = bch2_bkey_set_needs_rebalance(c, &opts, copy) ?:
- bch2_btree_insert_trans(trans, BTREE_ID_extents, &delete, 0) ?:
+ ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, &delete, 0) ?:
bch2_btree_insert_trans(trans, BTREE_ID_extents, copy, 0) ?:
bch2_logged_op_update(trans, &op->k_i) ?:
bch2_trans_commit(trans, &disk_res, NULL, BCH_TRANS_COMMIT_no_enospc);
diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c
index aed22fc7759b..6a5da02ce266 100644
--- a/fs/bcachefs/io_write.c
+++ b/fs/bcachefs/io_write.c
@@ -205,7 +205,8 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans,
static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans,
struct btree_iter *extent_iter,
u64 new_i_size,
- s64 i_sectors_delta)
+ s64 i_sectors_delta,
+ struct bch_inode_unpacked *inode_u)
{
/*
* Crazy performance optimization:
@@ -227,7 +228,13 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans,
BTREE_ITER_intent|
BTREE_ITER_cached);
struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter);
- int ret = bkey_err(k);
+
+ /*
+ * XXX: we currently need to unpack the inode on every write because we
+ * need the current io_opts, for transactional consistency - inode_v4?
+ */
+ int ret = bkey_err(k) ?:
+ bch2_inode_unpack(k, inode_u);
if (unlikely(ret))
return ret;
@@ -303,8 +310,10 @@ int bch2_extent_update(struct btree_trans *trans,
struct disk_reservation *disk_res,
u64 new_i_size,
s64 *i_sectors_delta_total,
- bool check_enospc)
+ bool check_enospc,
+ u32 change_cookie)
{
+ struct bch_fs *c = trans->c;
struct bpos next_pos;
bool usage_increasing;
s64 i_sectors_delta = 0, disk_sectors_delta = 0;
@@ -335,7 +344,7 @@ int bch2_extent_update(struct btree_trans *trans,
if (disk_res &&
disk_sectors_delta > (s64) disk_res->sectors) {
- ret = bch2_disk_reservation_add(trans->c, disk_res,
+ ret = bch2_disk_reservation_add(c, disk_res,
disk_sectors_delta - disk_res->sectors,
!check_enospc || !usage_increasing
? BCH_DISK_RESERVATION_NOFAIL : 0);
@@ -349,9 +358,16 @@ int bch2_extent_update(struct btree_trans *trans,
* aren't changing - for fsync to work properly; fsync relies on
* inode->bi_journal_seq which is updated by the trigger code:
*/
+ struct bch_inode_unpacked inode;
+ struct bch_inode_opts opts;
+
ret = bch2_extent_update_i_size_sectors(trans, iter,
min(k->k.p.offset << 9, new_i_size),
- i_sectors_delta) ?:
+ i_sectors_delta, &inode) ?:
+ (bch2_inode_opts_get_inode(c, &inode, &opts),
+ bch2_bkey_set_needs_rebalance(c, &opts, k,
+ SET_NEEDS_REBALANCE_foreground,
+ change_cookie)) ?:
bch2_trans_update(trans, iter, k, 0) ?:
bch2_trans_commit(trans, disk_res, NULL,
BCH_TRANS_COMMIT_no_check_rw|
@@ -402,7 +418,8 @@ static int bch2_write_index_default(struct bch_write_op *op)
ret = bch2_extent_update(trans, inum, &iter, sk.k,
&op->res,
op->new_i_size, &op->i_sectors_delta,
- op->flags & BCH_WRITE_check_enospc);
+ op->flags & BCH_WRITE_check_enospc,
+ op->opts.change_cookie);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
@@ -792,10 +809,6 @@ static void init_append_extent(struct bch_write_op *op,
bch2_alloc_sectors_append_ptrs_inlined(op->c, wp, &e->k_i, crc.compressed_size,
op->flags & BCH_WRITE_cached);
-
- if (!(op->flags & BCH_WRITE_move))
- bch2_bkey_set_needs_rebalance(op->c, &op->opts, &e->k_i);
-
bch2_keylist_push(&op->insert_keys);
}
@@ -1225,6 +1238,7 @@ static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans,
return 0;
}
+ struct bch_fs *c = trans->c;
struct bkey_i *new = bch2_trans_kmalloc_nomemzero(trans,
bkey_bytes(k.k) + sizeof(struct bch_extent_rebalance));
int ret = PTR_ERR_OR_ZERO(new);
@@ -1239,8 +1253,6 @@ static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans,
bkey_for_each_ptr(ptrs, ptr)
ptr->unwritten = 0;
- bch2_bkey_set_needs_rebalance(op->c, &op->opts, new);
-
/*
* Note that we're not calling bch2_subvol_get_snapshot() in this path -
* that was done when we kicked off the write, and here it's important
@@ -1248,8 +1260,20 @@ static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans,
* since been created. The write is still outstanding, so we're ok
* w.r.t. snapshot atomicity:
*/
+
+ /*
+ * For transactional consistency, set_needs_rebalance() has to be called
+ * with the io_opts from the btree in the same transaction:
+ */
+ struct bch_inode_unpacked inode;
+ struct bch_inode_opts opts;
+
return bch2_extent_update_i_size_sectors(trans, iter,
- min(new->k.p.offset << 9, new_i_size), 0) ?:
+ min(new->k.p.offset << 9, new_i_size), 0, &inode) ?:
+ (bch2_inode_opts_get_inode(c, &inode, &opts),
+ bch2_bkey_set_needs_rebalance(c, &opts, new,
+ SET_NEEDS_REBALANCE_foreground,
+ op->opts.change_cookie)) ?:
bch2_trans_update(trans, iter, new,
BTREE_UPDATE_internal_snapshot_node);
}
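The update chains above combine two idioms: bcachefs's use of the GNU ?: (elvis) extension to short-circuit on the first nonzero error code, and a comma expression to slot the void bch2_inode_opts_get_inode() call into such a chain. A standalone illustration with hypothetical step functions:

	/* a() ?: (prep(), b()) evaluates a() once; if it returned nonzero
	 * that value is the result, otherwise the void prep() runs and
	 * b()'s result is returned. (GNU C extension.) */
	static int a(void)	{ return 0; }
	static void prep(void)	{ }
	static int b(void)	{ return 0; }

	static int chain(void)
	{
		return a() ?: (prep(), b());
	}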
diff --git a/fs/bcachefs/io_write.h b/fs/bcachefs/io_write.h
index 6c05ba6e15d6..692529bf401d 100644
--- a/fs/bcachefs/io_write.h
+++ b/fs/bcachefs/io_write.h
@@ -28,7 +28,7 @@ int bch2_sum_sector_overwrites(struct btree_trans *, struct btree_iter *,
struct bkey_i *, bool *, s64 *, s64 *);
int bch2_extent_update(struct btree_trans *, subvol_inum,
struct btree_iter *, struct bkey_i *,
- struct disk_reservation *, u64, s64 *, bool);
+ struct disk_reservation *, u64, s64 *, bool, u32);
static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c,
struct bch_inode_opts opts)
diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c
index b9c0834498dd..c533b60706bf 100644
--- a/fs/bcachefs/lru.c
+++ b/fs/bcachefs/lru.c
@@ -51,25 +51,17 @@ static int __bch2_lru_set(struct btree_trans *trans, u16 lru_id,
: 0;
}
-int bch2_lru_del(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 time)
+static int bch2_lru_set(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 time)
{
- return __bch2_lru_set(trans, lru_id, dev_bucket, time, KEY_TYPE_deleted);
-}
-
-int bch2_lru_set(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 time)
-{
- return __bch2_lru_set(trans, lru_id, dev_bucket, time, KEY_TYPE_set);
+ return __bch2_lru_set(trans, lru_id, dev_bucket, time, true);
}
int __bch2_lru_change(struct btree_trans *trans,
u16 lru_id, u64 dev_bucket,
u64 old_time, u64 new_time)
{
- if (old_time == new_time)
- return 0;
-
- return bch2_lru_del(trans, lru_id, dev_bucket, old_time) ?:
- bch2_lru_set(trans, lru_id, dev_bucket, new_time);
+ return __bch2_lru_set(trans, lru_id, dev_bucket, old_time, false) ?:
+ __bch2_lru_set(trans, lru_id, dev_bucket, new_time, true);
}
static const char * const bch2_lru_types[] = {
@@ -87,7 +79,6 @@ int bch2_lru_check_set(struct btree_trans *trans,
struct bkey_buf *last_flushed)
{
struct bch_fs *c = trans->c;
- CLASS(printbuf, buf)();
CLASS(btree_iter, lru_iter)(trans, BTREE_ID_lru, lru_pos(lru_id, dev_bucket, time), 0);
struct bkey_s_c lru_k = bch2_btree_iter_peek_slot(&lru_iter);
int ret = bkey_err(lru_k);
@@ -99,10 +90,13 @@ int bch2_lru_check_set(struct btree_trans *trans,
if (ret)
return ret;
- if (fsck_err(trans, alloc_key_to_missing_lru_entry,
- "missing %s lru entry\n%s",
- bch2_lru_types[lru_type(lru_k)],
- (bch2_bkey_val_to_text(&buf, c, referring_k), buf.buf))) {
+ CLASS(printbuf, buf)();
+ prt_printf(&buf, "missing %s lru entry at pos ", bch2_lru_types[lru_type(lru_k)]);
+ bch2_bpos_to_text(&buf, lru_iter.pos);
+ prt_newline(&buf);
+ bch2_bkey_val_to_text(&buf, c, referring_k);
+
+ if (fsck_err(trans, alloc_key_to_missing_lru_entry, "%s", buf.buf)) {
ret = bch2_lru_set(trans, lru_id, dev_bucket, time);
if (ret)
return ret;
@@ -127,6 +121,23 @@ static struct bbpos lru_pos_to_bp(struct bkey_s_c lru_k)
}
}
+int bch2_dev_remove_lrus(struct bch_fs *c, struct bch_dev *ca)
+{
+ CLASS(btree_trans, trans)(c);
+ int ret = bch2_btree_write_buffer_flush_sync(trans) ?:
+ for_each_btree_key(trans, iter,
+ BTREE_ID_lru, POS_MIN, BTREE_ITER_prefetch, k, ({
+ struct bbpos bp = lru_pos_to_bp(k);
+
+ bp.btree == BTREE_ID_alloc && bp.pos.inode == ca->dev_idx
+ ? (bch2_btree_delete_at(trans, &iter, 0) ?:
+ bch2_trans_commit(trans, NULL, NULL, 0))
+ : 0;
+ }));
+ bch_err_fn(c, ret);
+ return ret;
+}
+
static u64 bkey_lru_type_idx(struct bch_fs *c,
enum bch_lru_type type,
struct bkey_s_c k)
diff --git a/fs/bcachefs/lru.h b/fs/bcachefs/lru.h
index 6f1e0a7b5db5..d5a2620f2507 100644
--- a/fs/bcachefs/lru.h
+++ b/fs/bcachefs/lru.h
@@ -59,8 +59,6 @@ void bch2_lru_pos_to_text(struct printbuf *, struct bpos);
.min_val_size = 8, \
})
-int bch2_lru_del(struct btree_trans *, u16, u64, u64);
-int bch2_lru_set(struct btree_trans *, u16, u64, u64);
int __bch2_lru_change(struct btree_trans *, u16, u64, u64, u64);
static inline int bch2_lru_change(struct btree_trans *trans,
@@ -72,9 +70,10 @@ static inline int bch2_lru_change(struct btree_trans *trans,
: 0;
}
+int bch2_dev_remove_lrus(struct bch_fs *, struct bch_dev *);
+
struct bkey_buf;
int bch2_lru_check_set(struct btree_trans *, u16, u64, u64, struct bkey_s_c, struct bkey_buf *);
-
int bch2_check_lrus(struct bch_fs *);
#endif /* _BCACHEFS_LRU_H */
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 56e9ba4ed6a8..9a440d3f7180 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -451,93 +451,6 @@ err:
return ret;
}
-struct bch_inode_opts *bch2_move_get_io_opts(struct btree_trans *trans,
- struct per_snapshot_io_opts *io_opts,
- struct bpos extent_pos, /* extent_iter, extent_k may be in reflink btree */
- struct btree_iter *extent_iter,
- struct bkey_s_c extent_k)
-{
- struct bch_fs *c = trans->c;
- u32 restart_count = trans->restart_count;
- struct bch_inode_opts *opts_ret = &io_opts->fs_io_opts;
- int ret = 0;
-
- if (btree_iter_path(trans, extent_iter)->level)
- return opts_ret;
-
- if (extent_k.k->type == KEY_TYPE_reflink_v)
- goto out;
-
- if (io_opts->cur_inum != extent_pos.inode) {
- io_opts->d.nr = 0;
-
- ret = for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_pos.inode),
- BTREE_ITER_all_snapshots, k, ({
- if (k.k->p.offset != extent_pos.inode)
- break;
-
- if (!bkey_is_inode(k.k))
- continue;
-
- struct bch_inode_unpacked inode;
- _ret3 = bch2_inode_unpack(k, &inode);
- if (_ret3)
- break;
-
- struct snapshot_io_opts_entry e = { .snapshot = k.k->p.snapshot };
- bch2_inode_opts_get_inode(trans->c, &inode, &e.io_opts);
-
- darray_push(&io_opts->d, e);
- }));
- io_opts->cur_inum = extent_pos.inode;
- }
-
- ret = ret ?: trans_was_restarted(trans, restart_count);
- if (ret)
- return ERR_PTR(ret);
-
- if (extent_k.k->p.snapshot)
- darray_for_each(io_opts->d, i)
- if (bch2_snapshot_is_ancestor(c, extent_k.k->p.snapshot, i->snapshot)) {
- opts_ret = &i->io_opts;
- break;
- }
-out:
- ret = bch2_get_update_rebalance_opts(trans, opts_ret, extent_iter, extent_k);
- if (ret)
- return ERR_PTR(ret);
- return opts_ret;
-}
-
-int bch2_move_get_io_opts_one(struct btree_trans *trans,
- struct bch_inode_opts *io_opts,
- struct btree_iter *extent_iter,
- struct bkey_s_c extent_k)
-{
- struct bch_fs *c = trans->c;
-
- bch2_inode_opts_get(c, io_opts);
-
- /* reflink btree? */
- if (extent_k.k->p.inode) {
- CLASS(btree_iter, inode_iter)(trans, BTREE_ID_inodes,
- SPOS(0, extent_k.k->p.inode, extent_k.k->p.snapshot),
- BTREE_ITER_cached);
- struct bkey_s_c inode_k = bch2_btree_iter_peek_slot(&inode_iter);
- int ret = bkey_err(inode_k);
- if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
- return ret;
-
- if (!ret && bkey_is_inode(inode_k.k)) {
- struct bch_inode_unpacked inode;
- bch2_inode_unpack(inode_k, &inode);
- bch2_inode_opts_get_inode(c, &inode, io_opts);
- }
- }
-
- return bch2_get_update_rebalance_opts(trans, io_opts, extent_iter, extent_k);
-}
-
int bch2_move_ratelimit(struct moving_context *ctxt)
{
struct bch_fs *c = ctxt->trans->c;
@@ -582,37 +495,6 @@ int bch2_move_ratelimit(struct moving_context *ctxt)
return 0;
}
-/*
- * Move requires non extents iterators, and there's also no need for it to
- * signal indirect_extent_missing_error:
- */
-static struct bkey_s_c bch2_lookup_indirect_extent_for_move(struct btree_trans *trans,
- struct btree_iter *iter,
- struct bkey_s_c_reflink_p p)
-{
- if (unlikely(REFLINK_P_ERROR(p.v)))
- return bkey_s_c_null;
-
- struct bpos reflink_pos = POS(0, REFLINK_P_IDX(p.v));
-
- bch2_trans_iter_init(trans, iter,
- BTREE_ID_reflink, reflink_pos,
- BTREE_ITER_not_extents);
-
- struct bkey_s_c k = bch2_btree_iter_peek(iter);
- if (!k.k || bkey_err(k)) {
- bch2_trans_iter_exit(iter);
- return k;
- }
-
- if (bkey_lt(reflink_pos, bkey_start_pos(k.k))) {
- bch2_trans_iter_exit(iter);
- return bkey_s_c_null;
- }
-
- return k;
-}
-
int bch2_move_data_btree(struct moving_context *ctxt,
struct bpos start,
struct bpos end,
@@ -627,12 +509,6 @@ int bch2_move_data_btree(struct moving_context *ctxt,
struct btree_iter iter, reflink_iter = {};
struct bkey_s_c k;
struct data_update_opts data_opts;
- /*
- * If we're moving a single file, also process reflinked data it points
- * to (this includes propagating changed io_opts from the inode to the
- * extent):
- */
- bool walk_indirect = start.inode == end.inode;
int ret = 0, ret2;
per_snapshot_io_opts_init(&snapshot_io_opts, c);
@@ -697,8 +573,6 @@ root_err:
bch2_ratelimit_reset(ctxt->rate);
while (!bch2_move_ratelimit(ctxt)) {
- struct btree_iter *extent_iter = &iter;
-
bch2_trans_begin(trans);
k = bch2_btree_iter_peek(&iter);
@@ -717,41 +591,18 @@ root_err:
if (ctxt->stats)
ctxt->stats->pos = BBPOS(iter.btree_id, iter.pos);
- if (walk_indirect &&
- k.k->type == KEY_TYPE_reflink_p &&
- REFLINK_P_MAY_UPDATE_OPTIONS(bkey_s_c_to_reflink_p(k).v)) {
- struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
-
- bch2_trans_iter_exit(&reflink_iter);
- k = bch2_lookup_indirect_extent_for_move(trans, &reflink_iter, p);
- ret = bkey_err(k);
- if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
- continue;
- if (ret)
- break;
-
- if (!k.k)
- goto next_nondata;
-
- /*
- * XXX: reflink pointers may point to multiple indirect
- * extents, so don't advance past the entire reflink
- * pointer - need to fixup iter->k
- */
- extent_iter = &reflink_iter;
- }
-
if (!bkey_extent_is_direct_data(k.k))
goto next_nondata;
- io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts,
- iter.pos, extent_iter, k);
+ io_opts = bch2_extent_get_apply_io_opts(trans, &snapshot_io_opts,
+ iter.pos, &iter, k,
+ SET_NEEDS_REBALANCE_other);
ret = PTR_ERR_OR_ZERO(io_opts);
if (ret)
continue;
memset(&data_opts, 0, sizeof(data_opts));
- if (!pred(c, arg, extent_iter->btree_id, k, io_opts, &data_opts))
+ if (!pred(c, arg, iter.btree_id, k, io_opts, &data_opts))
goto next;
/*
@@ -762,7 +613,7 @@ root_err:
k = bkey_i_to_s_c(sk.k);
if (!level)
- ret2 = bch2_move_extent(ctxt, NULL, extent_iter, k, *io_opts, data_opts);
+ ret2 = bch2_move_extent(ctxt, NULL, &iter, k, *io_opts, data_opts);
else if (!data_opts.scrub)
ret2 = bch2_btree_node_rewrite_pos(trans, btree_id, level,
k.k->p, data_opts.target, 0);
@@ -944,7 +795,8 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
goto next;
if (!bp.v->level) {
- ret = bch2_move_get_io_opts_one(trans, &io_opts, &iter, k);
+ ret = bch2_extent_get_apply_io_opts_one(trans, &io_opts, &iter, k,
+ SET_NEEDS_REBALANCE_other);
if (ret) {
bch2_trans_iter_exit(&iter);
continue;
diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h
index 18021d2c51d0..754b0ad45950 100644
--- a/fs/bcachefs/move.h
+++ b/fs/bcachefs/move.h
@@ -87,32 +87,6 @@ void bch2_moving_ctxt_flush_all(struct moving_context *);
void bch2_move_ctxt_wait_for_io(struct moving_context *);
int bch2_move_ratelimit(struct moving_context *);
-/* Inodes in different snapshots may have different IO options: */
-struct snapshot_io_opts_entry {
- u32 snapshot;
- struct bch_inode_opts io_opts;
-};
-
-struct per_snapshot_io_opts {
- u64 cur_inum;
- struct bch_inode_opts fs_io_opts;
- DARRAY(struct snapshot_io_opts_entry) d;
-};
-
-static inline void per_snapshot_io_opts_init(struct per_snapshot_io_opts *io_opts, struct bch_fs *c)
-{
- memset(io_opts, 0, sizeof(*io_opts));
- bch2_inode_opts_get(c, &io_opts->fs_io_opts);
-}
-
-static inline void per_snapshot_io_opts_exit(struct per_snapshot_io_opts *io_opts)
-{
- darray_exit(&io_opts->d);
-}
-
-int bch2_move_get_io_opts_one(struct btree_trans *, struct bch_inode_opts *,
- struct btree_iter *, struct bkey_s_c);
-
int bch2_scan_old_btree_nodes(struct bch_fs *, struct bch_move_stats *);
int bch2_move_extent(struct moving_context *,
@@ -122,10 +96,6 @@ int bch2_move_extent(struct moving_context *,
struct bch_inode_opts,
struct data_update_opts);
-struct bch_inode_opts *bch2_move_get_io_opts(struct btree_trans *,
- struct per_snapshot_io_opts *, struct bpos,
- struct btree_iter *, struct bkey_s_c);
-
int bch2_move_data_btree(struct moving_context *, struct bpos, struct bpos,
move_pred_fn, void *, enum btree_id, unsigned);
diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c
index c4faa66b55ce..122bc98e4cbb 100644
--- a/fs/bcachefs/opts.c
+++ b/fs/bcachefs/opts.c
@@ -803,7 +803,7 @@ void bch2_inode_opts_get(struct bch_fs *c, struct bch_inode_opts *ret)
BCH_INODE_OPTS()
#undef x
- ret->opt_change_cookie = atomic_read(&c->opt_change_cookie);
+ ret->change_cookie = atomic_read(&c->opt_change_cookie);
bch2_io_opts_fixups(ret);
}
diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h
index a5779f8943cf..22cf109fb9c9 100644
--- a/fs/bcachefs/opts.h
+++ b/fs/bcachefs/opts.h
@@ -678,7 +678,7 @@ struct bch_inode_opts {
BCH_INODE_OPTS()
#undef x
- u32 opt_change_cookie;
+ u32 change_cookie;
};
static inline void bch2_io_opts_fixups(struct bch_inode_opts *opts)
diff --git a/fs/bcachefs/progress.c b/fs/bcachefs/progress.c
index 7cc16490ffa9..541ee951d1c9 100644
--- a/fs/bcachefs/progress.c
+++ b/fs/bcachefs/progress.c
@@ -4,21 +4,14 @@
#include "disk_accounting.h"
#include "progress.h"
-void bch2_progress_init_inner(struct progress_indicator_state *s,
- struct bch_fs *c,
- u64 leaf_btree_id_mask,
- u64 inner_btree_id_mask)
+void bch2_progress_init(struct progress_indicator_state *s,
+ struct bch_fs *c,
+ u64 btree_id_mask)
{
memset(s, 0, sizeof(*s));
s->next_print = jiffies + HZ * 10;
- /* This is only an estimation: nodes can have different replica counts */
- const u32 expected_node_disk_sectors =
- READ_ONCE(c->opts.metadata_replicas) * btree_sectors(c);
-
- const u64 btree_id_mask = leaf_btree_id_mask | inner_btree_id_mask;
-
for (unsigned i = 0; i < btree_id_nr_alive(c); i++) {
if (!(btree_id_mask & BIT_ULL(i)))
continue;
@@ -26,29 +19,9 @@ void bch2_progress_init_inner(struct progress_indicator_state *s,
struct disk_accounting_pos acc;
disk_accounting_key_init(acc, btree, .id = i);
- struct {
- u64 disk_sectors;
- u64 total_nodes;
- u64 inner_nodes;
- } v = {0};
- bch2_accounting_mem_read(c, disk_accounting_pos_to_bpos(&acc),
- (u64 *)&v, sizeof(v) / sizeof(u64));
-
- /* Better to estimate as 0 than the total node count */
- if (inner_btree_id_mask & BIT_ULL(i))
- s->nodes_total += v.inner_nodes;
-
- if (!(leaf_btree_id_mask & BIT_ULL(i)))
- continue;
-
- /*
- * We check for zeros to degrade gracefully when run
- * with un-upgraded accounting info (missing some counters).
- */
- if (v.total_nodes != 0)
- s->nodes_total += v.total_nodes - v.inner_nodes;
- else
- s->nodes_total += div_u64(v.disk_sectors, expected_node_disk_sectors);
+ u64 v;
+ bch2_accounting_mem_read(c, disk_accounting_pos_to_bpos(&acc), &v, 1);
+ s->nodes_total += div64_ul(v, btree_sectors(c));
}
}
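The simplified estimate is just accounted sectors divided by node size, now that the btree accounting counter is back to a single value. A worked example with assumed numbers:

	/* Assumed numbers: 2097152 sectors accounted to one btree and
	 * 512-sector nodes (btree_sectors(c) == 512): */
	s->nodes_total += div64_ul(2097152, 512);	/* += 4096 nodes */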
diff --git a/fs/bcachefs/progress.h b/fs/bcachefs/progress.h
index 91f345337709..972a73087ffe 100644
--- a/fs/bcachefs/progress.h
+++ b/fs/bcachefs/progress.h
@@ -20,17 +20,7 @@ struct progress_indicator_state {
struct btree *last_node;
};
-void bch2_progress_init_inner(struct progress_indicator_state *s,
- struct bch_fs *c,
- u64 leaf_btree_id_mask,
- u64 inner_btree_id_mask);
-
-static inline void bch2_progress_init(struct progress_indicator_state *s,
- struct bch_fs *c, u64 btree_id_mask)
-{
- bch2_progress_init_inner(s, c, btree_id_mask, 0);
-}
-
+void bch2_progress_init(struct progress_indicator_state *, struct bch_fs *, u64);
void bch2_progress_update_iter(struct btree_trans *,
struct progress_indicator_state *,
struct btree_iter *,
diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c
index 901cff84aab5..fa73de7890da 100644
--- a/fs/bcachefs/rebalance.c
+++ b/fs/bcachefs/rebalance.c
@@ -211,7 +211,9 @@ static bool bch2_bkey_rebalance_needs_update(struct bch_fs *c, struct bch_inode_
}
int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bch_inode_opts *opts,
- struct bkey_i *_k)
+ struct bkey_i *_k,
+ enum set_needs_rebalance_ctx ctx,
+ u32 change_cookie)
{
if (!bkey_extent_is_direct_data(&_k->k))
return 0;
@@ -235,10 +237,11 @@ int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bch_inode_opts *opts,
return 0;
}
-int bch2_get_update_rebalance_opts(struct btree_trans *trans,
- struct bch_inode_opts *io_opts,
- struct btree_iter *iter,
- struct bkey_s_c k)
+static int bch2_get_update_rebalance_opts(struct btree_trans *trans,
+ struct bch_inode_opts *io_opts,
+ struct btree_iter *iter,
+ struct bkey_s_c k,
+ enum set_needs_rebalance_ctx ctx)
{
BUG_ON(iter->flags & BTREE_ITER_is_extents);
BUG_ON(iter->flags & BTREE_ITER_filter_snapshots);
@@ -267,10 +270,121 @@ int bch2_get_update_rebalance_opts(struct btree_trans *trans,
/* On successful transaction commit, @k was invalidated: */
- return bch2_bkey_set_needs_rebalance(trans->c, io_opts, n) ?:
+ return bch2_bkey_set_needs_rebalance(trans->c, io_opts, n, ctx, 0) ?:
bch2_trans_update(trans, iter, n, BTREE_UPDATE_internal_snapshot_node) ?:
bch2_trans_commit(trans, NULL, NULL, 0) ?:
- bch_err_throw(trans->c, transaction_restart_nested);
+ bch_err_throw(trans->c, transaction_restart_commit);
+}
+
+static struct bch_inode_opts *bch2_extent_get_io_opts(struct btree_trans *trans,
+ struct per_snapshot_io_opts *io_opts,
+ struct bpos extent_pos, /* extent_iter, extent_k may be in reflink btree */
+ struct btree_iter *extent_iter,
+ struct bkey_s_c extent_k)
+{
+ struct bch_fs *c = trans->c;
+ u32 restart_count = trans->restart_count;
+ int ret = 0;
+
+ if (btree_iter_path(trans, extent_iter)->level)
+ return &io_opts->fs_io_opts;
+
+ if (extent_k.k->type == KEY_TYPE_reflink_v)
+ return &io_opts->fs_io_opts;
+
+ if (io_opts->cur_inum != extent_pos.inode) {
+ io_opts->d.nr = 0;
+
+ ret = for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_pos.inode),
+ BTREE_ITER_all_snapshots, k, ({
+ if (k.k->p.offset != extent_pos.inode)
+ break;
+
+ if (!bkey_is_inode(k.k))
+ continue;
+
+ struct bch_inode_unpacked inode;
+ _ret3 = bch2_inode_unpack(k, &inode);
+ if (_ret3)
+ break;
+
+ struct snapshot_io_opts_entry e = { .snapshot = k.k->p.snapshot };
+ bch2_inode_opts_get_inode(c, &inode, &e.io_opts);
+
+ darray_push(&io_opts->d, e);
+ }));
+ io_opts->cur_inum = extent_pos.inode;
+ }
+
+ ret = ret ?: trans_was_restarted(trans, restart_count);
+ if (ret)
+ return ERR_PTR(ret);
+
+ if (extent_k.k->p.snapshot)
+ darray_for_each(io_opts->d, i)
+ if (bch2_snapshot_is_ancestor(c, extent_k.k->p.snapshot, i->snapshot))
+ return &i->io_opts;
+
+ return &io_opts->fs_io_opts;
+}
+
+struct bch_inode_opts *bch2_extent_get_apply_io_opts(struct btree_trans *trans,
+ struct per_snapshot_io_opts *snapshot_io_opts,
+ struct bpos extent_pos, /* extent_iter, extent_k may be in reflink btree */
+ struct btree_iter *extent_iter,
+ struct bkey_s_c extent_k,
+ enum set_needs_rebalance_ctx ctx)
+{
+ struct bch_inode_opts *opts =
+ bch2_extent_get_io_opts(trans, snapshot_io_opts, extent_pos, extent_iter, extent_k);
+ if (IS_ERR(opts) || btree_iter_path(trans, extent_iter)->level)
+ return opts;
+
+ int ret = bch2_get_update_rebalance_opts(trans, opts, extent_iter, extent_k, ctx);
+ return ret ? ERR_PTR(ret) : opts;
+}
+
+int bch2_extent_get_io_opts_one(struct btree_trans *trans,
+ struct bch_inode_opts *io_opts,
+ struct btree_iter *extent_iter,
+ struct bkey_s_c extent_k,
+ enum set_needs_rebalance_ctx ctx)
+{
+ struct bch_fs *c = trans->c;
+
+ bch2_inode_opts_get(c, io_opts);
+
+ /* reflink btree? */
+ if (extent_k.k->p.inode) {
+ CLASS(btree_iter, inode_iter)(trans, BTREE_ID_inodes,
+ SPOS(0, extent_k.k->p.inode, extent_k.k->p.snapshot),
+ BTREE_ITER_cached);
+ struct bkey_s_c inode_k = bch2_btree_iter_peek_slot(&inode_iter);
+ int ret = bkey_err(inode_k);
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ return ret;
+
+ if (!ret && bkey_is_inode(inode_k.k)) {
+ struct bch_inode_unpacked inode;
+ bch2_inode_unpack(inode_k, &inode);
+ bch2_inode_opts_get_inode(c, &inode, io_opts);
+ }
+ }
+
+ return 0;
+}
+
+int bch2_extent_get_apply_io_opts_one(struct btree_trans *trans,
+ struct bch_inode_opts *io_opts,
+ struct btree_iter *extent_iter,
+ struct bkey_s_c extent_k,
+ enum set_needs_rebalance_ctx ctx)
+{
+ int ret = bch2_extent_get_io_opts_one(trans, io_opts, extent_iter, extent_k, ctx);
+ if (ret || btree_iter_path(trans, extent_iter)->level)
+ return ret;
+
+ return bch2_get_update_rebalance_opts(trans, io_opts, extent_iter, extent_k, ctx);
}
#define REBALANCE_WORK_SCAN_OFFSET (U64_MAX - 1)
@@ -403,9 +517,10 @@ static int bch2_bkey_clear_needs_rebalance(struct btree_trans *trans,
}
static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans,
+ struct per_snapshot_io_opts *snapshot_io_opts,
struct bpos work_pos,
struct btree_iter *extent_iter,
- struct bch_inode_opts *io_opts,
+ struct bch_inode_opts **opts_ret,
struct data_update_opts *data_opts)
{
struct bch_fs *c = trans->c;
@@ -419,13 +534,19 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans,
if (bkey_err(k))
return k;
- int ret = bch2_move_get_io_opts_one(trans, io_opts, extent_iter, k);
+ struct bch_inode_opts *opts =
+ bch2_extent_get_apply_io_opts(trans, snapshot_io_opts,
+ extent_iter->pos, extent_iter, k,
+ SET_NEEDS_REBALANCE_other);
+ int ret = PTR_ERR_OR_ZERO(opts);
if (ret)
return bkey_s_c_err(ret);
+ *opts_ret = opts;
+
memset(data_opts, 0, sizeof(*data_opts));
- data_opts->rewrite_ptrs = bch2_bkey_ptrs_need_rebalance(c, io_opts, k);
- data_opts->target = io_opts->background_target;
+ data_opts->rewrite_ptrs = bch2_bkey_ptrs_need_rebalance(c, opts, k);
+ data_opts->target = opts->background_target;
data_opts->write_flags |= BCH_WRITE_only_specified_devs;
if (!data_opts->rewrite_ptrs) {
@@ -450,19 +571,19 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans,
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
- unsigned p = bch2_bkey_ptrs_need_compress(c, io_opts, k, ptrs);
+ unsigned p = bch2_bkey_ptrs_need_compress(c, opts, k, ptrs);
if (p) {
prt_str(&buf, "compression=");
- bch2_compression_opt_to_text(&buf, io_opts->background_compression);
+ bch2_compression_opt_to_text(&buf, opts->background_compression);
prt_str(&buf, " ");
bch2_prt_u64_base2(&buf, p);
prt_newline(&buf);
}
- p = bch2_bkey_ptrs_need_move(c, io_opts, ptrs);
+ p = bch2_bkey_ptrs_need_move(c, opts, ptrs);
if (p) {
prt_str(&buf, "move=");
- bch2_target_to_text(&buf, c, io_opts->background_target);
+ bch2_target_to_text(&buf, c, opts->background_target);
prt_str(&buf, " ");
bch2_prt_u64_base2(&buf, p);
prt_newline(&buf);
@@ -477,6 +598,7 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans,
noinline_for_stack
static int do_rebalance_extent(struct moving_context *ctxt,
+ struct per_snapshot_io_opts *snapshot_io_opts,
struct bpos work_pos,
struct btree_iter *extent_iter)
{
@@ -484,7 +606,7 @@ static int do_rebalance_extent(struct moving_context *ctxt,
struct bch_fs *c = trans->c;
struct bch_fs_rebalance *r = &trans->c->rebalance;
struct data_update_opts data_opts;
- struct bch_inode_opts io_opts;
+ struct bch_inode_opts *io_opts;
struct bkey_s_c k;
struct bkey_buf sk;
int ret;
@@ -495,8 +617,8 @@ static int do_rebalance_extent(struct moving_context *ctxt,
bch2_bkey_buf_init(&sk);
ret = lockrestart_do(trans,
- bkey_err(k = next_rebalance_extent(trans, work_pos,
- extent_iter, &io_opts, &data_opts)));
+ bkey_err(k = next_rebalance_extent(trans, snapshot_io_opts,
+ work_pos, extent_iter, &io_opts, &data_opts)));
if (ret || !k.k)
goto out;
@@ -509,7 +631,7 @@ static int do_rebalance_extent(struct moving_context *ctxt,
bch2_bkey_buf_reassemble(&sk, c, k);
k = bkey_i_to_s_c(sk.k);
- ret = bch2_move_extent(ctxt, NULL, extent_iter, k, io_opts, data_opts);
+ ret = bch2_move_extent(ctxt, NULL, extent_iter, k, *io_opts, data_opts);
if (ret) {
if (bch2_err_matches(ret, ENOMEM)) {
/* memory allocation failure, wait for some IO to finish */
@@ -528,7 +650,31 @@ out:
return ret;
}
+static int do_rebalance_scan_indirect(struct btree_trans *trans,
+ struct bkey_s_c_reflink_p p,
+ struct bch_inode_opts *opts)
+{
+ u64 idx = REFLINK_P_IDX(p.v) - le32_to_cpu(p.v->front_pad);
+ u64 end = REFLINK_P_IDX(p.v) + p.k->size + le32_to_cpu(p.v->back_pad);
+ u32 restart_count = trans->restart_count;
+
+ int ret = for_each_btree_key(trans, iter, BTREE_ID_reflink,
+ POS(0, idx), BTREE_ITER_not_extents, k, ({
+ if (bpos_ge(bkey_start_pos(k.k), POS(0, end)))
+ break;
+ bch2_get_update_rebalance_opts(trans, opts, &iter, k,
+ SET_NEEDS_REBALANCE_opt_change_indirect);
+ }));
+ if (ret)
+ return ret;
+
+ /* suppress trans_was_restarted() check */
+ trans->restart_count = restart_count;
+ return 0;
+}
+
static int do_rebalance_scan(struct moving_context *ctxt,
+ struct per_snapshot_io_opts *snapshot_io_opts,
u64 inum, u64 cookie, u64 *sectors_scanned)
{
struct btree_trans *trans = ctxt->trans;
@@ -548,32 +694,33 @@ static int do_rebalance_scan(struct moving_context *ctxt,
r->state = BCH_REBALANCE_scanning;
- struct per_snapshot_io_opts snapshot_io_opts;
- per_snapshot_io_opts_init(&snapshot_io_opts, c);
-
int ret = for_each_btree_key_max(trans, iter, BTREE_ID_extents,
r->scan_start.pos, r->scan_end.pos,
BTREE_ITER_all_snapshots|
BTREE_ITER_prefetch, k, ({
ctxt->stats->pos = BBPOS(iter.btree_id, iter.pos);
- struct bch_inode_opts *io_opts = bch2_move_get_io_opts(trans,
- &snapshot_io_opts, iter.pos, &iter, k);
- PTR_ERR_OR_ZERO(io_opts);
+ struct bch_inode_opts *opts = bch2_extent_get_apply_io_opts(trans,
+ snapshot_io_opts, iter.pos, &iter, k,
+ SET_NEEDS_REBALANCE_opt_change);
+ PTR_ERR_OR_ZERO(opts) ?:
+ (inum &&
+ k.k->type == KEY_TYPE_reflink_p &&
+ REFLINK_P_MAY_UPDATE_OPTIONS(bkey_s_c_to_reflink_p(k).v)
+ ? do_rebalance_scan_indirect(trans, bkey_s_c_to_reflink_p(k), opts)
+ : 0);
})) ?:
commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
bch2_clear_rebalance_needs_scan(trans, inum, cookie));
- per_snapshot_io_opts_exit(&snapshot_io_opts);
*sectors_scanned += atomic64_read(&r->scan_stats.sectors_seen);
- bch2_move_stats_exit(&r->scan_stats, c);
-
/*
* Ensure that the rebalance_work entries we created are seen by the
* next iteration of do_rebalance(), so we don't end up stuck in
* rebalance_wait():
*/
*sectors_scanned += 1;
+ bch2_move_stats_exit(&r->scan_stats, c);
bch2_btree_write_buffer_flush_sync(trans);
@@ -625,6 +772,9 @@ static int do_rebalance(struct moving_context *ctxt)
bch2_move_stats_init(&r->work_stats, "rebalance_work");
+ struct per_snapshot_io_opts snapshot_io_opts;
+ per_snapshot_io_opts_init(&snapshot_io_opts, c);
+
while (!bch2_move_ratelimit(ctxt)) {
if (!bch2_rebalance_enabled(c)) {
bch2_moving_ctxt_flush_all(ctxt);
@@ -639,15 +789,18 @@ static int do_rebalance(struct moving_context *ctxt)
break;
ret = k->k.type == KEY_TYPE_cookie
- ? do_rebalance_scan(ctxt, k->k.p.inode,
+ ? do_rebalance_scan(ctxt, &snapshot_io_opts,
+ k->k.p.inode,
le64_to_cpu(bkey_i_to_cookie(k)->v.cookie),
&sectors_scanned)
- : do_rebalance_extent(ctxt, k->k.p, &extent_iter);
+ : do_rebalance_extent(ctxt, &snapshot_io_opts,
+ k->k.p, &extent_iter);
if (ret)
break;
}
bch2_trans_iter_exit(&extent_iter);
+ per_snapshot_io_opts_exit(&snapshot_io_opts);
bch2_move_stats_exit(&r->work_stats, c);
if (!ret &&
diff --git a/fs/bcachefs/rebalance.h b/fs/bcachefs/rebalance.h
index c5f49f480a79..bff91aa0102e 100644
--- a/fs/bcachefs/rebalance.h
+++ b/fs/bcachefs/rebalance.h
@@ -30,11 +30,51 @@ void bch2_extent_rebalance_to_text(struct printbuf *, struct bch_fs *,
const struct bch_extent_rebalance *);
u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *, struct bkey_s_c);
-int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bch_inode_opts *, struct bkey_i *);
-int bch2_get_update_rebalance_opts(struct btree_trans *,
- struct bch_inode_opts *,
- struct btree_iter *,
- struct bkey_s_c);
+
+enum set_needs_rebalance_ctx {
+ SET_NEEDS_REBALANCE_opt_change,
+ SET_NEEDS_REBALANCE_opt_change_indirect,
+ SET_NEEDS_REBALANCE_foreground,
+ SET_NEEDS_REBALANCE_other,
+};
+
+int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bch_inode_opts *,
+ struct bkey_i *, enum set_needs_rebalance_ctx, u32);
+
+/* Inodes in different snapshots may have different IO options: */
+struct snapshot_io_opts_entry {
+ u32 snapshot;
+ struct bch_inode_opts io_opts;
+};
+
+struct per_snapshot_io_opts {
+ u64 cur_inum;
+ struct bch_inode_opts fs_io_opts;
+ DARRAY(struct snapshot_io_opts_entry) d;
+};
+
+static inline void per_snapshot_io_opts_init(struct per_snapshot_io_opts *io_opts, struct bch_fs *c)
+{
+ memset(io_opts, 0, sizeof(*io_opts));
+ bch2_inode_opts_get(c, &io_opts->fs_io_opts);
+}
+
+static inline void per_snapshot_io_opts_exit(struct per_snapshot_io_opts *io_opts)
+{
+ darray_exit(&io_opts->d);
+}
+
+struct bch_inode_opts *bch2_extent_get_apply_io_opts(struct btree_trans *,
+ struct per_snapshot_io_opts *, struct bpos,
+ struct btree_iter *, struct bkey_s_c,
+ enum set_needs_rebalance_ctx);
+
+int bch2_extent_get_io_opts_one(struct btree_trans *, struct bch_inode_opts *,
+ struct btree_iter *, struct bkey_s_c,
+ enum set_needs_rebalance_ctx);
+int bch2_extent_get_apply_io_opts_one(struct btree_trans *, struct bch_inode_opts *,
+ struct btree_iter *, struct bkey_s_c,
+ enum set_needs_rebalance_ctx);
int bch2_set_rebalance_needs_scan_trans(struct btree_trans *, u64);
int bch2_set_rebalance_needs_scan(struct bch_fs *, u64 inum);
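per_snapshot_io_opts caches one bch_inode_opts per snapshot of the inode currently being scanned, with fs_io_opts as the fallback; do_rebalance() in this patch shows the lifecycle:

	struct per_snapshot_io_opts snapshot_io_opts;
	per_snapshot_io_opts_init(&snapshot_io_opts, c);	/* fills fs_io_opts */

	/* per extent: bch2_extent_get_apply_io_opts(trans, &snapshot_io_opts,
	 *					      iter.pos, &iter, k, ctx); */

	per_snapshot_io_opts_exit(&snapshot_io_opts);		/* frees the darray */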
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 8679c8aad0e7..531c2ef128ae 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -837,33 +837,39 @@ use_clean:
bch2_async_btree_node_rewrites_flush(c);
/* fsync if we fixed errors */
- if (test_bit(BCH_FS_errors_fixed, &c->flags)) {
+ bool errors_fixed = test_bit(BCH_FS_errors_fixed, &c->flags) ||
+ test_bit(BCH_FS_errors_fixed_silent, &c->flags);
+
+ if (errors_fixed) {
bch2_journal_flush_all_pins(&c->journal);
bch2_journal_meta(&c->journal);
}
/* If we fixed errors, verify that fs is actually clean now: */
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) &&
- test_bit(BCH_FS_errors_fixed, &c->flags) &&
+ errors_fixed &&
!test_bit(BCH_FS_errors_not_fixed, &c->flags) &&
!test_bit(BCH_FS_error, &c->flags)) {
bch2_flush_fsck_errs(c);
bch_info(c, "Fixed errors, running fsck a second time to verify fs is clean");
+ errors_fixed = test_bit(BCH_FS_errors_fixed, &c->flags);
clear_bit(BCH_FS_errors_fixed, &c->flags);
+ clear_bit(BCH_FS_errors_fixed_silent, &c->flags);
ret = bch2_run_recovery_passes(c,
BCH_RECOVERY_PASS_check_alloc_info);
if (ret)
goto err;
- if (test_bit(BCH_FS_errors_fixed, &c->flags) ||
+ if (errors_fixed ||
test_bit(BCH_FS_errors_not_fixed, &c->flags)) {
bch_err(c, "Second fsck run was not clean");
set_bit(BCH_FS_errors_not_fixed, &c->flags);
}
- set_bit(BCH_FS_errors_fixed, &c->flags);
+ if (errors_fixed)
+ set_bit(BCH_FS_errors_fixed, &c->flags);
}
if (enabled_qtypes(c)) {
diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c
index 55ad8ab7a148..d54468fdcb18 100644
--- a/fs/bcachefs/reflink.c
+++ b/fs/bcachefs/reflink.c
@@ -589,7 +589,6 @@ s64 bch2_remap_range(struct bch_fs *c,
struct bpos dst_start = POS(dst_inum.inum, dst_offset);
struct bpos src_start = POS(src_inum.inum, src_offset);
struct bpos dst_end = dst_start, src_end = src_start;
- struct bch_inode_opts opts;
struct bpos src_want;
u64 dst_done = 0;
u32 dst_snapshot, src_snapshot;
@@ -609,10 +608,6 @@ s64 bch2_remap_range(struct bch_fs *c,
bch2_bkey_buf_init(&new_src);
CLASS(btree_trans, trans)(c);
- ret = bch2_inum_opts_get(trans, src_inum, &opts);
- if (ret)
- goto err;
-
bch2_trans_iter_init(trans, &src_iter, BTREE_ID_extents, src_start,
BTREE_ITER_intent);
bch2_trans_iter_init(trans, &dst_iter, BTREE_ID_extents, dst_start,
@@ -709,11 +704,10 @@ s64 bch2_remap_range(struct bch_fs *c,
min(src_k.k->p.offset - src_want.offset,
dst_end.offset - dst_iter.pos.offset));
- ret = bch2_bkey_set_needs_rebalance(c, &opts, new_dst.k) ?:
- bch2_extent_update(trans, dst_inum, &dst_iter,
- new_dst.k, &disk_res,
- new_i_size, i_sectors_delta,
- true);
+ ret = bch2_extent_update(trans, dst_inum, &dst_iter,
+ new_dst.k, &disk_res,
+ new_i_size, i_sectors_delta,
+ true, 0);
bch2_disk_reservation_put(c, &disk_res);
}
bch2_trans_iter_exit(&dst_iter);
@@ -744,7 +738,7 @@ s64 bch2_remap_range(struct bch_fs *c,
bch2_trans_iter_exit(&inode_iter);
} while (bch2_err_matches(ret2, BCH_ERR_transaction_restart));
-err:
+
bch2_bkey_buf_exit(&new_src, c);
bch2_bkey_buf_exit(&new_dst, c);
diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c
index bfd06fd5d506..de56a1ee79db 100644
--- a/fs/bcachefs/sb-downgrade.c
+++ b/fs/bcachefs/sb-downgrade.c
@@ -104,10 +104,7 @@
x(inode_has_case_insensitive, \
BIT_ULL(BCH_RECOVERY_PASS_check_inodes), \
BCH_FSCK_ERR_inode_has_case_insensitive_not_set, \
- BCH_FSCK_ERR_inode_parent_has_case_insensitive_not_set)\
- x(btree_node_accounting, \
- BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \
- BCH_FSCK_ERR_accounting_mismatch)
+ BCH_FSCK_ERR_inode_parent_has_case_insensitive_not_set)
#define DOWNGRADE_TABLE() \
x(bucket_stripe_sectors, \
@@ -155,11 +152,7 @@
BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \
BCH_FSCK_ERR_accounting_mismatch, \
BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0, \
- BCH_FSCK_ERR_accounting_key_junk_at_end) \
- x(btree_node_accounting, \
- BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \
- BCH_FSCK_ERR_accounting_mismatch, \
- BCH_FSCK_ERR_accounting_key_nr_counters_wrong)
+ BCH_FSCK_ERR_accounting_key_junk_at_end)
struct upgrade_downgrade_entry {
u64 recovery_passes;
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 32b12311928e..de1e8912975c 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -833,6 +833,8 @@ int bch2_fs_init_rw(struct bch_fs *c)
if (test_bit(BCH_FS_rw_init_done, &c->flags))
return 0;
+ bch_verbose(c, "doing rw allocations");
+
if (!(c->btree_update_wq = alloc_workqueue("bcachefs",
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_UNBOUND, 512)) ||
!(c->btree_write_complete_wq = alloc_workqueue("bcachefs_btree_write_complete",
@@ -1286,7 +1288,12 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts,
if (ret)
goto err;
- if (go_rw_in_recovery(c)) {
+ /*
+ * just make sure this is always allocated if we might need it - mount
+ * failing due to kthread_create() failing is _very_ annoying
+ */
+ if (!(c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info)) ||
+ go_rw_in_recovery(c)) {
/*
* start workqueues/kworkers early - kthread creation checks for
* pending signals, which is _very_ annoying