diff options
-rw-r--r-- | fs/bcachefs/alloc_background.c | 3 | ||||
-rw-r--r-- | fs/bcachefs/bcachefs.h | 1 | ||||
-rw-r--r-- | fs/bcachefs/bcachefs_format.h | 2 | ||||
-rw-r--r-- | fs/bcachefs/btree_io.c | 11 | ||||
-rw-r--r-- | fs/bcachefs/data_update.c | 8 | ||||
-rw-r--r-- | fs/bcachefs/disk_accounting.c | 185 | ||||
-rw-r--r-- | fs/bcachefs/error.c | 4 | ||||
-rw-r--r-- | fs/bcachefs/inode.c | 30 | ||||
-rw-r--r-- | fs/bcachefs/inode.h | 2 | ||||
-rw-r--r-- | fs/bcachefs/io_misc.c | 12 | ||||
-rw-r--r-- | fs/bcachefs/io_write.c | 50 | ||||
-rw-r--r-- | fs/bcachefs/io_write.h | 2 | ||||
-rw-r--r-- | fs/bcachefs/lru.c | 45 | ||||
-rw-r--r-- | fs/bcachefs/lru.h | 5 | ||||
-rw-r--r-- | fs/bcachefs/move.c | 75 | ||||
-rw-r--r-- | fs/bcachefs/opts.c | 2 | ||||
-rw-r--r-- | fs/bcachefs/opts.h | 2 | ||||
-rw-r--r-- | fs/bcachefs/progress.c | 39 | ||||
-rw-r--r-- | fs/bcachefs/progress.h | 12 | ||||
-rw-r--r-- | fs/bcachefs/rebalance.c | 41 | ||||
-rw-r--r-- | fs/bcachefs/rebalance.h | 19 | ||||
-rw-r--r-- | fs/bcachefs/recovery.c | 14 | ||||
-rw-r--r-- | fs/bcachefs/reflink.c | 16 | ||||
-rw-r--r-- | fs/bcachefs/super.c | 9 |
24 files changed, 297 insertions, 292 deletions
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 22e689436316..cab4d6798dd7 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -2384,8 +2384,7 @@ int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca) * We clear the LRU and need_discard btrees first so that we don't race * with bch2_do_invalidates() and bch2_do_discards() */ - ret = bch2_btree_delete_range(c, BTREE_ID_lru, start, end, - BTREE_TRIGGER_norun, NULL) ?: + ret = bch2_dev_remove_lrus(c, ca) ?: bch2_btree_delete_range(c, BTREE_ID_need_discard, start, end, BTREE_TRIGGER_norun, NULL) ?: bch2_btree_delete_range(c, BTREE_ID_freespace, start, end, diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 6f25e2687cd2..553031a3b06a 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -675,6 +675,7 @@ struct bch_dev { x(error) \ x(topology_error) \ x(errors_fixed) \ + x(errors_fixed_silent) \ x(errors_not_fixed) \ x(no_invalid_checks) \ x(discard_mount_opt_set) \ diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 76a2ae7f8d2d..afa0af06568f 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -718,7 +718,7 @@ enum bcachefs_metadata_version { }; static const __maybe_unused -unsigned bcachefs_metadata_required_upgrade_below = bcachefs_metadata_version_rebalance_work; +unsigned bcachefs_metadata_required_upgrade_below = bcachefs_metadata_version_btree_node_accounting; #define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 34ec1a90980d..a0cdbf0e056d 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -27,10 +27,15 @@ #include <linux/moduleparam.h> #include <linux/sched/mm.h> -#ifdef CONFIG_BCACHEFS_DEBUG static unsigned bch2_btree_read_corrupt_ratio; +static int bch2_btree_read_corrupt_device; + +#ifdef CONFIG_BCACHEFS_DEBUG module_param_named(btree_read_corrupt_ratio, bch2_btree_read_corrupt_ratio, uint, 0644); MODULE_PARM_DESC(btree_read_corrupt_ratio, ""); + +module_param_named(btree_read_corrupt_device, bch2_btree_read_corrupt_device, int, 0644); +MODULE_PARM_DESC(btree_read_corrupt_ratio, ""); #endif static void bch2_btree_node_header_to_text(struct printbuf *out, struct btree_node *bn) @@ -1438,7 +1443,9 @@ start: memset(&bio->bi_iter, 0, sizeof(bio->bi_iter)); bio->bi_iter.bi_size = btree_buf_bytes(b); - bch2_maybe_corrupt_bio(bio, bch2_btree_read_corrupt_ratio); + if (bch2_btree_read_corrupt_device == rb->pick.ptr.dev || + bch2_btree_read_corrupt_device < 0) + bch2_maybe_corrupt_bio(bio, bch2_btree_read_corrupt_ratio); ret = bch2_btree_node_read_done(c, ca, b, &failed, &buf); if (ret != -BCH_ERR_btree_node_read_err_want_retry && diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 5d2f536986c8..7a0da6cdf78c 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -11,6 +11,7 @@ #include "ec.h" #include "error.h" #include "extents.h" +#include "inode.h" #include "io_write.h" #include "keylist.h" #include "move.h" @@ -428,13 +429,18 @@ restart_drop_extra_replicas: goto out; } + struct bch_inode_opts opts; + ret = bch2_trans_log_str(trans, bch2_data_update_type_strs[m->type]) ?: bch2_trans_log_bkey(trans, m->btree_id, 0, m->k.k) ?: bch2_insert_snapshot_whiteouts(trans, m->btree_id, k.k->p, bkey_start_pos(&insert->k)) ?: bch2_insert_snapshot_whiteouts(trans, m->btree_id, k.k->p, insert->k.p) ?: - bch2_bkey_set_needs_rebalance(c, &op->opts, insert) ?: + bch2_inum_snapshot_opts_get(trans, k.k->p.inode, k.k->p.snapshot, &opts) ?: + bch2_bkey_set_needs_rebalance(c, &opts, insert, + SET_NEEDS_REBALANCE_foreground, + m->op.opts.change_cookie) ?: bch2_trans_update(trans, &iter, insert, BTREE_UPDATE_internal_snapshot_node); if (ret) diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index 831b4c10b856..a99f821c6a1c 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -883,113 +883,118 @@ int bch2_accounting_read(struct bch_fs *c) *dst++ = *i; keys->gap = keys->nr = dst - keys->data; - guard(percpu_write)(&c->mark_lock); - - darray_for_each_reverse(acc->k, i) { - struct disk_accounting_pos acc_k; - bpos_to_disk_accounting_pos(&acc_k, i->pos); - - u64 v[BCH_ACCOUNTING_MAX_COUNTERS]; - memset(v, 0, sizeof(v)); - - for (unsigned j = 0; j < i->nr_counters; j++) - v[j] = percpu_u64_get(i->v[0] + j); + CLASS(printbuf, underflow_err)(); - /* - * If the entry counters are zeroed, it should be treated as - * nonexistent - it might point to an invalid device. - * - * Remove it, so that if it's re-added it gets re-marked in the - * superblock: - */ - ret = bch2_is_zero(v, sizeof(v[0]) * i->nr_counters) - ? -BCH_ERR_remove_disk_accounting_entry - : bch2_disk_accounting_validate_late(trans, &acc_k, v, i->nr_counters); - - if (ret == -BCH_ERR_remove_disk_accounting_entry) { - free_percpu(i->v[0]); - free_percpu(i->v[1]); - darray_remove_item(&acc->k, i); - ret = 0; - continue; - } + scoped_guard(percpu_write, &c->mark_lock) { + darray_for_each_reverse(acc->k, i) { + struct disk_accounting_pos acc_k; + bpos_to_disk_accounting_pos(&acc_k, i->pos); - if (ret) - return ret; - } + u64 v[BCH_ACCOUNTING_MAX_COUNTERS]; + memset(v, 0, sizeof(v)); - eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), - accounting_pos_cmp, NULL); + for (unsigned j = 0; j < i->nr_counters; j++) + v[j] = percpu_u64_get(i->v[0] + j); - for (unsigned i = 0; i < acc->k.nr; i++) { - struct disk_accounting_pos k; - bpos_to_disk_accounting_pos(&k, acc->k.data[i].pos); + /* + * If the entry counters are zeroed, it should be treated as + * nonexistent - it might point to an invalid device. + * + * Remove it, so that if it's re-added it gets re-marked in the + * superblock: + */ + ret = bch2_is_zero(v, sizeof(v[0]) * i->nr_counters) + ? -BCH_ERR_remove_disk_accounting_entry + : bch2_disk_accounting_validate_late(trans, &acc_k, v, i->nr_counters); + + if (ret == -BCH_ERR_remove_disk_accounting_entry) { + free_percpu(i->v[0]); + free_percpu(i->v[1]); + darray_remove_item(&acc->k, i); + ret = 0; + continue; + } - u64 v[BCH_ACCOUNTING_MAX_COUNTERS]; - bch2_accounting_mem_read_counters(acc, i, v, ARRAY_SIZE(v), false); + if (ret) + return ret; + } - /* - * Check for underflow, schedule check_allocations - * necessary: - * - * XXX - see if we can factor this out to run on a bkey - * so we can check everything lazily, right now we don't - * check the non in-mem counters at all - */ - bool underflow = false; - for (unsigned j = 0; j < acc->k.data[i].nr_counters; j++) - underflow |= (s64) v[j] < 0; + eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), + accounting_pos_cmp, NULL); - if (underflow) { - CLASS(printbuf, buf)(); - bch2_log_msg_start(c, &buf); + for (unsigned i = 0; i < acc->k.nr; i++) { + struct disk_accounting_pos k; + bpos_to_disk_accounting_pos(&k, acc->k.data[i].pos); - prt_printf(&buf, "Accounting underflow for\n"); - bch2_accounting_key_to_text(&buf, &k); + u64 v[BCH_ACCOUNTING_MAX_COUNTERS]; + bch2_accounting_mem_read_counters(acc, i, v, ARRAY_SIZE(v), false); + /* + * Check for underflow, schedule check_allocations + * necessary: + * + * XXX - see if we can factor this out to run on a bkey + * so we can check everything lazily, right now we don't + * check the non in-mem counters at all + */ + bool underflow = false; for (unsigned j = 0; j < acc->k.data[i].nr_counters; j++) - prt_printf(&buf, " %lli", v[j]); - - bool print = bch2_count_fsck_err(c, accounting_key_underflow, &buf); - unsigned pos = buf.pos; - ret = bch2_run_explicit_recovery_pass(c, &buf, - BCH_RECOVERY_PASS_check_allocations, 0); - print |= buf.pos != pos; + underflow |= (s64) v[j] < 0; - if (print) - bch2_print_str(c, KERN_ERR, buf.buf); - if (ret) - return ret; - } + if (underflow) { + if (!underflow_err.pos) { + bch2_log_msg_start(c, &underflow_err); + prt_printf(&underflow_err, "Accounting underflow for\n"); + } + bch2_accounting_key_to_text(&underflow_err, &k); - guard(preempt)(); - struct bch_fs_usage_base *usage = this_cpu_ptr(c->usage); + for (unsigned j = 0; j < acc->k.data[i].nr_counters; j++) + prt_printf(&underflow_err, " %lli", v[j]); + prt_newline(&underflow_err); + } - switch (k.type) { - case BCH_DISK_ACCOUNTING_persistent_reserved: - usage->reserved += v[0] * k.persistent_reserved.nr_replicas; - break; - case BCH_DISK_ACCOUNTING_replicas: - fs_usage_data_type_to_base(usage, k.replicas.data_type, v[0]); - break; - case BCH_DISK_ACCOUNTING_dev_data_type: { - guard(rcu)(); - struct bch_dev *ca = bch2_dev_rcu_noerror(c, k.dev_data_type.dev); - if (ca) { - struct bch_dev_usage_type __percpu *d = &ca->usage->d[k.dev_data_type.data_type]; - percpu_u64_set(&d->buckets, v[0]); - percpu_u64_set(&d->sectors, v[1]); - percpu_u64_set(&d->fragmented, v[2]); - - if (k.dev_data_type.data_type == BCH_DATA_sb || - k.dev_data_type.data_type == BCH_DATA_journal) - usage->hidden += v[0] * ca->mi.bucket_size; + guard(preempt)(); + struct bch_fs_usage_base *usage = this_cpu_ptr(c->usage); + + switch (k.type) { + case BCH_DISK_ACCOUNTING_persistent_reserved: + usage->reserved += v[0] * k.persistent_reserved.nr_replicas; + break; + case BCH_DISK_ACCOUNTING_replicas: + fs_usage_data_type_to_base(usage, k.replicas.data_type, v[0]); + break; + case BCH_DISK_ACCOUNTING_dev_data_type: { + guard(rcu)(); + struct bch_dev *ca = bch2_dev_rcu_noerror(c, k.dev_data_type.dev); + if (ca) { + struct bch_dev_usage_type __percpu *d = &ca->usage->d[k.dev_data_type.data_type]; + percpu_u64_set(&d->buckets, v[0]); + percpu_u64_set(&d->sectors, v[1]); + percpu_u64_set(&d->fragmented, v[2]); + + if (k.dev_data_type.data_type == BCH_DATA_sb || + k.dev_data_type.data_type == BCH_DATA_journal) + usage->hidden += v[0] * ca->mi.bucket_size; + } + break; + } } - break; - } } } + if (underflow_err.pos) { + bool print = bch2_count_fsck_err(c, accounting_key_underflow, &underflow_err); + unsigned pos = underflow_err.pos; + ret = bch2_run_explicit_recovery_pass(c, &underflow_err, + BCH_RECOVERY_PASS_check_allocations, 0); + print |= underflow_err.pos != pos; + + if (print) + bch2_print_str(c, KERN_ERR, underflow_err.buf); + if (ret) + return ret; + } + return ret; } diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index 9e69263eb796..a16f55d98d97 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -468,10 +468,10 @@ int __bch2_fsck_err(struct bch_fs *c, if ((flags & FSCK_ERR_SILENT) || test_bit(err, c->sb.errors_silent)) { - ret = flags & FSCK_CAN_FIX + set_bit(BCH_FS_errors_fixed_silent, &c->flags); + return flags & FSCK_CAN_FIX ? bch_err_throw(c, fsck_fix) : bch_err_throw(c, fsck_ignore); - goto err; } printbuf_indent_add_nextline(out, 2); diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 193c8ec0bdcd..655ed90b2a39 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -369,9 +369,9 @@ err: } int bch2_inode_find_by_inum_snapshot(struct btree_trans *trans, - u64 inode_nr, u32 snapshot, - struct bch_inode_unpacked *inode, - unsigned flags) + u64 inode_nr, u32 snapshot, + struct bch_inode_unpacked *inode, + unsigned flags) { CLASS(btree_iter, iter)(trans, BTREE_ID_inodes, SPOS(0, inode_nr, snapshot), flags); struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); @@ -1238,20 +1238,30 @@ void bch2_inode_opts_get_inode(struct bch_fs *c, BCH_INODE_OPTS() #undef x - ret->opt_change_cookie = atomic_read(&c->opt_change_cookie); + ret->change_cookie = atomic_read(&c->opt_change_cookie); bch2_io_opts_fixups(ret); } -int bch2_inum_opts_get(struct btree_trans *trans, subvol_inum inum, struct bch_inode_opts *opts) +int bch2_inum_snapshot_opts_get(struct btree_trans *trans, + u64 inum, u32 snapshot, + struct bch_inode_opts *opts) { - struct bch_inode_unpacked inode; - int ret = lockrestart_do(trans, bch2_inode_find_by_inum_trans(trans, inum, &inode)); + if (inum) { + struct bch_inode_unpacked inode; + int ret = bch2_inode_find_by_inum_snapshot(trans, inum, snapshot, &inode, 0); + if (ret) + return ret; - if (ret) - return ret; + bch2_inode_opts_get_inode(trans->c, &inode, opts); + } else { + /* + * data_update_index_update may call us for reflink btree extent + * updates, inum will be 0 + */ - bch2_inode_opts_get_inode(trans->c, &inode, opts); + bch2_inode_opts_get(trans->c, opts); + } return 0; } diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index 12e0a104c196..63b7088811fb 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -290,7 +290,7 @@ void bch2_inode_nlink_dec(struct btree_trans *, struct bch_inode_unpacked *); struct bch_opts bch2_inode_opts_to_opts(struct bch_inode_unpacked *); void bch2_inode_opts_get_inode(struct bch_fs *, struct bch_inode_unpacked *, struct bch_inode_opts *); -int bch2_inum_opts_get(struct btree_trans *, subvol_inum, struct bch_inode_opts *); +int bch2_inum_snapshot_opts_get(struct btree_trans *, u64, u32, struct bch_inode_opts *); int bch2_inode_set_casefold(struct btree_trans *, subvol_inum, struct bch_inode_unpacked *, unsigned); diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c index 5e03574059e0..04eb5ecd102b 100644 --- a/fs/bcachefs/io_misc.c +++ b/fs/bcachefs/io_misc.c @@ -109,7 +109,7 @@ int bch2_extent_fallocate(struct btree_trans *trans, } ret = bch2_extent_update(trans, inum, iter, new.k, &disk_res, - 0, i_sectors_delta, true); + 0, i_sectors_delta, true, 0); err: if (!ret && sectors_allocated) bch2_increment_clock(c, sectors_allocated, WRITE); @@ -211,7 +211,7 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter, bch2_cut_back(end_pos, &delete); ret = bch2_extent_update(trans, inum, iter, &delete, - &disk_res, 0, i_sectors_delta, false); + &disk_res, 0, i_sectors_delta, false, 0); bch2_disk_reservation_put(c, &disk_res); } @@ -373,7 +373,6 @@ static int __bch2_resume_logged_op_finsert(struct btree_trans *trans, struct btree_iter iter; struct bkey_i_logged_op_finsert *op = bkey_i_to_logged_op_finsert(op_k); subvol_inum inum = { le32_to_cpu(op->v.subvol), le64_to_cpu(op->v.inum) }; - struct bch_inode_opts opts; u64 dst_offset = le64_to_cpu(op->v.dst_offset); u64 src_offset = le64_to_cpu(op->v.src_offset); s64 shift = dst_offset - src_offset; @@ -384,10 +383,6 @@ static int __bch2_resume_logged_op_finsert(struct btree_trans *trans, bool warn_errors = i_sectors_delta != NULL; int ret = 0; - ret = bch2_inum_opts_get(trans, inum, &opts); - if (ret) - return ret; - /* * check for missing subvolume before fpunch, as in resume we don't want * it to be a fatal error @@ -476,8 +471,7 @@ case LOGGED_OP_FINSERT_shift_extents: op->v.pos = cpu_to_le64(insert ? bkey_start_offset(&delete.k) : delete.k.p.offset); - ret = bch2_bkey_set_needs_rebalance(c, &opts, copy) ?: - bch2_btree_insert_trans(trans, BTREE_ID_extents, &delete, 0) ?: + ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, &delete, 0) ?: bch2_btree_insert_trans(trans, BTREE_ID_extents, copy, 0) ?: bch2_logged_op_update(trans, &op->k_i) ?: bch2_trans_commit(trans, &disk_res, NULL, BCH_TRANS_COMMIT_no_enospc); diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index aed22fc7759b..6a5da02ce266 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -205,7 +205,8 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans, static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans, struct btree_iter *extent_iter, u64 new_i_size, - s64 i_sectors_delta) + s64 i_sectors_delta, + struct bch_inode_unpacked *inode_u) { /* * Crazy performance optimization: @@ -227,7 +228,13 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans, BTREE_ITER_intent| BTREE_ITER_cached); struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); - int ret = bkey_err(k); + + /* + * XXX: we currently need to unpack the inode on every write because we + * need the current io_opts, for transactional consistency - inode_v4? + */ + int ret = bkey_err(k) ?: + bch2_inode_unpack(k, inode_u); if (unlikely(ret)) return ret; @@ -303,8 +310,10 @@ int bch2_extent_update(struct btree_trans *trans, struct disk_reservation *disk_res, u64 new_i_size, s64 *i_sectors_delta_total, - bool check_enospc) + bool check_enospc, + u32 change_cookie) { + struct bch_fs *c = trans->c; struct bpos next_pos; bool usage_increasing; s64 i_sectors_delta = 0, disk_sectors_delta = 0; @@ -335,7 +344,7 @@ int bch2_extent_update(struct btree_trans *trans, if (disk_res && disk_sectors_delta > (s64) disk_res->sectors) { - ret = bch2_disk_reservation_add(trans->c, disk_res, + ret = bch2_disk_reservation_add(c, disk_res, disk_sectors_delta - disk_res->sectors, !check_enospc || !usage_increasing ? BCH_DISK_RESERVATION_NOFAIL : 0); @@ -349,9 +358,16 @@ int bch2_extent_update(struct btree_trans *trans, * aren't changing - for fsync to work properly; fsync relies on * inode->bi_journal_seq which is updated by the trigger code: */ + struct bch_inode_unpacked inode; + struct bch_inode_opts opts; + ret = bch2_extent_update_i_size_sectors(trans, iter, min(k->k.p.offset << 9, new_i_size), - i_sectors_delta) ?: + i_sectors_delta, &inode) ?: + (bch2_inode_opts_get_inode(c, &inode, &opts), + bch2_bkey_set_needs_rebalance(c, &opts, k, + SET_NEEDS_REBALANCE_foreground, + change_cookie)) ?: bch2_trans_update(trans, iter, k, 0) ?: bch2_trans_commit(trans, disk_res, NULL, BCH_TRANS_COMMIT_no_check_rw| @@ -402,7 +418,8 @@ static int bch2_write_index_default(struct bch_write_op *op) ret = bch2_extent_update(trans, inum, &iter, sk.k, &op->res, op->new_i_size, &op->i_sectors_delta, - op->flags & BCH_WRITE_check_enospc); + op->flags & BCH_WRITE_check_enospc, + op->opts.change_cookie); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; @@ -792,10 +809,6 @@ static void init_append_extent(struct bch_write_op *op, bch2_alloc_sectors_append_ptrs_inlined(op->c, wp, &e->k_i, crc.compressed_size, op->flags & BCH_WRITE_cached); - - if (!(op->flags & BCH_WRITE_move)) - bch2_bkey_set_needs_rebalance(op->c, &op->opts, &e->k_i); - bch2_keylist_push(&op->insert_keys); } @@ -1225,6 +1238,7 @@ static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans, return 0; } + struct bch_fs *c = trans->c; struct bkey_i *new = bch2_trans_kmalloc_nomemzero(trans, bkey_bytes(k.k) + sizeof(struct bch_extent_rebalance)); int ret = PTR_ERR_OR_ZERO(new); @@ -1239,8 +1253,6 @@ static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans, bkey_for_each_ptr(ptrs, ptr) ptr->unwritten = 0; - bch2_bkey_set_needs_rebalance(op->c, &op->opts, new); - /* * Note that we're not calling bch2_subvol_get_snapshot() in this path - * that was done when we kicked off the write, and here it's important @@ -1248,8 +1260,20 @@ static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans, * since been created. The write is still outstanding, so we're ok * w.r.t. snapshot atomicity: */ + + /* + * For transactional consistency, set_needs_rebalance() has to be called + * with the io_opts from the btree in the same transaction: + */ + struct bch_inode_unpacked inode; + struct bch_inode_opts opts; + return bch2_extent_update_i_size_sectors(trans, iter, - min(new->k.p.offset << 9, new_i_size), 0) ?: + min(new->k.p.offset << 9, new_i_size), 0, &inode) ?: + (bch2_inode_opts_get_inode(c, &inode, &opts), + bch2_bkey_set_needs_rebalance(c, &opts, new, + SET_NEEDS_REBALANCE_foreground, + op->opts.change_cookie)) ?: bch2_trans_update(trans, iter, new, BTREE_UPDATE_internal_snapshot_node); } diff --git a/fs/bcachefs/io_write.h b/fs/bcachefs/io_write.h index 6c05ba6e15d6..692529bf401d 100644 --- a/fs/bcachefs/io_write.h +++ b/fs/bcachefs/io_write.h @@ -28,7 +28,7 @@ int bch2_sum_sector_overwrites(struct btree_trans *, struct btree_iter *, struct bkey_i *, bool *, s64 *, s64 *); int bch2_extent_update(struct btree_trans *, subvol_inum, struct btree_iter *, struct bkey_i *, - struct disk_reservation *, u64, s64 *, bool); + struct disk_reservation *, u64, s64 *, bool, u32); static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c, struct bch_inode_opts opts) diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c index b9c0834498dd..c533b60706bf 100644 --- a/fs/bcachefs/lru.c +++ b/fs/bcachefs/lru.c @@ -51,25 +51,17 @@ static int __bch2_lru_set(struct btree_trans *trans, u16 lru_id, : 0; } -int bch2_lru_del(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 time) +static int bch2_lru_set(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 time) { - return __bch2_lru_set(trans, lru_id, dev_bucket, time, KEY_TYPE_deleted); -} - -int bch2_lru_set(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 time) -{ - return __bch2_lru_set(trans, lru_id, dev_bucket, time, KEY_TYPE_set); + return __bch2_lru_set(trans, lru_id, dev_bucket, time, true); } int __bch2_lru_change(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 old_time, u64 new_time) { - if (old_time == new_time) - return 0; - - return bch2_lru_del(trans, lru_id, dev_bucket, old_time) ?: - bch2_lru_set(trans, lru_id, dev_bucket, new_time); + return __bch2_lru_set(trans, lru_id, dev_bucket, old_time, false) ?: + __bch2_lru_set(trans, lru_id, dev_bucket, new_time, true); } static const char * const bch2_lru_types[] = { @@ -87,7 +79,6 @@ int bch2_lru_check_set(struct btree_trans *trans, struct bkey_buf *last_flushed) { struct bch_fs *c = trans->c; - CLASS(printbuf, buf)(); CLASS(btree_iter, lru_iter)(trans, BTREE_ID_lru, lru_pos(lru_id, dev_bucket, time), 0); struct bkey_s_c lru_k = bch2_btree_iter_peek_slot(&lru_iter); int ret = bkey_err(lru_k); @@ -99,10 +90,13 @@ int bch2_lru_check_set(struct btree_trans *trans, if (ret) return ret; - if (fsck_err(trans, alloc_key_to_missing_lru_entry, - "missing %s lru entry\n%s", - bch2_lru_types[lru_type(lru_k)], - (bch2_bkey_val_to_text(&buf, c, referring_k), buf.buf))) { + CLASS(printbuf, buf)(); + prt_printf(&buf, "missing %s lru entry at pos ", bch2_lru_types[lru_type(lru_k)]); + bch2_bpos_to_text(&buf, lru_iter.pos); + prt_newline(&buf); + bch2_bkey_val_to_text(&buf, c, referring_k); + + if (fsck_err(trans, alloc_key_to_missing_lru_entry, "%s", buf.buf)) { ret = bch2_lru_set(trans, lru_id, dev_bucket, time); if (ret) return ret; @@ -127,6 +121,23 @@ static struct bbpos lru_pos_to_bp(struct bkey_s_c lru_k) } } +int bch2_dev_remove_lrus(struct bch_fs *c, struct bch_dev *ca) +{ + CLASS(btree_trans, trans)(c); + int ret = bch2_btree_write_buffer_flush_sync(trans) ?: + for_each_btree_key(trans, iter, + BTREE_ID_lru, POS_MIN, BTREE_ITER_prefetch, k, ({ + struct bbpos bp = lru_pos_to_bp(k); + + bp.btree == BTREE_ID_alloc && bp.pos.inode == ca->dev_idx + ? (bch2_btree_delete_at(trans, &iter, 0) ?: + bch2_trans_commit(trans, NULL, NULL, 0)) + : 0; + })); + bch_err_fn(c, ret); + return ret; +} + static u64 bkey_lru_type_idx(struct bch_fs *c, enum bch_lru_type type, struct bkey_s_c k) diff --git a/fs/bcachefs/lru.h b/fs/bcachefs/lru.h index 6f1e0a7b5db5..d5a2620f2507 100644 --- a/fs/bcachefs/lru.h +++ b/fs/bcachefs/lru.h @@ -59,8 +59,6 @@ void bch2_lru_pos_to_text(struct printbuf *, struct bpos); .min_val_size = 8, \ }) -int bch2_lru_del(struct btree_trans *, u16, u64, u64); -int bch2_lru_set(struct btree_trans *, u16, u64, u64); int __bch2_lru_change(struct btree_trans *, u16, u64, u64, u64); static inline int bch2_lru_change(struct btree_trans *trans, @@ -72,9 +70,10 @@ static inline int bch2_lru_change(struct btree_trans *trans, : 0; } +int bch2_dev_remove_lrus(struct bch_fs *, struct bch_dev *); + struct bkey_buf; int bch2_lru_check_set(struct btree_trans *, u16, u64, u64, struct bkey_s_c, struct bkey_buf *); - int bch2_check_lrus(struct bch_fs *); #endif /* _BCACHEFS_LRU_H */ diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 56e9ba4ed6a8..09cd19946fbe 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -503,7 +503,8 @@ struct bch_inode_opts *bch2_move_get_io_opts(struct btree_trans *trans, break; } out: - ret = bch2_get_update_rebalance_opts(trans, opts_ret, extent_iter, extent_k); + ret = bch2_get_update_rebalance_opts(trans, opts_ret, extent_iter, extent_k, + SET_NEEDS_REBALANCE_other); if (ret) return ERR_PTR(ret); return opts_ret; @@ -535,7 +536,8 @@ int bch2_move_get_io_opts_one(struct btree_trans *trans, } } - return bch2_get_update_rebalance_opts(trans, io_opts, extent_iter, extent_k); + return bch2_get_update_rebalance_opts(trans, io_opts, extent_iter, extent_k, + SET_NEEDS_REBALANCE_other); } int bch2_move_ratelimit(struct moving_context *ctxt) @@ -582,37 +584,6 @@ int bch2_move_ratelimit(struct moving_context *ctxt) return 0; } -/* - * Move requires non extents iterators, and there's also no need for it to - * signal indirect_extent_missing_error: - */ -static struct bkey_s_c bch2_lookup_indirect_extent_for_move(struct btree_trans *trans, - struct btree_iter *iter, - struct bkey_s_c_reflink_p p) -{ - if (unlikely(REFLINK_P_ERROR(p.v))) - return bkey_s_c_null; - - struct bpos reflink_pos = POS(0, REFLINK_P_IDX(p.v)); - - bch2_trans_iter_init(trans, iter, - BTREE_ID_reflink, reflink_pos, - BTREE_ITER_not_extents); - - struct bkey_s_c k = bch2_btree_iter_peek(iter); - if (!k.k || bkey_err(k)) { - bch2_trans_iter_exit(iter); - return k; - } - - if (bkey_lt(reflink_pos, bkey_start_pos(k.k))) { - bch2_trans_iter_exit(iter); - return bkey_s_c_null; - } - - return k; -} - int bch2_move_data_btree(struct moving_context *ctxt, struct bpos start, struct bpos end, @@ -627,12 +598,6 @@ int bch2_move_data_btree(struct moving_context *ctxt, struct btree_iter iter, reflink_iter = {}; struct bkey_s_c k; struct data_update_opts data_opts; - /* - * If we're moving a single file, also process reflinked data it points - * to (this includes propagating changed io_opts from the inode to the - * extent): - */ - bool walk_indirect = start.inode == end.inode; int ret = 0, ret2; per_snapshot_io_opts_init(&snapshot_io_opts, c); @@ -697,8 +662,6 @@ root_err: bch2_ratelimit_reset(ctxt->rate); while (!bch2_move_ratelimit(ctxt)) { - struct btree_iter *extent_iter = &iter; - bch2_trans_begin(trans); k = bch2_btree_iter_peek(&iter); @@ -717,41 +680,17 @@ root_err: if (ctxt->stats) ctxt->stats->pos = BBPOS(iter.btree_id, iter.pos); - if (walk_indirect && - k.k->type == KEY_TYPE_reflink_p && - REFLINK_P_MAY_UPDATE_OPTIONS(bkey_s_c_to_reflink_p(k).v)) { - struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); - - bch2_trans_iter_exit(&reflink_iter); - k = bch2_lookup_indirect_extent_for_move(trans, &reflink_iter, p); - ret = bkey_err(k); - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - continue; - if (ret) - break; - - if (!k.k) - goto next_nondata; - - /* - * XXX: reflink pointers may point to multiple indirect - * extents, so don't advance past the entire reflink - * pointer - need to fixup iter->k - */ - extent_iter = &reflink_iter; - } - if (!bkey_extent_is_direct_data(k.k)) goto next_nondata; io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts, - iter.pos, extent_iter, k); + iter.pos, &iter, k); ret = PTR_ERR_OR_ZERO(io_opts); if (ret) continue; memset(&data_opts, 0, sizeof(data_opts)); - if (!pred(c, arg, extent_iter->btree_id, k, io_opts, &data_opts)) + if (!pred(c, arg, iter.btree_id, k, io_opts, &data_opts)) goto next; /* @@ -762,7 +701,7 @@ root_err: k = bkey_i_to_s_c(sk.k); if (!level) - ret2 = bch2_move_extent(ctxt, NULL, extent_iter, k, *io_opts, data_opts); + ret2 = bch2_move_extent(ctxt, NULL, &iter, k, *io_opts, data_opts); else if (!data_opts.scrub) ret2 = bch2_btree_node_rewrite_pos(trans, btree_id, level, k.k->p, data_opts.target, 0); diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c index c4faa66b55ce..122bc98e4cbb 100644 --- a/fs/bcachefs/opts.c +++ b/fs/bcachefs/opts.c @@ -803,7 +803,7 @@ void bch2_inode_opts_get(struct bch_fs *c, struct bch_inode_opts *ret) BCH_INODE_OPTS() #undef x - ret->opt_change_cookie = atomic_read(&c->opt_change_cookie); + ret->change_cookie = atomic_read(&c->opt_change_cookie); bch2_io_opts_fixups(ret); } diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index a5779f8943cf..22cf109fb9c9 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -678,7 +678,7 @@ struct bch_inode_opts { BCH_INODE_OPTS() #undef x - u32 opt_change_cookie; + u32 change_cookie; }; static inline void bch2_io_opts_fixups(struct bch_inode_opts *opts) diff --git a/fs/bcachefs/progress.c b/fs/bcachefs/progress.c index 7cc16490ffa9..541ee951d1c9 100644 --- a/fs/bcachefs/progress.c +++ b/fs/bcachefs/progress.c @@ -4,21 +4,14 @@ #include "disk_accounting.h" #include "progress.h" -void bch2_progress_init_inner(struct progress_indicator_state *s, - struct bch_fs *c, - u64 leaf_btree_id_mask, - u64 inner_btree_id_mask) +void bch2_progress_init(struct progress_indicator_state *s, + struct bch_fs *c, + u64 btree_id_mask) { memset(s, 0, sizeof(*s)); s->next_print = jiffies + HZ * 10; - /* This is only an estimation: nodes can have different replica counts */ - const u32 expected_node_disk_sectors = - READ_ONCE(c->opts.metadata_replicas) * btree_sectors(c); - - const u64 btree_id_mask = leaf_btree_id_mask | inner_btree_id_mask; - for (unsigned i = 0; i < btree_id_nr_alive(c); i++) { if (!(btree_id_mask & BIT_ULL(i))) continue; @@ -26,29 +19,9 @@ void bch2_progress_init_inner(struct progress_indicator_state *s, struct disk_accounting_pos acc; disk_accounting_key_init(acc, btree, .id = i); - struct { - u64 disk_sectors; - u64 total_nodes; - u64 inner_nodes; - } v = {0}; - bch2_accounting_mem_read(c, disk_accounting_pos_to_bpos(&acc), - (u64 *)&v, sizeof(v) / sizeof(u64)); - - /* Better to estimate as 0 than the total node count */ - if (inner_btree_id_mask & BIT_ULL(i)) - s->nodes_total += v.inner_nodes; - - if (!(leaf_btree_id_mask & BIT_ULL(i))) - continue; - - /* - * We check for zeros to degrade gracefully when run - * with un-upgraded accounting info (missing some counters). - */ - if (v.total_nodes != 0) - s->nodes_total += v.total_nodes - v.inner_nodes; - else - s->nodes_total += div_u64(v.disk_sectors, expected_node_disk_sectors); + u64 v; + bch2_accounting_mem_read(c, disk_accounting_pos_to_bpos(&acc), &v, 1); + s->nodes_total += div64_ul(v, btree_sectors(c)); } } diff --git a/fs/bcachefs/progress.h b/fs/bcachefs/progress.h index 91f345337709..972a73087ffe 100644 --- a/fs/bcachefs/progress.h +++ b/fs/bcachefs/progress.h @@ -20,17 +20,7 @@ struct progress_indicator_state { struct btree *last_node; }; -void bch2_progress_init_inner(struct progress_indicator_state *s, - struct bch_fs *c, - u64 leaf_btree_id_mask, - u64 inner_btree_id_mask); - -static inline void bch2_progress_init(struct progress_indicator_state *s, - struct bch_fs *c, u64 btree_id_mask) -{ - bch2_progress_init_inner(s, c, btree_id_mask, 0); -} - +void bch2_progress_init(struct progress_indicator_state *, struct bch_fs *, u64); void bch2_progress_update_iter(struct btree_trans *, struct progress_indicator_state *, struct btree_iter *, diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index 901cff84aab5..849bdbf800d4 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -211,7 +211,9 @@ static bool bch2_bkey_rebalance_needs_update(struct bch_fs *c, struct bch_inode_ } int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bch_inode_opts *opts, - struct bkey_i *_k) + struct bkey_i *_k, + enum set_needs_rebalance_ctx ctx, + u32 change_cookie) { if (!bkey_extent_is_direct_data(&_k->k)) return 0; @@ -238,7 +240,8 @@ int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bch_inode_opts *opts, int bch2_get_update_rebalance_opts(struct btree_trans *trans, struct bch_inode_opts *io_opts, struct btree_iter *iter, - struct bkey_s_c k) + struct bkey_s_c k, + enum set_needs_rebalance_ctx ctx) { BUG_ON(iter->flags & BTREE_ITER_is_extents); BUG_ON(iter->flags & BTREE_ITER_filter_snapshots); @@ -267,7 +270,7 @@ int bch2_get_update_rebalance_opts(struct btree_trans *trans, /* On successfull transaction commit, @k was invalidated: */ - return bch2_bkey_set_needs_rebalance(trans->c, io_opts, n) ?: + return bch2_bkey_set_needs_rebalance(trans->c, io_opts, n, ctx, 0) ?: bch2_trans_update(trans, iter, n, BTREE_UPDATE_internal_snapshot_node) ?: bch2_trans_commit(trans, NULL, NULL, 0) ?: bch_err_throw(trans->c, transaction_restart_nested); @@ -528,6 +531,29 @@ out: return ret; } +static int do_rebalance_scan_indirect(struct btree_trans *trans, + struct bkey_s_c_reflink_p p, + struct bch_inode_opts *opts) +{ + u64 idx = REFLINK_P_IDX(p.v) - le32_to_cpu(p.v->front_pad); + u64 end = REFLINK_P_IDX(p.v) + p.k->size + le32_to_cpu(p.v->back_pad); + u32 restart_count = trans->restart_count; + + int ret = for_each_btree_key(trans, iter, BTREE_ID_reflink, + POS(0, idx), BTREE_ITER_not_extents, k, ({ + if (bpos_ge(bkey_start_pos(k.k), POS(0, end))) + break; + bch2_get_update_rebalance_opts(trans, opts, &iter, k, + SET_NEEDS_REBALANCE_opt_change_indirect); + })); + if (ret) + return ret; + + /* suppress trans_was_restarted() check */ + trans->restart_count = restart_count; + return 0; +} + static int do_rebalance_scan(struct moving_context *ctxt, u64 inum, u64 cookie, u64 *sectors_scanned) { @@ -557,9 +583,14 @@ static int do_rebalance_scan(struct moving_context *ctxt, BTREE_ITER_prefetch, k, ({ ctxt->stats->pos = BBPOS(iter.btree_id, iter.pos); - struct bch_inode_opts *io_opts = bch2_move_get_io_opts(trans, + struct bch_inode_opts *opts = bch2_move_get_io_opts(trans, &snapshot_io_opts, iter.pos, &iter, k); - PTR_ERR_OR_ZERO(io_opts); + PTR_ERR_OR_ZERO(opts) ?: + (inum && + k.k->type == KEY_TYPE_reflink_p && + REFLINK_P_MAY_UPDATE_OPTIONS(bkey_s_c_to_reflink_p(k).v) + ? do_rebalance_scan_indirect(trans, bkey_s_c_to_reflink_p(k), opts) + : 0); })) ?: commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, bch2_clear_rebalance_needs_scan(trans, inum, cookie)); diff --git a/fs/bcachefs/rebalance.h b/fs/bcachefs/rebalance.h index c5f49f480a79..6558a7626272 100644 --- a/fs/bcachefs/rebalance.h +++ b/fs/bcachefs/rebalance.h @@ -30,11 +30,20 @@ void bch2_extent_rebalance_to_text(struct printbuf *, struct bch_fs *, const struct bch_extent_rebalance *); u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *, struct bkey_s_c); -int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bch_inode_opts *, struct bkey_i *); -int bch2_get_update_rebalance_opts(struct btree_trans *, - struct bch_inode_opts *, - struct btree_iter *, - struct bkey_s_c); + +enum set_needs_rebalance_ctx { + SET_NEEDS_REBALANCE_opt_change, + SET_NEEDS_REBALANCE_opt_change_indirect, + SET_NEEDS_REBALANCE_foreground, + SET_NEEDS_REBALANCE_other, +}; + +int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bch_inode_opts *, + struct bkey_i *, enum set_needs_rebalance_ctx, u32); + +int bch2_get_update_rebalance_opts(struct btree_trans *, struct bch_inode_opts *, + struct btree_iter *, struct bkey_s_c, + enum set_needs_rebalance_ctx); int bch2_set_rebalance_needs_scan_trans(struct btree_trans *, u64); int bch2_set_rebalance_needs_scan(struct bch_fs *, u64 inum); diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 8679c8aad0e7..531c2ef128ae 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -837,33 +837,39 @@ use_clean: bch2_async_btree_node_rewrites_flush(c); /* fsync if we fixed errors */ - if (test_bit(BCH_FS_errors_fixed, &c->flags)) { + bool errors_fixed = test_bit(BCH_FS_errors_fixed, &c->flags) || + test_bit(BCH_FS_errors_fixed_silent, &c->flags); + + if (errors_fixed) { bch2_journal_flush_all_pins(&c->journal); bch2_journal_meta(&c->journal); } /* If we fixed errors, verify that fs is actually clean now: */ if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) && - test_bit(BCH_FS_errors_fixed, &c->flags) && + errors_fixed && !test_bit(BCH_FS_errors_not_fixed, &c->flags) && !test_bit(BCH_FS_error, &c->flags)) { bch2_flush_fsck_errs(c); bch_info(c, "Fixed errors, running fsck a second time to verify fs is clean"); + errors_fixed = test_bit(BCH_FS_errors_fixed, &c->flags); clear_bit(BCH_FS_errors_fixed, &c->flags); + clear_bit(BCH_FS_errors_fixed_silent, &c->flags); ret = bch2_run_recovery_passes(c, BCH_RECOVERY_PASS_check_alloc_info); if (ret) goto err; - if (test_bit(BCH_FS_errors_fixed, &c->flags) || + if (errors_fixed || test_bit(BCH_FS_errors_not_fixed, &c->flags)) { bch_err(c, "Second fsck run was not clean"); set_bit(BCH_FS_errors_not_fixed, &c->flags); } - set_bit(BCH_FS_errors_fixed, &c->flags); + if (errors_fixed) + set_bit(BCH_FS_errors_fixed, &c->flags); } if (enabled_qtypes(c)) { diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index 55ad8ab7a148..d54468fdcb18 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -589,7 +589,6 @@ s64 bch2_remap_range(struct bch_fs *c, struct bpos dst_start = POS(dst_inum.inum, dst_offset); struct bpos src_start = POS(src_inum.inum, src_offset); struct bpos dst_end = dst_start, src_end = src_start; - struct bch_inode_opts opts; struct bpos src_want; u64 dst_done = 0; u32 dst_snapshot, src_snapshot; @@ -609,10 +608,6 @@ s64 bch2_remap_range(struct bch_fs *c, bch2_bkey_buf_init(&new_src); CLASS(btree_trans, trans)(c); - ret = bch2_inum_opts_get(trans, src_inum, &opts); - if (ret) - goto err; - bch2_trans_iter_init(trans, &src_iter, BTREE_ID_extents, src_start, BTREE_ITER_intent); bch2_trans_iter_init(trans, &dst_iter, BTREE_ID_extents, dst_start, @@ -709,11 +704,10 @@ s64 bch2_remap_range(struct bch_fs *c, min(src_k.k->p.offset - src_want.offset, dst_end.offset - dst_iter.pos.offset)); - ret = bch2_bkey_set_needs_rebalance(c, &opts, new_dst.k) ?: - bch2_extent_update(trans, dst_inum, &dst_iter, - new_dst.k, &disk_res, - new_i_size, i_sectors_delta, - true); + ret = bch2_extent_update(trans, dst_inum, &dst_iter, + new_dst.k, &disk_res, + new_i_size, i_sectors_delta, + true, 0); bch2_disk_reservation_put(c, &disk_res); } bch2_trans_iter_exit(&dst_iter); @@ -744,7 +738,7 @@ s64 bch2_remap_range(struct bch_fs *c, bch2_trans_iter_exit(&inode_iter); } while (bch2_err_matches(ret2, BCH_ERR_transaction_restart)); -err: + bch2_bkey_buf_exit(&new_src, c); bch2_bkey_buf_exit(&new_dst, c); diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 32b12311928e..de1e8912975c 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -833,6 +833,8 @@ int bch2_fs_init_rw(struct bch_fs *c) if (test_bit(BCH_FS_rw_init_done, &c->flags)) return 0; + bch_verbose(c, "doing rw allocations"); + if (!(c->btree_update_wq = alloc_workqueue("bcachefs", WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_UNBOUND, 512)) || !(c->btree_write_complete_wq = alloc_workqueue("bcachefs_btree_write_complete", @@ -1286,7 +1288,12 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts, if (ret) goto err; - if (go_rw_in_recovery(c)) { + /* + * just make sure this is always allocated if we might need it - mount + * failing due to kthread_create() failing is _very_ annoying + */ + if (!(c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info)) || + go_rw_in_recovery(c)) { /* * start workqueues/kworkers early - kthread creation checks for * pending signals, which is _very_ annoying |