diff options
-rw-r--r-- | fs/bcachefs/bcachefs.h | 6 | ||||
-rw-r--r-- | fs/bcachefs/errcode.h | 1 | ||||
-rw-r--r-- | fs/bcachefs/fs-io-buffered.c | 56 | ||||
-rw-r--r-- | fs/bcachefs/fs.c | 8 | ||||
-rw-r--r-- | fs/bcachefs/inode.c | 5 | ||||
-rw-r--r-- | fs/bcachefs/io_read.c | 55 | ||||
-rw-r--r-- | fs/bcachefs/sb-counters_format.h | 2 | ||||
-rw-r--r-- | fs/bcachefs/sb-errors_format.h | 2 | ||||
-rw-r--r-- | fs/bcachefs/super.c | 2 |
9 files changed, 91 insertions, 46 deletions
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 553031a3b06a..83d6ab9c1a91 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -458,7 +458,6 @@ BCH_DEBUG_PARAMS_ALL() x(btree_node_compact) \ x(btree_node_merge) \ x(btree_node_sort) \ - x(btree_node_get) \ x(btree_node_read) \ x(btree_node_read_done) \ x(btree_node_write) \ @@ -466,10 +465,6 @@ BCH_DEBUG_PARAMS_ALL() x(btree_interior_update_total) \ x(btree_gc) \ x(data_write) \ - x(data_write_to_submit) \ - x(data_write_to_queue) \ - x(data_write_to_btree_update) \ - x(data_write_btree_update) \ x(data_read) \ x(data_promote) \ x(journal_flush_write) \ @@ -483,6 +478,7 @@ BCH_DEBUG_PARAMS_ALL() x(blocked_allocate) \ x(blocked_allocate_open_bucket) \ x(blocked_write_buffer_full) \ + x(blocked_writeback_throttle) \ x(nocow_lock_contended) enum bch_time_stats { diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index adc1f9315eab..420f6922dacb 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -345,6 +345,7 @@ x(BCH_ERR_data_read, data_read_no_encryption_key) \ x(BCH_ERR_data_read, data_read_buffer_too_small) \ x(BCH_ERR_data_read, data_read_key_overwritten) \ + x(0, rbio_narrow_crcs_fail) \ x(BCH_ERR_btree_node_read_err, btree_node_read_err_fixable) \ x(BCH_ERR_btree_node_read_err, btree_node_read_err_want_retry) \ x(BCH_ERR_btree_node_read_err, btree_node_read_err_must_retry) \ diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c index aab30571b056..fe684adca370 100644 --- a/fs/bcachefs/fs-io-buffered.c +++ b/fs/bcachefs/fs-io-buffered.c @@ -532,6 +532,39 @@ static void bch2_writepage_io_alloc(struct bch_fs *c, op->wbio.bio.bi_opf = wbc_to_write_flags(wbc); } +static bool can_write_now(struct bch_fs *c, unsigned replicas_want, struct closure *cl) +{ + unsigned reserved = OPEN_BUCKETS_COUNT - + (OPEN_BUCKETS_COUNT - bch2_open_buckets_reserved(BCH_WATERMARK_normal)) / 2; + + if (unlikely(c->open_buckets_nr_free <= reserved)) { + closure_wait(&c->open_buckets_wait, cl); + return false; + } + + if (BCH_WATERMARK_normal < c->journal.watermark && !bch2_journal_error(&c->journal)) { + closure_wait(&c->journal.async_wait, cl); + return false; + } + + return true; +} + +static void throttle_writes(struct bch_fs *c, unsigned replicas_want, struct closure *cl) +{ + u64 start = 0; + while (!can_write_now(c, replicas_want, cl)) { + if (!start) + start = local_clock(); + closure_sync(cl); + } + + BUG_ON(closure_nr_remaining(cl) > 1); + + if (start) + bch2_time_stats_update(&c->times[BCH_TIME_blocked_writeback_throttle], start); +} + static int __bch2_writepage(struct folio *folio, struct writeback_control *wbc, void *data) @@ -667,17 +700,6 @@ do_io: return 0; } -static int bch2_write_cache_pages(struct address_space *mapping, - struct writeback_control *wbc, void *data) -{ - struct folio *folio = NULL; - int error; - - while ((folio = writeback_iter(mapping, wbc, folio, &error))) - error = __bch2_writepage(folio, wbc, data); - return error; -} - int bch2_writepages(struct address_space *mapping, struct writeback_control *wbc) { struct bch_fs *c = mapping->host->i_sb->s_fs_info; @@ -686,7 +708,17 @@ int bch2_writepages(struct address_space *mapping, struct writeback_control *wbc bch2_inode_opts_get_inode(c, &to_bch_ei(mapping->host)->ei_inode, &w->opts); blk_start_plug(&w->plug); - int ret = bch2_write_cache_pages(mapping, wbc, w); + + struct closure cl; + closure_init_stack(&cl); + + struct folio *folio = NULL; + int ret = 0; + + while (throttle_writes(c, w->opts.data_replicas, &cl), + (folio = writeback_iter(mapping, wbc, folio, &ret))) + ret = __bch2_writepage(folio, wbc, w); + if (w->io) bch2_writepage_do_io(w); blk_finish_plug(&w->plug); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index c3839af067f2..d6a2031e17e8 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -2147,9 +2147,11 @@ static void bch2_evict_inode(struct inode *vinode) KEY_TYPE_QUOTA_WARN); int ret = bch2_inode_rm(c, inode_inum(inode)); if (ret && !bch2_err_matches(ret, EROFS)) { - bch_err_msg(c, ret, "VFS incorrectly tried to delete inode %llu:%llu", - inode->ei_inum.subvol, - inode->ei_inum.inum); + CLASS(printbuf, buf)(); + bch2_trans_do(c, bch2_inum_to_path(trans, inode->ei_inum, &buf)); + + bch_err_msg(c, ret, "VFS incorrectly tried to delete inode %llu:%llu\n%s", + inode->ei_inum.subvol, inode->ei_inum.inum, buf.buf); bch2_sb_error_count(c, BCH_FSCK_ERR_vfs_bad_inode_rm); } diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 655ed90b2a39..543627fb58be 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -1359,7 +1359,7 @@ err: if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; - return ret ?: bch_err_throw(c, transaction_restart_nested); + return ret; } /* @@ -1398,7 +1398,8 @@ next_parent: int bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot) { return __bch2_inode_rm_snapshot(trans, inum, snapshot) ?: - delete_ancestor_snapshot_inodes(trans, SPOS(0, inum, snapshot)); + delete_ancestor_snapshot_inodes(trans, SPOS(0, inum, snapshot)) ?: + bch_err_throw(trans->c, transaction_restart_nested); } static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos, diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index 7066be2701c0..e7ba0d0bf5ef 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -740,15 +740,13 @@ static void bch2_rbio_error(struct bch_read_bio *rbio, } static int __bch2_rbio_narrow_crcs(struct btree_trans *trans, - struct bch_read_bio *rbio) + struct bch_read_bio *rbio, + struct bch_extent_crc_unpacked *new_crc) { struct bch_fs *c = rbio->c; u64 data_offset = rbio->data_pos.offset - rbio->pick.crc.offset; int ret = 0; - if (crc_is_compressed(rbio->pick.crc)) - return 0; - CLASS(btree_iter, iter)(trans, rbio->data_btree, rbio->data_pos, BTREE_ITER_intent); struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); if ((ret = bkey_err(k))) @@ -756,21 +754,12 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans, if (bversion_cmp(k.k->bversion, rbio->version) || !bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, data_offset)) - return 0; + return bch_err_throw(c, rbio_narrow_crcs_fail); - /* Extent was merged? */ - if (bkey_start_offset(k.k) < data_offset || - k.k->p.offset > data_offset + rbio->pick.crc.uncompressed_size) - return 0; - - struct bch_extent_crc_unpacked new_crc; - if (bch2_rechecksum_bio(c, &rbio->bio, rbio->version, - rbio->pick.crc, NULL, &new_crc, - bkey_start_offset(k.k) - data_offset, k.k->size, - rbio->pick.crc.csum_type)) { - bch_err(c, "error verifying existing checksum while narrowing checksum (memory corruption?)"); - return 0; - } + /* Extent was trimmed/merged? */ + if (!bpos_eq(bkey_start_pos(k.k), rbio->data_pos) || + k.k->p.offset != rbio->data_pos.offset + rbio->pick.crc.live_size) + return bch_err_throw(c, rbio_narrow_crcs_fail); /* * going to be temporarily appending another checksum entry: @@ -782,17 +771,37 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans, bkey_reassemble(new, k); - if (!bch2_bkey_narrow_crcs(new, new_crc)) - return 0; + if (!bch2_bkey_narrow_crcs(new, *new_crc)) + return bch_err_throw(c, rbio_narrow_crcs_fail); return bch2_trans_update(trans, &iter, new, BTREE_UPDATE_internal_snapshot_node); } static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio) { - CLASS(btree_trans, trans)(rbio->c); - commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - __bch2_rbio_narrow_crcs(trans, rbio)); + struct bch_fs *c = rbio->c; + + if (crc_is_compressed(rbio->pick.crc)) + return; + + u64 data_offset = rbio->data_pos.offset - rbio->pick.crc.offset; + + struct bch_extent_crc_unpacked new_crc; + if (bch2_rechecksum_bio(c, &rbio->bio, rbio->version, + rbio->pick.crc, NULL, &new_crc, + rbio->data_pos.offset - data_offset, rbio->pick.crc.live_size, + rbio->pick.crc.csum_type)) { + bch_err(c, "error verifying existing checksum while narrowing checksum (memory corruption?)"); + return; + } + + CLASS(btree_trans, trans)(c); + int ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, + __bch2_rbio_narrow_crcs(trans, rbio, &new_crc)); + if (!ret) + count_event(c, io_read_narrow_crcs); + else if (ret == -BCH_ERR_rbio_narrow_crcs_fail) + count_event(c, io_read_narrow_crcs_fail); } static void bch2_read_decompress_err(struct work_struct *work) diff --git a/fs/bcachefs/sb-counters_format.h b/fs/bcachefs/sb-counters_format.h index 17cd617664d9..3907ba7edff2 100644 --- a/fs/bcachefs/sb-counters_format.h +++ b/fs/bcachefs/sb-counters_format.h @@ -23,6 +23,8 @@ enum counters_flags { x(io_read_reuse_race, 34, TYPE_COUNTER) \ x(io_read_retry, 32, TYPE_COUNTER) \ x(io_read_fail_and_poison, 95, TYPE_COUNTER) \ + x(io_read_narrow_crcs, 97, TYPE_COUNTER) \ + x(io_read_narrow_crcs_fail, 98, TYPE_COUNTER) \ x(io_write, 1, TYPE_SECTORS) \ x(io_move, 2, TYPE_SECTORS) \ x(io_move_read, 35, TYPE_SECTORS) \ diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 7c6f18a1ee2a..728d878057af 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -160,7 +160,7 @@ enum bch_fsck_flags { x(extent_ptrs_unwritten, 140, 0) \ x(extent_ptrs_written_and_unwritten, 141, 0) \ x(ptr_to_invalid_device, 142, 0) \ - x(ptr_to_removed_device, 322, 0) \ + x(ptr_to_removed_device, 322, FSCK_AUTOFIX) \ x(ptr_to_duplicate_device, 143, 0) \ x(ptr_after_last_bucket, 144, 0) \ x(ptr_before_first_bucket, 145, 0) \ diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 97eb420cd2d4..473ad4b51180 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1211,12 +1211,14 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts, bch2_opts_apply(&c->opts, *opts); +#ifdef __KERNEL__ if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && c->opts.block_size > PAGE_SIZE) { bch_err(c, "cannot mount bs > ps filesystem without CONFIG_TRANSPARENT_HUGEPAGE"); ret = -EINVAL; goto err; } +#endif c->btree_key_cache_btrees |= 1U << BTREE_ID_alloc; if (c->opts.inodes_use_key_cache) |