summaryrefslogtreecommitdiff
path: root/fs/btrfs/disk-io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r--fs/btrfs/disk-io.c147
1 files changed, 66 insertions, 81 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 9e1596bb208d..59ea049fe7ee 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1341,17 +1341,8 @@ struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev)
{
int ret;
- unsigned int nofs_flag;
- /*
- * We might be called under a transaction (e.g. indirect backref
- * resolution) which could deadlock if it triggers memory reclaim
- */
- nofs_flag = memalloc_nofs_save();
- ret = btrfs_drew_lock_init(&root->snapshot_lock);
- memalloc_nofs_restore(nofs_flag);
- if (ret)
- goto fail;
+ btrfs_drew_lock_init(&root->snapshot_lock);
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID &&
!btrfs_is_data_reloc_root(root)) {
@@ -2065,7 +2056,6 @@ void btrfs_put_root(struct btrfs_root *root)
WARN_ON(test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state));
if (root->anon_dev)
free_anon_bdev(root->anon_dev);
- btrfs_drew_lock_destroy(&root->snapshot_lock);
free_root_extent_buffers(root);
#ifdef CONFIG_BTRFS_DEBUG
spin_lock(&root->fs_info->fs_roots_radix_lock);
@@ -2125,11 +2115,16 @@ static void btrfs_init_balance(struct btrfs_fs_info *fs_info)
atomic_set(&fs_info->reloc_cancel_req, 0);
}
-static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info)
+static int btrfs_init_btree_inode(struct super_block *sb)
{
- struct inode *inode = fs_info->btree_inode;
+ struct btrfs_fs_info *fs_info = btrfs_sb(sb);
unsigned long hash = btrfs_inode_hash(BTRFS_BTREE_INODE_OBJECTID,
fs_info->tree_root);
+ struct inode *inode;
+
+ inode = new_inode(sb);
+ if (!inode)
+ return -ENOMEM;
inode->i_ino = BTRFS_BTREE_INODE_OBJECTID;
set_nlink(inode, 1);
@@ -2140,6 +2135,7 @@ static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info)
*/
inode->i_size = OFFSET_MAX;
inode->i_mapping->a_ops = &btree_aops;
+ mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
extent_io_tree_init(fs_info, &BTRFS_I(inode)->io_tree,
@@ -2152,6 +2148,9 @@ static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info)
BTRFS_I(inode)->location.offset = 0;
set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags);
__insert_inode_hash(inode, hash);
+ fs_info->btree_inode = inode;
+
+ return 0;
}
static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info)
@@ -2966,7 +2965,6 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
atomic64_set(&fs_info->free_chunk_space, 0);
fs_info->tree_mod_log = RB_ROOT;
fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
- fs_info->avg_delayed_ref_runtime = NSEC_PER_SEC >> 6; /* div by 64 */
btrfs_init_ref_verify(fs_info);
fs_info->thread_pool_size = min_t(unsigned long,
@@ -3344,14 +3342,11 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
struct btrfs_root *tree_root;
struct btrfs_root *chunk_root;
int ret;
- int err = -EINVAL;
int level;
ret = init_mount_fs_info(fs_info, sb);
- if (ret) {
- err = ret;
+ if (ret)
goto fail;
- }
/* These need to be init'ed before we start creating inodes and such. */
tree_root = btrfs_alloc_root(fs_info, BTRFS_ROOT_TREE_OBJECTID,
@@ -3361,17 +3356,13 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
GFP_KERNEL);
fs_info->chunk_root = chunk_root;
if (!tree_root || !chunk_root) {
- err = -ENOMEM;
+ ret = -ENOMEM;
goto fail;
}
- fs_info->btree_inode = new_inode(sb);
- if (!fs_info->btree_inode) {
- err = -ENOMEM;
+ ret = btrfs_init_btree_inode(sb);
+ if (ret)
goto fail;
- }
- mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
- btrfs_init_btree_inode(fs_info);
invalidate_bdev(fs_devices->latest_dev->bdev);
@@ -3380,7 +3371,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
*/
disk_super = btrfs_read_dev_super(fs_devices->latest_dev->bdev);
if (IS_ERR(disk_super)) {
- err = PTR_ERR(disk_super);
+ ret = PTR_ERR(disk_super);
goto fail_alloc;
}
@@ -3392,7 +3383,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
if (!btrfs_supported_super_csum(csum_type)) {
btrfs_err(fs_info, "unsupported checksum algorithm: %u",
csum_type);
- err = -EINVAL;
+ ret = -EINVAL;
btrfs_release_disk_super(disk_super);
goto fail_alloc;
}
@@ -3401,7 +3392,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
ret = btrfs_init_csum_hash(fs_info, csum_type);
if (ret) {
- err = ret;
btrfs_release_disk_super(disk_super);
goto fail_alloc;
}
@@ -3412,7 +3402,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
*/
if (btrfs_check_super_csum(fs_info, disk_super)) {
btrfs_err(fs_info, "superblock checksum mismatch");
- err = -EINVAL;
+ ret = -EINVAL;
btrfs_release_disk_super(disk_super);
goto fail_alloc;
}
@@ -3442,12 +3432,15 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
ret = btrfs_validate_mount_super(fs_info);
if (ret) {
btrfs_err(fs_info, "superblock contains fatal errors");
- err = -EINVAL;
+ ret = -EINVAL;
goto fail_alloc;
}
- if (!btrfs_super_root(disk_super))
+ if (!btrfs_super_root(disk_super)) {
+ btrfs_err(fs_info, "invalid superblock tree root bytenr");
+ ret = -EINVAL;
goto fail_alloc;
+ }
/* check FS state, whether FS is broken. */
if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_ERROR)
@@ -3474,16 +3467,12 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
fs_info->stripesize = stripesize;
ret = btrfs_parse_options(fs_info, options, sb->s_flags);
- if (ret) {
- err = ret;
+ if (ret)
goto fail_alloc;
- }
ret = btrfs_check_features(fs_info, !sb_rdonly(sb));
- if (ret < 0) {
- err = ret;
+ if (ret < 0)
goto fail_alloc;
- }
if (sectorsize < PAGE_SIZE) {
struct btrfs_subpage_info *subpage_info;
@@ -3503,17 +3492,17 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
"read-write for sector size %u with page size %lu is experimental",
sectorsize, PAGE_SIZE);
subpage_info = kzalloc(sizeof(*subpage_info), GFP_KERNEL);
- if (!subpage_info)
+ if (!subpage_info) {
+ ret = -ENOMEM;
goto fail_alloc;
+ }
btrfs_init_subpage_info(subpage_info, sectorsize);
fs_info->subpage_info = subpage_info;
}
ret = btrfs_init_workqueues(fs_info);
- if (ret) {
- err = ret;
+ if (ret)
goto fail_sb_buffer;
- }
sb->s_bdi->ra_pages *= btrfs_super_num_devices(disk_super);
sb->s_bdi->ra_pages = max(sb->s_bdi->ra_pages, SZ_4M / PAGE_SIZE);
@@ -3559,6 +3548,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
btrfs_free_extra_devids(fs_devices);
if (!fs_devices->latest_dev->bdev) {
btrfs_err(fs_info, "failed to read devices");
+ ret = -EIO;
goto fail_tree_roots;
}
@@ -3574,8 +3564,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
ret = btrfs_get_dev_zone_info_all_devices(fs_info);
if (ret) {
btrfs_err(fs_info,
- "zoned: failed to read device zone info: %d",
- ret);
+ "zoned: failed to read device zone info: %d", ret);
goto fail_block_groups;
}
@@ -3654,19 +3643,24 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
!btrfs_check_rw_degradable(fs_info, NULL)) {
btrfs_warn(fs_info,
"writable mount is not allowed due to too many missing devices");
+ ret = -EINVAL;
goto fail_sysfs;
}
fs_info->cleaner_kthread = kthread_run(cleaner_kthread, fs_info,
"btrfs-cleaner");
- if (IS_ERR(fs_info->cleaner_kthread))
+ if (IS_ERR(fs_info->cleaner_kthread)) {
+ ret = PTR_ERR(fs_info->cleaner_kthread);
goto fail_sysfs;
+ }
fs_info->transaction_kthread = kthread_run(transaction_kthread,
tree_root,
"btrfs-transaction");
- if (IS_ERR(fs_info->transaction_kthread))
+ if (IS_ERR(fs_info->transaction_kthread)) {
+ ret = PTR_ERR(fs_info->transaction_kthread);
goto fail_cleaner;
+ }
if (!btrfs_test_opt(fs_info, NOSSD) &&
!fs_info->fs_devices->rotating) {
@@ -3684,7 +3678,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
fs_info->fs_devices->discardable) {
btrfs_set_and_info(fs_info, DISCARD_ASYNC,
"auto enabling async discard");
- btrfs_clear_opt(fs_info->mount_opt, NODISCARD);
}
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
@@ -3711,16 +3704,14 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
!btrfs_test_opt(fs_info, NOLOGREPLAY)) {
btrfs_info(fs_info, "start tree-log replay");
ret = btrfs_replay_log(fs_info, fs_devices);
- if (ret) {
- err = ret;
+ if (ret)
goto fail_qgroup;
- }
}
fs_info->fs_root = btrfs_get_fs_root(fs_info, BTRFS_FS_TREE_OBJECTID, true);
if (IS_ERR(fs_info->fs_root)) {
- err = PTR_ERR(fs_info->fs_root);
- btrfs_warn(fs_info, "failed to read fs tree: %d", err);
+ ret = PTR_ERR(fs_info->fs_root);
+ btrfs_warn(fs_info, "failed to read fs tree: %d", ret);
fs_info->fs_root = NULL;
goto fail_qgroup;
}
@@ -3797,7 +3788,8 @@ fail_alloc:
iput(fs_info->btree_inode);
fail:
btrfs_close_devices(fs_info->fs_devices);
- return err;
+ ASSERT(ret < 0);
+ return ret;
}
ALLOW_ERROR_INJECTION(open_ctree, ERRNO);
@@ -4094,6 +4086,8 @@ static void write_dev_flush(struct btrfs_device *device)
{
struct bio *bio = &device->flush_bio;
+ device->last_flush_error = BLK_STS_OK;
+
#ifndef CONFIG_BTRFS_FS_CHECK_INTEGRITY
/*
* When a disk has write caching disabled, we skip submission of a bio
@@ -4122,25 +4116,24 @@ static void write_dev_flush(struct btrfs_device *device)
/*
* If the flush bio has been submitted by write_dev_flush, wait for it.
+ * Return true for any error, and false otherwise.
*/
-static blk_status_t wait_dev_flush(struct btrfs_device *device)
+static bool wait_dev_flush(struct btrfs_device *device)
{
struct bio *bio = &device->flush_bio;
- if (!test_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state))
- return BLK_STS_OK;
+ if (!test_and_clear_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state))
+ return false;
- clear_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state);
wait_for_completion_io(&device->flush_wait);
- return bio->bi_status;
-}
+ if (bio->bi_status) {
+ device->last_flush_error = bio->bi_status;
+ btrfs_dev_stat_inc_and_print(device, BTRFS_DEV_STAT_FLUSH_ERRS);
+ return true;
+ }
-static int check_barrier_error(struct btrfs_fs_info *fs_info)
-{
- if (!btrfs_check_rw_degradable(fs_info, NULL))
- return -EIO;
- return 0;
+ return false;
}
/*
@@ -4152,7 +4145,6 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
struct list_head *head;
struct btrfs_device *dev;
int errors_wait = 0;
- blk_status_t ret;
lockdep_assert_held(&info->fs_devices->device_list_mutex);
/* send down all the barriers */
@@ -4167,7 +4159,6 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
continue;
write_dev_flush(dev);
- dev->last_flush_error = BLK_STS_OK;
}
/* wait for all the barriers */
@@ -4182,23 +4173,17 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
!test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))
continue;
- ret = wait_dev_flush(dev);
- if (ret) {
- dev->last_flush_error = ret;
- btrfs_dev_stat_inc_and_print(dev,
- BTRFS_DEV_STAT_FLUSH_ERRS);
+ if (wait_dev_flush(dev))
errors_wait++;
- }
}
- if (errors_wait) {
- /*
- * At some point we need the status of all disks
- * to arrive at the volume status. So error checking
- * is being pushed to a separate loop.
- */
- return check_barrier_error(info);
- }
+ /*
+ * Checks last_flush_error of disks in order to determine the device
+ * state.
+ */
+ if (errors_wait && !btrfs_check_rw_degradable(info, NULL))
+ return -EIO;
+
return 0;
}
@@ -4404,12 +4389,12 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
root_objectid = gang[i]->root_key.objectid;
err = btrfs_orphan_cleanup(gang[i]);
if (err)
- break;
+ goto out;
btrfs_put_root(gang[i]);
}
root_objectid++;
}
-
+out:
/* release the uncleaned roots due to error */
for (; i < ret; i++) {
if (gang[i])