summaryrefslogtreecommitdiff
path: root/fs/btrfs/disk-io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r--fs/btrfs/disk-io.c269
1 files changed, 149 insertions, 120 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f556c3732c2c..2d4667594681 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -319,9 +319,9 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info,
memcpy(&found, result, csum_size);
read_extent_buffer(buf, &val, 0, csum_size);
- printk_ratelimited(KERN_WARNING
- "BTRFS: %s checksum verify failed on %llu wanted %X found %X "
- "level %d\n",
+ btrfs_warn_rl(fs_info,
+ "%s checksum verify failed on %llu wanted %X found %X "
+ "level %d",
fs_info->sb->s_id, buf->start,
val, found, btrfs_header_level(buf));
if (result != (char *)&inline_result)
@@ -368,9 +368,9 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
ret = 0;
goto out;
}
- printk_ratelimited(KERN_ERR
- "BTRFS (device %s): parent transid verify failed on %llu wanted %llu found %llu\n",
- eb->fs_info->sb->s_id, eb->start,
+ btrfs_err_rl(eb->fs_info,
+ "parent transid verify failed on %llu wanted %llu found %llu",
+ eb->start,
parent_transid, btrfs_header_generation(eb));
ret = 1;
@@ -629,15 +629,14 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
found_start = btrfs_header_bytenr(eb);
if (found_start != eb->start) {
- printk_ratelimited(KERN_ERR "BTRFS (device %s): bad tree block start "
- "%llu %llu\n",
- eb->fs_info->sb->s_id, found_start, eb->start);
+ btrfs_err_rl(eb->fs_info, "bad tree block start %llu %llu",
+ found_start, eb->start);
ret = -EIO;
goto err;
}
if (check_tree_block_fsid(root->fs_info, eb)) {
- printk_ratelimited(KERN_ERR "BTRFS (device %s): bad fsid on block %llu\n",
- eb->fs_info->sb->s_id, eb->start);
+ btrfs_err_rl(eb->fs_info, "bad fsid on block %llu",
+ eb->start);
ret = -EIO;
goto err;
}
@@ -703,7 +702,7 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror)
return -EIO; /* we fixed nothing */
}
-static void end_workqueue_bio(struct bio *bio, int err)
+static void end_workqueue_bio(struct bio *bio)
{
struct btrfs_end_io_wq *end_io_wq = bio->bi_private;
struct btrfs_fs_info *fs_info;
@@ -711,7 +710,7 @@ static void end_workqueue_bio(struct bio *bio, int err)
btrfs_work_func_t func;
fs_info = end_io_wq->info;
- end_io_wq->error = err;
+ end_io_wq->error = bio->bi_error;
if (bio->bi_rw & REQ_WRITE) {
if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) {
@@ -802,13 +801,17 @@ static void run_one_async_done(struct btrfs_work *work)
limit = btrfs_async_submit_limit(fs_info);
limit = limit * 2 / 3;
+ /*
+ * atomic_dec_return implies a barrier for waitqueue_active
+ */
if (atomic_dec_return(&fs_info->nr_async_submits) < limit &&
waitqueue_active(&fs_info->async_submit_wait))
wake_up(&fs_info->async_submit_wait);
/* If an error occured we just want to clean up the bio and move on */
if (async->error) {
- bio_endio(async->bio, async->error);
+ async->bio->bi_error = async->error;
+ bio_endio(async->bio);
return;
}
@@ -908,8 +911,10 @@ static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
* submission context. Just jump into btrfs_map_bio
*/
ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1);
- if (ret)
- bio_endio(bio, ret);
+ if (ret) {
+ bio->bi_error = ret;
+ bio_endio(bio);
+ }
return ret;
}
@@ -960,10 +965,13 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
__btree_submit_bio_done);
}
- if (ret) {
+ if (ret)
+ goto out_w_error;
+ return 0;
+
out_w_error:
- bio_endio(bio, ret);
- }
+ bio->bi_error = ret;
+ bio_endio(bio);
return ret;
}
@@ -1259,6 +1267,7 @@ static void __setup_root(u32 nodesize, u32 sectorsize, u32 stripesize,
atomic_set(&root->orphan_inodes, 0);
atomic_set(&root->refs, 1);
atomic_set(&root->will_be_snapshoted, 0);
+ atomic_set(&root->qgroup_meta_rsv, 0);
root->log_transid = 0;
root->log_transid_committed = -1;
root->last_log_commit = 0;
@@ -1724,6 +1733,7 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE;
bdi->congested_fn = btrfs_congested_fn;
bdi->congested_data = info;
+ bdi->capabilities |= BDI_CAP_CGROUP_WRITEBACK;
return 0;
}
@@ -1735,16 +1745,15 @@ static void end_workqueue_fn(struct btrfs_work *work)
{
struct bio *bio;
struct btrfs_end_io_wq *end_io_wq;
- int error;
end_io_wq = container_of(work, struct btrfs_end_io_wq, work);
bio = end_io_wq->bio;
- error = end_io_wq->error;
+ bio->bi_error = end_io_wq->error;
bio->bi_private = end_io_wq->private;
bio->bi_end_io = end_io_wq->end_io;
kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq);
- bio_endio(bio, error);
+ bio_endio(bio);
}
static int cleaner_kthread(void *arg)
@@ -1753,6 +1762,7 @@ static int cleaner_kthread(void *arg)
int again;
struct btrfs_trans_handle *trans;
+ set_freezable();
do {
again = 0;
@@ -2342,8 +2352,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
u64 bytenr = btrfs_super_log_root(disk_super);
if (fs_devices->rw_devices == 0) {
- printk(KERN_WARNING "BTRFS: log replay required "
- "on RO media\n");
+ btrfs_warn(fs_info, "log replay required on RO media");
return -EIO;
}
@@ -2358,12 +2367,12 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
log_tree_root->node = read_tree_block(tree_root, bytenr,
fs_info->generation + 1);
if (IS_ERR(log_tree_root->node)) {
- printk(KERN_ERR "BTRFS: failed to read log tree\n");
+ btrfs_warn(fs_info, "failed to read log tree");
ret = PTR_ERR(log_tree_root->node);
kfree(log_tree_root);
return ret;
} else if (!extent_buffer_uptodate(log_tree_root->node)) {
- printk(KERN_ERR "BTRFS: failed to read log tree\n");
+ btrfs_err(fs_info, "failed to read log tree");
free_extent_buffer(log_tree_root->node);
kfree(log_tree_root);
return -EIO;
@@ -2371,7 +2380,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
/* returns with log_tree_root freed on success */
ret = btrfs_recover_log_trees(log_tree_root);
if (ret) {
- btrfs_error(tree_root->fs_info, ret,
+ btrfs_std_error(tree_root->fs_info, ret,
"Failed to recover log tree");
free_extent_buffer(log_tree_root->node);
kfree(log_tree_root);
@@ -2608,7 +2617,6 @@ int open_ctree(struct super_block *sb,
mutex_init(&fs_info->ordered_operations_mutex);
- mutex_init(&fs_info->ordered_extent_flush_mutex);
mutex_init(&fs_info->tree_log_mutex);
mutex_init(&fs_info->chunk_mutex);
mutex_init(&fs_info->transaction_kthread_mutex);
@@ -2648,8 +2656,8 @@ int open_ctree(struct super_block *sb,
* Read super block and check the signature bytes only
*/
bh = btrfs_read_dev_super(fs_devices->latest_bdev);
- if (!bh) {
- err = -EINVAL;
+ if (IS_ERR(bh)) {
+ err = PTR_ERR(bh);
goto fail_alloc;
}
@@ -2842,6 +2850,8 @@ int open_ctree(struct super_block *sb,
!extent_buffer_uptodate(chunk_root->node)) {
printk(KERN_ERR "BTRFS: failed to read chunk root on %s\n",
sb->s_id);
+ if (!IS_ERR(chunk_root->node))
+ free_extent_buffer(chunk_root->node);
chunk_root->node = NULL;
goto fail_tree_roots;
}
@@ -2880,6 +2890,8 @@ retry_root_backup:
!extent_buffer_uptodate(tree_root->node)) {
printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n",
sb->s_id);
+ if (!IS_ERR(tree_root->node))
+ free_extent_buffer(tree_root->node);
tree_root->node = NULL;
goto recovery_tree_root;
}
@@ -2928,7 +2940,7 @@ retry_root_backup:
goto fail_fsdev_sysfs;
}
- ret = btrfs_sysfs_add_one(fs_info);
+ ret = btrfs_sysfs_add_mounted(fs_info);
if (ret) {
pr_err("BTRFS: failed to init sysfs interface: %d\n", ret);
goto fail_fsdev_sysfs;
@@ -2950,8 +2962,9 @@ retry_root_backup:
if (fs_info->fs_devices->missing_devices >
fs_info->num_tolerated_disk_barrier_failures &&
!(sb->s_flags & MS_RDONLY)) {
- printk(KERN_WARNING "BTRFS: "
- "too many missing devices, writeable mount is not allowed\n");
+ pr_warn("BTRFS: missing devices(%llu) exceeds the limit(%d), writeable mount is not allowed\n",
+ fs_info->fs_devices->missing_devices,
+ fs_info->num_tolerated_disk_barrier_failures);
goto fail_sysfs;
}
@@ -3107,7 +3120,7 @@ fail_cleaner:
filemap_write_and_wait(fs_info->btree_inode->i_mapping);
fail_sysfs:
- btrfs_sysfs_remove_one(fs_info);
+ btrfs_sysfs_remove_mounted(fs_info);
fail_fsdev_sysfs:
btrfs_sysfs_remove_fsid(fs_info->fs_devices);
@@ -3169,8 +3182,8 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
struct btrfs_device *device = (struct btrfs_device *)
bh->b_private;
- printk_ratelimited_in_rcu(KERN_WARNING "BTRFS: lost page write due to "
- "I/O error on %s\n",
+ btrfs_warn_rl_in_rcu(device->dev_root->fs_info,
+ "lost page write due to IO error on %s",
rcu_str_deref(device->name));
/* note, we dont' set_buffer_write_io_error because we have
* our own ways of dealing with the IO errors
@@ -3182,6 +3195,37 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
put_bh(bh);
}
+int btrfs_read_dev_one_super(struct block_device *bdev, int copy_num,
+ struct buffer_head **bh_ret)
+{
+ struct buffer_head *bh;
+ struct btrfs_super_block *super;
+ u64 bytenr;
+
+ bytenr = btrfs_sb_offset(copy_num);
+ if (bytenr + BTRFS_SUPER_INFO_SIZE >= i_size_read(bdev->bd_inode))
+ return -EINVAL;
+
+ bh = __bread(bdev, bytenr / 4096, BTRFS_SUPER_INFO_SIZE);
+ /*
+ * If we fail to read from the underlying devices, as of now
+ * the best option we have is to mark it EIO.
+ */
+ if (!bh)
+ return -EIO;
+
+ super = (struct btrfs_super_block *)bh->b_data;
+ if (btrfs_super_bytenr(super) != bytenr ||
+ btrfs_super_magic(super) != BTRFS_MAGIC) {
+ brelse(bh);
+ return -EINVAL;
+ }
+
+ *bh_ret = bh;
+ return 0;
+}
+
+
struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
{
struct buffer_head *bh;
@@ -3189,7 +3233,7 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
struct btrfs_super_block *super;
int i;
u64 transid = 0;
- u64 bytenr;
+ int ret = -EINVAL;
/* we would like to check all the supers, but that would make
* a btrfs mount succeed after a mkfs from a different FS.
@@ -3197,21 +3241,11 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
* later supers, using BTRFS_SUPER_MIRROR_MAX instead
*/
for (i = 0; i < 1; i++) {
- bytenr = btrfs_sb_offset(i);
- if (bytenr + BTRFS_SUPER_INFO_SIZE >=
- i_size_read(bdev->bd_inode))
- break;
- bh = __bread(bdev, bytenr / 4096,
- BTRFS_SUPER_INFO_SIZE);
- if (!bh)
+ ret = btrfs_read_dev_one_super(bdev, i, &bh);
+ if (ret)
continue;
super = (struct btrfs_super_block *)bh->b_data;
- if (btrfs_super_bytenr(super) != bytenr ||
- btrfs_super_magic(super) != BTRFS_MAGIC) {
- brelse(bh);
- continue;
- }
if (!latest || btrfs_super_generation(super) > transid) {
brelse(latest);
@@ -3221,6 +3255,10 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
brelse(bh);
}
}
+
+ if (!latest)
+ return ERR_PTR(ret);
+
return latest;
}
@@ -3289,8 +3327,9 @@ static int write_dev_supers(struct btrfs_device *device,
bh = __getblk(device->bdev, bytenr / 4096,
BTRFS_SUPER_INFO_SIZE);
if (!bh) {
- printk(KERN_ERR "BTRFS: couldn't get super "
- "buffer head for bytenr %Lu\n", bytenr);
+ btrfs_err(device->dev_root->fs_info,
+ "couldn't get super buffer head for bytenr %llu",
+ bytenr);
errors++;
continue;
}
@@ -3324,10 +3363,8 @@ static int write_dev_supers(struct btrfs_device *device,
* endio for the write_dev_flush, this will wake anyone waiting
* for the barrier when it is done
*/
-static void btrfs_end_empty_barrier(struct bio *bio, int err)
+static void btrfs_end_empty_barrier(struct bio *bio)
{
- if (err)
- clear_bit(BIO_UPTODATE, &bio->bi_flags);
if (bio->bi_private)
complete(bio->bi_private);
bio_put(bio);
@@ -3355,8 +3392,8 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
wait_for_completion(&device->flush_wait);
- if (!bio_flagged(bio, BIO_UPTODATE)) {
- ret = -EIO;
+ if (bio->bi_error) {
+ ret = bio->bi_error;
btrfs_dev_stat_inc_and_print(device,
BTRFS_DEV_STAT_FLUSH_ERRS);
}
@@ -3439,6 +3476,35 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
return 0;
}
+int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags)
+{
+ int raid_type;
+ int min_tolerated = INT_MAX;
+
+ if ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 ||
+ (flags & BTRFS_AVAIL_ALLOC_BIT_SINGLE))
+ min_tolerated = min(min_tolerated,
+ btrfs_raid_array[BTRFS_RAID_SINGLE].
+ tolerated_failures);
+
+ for (raid_type = 0; raid_type < BTRFS_NR_RAID_TYPES; raid_type++) {
+ if (raid_type == BTRFS_RAID_SINGLE)
+ continue;
+ if (!(flags & btrfs_raid_group[raid_type]))
+ continue;
+ min_tolerated = min(min_tolerated,
+ btrfs_raid_array[raid_type].
+ tolerated_failures);
+ }
+
+ if (min_tolerated == INT_MAX) {
+ pr_warn("BTRFS: unknown raid flag: %llu\n", flags);
+ min_tolerated = 0;
+ }
+
+ return min_tolerated;
+}
+
int btrfs_calc_num_tolerated_disk_barrier_failures(
struct btrfs_fs_info *fs_info)
{
@@ -3448,13 +3514,12 @@ int btrfs_calc_num_tolerated_disk_barrier_failures(
BTRFS_BLOCK_GROUP_SYSTEM,
BTRFS_BLOCK_GROUP_METADATA,
BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA};
- int num_types = 4;
int i;
int c;
int num_tolerated_disk_barrier_failures =
(int)fs_info->fs_devices->num_devices;
- for (i = 0; i < num_types; i++) {
+ for (i = 0; i < ARRAY_SIZE(types); i++) {
struct btrfs_space_info *tmp;
sinfo = NULL;
@@ -3472,44 +3537,21 @@ int btrfs_calc_num_tolerated_disk_barrier_failures(
down_read(&sinfo->groups_sem);
for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
- if (!list_empty(&sinfo->block_groups[c])) {
- u64 flags;
-
- btrfs_get_block_group_info(
- &sinfo->block_groups[c], &space);
- if (space.total_bytes == 0 ||
- space.used_bytes == 0)
- continue;
- flags = space.flags;
- /*
- * return
- * 0: if dup, single or RAID0 is configured for
- * any of metadata, system or data, else
- * 1: if RAID5 is configured, or if RAID1 or
- * RAID10 is configured and only two mirrors
- * are used, else
- * 2: if RAID6 is configured, else
- * num_mirrors - 1: if RAID1 or RAID10 is
- * configured and more than
- * 2 mirrors are used.
- */
- if (num_tolerated_disk_barrier_failures > 0 &&
- ((flags & (BTRFS_BLOCK_GROUP_DUP |
- BTRFS_BLOCK_GROUP_RAID0)) ||
- ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK)
- == 0)))
- num_tolerated_disk_barrier_failures = 0;
- else if (num_tolerated_disk_barrier_failures > 1) {
- if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID5 |
- BTRFS_BLOCK_GROUP_RAID10)) {
- num_tolerated_disk_barrier_failures = 1;
- } else if (flags &
- BTRFS_BLOCK_GROUP_RAID6) {
- num_tolerated_disk_barrier_failures = 2;
- }
- }
- }
+ u64 flags;
+
+ if (list_empty(&sinfo->block_groups[c]))
+ continue;
+
+ btrfs_get_block_group_info(&sinfo->block_groups[c],
+ &space);
+ if (space.total_bytes == 0 || space.used_bytes == 0)
+ continue;
+ flags = space.flags;
+
+ num_tolerated_disk_barrier_failures = min(
+ num_tolerated_disk_barrier_failures,
+ btrfs_get_num_tolerated_disk_barrier_failures(
+ flags));
}
up_read(&sinfo->groups_sem);
}
@@ -3544,7 +3586,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
if (ret) {
mutex_unlock(
&root->fs_info->fs_devices->device_list_mutex);
- btrfs_error(root->fs_info, ret,
+ btrfs_std_error(root->fs_info, ret,
"errors while submitting device barriers.");
return ret;
}
@@ -3584,7 +3626,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
/* FUA is masked off if unsupported and can't be the reason */
- btrfs_error(root->fs_info, -EIO,
+ btrfs_std_error(root->fs_info, -EIO,
"%d errors while writing supers", total_errors);
return -EIO;
}
@@ -3602,7 +3644,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
}
mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
if (total_errors > max_errors) {
- btrfs_error(root->fs_info, -EIO,
+ btrfs_std_error(root->fs_info, -EIO,
"%d errors while writing supers", total_errors);
return -EIO;
}
@@ -3760,6 +3802,13 @@ void close_ctree(struct btrfs_root *root)
cancel_work_sync(&fs_info->async_reclaim_work);
if (!(fs_info->sb->s_flags & MS_RDONLY)) {
+ /*
+ * If the cleaner thread is stopped and there are
+ * block groups queued for removal, the deletion will be
+ * skipped when we quit the cleaner thread.
+ */
+ btrfs_delete_unused_bgs(root->fs_info);
+
ret = btrfs_commit_super(root);
if (ret)
btrfs_err(fs_info, "commit super ret %d", ret);
@@ -3781,7 +3830,7 @@ void close_ctree(struct btrfs_root *root)
percpu_counter_sum(&fs_info->delalloc_bytes));
}
- btrfs_sysfs_remove_one(fs_info);
+ btrfs_sysfs_remove_mounted(fs_info);
btrfs_sysfs_remove_fsid(fs_info->fs_devices);
btrfs_free_fs_roots(fs_info);
@@ -4279,25 +4328,6 @@ again:
return 0;
}
-static void btrfs_free_pending_ordered(struct btrfs_transaction *cur_trans,
- struct btrfs_fs_info *fs_info)
-{
- struct btrfs_ordered_extent *ordered;
-
- spin_lock(&fs_info->trans_lock);
- while (!list_empty(&cur_trans->pending_ordered)) {
- ordered = list_first_entry(&cur_trans->pending_ordered,
- struct btrfs_ordered_extent,
- trans_list);
- list_del_init(&ordered->trans_list);
- spin_unlock(&fs_info->trans_lock);
-
- btrfs_put_ordered_extent(ordered);
- spin_lock(&fs_info->trans_lock);
- }
- spin_unlock(&fs_info->trans_lock);
-}
-
void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
struct btrfs_root *root)
{
@@ -4309,7 +4339,6 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
cur_trans->state = TRANS_STATE_UNBLOCKED;
wake_up(&root->fs_info->transaction_wait);
- btrfs_free_pending_ordered(cur_trans, root->fs_info);
btrfs_destroy_delayed_inodes(root);
btrfs_assert_delayed_root_empty(root);