summaryrefslogtreecommitdiff
path: root/fs/btrfs/bio.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/bio.c')
-rw-r--r--fs/btrfs/bio.c211
1 files changed, 123 insertions, 88 deletions
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index 726592868e9c..5379c4714905 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -31,11 +31,11 @@ struct btrfs_failed_bio {
* Initialize a btrfs_bio structure. This skips the embedded bio itself as it
* is already initialized by the block layer.
*/
-void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_inode *inode,
+void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_fs_info *fs_info,
btrfs_bio_end_io_t end_io, void *private)
{
memset(bbio, 0, offsetof(struct btrfs_bio, bio));
- bbio->inode = inode;
+ bbio->fs_info = fs_info;
bbio->end_io = end_io;
bbio->private = private;
atomic_set(&bbio->pending_ios, 1);
@@ -48,41 +48,58 @@ void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_inode *inode,
* Just like the underlying bio_alloc_bioset it will not fail as it is backed by
* a mempool.
*/
-struct bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
- struct btrfs_inode *inode,
- btrfs_bio_end_io_t end_io, void *private)
+struct btrfs_bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
+ struct btrfs_fs_info *fs_info,
+ btrfs_bio_end_io_t end_io, void *private)
{
+ struct btrfs_bio *bbio;
struct bio *bio;
bio = bio_alloc_bioset(NULL, nr_vecs, opf, GFP_NOFS, &btrfs_bioset);
- btrfs_bio_init(btrfs_bio(bio), inode, end_io, private);
- return bio;
+ bbio = btrfs_bio(bio);
+ btrfs_bio_init(bbio, fs_info, end_io, private);
+ return bbio;
}
-static struct bio *btrfs_split_bio(struct btrfs_fs_info *fs_info,
- struct bio *orig, u64 map_length,
- bool use_append)
+static blk_status_t btrfs_bio_extract_ordered_extent(struct btrfs_bio *bbio)
{
- struct btrfs_bio *orig_bbio = btrfs_bio(orig);
+ struct btrfs_ordered_extent *ordered;
+ int ret;
+
+ ordered = btrfs_lookup_ordered_extent(bbio->inode, bbio->file_offset);
+ if (WARN_ON_ONCE(!ordered))
+ return BLK_STS_IOERR;
+ ret = btrfs_extract_ordered_extent(bbio, ordered);
+ btrfs_put_ordered_extent(ordered);
+
+ return errno_to_blk_status(ret);
+}
+
+static struct btrfs_bio *btrfs_split_bio(struct btrfs_fs_info *fs_info,
+ struct btrfs_bio *orig_bbio,
+ u64 map_length, bool use_append)
+{
+ struct btrfs_bio *bbio;
struct bio *bio;
if (use_append) {
unsigned int nr_segs;
- bio = bio_split_rw(orig, &fs_info->limits, &nr_segs,
+ bio = bio_split_rw(&orig_bbio->bio, &fs_info->limits, &nr_segs,
&btrfs_clone_bioset, map_length);
} else {
- bio = bio_split(orig, map_length >> SECTOR_SHIFT, GFP_NOFS,
- &btrfs_clone_bioset);
+ bio = bio_split(&orig_bbio->bio, map_length >> SECTOR_SHIFT,
+ GFP_NOFS, &btrfs_clone_bioset);
}
- btrfs_bio_init(btrfs_bio(bio), orig_bbio->inode, NULL, orig_bbio);
-
- btrfs_bio(bio)->file_offset = orig_bbio->file_offset;
- if (!(orig->bi_opf & REQ_BTRFS_ONE_ORDERED))
+ bbio = btrfs_bio(bio);
+ btrfs_bio_init(bbio, fs_info, NULL, orig_bbio);
+ bbio->inode = orig_bbio->inode;
+ bbio->file_offset = orig_bbio->file_offset;
+ if (!(orig_bbio->bio.bi_opf & REQ_BTRFS_ONE_ORDERED))
orig_bbio->file_offset += map_length;
atomic_inc(&orig_bbio->pending_ios);
- return bio;
+ return bbio;
}
static void btrfs_orig_write_end_io(struct bio *bio);
@@ -164,7 +181,7 @@ static void btrfs_end_repair_bio(struct btrfs_bio *repair_bbio,
goto done;
}
- btrfs_submit_bio(&repair_bbio->bio, mirror);
+ btrfs_submit_bio(repair_bbio, mirror);
return;
}
@@ -224,15 +241,16 @@ static struct btrfs_failed_bio *repair_one_sector(struct btrfs_bio *failed_bbio,
repair_bio = bio_alloc_bioset(NULL, 1, REQ_OP_READ, GFP_NOFS,
&btrfs_repair_bioset);
repair_bio->bi_iter.bi_sector = failed_bbio->saved_iter.bi_sector;
- bio_add_page(repair_bio, bv->bv_page, bv->bv_len, bv->bv_offset);
+ __bio_add_page(repair_bio, bv->bv_page, bv->bv_len, bv->bv_offset);
repair_bbio = btrfs_bio(repair_bio);
- btrfs_bio_init(repair_bbio, failed_bbio->inode, NULL, fbio);
+ btrfs_bio_init(repair_bbio, fs_info, NULL, fbio);
+ repair_bbio->inode = failed_bbio->inode;
repair_bbio->file_offset = failed_bbio->file_offset + bio_offset;
mirror = next_repair_mirror(fbio, failed_bbio->mirror_num);
btrfs_debug(fs_info, "submitting repair read to mirror %d", mirror);
- btrfs_submit_bio(repair_bio, mirror);
+ btrfs_submit_bio(repair_bbio, mirror);
return fbio;
}
@@ -246,6 +264,9 @@ static void btrfs_check_read_bio(struct btrfs_bio *bbio, struct btrfs_device *de
struct btrfs_failed_bio *fbio = NULL;
u32 offset = 0;
+ /* Read-repair requires the inode field to be set by the submitter. */
+ ASSERT(inode);
+
/*
* Hand off repair bios to the repair code as there is no upper level
* submitter for them.
@@ -306,17 +327,17 @@ static void btrfs_end_bio_work(struct work_struct *work)
struct btrfs_bio *bbio = container_of(work, struct btrfs_bio, end_io_work);
/* Metadata reads are checked and repaired by the submitter. */
- if (bbio->bio.bi_opf & REQ_META)
- bbio->end_io(bbio);
- else
+ if (bbio->inode && !(bbio->bio.bi_opf & REQ_META))
btrfs_check_read_bio(bbio, bbio->bio.bi_private);
+ else
+ bbio->end_io(bbio);
}
static void btrfs_simple_end_io(struct bio *bio)
{
struct btrfs_bio *bbio = btrfs_bio(bio);
struct btrfs_device *dev = bio->bi_private;
- struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
+ struct btrfs_fs_info *fs_info = bbio->fs_info;
btrfs_bio_counter_dec(fs_info);
@@ -340,7 +361,8 @@ static void btrfs_raid56_end_io(struct bio *bio)
btrfs_bio_counter_dec(bioc->fs_info);
bbio->mirror_num = bioc->mirror_num;
- if (bio_op(bio) == REQ_OP_READ && !(bbio->bio.bi_opf & REQ_META))
+ if (bio_op(bio) == REQ_OP_READ && bbio->inode &&
+ !(bbio->bio.bi_opf & REQ_META))
btrfs_check_read_bio(bbio, NULL);
else
btrfs_orig_bbio_end_io(bbio);
@@ -418,7 +440,11 @@ static void btrfs_submit_dev_bio(struct btrfs_device *dev, struct bio *bio)
dev->devid, bio->bi_iter.bi_size);
btrfsic_check_bio(bio);
- submit_bio(bio);
+
+ if (bio->bi_opf & REQ_BTRFS_CGROUP_PUNT)
+ blkcg_punt_bio_submit(bio);
+ else
+ submit_bio(bio);
}
static void btrfs_submit_mirrored_bio(struct btrfs_io_context *bioc, int dev_nr)
@@ -534,10 +560,10 @@ static void run_one_async_done(struct btrfs_work *work)
/*
* All of the bios that pass through here are from async helpers.
- * Use REQ_CGROUP_PUNT to issue them from the owning cgroup's context.
- * This changes nothing when cgroups aren't in use.
+ * Use REQ_BTRFS_CGROUP_PUNT to issue them from the owning cgroup's
+ * context. This changes nothing when cgroups aren't in use.
*/
- bio->bi_opf |= REQ_CGROUP_PUNT;
+ bio->bi_opf |= REQ_BTRFS_CGROUP_PUNT;
__btrfs_submit_bio(bio, async->bioc, &async->smap, async->mirror_num);
}
@@ -562,7 +588,7 @@ static bool should_async_write(struct btrfs_bio *bbio)
* in order.
*/
if (bbio->bio.bi_opf & REQ_META) {
- struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
+ struct btrfs_fs_info *fs_info = bbio->fs_info;
if (btrfs_is_zoned(fs_info))
return false;
@@ -582,7 +608,7 @@ static bool btrfs_wq_submit_bio(struct btrfs_bio *bbio,
struct btrfs_io_context *bioc,
struct btrfs_io_stripe *smap, int mirror_num)
{
- struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
+ struct btrfs_fs_info *fs_info = bbio->fs_info;
struct async_submit_bio *async;
async = kmalloc(sizeof(*async), GFP_NOFS);
@@ -603,12 +629,12 @@ static bool btrfs_wq_submit_bio(struct btrfs_bio *bbio,
return true;
}
-static bool btrfs_submit_chunk(struct bio *bio, int mirror_num)
+static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
{
- struct btrfs_bio *bbio = btrfs_bio(bio);
struct btrfs_inode *inode = bbio->inode;
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ struct btrfs_fs_info *fs_info = bbio->fs_info;
struct btrfs_bio *orig_bbio = bbio;
+ struct bio *bio = &bbio->bio;
u64 logical = bio->bi_iter.bi_sector << 9;
u64 length = bio->bi_iter.bi_size;
u64 map_length = length;
@@ -631,15 +657,15 @@ static bool btrfs_submit_chunk(struct bio *bio, int mirror_num)
map_length = min(map_length, fs_info->max_zone_append_size);
if (map_length < length) {
- bio = btrfs_split_bio(fs_info, bio, map_length, use_append);
- bbio = btrfs_bio(bio);
+ bbio = btrfs_split_bio(fs_info, bbio, map_length, use_append);
+ bio = &bbio->bio;
}
/*
* Save the iter for the end_io handler and preload the checksums for
* data reads.
*/
- if (bio_op(bio) == REQ_OP_READ && !(bio->bi_opf & REQ_META)) {
+ if (bio_op(bio) == REQ_OP_READ && inode && !(bio->bi_opf & REQ_META)) {
bbio->saved_iter = bio->bi_iter;
ret = btrfs_lookup_bio_sums(bbio);
if (ret)
@@ -650,7 +676,7 @@ static bool btrfs_submit_chunk(struct bio *bio, int mirror_num)
if (use_append) {
bio->bi_opf &= ~REQ_OP_WRITE;
bio->bi_opf |= REQ_OP_ZONE_APPEND;
- ret = btrfs_extract_ordered_extent(btrfs_bio(bio));
+ ret = btrfs_bio_extract_ordered_extent(bbio);
if (ret)
goto fail_put_bio;
}
@@ -659,7 +685,7 @@ static bool btrfs_submit_chunk(struct bio *bio, int mirror_num)
* Csum items for reloc roots have already been cloned at this
* point, so they are handled as part of the no-checksum case.
*/
- if (!(inode->flags & BTRFS_INODE_NODATASUM) &&
+ if (inode && !(inode->flags & BTRFS_INODE_NODATASUM) &&
!test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state) &&
!btrfs_is_data_reloc_root(inode->root)) {
if (should_async_write(bbio) &&
@@ -686,9 +712,12 @@ fail:
return true;
}
-void btrfs_submit_bio(struct bio *bio, int mirror_num)
+void btrfs_submit_bio(struct btrfs_bio *bbio, int mirror_num)
{
- while (!btrfs_submit_chunk(bio, mirror_num))
+ /* If bbio->inode is not populated, its file_offset must be 0. */
+ ASSERT(bbio->inode || bbio->file_offset == 0);
+
+ while (!btrfs_submit_chunk(bbio, mirror_num))
;
}
@@ -706,12 +735,9 @@ int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
u64 length, u64 logical, struct page *page,
unsigned int pg_offset, int mirror_num)
{
- struct btrfs_device *dev;
+ struct btrfs_io_stripe smap = { 0 };
struct bio_vec bvec;
struct bio bio;
- u64 map_length = 0;
- u64 sector;
- struct btrfs_io_context *bioc = NULL;
int ret = 0;
ASSERT(!(fs_info->sb->s_flags & SB_RDONLY));
@@ -720,68 +746,38 @@ int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
if (btrfs_repair_one_zone(fs_info, logical))
return 0;
- map_length = length;
-
/*
* Avoid races with device replace and make sure our bioc has devices
* associated to its stripes that don't go away while we are doing the
* read repair operation.
*/
btrfs_bio_counter_inc_blocked(fs_info);
- if (btrfs_is_parity_mirror(fs_info, logical, length)) {
- /*
- * Note that we don't use BTRFS_MAP_WRITE because it's supposed
- * to update all raid stripes, but here we just want to correct
- * bad stripe, thus BTRFS_MAP_READ is abused to only get the bad
- * stripe's dev and sector.
- */
- ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical,
- &map_length, &bioc, 0);
- if (ret)
- goto out_counter_dec;
- ASSERT(bioc->mirror_num == 1);
- } else {
- ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical,
- &map_length, &bioc, mirror_num);
- if (ret)
- goto out_counter_dec;
- /*
- * This happens when dev-replace is also running, and the
- * mirror_num indicates the dev-replace target.
- *
- * In this case, we don't need to do anything, as the read
- * error just means the replace progress hasn't reached our
- * read range, and later replace routine would handle it well.
- */
- if (mirror_num != bioc->mirror_num)
- goto out_counter_dec;
- }
-
- sector = bioc->stripes[bioc->mirror_num - 1].physical >> 9;
- dev = bioc->stripes[bioc->mirror_num - 1].dev;
- btrfs_put_bioc(bioc);
+ ret = btrfs_map_repair_block(fs_info, &smap, logical, length, mirror_num);
+ if (ret < 0)
+ goto out_counter_dec;
- if (!dev || !dev->bdev ||
- !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
+ if (!smap.dev->bdev ||
+ !test_bit(BTRFS_DEV_STATE_WRITEABLE, &smap.dev->dev_state)) {
ret = -EIO;
goto out_counter_dec;
}
- bio_init(&bio, dev->bdev, &bvec, 1, REQ_OP_WRITE | REQ_SYNC);
- bio.bi_iter.bi_sector = sector;
+ bio_init(&bio, smap.dev->bdev, &bvec, 1, REQ_OP_WRITE | REQ_SYNC);
+ bio.bi_iter.bi_sector = smap.physical >> SECTOR_SHIFT;
__bio_add_page(&bio, page, length, pg_offset);
btrfsic_check_bio(&bio);
ret = submit_bio_wait(&bio);
if (ret) {
/* try to remap that extent elsewhere? */
- btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
+ btrfs_dev_stat_inc_and_print(smap.dev, BTRFS_DEV_STAT_WRITE_ERRS);
goto out_bio_uninit;
}
btrfs_info_rl_in_rcu(fs_info,
"read error corrected: ino %llu off %llu (dev %s sector %llu)",
- ino, start, btrfs_dev_name(dev), sector);
+ ino, start, btrfs_dev_name(smap.dev),
+ smap.physical >> SECTOR_SHIFT);
ret = 0;
out_bio_uninit:
@@ -791,6 +787,45 @@ out_counter_dec:
return ret;
}
+/*
+ * Submit a btrfs_bio based repair write.
+ *
+ * If @dev_replace is true, the write would be submitted to dev-replace target.
+ */
+void btrfs_submit_repair_write(struct btrfs_bio *bbio, int mirror_num, bool dev_replace)
+{
+ struct btrfs_fs_info *fs_info = bbio->fs_info;
+ u64 logical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT;
+ u64 length = bbio->bio.bi_iter.bi_size;
+ struct btrfs_io_stripe smap = { 0 };
+ int ret;
+
+ ASSERT(fs_info);
+ ASSERT(mirror_num > 0);
+ ASSERT(btrfs_op(&bbio->bio) == BTRFS_MAP_WRITE);
+ ASSERT(!bbio->inode);
+
+ btrfs_bio_counter_inc_blocked(fs_info);
+ ret = btrfs_map_repair_block(fs_info, &smap, logical, length, mirror_num);
+ if (ret < 0)
+ goto fail;
+
+ if (dev_replace) {
+ if (btrfs_op(&bbio->bio) == BTRFS_MAP_WRITE && btrfs_is_zoned(fs_info)) {
+ bbio->bio.bi_opf &= ~REQ_OP_WRITE;
+ bbio->bio.bi_opf |= REQ_OP_ZONE_APPEND;
+ }
+ ASSERT(smap.dev == fs_info->dev_replace.srcdev);
+ smap.dev = fs_info->dev_replace.tgtdev;
+ }
+ __btrfs_submit_bio(&bbio->bio, NULL, &smap, mirror_num);
+ return;
+
+fail:
+ btrfs_bio_counter_dec(fs_info);
+ btrfs_bio_end_io(bbio, errno_to_blk_status(ret));
+}
+
int __init btrfs_bioset_init(void)
{
if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE,