author	Jens Axboe <axboe@kernel.dk>	2021-08-27 16:32:01 -0600
committer	Jens Axboe <axboe@kernel.dk>	2021-08-27 16:32:01 -0600
commit	461d971215dfb55bcd5f7d040b2b222592040f95 (patch)
tree	2b1baab8e9462a35f47d16ca98cc937584e52f93
parent	7ee656c3ac3d047b4cf1269f83ac9d6c0bba916b (diff)
parent	6607cd319b6b91bff94e90f798a61c031650b514 (diff)
Merge branch 'md-next' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md into for-5.15/drivers
Pull MD changes from Song.

* 'md-next' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md:
  raid1: ensure write behind bio has less than BIO_MAX_VECS sectors
  md/raid10: Remove unnecessary rcu_dereference in raid10_handle_discard
-rw-r--r--	drivers/md/raid1.c	19
-rw-r--r--	drivers/md/raid10.c	14
2 files changed, 29 insertions, 4 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 51f2547c2007..21b348408478 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1331,6 +1331,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
struct raid1_plug_cb *plug = NULL;
int first_clone;
int max_sectors;
+ bool write_behind = false;
if (mddev_is_clustered(mddev) &&
md_cluster_ops->area_resyncing(mddev, WRITE,
@@ -1383,6 +1384,15 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
max_sectors = r1_bio->sectors;
for (i = 0; i < disks; i++) {
struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
+
+ /*
+ * Write-behind I/O is only attempted on drives marked as
+ * write-mostly, which means we may allocate a write-behind
+ * bio later.
+ */
+ if (rdev && test_bit(WriteMostly, &rdev->flags))
+ write_behind = true;
+
if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
atomic_inc(&rdev->nr_pending);
blocked_rdev = rdev;
@@ -1456,6 +1466,15 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
goto retry_write;
}
+ /*
+ * When using a bitmap, we may call alloc_behind_master_bio below.
+ * alloc_behind_master_bio allocates a copy of the data payload a page
+ * at a time and thus needs a new bio that can fit the whole payload
+ * of this bio in page-sized chunks.
+ */
+ if (write_behind && bitmap)
+ max_sectors = min_t(int, max_sectors,
+ BIO_MAX_VECS * (PAGE_SIZE >> 9));
if (max_sectors < bio_sectors(bio)) {
struct bio *split = bio_split(bio, max_sectors,
GFP_NOIO, &conf->bio_split);
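An aside on the clamp added above: alloc_behind_master_bio() copies the payload into the behind bio one page at a time, and a single bio holds at most BIO_MAX_VECS segments, so max_sectors must be capped before the split. The arithmetic below is a minimal userspace sketch, assuming 4 KiB pages and BIO_MAX_VECS == 256 (the value in this kernel era); it is illustrative, not kernel code.

/* Sketch of the write-behind cap, assuming 4 KiB pages and
 * BIO_MAX_VECS == 256. */
#include <stdio.h>

#define SECTOR_SHIFT  9      /* 512-byte sectors */
#define PAGE_SIZE     4096   /* assumption: 4 KiB pages */
#define BIO_MAX_VECS  256    /* max segments in one bio */

int main(void)
{
	/* One page covers PAGE_SIZE >> SECTOR_SHIFT = 8 sectors, so a
	 * behind bio holds at most 256 * 8 = 2048 sectors (1 MiB);
	 * anything larger is split by bio_split() first. */
	long cap = BIO_MAX_VECS * (PAGE_SIZE >> SECTOR_SHIFT);

	printf("write-behind cap: %ld sectors (%ld KiB)\n",
	       cap, (cap << SECTOR_SHIFT) / 1024);
	return 0;
}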
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 16977e8e075d..d5d92337e35e 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1712,6 +1712,11 @@ retry_discard:
} else
r10_bio->master_bio = (struct bio *)first_r10bio;
+ /*
+ * First select the target devices under rcu_read_lock and
+ * increment the refcount on their rdev. Record them by
+ * setting bios[x] to bio.
+ */
rcu_read_lock();
for (disk = 0; disk < geo->raid_disks; disk++) {
struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev);
@@ -1743,9 +1748,6 @@ retry_discard:
for (disk = 0; disk < geo->raid_disks; disk++) {
sector_t dev_start, dev_end;
struct bio *mbio, *rbio = NULL;
- struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev);
- struct md_rdev *rrdev = rcu_dereference(
- conf->mirrors[disk].replacement);
/*
* Now start to calculate the start and end address for each disk.
@@ -1775,9 +1777,12 @@ retry_discard:
/*
* It only handles discard bio which size is >= stripe size, so
- * dev_end > dev_start all the time
+ * dev_end > dev_start all the time.
+ * There is no need to take the rcu lock to get the rdev here;
+ * we already raised rdev->nr_pending in the first loop.
*/
if (r10_bio->devs[disk].bio) {
+ struct md_rdev *rdev = conf->mirrors[disk].rdev;
mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
mbio->bi_end_io = raid10_end_discard_request;
mbio->bi_private = r10_bio;
@@ -1790,6 +1795,7 @@ retry_discard:
bio_endio(mbio);
}
if (r10_bio->devs[disk].repl_bio) {
+ struct md_rdev *rrdev = conf->mirrors[disk].replacement;
rbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
rbio->bi_end_io = raid10_end_discard_request;
rbio->bi_private = r10_bio;
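For reference, the raid10 change leans on a standard kernel pattern: dereference the rdev once under rcu_read_lock(), pin it by bumping nr_pending, and from then on use the pointer without RCU, since the elevated count keeps the device alive. Below is a minimal sketch of that two-pass shape; struct dev, dev_table, MAX_DEVS, and submit_to() are hypothetical stand-ins for struct md_rdev, conf->mirrors[], geo->raid_disks, and the bio submission path.

/* Minimal sketch (not the md code itself) of "pin under RCU, then
 * use plainly", mirroring the two loops in raid10_handle_discard. */
#include <linux/rcupdate.h>
#include <linux/atomic.h>

#define MAX_DEVS 16

struct dev {
	atomic_t nr_pending;
};

extern struct dev __rcu *dev_table[MAX_DEVS];	/* updated via RCU elsewhere */
extern void submit_to(struct dev *d);		/* hypothetical I/O hook */

static void handle_all(void)
{
	struct dev *pinned[MAX_DEVS] = { NULL };
	int i;

	/* Pass 1: select targets under rcu_read_lock and pin each one
	 * by bumping nr_pending, as the first loop does when it sets
	 * bios[x]. */
	rcu_read_lock();
	for (i = 0; i < MAX_DEVS; i++) {
		struct dev *d = rcu_dereference(dev_table[i]);

		if (d) {
			atomic_inc(&d->nr_pending);
			pinned[i] = d;
		}
	}
	rcu_read_unlock();

	/* Pass 2: no rcu_dereference needed; the elevated nr_pending
	 * keeps each device from being torn down under us. */
	for (i = 0; i < MAX_DEVS; i++) {
		if (pinned[i]) {
			submit_to(pinned[i]);
			atomic_dec(&pinned[i]->nr_pending);
		}
	}
}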