author     Stephen Rothwell <sfr@canb.auug.org.au>  2018-04-17 11:09:08 +1000
committer  Stephen Rothwell <sfr@canb.auug.org.au>  2018-04-17 11:09:08 +1000
commit     ab67b25626811e0b1b0bb01906ddf4de2a1075ec (patch)
tree       ab863fa788f6db47472ffc5cc875b6e3feff81ea
parent     c7ce64a93f3f516bed419984064ad53cec49f616 (diff)
parent     50c35295db950dae16c9ecc59eb70e9c2b6fc167 (diff)
Merge remote-tracking branch 'md/for-next'
-rw-r--r--  drivers/md/md.c     194
-rw-r--r--  drivers/md/md.h      23
-rw-r--r--  drivers/md/raid1.c   25
3 files changed, 173 insertions, 69 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 3bea45e8ccff..ec86ed16ec2f 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -367,6 +367,7 @@ void mddev_suspend(struct mddev *mddev)
set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags);
smp_mb__after_atomic();
wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
+ wait_event(mddev->sb_wait, atomic_read(&mddev->flush_io) == 0);
mddev->pers->quiesce(mddev, 1);
clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags);
wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));
@@ -412,30 +413,77 @@ static int md_congested(void *data, int bits)
/*
* Generic flush handling for md
*/
+static void submit_flushes(struct work_struct *ws)
+{
+ struct flush_info *fi = container_of(ws, struct flush_info, flush_work);
+ struct mddev *mddev = fi->mddev;
+ struct bio *fbio = fi->fbio;
-static void md_end_flush(struct bio *bio)
+ fi->fbio = NULL;
+ atomic_dec(&mddev->flush_io);
+ wake_up(&fi->flush_queue);
+ wake_up(&mddev->sb_wait);
+
+ fbio->bi_opf &= ~REQ_PREFLUSH;
+ md_handle_request(mddev, fbio);
+}
+
+static void rdev_end_flush(struct bio *bi)
{
- struct md_rdev *rdev = bio->bi_private;
- struct mddev *mddev = rdev->mddev;
+ struct flush_info *fi = bi->bi_private;
+ struct mddev *mddev = fi->mddev;
+ struct bio *fbio = fi->fbio;
+ struct md_rdev *rdev;
- rdev_dec_pending(rdev, mddev);
+ rcu_read_lock();
+ rdev_for_each_rcu(rdev, mddev)
+ if (fi->bios[rdev->raid_disk] == bi) {
+ fi->bios[rdev->raid_disk] = NULL;
+ rdev_dec_pending(rdev, mddev);
+ break;
+ }
+ rcu_read_unlock();
- if (atomic_dec_and_test(&mddev->flush_pending)) {
- /* The pre-request flush has finished */
- queue_work(md_wq, &mddev->flush_work);
+ if (atomic_dec_and_test(&fi->flush_pending)) {
+ if (fbio->bi_iter.bi_size == 0) {
+ /* an empty barrier - all done */
+ bio_endio(fbio);
+ fi->fbio = NULL;
+ atomic_dec(&mddev->flush_io);
+ wake_up(&fi->flush_queue);
+ wake_up(&mddev->sb_wait);
+ } else {
+ INIT_WORK(&fi->flush_work, submit_flushes);
+ queue_work(md_wq, &fi->flush_work);
+ }
}
- bio_put(bio);
-}
-static void md_submit_flush_data(struct work_struct *ws);
+ bio_put(bi);
+}
-static void submit_flushes(struct work_struct *ws)
+void md_flush_request(struct mddev *mddev, struct bio *fbio)
{
- struct mddev *mddev = container_of(ws, struct mddev, flush_work);
struct md_rdev *rdev;
+ struct flush_info *fi;
+ char *p = (char*)mddev->flush_info;
+ int index;
+
+ atomic_inc(&mddev->flush_io);
+
+ index = jhash((void*)fbio, sizeof(fbio), 0) % NR_FLUSHS;
+ fi = (struct flush_info *)(p + index * (sizeof(struct flush_info)
+ + mddev->raid_disks * sizeof(struct bio*)));
+
+ spin_lock_irq(&fi->flush_lock);
+ wait_event_lock_irq(fi->flush_queue,
+ !fi->fbio,
+ fi->flush_lock);
+ fi->fbio = fbio;
+ spin_unlock_irq(&fi->flush_lock);
+
+ fi->mddev = mddev;
+ atomic_set(&fi->flush_pending, 1);
- INIT_WORK(&mddev->flush_work, md_submit_flush_data);
- atomic_set(&mddev->flush_pending, 1);
rcu_read_lock();
rdev_for_each_rcu(rdev, mddev)
if (rdev->raid_disk >= 0 &&
@@ -448,56 +496,36 @@ static void submit_flushes(struct work_struct *ws)
atomic_inc(&rdev->nr_pending);
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
+
bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);
- bi->bi_end_io = md_end_flush;
- bi->bi_private = rdev;
+ bi->bi_end_io = rdev_end_flush;
+ bi->bi_private = fi;
bio_set_dev(bi, rdev->bdev);
bi->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
- atomic_inc(&mddev->flush_pending);
+
+ fi->bios[rdev->raid_disk] = bi;
+ atomic_inc(&fi->flush_pending);
submit_bio(bi);
+
rcu_read_lock();
rdev_dec_pending(rdev, mddev);
}
rcu_read_unlock();
- if (atomic_dec_and_test(&mddev->flush_pending))
- queue_work(md_wq, &mddev->flush_work);
-}
-
-static void md_submit_flush_data(struct work_struct *ws)
-{
- struct mddev *mddev = container_of(ws, struct mddev, flush_work);
- struct bio *bio = mddev->flush_bio;
- /*
- * must reset flush_bio before calling into md_handle_request to avoid a
- * deadlock, because other bios passed md_handle_request suspend check
- * could wait for this and below md_handle_request could wait for those
- * bios because of suspend check
- */
- mddev->flush_bio = NULL;
- wake_up(&mddev->sb_wait);
-
- if (bio->bi_iter.bi_size == 0)
- /* an empty barrier - all done */
- bio_endio(bio);
- else {
- bio->bi_opf &= ~REQ_PREFLUSH;
- md_handle_request(mddev, bio);
+ if (atomic_dec_and_test(&fi->flush_pending)) {
+ if (fbio->bi_iter.bi_size == 0) {
+ /* an empty barrier - all done */
+ bio_endio(fbio);
+ fi->fbio = NULL;
+ atomic_dec(&mddev->flush_io);
+ wake_up(&fi->flush_queue);
+ wake_up(&mddev->sb_wait);
+ } else {
+ INIT_WORK(&fi->flush_work, submit_flushes);
+ queue_work(md_wq, &fi->flush_work);
+ }
}
}
-
-void md_flush_request(struct mddev *mddev, struct bio *bio)
-{
- spin_lock_irq(&mddev->lock);
- wait_event_lock_irq(mddev->sb_wait,
- !mddev->flush_bio,
- mddev->lock);
- mddev->flush_bio = bio;
- spin_unlock_irq(&mddev->lock);
-
- INIT_WORK(&mddev->flush_work, submit_flushes);
- queue_work(md_wq, &mddev->flush_work);
-}
EXPORT_SYMBOL(md_flush_request);
static inline struct mddev *mddev_get(struct mddev *mddev)
@@ -555,7 +583,6 @@ void mddev_init(struct mddev *mddev)
atomic_set(&mddev->openers, 0);
atomic_set(&mddev->active_io, 0);
spin_lock_init(&mddev->lock);
- atomic_set(&mddev->flush_pending, 0);
init_waitqueue_head(&mddev->sb_wait);
init_waitqueue_head(&mddev->recovery_wait);
mddev->reshape_position = MaxSector;
@@ -5509,6 +5536,27 @@ int md_run(struct mddev *mddev)
goto abort;
}
}
+ if (mddev->flush_info == NULL) {
+ int index = 0;
+ char *p;
+ struct flush_info *fi;
+ mddev->flush_info = kzalloc((sizeof(struct flush_info) +
+ sizeof(struct bio*) * mddev->raid_disks) *
+ NR_FLUSHS, GFP_KERNEL);
+ if (!mddev->flush_info) {
+ err = -ENOMEM;
+ goto abort;
+ }
+
+ p = (char*)mddev->flush_info;
+ while (index < NR_FLUSHS) {
+ fi = (struct flush_info *)(p + index * (sizeof(struct flush_info)
+ + mddev->raid_disks * sizeof(struct bio*)));
+ spin_lock_init(&fi->flush_lock);
+ init_waitqueue_head(&fi->flush_queue);
+ index++;
+ }
+ }
spin_lock(&pers_lock);
pers = find_pers(mddev->level, mddev->clevel);
@@ -5668,6 +5716,9 @@ int md_run(struct mddev *mddev)
return 0;
abort:
+ if (mddev->flush_info) {
+ kfree(mddev->flush_info);
+ }
if (mddev->bio_set) {
bioset_free(mddev->bio_set);
mddev->bio_set = NULL;
@@ -5888,6 +5939,10 @@ void md_stop(struct mddev *mddev)
* This is called from dm-raid
*/
__md_stop(mddev);
+ if (mddev->flush_info) {
+ kfree(mddev->flush_info);
+ mddev->flush_info = NULL;
+ }
if (mddev->bio_set) {
bioset_free(mddev->bio_set);
mddev->bio_set = NULL;
@@ -6854,8 +6909,10 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
static int update_raid_disks(struct mddev *mddev, int raid_disks)
{
- int rv;
+ int rv, index;
struct md_rdev *rdev;
+ struct flush_info *new, *fi;
+ char *p;
/* change the number of raid disks */
if (mddev->pers->check_reshape == NULL)
return -EINVAL;
@@ -6884,10 +6941,31 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks)
else if (mddev->delta_disks > 0)
mddev->reshape_backwards = 0;
+ new = kzalloc((sizeof(struct flush_info) + sizeof(struct bio*) *
+ raid_disks) * NR_FLUSHS, GFP_KERNEL);
+ if (!new)
+ return -ENOMEM;
+
+ p = (char*)new;
+ index = 0;
+ while (index < NR_FLUSHS) {
+ fi = (struct flush_info *)(p + index * (sizeof(struct flush_info)
+ + raid_disks * sizeof(struct bio*)));
+ spin_lock_init(&fi->flush_lock);
+ init_waitqueue_head(&fi->flush_queue);
+ index++;
+ }
+
rv = mddev->pers->check_reshape(mddev);
if (rv < 0) {
mddev->delta_disks = 0;
mddev->reshape_backwards = 0;
+ kfree(new);
+ } else {
+ mddev_suspend(mddev);
+ kfree(mddev->flush_info);
+ mddev->flush_info = new;
+ mddev_resume(mddev);
}
return rv;
}
@@ -9256,8 +9334,10 @@ void md_reload_sb(struct mddev *mddev, int nr)
check_sb_changes(mddev, rdev);
/* Read all rdev's to update recovery_offset */
- rdev_for_each_rcu(rdev, mddev)
- read_rdev(mddev, rdev);
+ rdev_for_each_rcu(rdev, mddev) {
+ if (!test_bit(Faulty, &rdev->flags))
+ read_rdev(mddev, rdev);
+ }
}
EXPORT_SYMBOL(md_reload_sb);
diff --git a/drivers/md/md.h b/drivers/md/md.h
index fbc925cce810..24f22f51e190 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -252,6 +252,17 @@ enum mddev_sb_flags {
MD_SB_NEED_REWRITE, /* metadata write needs to be repeated */
};
+#define NR_FLUSHS 16
+struct flush_info {
+ struct mddev *mddev;
+ struct work_struct flush_work;
+ atomic_t flush_pending;
+ spinlock_t flush_lock;
+ wait_queue_head_t flush_queue;
+ struct bio *fbio;
+ struct bio *bios[0];
+};
+
struct mddev {
void *private;
struct md_personality *pers;
@@ -399,7 +410,6 @@ struct mddev {
struct work_struct del_work; /* used for delayed sysfs removal */
/* "lock" protects:
- * flush_bio transition from NULL to !NULL
* rdev superblocks, events
* clearing MD_CHANGE_*
* in_sync - and related safemode and MD_CHANGE changes
@@ -457,13 +467,12 @@ struct mddev {
* metadata and bitmap writes
*/
- /* Generic flush handling.
- * The last to finish preflush schedules a worker to submit
- * the rest of the request (without the REQ_PREFLUSH flag).
+ /*
+ * Generic flush handling.
*/
- struct bio *flush_bio;
- atomic_t flush_pending;
- struct work_struct flush_work;
+ struct flush_info *flush_info;
+ atomic_t flush_io;
+
struct work_struct event_work; /* used by dm to report failure event */
void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
struct md_cluster_info *cluster_info;
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index e2943fb74056..e9e3308cb0a7 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -854,7 +854,7 @@ static void flush_pending_writes(struct r1conf *conf)
* there is no normal IO happeing. It must arrange to call
* lower_barrier when the particular background IO completes.
*/
-static void raise_barrier(struct r1conf *conf, sector_t sector_nr)
+static sector_t raise_barrier(struct r1conf *conf, sector_t sector_nr)
{
int idx = sector_to_idx(sector_nr);
@@ -885,13 +885,23 @@ static void raise_barrier(struct r1conf *conf, sector_t sector_nr)
* max resync count which allowed on current I/O barrier bucket.
*/
wait_event_lock_irq(conf->wait_barrier,
- !conf->array_frozen &&
+ (!conf->array_frozen &&
!atomic_read(&conf->nr_pending[idx]) &&
- atomic_read(&conf->barrier[idx]) < RESYNC_DEPTH,
+ atomic_read(&conf->barrier[idx]) < RESYNC_DEPTH) ||
+ test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery),
conf->resync_lock);
+ if (test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) {
+ atomic_dec(&conf->barrier[idx]);
+ spin_unlock_irq(&conf->resync_lock);
+ wake_up(&conf->wait_barrier);
+ return -EINTR;
+ }
+
atomic_inc(&conf->nr_sync_pending);
spin_unlock_irq(&conf->resync_lock);
+
+ return 0;
}
static void lower_barrier(struct r1conf *conf, sector_t sector_nr)
@@ -1092,6 +1102,8 @@ static void alloc_behind_master_bio(struct r1bio *r1_bio,
goto skip_copy;
}
+ behind_bio->bi_write_hint = bio->bi_write_hint;
+
while (i < vcnt && size) {
struct page *page;
int len = min_t(int, PAGE_SIZE, size);
@@ -2662,9 +2674,12 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
bitmap_cond_end_sync(mddev->bitmap, sector_nr,
mddev_is_clustered(mddev) && (sector_nr + 2 * RESYNC_SECTORS > conf->cluster_sync_high));
- r1_bio = raid1_alloc_init_r1buf(conf);
- raise_barrier(conf, sector_nr);
+
+ if (raise_barrier(conf, sector_nr))
+ return 0;
+
+ r1_bio = raid1_alloc_init_r1buf(conf);
rcu_read_lock();
/*