diff options
author | Stephen Rothwell <sfr@canb.auug.org.au> | 2018-04-17 11:09:08 +1000 |
---|---|---|
committer | Stephen Rothwell <sfr@canb.auug.org.au> | 2018-04-17 11:09:08 +1000 |
commit | ab67b25626811e0b1b0bb01906ddf4de2a1075ec (patch) | |
tree | ab863fa788f6db47472ffc5cc875b6e3feff81ea | |
parent | c7ce64a93f3f516bed419984064ad53cec49f616 (diff) | |
parent | 50c35295db950dae16c9ecc59eb70e9c2b6fc167 (diff) |
Merge remote-tracking branch 'md/for-next'
-rw-r--r-- | drivers/md/md.c | 194 | ||||
-rw-r--r-- | drivers/md/md.h | 23 | ||||
-rw-r--r-- | drivers/md/raid1.c | 25 |
3 files changed, 173 insertions, 69 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index 3bea45e8ccff..ec86ed16ec2f 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -367,6 +367,7 @@ void mddev_suspend(struct mddev *mddev) set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags); smp_mb__after_atomic(); wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0); + wait_event(mddev->sb_wait, atomic_read(&mddev->flush_io) == 0); mddev->pers->quiesce(mddev, 1); clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags); wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags)); @@ -412,30 +413,77 @@ static int md_congested(void *data, int bits) /* * Generic flush handling for md */ +static void submit_flushes(struct work_struct *ws) +{ + struct flush_info *fi = container_of(ws, struct flush_info, flush_work); + struct mddev *mddev = fi->mddev; + struct bio *fbio = fi->fbio; -static void md_end_flush(struct bio *bio) + fi->fbio = NULL; + atomic_dec(&mddev->flush_io); + wake_up(&fi->flush_queue); + wake_up(&mddev->sb_wait); + + fbio->bi_opf &= ~REQ_PREFLUSH; + md_handle_request(mddev, fbio); +} + +static void rdev_end_flush(struct bio *bi) { - struct md_rdev *rdev = bio->bi_private; - struct mddev *mddev = rdev->mddev; + struct flush_info *fi = bi->bi_private; + struct mddev *mddev = fi->mddev; + struct bio *fbio = fi->fbio; + struct md_rdev *rdev; - rdev_dec_pending(rdev, mddev); + rcu_read_lock(); + rdev_for_each_rcu(rdev, mddev) + if (fi->bios[rdev->raid_disk] == bi) { + fi->bios[rdev->raid_disk] = NULL; + rdev_dec_pending(rdev, mddev); + break; + } + rcu_read_unlock(); - if (atomic_dec_and_test(&mddev->flush_pending)) { - /* The pre-request flush has finished */ - queue_work(md_wq, &mddev->flush_work); + if (atomic_dec_and_test(&fi->flush_pending)) { + if (fbio->bi_iter.bi_size == 0) { + /* an empty barrier - all done */ + bio_endio(fbio); + fi->fbio = NULL; + atomic_dec(&mddev->flush_io); + wake_up(&fi->flush_queue); + wake_up(&mddev->sb_wait); + } else { + INIT_WORK(&fi->flush_work, submit_flushes); + queue_work(md_wq, &fi->flush_work); + } } - bio_put(bio); -} -static void md_submit_flush_data(struct work_struct *ws); + bio_put(bi); +} -static void submit_flushes(struct work_struct *ws) +void md_flush_request(struct mddev *mddev, struct bio *fbio) { - struct mddev *mddev = container_of(ws, struct mddev, flush_work); struct md_rdev *rdev; + struct flush_info *fi; + char *p = (char*)mddev->flush_info; + int index; + + atomic_inc(&mddev->flush_io); + + index = jhash((void*)fbio, sizeof(fbio), 0) % NR_FLUSHS; + fi = (struct flush_info *)(p + index * (sizeof(struct flush_info) + + mddev->raid_disks * sizeof(struct bio*))); + + spin_lock_irq(&fi->flush_lock); + wait_event_lock_irq(fi->flush_queue, + !fi->fbio, + fi->flush_lock); + fi->fbio = fbio; + spin_unlock_irq(&fi->flush_lock); + + fi->mddev = mddev; + atomic_set(&fi->flush_pending, 1); - INIT_WORK(&mddev->flush_work, md_submit_flush_data); - atomic_set(&mddev->flush_pending, 1); rcu_read_lock(); rdev_for_each_rcu(rdev, mddev) if (rdev->raid_disk >= 0 && @@ -448,56 +496,36 @@ static void submit_flushes(struct work_struct *ws) atomic_inc(&rdev->nr_pending); atomic_inc(&rdev->nr_pending); rcu_read_unlock(); + bi = bio_alloc_mddev(GFP_NOIO, 0, mddev); - bi->bi_end_io = md_end_flush; - bi->bi_private = rdev; + bi->bi_end_io = rdev_end_flush; + bi->bi_private = fi; bio_set_dev(bi, rdev->bdev); bi->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; - atomic_inc(&mddev->flush_pending); + + fi->bios[rdev->raid_disk] = bi; + atomic_inc(&fi->flush_pending); submit_bio(bi); + rcu_read_lock(); rdev_dec_pending(rdev, mddev); } rcu_read_unlock(); - if (atomic_dec_and_test(&mddev->flush_pending)) - queue_work(md_wq, &mddev->flush_work); -} - -static void md_submit_flush_data(struct work_struct *ws) -{ - struct mddev *mddev = container_of(ws, struct mddev, flush_work); - struct bio *bio = mddev->flush_bio; - /* - * must reset flush_bio before calling into md_handle_request to avoid a - * deadlock, because other bios passed md_handle_request suspend check - * could wait for this and below md_handle_request could wait for those - * bios because of suspend check - */ - mddev->flush_bio = NULL; - wake_up(&mddev->sb_wait); - - if (bio->bi_iter.bi_size == 0) - /* an empty barrier - all done */ - bio_endio(bio); - else { - bio->bi_opf &= ~REQ_PREFLUSH; - md_handle_request(mddev, bio); + if (atomic_dec_and_test(&fi->flush_pending)) { + if (fbio->bi_iter.bi_size == 0) { + /* an empty barrier - all done */ + bio_endio(fbio); + fi->fbio = NULL; + atomic_dec(&mddev->flush_io); + wake_up(&fi->flush_queue); + wake_up(&mddev->sb_wait); + } else { + INIT_WORK(&fi->flush_work, submit_flushes); + queue_work(md_wq, &fi->flush_work); + } } } - -void md_flush_request(struct mddev *mddev, struct bio *bio) -{ - spin_lock_irq(&mddev->lock); - wait_event_lock_irq(mddev->sb_wait, - !mddev->flush_bio, - mddev->lock); - mddev->flush_bio = bio; - spin_unlock_irq(&mddev->lock); - - INIT_WORK(&mddev->flush_work, submit_flushes); - queue_work(md_wq, &mddev->flush_work); -} EXPORT_SYMBOL(md_flush_request); static inline struct mddev *mddev_get(struct mddev *mddev) @@ -555,7 +583,6 @@ void mddev_init(struct mddev *mddev) atomic_set(&mddev->openers, 0); atomic_set(&mddev->active_io, 0); spin_lock_init(&mddev->lock); - atomic_set(&mddev->flush_pending, 0); init_waitqueue_head(&mddev->sb_wait); init_waitqueue_head(&mddev->recovery_wait); mddev->reshape_position = MaxSector; @@ -5509,6 +5536,27 @@ int md_run(struct mddev *mddev) goto abort; } } + if (mddev->flush_info == NULL) { + int index = 0; + char *p; + struct flush_info *fi; + mddev->flush_info = kzalloc((sizeof(struct flush_info) + + sizeof(struct bio*) * mddev->raid_disks) * + NR_FLUSHS, GFP_KERNEL); + if (!mddev->flush_info) { + err = -ENOMEM; + goto abort; + } + + p = (char*)mddev->flush_info; + while (index < NR_FLUSHS) { + fi = (struct flush_info *)(p + index * (sizeof(struct flush_info) + + mddev->raid_disks * sizeof(struct bio*))); + spin_lock_init(&fi->flush_lock); + init_waitqueue_head(&fi->flush_queue); + index++; + } + } spin_lock(&pers_lock); pers = find_pers(mddev->level, mddev->clevel); @@ -5668,6 +5716,9 @@ int md_run(struct mddev *mddev) return 0; abort: + if (mddev->flush_info) { + kfree(mddev->flush_info); + } if (mddev->bio_set) { bioset_free(mddev->bio_set); mddev->bio_set = NULL; @@ -5888,6 +5939,10 @@ void md_stop(struct mddev *mddev) * This is called from dm-raid */ __md_stop(mddev); + if (mddev->flush_info) { + kfree(mddev->flush_info); + mddev->flush_info = NULL; + } if (mddev->bio_set) { bioset_free(mddev->bio_set); mddev->bio_set = NULL; @@ -6854,8 +6909,10 @@ static int update_size(struct mddev *mddev, sector_t num_sectors) static int update_raid_disks(struct mddev *mddev, int raid_disks) { - int rv; + int rv, index; struct md_rdev *rdev; + struct flush_info *new, *fi; + char *p; /* change the number of raid disks */ if (mddev->pers->check_reshape == NULL) return -EINVAL; @@ -6884,10 +6941,31 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks) else if (mddev->delta_disks > 0) mddev->reshape_backwards = 0; + new = kzalloc((sizeof(struct flush_info) + sizeof(struct bio*) * + raid_disks) * NR_FLUSHS, GFP_KERNEL); + if (!new) + return -ENOMEM; + + p = (char*)new; + index = 0; + while (index < NR_FLUSHS) { + fi = (struct flush_info *)(p + index * (sizeof(struct flush_info) + + raid_disks * sizeof(struct bio*))); + spin_lock_init(&fi->flush_lock); + init_waitqueue_head(&fi->flush_queue); + index++; + } + rv = mddev->pers->check_reshape(mddev); if (rv < 0) { mddev->delta_disks = 0; mddev->reshape_backwards = 0; + kfree(new); + } else { + mddev_suspend(mddev); + kfree(mddev->flush_info); + mddev->flush_info = new; + mddev_resume(mddev); } return rv; } @@ -9256,8 +9334,10 @@ void md_reload_sb(struct mddev *mddev, int nr) check_sb_changes(mddev, rdev); /* Read all rdev's to update recovery_offset */ - rdev_for_each_rcu(rdev, mddev) - read_rdev(mddev, rdev); + rdev_for_each_rcu(rdev, mddev) { + if (!test_bit(Faulty, &rdev->flags)) + read_rdev(mddev, rdev); + } } EXPORT_SYMBOL(md_reload_sb); diff --git a/drivers/md/md.h b/drivers/md/md.h index fbc925cce810..24f22f51e190 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -252,6 +252,17 @@ enum mddev_sb_flags { MD_SB_NEED_REWRITE, /* metadata write needs to be repeated */ }; +#define NR_FLUSHS 16 +struct flush_info { + struct mddev *mddev; + struct work_struct flush_work; + atomic_t flush_pending; + spinlock_t flush_lock; + wait_queue_head_t flush_queue; + struct bio *fbio; + struct bio *bios[0]; +}; + struct mddev { void *private; struct md_personality *pers; @@ -399,7 +410,6 @@ struct mddev { struct work_struct del_work; /* used for delayed sysfs removal */ /* "lock" protects: - * flush_bio transition from NULL to !NULL * rdev superblocks, events * clearing MD_CHANGE_* * in_sync - and related safemode and MD_CHANGE changes @@ -457,13 +467,12 @@ struct mddev { * metadata and bitmap writes */ - /* Generic flush handling. - * The last to finish preflush schedules a worker to submit - * the rest of the request (without the REQ_PREFLUSH flag). + /* + * Generic flush handling. */ - struct bio *flush_bio; - atomic_t flush_pending; - struct work_struct flush_work; + struct flush_info *flush_info; + atomic_t flush_io; + struct work_struct event_work; /* used by dm to report failure event */ void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev); struct md_cluster_info *cluster_info; diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index e2943fb74056..e9e3308cb0a7 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -854,7 +854,7 @@ static void flush_pending_writes(struct r1conf *conf) * there is no normal IO happeing. It must arrange to call * lower_barrier when the particular background IO completes. */ -static void raise_barrier(struct r1conf *conf, sector_t sector_nr) +static sector_t raise_barrier(struct r1conf *conf, sector_t sector_nr) { int idx = sector_to_idx(sector_nr); @@ -885,13 +885,23 @@ static void raise_barrier(struct r1conf *conf, sector_t sector_nr) * max resync count which allowed on current I/O barrier bucket. */ wait_event_lock_irq(conf->wait_barrier, - !conf->array_frozen && + (!conf->array_frozen && !atomic_read(&conf->nr_pending[idx]) && - atomic_read(&conf->barrier[idx]) < RESYNC_DEPTH, + atomic_read(&conf->barrier[idx]) < RESYNC_DEPTH) || + test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery), conf->resync_lock); + if (test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) { + atomic_dec(&conf->barrier[idx]); + spin_unlock_irq(&conf->resync_lock); + wake_up(&conf->wait_barrier); + return -EINTR; + } + atomic_inc(&conf->nr_sync_pending); spin_unlock_irq(&conf->resync_lock); + + return 0; } static void lower_barrier(struct r1conf *conf, sector_t sector_nr) @@ -1092,6 +1102,8 @@ static void alloc_behind_master_bio(struct r1bio *r1_bio, goto skip_copy; } + behind_bio->bi_write_hint = bio->bi_write_hint; + while (i < vcnt && size) { struct page *page; int len = min_t(int, PAGE_SIZE, size); @@ -2662,9 +2674,12 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, bitmap_cond_end_sync(mddev->bitmap, sector_nr, mddev_is_clustered(mddev) && (sector_nr + 2 * RESYNC_SECTORS > conf->cluster_sync_high)); - r1_bio = raid1_alloc_init_r1buf(conf); - raise_barrier(conf, sector_nr); + + if (raise_barrier(conf, sector_nr)) + return 0; + + r1_bio = raid1_alloc_init_r1buf(conf); rcu_read_lock(); /* |