diff options
Diffstat (limited to 'drivers/md/raid1.c')
-rw-r--r-- | drivers/md/raid1.c | 346 |
1 files changed, 151 insertions, 195 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 859bd3ffe435..378a25894c57 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -31,6 +31,7 @@ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#include <linux/slab.h> #include <linux/delay.h> #include <linux/blkdev.h> #include <linux/seq_file.h> @@ -262,7 +263,7 @@ static inline void update_head_pos(int disk, r1bio_t *r1_bio) static void raid1_end_read_request(struct bio *bio, int error) { int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private); + r1bio_t *r1_bio = bio->bi_private; int mirror; conf_t *conf = r1_bio->mddev->private; @@ -296,7 +297,8 @@ static void raid1_end_read_request(struct bio *bio, int error) */ char b[BDEVNAME_SIZE]; if (printk_ratelimit()) - printk(KERN_ERR "raid1: %s: rescheduling sector %llu\n", + printk(KERN_ERR "md/raid1:%s: %s: rescheduling sector %llu\n", + mdname(conf->mddev), bdevname(conf->mirrors[mirror].rdev->bdev,b), (unsigned long long)r1_bio->sector); reschedule_retry(r1_bio); } @@ -307,7 +309,7 @@ static void raid1_end_read_request(struct bio *bio, int error) static void raid1_end_write_request(struct bio *bio, int error) { int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private); + r1bio_t *r1_bio = bio->bi_private; int mirror, behind = test_bit(R1BIO_BehindIO, &r1_bio->state); conf_t *conf = r1_bio->mddev->private; struct bio *to_put = NULL; @@ -317,83 +319,74 @@ static void raid1_end_write_request(struct bio *bio, int error) if (r1_bio->bios[mirror] == bio) break; - if (error == -EOPNOTSUPP && test_bit(R1BIO_Barrier, &r1_bio->state)) { - set_bit(BarriersNotsupp, &conf->mirrors[mirror].rdev->flags); - set_bit(R1BIO_BarrierRetry, &r1_bio->state); - r1_bio->mddev->barriers_work = 0; - /* Don't rdev_dec_pending in this branch - keep it for the retry */ - } else { + /* + * 'one mirror IO has finished' event handler: + */ + r1_bio->bios[mirror] = NULL; + to_put = bio; + if (!uptodate) { + md_error(r1_bio->mddev, conf->mirrors[mirror].rdev); + /* an I/O failed, we can't clear the bitmap */ + set_bit(R1BIO_Degraded, &r1_bio->state); + } else /* - * this branch is our 'one mirror IO has finished' event handler: + * Set R1BIO_Uptodate in our master bio, so that we + * will return a good error code for to the higher + * levels even if IO on some other mirrored buffer + * fails. + * + * The 'master' represents the composite IO operation + * to user-side. So if something waits for IO, then it + * will wait for the 'master' bio. */ - r1_bio->bios[mirror] = NULL; - to_put = bio; - if (!uptodate) { - md_error(r1_bio->mddev, conf->mirrors[mirror].rdev); - /* an I/O failed, we can't clear the bitmap */ - set_bit(R1BIO_Degraded, &r1_bio->state); - } else - /* - * Set R1BIO_Uptodate in our master bio, so that - * we will return a good error code for to the higher - * levels even if IO on some other mirrored buffer fails. - * - * The 'master' represents the composite IO operation to - * user-side. So if something waits for IO, then it will - * wait for the 'master' bio. - */ - set_bit(R1BIO_Uptodate, &r1_bio->state); - - update_head_pos(mirror, r1_bio); - - if (behind) { - if (test_bit(WriteMostly, &conf->mirrors[mirror].rdev->flags)) - atomic_dec(&r1_bio->behind_remaining); - - /* In behind mode, we ACK the master bio once the I/O has safely - * reached all non-writemostly disks. Setting the Returned bit - * ensures that this gets done only once -- we don't ever want to - * return -EIO here, instead we'll wait */ - - if (atomic_read(&r1_bio->behind_remaining) >= (atomic_read(&r1_bio->remaining)-1) && - test_bit(R1BIO_Uptodate, &r1_bio->state)) { - /* Maybe we can return now */ - if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) { - struct bio *mbio = r1_bio->master_bio; - PRINTK(KERN_DEBUG "raid1: behind end write sectors %llu-%llu\n", - (unsigned long long) mbio->bi_sector, - (unsigned long long) mbio->bi_sector + - (mbio->bi_size >> 9) - 1); - bio_endio(mbio, 0); - } + set_bit(R1BIO_Uptodate, &r1_bio->state); + + update_head_pos(mirror, r1_bio); + + if (behind) { + if (test_bit(WriteMostly, &conf->mirrors[mirror].rdev->flags)) + atomic_dec(&r1_bio->behind_remaining); + + /* + * In behind mode, we ACK the master bio once the I/O + * has safely reached all non-writemostly + * disks. Setting the Returned bit ensures that this + * gets done only once -- we don't ever want to return + * -EIO here, instead we'll wait + */ + if (atomic_read(&r1_bio->behind_remaining) >= (atomic_read(&r1_bio->remaining)-1) && + test_bit(R1BIO_Uptodate, &r1_bio->state)) { + /* Maybe we can return now */ + if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) { + struct bio *mbio = r1_bio->master_bio; + PRINTK(KERN_DEBUG "raid1: behind end write sectors %llu-%llu\n", + (unsigned long long) mbio->bi_sector, + (unsigned long long) mbio->bi_sector + + (mbio->bi_size >> 9) - 1); + bio_endio(mbio, 0); } } - rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev); } + rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev); + /* - * * Let's see if all mirrored write operations have finished * already. */ if (atomic_dec_and_test(&r1_bio->remaining)) { - if (test_bit(R1BIO_BarrierRetry, &r1_bio->state)) - reschedule_retry(r1_bio); - else { - /* it really is the end of this request */ - if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { - /* free extra copy of the data pages */ - int i = bio->bi_vcnt; - while (i--) - safe_put_page(bio->bi_io_vec[i].bv_page); - } - /* clear the bitmap if all writes complete successfully */ - bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector, - r1_bio->sectors, - !test_bit(R1BIO_Degraded, &r1_bio->state), - behind); - md_write_end(r1_bio->mddev); - raid_end_bio_io(r1_bio); + if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { + /* free extra copy of the data pages */ + int i = bio->bi_vcnt; + while (i--) + safe_put_page(bio->bi_io_vec[i].bv_page); } + /* clear the bitmap if all writes complete successfully */ + bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector, + r1_bio->sectors, + !test_bit(R1BIO_Degraded, &r1_bio->state), + behind); + md_write_end(r1_bio->mddev); + raid_end_bio_io(r1_bio); } if (to_put) @@ -417,7 +410,7 @@ static void raid1_end_write_request(struct bio *bio, int error) */ static int read_balance(conf_t *conf, r1bio_t *r1_bio) { - const unsigned long this_sector = r1_bio->sector; + const sector_t this_sector = r1_bio->sector; int new_disk = conf->last_used, disk = new_disk; int wonly_disk = -1; const int sectors = r1_bio->sectors; @@ -433,7 +426,7 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) retry: if (conf->mddev->recovery_cp < MaxSector && (this_sector + sectors >= conf->next_resync)) { - /* Choose the first operation device, for consistancy */ + /* Choose the first operational device, for consistancy */ new_disk = 0; for (rdev = rcu_dereference(conf->mirrors[new_disk].rdev); @@ -773,9 +766,8 @@ do_sync_io: return NULL; } -static int make_request(struct request_queue *q, struct bio * bio) +static int make_request(mddev_t *mddev, struct bio * bio) { - mddev_t *mddev = q->queuedata; conf_t *conf = mddev->private; mirror_info_t *mirror; r1bio_t *r1_bio; @@ -786,18 +778,14 @@ static int make_request(struct request_queue *q, struct bio * bio) struct bio_list bl; struct page **behind_pages = NULL; const int rw = bio_data_dir(bio); - const bool do_sync = bio_rw_flagged(bio, BIO_RW_SYNCIO); - int cpu; - bool do_barriers; + const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); + const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA)); mdk_rdev_t *blocked_rdev; /* * Register the new request and wait if the reconstruction * thread has put up a bar for new requests. * Continue immediately if no resync is active currently. - * We test barriers_work *after* md_write_start as md_write_start - * may cause the first superblock write, and that will check out - * if barriers work. */ md_write_start(mddev, bio); /* wait on superblock update early */ @@ -821,24 +809,11 @@ static int make_request(struct request_queue *q, struct bio * bio) } finish_wait(&conf->wait_barrier, &w); } - if (unlikely(!mddev->barriers_work && - bio_rw_flagged(bio, BIO_RW_BARRIER))) { - if (rw == WRITE) - md_write_end(mddev); - bio_endio(bio, -EOPNOTSUPP); - return 0; - } wait_barrier(conf); bitmap = mddev->bitmap; - cpu = part_stat_lock(); - part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); - part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], - bio_sectors(bio)); - part_stat_unlock(); - /* * make_request() can abort the operation when READA is being * used and no empty request is available. @@ -865,6 +840,15 @@ static int make_request(struct request_queue *q, struct bio * bio) } mirror = conf->mirrors + rdisk; + if (test_bit(WriteMostly, &mirror->rdev->flags) && + bitmap) { + /* Reading from a write-mostly device must + * take care not to over-take any writes + * that are 'behind' + */ + wait_event(bitmap->behind_wait, + atomic_read(&bitmap->behind_writes) == 0); + } r1_bio->read_disk = rdisk; read_bio = bio_clone(bio, GFP_NOIO); @@ -874,7 +858,7 @@ static int make_request(struct request_queue *q, struct bio * bio) read_bio->bi_sector = r1_bio->sector + mirror->rdev->data_offset; read_bio->bi_bdev = mirror->rdev->bdev; read_bio->bi_end_io = raid1_end_read_request; - read_bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO); + read_bio->bi_rw = READ | do_sync; read_bio->bi_private = r1_bio; generic_make_request(read_bio); @@ -911,9 +895,10 @@ static int make_request(struct request_queue *q, struct bio * bio) if (test_bit(Faulty, &rdev->flags)) { rdev_dec_pending(rdev, mddev); r1_bio->bios[i] = NULL; - } else + } else { r1_bio->bios[i] = bio; - targets++; + targets++; + } } else r1_bio->bios[i] = NULL; } @@ -941,20 +926,20 @@ static int make_request(struct request_queue *q, struct bio * bio) set_bit(R1BIO_Degraded, &r1_bio->state); } - /* do behind I/O ? */ + /* do behind I/O ? + * Not if there are too many, or cannot allocate memory, + * or a reader on WriteMostly is waiting for behind writes + * to flush */ if (bitmap && (atomic_read(&bitmap->behind_writes) < mddev->bitmap_info.max_write_behind) && + !waitqueue_active(&bitmap->behind_wait) && (behind_pages = alloc_behind_pages(bio)) != NULL) set_bit(R1BIO_BehindIO, &r1_bio->state); atomic_set(&r1_bio->remaining, 0); atomic_set(&r1_bio->behind_remaining, 0); - do_barriers = bio_rw_flagged(bio, BIO_RW_BARRIER); - if (do_barriers) - set_bit(R1BIO_Barrier, &r1_bio->state); - bio_list_init(&bl); for (i = 0; i < disks; i++) { struct bio *mbio; @@ -967,8 +952,7 @@ static int make_request(struct request_queue *q, struct bio * bio) mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset; mbio->bi_bdev = conf->mirrors[i].rdev->bdev; mbio->bi_end_io = raid1_end_write_request; - mbio->bi_rw = WRITE | (do_barriers << BIO_RW_BARRIER) | - (do_sync << BIO_RW_SYNCIO); + mbio->bi_rw = WRITE | do_flush_fua | do_sync; mbio->bi_private = r1_bio; if (behind_pages) { @@ -1069,21 +1053,22 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) } else set_bit(Faulty, &rdev->flags); set_bit(MD_CHANGE_DEVS, &mddev->flags); - printk(KERN_ALERT "raid1: Disk failure on %s, disabling device.\n" - "raid1: Operation continuing on %d devices.\n", - bdevname(rdev->bdev,b), conf->raid_disks - mddev->degraded); + printk(KERN_ALERT "md/raid1:%s: Disk failure on %s, disabling device.\n" + KERN_ALERT "md/raid1:%s: Operation continuing on %d devices.\n", + mdname(mddev), bdevname(rdev->bdev, b), + mdname(mddev), conf->raid_disks - mddev->degraded); } static void print_conf(conf_t *conf) { int i; - printk("RAID1 conf printout:\n"); + printk(KERN_DEBUG "RAID1 conf printout:\n"); if (!conf) { - printk("(!conf)\n"); + printk(KERN_DEBUG "(!conf)\n"); return; } - printk(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded, + printk(KERN_DEBUG " --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded, conf->raid_disks); rcu_read_lock(); @@ -1091,7 +1076,7 @@ static void print_conf(conf_t *conf) char b[BDEVNAME_SIZE]; mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); if (rdev) - printk(" disk %d, wo:%d, o:%d, dev:%s\n", + printk(KERN_DEBUG " disk %d, wo:%d, o:%d, dev:%s\n", i, !test_bit(In_sync, &rdev->flags), !test_bit(Faulty, &rdev->flags), bdevname(rdev->bdev,b)); @@ -1112,6 +1097,8 @@ static int raid1_spare_active(mddev_t *mddev) { int i; conf_t *conf = mddev->private; + int count = 0; + unsigned long flags; /* * Find all failed disks within the RAID1 configuration @@ -1123,15 +1110,16 @@ static int raid1_spare_active(mddev_t *mddev) if (rdev && !test_bit(Faulty, &rdev->flags) && !test_and_set_bit(In_sync, &rdev->flags)) { - unsigned long flags; - spin_lock_irqsave(&conf->device_lock, flags); - mddev->degraded--; - spin_unlock_irqrestore(&conf->device_lock, flags); + count++; + sysfs_notify_dirent(rdev->sysfs_state); } } + spin_lock_irqsave(&conf->device_lock, flags); + mddev->degraded -= count; + spin_unlock_irqrestore(&conf->device_lock, flags); print_conf(conf); - return 0; + return count; } @@ -1152,13 +1140,17 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) disk_stack_limits(mddev->gendisk, rdev->bdev, rdev->data_offset << 9); - /* as we don't honour merge_bvec_fn, we must never risk - * violating it, so limit ->max_sector to one PAGE, as - * a one page request is never in violation. + /* as we don't honour merge_bvec_fn, we must + * never risk violating it, so limit + * ->max_segments to one lying with a single + * page, as a one page request is never in + * violation. */ - if (rdev->bdev->bd_disk->queue->merge_bvec_fn && - queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9)) - blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); + if (rdev->bdev->bd_disk->queue->merge_bvec_fn) { + blk_queue_max_segments(mddev->queue, 1); + blk_queue_segment_boundary(mddev->queue, + PAGE_CACHE_SIZE - 1); + } p->head_position = 0; rdev->raid_disk = mirror; @@ -1218,7 +1210,7 @@ abort: static void end_sync_read(struct bio *bio, int error) { - r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private); + r1bio_t *r1_bio = bio->bi_private; int i; for (i=r1_bio->mddev->raid_disks; i--; ) @@ -1241,7 +1233,7 @@ static void end_sync_read(struct bio *bio, int error) static void end_sync_write(struct bio *bio, int error) { int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private); + r1bio_t *r1_bio = bio->bi_private; mddev_t *mddev = r1_bio->mddev; conf_t *conf = mddev->private; int i; @@ -1448,9 +1440,10 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) char b[BDEVNAME_SIZE]; /* Cannot read from anywhere, array is toast */ md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev); - printk(KERN_ALERT "raid1: %s: unrecoverable I/O read error" + printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O read error" " for block %llu\n", - bdevname(bio->bi_bdev,b), + mdname(mddev), + bdevname(bio->bi_bdev, b), (unsigned long long)r1_bio->sector); md_done_sync(mddev, r1_bio->sectors, 0); put_buf(r1_bio); @@ -1572,7 +1565,7 @@ static void fix_read_error(conf_t *conf, int read_disk, else { atomic_add(s, &rdev->corrected_errors); printk(KERN_INFO - "raid1:%s: read error corrected " + "md/raid1:%s: read error corrected " "(%d sectors at %llu on %s)\n", mdname(mddev), s, (unsigned long long)(sect + @@ -1618,42 +1611,6 @@ static void raid1d(mddev_t *mddev) if (test_bit(R1BIO_IsSync, &r1_bio->state)) { sync_request_write(mddev, r1_bio); unplug = 1; - } else if (test_bit(R1BIO_BarrierRetry, &r1_bio->state)) { - /* some requests in the r1bio were BIO_RW_BARRIER - * requests which failed with -EOPNOTSUPP. Hohumm.. - * Better resubmit without the barrier. - * We know which devices to resubmit for, because - * all others have had their bios[] entry cleared. - * We already have a nr_pending reference on these rdevs. - */ - int i; - const bool do_sync = bio_rw_flagged(r1_bio->master_bio, BIO_RW_SYNCIO); - clear_bit(R1BIO_BarrierRetry, &r1_bio->state); - clear_bit(R1BIO_Barrier, &r1_bio->state); - for (i=0; i < conf->raid_disks; i++) - if (r1_bio->bios[i]) - atomic_inc(&r1_bio->remaining); - for (i=0; i < conf->raid_disks; i++) - if (r1_bio->bios[i]) { - struct bio_vec *bvec; - int j; - - bio = bio_clone(r1_bio->master_bio, GFP_NOIO); - /* copy pages from the failed bio, as - * this might be a write-behind device */ - __bio_for_each_segment(bvec, bio, j, 0) - bvec->bv_page = bio_iovec_idx(r1_bio->bios[i], j)->bv_page; - bio_put(r1_bio->bios[i]); - bio->bi_sector = r1_bio->sector + - conf->mirrors[i].rdev->data_offset; - bio->bi_bdev = conf->mirrors[i].rdev->bdev; - bio->bi_end_io = raid1_end_write_request; - bio->bi_rw = WRITE | - (do_sync << BIO_RW_SYNCIO); - bio->bi_private = r1_bio; - r1_bio->bios[i] = bio; - generic_make_request(bio); - } } else { int disk; @@ -1677,13 +1634,14 @@ static void raid1d(mddev_t *mddev) bio = r1_bio->bios[r1_bio->read_disk]; if ((disk=read_balance(conf, r1_bio)) == -1) { - printk(KERN_ALERT "raid1: %s: unrecoverable I/O" + printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O" " read error for block %llu\n", + mdname(mddev), bdevname(bio->bi_bdev,b), (unsigned long long)r1_bio->sector); raid_end_bio_io(r1_bio); } else { - const bool do_sync = bio_rw_flagged(r1_bio->master_bio, BIO_RW_SYNCIO); + const unsigned long do_sync = r1_bio->master_bio->bi_rw & REQ_SYNC; r1_bio->bios[r1_bio->read_disk] = mddev->ro ? IO_BLOCKED : NULL; r1_bio->read_disk = disk; @@ -1692,14 +1650,15 @@ static void raid1d(mddev_t *mddev) r1_bio->bios[r1_bio->read_disk] = bio; rdev = conf->mirrors[disk].rdev; if (printk_ratelimit()) - printk(KERN_ERR "raid1: %s: redirecting sector %llu to" - " another mirror\n", - bdevname(rdev->bdev,b), - (unsigned long long)r1_bio->sector); + printk(KERN_ERR "md/raid1:%s: redirecting sector %llu to" + " other mirror: %s\n", + mdname(mddev), + (unsigned long long)r1_bio->sector, + bdevname(rdev->bdev,b)); bio->bi_sector = r1_bio->sector + rdev->data_offset; bio->bi_bdev = rdev->bdev; bio->bi_end_io = raid1_end_read_request; - bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO); + bio->bi_rw = READ | do_sync; bio->bi_private = r1_bio; unplug = 1; generic_make_request(bio); @@ -1750,13 +1709,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i int still_degraded = 0; if (!conf->r1buf_pool) - { -/* - printk("sync start - bitmap %p\n", mddev->bitmap); -*/ if (init_resync(conf)) return 0; - } max_sector = mddev->dev_sectors; if (sector_nr >= max_sector) { @@ -1827,7 +1781,9 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i /* take from bio_init */ bio->bi_next = NULL; + bio->bi_flags &= ~(BIO_POOL_MASK-1); bio->bi_flags |= 1 << BIO_UPTODATE; + bio->bi_comp_cpu = -1; bio->bi_rw = READ; bio->bi_vcnt = 0; bio->bi_idx = 0; @@ -1900,7 +1856,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) break; BUG_ON(sync_blocks < (PAGE_SIZE>>9)); - if (len > (sync_blocks<<9)) + if ((len >> 9) > sync_blocks) len = sync_blocks<<9; } @@ -2037,7 +1993,7 @@ static conf_t *setup_conf(mddev_t *mddev) err = -EIO; if (conf->last_used < 0) { - printk(KERN_ERR "raid1: no operational mirrors for %s\n", + printk(KERN_ERR "md/raid1:%s: no operational mirrors\n", mdname(mddev)); goto abort; } @@ -2045,7 +2001,7 @@ static conf_t *setup_conf(mddev_t *mddev) conf->thread = md_register_thread(raid1d, mddev, NULL); if (!conf->thread) { printk(KERN_ERR - "raid1: couldn't allocate thread for %s\n", + "md/raid1:%s: couldn't allocate thread\n", mdname(mddev)); goto abort; } @@ -2071,12 +2027,12 @@ static int run(mddev_t *mddev) mdk_rdev_t *rdev; if (mddev->level != 1) { - printk("raid1: %s: raid level not set to mirroring (%d)\n", + printk(KERN_ERR "md/raid1:%s: raid level not set to mirroring (%d)\n", mdname(mddev), mddev->level); return -EIO; } if (mddev->reshape_position != MaxSector) { - printk("raid1: %s: reshape_position set but not supported\n", + printk(KERN_ERR "md/raid1:%s: reshape_position set but not supported\n", mdname(mddev)); return -EIO; } @@ -2098,12 +2054,14 @@ static int run(mddev_t *mddev) disk_stack_limits(mddev->gendisk, rdev->bdev, rdev->data_offset << 9); /* as we don't honour merge_bvec_fn, we must never risk - * violating it, so limit ->max_sector to one PAGE, as - * a one page request is never in violation. + * violating it, so limit ->max_segments to 1 lying within + * a single page, as a one page request is never in violation. */ - if (rdev->bdev->bd_disk->queue->merge_bvec_fn && - queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9)) - blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); + if (rdev->bdev->bd_disk->queue->merge_bvec_fn) { + blk_queue_max_segments(mddev->queue, 1); + blk_queue_segment_boundary(mddev->queue, + PAGE_CACHE_SIZE - 1); + } } mddev->degraded = 0; @@ -2117,11 +2075,11 @@ static int run(mddev_t *mddev) mddev->recovery_cp = MaxSector; if (mddev->recovery_cp != MaxSector) - printk(KERN_NOTICE "raid1: %s is not clean" + printk(KERN_NOTICE "md/raid1:%s: not clean" " -- starting background reconstruction\n", mdname(mddev)); printk(KERN_INFO - "raid1: raid set %s active with %d out of %d mirrors\n", + "md/raid1:%s: active with %d out of %d mirrors\n", mdname(mddev), mddev->raid_disks - mddev->degraded, mddev->raid_disks); @@ -2145,15 +2103,14 @@ static int stop(mddev_t *mddev) { conf_t *conf = mddev->private; struct bitmap *bitmap = mddev->bitmap; - int behind_wait = 0; /* wait for behind writes to complete */ - while (bitmap && atomic_read(&bitmap->behind_writes) > 0) { - behind_wait++; - printk(KERN_INFO "raid1: behind writes in progress on device %s, waiting to stop (%d)\n", mdname(mddev), behind_wait); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(HZ); /* wait a second */ + if (bitmap && atomic_read(&bitmap->behind_writes) > 0) { + printk(KERN_INFO "md/raid1:%s: behind writes in progress - waiting to stop.\n", + mdname(mddev)); /* need to kick something here to make sure I/O goes? */ + wait_event(bitmap->behind_wait, + atomic_read(&bitmap->behind_writes) == 0); } raise_barrier(conf); @@ -2184,7 +2141,6 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors) if (mddev->array_sectors > raid1_size(mddev, sectors, 0)) return -EINVAL; set_capacity(mddev->gendisk, mddev->array_sectors); - mddev->changed = 1; revalidate_disk(mddev->gendisk); if (sectors > mddev->dev_sectors && mddev->recovery_cp == MaxSector) { @@ -2279,9 +2235,9 @@ static int raid1_reshape(mddev_t *mddev) if (sysfs_create_link(&mddev->kobj, &rdev->kobj, nm)) printk(KERN_WARNING - "md/raid1: cannot register " - "%s for %s\n", - nm, mdname(mddev)); + "md/raid1:%s: cannot register " + "%s\n", + mdname(mddev), nm); } if (rdev) newmirrors[d2++].rdev = rdev; |