From 070ec55d07157a3041f92654135c3c6e2eaaf901 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 16 Jun 2009 16:54:21 +1000 Subject: md: remove mddev_to_conf "helper" macro Having a macro just to cast a void* isn't really helpful. I would must rather see that we are simply de-referencing ->private, than have to know what the macro does. So open code the macro everywhere and remove the pointless cast. Signed-off-by: NeilBrown --- drivers/md/raid5.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index bef876698232..7fb97c65ad37 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3284,7 +3284,7 @@ static void activate_bit_delay(raid5_conf_t *conf) static void unplug_slaves(mddev_t *mddev) { - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; int i; rcu_read_lock(); @@ -3308,7 +3308,7 @@ static void unplug_slaves(mddev_t *mddev) static void raid5_unplug_device(struct request_queue *q) { mddev_t *mddev = q->queuedata; - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; unsigned long flags; spin_lock_irqsave(&conf->device_lock, flags); @@ -3327,7 +3327,7 @@ static void raid5_unplug_device(struct request_queue *q) static int raid5_congested(void *data, int bits) { mddev_t *mddev = data; - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; /* No difference between reads and writes. Just check * how busy the stripe_cache is @@ -3440,7 +3440,7 @@ static void raid5_align_endio(struct bio *bi, int error) bio_put(bi); mddev = raid_bi->bi_bdev->bd_disk->queue->queuedata; - conf = mddev_to_conf(mddev); + conf = mddev->private; rdev = (void*)raid_bi->bi_next; raid_bi->bi_next = NULL; @@ -3482,7 +3482,7 @@ static int bio_fits_rdev(struct bio *bi) static int chunk_aligned_read(struct request_queue *q, struct bio * raid_bio) { mddev_t *mddev = q->queuedata; - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; unsigned int dd_idx; struct bio* align_bi; mdk_rdev_t *rdev; @@ -3599,7 +3599,7 @@ static struct stripe_head *__get_priority_stripe(raid5_conf_t *conf) static int make_request(struct request_queue *q, struct bio * bi) { mddev_t *mddev = q->queuedata; - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; int dd_idx; sector_t new_sector; sector_t logical_sector, last_sector; @@ -4129,7 +4129,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) static void raid5d(mddev_t *mddev) { struct stripe_head *sh; - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; int handled; pr_debug("+++ raid5d active\n"); @@ -4185,7 +4185,7 @@ static void raid5d(mddev_t *mddev) static ssize_t raid5_show_stripe_cache_size(mddev_t *mddev, char *page) { - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; if (conf) return sprintf(page, "%d\n", conf->max_nr_stripes); else @@ -4195,7 +4195,7 @@ raid5_show_stripe_cache_size(mddev_t *mddev, char *page) static ssize_t raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len) { - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; unsigned long new; int err; @@ -4233,7 +4233,7 @@ raid5_stripecache_size = __ATTR(stripe_cache_size, S_IRUGO | S_IWUSR, static ssize_t raid5_show_preread_threshold(mddev_t *mddev, char *page) { - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; if (conf) return sprintf(page, "%d\n", conf->bypass_threshold); else @@ -4243,7 +4243,7 @@ raid5_show_preread_threshold(mddev_t *mddev, char *page) static ssize_t raid5_store_preread_threshold(mddev_t *mddev, const char *page, size_t len) { - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; unsigned long new; if (len >= PAGE_SIZE) return -EINVAL; @@ -4267,7 +4267,7 @@ raid5_preread_bypass_threshold = __ATTR(preread_bypass_threshold, static ssize_t stripe_cache_active_show(mddev_t *mddev, char *page) { - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; if (conf) return sprintf(page, "%d\n", atomic_read(&conf->active_stripes)); else @@ -4291,7 +4291,7 @@ static struct attribute_group raid5_attrs_group = { static sector_t raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks) { - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; if (!sectors) sectors = mddev->dev_sectors; @@ -4845,7 +4845,7 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors) static int raid5_check_reshape(mddev_t *mddev) { - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; if (mddev->delta_disks == 0 && mddev->new_layout == mddev->layout && @@ -4890,7 +4890,7 @@ static int raid5_check_reshape(mddev_t *mddev) static int raid5_start_reshape(mddev_t *mddev) { - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; mdk_rdev_t *rdev; int spares = 0; int added_devices = 0; @@ -5022,7 +5022,7 @@ static void end_reshape(raid5_conf_t *conf) static void raid5_finish_reshape(mddev_t *mddev) { struct block_device *bdev; - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { @@ -5061,7 +5061,7 @@ static void raid5_finish_reshape(mddev_t *mddev) static void raid5_quiesce(mddev_t *mddev, int state) { - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; switch(state) { case 2: /* resume for a suspend */ @@ -5157,7 +5157,7 @@ static int raid5_reconfig(mddev_t *mddev, int new_layout, int new_chunk) * For larger arrays we record the new value - after validation * to be used by a reshape pass. */ - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; if (new_layout >= 0 && !algorithm_valid_raid5(new_layout)) return -EINVAL; -- cgit v1.2.3 From 740da44918680a0c72411ae4ccdd1861069afcc4 Mon Sep 17 00:00:00 2001 From: raz ben yehuda Date: Tue, 16 Jun 2009 17:01:36 +1000 Subject: md: raid5: chunk size check in setup_conf have raid5 check chunk size in run/reshape method instead of in md Signed-off-by: raziebe@gmail.com Signed-off-by: NeilBrown --- drivers/md/raid5.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 7fb97c65ad37..be4e62f611bc 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4336,7 +4336,8 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) return ERR_PTR(-EINVAL); } - if (!mddev->new_chunk || mddev->new_chunk % PAGE_SIZE) { + if (!mddev->new_chunk || mddev->new_chunk % PAGE_SIZE || + !is_power_of_2(mddev->new_chunk)) { printk(KERN_ERR "raid5: invalid chunk size %d for %s\n", mddev->new_chunk, mdname(mddev)); return ERR_PTR(-EINVAL); -- cgit v1.2.3 From 9d8f0363623b3da12c43007cf77f5e1a4e8a5964 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Thu, 18 Jun 2009 08:45:01 +1000 Subject: md: Make mddev->chunk_size sector-based. This patch renames the chunk_size field to chunk_sectors with the implied change of semantics. Since is_power_of_2(chunk_size) = is_power_of_2(chunk_sectors << 9) = is_power_of_2(chunk_sectors) these bits don't need an adjustment for the shift. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/linear.c | 2 +- drivers/md/md.c | 51 ++++++++++++++++++++++++++------------------------- drivers/md/md.h | 2 +- drivers/md/raid0.c | 27 ++++++++++++++------------- drivers/md/raid1.c | 4 ++-- drivers/md/raid10.c | 15 ++++++++------- drivers/md/raid5.c | 41 ++++++++++++++++++++++------------------- 7 files changed, 74 insertions(+), 68 deletions(-) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 9b02a73fbc6b..9f7cec42dd8e 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -305,7 +305,7 @@ static int linear_make_request (struct request_queue *q, struct bio *bio) static void linear_status (struct seq_file *seq, mddev_t *mddev) { - seq_printf(seq, " %dk rounding", mddev->chunk_size/1024); + seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2); } diff --git a/drivers/md/md.c b/drivers/md/md.c index a02bde70874b..abcc0fef30e3 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -869,7 +869,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) mddev->minor_version = sb->minor_version; mddev->patch_version = sb->patch_version; mddev->external = 0; - mddev->chunk_size = sb->chunk_size; + mddev->chunk_sectors = sb->chunk_size >> 9; mddev->ctime = sb->ctime; mddev->utime = sb->utime; mddev->level = sb->level; @@ -892,7 +892,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) mddev->delta_disks = 0; mddev->new_level = mddev->level; mddev->new_layout = mddev->layout; - mddev->new_chunk = mddev->chunk_size; + mddev->new_chunk = mddev->chunk_sectors << 9; } if (sb->state & (1<recovery_cp = 0; sb->layout = mddev->layout; - sb->chunk_size = mddev->chunk_size; + sb->chunk_size = mddev->chunk_sectors << 9; if (mddev->bitmap && mddev->bitmap_file == NULL) sb->state |= (1<major_version = 1; mddev->patch_version = 0; mddev->external = 0; - mddev->chunk_size = le32_to_cpu(sb->chunksize) << 9; + mddev->chunk_sectors = le32_to_cpu(sb->chunksize); mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1); mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1); mddev->level = le32_to_cpu(sb->level); @@ -1310,7 +1310,7 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) mddev->delta_disks = 0; mddev->new_level = mddev->level; mddev->new_layout = mddev->layout; - mddev->new_chunk = mddev->chunk_size; + mddev->new_chunk = mddev->chunk_sectors << 9; } } else if (mddev->pers == NULL) { @@ -1382,7 +1382,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) sb->raid_disks = cpu_to_le32(mddev->raid_disks); sb->size = cpu_to_le64(mddev->dev_sectors); - sb->chunksize = cpu_to_le32(mddev->chunk_size >> 9); + sb->chunksize = cpu_to_le32(mddev->chunk_sectors); sb->level = cpu_to_le32(mddev->level); sb->layout = cpu_to_le32(mddev->layout); @@ -2753,7 +2753,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len) if (IS_ERR(priv)) { mddev->new_level = mddev->level; mddev->new_layout = mddev->layout; - mddev->new_chunk = mddev->chunk_size; + mddev->new_chunk = mddev->chunk_sectors << 9; mddev->raid_disks -= mddev->delta_disks; mddev->delta_disks = 0; module_put(pers->owner); @@ -2771,7 +2771,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len) strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); mddev->level = mddev->new_level; mddev->layout = mddev->new_layout; - mddev->chunk_size = mddev->new_chunk; + mddev->chunk_sectors = mddev->new_chunk >> 9; mddev->delta_disks = 0; pers->run(mddev); mddev_resume(mddev); @@ -2864,10 +2864,10 @@ static ssize_t chunk_size_show(mddev_t *mddev, char *page) { if (mddev->reshape_position != MaxSector && - mddev->chunk_size != mddev->new_chunk) + mddev->chunk_sectors << 9 != mddev->new_chunk) return sprintf(page, "%d (%d)\n", mddev->new_chunk, - mddev->chunk_size); - return sprintf(page, "%d\n", mddev->chunk_size); + mddev->chunk_sectors << 9); + return sprintf(page, "%d\n", mddev->chunk_sectors << 9); } static ssize_t @@ -2889,7 +2889,7 @@ chunk_size_store(mddev_t *mddev, const char *buf, size_t len) } else { mddev->new_chunk = n; if (mddev->reshape_position == MaxSector) - mddev->chunk_size = n; + mddev->chunk_sectors = n >> 9; } return len; } @@ -3534,9 +3534,9 @@ min_sync_store(mddev_t *mddev, const char *buf, size_t len) return -EBUSY; /* Must be a multiple of chunk_size */ - if (mddev->chunk_size) { + if (mddev->chunk_sectors) { sector_t temp = min; - if (sector_div(temp, (mddev->chunk_size>>9))) + if (sector_div(temp, mddev->chunk_sectors)) return -EINVAL; } mddev->resync_min = min; @@ -3572,9 +3572,9 @@ max_sync_store(mddev_t *mddev, const char *buf, size_t len) return -EBUSY; /* Must be a multiple of chunk_size */ - if (mddev->chunk_size) { + if (mddev->chunk_sectors) { sector_t temp = max; - if (sector_div(temp, (mddev->chunk_size>>9))) + if (sector_div(temp, mddev->chunk_sectors)) return -EINVAL; } mddev->resync_max = max; @@ -3665,7 +3665,7 @@ reshape_position_store(mddev_t *mddev, const char *buf, size_t len) mddev->delta_disks = 0; mddev->new_level = mddev->level; mddev->new_layout = mddev->layout; - mddev->new_chunk = mddev->chunk_size; + mddev->new_chunk = mddev->chunk_sectors << 9; return len; } @@ -4007,7 +4007,7 @@ static int do_md_run(mddev_t * mddev) analyze_sbs(mddev); } - chunk_size = mddev->chunk_size; + chunk_size = mddev->chunk_sectors << 9; if (chunk_size) { if (chunk_size > MAX_CHUNK_SIZE) { @@ -4406,7 +4406,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) mddev->flags = 0; mddev->ro = 0; mddev->metadata_type[0] = 0; - mddev->chunk_size = 0; + mddev->chunk_sectors = 0; mddev->ctime = mddev->utime = 0; mddev->layout = 0; mddev->max_disks = 0; @@ -4619,7 +4619,7 @@ static int get_array_info(mddev_t * mddev, void __user * arg) info.spare_disks = spare; info.layout = mddev->layout; - info.chunk_size = mddev->chunk_size; + info.chunk_size = mddev->chunk_sectors << 9; if (copy_to_user(arg, &info, sizeof(info))) return -EFAULT; @@ -4844,7 +4844,8 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) rdev->sb_start = rdev->bdev->bd_inode->i_size / 512; } else rdev->sb_start = calc_dev_sboffset(rdev->bdev); - rdev->sectors = calc_num_sectors(rdev, mddev->chunk_size); + rdev->sectors = calc_num_sectors(rdev, + mddev->chunk_sectors << 9); err = bind_rdev_to_array(rdev, mddev); if (err) { @@ -4914,7 +4915,7 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev) else rdev->sb_start = rdev->bdev->bd_inode->i_size / 512; - rdev->sectors = calc_num_sectors(rdev, mddev->chunk_size); + rdev->sectors = calc_num_sectors(rdev, mddev->chunk_sectors << 9); if (test_bit(Faulty, &rdev->flags)) { printk(KERN_WARNING @@ -5063,7 +5064,7 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info) mddev->external = 0; mddev->layout = info->layout; - mddev->chunk_size = info->chunk_size; + mddev->chunk_sectors = info->chunk_size >> 9; mddev->max_disks = MD_SB_DISKS; @@ -5082,7 +5083,7 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info) get_random_bytes(mddev->uuid, 16); mddev->new_level = mddev->level; - mddev->new_chunk = mddev->chunk_size; + mddev->new_chunk = mddev->chunk_sectors << 9; mddev->new_layout = mddev->layout; mddev->delta_disks = 0; @@ -5192,7 +5193,7 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info) mddev->level != info->level || /* mddev->layout != info->layout || */ !mddev->persistent != info->not_persistent|| - mddev->chunk_size != info->chunk_size || + mddev->chunk_sectors != info->chunk_size >> 9 || /* ignore bottom 8 bits of state, and allow SB_BITMAP_PRESENT to change */ ((state^info->state) & 0xfffffe00) ) diff --git a/drivers/md/md.h b/drivers/md/md.h index 8227ab909d44..5d78830043d0 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -145,7 +145,7 @@ struct mddev_s int external; /* metadata is * managed externally */ char metadata_type[17]; /* externally set*/ - int chunk_size; + int chunk_sectors; time_t ctime, utime; int level, layout; char clevel[16]; diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 7cd2671cc794..f20b18ff7969 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -238,10 +238,10 @@ static int create_strip_zones(mddev_t *mddev) * now since we have the hard sector sizes, we can make sure * chunk size is a multiple of that sector size */ - if (mddev->chunk_size % queue_logical_block_size(mddev->queue)) { + if ((mddev->chunk_sectors << 9) % queue_logical_block_size(mddev->queue)) { printk(KERN_ERR "%s chunk_size of %d not valid\n", mdname(mddev), - mddev->chunk_size); + mddev->chunk_sectors << 9); goto abort; } printk(KERN_INFO "raid0: done.\n"); @@ -270,10 +270,10 @@ static int raid0_mergeable_bvec(struct request_queue *q, mddev_t *mddev = q->queuedata; sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); int max; - unsigned int chunk_sectors = mddev->chunk_size >> 9; + unsigned int chunk_sectors = mddev->chunk_sectors; unsigned int bio_sectors = bvm->bi_size >> 9; - if (is_power_of_2(mddev->chunk_size)) + if (is_power_of_2(mddev->chunk_sectors)) max = (chunk_sectors - ((sector & (chunk_sectors-1)) + bio_sectors)) << 9; else @@ -304,11 +304,11 @@ static int raid0_run(mddev_t *mddev) { int ret; - if (mddev->chunk_size == 0) { + if (mddev->chunk_sectors == 0) { printk(KERN_ERR "md/raid0: chunk size must be set.\n"); return -EINVAL; } - blk_queue_max_sectors(mddev->queue, mddev->chunk_size >> 9); + blk_queue_max_sectors(mddev->queue, mddev->chunk_sectors); mddev->queue->queue_lock = &mddev->queue->__queue_lock; ret = create_strip_zones(mddev); @@ -330,7 +330,8 @@ static int raid0_run(mddev_t *mddev) * chunksize should be used in that case. */ { - int stripe = mddev->raid_disks * mddev->chunk_size / PAGE_SIZE; + int stripe = mddev->raid_disks * + (mddev->chunk_sectors << 9) / PAGE_SIZE; if (mddev->queue->backing_dev_info.ra_pages < 2* stripe) mddev->queue->backing_dev_info.ra_pages = 2* stripe; } @@ -381,9 +382,9 @@ static mdk_rdev_t *map_sector(mddev_t *mddev, struct strip_zone *zone, unsigned int sect_in_chunk; sector_t chunk; raid0_conf_t *conf = mddev->private; - unsigned int chunk_sects = mddev->chunk_size >> 9; + unsigned int chunk_sects = mddev->chunk_sectors; - if (is_power_of_2(mddev->chunk_size)) { + if (is_power_of_2(mddev->chunk_sectors)) { int chunksect_bits = ffz(~chunk_sects); /* find the sector offset inside the chunk */ sect_in_chunk = sector & (chunk_sects - 1); @@ -413,7 +414,7 @@ static mdk_rdev_t *map_sector(mddev_t *mddev, struct strip_zone *zone, static inline int is_io_in_chunk_boundary(mddev_t *mddev, unsigned int chunk_sects, struct bio *bio) { - if (likely(is_power_of_2(mddev->chunk_size))) { + if (likely(is_power_of_2(mddev->chunk_sectors))) { return chunk_sects >= ((bio->bi_sector & (chunk_sects-1)) + (bio->bi_size >> 9)); } else{ @@ -444,7 +445,7 @@ static int raid0_make_request(struct request_queue *q, struct bio *bio) bio_sectors(bio)); part_stat_unlock(); - chunk_sects = mddev->chunk_size >> 9; + chunk_sects = mddev->chunk_sectors; if (unlikely(!is_io_in_chunk_boundary(mddev, chunk_sects, bio))) { sector_t sector = bio->bi_sector; struct bio_pair *bp; @@ -455,7 +456,7 @@ static int raid0_make_request(struct request_queue *q, struct bio *bio) /* This is a one page bio that upper layers * refuse to split for us, so we need to split it. */ - if (likely(is_power_of_2(mddev->chunk_size))) + if (likely(is_power_of_2(mddev->chunk_sectors))) bp = bio_split(bio, chunk_sects - (sector & (chunk_sects-1))); else @@ -519,7 +520,7 @@ static void raid0_status(struct seq_file *seq, mddev_t *mddev) zone_start = conf->strip_zone[j].zone_end; } #endif - seq_printf(seq, " %dk chunks", mddev->chunk_size/1024); + seq_printf(seq, " %dk chunks", mddev->chunk_sectors / 2); return; } diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 5ea5bca53a5e..388635735ae5 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -2161,10 +2161,10 @@ static int raid1_reshape(mddev_t *mddev) int d, d2, err; /* Cannot change chunk_size, layout, or level */ - if (mddev->chunk_size != mddev->new_chunk || + if (mddev->chunk_sectors << 9 != mddev->new_chunk || mddev->layout != mddev->new_layout || mddev->level != mddev->new_level) { - mddev->new_chunk = mddev->chunk_size; + mddev->new_chunk = mddev->chunk_sectors << 9; mddev->new_layout = mddev->layout; mddev->new_level = mddev->level; return -EINVAL; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 06bef686f91b..30029a312cf5 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -461,7 +461,7 @@ static int raid10_mergeable_bvec(struct request_queue *q, mddev_t *mddev = q->queuedata; sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); int max; - unsigned int chunk_sectors = mddev->chunk_size >> 9; + unsigned int chunk_sectors = mddev->chunk_sectors; unsigned int bio_sectors = bvm->bi_size >> 9; max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; @@ -985,7 +985,7 @@ static void status(struct seq_file *seq, mddev_t *mddev) int i; if (conf->near_copies < conf->raid_disks) - seq_printf(seq, " %dK chunks", mddev->chunk_size/1024); + seq_printf(seq, " %dK chunks", mddev->chunk_sectors / 2); if (conf->near_copies > 1) seq_printf(seq, " %d near-copies", conf->near_copies); if (conf->far_copies > 1) { @@ -2050,8 +2050,8 @@ static int run(mddev_t *mddev) int nc, fc, fo; sector_t stride, size; - if (mddev->chunk_size < PAGE_SIZE || - !is_power_of_2(mddev->chunk_size)) { + if (mddev->chunk_sectors < (PAGE_SIZE >> 9) || + !is_power_of_2(mddev->chunk_sectors)) { printk(KERN_ERR "md/raid10: chunk size must be " "at least PAGE_SIZE(%ld) and be a power of 2.\n", PAGE_SIZE); return -EINVAL; @@ -2096,8 +2096,8 @@ static int run(mddev_t *mddev) conf->far_copies = fc; conf->copies = nc*fc; conf->far_offset = fo; - conf->chunk_mask = (sector_t)(mddev->chunk_size>>9)-1; - conf->chunk_shift = ffz(~mddev->chunk_size) - 9; + conf->chunk_mask = mddev->chunk_sectors - 1; + conf->chunk_shift = ffz(~mddev->chunk_sectors); size = mddev->dev_sectors >> conf->chunk_shift; sector_div(size, fc); size = size * conf->raid_disks; @@ -2205,7 +2205,8 @@ static int run(mddev_t *mddev) * maybe... */ { - int stripe = conf->raid_disks * (mddev->chunk_size / PAGE_SIZE); + int stripe = conf->raid_disks * + ((mddev->chunk_sectors << 9) / PAGE_SIZE); stripe /= conf->near_copies; if (mddev->queue->backing_dev_info.ra_pages < 2* stripe) mddev->queue->backing_dev_info.ra_pages = 2* stripe; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index be4e62f611bc..1e4fd5e8bfdd 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3352,13 +3352,13 @@ static int raid5_mergeable_bvec(struct request_queue *q, mddev_t *mddev = q->queuedata; sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); int max; - unsigned int chunk_sectors = mddev->chunk_size >> 9; + unsigned int chunk_sectors = mddev->chunk_sectors; unsigned int bio_sectors = bvm->bi_size >> 9; if ((bvm->bi_rw & 1) == WRITE) return biovec->bv_len; /* always allow writes to be mergeable */ - if (mddev->new_chunk < mddev->chunk_size) + if (mddev->new_chunk < mddev->chunk_sectors << 9) chunk_sectors = mddev->new_chunk >> 9; max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; if (max < 0) max = 0; @@ -3372,10 +3372,10 @@ static int raid5_mergeable_bvec(struct request_queue *q, static int in_chunk_boundary(mddev_t *mddev, struct bio *bio) { sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); - unsigned int chunk_sectors = mddev->chunk_size >> 9; + unsigned int chunk_sectors = mddev->chunk_sectors; unsigned int bio_sectors = bio->bi_size >> 9; - if (mddev->new_chunk < mddev->chunk_size) + if (mddev->new_chunk < mddev->chunk_sectors << 9) chunk_sectors = mddev->new_chunk >> 9; return chunk_sectors >= ((sector & (chunk_sectors - 1)) + bio_sectors); @@ -3791,10 +3791,10 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped * If old and new chunk sizes differ, we need to process the * largest of these */ - if (mddev->new_chunk > mddev->chunk_size) + if (mddev->new_chunk > mddev->chunk_sectors << 9) reshape_sectors = mddev->new_chunk / 512; else - reshape_sectors = mddev->chunk_size / 512; + reshape_sectors = mddev->chunk_sectors; /* we update the metadata when there is more than 3Meg * in the block range (that is rather arbitrary, should @@ -4303,7 +4303,7 @@ raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks) raid_disks = conf->previous_raid_disks; } - sectors &= ~((sector_t)mddev->chunk_size/512 - 1); + sectors &= ~((sector_t)mddev->chunk_sectors - 1); sectors &= ~((sector_t)mddev->new_chunk/512 - 1); return sectors * (raid_disks - conf->max_degraded); } @@ -4412,7 +4412,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) conf->max_nr_stripes = NR_STRIPES; conf->reshape_progress = mddev->reshape_position; if (conf->reshape_progress != MaxSector) { - conf->prev_chunk = mddev->chunk_size; + conf->prev_chunk = mddev->chunk_sectors << 9; conf->prev_algo = mddev->layout; } @@ -4484,7 +4484,7 @@ static int run(mddev_t *mddev) } /* here_new is the stripe we will write to */ here_old = mddev->reshape_position; - sector_div(here_old, (mddev->chunk_size>>9)* + sector_div(here_old, mddev->chunk_sectors * (old_disks-max_degraded)); /* here_old is the first stripe that we might need to read * from */ @@ -4499,7 +4499,7 @@ static int run(mddev_t *mddev) } else { BUG_ON(mddev->level != mddev->new_level); BUG_ON(mddev->layout != mddev->new_layout); - BUG_ON(mddev->chunk_size != mddev->new_chunk); + BUG_ON(mddev->chunk_sectors << 9 != mddev->new_chunk); BUG_ON(mddev->delta_disks != 0); } @@ -4533,7 +4533,7 @@ static int run(mddev_t *mddev) } /* device size must be a multiple of chunk size */ - mddev->dev_sectors &= ~(mddev->chunk_size / 512 - 1); + mddev->dev_sectors &= ~(mddev->chunk_sectors - 1); mddev->resync_max_sectors = mddev->dev_sectors; if (mddev->degraded > 0 && @@ -4582,7 +4582,7 @@ static int run(mddev_t *mddev) { int data_disks = conf->previous_raid_disks - conf->max_degraded; int stripe = data_disks * - (mddev->chunk_size / PAGE_SIZE); + ((mddev->chunk_sectors << 9) / PAGE_SIZE); if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe) mddev->queue->backing_dev_info.ra_pages = 2 * stripe; } @@ -4679,7 +4679,8 @@ static void status(struct seq_file *seq, mddev_t *mddev) raid5_conf_t *conf = (raid5_conf_t *) mddev->private; int i; - seq_printf (seq, " level %d, %dk chunk, algorithm %d", mddev->level, mddev->chunk_size >> 10, mddev->layout); + seq_printf(seq, " level %d, %dk chunk, algorithm %d", mddev->level, + mddev->chunk_sectors / 2, mddev->layout); seq_printf (seq, " [%d/%d] [", conf->raid_disks, conf->raid_disks - mddev->degraded); for (i = 0; i < conf->raid_disks; i++) seq_printf (seq, "%s", @@ -4827,7 +4828,7 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors) * any io in the removed space completes, but it hardly seems * worth it. */ - sectors &= ~((sector_t)mddev->chunk_size/512 - 1); + sectors &= ~((sector_t)mddev->chunk_sectors - 1); md_set_array_sectors(mddev, raid5_size(mddev, sectors, mddev->raid_disks)); if (mddev->array_sectors > @@ -4850,7 +4851,7 @@ static int raid5_check_reshape(mddev_t *mddev) if (mddev->delta_disks == 0 && mddev->new_layout == mddev->layout && - mddev->new_chunk == mddev->chunk_size) + mddev->new_chunk == mddev->chunk_sectors << 9) return -EINVAL; /* nothing to do */ if (mddev->bitmap) /* Cannot grow a bitmap yet */ @@ -4878,10 +4879,11 @@ static int raid5_check_reshape(mddev_t *mddev) * If the chunk size is greater, user-space should request more * stripe_heads first. */ - if ((mddev->chunk_size / STRIPE_SIZE) * 4 > conf->max_nr_stripes || + if (((mddev->chunk_sectors << 9) / STRIPE_SIZE) * 4 + > conf->max_nr_stripes || (mddev->new_chunk / STRIPE_SIZE) * 4 > conf->max_nr_stripes) { printk(KERN_WARNING "raid5: reshape: not enough stripes. Needed %lu\n", - (max(mddev->chunk_size, mddev->new_chunk) + (max(mddev->chunk_sectors << 9, mddev->new_chunk) / STRIPE_SIZE)*4); return -ENOSPC; } @@ -5054,7 +5056,7 @@ static void raid5_finish_reshape(mddev_t *mddev) raid5_remove_disk(mddev, d); } mddev->layout = conf->algorithm; - mddev->chunk_size = conf->chunk_size; + mddev->chunk_sectors = conf->chunk_size >> 9; mddev->reshape_position = MaxSector; mddev->delta_disks = 0; } @@ -5183,7 +5185,8 @@ static int raid5_reconfig(mddev_t *mddev, int new_layout, int new_chunk) } if (new_chunk > 0) { conf->chunk_size = new_chunk; - mddev->chunk_size = mddev->new_chunk = new_chunk; + mddev->new_chunk = new_chunk; + mddev->chunk_sectors = new_chunk >> 9; } set_bit(MD_CHANGE_DEVS, &mddev->flags); md_wakeup_thread(mddev->thread); -- cgit v1.2.3 From 664e7c413f1e90eceb0b2596dd73a0832faec058 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Thu, 18 Jun 2009 08:45:27 +1000 Subject: md: Convert mddev->new_chunk to sectors. A straight-forward conversion which gets rid of some multiplications/divisions/shifts. The patch also introduces a couple of new ones, most of which are due to conf->chunk_size still being represented in bytes. This will be cleaned up in subsequent patches. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/md.c | 29 +++++++++++++++-------------- drivers/md/md.h | 3 ++- drivers/md/raid1.c | 4 ++-- drivers/md/raid5.c | 45 ++++++++++++++++++++++++--------------------- 4 files changed, 43 insertions(+), 38 deletions(-) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index abcc0fef30e3..f996d8342a85 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -886,13 +886,13 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) mddev->delta_disks = sb->delta_disks; mddev->new_level = sb->new_level; mddev->new_layout = sb->new_layout; - mddev->new_chunk = sb->new_chunk; + mddev->new_chunk_sectors = sb->new_chunk >> 9; } else { mddev->reshape_position = MaxSector; mddev->delta_disks = 0; mddev->new_level = mddev->level; mddev->new_layout = mddev->layout; - mddev->new_chunk = mddev->chunk_sectors << 9; + mddev->new_chunk_sectors = mddev->chunk_sectors; } if (sb->state & (1<new_level = mddev->new_level; sb->delta_disks = mddev->delta_disks; sb->new_layout = mddev->new_layout; - sb->new_chunk = mddev->new_chunk; + sb->new_chunk = mddev->new_chunk_sectors << 9; } mddev->minor_version = sb->minor_version; if (mddev->in_sync) @@ -1304,13 +1304,13 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) mddev->delta_disks = le32_to_cpu(sb->delta_disks); mddev->new_level = le32_to_cpu(sb->new_level); mddev->new_layout = le32_to_cpu(sb->new_layout); - mddev->new_chunk = le32_to_cpu(sb->new_chunk)<<9; + mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk); } else { mddev->reshape_position = MaxSector; mddev->delta_disks = 0; mddev->new_level = mddev->level; mddev->new_layout = mddev->layout; - mddev->new_chunk = mddev->chunk_sectors << 9; + mddev->new_chunk_sectors = mddev->chunk_sectors; } } else if (mddev->pers == NULL) { @@ -1409,7 +1409,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) sb->new_layout = cpu_to_le32(mddev->new_layout); sb->delta_disks = cpu_to_le32(mddev->delta_disks); sb->new_level = cpu_to_le32(mddev->new_level); - sb->new_chunk = cpu_to_le32(mddev->new_chunk>>9); + sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors); } max_dev = 0; @@ -2753,7 +2753,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len) if (IS_ERR(priv)) { mddev->new_level = mddev->level; mddev->new_layout = mddev->layout; - mddev->new_chunk = mddev->chunk_sectors << 9; + mddev->new_chunk_sectors = mddev->chunk_sectors; mddev->raid_disks -= mddev->delta_disks; mddev->delta_disks = 0; module_put(pers->owner); @@ -2771,7 +2771,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len) strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); mddev->level = mddev->new_level; mddev->layout = mddev->new_layout; - mddev->chunk_sectors = mddev->new_chunk >> 9; + mddev->chunk_sectors = mddev->new_chunk_sectors; mddev->delta_disks = 0; pers->run(mddev); mddev_resume(mddev); @@ -2864,8 +2864,9 @@ static ssize_t chunk_size_show(mddev_t *mddev, char *page) { if (mddev->reshape_position != MaxSector && - mddev->chunk_sectors << 9 != mddev->new_chunk) - return sprintf(page, "%d (%d)\n", mddev->new_chunk, + mddev->chunk_sectors != mddev->new_chunk_sectors) + return sprintf(page, "%d (%d)\n", + mddev->new_chunk_sectors << 9, mddev->chunk_sectors << 9); return sprintf(page, "%d\n", mddev->chunk_sectors << 9); } @@ -2887,7 +2888,7 @@ chunk_size_store(mddev_t *mddev, const char *buf, size_t len) if (err) return err; } else { - mddev->new_chunk = n; + mddev->new_chunk_sectors = n >> 9; if (mddev->reshape_position == MaxSector) mddev->chunk_sectors = n >> 9; } @@ -3665,7 +3666,7 @@ reshape_position_store(mddev_t *mddev, const char *buf, size_t len) mddev->delta_disks = 0; mddev->new_level = mddev->level; mddev->new_layout = mddev->layout; - mddev->new_chunk = mddev->chunk_sectors << 9; + mddev->new_chunk_sectors = mddev->chunk_sectors; return len; } @@ -4414,7 +4415,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) mddev->delta_disks = 0; mddev->new_level = LEVEL_NONE; mddev->new_layout = 0; - mddev->new_chunk = 0; + mddev->new_chunk_sectors = 0; mddev->curr_resync = 0; mddev->resync_mismatches = 0; mddev->suspend_lo = mddev->suspend_hi = 0; @@ -5083,7 +5084,7 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info) get_random_bytes(mddev->uuid, 16); mddev->new_level = mddev->level; - mddev->new_chunk = mddev->chunk_sectors << 9; + mddev->new_chunk_sectors = mddev->chunk_sectors; mddev->new_layout = mddev->layout; mddev->delta_disks = 0; diff --git a/drivers/md/md.h b/drivers/md/md.h index 5d78830043d0..e0a2b8e3985d 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -166,7 +166,8 @@ struct mddev_s * If reshape_position is MaxSector, then no reshape is happening (yet). */ sector_t reshape_position; - int delta_disks, new_level, new_layout, new_chunk; + int delta_disks, new_level, new_layout; + int new_chunk_sectors; struct mdk_thread_s *thread; /* management thread */ struct mdk_thread_s *sync_thread; /* doing resync or reconstruct */ diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 388635735ae5..12f8f34f17ae 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -2161,10 +2161,10 @@ static int raid1_reshape(mddev_t *mddev) int d, d2, err; /* Cannot change chunk_size, layout, or level */ - if (mddev->chunk_sectors << 9 != mddev->new_chunk || + if (mddev->chunk_sectors != mddev->new_chunk_sectors || mddev->layout != mddev->new_layout || mddev->level != mddev->new_level) { - mddev->new_chunk = mddev->chunk_sectors << 9; + mddev->new_chunk_sectors = mddev->chunk_sectors; mddev->new_layout = mddev->layout; mddev->new_level = mddev->level; return -EINVAL; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 1e4fd5e8bfdd..bc3564cfbba0 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3358,8 +3358,8 @@ static int raid5_mergeable_bvec(struct request_queue *q, if ((bvm->bi_rw & 1) == WRITE) return biovec->bv_len; /* always allow writes to be mergeable */ - if (mddev->new_chunk < mddev->chunk_sectors << 9) - chunk_sectors = mddev->new_chunk >> 9; + if (mddev->new_chunk_sectors < mddev->chunk_sectors) + chunk_sectors = mddev->new_chunk_sectors; max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; if (max < 0) max = 0; if (max <= biovec->bv_len && bio_sectors == 0) @@ -3375,8 +3375,8 @@ static int in_chunk_boundary(mddev_t *mddev, struct bio *bio) unsigned int chunk_sectors = mddev->chunk_sectors; unsigned int bio_sectors = bio->bi_size >> 9; - if (mddev->new_chunk < mddev->chunk_sectors << 9) - chunk_sectors = mddev->new_chunk >> 9; + if (mddev->new_chunk_sectors < mddev->chunk_sectors) + chunk_sectors = mddev->new_chunk_sectors; return chunk_sectors >= ((sector & (chunk_sectors - 1)) + bio_sectors); } @@ -3791,8 +3791,8 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped * If old and new chunk sizes differ, we need to process the * largest of these */ - if (mddev->new_chunk > mddev->chunk_sectors << 9) - reshape_sectors = mddev->new_chunk / 512; + if (mddev->new_chunk_sectors > mddev->chunk_sectors) + reshape_sectors = mddev->new_chunk_sectors; else reshape_sectors = mddev->chunk_sectors; @@ -4304,7 +4304,7 @@ raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks) } sectors &= ~((sector_t)mddev->chunk_sectors - 1); - sectors &= ~((sector_t)mddev->new_chunk/512 - 1); + sectors &= ~((sector_t)mddev->new_chunk_sectors - 1); return sectors * (raid_disks - conf->max_degraded); } @@ -4336,10 +4336,11 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) return ERR_PTR(-EINVAL); } - if (!mddev->new_chunk || mddev->new_chunk % PAGE_SIZE || - !is_power_of_2(mddev->new_chunk)) { + if (!mddev->new_chunk_sectors || + (mddev->new_chunk_sectors << 9) % PAGE_SIZE || + !is_power_of_2(mddev->new_chunk_sectors)) { printk(KERN_ERR "raid5: invalid chunk size %d for %s\n", - mddev->new_chunk, mdname(mddev)); + mddev->new_chunk_sectors << 9, mdname(mddev)); return ERR_PTR(-EINVAL); } @@ -4402,7 +4403,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) conf->fullsync = 1; } - conf->chunk_size = mddev->new_chunk; + conf->chunk_size = mddev->new_chunk_sectors << 9; conf->level = mddev->new_level; if (conf->level == 6) conf->max_degraded = 2; @@ -4476,7 +4477,7 @@ static int run(mddev_t *mddev) * geometry. */ here_new = mddev->reshape_position; - if (sector_div(here_new, (mddev->new_chunk>>9)* + if (sector_div(here_new, mddev->new_chunk_sectors * (mddev->raid_disks - max_degraded))) { printk(KERN_ERR "raid5: reshape_position not " "on a stripe boundary\n"); @@ -4499,7 +4500,7 @@ static int run(mddev_t *mddev) } else { BUG_ON(mddev->level != mddev->new_level); BUG_ON(mddev->layout != mddev->new_layout); - BUG_ON(mddev->chunk_sectors << 9 != mddev->new_chunk); + BUG_ON(mddev->chunk_sectors != mddev->new_chunk_sectors); BUG_ON(mddev->delta_disks != 0); } @@ -4851,7 +4852,7 @@ static int raid5_check_reshape(mddev_t *mddev) if (mddev->delta_disks == 0 && mddev->new_layout == mddev->layout && - mddev->new_chunk == mddev->chunk_sectors << 9) + mddev->new_chunk_sectors == mddev->chunk_sectors) return -EINVAL; /* nothing to do */ if (mddev->bitmap) /* Cannot grow a bitmap yet */ @@ -4881,9 +4882,11 @@ static int raid5_check_reshape(mddev_t *mddev) */ if (((mddev->chunk_sectors << 9) / STRIPE_SIZE) * 4 > conf->max_nr_stripes || - (mddev->new_chunk / STRIPE_SIZE) * 4 > conf->max_nr_stripes) { + ((mddev->new_chunk_sectors << 9) / STRIPE_SIZE) * 4 + > conf->max_nr_stripes) { printk(KERN_WARNING "raid5: reshape: not enough stripes. Needed %lu\n", - (max(mddev->chunk_sectors << 9, mddev->new_chunk) + (max(mddev->chunk_sectors << 9, + mddev->new_chunk_sectors << 9) / STRIPE_SIZE)*4); return -ENOSPC; } @@ -4929,7 +4932,7 @@ static int raid5_start_reshape(mddev_t *mddev) conf->previous_raid_disks = conf->raid_disks; conf->raid_disks += mddev->delta_disks; conf->prev_chunk = conf->chunk_size; - conf->chunk_size = mddev->new_chunk; + conf->chunk_size = mddev->new_chunk_sectors << 9; conf->prev_algo = conf->algorithm; conf->algorithm = mddev->new_layout; if (mddev->delta_disks < 0) @@ -5114,7 +5117,7 @@ static void *raid5_takeover_raid1(mddev_t *mddev) mddev->new_level = 5; mddev->new_layout = ALGORITHM_LEFT_SYMMETRIC; - mddev->new_chunk = chunksect << 9; + mddev->new_chunk_sectors = chunksect; return setup_conf(mddev); } @@ -5185,7 +5188,7 @@ static int raid5_reconfig(mddev_t *mddev, int new_layout, int new_chunk) } if (new_chunk > 0) { conf->chunk_size = new_chunk; - mddev->new_chunk = new_chunk; + mddev->new_chunk_sectors = new_chunk >> 9; mddev->chunk_sectors = new_chunk >> 9; } set_bit(MD_CHANGE_DEVS, &mddev->flags); @@ -5194,7 +5197,7 @@ static int raid5_reconfig(mddev_t *mddev, int new_layout, int new_chunk) if (new_layout >= 0) mddev->new_layout = new_layout; if (new_chunk > 0) - mddev->new_chunk = new_chunk; + mddev->new_chunk_sectors = new_chunk >> 9; } return 0; } @@ -5219,7 +5222,7 @@ static int raid6_reconfig(mddev_t *mddev, int new_layout, int new_chunk) if (new_layout >= 0) mddev->new_layout = new_layout; if (new_chunk > 0) - mddev->new_chunk = new_chunk; + mddev->new_chunk_sectors = new_chunk >> 9; return 0; } -- cgit v1.2.3 From 09c9e5fa1b93ad5b81c9dcf8ce3a5b9ae2ac31e4 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Thu, 18 Jun 2009 08:45:55 +1000 Subject: md: convert conf->chunk_size and conf->prev_chunk to sectors. This kills some more shifts. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/raid5.c | 27 +++++++++++++-------------- drivers/md/raid5.h | 6 ++++-- 2 files changed, 17 insertions(+), 16 deletions(-) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index bc3564cfbba0..eaa2d3ee2b5d 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1274,8 +1274,8 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector, sector_t new_sector; int algorithm = previous ? conf->prev_algo : conf->algorithm; - int sectors_per_chunk = previous ? (conf->prev_chunk >> 9) - : (conf->chunk_size >> 9); + int sectors_per_chunk = previous ? conf->prev_chunk_sectors + : conf->chunk_sectors; int raid_disks = previous ? conf->previous_raid_disks : conf->raid_disks; int data_disks = raid_disks - conf->max_degraded; @@ -1480,8 +1480,8 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous) int raid_disks = sh->disks; int data_disks = raid_disks - conf->max_degraded; sector_t new_sector = sh->sector, check; - int sectors_per_chunk = previous ? (conf->prev_chunk >> 9) - : (conf->chunk_size >> 9); + int sectors_per_chunk = previous ? conf->prev_chunk_sectors + : conf->chunk_sectors; int algorithm = previous ? conf->prev_algo : conf->algorithm; sector_t stripe; @@ -1997,8 +1997,7 @@ static void stripe_set_idx(sector_t stripe, raid5_conf_t *conf, int previous, struct stripe_head *sh) { int sectors_per_chunk = - previous ? (conf->prev_chunk >> 9) - : (conf->chunk_size >> 9); + previous ? conf->prev_chunk_sectors : conf->chunk_sectors; int dd_idx; int chunk_offset = sector_div(stripe, sectors_per_chunk); int disks = previous ? conf->previous_raid_disks : conf->raid_disks; @@ -3917,7 +3916,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped 1, &dd_idx, NULL); last_sector = raid5_compute_sector(conf, ((stripe_addr+reshape_sectors) - *(new_data_disks) - 1), + * new_data_disks - 1), 1, &dd_idx, NULL); if (last_sector >= mddev->dev_sectors) last_sector = mddev->dev_sectors - 1; @@ -4403,7 +4402,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) conf->fullsync = 1; } - conf->chunk_size = mddev->new_chunk_sectors << 9; + conf->chunk_sectors = mddev->new_chunk_sectors; conf->level = mddev->new_level; if (conf->level == 6) conf->max_degraded = 2; @@ -4413,7 +4412,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) conf->max_nr_stripes = NR_STRIPES; conf->reshape_progress = mddev->reshape_position; if (conf->reshape_progress != MaxSector) { - conf->prev_chunk = mddev->chunk_sectors << 9; + conf->prev_chunk_sectors = mddev->chunk_sectors; conf->prev_algo = mddev->layout; } @@ -4931,8 +4930,8 @@ static int raid5_start_reshape(mddev_t *mddev) spin_lock_irq(&conf->device_lock); conf->previous_raid_disks = conf->raid_disks; conf->raid_disks += mddev->delta_disks; - conf->prev_chunk = conf->chunk_size; - conf->chunk_size = mddev->new_chunk_sectors << 9; + conf->prev_chunk_sectors = conf->chunk_sectors; + conf->chunk_sectors = mddev->new_chunk_sectors; conf->prev_algo = conf->algorithm; conf->algorithm = mddev->new_layout; if (mddev->delta_disks < 0) @@ -5014,7 +5013,7 @@ static void end_reshape(raid5_conf_t *conf) */ { int data_disks = conf->raid_disks - conf->max_degraded; - int stripe = data_disks * (conf->chunk_size + int stripe = data_disks * ((conf->chunk_sectors << 9) / PAGE_SIZE); if (conf->mddev->queue->backing_dev_info.ra_pages < 2 * stripe) conf->mddev->queue->backing_dev_info.ra_pages = 2 * stripe; @@ -5059,7 +5058,7 @@ static void raid5_finish_reshape(mddev_t *mddev) raid5_remove_disk(mddev, d); } mddev->layout = conf->algorithm; - mddev->chunk_sectors = conf->chunk_size >> 9; + mddev->chunk_sectors = conf->chunk_sectors; mddev->reshape_position = MaxSector; mddev->delta_disks = 0; } @@ -5187,7 +5186,7 @@ static int raid5_reconfig(mddev_t *mddev, int new_layout, int new_chunk) mddev->layout = mddev->new_layout = new_layout; } if (new_chunk > 0) { - conf->chunk_size = new_chunk; + conf->chunk_sectors = new_chunk >> 9; mddev->new_chunk_sectors = new_chunk >> 9; mddev->chunk_sectors = new_chunk >> 9; } diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 1a25c9e252b4..9459689c4ea0 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -334,7 +334,8 @@ struct raid5_private_data { struct hlist_head *stripe_hashtbl; mddev_t *mddev; struct disk_info *spare; - int chunk_size, level, algorithm; + int chunk_sectors; + int level, algorithm; int max_degraded; int raid_disks; int max_nr_stripes; @@ -350,7 +351,8 @@ struct raid5_private_data { */ sector_t reshape_safe; int previous_raid_disks; - int prev_chunk, prev_algo; + int prev_chunk_sectors; + int prev_algo; short generation; /* increments with every reshape */ unsigned long reshape_checkpoint; /* Time we last updated * metadata */ -- cgit v1.2.3 From 0ba459d26260d4d13346c76642f461b2bf607eef Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Thu, 18 Jun 2009 08:46:10 +1000 Subject: md/raid5: Use is_power_of_2() in raid5_reconfig()/raid6_reconfig(). Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/raid5.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index eaa2d3ee2b5d..72e8a324dcfb 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5167,8 +5167,7 @@ static int raid5_reconfig(mddev_t *mddev, int new_layout, int new_chunk) if (new_layout >= 0 && !algorithm_valid_raid5(new_layout)) return -EINVAL; if (new_chunk > 0) { - if (new_chunk & (new_chunk-1)) - /* not a power of 2 */ + if (!is_power_of_2(new_chunk)) return -EINVAL; if (new_chunk < PAGE_SIZE) return -EINVAL; @@ -5206,8 +5205,7 @@ static int raid6_reconfig(mddev_t *mddev, int new_layout, int new_chunk) if (new_layout >= 0 && !algorithm_valid_raid6(new_layout)) return -EINVAL; if (new_chunk > 0) { - if (new_chunk & (new_chunk-1)) - /* not a power of 2 */ + if (!is_power_of_2(new_chunk)) return -EINVAL; if (new_chunk < PAGE_SIZE) return -EINVAL; -- cgit v1.2.3 From cdc2ae6d6a30df8fd92c5e300d0e3005e13eb6b0 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Thu, 18 Jun 2009 08:46:47 +1000 Subject: md: fix some comments. 1/ Raid5 has learned to take over also raid4 and raid6 arrays. 2/ new_chunk in mdp_superblock_1 is in sectors, not bytes. Signed-off-by: NeilBrown --- drivers/md/raid5.c | 2 -- include/linux/raid/md_p.h | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 72e8a324dcfb..009344156751 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5231,8 +5231,6 @@ static void *raid5_takeover(mddev_t *mddev) * raid1 - if there are two drives. We need to know the chunk size * raid4 - trivial - just use a raid4 layout. * raid6 - Providing it is a *_6 layout - * - * For now, just do raid1 */ if (mddev->level == 1) diff --git a/include/linux/raid/md_p.h b/include/linux/raid/md_p.h index 6ba830fa8538..ffa2efbbe382 100644 --- a/include/linux/raid/md_p.h +++ b/include/linux/raid/md_p.h @@ -232,7 +232,7 @@ struct mdp_superblock_1 { __le64 reshape_position; /* next address in array-space for reshape */ __le32 delta_disks; /* change in number of raid_disks */ __le32 new_layout; /* new layout */ - __le32 new_chunk; /* new chunk size (bytes) */ + __le32 new_chunk; /* new chunk size (512byte sectors) */ __u8 pad1[128-124]; /* set to 0 when written */ /* constant this-device information - 64 bytes */ -- cgit v1.2.3 From 01ee22b496c41384eaa6dcae983c86d8bc32fbb8 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 18 Jun 2009 08:47:20 +1000 Subject: md: raid5: check stripe cache is large enough in start_reshape In reshape cases that do not change the number of devices, start_reshape is called without first calling check_reshape. Currently, the check that the stripe_cache is large enough is only done in check_reshape. It should be in start_reshape too. Signed-off-by: NeilBrown --- drivers/md/raid5.c | 44 +++++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 17 deletions(-) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 009344156751..b84766e347c3 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4845,6 +4845,29 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors) return 0; } +static int check_stripe_cache(mddev_t *mddev) +{ + /* Can only proceed if there are plenty of stripe_heads. + * We need a minimum of one full stripe,, and for sensible progress + * it is best to have about 4 times that. + * If we require 4 times, then the default 256 4K stripe_heads will + * allow for chunk sizes up to 256K, which is probably OK. + * If the chunk size is greater, user-space should request more + * stripe_heads first. + */ + raid5_conf_t *conf = mddev->private; + if (((mddev->chunk_sectors << 9) / STRIPE_SIZE) * 4 + > conf->max_nr_stripes || + ((mddev->new_chunk_sectors << 9) / STRIPE_SIZE) * 4 + > conf->max_nr_stripes) { + printk(KERN_WARNING "raid5: reshape: not enough stripes. Needed %lu\n", + ((max(mddev->chunk_sectors, mddev->new_chunk_sectors) << 9) + / STRIPE_SIZE)*4); + return 0; + } + return 1; +} + static int raid5_check_reshape(mddev_t *mddev) { raid5_conf_t *conf = mddev->private; @@ -4871,24 +4894,8 @@ static int raid5_check_reshape(mddev_t *mddev) return -EINVAL; } - /* Can only proceed if there are plenty of stripe_heads. - * We need a minimum of one full stripe,, and for sensible progress - * it is best to have about 4 times that. - * If we require 4 times, then the default 256 4K stripe_heads will - * allow for chunk sizes up to 256K, which is probably OK. - * If the chunk size is greater, user-space should request more - * stripe_heads first. - */ - if (((mddev->chunk_sectors << 9) / STRIPE_SIZE) * 4 - > conf->max_nr_stripes || - ((mddev->new_chunk_sectors << 9) / STRIPE_SIZE) * 4 - > conf->max_nr_stripes) { - printk(KERN_WARNING "raid5: reshape: not enough stripes. Needed %lu\n", - (max(mddev->chunk_sectors << 9, - mddev->new_chunk_sectors << 9) - / STRIPE_SIZE)*4); + if (!check_stripe_cache(mddev)) return -ENOSPC; - } return resize_stripes(conf, conf->raid_disks + mddev->delta_disks); } @@ -4904,6 +4911,9 @@ static int raid5_start_reshape(mddev_t *mddev) if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) return -EBUSY; + if (!check_stripe_cache(mddev)) + return -ENOSPC; + list_for_each_entry(rdev, &mddev->disks, same_set) if (rdev->raid_disk < 0 && !test_bit(Faulty, &rdev->flags)) -- cgit v1.2.3 From 597a711b69cfff95c4b8f6069037e7ad3fc71f56 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 18 Jun 2009 08:47:42 +1000 Subject: md: remove unnecessary arguments from ->reconfig method. Passing the new layout and chunksize as args is not necessary as the mddev has fields for new_check and new_layout. This is preparation for combining the check_reshape and reconfig methods Signed-off-by: NeilBrown --- drivers/md/faulty.c | 13 +++++++------ drivers/md/md.c | 23 +++++++++++++++++------ drivers/md/md.h | 2 +- drivers/md/raid5.c | 42 ++++++++++++++++-------------------------- 4 files changed, 41 insertions(+), 39 deletions(-) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c index 8695809b24b0..6513b7b3e379 100644 --- a/drivers/md/faulty.c +++ b/drivers/md/faulty.c @@ -255,14 +255,14 @@ static void status(struct seq_file *seq, mddev_t *mddev) } -static int reconfig(mddev_t *mddev, int layout, int chunk_size) +static int reconfig(mddev_t *mddev) { - int mode = layout & ModeMask; - int count = layout >> ModeShift; + int mode = mddev->new_layout & ModeMask; + int count = mddev->new_layout >> ModeShift; conf_t *conf = mddev->private; - if (chunk_size != -1) - return -EINVAL; + if (mddev->new_layout < 0) + return 0; /* new layout */ if (mode == ClearFaults) @@ -279,6 +279,7 @@ static int reconfig(mddev_t *mddev, int layout, int chunk_size) atomic_set(&conf->counters[mode], count); } else return -EINVAL; + mddev->new_layout = -1; mddev->layout = -1; /* makes sure further changes come through */ return 0; } @@ -315,7 +316,7 @@ static int run(mddev_t *mddev) md_set_array_sectors(mddev, faulty_size(mddev, 0, 0)); mddev->private = conf; - reconfig(mddev, mddev->layout, -1); + reconfig(mddev); return 0; } diff --git a/drivers/md/md.c b/drivers/md/md.c index f996d8342a85..5caa421c2367 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2809,9 +2809,12 @@ layout_store(mddev_t *mddev, const char *buf, size_t len) int err; if (mddev->pers->reconfig == NULL) return -EBUSY; - err = mddev->pers->reconfig(mddev, n, -1); - if (err) + mddev->new_layout = n; + err = mddev->pers->reconfig(mddev); + if (err) { + mddev->new_layout = mddev->layout; return err; + } } else { mddev->new_layout = n; if (mddev->reshape_position == MaxSector) @@ -2884,9 +2887,12 @@ chunk_size_store(mddev_t *mddev, const char *buf, size_t len) int err; if (mddev->pers->reconfig == NULL) return -EBUSY; - err = mddev->pers->reconfig(mddev, -1, n); - if (err) + mddev->new_chunk_sectors = n >> 9; + err = mddev->pers->reconfig(mddev); + if (err) { + mddev->new_chunk_sectors = mddev->chunk_sectors; return err; + } } else { mddev->new_chunk_sectors = n >> 9; if (mddev->reshape_position == MaxSector) @@ -5220,8 +5226,13 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info) */ if (mddev->pers->reconfig == NULL) return -EINVAL; - else - return mddev->pers->reconfig(mddev, info->layout, -1); + else { + mddev->new_layout = info->layout; + rv = mddev->pers->reconfig(mddev); + if (rv) + mddev->new_layout = mddev->layout; + return rv; + } } if (info->size >= 0 && mddev->dev_sectors / 2 != info->size) rv = update_size(mddev, (sector_t)info->size * 2); diff --git a/drivers/md/md.h b/drivers/md/md.h index e0a2b8e3985d..815013f8da6c 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -326,7 +326,7 @@ struct mdk_personality int (*check_reshape) (mddev_t *mddev); int (*start_reshape) (mddev_t *mddev); void (*finish_reshape) (mddev_t *mddev); - int (*reconfig) (mddev_t *mddev, int layout, int chunk_size); + int (*reconfig) (mddev_t *mddev); /* quiesce moves between quiescence states * 0 - fully active * 1 - no new requests allowed diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index b84766e347c3..136051bc6725 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5165,7 +5165,7 @@ static void *raid5_takeover_raid6(mddev_t *mddev) } -static int raid5_reconfig(mddev_t *mddev, int new_layout, int new_chunk) +static int raid5_reconfig(mddev_t *mddev) { /* For a 2-drive array, the layout and chunk size can be changed * immediately as not restriping is needed. @@ -5173,15 +5173,16 @@ static int raid5_reconfig(mddev_t *mddev, int new_layout, int new_chunk) * to be used by a reshape pass. */ raid5_conf_t *conf = mddev->private; + int new_chunk = mddev->new_chunk_sectors; - if (new_layout >= 0 && !algorithm_valid_raid5(new_layout)) + if (mddev->new_layout >= 0 && !algorithm_valid_raid5(mddev->new_layout)) return -EINVAL; if (new_chunk > 0) { if (!is_power_of_2(new_chunk)) return -EINVAL; - if (new_chunk < PAGE_SIZE) + if (new_chunk < (PAGE_SIZE>>9)) return -EINVAL; - if (mddev->array_sectors & ((new_chunk>>9)-1)) + if (mddev->array_sectors & (new_chunk-1)) /* not factor of array size */ return -EINVAL; } @@ -5189,48 +5190,37 @@ static int raid5_reconfig(mddev_t *mddev, int new_layout, int new_chunk) /* They look valid */ if (mddev->raid_disks == 2) { - - if (new_layout >= 0) { - conf->algorithm = new_layout; - mddev->layout = mddev->new_layout = new_layout; + /* can make the change immediately */ + if (mddev->new_layout >= 0) { + conf->algorithm = mddev->new_layout; + mddev->layout = mddev->new_layout; } if (new_chunk > 0) { - conf->chunk_sectors = new_chunk >> 9; - mddev->new_chunk_sectors = new_chunk >> 9; - mddev->chunk_sectors = new_chunk >> 9; + conf->chunk_sectors = new_chunk ; + mddev->chunk_sectors = new_chunk; } set_bit(MD_CHANGE_DEVS, &mddev->flags); md_wakeup_thread(mddev->thread); - } else { - if (new_layout >= 0) - mddev->new_layout = new_layout; - if (new_chunk > 0) - mddev->new_chunk_sectors = new_chunk >> 9; } return 0; } -static int raid6_reconfig(mddev_t *mddev, int new_layout, int new_chunk) +static int raid6_reconfig(mddev_t *mddev) { - if (new_layout >= 0 && !algorithm_valid_raid6(new_layout)) + int new_chunk = mddev->new_chunk_sectors; + if (mddev->new_layout >= 0 && !algorithm_valid_raid6(mddev->new_layout)) return -EINVAL; if (new_chunk > 0) { if (!is_power_of_2(new_chunk)) return -EINVAL; - if (new_chunk < PAGE_SIZE) + if (new_chunk < (PAGE_SIZE >> 9)) return -EINVAL; - if (mddev->array_sectors & ((new_chunk>>9)-1)) + if (mddev->array_sectors & (new_chunk-1)) /* not factor of array size */ return -EINVAL; } /* They look valid */ - - if (new_layout >= 0) - mddev->new_layout = new_layout; - if (new_chunk > 0) - mddev->new_chunk_sectors = new_chunk >> 9; - return 0; } -- cgit v1.2.3 From 50ac168a6e0a061bf5346d53aa9e7beb94c97527 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 18 Jun 2009 08:47:55 +1000 Subject: md: merge reconfig and check_reshape methods. The difference between these two methods is artificial. Both check that a pending reshape is valid, and perform any aspect of it that can be done immediately. 'reconfig' handles chunk size and layout. 'check_reshape' handles raid_disks. So make them just one method. Signed-off-by: NeilBrown --- drivers/md/faulty.c | 6 +++--- drivers/md/md.c | 15 ++++++++------- drivers/md/md.h | 1 - drivers/md/raid5.c | 17 ++++++++--------- 4 files changed, 19 insertions(+), 20 deletions(-) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c index 6513b7b3e379..6e83b38d931d 100644 --- a/drivers/md/faulty.c +++ b/drivers/md/faulty.c @@ -255,7 +255,7 @@ static void status(struct seq_file *seq, mddev_t *mddev) } -static int reconfig(mddev_t *mddev) +static int reshape(mddev_t *mddev) { int mode = mddev->new_layout & ModeMask; int count = mddev->new_layout >> ModeShift; @@ -316,7 +316,7 @@ static int run(mddev_t *mddev) md_set_array_sectors(mddev, faulty_size(mddev, 0, 0)); mddev->private = conf; - reconfig(mddev); + reshape(mddev); return 0; } @@ -339,7 +339,7 @@ static struct mdk_personality faulty_personality = .run = run, .stop = stop, .status = status, - .reconfig = reconfig, + .check_reshape = reshape, .size = faulty_size, }; diff --git a/drivers/md/md.c b/drivers/md/md.c index 5caa421c2367..80f039ec3ac2 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2807,10 +2807,10 @@ layout_store(mddev_t *mddev, const char *buf, size_t len) if (mddev->pers) { int err; - if (mddev->pers->reconfig == NULL) + if (mddev->pers->check_reshape == NULL) return -EBUSY; mddev->new_layout = n; - err = mddev->pers->reconfig(mddev); + err = mddev->pers->check_reshape(mddev); if (err) { mddev->new_layout = mddev->layout; return err; @@ -2885,10 +2885,10 @@ chunk_size_store(mddev_t *mddev, const char *buf, size_t len) if (mddev->pers) { int err; - if (mddev->pers->reconfig == NULL) + if (mddev->pers->check_reshape == NULL) return -EBUSY; mddev->new_chunk_sectors = n >> 9; - err = mddev->pers->reconfig(mddev); + err = mddev->pers->check_reshape(mddev); if (err) { mddev->new_chunk_sectors = mddev->chunk_sectors; return err; @@ -5224,11 +5224,11 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info) * we don't need to do anything at the md level, the * personality will take care of it all. */ - if (mddev->pers->reconfig == NULL) + if (mddev->pers->check_reshape == NULL) return -EINVAL; else { mddev->new_layout = info->layout; - rv = mddev->pers->reconfig(mddev); + rv = mddev->pers->check_reshape(mddev); if (rv) mddev->new_layout = mddev->layout; return rv; @@ -6731,7 +6731,8 @@ void md_check_recovery(mddev_t *mddev) */ if (mddev->reshape_position != MaxSector) { - if (mddev->pers->check_reshape(mddev) != 0) + if (mddev->pers->check_reshape == NULL || + mddev->pers->check_reshape(mddev) != 0) /* Cannot proceed */ goto unlock; set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); diff --git a/drivers/md/md.h b/drivers/md/md.h index 815013f8da6c..bac7c2bf8616 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -326,7 +326,6 @@ struct mdk_personality int (*check_reshape) (mddev_t *mddev); int (*start_reshape) (mddev_t *mddev); void (*finish_reshape) (mddev_t *mddev); - int (*reconfig) (mddev_t *mddev); /* quiesce moves between quiescence states * 0 - fully active * 1 - no new requests allowed diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 136051bc6725..5ea2bdece278 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4868,14 +4868,14 @@ static int check_stripe_cache(mddev_t *mddev) return 1; } -static int raid5_check_reshape(mddev_t *mddev) +static int check_reshape(mddev_t *mddev) { raid5_conf_t *conf = mddev->private; if (mddev->delta_disks == 0 && mddev->new_layout == mddev->layout && mddev->new_chunk_sectors == mddev->chunk_sectors) - return -EINVAL; /* nothing to do */ + return 0; /* nothing to do */ if (mddev->bitmap) /* Cannot grow a bitmap yet */ return -EBUSY; @@ -5165,7 +5165,7 @@ static void *raid5_takeover_raid6(mddev_t *mddev) } -static int raid5_reconfig(mddev_t *mddev) +static int raid5_check_reshape(mddev_t *mddev) { /* For a 2-drive array, the layout and chunk size can be changed * immediately as not restriping is needed. @@ -5202,12 +5202,13 @@ static int raid5_reconfig(mddev_t *mddev) set_bit(MD_CHANGE_DEVS, &mddev->flags); md_wakeup_thread(mddev->thread); } - return 0; + return check_reshape(mddev); } -static int raid6_reconfig(mddev_t *mddev) +static int raid6_check_reshape(mddev_t *mddev) { int new_chunk = mddev->new_chunk_sectors; + if (mddev->new_layout >= 0 && !algorithm_valid_raid6(mddev->new_layout)) return -EINVAL; if (new_chunk > 0) { @@ -5221,7 +5222,7 @@ static int raid6_reconfig(mddev_t *mddev) } /* They look valid */ - return 0; + return check_reshape(mddev); } static void *raid5_takeover(mddev_t *mddev) @@ -5312,12 +5313,11 @@ static struct mdk_personality raid6_personality = .sync_request = sync_request, .resize = raid5_resize, .size = raid5_size, - .check_reshape = raid5_check_reshape, + .check_reshape = raid6_check_reshape, .start_reshape = raid5_start_reshape, .finish_reshape = raid5_finish_reshape, .quiesce = raid5_quiesce, .takeover = raid6_takeover, - .reconfig = raid6_reconfig, }; static struct mdk_personality raid5_personality = { @@ -5340,7 +5340,6 @@ static struct mdk_personality raid5_personality = .finish_reshape = raid5_finish_reshape, .quiesce = raid5_quiesce, .takeover = raid5_takeover, - .reconfig = raid5_reconfig, }; static struct mdk_personality raid4_personality = -- cgit v1.2.3 From 8c6ac868b107ed50a46204f6d14e2ad9443ff146 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Thu, 18 Jun 2009 08:48:06 +1000 Subject: md: Push down reconstruction log message to personality code. Currently, the md layer checks in analyze_sbs() if the raid level supports reconstruction (mddev->level >= 1) and if reconstruction is in progress (mddev->recovery_cp != MaxSector). Move that printk into the personality code of those raid levels that care (levels 1, 4, 5, 6, 10). Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/md.c | 9 --------- drivers/md/raid1.c | 4 ++++ drivers/md/raid10.c | 4 ++++ drivers/md/raid5.c | 4 ++++ 4 files changed, 12 insertions(+), 9 deletions(-) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 80f039ec3ac2..90147370bfd7 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2604,15 +2604,6 @@ static void analyze_sbs(mddev_t * mddev) clear_bit(In_sync, &rdev->flags); } } - - - - if (mddev->recovery_cp != MaxSector && - mddev->level >= 1) - printk(KERN_ERR "md: %s: raid array is not clean" - " -- starting background reconstruction\n", - mdname(mddev)); - } static void md_safemode_timeout(unsigned long data); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 12f8f34f17ae..89939a7aef57 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -2052,6 +2052,10 @@ static int run(mddev_t *mddev) goto out_free_conf; } + if (mddev->recovery_cp != MaxSector) + printk(KERN_NOTICE "raid1: %s is not clean" + " -- starting background reconstruction\n", + mdname(mddev)); printk(KERN_INFO "raid1: raid set %s active with %d out of %d mirrors\n", mdname(mddev), mddev->raid_disks - mddev->degraded, diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 30029a312cf5..ae12ceafe10c 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2186,6 +2186,10 @@ static int run(mddev_t *mddev) goto out_free_conf; } + if (mddev->recovery_cp != MaxSector) + printk(KERN_NOTICE "raid10: %s is not clean" + " -- starting background reconstruction\n", + mdname(mddev)); printk(KERN_INFO "raid10: raid set %s active with %d out of %d devices\n", mdname(mddev), mddev->raid_disks - mddev->degraded, diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 5ea2bdece278..8f2e3740a05a 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4454,6 +4454,10 @@ static int run(mddev_t *mddev) int working_disks = 0; mdk_rdev_t *rdev; + if (mddev->recovery_cp != MaxSector) + printk(KERN_NOTICE "raid5: %s is not clean" + " -- starting background reconstruction\n", + mdname(mddev)); if (mddev->reshape_position != MaxSector) { /* Check that we can continue the reshape. * Currently only disks can change, it must -- cgit v1.2.3 From 7a3ab908948b6296ee7e81d42f7c176361c51975 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 16 Jun 2009 16:00:33 -0700 Subject: md/raid5: add missing call to schedule() after prepare_to_wait() In the unlikely event that reshape progresses past the current request while it is waiting for a stripe we need to schedule() before retrying for 2 reasons: 1/ Prevent list corruption from duplicated list_add() calls without intervening list_del(). 2/ Give the reshape code a chance to make some progress to resolve the conflict. Cc: Signed-off-by: Dan Williams Signed-off-by: NeilBrown --- drivers/md/raid5.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 8f2e3740a05a..59e29c2983cb 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3695,6 +3695,7 @@ static int make_request(struct request_queue *q, struct bio * bi) spin_unlock_irq(&conf->device_lock); if (must_retry) { release_stripe(sh); + schedule(); goto retry; } } -- cgit v1.2.3 From 48606a9f2fc034f0b308d088c1f7ab6d407c462c Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 18 Jun 2009 09:14:12 +1000 Subject: md/raid5: correctly update sync_completed when we reach max_resync At the end of reshape_request we update cyrr_resync_completed if we are about to pause due to reaching resync_max. However we update it to the wrong value. We need to add the "reshape_sectors" that have just been reshaped. Signed-off-by: NeilBrown --- drivers/md/raid5.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 59e29c2983cb..f9f991e6e138 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3946,7 +3946,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped wait_event(conf->wait_for_overlap, atomic_read(&conf->reshape_stripes) == 0); mddev->reshape_position = conf->reshape_progress; - mddev->curr_resync_completed = mddev->curr_resync; + mddev->curr_resync_completed = mddev->curr_resync + reshape_sectors; conf->reshape_checkpoint = jiffies; set_bit(MD_CHANGE_DEVS, &mddev->flags); md_wakeup_thread(mddev->thread); -- cgit v1.2.3 From 8f6c2e4b325a8e9f8f47febb2fd0ed4fae7d45a9 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 1 Jul 2009 11:13:45 +1000 Subject: md: Use new topology calls to indicate alignment and I/O sizes Switch MD over to the new disk_stack_limits() function which checks for aligment and adjusts preferred I/O sizes when stacking. Also indicate preferred I/O sizes where applicable. Signed-off-by: Martin K. Petersen Signed-off-by: Mike Snitzer Signed-off-by: NeilBrown --- drivers/md/linear.c | 4 ++-- drivers/md/multipath.c | 7 ++++--- drivers/md/raid0.c | 9 +++++++-- drivers/md/raid1.c | 9 ++++----- drivers/md/raid10.c | 19 +++++++++++++------ drivers/md/raid5.c | 10 +++++++++- 6 files changed, 39 insertions(+), 19 deletions(-) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 15c8b7b25a9b..5810fa906af0 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -166,8 +166,8 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) rdev->sectors = sectors * mddev->chunk_sectors; } - blk_queue_stack_limits(mddev->queue, - rdev->bdev->bd_disk->queue); + disk_stack_limits(mddev->gendisk, rdev->bdev, + rdev->data_offset << 9); /* as we don't honour merge_bvec_fn, we must never risk * violating it, so limit ->max_sector to one PAGE, as * a one page request is never in violation. diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index cbe368fa6598..237fe3fd235c 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -294,7 +294,8 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) for (path = first; path <= last; path++) if ((p=conf->multipaths+path)->rdev == NULL) { q = rdev->bdev->bd_disk->queue; - blk_queue_stack_limits(mddev->queue, q); + disk_stack_limits(mddev->gendisk, rdev->bdev, + rdev->data_offset << 9); /* as we don't honour merge_bvec_fn, we must never risk * violating it, so limit ->max_sector to one PAGE, as @@ -463,9 +464,9 @@ static int multipath_run (mddev_t *mddev) disk = conf->multipaths + disk_idx; disk->rdev = rdev; + disk_stack_limits(mddev->gendisk, rdev->bdev, + rdev->data_offset << 9); - blk_queue_stack_limits(mddev->queue, - rdev->bdev->bd_disk->queue); /* as we don't honour merge_bvec_fn, we must never risk * violating it, not that we ever expect a device with * a merge_bvec_fn to be involved in multipath */ diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index ab4a489d8695..335f490dcad6 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -170,8 +170,8 @@ static int create_strip_zones(mddev_t *mddev) } dev[j] = rdev1; - blk_queue_stack_limits(mddev->queue, - rdev1->bdev->bd_disk->queue); + disk_stack_limits(mddev->gendisk, rdev1->bdev, + rdev1->data_offset << 9); /* as we don't honour merge_bvec_fn, we must never risk * violating it, so limit ->max_sector to one PAGE, as * a one page request is never in violation. @@ -250,6 +250,11 @@ static int create_strip_zones(mddev_t *mddev) mddev->chunk_sectors << 9); goto abort; } + + blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9); + blk_queue_io_opt(mddev->queue, + (mddev->chunk_sectors << 9) * mddev->raid_disks); + printk(KERN_INFO "raid0: done.\n"); mddev->private = conf; return 0; diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 89939a7aef57..0569efba0c02 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1123,8 +1123,8 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) for (mirror = first; mirror <= last; mirror++) if ( !(p=conf->mirrors+mirror)->rdev) { - blk_queue_stack_limits(mddev->queue, - rdev->bdev->bd_disk->queue); + disk_stack_limits(mddev->gendisk, rdev->bdev, + rdev->data_offset << 9); /* as we don't honour merge_bvec_fn, we must never risk * violating it, so limit ->max_sector to one PAGE, as * a one page request is never in violation. @@ -1988,9 +1988,8 @@ static int run(mddev_t *mddev) disk = conf->mirrors + disk_idx; disk->rdev = rdev; - - blk_queue_stack_limits(mddev->queue, - rdev->bdev->bd_disk->queue); + disk_stack_limits(mddev->gendisk, rdev->bdev, + rdev->data_offset << 9); /* as we don't honour merge_bvec_fn, we must never risk * violating it, so limit ->max_sector to one PAGE, as * a one page request is never in violation. diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index ae12ceafe10c..7298a5e5a183 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1151,8 +1151,8 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) for ( ; mirror <= last ; mirror++) if ( !(p=conf->mirrors+mirror)->rdev) { - blk_queue_stack_limits(mddev->queue, - rdev->bdev->bd_disk->queue); + disk_stack_limits(mddev->gendisk, rdev->bdev, + rdev->data_offset << 9); /* as we don't honour merge_bvec_fn, we must never risk * violating it, so limit ->max_sector to one PAGE, as * a one page request is never in violation. @@ -2044,7 +2044,7 @@ raid10_size(mddev_t *mddev, sector_t sectors, int raid_disks) static int run(mddev_t *mddev) { conf_t *conf; - int i, disk_idx; + int i, disk_idx, chunk_size; mirror_info_t *disk; mdk_rdev_t *rdev; int nc, fc, fo; @@ -2130,6 +2130,14 @@ static int run(mddev_t *mddev) spin_lock_init(&conf->device_lock); mddev->queue->queue_lock = &conf->device_lock; + chunk_size = mddev->chunk_sectors << 9; + blk_queue_io_min(mddev->queue, chunk_size); + if (conf->raid_disks % conf->near_copies) + blk_queue_io_opt(mddev->queue, chunk_size * conf->raid_disks); + else + blk_queue_io_opt(mddev->queue, chunk_size * + (conf->raid_disks / conf->near_copies)); + list_for_each_entry(rdev, &mddev->disks, same_set) { disk_idx = rdev->raid_disk; if (disk_idx >= mddev->raid_disks @@ -2138,9 +2146,8 @@ static int run(mddev_t *mddev) disk = conf->mirrors + disk_idx; disk->rdev = rdev; - - blk_queue_stack_limits(mddev->queue, - rdev->bdev->bd_disk->queue); + disk_stack_limits(mddev->gendisk, rdev->bdev, + rdev->data_offset << 9); /* as we don't honour merge_bvec_fn, we must never risk * violating it, so limit ->max_sector to one PAGE, as * a one page request is never in violation. diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index f9f991e6e138..92ef9b6abfc7 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4452,7 +4452,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) static int run(mddev_t *mddev) { raid5_conf_t *conf; - int working_disks = 0; + int working_disks = 0, chunk_size; mdk_rdev_t *rdev; if (mddev->recovery_cp != MaxSector) @@ -4607,6 +4607,14 @@ static int run(mddev_t *mddev) md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec); + chunk_size = mddev->chunk_sectors << 9; + blk_queue_io_min(mddev->queue, chunk_size); + blk_queue_io_opt(mddev->queue, chunk_size * + (conf->raid_disks - conf->max_degraded)); + + list_for_each_entry(rdev, &mddev->disks, same_set) + disk_stack_limits(mddev->gendisk, rdev->bdev, + rdev->data_offset << 9); return 0; abort: -- cgit v1.2.3 From a5c308d4d1659b1f4833b863394e3e24cdbdfc6e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 1 Jul 2009 13:15:35 +1000 Subject: md/raid5: suspend shouldn't affect read requests. md allows write to regions on an array to be suspended temporarily. This allows user-space to participate is aspects of reshape. In particular, data can be copied with not risk of a race. We should not be blocking read requests though, so don't. Cc: stable@kernel.org Signed-off-by: NeilBrown --- drivers/md/raid5.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 92ef9b6abfc7..1f444ae07f89 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3702,7 +3702,8 @@ static int make_request(struct request_queue *q, struct bio * bi) /* FIXME what if we get a false positive because these * are being updated. */ - if (logical_sector >= mddev->suspend_lo && + if (bio_data_dir(bi) == WRITE && + logical_sector >= mddev->suspend_lo && logical_sector < mddev->suspend_hi) { release_stripe(sh); schedule(); -- cgit v1.2.3 From e62e58a5ffdc98ac28d8dbd070c857620d541f99 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 1 Jul 2009 13:15:35 +1000 Subject: md: use interruptible wait when duration is controlled by userspace. User space can set various limits on an md array so that resync waits when it gets to a certain point, or so that I/O is blocked for a short while. When md is waiting against one of these limit, it should use an interruptible wait so as not to add to the load average, and so are not to trigger a warning if the wait goes on for too long. Signed-off-by: NeilBrown --- drivers/md/md.c | 14 ++++++++++---- drivers/md/raid5.c | 15 +++++++++++---- 2 files changed, 21 insertions(+), 8 deletions(-) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 65fe35b5e34a..0f4a70c43ffc 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6336,10 +6336,16 @@ void md_do_sync(mddev_t *mddev) sysfs_notify(&mddev->kobj, NULL, "sync_completed"); } - if (j >= mddev->resync_max) - wait_event(mddev->recovery_wait, - mddev->resync_max > j - || kthread_should_stop()); + while (j >= mddev->resync_max && !kthread_should_stop()) { + /* As this condition is controlled by user-space, + * we can block indefinitely, so use '_interruptible' + * to avoid triggering warnings. + */ + flush_signals(current); /* just in case */ + wait_event_interruptible(mddev->recovery_wait, + mddev->resync_max > j + || kthread_should_stop()); + } if (kthread_should_stop()) goto interrupted; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 1f444ae07f89..37835538b58e 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3699,14 +3699,21 @@ static int make_request(struct request_queue *q, struct bio * bi) goto retry; } } - /* FIXME what if we get a false positive because these - * are being updated. - */ + if (bio_data_dir(bi) == WRITE && logical_sector >= mddev->suspend_lo && logical_sector < mddev->suspend_hi) { release_stripe(sh); - schedule(); + /* As the suspend_* range is controlled by + * userspace, we want an interruptible + * wait. + */ + flush_signals(current); + prepare_to_wait(&conf->wait_for_overlap, + &w, TASK_INTERRUPTIBLE); + if (logical_sector >= mddev->suspend_lo && + logical_sector < mddev->suspend_hi) + schedule(); goto retry; } -- cgit v1.2.3 From 95fc17aac45300f45968aacd97a536ddd8db8101 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 31 Jul 2009 12:39:15 +1000 Subject: md/raid6: release spare page at ->stop() Add missing call to safe_put_page from stop() by unifying open coded raid5_conf_t de-allocation under free_conf(). Cc: Signed-off-by: Dan Williams Signed-off-by: NeilBrown --- drivers/md/raid5.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 37835538b58e..39374230a463 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4316,6 +4316,15 @@ raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks) return sectors * (raid_disks - conf->max_degraded); } +static void free_conf(raid5_conf_t *conf) +{ + shrink_stripes(conf); + safe_put_page(conf->spare_page); + kfree(conf->disks); + kfree(conf->stripe_hashtbl); + kfree(conf); +} + static raid5_conf_t *setup_conf(mddev_t *mddev) { raid5_conf_t *conf; @@ -4447,11 +4456,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) abort: if (conf) { - shrink_stripes(conf); - safe_put_page(conf->spare_page); - kfree(conf->disks); - kfree(conf->stripe_hashtbl); - kfree(conf); + free_conf(conf); return ERR_PTR(-EIO); } else return ERR_PTR(-ENOMEM); @@ -4629,12 +4634,8 @@ abort: md_unregister_thread(mddev->thread); mddev->thread = NULL; if (conf) { - shrink_stripes(conf); print_raid5_conf(conf); - safe_put_page(conf->spare_page); - kfree(conf->disks); - kfree(conf->stripe_hashtbl); - kfree(conf); + free_conf(conf); } mddev->private = NULL; printk(KERN_ALERT "raid5: failed to run raid set %s\n", mdname(mddev)); @@ -4649,13 +4650,10 @@ static int stop(mddev_t *mddev) md_unregister_thread(mddev->thread); mddev->thread = NULL; - shrink_stripes(conf); - kfree(conf->stripe_hashtbl); mddev->queue->backing_dev_info.congested_fn = NULL; blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ sysfs_remove_group(&mddev->kobj, &raid5_attrs_group); - kfree(conf->disks); - kfree(conf); + free_conf(conf); mddev->private = NULL; return 0; } -- cgit v1.2.3 From e516402c0d4fc02be4af9fa8c18954d4f9deb44e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 3 Aug 2009 10:59:57 +1000 Subject: md/raid5: set reshape_position correctly when reshape starts. As the internal reshape_progress counter is the main driver for reshape, the fact that reshape_position sometimes starts with the wrong value has minimal effect. It is visible in sysfs and that is all. Signed-off-by: NeilBrown --- drivers/md/raid5.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 39374230a463..659151e5eda4 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5000,7 +5000,7 @@ static int raid5_start_reshape(mddev_t *mddev) spin_unlock_irqrestore(&conf->device_lock, flags); } mddev->raid_disks = conf->raid_disks; - mddev->reshape_position = 0; + mddev->reshape_position = conf->reshape_progress; set_bit(MD_CHANGE_DEVS, &mddev->flags); clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); -- cgit v1.2.3 From 64bd660b51b2da92e99a5e97349f6558349f11c5 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 3 Aug 2009 10:59:58 +1000 Subject: md: allow raid5_quiesce to work properly when reshape is happening. The ->quiesce method is not supposed to stop resync/recovery/reshape, just normal IO. But in raid5 we don't have a way to know which stripes are being used for normal IO and which for resync etc, so we need to wait for all stripes to be idle to be sure that all writes have completed. However reshape keeps at least some stripe busy for an extended period of time, so a call to raid5_quiesce can block for several seconds needlessly. So arrange for reshape etc to pause briefly while raid5_quiesce is trying to quiesce the array so that the active_stripes count can drop to zero. Signed-off-by: NeilBrown --- drivers/md/raid5.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 659151e5eda4..2dc35b4c20ac 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3999,6 +3999,9 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski return 0; } + /* Allow raid5_quiesce to complete */ + wait_event(conf->wait_for_overlap, conf->quiesce != 2); + if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) return reshape_request(mddev, sector_nr, skipped); @@ -5104,12 +5107,18 @@ static void raid5_quiesce(mddev_t *mddev, int state) case 1: /* stop all writes */ spin_lock_irq(&conf->device_lock); - conf->quiesce = 1; + /* '2' tells resync/reshape to pause so that all + * active stripes can drain + */ + conf->quiesce = 2; wait_event_lock_irq(conf->wait_for_stripe, atomic_read(&conf->active_stripes) == 0 && atomic_read(&conf->active_aligned_reads) == 0, conf->device_lock, /* nothing */); + conf->quiesce = 1; spin_unlock_irq(&conf->device_lock); + /* allow reshape to continue */ + wake_up(&conf->wait_for_overlap); break; case 0: /* re-enable writes */ -- cgit v1.2.3 From 449aad3e25358812c43afc60918c5ad3819488e7 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 3 Aug 2009 10:59:58 +1000 Subject: md: Use revalidate_disk to effect changes in size of device. As revalidate_disk calls check_disk_size_change, it will cause any capacity change of a gendisk to be propagated to the blockdev inode. So use that instead of mucking about with locks and i_size_write. Also add a call to revalidate_disk in do_md_run and a few other places where the gendisk capacity is changed. Signed-off-by: NeilBrown --- drivers/md/linear.c | 1 + drivers/md/md.c | 28 +++++----------------------- drivers/md/raid1.c | 1 + drivers/md/raid5.c | 12 ++---------- 4 files changed, 9 insertions(+), 33 deletions(-) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 54c8677f1e59..5fe39c2a3d2b 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -257,6 +257,7 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev) rcu_assign_pointer(mddev->private, newconf); md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); set_capacity(mddev->gendisk, mddev->array_sectors); + revalidate_disk(mddev->gendisk); call_rcu(&oldconf->rcu, free_conf); return 0; } diff --git a/drivers/md/md.c b/drivers/md/md.c index 52c988b072d0..5b98bea4ff9b 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3741,17 +3741,8 @@ array_size_store(mddev_t *mddev, const char *buf, size_t len) mddev->array_sectors = sectors; set_capacity(mddev->gendisk, mddev->array_sectors); - if (mddev->pers) { - struct block_device *bdev = bdget_disk(mddev->gendisk, 0); - - if (bdev) { - mutex_lock(&bdev->bd_inode->i_mutex); - i_size_write(bdev->bd_inode, - (loff_t)mddev->array_sectors << 9); - mutex_unlock(&bdev->bd_inode->i_mutex); - bdput(bdev); - } - } + if (mddev->pers) + revalidate_disk(mddev->gendisk); return len; } @@ -4241,6 +4232,7 @@ static int do_md_run(mddev_t * mddev) md_wakeup_thread(mddev->thread); md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ + revalidate_disk(mddev->gendisk); mddev->changed = 1; md_new_event(mddev); sysfs_notify_dirent(mddev->sysfs_state); @@ -5139,18 +5131,8 @@ static int update_size(mddev_t *mddev, sector_t num_sectors) return -ENOSPC; } rv = mddev->pers->resize(mddev, num_sectors); - if (!rv) { - struct block_device *bdev; - - bdev = bdget_disk(mddev->gendisk, 0); - if (bdev) { - mutex_lock(&bdev->bd_inode->i_mutex); - i_size_write(bdev->bd_inode, - (loff_t)mddev->array_sectors << 9); - mutex_unlock(&bdev->bd_inode->i_mutex); - bdput(bdev); - } - } + if (!rv) + revalidate_disk(mddev->gendisk); return rv; } diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 67e794d0097f..8726fd7ebce5 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -2134,6 +2134,7 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors) return -EINVAL; set_capacity(mddev->gendisk, mddev->array_sectors); mddev->changed = 1; + revalidate_disk(mddev->gendisk); if (sectors > mddev->dev_sectors && mddev->recovery_cp == MaxSector) { mddev->recovery_cp = mddev->dev_sectors; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 2dc35b4c20ac..2b521ee67dfa 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4858,6 +4858,7 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors) return -EINVAL; set_capacity(mddev->gendisk, mddev->array_sectors); mddev->changed = 1; + revalidate_disk(mddev->gendisk); if (sectors > mddev->dev_sectors && mddev->recovery_cp == MaxSector) { mddev->recovery_cp = mddev->dev_sectors; set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); @@ -5058,7 +5059,6 @@ static void end_reshape(raid5_conf_t *conf) */ static void raid5_finish_reshape(mddev_t *mddev) { - struct block_device *bdev; raid5_conf_t *conf = mddev->private; if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { @@ -5067,15 +5067,7 @@ static void raid5_finish_reshape(mddev_t *mddev) md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); set_capacity(mddev->gendisk, mddev->array_sectors); mddev->changed = 1; - - bdev = bdget_disk(mddev->gendisk, 0); - if (bdev) { - mutex_lock(&bdev->bd_inode->i_mutex); - i_size_write(bdev->bd_inode, - (loff_t)mddev->array_sectors << 9); - mutex_unlock(&bdev->bd_inode->i_mutex); - bdput(bdev); - } + revalidate_disk(mddev->gendisk); } else { int d; mddev->degraded = conf->raid_disks; -- cgit v1.2.3 From 67ac6011db5d2b0c853d573ff474b25c85dfb644 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 13 Aug 2009 10:06:24 +1000 Subject: md/raid5: allow new reshape modes to be restarted in the middle. md/raid5 doesn't allow a reshape to restart if it involves writing over the same part of disk that it would be reading from. This happens at the beginning of a reshape that increases the number of devices, at the end of a reshape that decreases the number of devices, and continuously for a reshape that does not change the number of devices. The current code is correct for the "increase number of devices" case as the critical section at the start is handled by userspace performing a backup. It does not work for reducing the number of devices, or the no-change case. For 'reducing', we need to invert the test. For no-change we cannot really be sure things will be safe, so simply require the array to be read-only, which is how the user-space code which carefully starts such arrays works. Signed-off-by: NeilBrown --- drivers/md/raid5.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 2b521ee67dfa..b8a22a2205cf 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4509,7 +4509,26 @@ static int run(mddev_t *mddev) (old_disks-max_degraded)); /* here_old is the first stripe that we might need to read * from */ - if (here_new >= here_old) { + if (mddev->delta_disks == 0) { + /* We cannot be sure it is safe to start an in-place + * reshape. It is only safe if user-space if monitoring + * and taking constant backups. + * mdadm always starts a situation like this in + * readonly mode so it can take control before + * allowing any writes. So just check for that. + */ + if ((here_new * mddev->new_chunk_sectors != + here_old * mddev->chunk_sectors) || + mddev->ro == 0) { + printk(KERN_ERR "raid5: in-place reshape must be started" + " in read-only mode - aborting\n"); + return -EINVAL; + } + } else if (mddev->delta_disks < 0 + ? (here_new * mddev->new_chunk_sectors <= + here_old * mddev->chunk_sectors) + : (here_new * mddev->new_chunk_sectors >= + here_old * mddev->chunk_sectors)) { /* Reading from the same stripe as writing to - bad */ printk(KERN_ERR "raid5: reshape_position too early for " "auto-recovery - aborting.\n"); -- cgit v1.2.3 From a639755cf885e437b2fe4168d35157fa90d530ab Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 13 Aug 2009 10:13:00 +1000 Subject: md/raid5: make sure a reshape restarts at the correct address. This "if" don't allow for the possibility that the number of devices doesn't change, and so sector_nr isn't set correctly in that case. So change '>' to '>='. Signed-off-by: NeilBrown --- drivers/md/raid5.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index b8a22a2205cf..94a74cb5cccb 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3785,7 +3785,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped conf->reshape_progress < raid5_size(mddev, 0, 0)) { sector_nr = raid5_size(mddev, 0, 0) - conf->reshape_progress; - } else if (mddev->delta_disks > 0 && + } else if (mddev->delta_disks >= 0 && conf->reshape_progress > 0) sector_nr = conf->reshape_progress; sector_div(sector_nr, new_data_disks); -- cgit v1.2.3 From 1a67dde0abba36421a1257d01ba9de2f6d1c160a Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 13 Aug 2009 10:41:49 +1000 Subject: md/raid5: Properly remove excess drives after shrinking a raid5/6 We were removing the drives, from the array, but not removing symlinks from /sys/.... and not marking the device as having been removed. Signed-off-by: NeilBrown --- drivers/md/raid5.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 94a74cb5cccb..b8a2c5dc67ba 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5097,8 +5097,15 @@ static void raid5_finish_reshape(mddev_t *mddev) mddev->degraded--; for (d = conf->raid_disks ; d < conf->raid_disks - mddev->delta_disks; - d++) - raid5_remove_disk(mddev, d); + d++) { + mdk_rdev_t *rdev = conf->disks[d].rdev; + if (rdev && raid5_remove_disk(mddev, d) == 0) { + char nm[20]; + sprintf(nm, "rd%d", rdev->raid_disk); + sysfs_remove_link(&mddev->kobj, nm); + rdev->raid_disk = -1; + } + } } mddev->layout = conf->algorithm; mddev->chunk_sectors = conf->chunk_sectors; -- cgit v1.2.3