From 7dd5e7c3dbe8c4ffb507ddc0ea8fab07c8b11b0b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 28 Feb 2007 20:11:35 -0800 Subject: [PATCH] md: move warning about creating a raid array on partitions of the one device md tries to warn the user if they e.g. create a raid1 using two partitions of the same device, as this does not provide true redundancy. However it also warns if a raid0 is created like this, and there is nothing wrong with that. At the place where the warning is currently printed, we don't necessarily know what level the array will be, so move the warning from the point where the device is added to the point where the array is started. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 63 +++++++++++++++++++++++++++++++++------------------------ 1 file changed, 37 insertions(+), 26 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 05febfd9f071..c8c40c361532 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1296,27 +1296,17 @@ static struct super_type super_types[] = { .sync_super = super_1_sync, }, }; - -static mdk_rdev_t * match_dev_unit(mddev_t *mddev, mdk_rdev_t *dev) -{ - struct list_head *tmp; - mdk_rdev_t *rdev; - - ITERATE_RDEV(mddev,rdev,tmp) - if (rdev->bdev->bd_contains == dev->bdev->bd_contains) - return rdev; - - return NULL; -} static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2) { - struct list_head *tmp; - mdk_rdev_t *rdev; + struct list_head *tmp, *tmp2; + mdk_rdev_t *rdev, *rdev2; ITERATE_RDEV(mddev1,rdev,tmp) - if (match_dev_unit(mddev2, rdev)) - return 1; + ITERATE_RDEV(mddev2, rdev2, tmp2) + if (rdev->bdev->bd_contains == + rdev2->bdev->bd_contains) + return 1; return 0; } @@ -1325,8 +1315,7 @@ static LIST_HEAD(pending_raid_disks); static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) { - mdk_rdev_t *same_pdev; - char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; + char b[BDEVNAME_SIZE]; struct kobject *ko; 
char *s; @@ -1342,14 +1331,6 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) else mddev->size = rdev->size; } - same_pdev = match_dev_unit(mddev, rdev); - if (same_pdev) - printk(KERN_WARNING - "%s: WARNING: %s appears to be on the same physical" - " disk as %s. True\n protection against single-disk" - " failure might be compromised.\n", - mdname(mddev), bdevname(rdev->bdev,b), - bdevname(same_pdev->bdev,b2)); /* Verify rdev->desc_nr is unique. * If it is -1, assign a free number, else @@ -3109,6 +3090,36 @@ static int do_md_run(mddev_t * mddev) return -EINVAL; } + if (pers->sync_request) { + /* Warn if this is a potentially silly + * configuration. + */ + char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; + mdk_rdev_t *rdev2; + struct list_head *tmp2; + int warned = 0; + ITERATE_RDEV(mddev, rdev, tmp) { + ITERATE_RDEV(mddev, rdev2, tmp2) { + if (rdev < rdev2 && + rdev->bdev->bd_contains == + rdev2->bdev->bd_contains) { + printk(KERN_WARNING + "%s: WARNING: %s appears to be" + " on the same physical disk as" + " %s.\n", + mdname(mddev), + bdevname(rdev->bdev,b), + bdevname(rdev2->bdev,b2)); + warned = 1; + } + } + } + if (warned) + printk(KERN_WARNING + "True protection against single-disk" + " failure might be compromised.\n"); + } + mddev->recovery = 0; mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */ mddev->barriers_work = 1; -- cgit v1.2.3 From d1b5380c7f794da16e815c34e54ee7641db8a288 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 28 Feb 2007 20:11:42 -0800 Subject: [PATCH] md: clean out unplug and other queue function on md shutdown The mddev and queue might be used for another array which does not set these, so they need to be cleared. 
Signed-off-by: NeilBrown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index c8c40c361532..b5744b1bd2ba 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3322,6 +3322,9 @@ static int do_md_stop(mddev_t * mddev, int mode) set_disk_ro(disk, 0); blk_queue_make_request(mddev->queue, md_fail_request); mddev->pers->stop(mddev); + mddev->queue->merge_bvec_fn = NULL; + mddev->queue->unplug_fn = NULL; + mddev->queue->issue_flush_fn = NULL; if (mddev->pers->sync_request) sysfs_remove_group(&mddev->kobj, &md_redundancy_group); -- cgit v1.2.3 From b4c4c7b8095298ff4ce20b40bf180ada070812d0 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 28 Feb 2007 20:11:48 -0800 Subject: [PATCH] md: restart a (raid5) reshape that has been aborted due to a read/write error An error always aborts any resync/recovery/reshape on the understanding that it will immediately be restarted if that still makes sense. However a reshape currently doesn't get restarted. With this patch it does. To avoid restarting when it is not possible to do work, we call into the personality to check that a reshape is ok, and strengthen raid5_check_reshape to fail if there are too many failed devices. We also break some code out into a separate function: remove_and_add_spares as the indent level for that code was getting crazy. 
Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 74 +++++++++++++++++++++++++++++++++--------------------- drivers/md/raid5.c | 2 ++ 2 files changed, 47 insertions(+), 29 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index b5744b1bd2ba..6c06e825cff5 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5357,6 +5357,44 @@ void md_do_sync(mddev_t *mddev) EXPORT_SYMBOL_GPL(md_do_sync); +static int remove_and_add_spares(mddev_t *mddev) +{ + mdk_rdev_t *rdev; + struct list_head *rtmp; + int spares = 0; + + ITERATE_RDEV(mddev,rdev,rtmp) + if (rdev->raid_disk >= 0 && + (test_bit(Faulty, &rdev->flags) || + ! test_bit(In_sync, &rdev->flags)) && + atomic_read(&rdev->nr_pending)==0) { + if (mddev->pers->hot_remove_disk( + mddev, rdev->raid_disk)==0) { + char nm[20]; + sprintf(nm,"rd%d", rdev->raid_disk); + sysfs_remove_link(&mddev->kobj, nm); + rdev->raid_disk = -1; + } + } + + if (mddev->degraded) { + ITERATE_RDEV(mddev,rdev,rtmp) + if (rdev->raid_disk < 0 + && !test_bit(Faulty, &rdev->flags)) { + rdev->recovery_offset = 0; + if (mddev->pers->hot_add_disk(mddev,rdev)) { + char nm[20]; + sprintf(nm, "rd%d", rdev->raid_disk); + sysfs_create_link(&mddev->kobj, + &rdev->kobj, nm); + spares++; + md_new_event(mddev); + } else + break; + } + } + return spares; +} /* * This routine is regularly called by all per-raid-array threads to * deal with generic issues like resync and super-block update. @@ -5411,7 +5449,7 @@ void md_check_recovery(mddev_t *mddev) return; if (mddev_trylock(mddev)) { - int spares =0; + int spares = 0; spin_lock_irq(&mddev->write_lock); if (mddev->safemode && !atomic_read(&mddev->writes_pending) && @@ -5474,35 +5512,13 @@ void md_check_recovery(mddev_t *mddev) * Spare are also removed and re-added, to allow * the personality to fail the re-add. */ - ITERATE_RDEV(mddev,rdev,rtmp) - if (rdev->raid_disk >= 0 && - (test_bit(Faulty, &rdev->flags) || ! 
test_bit(In_sync, &rdev->flags)) && - atomic_read(&rdev->nr_pending)==0) { - if (mddev->pers->hot_remove_disk(mddev, rdev->raid_disk)==0) { - char nm[20]; - sprintf(nm,"rd%d", rdev->raid_disk); - sysfs_remove_link(&mddev->kobj, nm); - rdev->raid_disk = -1; - } - } - - if (mddev->degraded) { - ITERATE_RDEV(mddev,rdev,rtmp) - if (rdev->raid_disk < 0 - && !test_bit(Faulty, &rdev->flags)) { - rdev->recovery_offset = 0; - if (mddev->pers->hot_add_disk(mddev,rdev)) { - char nm[20]; - sprintf(nm, "rd%d", rdev->raid_disk); - sysfs_create_link(&mddev->kobj, &rdev->kobj, nm); - spares++; - md_new_event(mddev); - } else - break; - } - } - if (spares) { + if (mddev->reshape_position != MaxSector) { + if (mddev->pers->check_reshape(mddev) != 0) + /* Cannot proceed */ + goto unlock; + set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); + } else if ((spares = remove_and_add_spares(mddev))) { clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); } else if (mddev->recovery_cp < MaxSector) { diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 11c3d7bfa797..29fc06b47d4e 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3814,6 +3814,8 @@ static int raid5_check_reshape(mddev_t *mddev) if (err) return err; + if (mddev->degraded > conf->max_degraded) + return -EINVAL; /* looks like we might be able to manage this */ return 0; } -- cgit v1.2.3 From 041ae52e265fc432ea5525b1c66720385c2d11f0 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 26 Mar 2007 21:32:14 -0800 Subject: [PATCH] md: clear the congested_fn when stopping a raid5 If this mddev and queue got reused for another array that doesn't register a congested_fn, this function would get called incorrectly. 
Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 1 + drivers/md/raid5.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 6c06e825cff5..a9852dbdfd66 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3325,6 +3325,7 @@ static int do_md_stop(mddev_t * mddev, int mode) mddev->queue->merge_bvec_fn = NULL; mddev->queue->unplug_fn = NULL; mddev->queue->issue_flush_fn = NULL; + mddev->queue->backing_dev_info.congested_fn = NULL; if (mddev->pers->sync_request) sysfs_remove_group(&mddev->kobj, &md_redundancy_group); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index ae30e87bf505..4dd252864f52 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3646,8 +3646,8 @@ static int run(mddev_t *mddev) mddev->queue->unplug_fn = raid5_unplug_device; mddev->queue->issue_flush_fn = raid5_issue_flush; - mddev->queue->backing_dev_info.congested_fn = raid5_congested; mddev->queue->backing_dev_info.congested_data = mddev; + mddev->queue->backing_dev_info.congested_fn = raid5_congested; mddev->array_size = mddev->size * (conf->previous_raid_disks - conf->max_degraded); @@ -3678,6 +3678,7 @@ static int stop(mddev_t *mddev) mddev->thread = NULL; shrink_stripes(conf); kfree(conf->stripe_hashtbl); + mddev->queue->backing_dev_info.congested_fn = NULL; blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ sysfs_remove_group(&mddev->kobj, &raid5_attrs_group); kfree(conf->disks); -- cgit v1.2.3 From 5e55e2f5fc95b355d8aa649f346cff69904c8ade Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 26 Mar 2007 21:32:14 -0800 Subject: [PATCH] md: convert compile time warnings into runtime warnings ... still not sure why we need this .... 
Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 41 +++++++++++++++++++++++++++++++---------- drivers/md/raid5.c | 12 ++++++++++-- 2 files changed, 41 insertions(+), 12 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index a9852dbdfd66..2a9b6a07e3a2 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1318,6 +1318,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) char b[BDEVNAME_SIZE]; struct kobject *ko; char *s; + int err; if (rdev->mddev) { MD_BUG(); @@ -1352,20 +1353,29 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) while ( (s=strchr(rdev->kobj.k_name, '/')) != NULL) *s = '!'; - list_add(&rdev->same_set, &mddev->disks); rdev->mddev = mddev; printk(KERN_INFO "md: bind<%s>\n", b); rdev->kobj.parent = &mddev->kobj; - kobject_add(&rdev->kobj); + if ((err = kobject_add(&rdev->kobj))) + goto fail; if (rdev->bdev->bd_part) ko = &rdev->bdev->bd_part->kobj; else ko = &rdev->bdev->bd_disk->kobj; - sysfs_create_link(&rdev->kobj, ko, "block"); + if ((err = sysfs_create_link(&rdev->kobj, ko, "block"))) { + kobject_del(&rdev->kobj); + goto fail; + } + list_add(&rdev->same_set, &mddev->disks); bd_claim_by_disk(rdev->bdev, rdev, mddev->gendisk); return 0; + + fail: + printk(KERN_WARNING "md: failed to register dev-%s for %s\n", + b, mdname(mddev)); + return err; } static void unbind_rdev_from_array(mdk_rdev_t * rdev) @@ -2966,7 +2976,9 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) mddev->kobj.k_name = NULL; snprintf(mddev->kobj.name, KOBJ_NAME_LEN, "%s", "md"); mddev->kobj.ktype = &md_ktype; - kobject_register(&mddev->kobj); + if (kobject_register(&mddev->kobj)) + printk(KERN_WARNING "md: cannot register %s/md - name in use\n", + disk->disk_name); return NULL; } @@ -3144,9 +3156,12 @@ static int do_md_run(mddev_t * mddev) bitmap_destroy(mddev); return err; } - if (mddev->pers->sync_request) - 
sysfs_create_group(&mddev->kobj, &md_redundancy_group); - else if (mddev->ro == 2) /* auto-readonly not meaningful */ + if (mddev->pers->sync_request) { + if (sysfs_create_group(&mddev->kobj, &md_redundancy_group)) + printk(KERN_WARNING + "md: cannot register extra attributes for %s\n", + mdname(mddev)); + } else if (mddev->ro == 2) /* auto-readonly not meaningful */ mddev->ro = 0; atomic_set(&mddev->writes_pending,0); @@ -3160,7 +3175,9 @@ static int do_md_run(mddev_t * mddev) if (rdev->raid_disk >= 0) { char nm[20]; sprintf(nm, "rd%d", rdev->raid_disk); - sysfs_create_link(&mddev->kobj, &rdev->kobj, nm); + if (sysfs_create_link(&mddev->kobj, &rdev->kobj, nm)) + printk("md: cannot register %s for %s\n", + nm, mdname(mddev)); } set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); @@ -5386,8 +5403,12 @@ static int remove_and_add_spares(mddev_t *mddev) if (mddev->pers->hot_add_disk(mddev,rdev)) { char nm[20]; sprintf(nm, "rd%d", rdev->raid_disk); - sysfs_create_link(&mddev->kobj, - &rdev->kobj, nm); + if (sysfs_create_link(&mddev->kobj, + &rdev->kobj, nm)) + printk(KERN_WARNING + "md: cannot register " + "%s for %s\n", + nm, mdname(mddev)); spares++; md_new_event(mddev); } else diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 4dd252864f52..8d59914f2057 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3642,7 +3642,10 @@ static int run(mddev_t *mddev) } /* Ok, everything is just fine now */ - sysfs_create_group(&mddev->kobj, &raid5_attrs_group); + if (sysfs_create_group(&mddev->kobj, &raid5_attrs_group)) + printk(KERN_WARNING + "raid5: failed to create sysfs attributes for %s\n", + mdname(mddev)); mddev->queue->unplug_fn = raid5_unplug_device; mddev->queue->issue_flush_fn = raid5_issue_flush; @@ -3951,7 +3954,12 @@ static int raid5_start_reshape(mddev_t *mddev) added_devices++; rdev->recovery_offset = 0; sprintf(nm, "rd%d", rdev->raid_disk); - sysfs_create_link(&mddev->kobj, &rdev->kobj, nm); + if (sysfs_create_link(&mddev->kobj, + &rdev->kobj, nm)) + 
printk(KERN_WARNING + "raid5: failed to create " + " link %s for %s\n", + nm, mdname(mddev)); } else break; } -- cgit v1.2.3 From 5792a2856a63cdc568e08a7d6f9b2413d9217b3e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 4 Apr 2007 19:08:18 -0700 Subject: [PATCH] md: avoid a deadlock when removing a device from an md array via sysfs A device can be removed from an md array via e.g. echo remove > /sys/block/md3/md/dev-sde/state This will try to remove the 'dev-sde' subtree which will deadlock since commit e7b0d26a86943370c04d6833c6edba2a72a6e240 With this patch we run the kobject_del via schedule_work so as to avoid the deadlock. Cc: Alan Stern Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 16 +++++++++++++++- include/linux/raid/md_k.h | 1 + 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 2a9b6a07e3a2..509171ca7fa8 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1378,6 +1378,12 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) return err; } +static void delayed_delete(struct work_struct *ws) +{ + mdk_rdev_t *rdev = container_of(ws, mdk_rdev_t, del_work); + kobject_del(&rdev->kobj); +} + static void unbind_rdev_from_array(mdk_rdev_t * rdev) { char b[BDEVNAME_SIZE]; @@ -1390,7 +1396,12 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev) printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b)); rdev->mddev = NULL; sysfs_remove_link(&rdev->kobj, "block"); - kobject_del(&rdev->kobj); + + /* We need to delay this, otherwise we can deadlock when + * writing to 'remove' to "dev/state" + */ + INIT_WORK(&rdev->del_work, delayed_delete); + schedule_work(&rdev->del_work); } /* @@ -3389,6 +3400,9 @@ static int do_md_stop(mddev_t * mddev, int mode) sysfs_remove_link(&mddev->kobj, nm); } + /* make sure all delayed_delete calls have finished */ + flush_scheduled_work(); + export_array(mddev); 
mddev->array_size = 0; diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 8245c282168b..de72c49747c8 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -104,6 +104,7 @@ struct mdk_rdev_s * for reporting to userspace and storing * in superblock. */ + struct work_struct del_work; /* used for delayed sysfs removal */ }; struct mddev_s -- cgit v1.2.3