Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b4c4c7b8 authored by NeilBrown's avatar NeilBrown Committed by Linus Torvalds
Browse files

[PATCH] md: restart a (raid5) reshape that has been aborted due to a read/write error



An error always aborts any resync/recovery/reshape on the understanding that
it will immediately be restarted if that still makes sense.  However a reshape
currently doesn't get restarted.  With this patch it does.

To avoid restarting when it is not possible to do work, we call into the
personality to check that a reshape is ok, and strengthen raid5_check_reshape
to fail if there are too many failed devices.

We also break some code out into a separate function: remove_and_add_spares as
the indent level for that code was getting crazy.

Signed-off-by: default avatarNeil Brown <neilb@suse.de>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent d1b5380c
Loading
Loading
Loading
Loading
+45 −29
Original line number Diff line number Diff line
@@ -5357,6 +5357,44 @@ void md_do_sync(mddev_t *mddev)
EXPORT_SYMBOL_GPL(md_do_sync);


static int remove_and_add_spares(mddev_t *mddev)
{
	mdk_rdev_t *rdev;
	struct list_head *rtmp;
	int spares = 0;

	ITERATE_RDEV(mddev,rdev,rtmp)
		if (rdev->raid_disk >= 0 &&
		    (test_bit(Faulty, &rdev->flags) ||
		     ! test_bit(In_sync, &rdev->flags)) &&
		    atomic_read(&rdev->nr_pending)==0) {
			if (mddev->pers->hot_remove_disk(
				    mddev, rdev->raid_disk)==0) {
				char nm[20];
				sprintf(nm,"rd%d", rdev->raid_disk);
				sysfs_remove_link(&mddev->kobj, nm);
				rdev->raid_disk = -1;
			}
		}

	if (mddev->degraded) {
		ITERATE_RDEV(mddev,rdev,rtmp)
			if (rdev->raid_disk < 0
			    && !test_bit(Faulty, &rdev->flags)) {
				rdev->recovery_offset = 0;
				if (mddev->pers->hot_add_disk(mddev,rdev)) {
					char nm[20];
					sprintf(nm, "rd%d", rdev->raid_disk);
					sysfs_create_link(&mddev->kobj,
							  &rdev->kobj, nm);
					spares++;
					md_new_event(mddev);
				} else
					break;
			}
	}
	return spares;
}
/*
 * This routine is regularly called by all per-raid-array threads to
 * deal with generic issues like resync and super-block update.
@@ -5474,35 +5512,13 @@ void md_check_recovery(mddev_t *mddev)
		 * Spare are also removed and re-added, to allow
		 * the personality to fail the re-add.
		 */
		ITERATE_RDEV(mddev,rdev,rtmp)
			if (rdev->raid_disk >= 0 &&
			    (test_bit(Faulty, &rdev->flags) || ! test_bit(In_sync, &rdev->flags)) &&
			    atomic_read(&rdev->nr_pending)==0) {
				if (mddev->pers->hot_remove_disk(mddev, rdev->raid_disk)==0) {
					char nm[20];
					sprintf(nm,"rd%d", rdev->raid_disk);
					sysfs_remove_link(&mddev->kobj, nm);
					rdev->raid_disk = -1;
				}
			}

		if (mddev->degraded) {
			ITERATE_RDEV(mddev,rdev,rtmp)
				if (rdev->raid_disk < 0
				    && !test_bit(Faulty, &rdev->flags)) {
					rdev->recovery_offset = 0;
					if (mddev->pers->hot_add_disk(mddev,rdev)) {
						char nm[20];
						sprintf(nm, "rd%d", rdev->raid_disk);
						sysfs_create_link(&mddev->kobj, &rdev->kobj, nm);
						spares++;
						md_new_event(mddev);
					} else
						break;
				}
		}

		if (spares) {
		if (mddev->reshape_position != MaxSector) {
			if (mddev->pers->check_reshape(mddev) != 0)
				/* Cannot proceed */
				goto unlock;
			set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
		} else if ((spares = remove_and_add_spares(mddev))) {
			clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
			clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
		} else if (mddev->recovery_cp < MaxSector) {
+2 −0
Original line number Diff line number Diff line
@@ -3814,6 +3814,8 @@ static int raid5_check_reshape(mddev_t *mddev)
	if (err)
		return err;

	if (mddev->degraded > conf->max_degraded)
		return -EINVAL;
	/* looks like we might be able to manage this */
	return 0;
}