Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 24afd80d authored by NeilBrown's avatar NeilBrown
Browse files

md/raid10: handle recovery of replacement devices.



If there is a replacement device, then recover to it,
reading from any drives - maybe the one being replaced, maybe not.

Signed-off-by: default avatarNeilBrown <neilb@suse.de>
parent 9ad1aefc
Loading
Loading
Loading
Loading
+80 −30
Original line number Diff line number Diff line
@@ -1843,7 +1843,7 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
{
	struct r10conf *conf = mddev->private;
	int d;
	struct bio *wbio;
	struct bio *wbio, *wbio2;

	if (!test_bit(R10BIO_Uptodate, &r10_bio->state)) {
		fix_recovery_read_error(r10_bio);
@@ -1855,13 +1855,21 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
	 * share the pages with the first bio
	 * and submit the write request
	 */
	wbio = r10_bio->devs[1].bio;
	d = r10_bio->devs[1].devnum;

	wbio = r10_bio->devs[1].bio;
	wbio2 = r10_bio->devs[1].repl_bio;
	if (wbio->bi_end_io) {
		atomic_inc(&conf->mirrors[d].rdev->nr_pending);
		md_sync_acct(conf->mirrors[d].rdev->bdev, wbio->bi_size >> 9);
		generic_make_request(wbio);
	}
	if (wbio2 && wbio2->bi_end_io) {
		atomic_inc(&conf->mirrors[d].replacement->nr_pending);
		md_sync_acct(conf->mirrors[d].replacement->bdev,
			     wbio2->bi_size >> 9);
		generic_make_request(wbio2);
	}
}


/*
@@ -2590,23 +2598,30 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
			sector_t sect;
			int must_sync;
			int any_working;
			struct mirror_info *mirror = &conf->mirrors[i];

			if (conf->mirrors[i].rdev == NULL ||
			    test_bit(In_sync, &conf->mirrors[i].rdev->flags)) 
			if ((mirror->rdev == NULL ||
			     test_bit(In_sync, &mirror->rdev->flags))
			    &&
			    (mirror->replacement == NULL ||
			     test_bit(Faulty,
				      &mirror->replacement->flags)))
				continue;

			still_degraded = 0;
			/* want to reconstruct this device */
			rb2 = r10_bio;
			sect = raid10_find_virt(conf, sector_nr, i);
			/* Unless we are doing a full sync, we only need
			 * to recover the block if it is set in the bitmap
			/* Unless we are doing a full sync, or a replacement
			 * we only need to recover the block if it is set in
			 * the bitmap
			 */
			must_sync = bitmap_start_sync(mddev->bitmap, sect,
						      &sync_blocks, 1);
			if (sync_blocks < max_sync)
				max_sync = sync_blocks;
			if (!must_sync &&
			    mirror->replacement == NULL &&
			    !conf->fullsync) {
				/* yep, skip the sync_blocks here, but don't assume
				 * that there will never be anything to do here
@@ -2676,33 +2691,60 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
				bio->bi_end_io = end_sync_read;
				bio->bi_rw = READ;
				from_addr = r10_bio->devs[j].addr;
				bio->bi_sector = from_addr +
					conf->mirrors[d].rdev->data_offset;
				bio->bi_bdev = conf->mirrors[d].rdev->bdev;
				atomic_inc(&conf->mirrors[d].rdev->nr_pending);
				atomic_inc(&r10_bio->remaining);
				/* and we write to 'i' */
				bio->bi_sector = from_addr + rdev->data_offset;
				bio->bi_bdev = rdev->bdev;
				atomic_inc(&rdev->nr_pending);
				/* and we write to 'i' (if not in_sync) */

				for (k=0; k<conf->copies; k++)
					if (r10_bio->devs[k].devnum == i)
						break;
				BUG_ON(k == conf->copies);
				to_addr = r10_bio->devs[k].addr;
				r10_bio->devs[0].devnum = d;
				r10_bio->devs[0].addr = from_addr;
				r10_bio->devs[1].devnum = i;
				r10_bio->devs[1].addr = to_addr;

				rdev = mirror->rdev;
				if (!test_bit(In_sync, &rdev->flags)) {
					bio = r10_bio->devs[1].bio;
					bio->bi_next = biolist;
					biolist = bio;
					bio->bi_private = r10_bio;
					bio->bi_end_io = end_sync_write;
					bio->bi_rw = WRITE;
				to_addr = r10_bio->devs[k].addr;
				bio->bi_sector = to_addr +
					conf->mirrors[i].rdev->data_offset;
				bio->bi_bdev = conf->mirrors[i].rdev->bdev;

				r10_bio->devs[0].devnum = d;
				r10_bio->devs[0].addr = from_addr;
				r10_bio->devs[1].devnum = i;
				r10_bio->devs[1].addr = to_addr;
					bio->bi_sector = to_addr
						+ rdev->data_offset;
					bio->bi_bdev = rdev->bdev;
					atomic_inc(&r10_bio->remaining);
				} else
					r10_bio->devs[1].bio->bi_end_io = NULL;

				/* and maybe write to replacement */
				bio = r10_bio->devs[1].repl_bio;
				if (bio)
					bio->bi_end_io = NULL;
				rdev = mirror->replacement;
				/* Note: if rdev != NULL, then bio
				 * cannot be NULL as r10buf_pool_alloc will
				 * have allocated it.
				 * So the second test here is pointless.
				 * But it keeps semantic-checkers happy, and
				 * this comment keeps human reviewers
				 * happy.
				 */
				if (rdev == NULL || bio == NULL ||
				    test_bit(Faulty, &rdev->flags))
					break;
				bio->bi_next = biolist;
				biolist = bio;
				bio->bi_private = r10_bio;
				bio->bi_end_io = end_sync_write;
				bio->bi_rw = WRITE;
				bio->bi_sector = to_addr + rdev->data_offset;
				bio->bi_bdev = rdev->bdev;
				atomic_inc(&r10_bio->remaining);
				break;
			}
			if (j == conf->copies) {
@@ -2720,8 +2762,16 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
					for (k = 0; k < conf->copies; k++)
						if (r10_bio->devs[k].devnum == i)
							break;
					if (!rdev_set_badblocks(
						    conf->mirrors[i].rdev,
					if (!test_bit(In_sync,
						      &mirror->rdev->flags)
					    && !rdev_set_badblocks(
						    mirror->rdev,
						    r10_bio->devs[k].addr,
						    max_sync, 0))
						any_working = 0;
					if (mirror->replacement &&
					    !rdev_set_badblocks(
						    mirror->replacement,
						    r10_bio->devs[k].addr,
						    max_sync, 0))
						any_working = 0;
@@ -2732,7 +2782,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
						printk(KERN_INFO "md/raid10:%s: insufficient "
						       "working devices for recovery.\n",
						       mdname(mddev));
					conf->mirrors[i].recovery_disabled
					mirror->recovery_disabled
						= mddev->recovery_disabled;
				}
				break;