Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d8f05d29 authored by NeilBrown
Browse files

md/raid1: record badblocks found during resync etc.



If we find a bad block while writing as part of resync/recovery we
need to report that back to raid1d which must record the bad block,
or fail the device.

Similarly when fixing a read error, a further error should just
record a bad block if possible rather than failing the device.

Signed-off-by: NeilBrown <neilb@suse.de>
Reviewed-by: Namhyung Kim <namhyung@gmail.com>
parent cd5ff9a1
Loading
Loading
Loading
Loading
+51 −30
Original line number Diff line number Diff line
@@ -1386,7 +1386,9 @@ static void end_sync_write(struct bio *bio, int error)
			s += sync_blocks;
			sectors_to_go -= sync_blocks;
		} while (sectors_to_go > 0);
		md_error(mddev, conf->mirrors[mirror].rdev);
		set_bit(WriteErrorSeen,
			&conf->mirrors[mirror].rdev->flags);
		set_bit(R1BIO_WriteError, &r1_bio->state);
	} else if (is_badblock(conf->mirrors[mirror].rdev,
			       r1_bio->sector,
			       r1_bio->sectors,
@@ -1397,7 +1399,8 @@ static void end_sync_write(struct bio *bio, int error)

	if (atomic_dec_and_test(&r1_bio->remaining)) {
		int s = r1_bio->sectors;
		if (test_bit(R1BIO_MadeGood, &r1_bio->state))
		if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
		    test_bit(R1BIO_WriteError, &r1_bio->state))
			reschedule_retry(r1_bio);
		else {
			put_buf(r1_bio);
@@ -1406,6 +1409,20 @@ static void end_sync_write(struct bio *bio, int error)
	}
}

/*
 * Issue one synchronous page I/O to @rdev via sync_page_io().
 *
 * @rdev:    device the I/O is issued to
 * @sector:  starting sector, passed through to sync_page_io()
 * @sectors: length in sectors; converted to bytes (<< 9) for sync_page_io()
 * @page:    page handed to sync_page_io()
 * @rw:      I/O direction; compared against WRITE on failure
 *
 * Returns 1 when sync_page_io() reports success, 0 otherwise.
 *
 * On failure an error has to be recorded somewhere: a failed WRITE sets
 * WriteErrorSeen in rdev->flags, then rdev_set_badblocks() is asked to
 * record the range.  If that call returns zero, md_error() is invoked on
 * the device instead.
 */
static int r1_sync_page_io(mdk_rdev_t *rdev, sector_t sector,
			    int sectors, struct page *page, int rw)
{
	if (!sync_page_io(rdev, sector, sectors << 9, page, rw, false)) {
		/* I/O failed: note write errors on the device first */
		if (rw == WRITE)
			set_bit(WriteErrorSeen, &rdev->flags);

		/* record the error against the block, else against the device */
		if (rdev_set_badblocks(rdev, sector, sectors, 0) == 0)
			md_error(rdev->mddev, rdev);

		return 0;
	}

	/* success */
	return 1;
}

static int fix_sync_read_error(r1bio_t *r1_bio)
{
	/* Try some synchronous reads of other devices to get
@@ -1477,12 +1494,11 @@ static int fix_sync_read_error(r1bio_t *r1_bio)
			if (r1_bio->bios[d]->bi_end_io != end_sync_read)
				continue;
			rdev = conf->mirrors[d].rdev;
			if (sync_page_io(rdev, sect, s<<9,
			if (r1_sync_page_io(rdev, sect, s,
					    bio->bi_io_vec[idx].bv_page,
					 WRITE, false) == 0) {
					    WRITE) == 0) {
				r1_bio->bios[d]->bi_end_io = NULL;
				rdev_dec_pending(rdev, mddev);
				md_error(mddev, rdev);
			}
		}
		d = start;
@@ -1493,11 +1509,9 @@ static int fix_sync_read_error(r1bio_t *r1_bio)
			if (r1_bio->bios[d]->bi_end_io != end_sync_read)
				continue;
			rdev = conf->mirrors[d].rdev;
			if (sync_page_io(rdev, sect, s<<9,
			if (r1_sync_page_io(rdev, sect, s,
					    bio->bi_io_vec[idx].bv_page,
					 READ, false) == 0)
				md_error(mddev, rdev);
			else
					    READ) != 0)
				atomic_add(s, &rdev->corrected_errors);
		}
		sectors -= s;
@@ -1682,8 +1696,10 @@ static void fix_read_error(conf_t *conf, int read_disk,
		} while (!success && d != read_disk);

		if (!success) {
			/* Cannot read from anywhere -- bye bye array */
			md_error(mddev, conf->mirrors[read_disk].rdev);
			/* Cannot read from anywhere - mark it bad */
			mdk_rdev_t *rdev = conf->mirrors[read_disk].rdev;
			if (!rdev_set_badblocks(rdev, sect, s, 0))
				md_error(mddev, rdev);
			break;
		}
		/* write it back and re-read */
@@ -1694,13 +1710,9 @@ static void fix_read_error(conf_t *conf, int read_disk,
			d--;
			rdev = conf->mirrors[d].rdev;
			if (rdev &&
			    test_bit(In_sync, &rdev->flags)) {
				if (sync_page_io(rdev, sect, s<<9,
						 conf->tmppage, WRITE, false)
				    == 0)
					/* Well, this device is dead */
					md_error(mddev, rdev);
			}
			    test_bit(In_sync, &rdev->flags))
				r1_sync_page_io(rdev, sect, s,
						conf->tmppage, WRITE);
		}
		d = start;
		while (d != read_disk) {
@@ -1711,12 +1723,8 @@ static void fix_read_error(conf_t *conf, int read_disk,
			rdev = conf->mirrors[d].rdev;
			if (rdev &&
			    test_bit(In_sync, &rdev->flags)) {
				if (sync_page_io(rdev, sect, s<<9,
						 conf->tmppage, READ, false)
				    == 0)
					/* Well, this device is dead */
					md_error(mddev, rdev);
				else {
				if (r1_sync_page_io(rdev, sect, s,
						    conf->tmppage, READ)) {
					atomic_add(s, &rdev->corrected_errors);
					printk(KERN_INFO
					       "md/raid1:%s: read error corrected "
@@ -1860,20 +1868,33 @@ static void raid1d(mddev_t *mddev)
		mddev = r1_bio->mddev;
		conf = mddev->private;
		if (test_bit(R1BIO_IsSync, &r1_bio->state)) {
			if (test_bit(R1BIO_MadeGood, &r1_bio->state)) {
			if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
			    test_bit(R1BIO_WriteError, &r1_bio->state)) {
				int m;
				int s = r1_bio->sectors;
				for (m = 0; m < conf->raid_disks ; m++) {
					mdk_rdev_t *rdev
						= conf->mirrors[m].rdev;
					struct bio *bio = r1_bio->bios[m];
					if (bio->bi_end_io != NULL &&
					    test_bit(BIO_UPTODATE,
					if (bio->bi_end_io == NULL)
						continue;
					if (test_bit(BIO_UPTODATE,
						     &bio->bi_flags)) {
						rdev = conf->mirrors[m].rdev;
						rdev_clear_badblocks(
							rdev,
							r1_bio->sector,
							r1_bio->sectors);
					}
					if (!test_bit(BIO_UPTODATE,
						      &bio->bi_flags) &&
					    test_bit(R1BIO_WriteError,
						     &r1_bio->state)) {
						if (!rdev_set_badblocks(
							    rdev,
							    r1_bio->sector,
							    r1_bio->sectors, 0))
							md_error(mddev, rdev);
					}
				}
				put_buf(r1_bio);
				md_done_sync(mddev, s, 1);