Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 58c54fcc authored by NeilBrown's avatar NeilBrown
Browse files

md/raid10: handle further errors during fix_read_error better.



If we find more read/write errors we should record a bad block before
failing the device.

Signed-off-by: default avatarNeilBrown <neilb@suse.de>
parent 5e570289
Loading
Loading
Loading
Loading
+44 −15
Original line number Diff line number Diff line
@@ -1749,6 +1749,26 @@ static void check_decay_read_errors(mddev_t *mddev, mdk_rdev_t *rdev)
		atomic_set(&rdev->read_errors, read_errors >> hours_since_last);
}

static int r10_sync_page_io(mdk_rdev_t *rdev, sector_t sector,
			    int sectors, struct page *page, int rw)
{
	sector_t first_bad;
	int bad_sectors;

	if (is_badblock(rdev, sector, sectors, &first_bad, &bad_sectors)
	    && (rw == READ || test_bit(WriteErrorSeen, &rdev->flags)))
		return -1;
	if (sync_page_io(rdev, sector, sectors << 9, page, rw, false))
		/* success */
		return 1;
	if (rw == WRITE)
		set_bit(WriteErrorSeen, &rdev->flags);
	/* need to record an error - either for the block or the device */
	if (!rdev_set_badblocks(rdev, sector, sectors, 0))
		md_error(rdev->mddev, rdev);
	return 0;
}

/*
 * This is a kernel thread which:
 *
@@ -1832,9 +1852,19 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
		rcu_read_unlock();

		if (!success) {
			/* Cannot read from anywhere -- bye bye array */
			/* Cannot read from anywhere, just mark the block
			 * as bad on the first device to discourage future
			 * reads.
			 */
			int dn = r10_bio->devs[r10_bio->read_slot].devnum;
			md_error(mddev, conf->mirrors[dn].rdev);
			rdev = conf->mirrors[dn].rdev;

			if (!rdev_set_badblocks(
				    rdev,
				    r10_bio->devs[r10_bio->read_slot].addr
				    + sect,
				    s, 0))
				md_error(mddev, rdev);
			break;
		}

@@ -1855,10 +1885,10 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)

			atomic_inc(&rdev->nr_pending);
			rcu_read_unlock();
			if (sync_page_io(rdev,
			if (r10_sync_page_io(rdev,
					     r10_bio->devs[sl].addr +
					     sect,
					 s<<9, conf->tmppage, WRITE, false)
					     s<<9, conf->tmppage, WRITE)
			    == 0) {
				/* Well, this device is dead */
				printk(KERN_NOTICE
@@ -1873,7 +1903,6 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
				       "drive\n",
				       mdname(mddev),
				       bdevname(rdev->bdev, b));
				md_error(mddev, rdev);
			}
			rdev_dec_pending(rdev, mddev);
			rcu_read_lock();
@@ -1893,11 +1922,12 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)

			atomic_inc(&rdev->nr_pending);
			rcu_read_unlock();
			if (sync_page_io(rdev,
			switch (r10_sync_page_io(rdev,
					     r10_bio->devs[sl].addr +
					     sect,
					     s<<9, conf->tmppage,
					 READ, false) == 0) {
						 READ)) {
			case 0:
				/* Well, this device is dead */
				printk(KERN_NOTICE
				       "md/raid10:%s: unable to read back "
@@ -1911,9 +1941,8 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
				       "drive\n",
				       mdname(mddev),
				       bdevname(rdev->bdev, b));

				md_error(mddev, rdev);
			} else {
				break;
			case 1:
				printk(KERN_INFO
				       "md/raid10:%s: read error corrected"
				       " (%d sectors at %llu on %s)\n",