Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 76073054 authored by NeilBrown
Browse files

md/raid1: clean up read_balance.



read_balance has two loops which both look for a 'best'
device based on slightly different criteria.
This is clumsy and makes it hard to add extra criteria.

So replace it all with a single loop that combines everything.

Signed-off-by: NeilBrown <neilb@suse.de>
parent 56d99121
Loading
Loading
Loading
Loading
+34 −49
Original line number Original line Diff line number Diff line
@@ -411,10 +411,10 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
{
{
	const sector_t this_sector = r1_bio->sector;
	const sector_t this_sector = r1_bio->sector;
	const int sectors = r1_bio->sectors;
	const int sectors = r1_bio->sectors;
	int new_disk = -1;
	int start_disk;
	int start_disk;
	int best_disk;
	int i;
	int i;
	sector_t new_distance, current_distance;
	sector_t best_dist;
	mdk_rdev_t *rdev;
	mdk_rdev_t *rdev;
	int choose_first;
	int choose_first;


@@ -425,6 +425,8 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
	 * We take the first readable disk when above the resync window.
	 * We take the first readable disk when above the resync window.
	 */
	 */
 retry:
 retry:
	best_disk = -1;
	best_dist = MaxSector;
	if (conf->mddev->recovery_cp < MaxSector &&
	if (conf->mddev->recovery_cp < MaxSector &&
	    (this_sector + sectors >= conf->next_resync)) {
	    (this_sector + sectors >= conf->next_resync)) {
		choose_first = 1;
		choose_first = 1;
@@ -434,8 +436,8 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
		start_disk = conf->last_used;
		start_disk = conf->last_used;
	}
	}


	/* make sure the disk is operational */
	for (i = 0 ; i < conf->raid_disks ; i++) {
	for (i = 0 ; i < conf->raid_disks ; i++) {
		sector_t dist;
		int disk = start_disk + i;
		int disk = start_disk + i;
		if (disk >= conf->raid_disks)
		if (disk >= conf->raid_disks)
			disk -= conf->raid_disks;
			disk -= conf->raid_disks;
@@ -443,60 +445,43 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
		rdev = rcu_dereference(conf->mirrors[disk].rdev);
		rdev = rcu_dereference(conf->mirrors[disk].rdev);
		if (r1_bio->bios[disk] == IO_BLOCKED
		if (r1_bio->bios[disk] == IO_BLOCKED
		    || rdev == NULL
		    || rdev == NULL
		    || !test_bit(In_sync, &rdev->flags))
		    || test_bit(Faulty, &rdev->flags))
			continue;
		if (!test_bit(In_sync, &rdev->flags) &&
		    rdev->recovery_offset < this_sector + sectors)
			continue;
		if (test_bit(WriteMostly, &rdev->flags)) {
			/* Don't balance among write-mostly, just
			 * use the first as a last resort */
			if (best_disk < 0)
				best_disk = disk;
			continue;
			continue;

		new_disk = disk;
		if (!test_bit(WriteMostly, &rdev->flags))
			break;
		}
		}

		/* This is a reasonable device to use.  It might
	if (new_disk < 0 || choose_first)
		 * even be best.
		goto rb_out;

	/*
	 * Don't change to another disk for sequential reads:
		 */
		 */
	if (conf->next_seq_sect == this_sector)
		dist = abs(this_sector - conf->mirrors[disk].head_position);
		goto rb_out;
		if (choose_first
	if (this_sector == conf->mirrors[new_disk].head_position)
		    /* Don't change to another disk for sequential reads */
		goto rb_out;
		    || conf->next_seq_sect == this_sector

		    || dist == 0
	current_distance = abs(this_sector 
		    /* If device is idle, use it */
			       - conf->mirrors[new_disk].head_position);
		    || atomic_read(&rdev->nr_pending) == 0) {

			best_disk = disk;
	/* look for a better disk - i.e. head is closer */
	start_disk = new_disk;
	for (i = 1; i < conf->raid_disks; i++) {
		int disk = start_disk + 1;
		if (disk >= conf->raid_disks)
			disk -= conf->raid_disks;

		rdev = rcu_dereference(conf->mirrors[disk].rdev);
		if (r1_bio->bios[disk] == IO_BLOCKED
		    || rdev == NULL
		    || !test_bit(In_sync, &rdev->flags)
		    || test_bit(WriteMostly, &rdev->flags))
			continue;

		if (!atomic_read(&rdev->nr_pending)) {
			new_disk = disk;
			break;
			break;
		}
		}
		new_distance = abs(this_sector - conf->mirrors[disk].head_position);
		if (dist < best_dist) {
		if (new_distance < current_distance) {
			best_dist = dist;
			current_distance = new_distance;
			best_disk = disk;
			new_disk = disk;
		}
		}
	}
	}


 rb_out:
	if (best_disk >= 0) {
	if (new_disk >= 0) {
		rdev = rcu_dereference(conf->mirrors[best_disk].rdev);
		rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
		if (!rdev)
		if (!rdev)
			goto retry;
			goto retry;
		atomic_inc(&rdev->nr_pending);
		atomic_inc(&rdev->nr_pending);
		if (!test_bit(In_sync, &rdev->flags)) {
		if (test_bit(Faulty, &rdev->flags)) {
			/* cannot risk returning a device that failed
			/* cannot risk returning a device that failed
			 * before we inc'ed nr_pending
			 * before we inc'ed nr_pending
			 */
			 */
@@ -504,11 +489,11 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
			goto retry;
			goto retry;
		}
		}
		conf->next_seq_sect = this_sector + sectors;
		conf->next_seq_sect = this_sector + sectors;
		conf->last_used = new_disk;
		conf->last_used = best_disk;
	}
	}
	rcu_read_unlock();
	rcu_read_unlock();


	return new_disk;
	return best_disk;
}
}


static int raid1_congested(void *data, int bits)
static int raid1_congested(void *data, int bits)