Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 6b740b8d authored by NeilBrown's avatar NeilBrown
Browse files

md/raid1: handle merge_bvec_fn in member devices.



Currently we don't honour merge_bvec_fn in member devices so if there
is one, we force all requests to be single-page at most.
This is not ideal.

So create a raid1 merge_bvec_fn to check that function in children
as well.

This introduces a small problem.  There is no locking around calls
the ->merge_bvec_fn and subsequent calls to ->make_request.  So a
device added between these could end up getting a request which
violates its merge_bvec_fn.

Currently the best we can do is synchronize_sched().  This will work
providing no preemption happens.  If there is is preemption, we just
have to hope that new devices are largely consistent with old devices.

Signed-off-by: default avatarNeilBrown <neilb@suse.de>
parent 050b6615
Loading
Loading
Loading
Loading
+56 −21
Original line number Diff line number Diff line
@@ -523,6 +523,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
		rdev = rcu_dereference(conf->mirrors[disk].rdev);
		if (r1_bio->bios[disk] == IO_BLOCKED
		    || rdev == NULL
		    || test_bit(Unmerged, &rdev->flags)
		    || test_bit(Faulty, &rdev->flags))
			continue;
		if (!test_bit(In_sync, &rdev->flags) &&
@@ -614,6 +615,39 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
	return best_disk;
}

static int raid1_mergeable_bvec(struct request_queue *q,
				struct bvec_merge_data *bvm,
				struct bio_vec *biovec)
{
	struct mddev *mddev = q->queuedata;
	struct r1conf *conf = mddev->private;
	sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
	int max = biovec->bv_len;

	if (mddev->merge_check_needed) {
		int disk;
		rcu_read_lock();
		for (disk = 0; disk < conf->raid_disks * 2; disk++) {
			struct md_rdev *rdev = rcu_dereference(
				conf->mirrors[disk].rdev);
			if (rdev && !test_bit(Faulty, &rdev->flags)) {
				struct request_queue *q =
					bdev_get_queue(rdev->bdev);
				if (q->merge_bvec_fn) {
					bvm->bi_sector = sector +
						rdev->data_offset;
					bvm->bi_bdev = rdev->bdev;
					max = min(max, q->merge_bvec_fn(
							  q, bvm, biovec));
				}
			}
		}
		rcu_read_unlock();
	}
	return max;

}

int md_raid1_congested(struct mddev *mddev, int bits)
{
	struct r1conf *conf = mddev->private;
@@ -1015,7 +1049,8 @@ read_again:
			break;
		}
		r1_bio->bios[i] = NULL;
		if (!rdev || test_bit(Faulty, &rdev->flags)) {
		if (!rdev || test_bit(Faulty, &rdev->flags)
		    || test_bit(Unmerged, &rdev->flags)) {
			if (i < conf->raid_disks)
				set_bit(R1BIO_Degraded, &r1_bio->state);
			continue;
@@ -1335,6 +1370,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
	struct mirror_info *p;
	int first = 0;
	int last = conf->raid_disks - 1;
	struct request_queue *q = bdev_get_queue(rdev->bdev);

	if (mddev->recovery_disabled == conf->recovery_disabled)
		return -EBUSY;
@@ -1342,23 +1378,17 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
	if (rdev->raid_disk >= 0)
		first = last = rdev->raid_disk;

	if (q->merge_bvec_fn) {
		set_bit(Unmerged, &rdev->flags);
		mddev->merge_check_needed = 1;
	}

	for (mirror = first; mirror <= last; mirror++) {
		p = conf->mirrors+mirror;
		if (!p->rdev) {

			disk_stack_limits(mddev->gendisk, rdev->bdev,
					  rdev->data_offset << 9);
			/* as we don't honour merge_bvec_fn, we must
			 * never risk violating it, so limit
			 * ->max_segments to one lying with a single
			 * page, as a one page request is never in
			 * violation.
			 */
			if (rdev->bdev->bd_disk->queue->merge_bvec_fn) {
				blk_queue_max_segments(mddev->queue, 1);
				blk_queue_segment_boundary(mddev->queue,
							   PAGE_CACHE_SIZE - 1);
			}

			p->head_position = 0;
			rdev->raid_disk = mirror;
@@ -1383,6 +1413,19 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
			break;
		}
	}
	if (err == 0 && test_bit(Unmerged, &rdev->flags)) {
		/* Some requests might not have seen this new
		 * merge_bvec_fn.  We must wait for them to complete
		 * before merging the device fully.
		 * First we make sure any code which has tested
		 * our function has submitted the request, then
		 * we wait for all outstanding requests to complete.
		 */
		synchronize_sched();
		raise_barrier(conf);
		lower_barrier(conf);
		clear_bit(Unmerged, &rdev->flags);
	}
	md_integrity_add_rdev(rdev, mddev);
	print_conf(conf);
	return err;
@@ -2627,15 +2670,6 @@ static int run(struct mddev *mddev)
			continue;
		disk_stack_limits(mddev->gendisk, rdev->bdev,
				  rdev->data_offset << 9);
		/* as we don't honour merge_bvec_fn, we must never risk
		 * violating it, so limit ->max_segments to 1 lying within
		 * a single page, as a one page request is never in violation.
		 */
		if (rdev->bdev->bd_disk->queue->merge_bvec_fn) {
			blk_queue_max_segments(mddev->queue, 1);
			blk_queue_segment_boundary(mddev->queue,
						   PAGE_CACHE_SIZE - 1);
		}
	}

	mddev->degraded = 0;
@@ -2669,6 +2703,7 @@ static int run(struct mddev *mddev)
	if (mddev->queue) {
		mddev->queue->backing_dev_info.congested_fn = raid1_congested;
		mddev->queue->backing_dev_info.congested_data = mddev;
		blk_queue_merge_bvec(mddev->queue, raid1_mergeable_bvec);
	}
	return md_integrity_register(mddev);
}