Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit af6d7b76 authored by NeilBrown
Browse files

md/raid1: improve handling of pages allocated for write-behind.



The current handling and freeing of these pages is a bit fragile.
We only keep the list of allocated pages in each bio, so we need to
still have a valid bio when freeing the pages, which is a bit clumsy.

So simply store the allocated page list in the r1_bio so it can easily
be found and freed when we are finished with the r1_bio.

Signed-off-by: NeilBrown <neilb@suse.de>
parent 7ca78d57
Loading
Loading
Loading
Loading
+26 −29
Original line number Diff line number Diff line
@@ -297,23 +297,24 @@ static void raid1_end_read_request(struct bio *bio, int error)
	rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
}

/*
 * Complete one mirror's share of a RAID1 write request.
 *
 * Drops one reference on r1_bio->remaining; when the last mirror's write
 * finishes, release any write-behind page copies kept in the r1_bio
 * (rather than in a bio's bio_vec list, which would require a still-valid
 * bio at free time), update the bitmap, and end the request.
 *
 * The diff rendering had interleaved the pre- and post-patch lines of this
 * function; this is the post-patch (commit af6d7b76) version.
 */
static void r1_bio_write_done(r1bio_t *r1_bio)
{
	if (atomic_dec_and_test(&r1_bio->remaining))
	{
		/* it really is the end of this request */
		if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
			/* free extra copy of the data pages */
			int i = r1_bio->behind_page_count;
			while (i--)
				safe_put_page(r1_bio->behind_pages[i]);
			kfree(r1_bio->behind_pages);
			r1_bio->behind_pages = NULL;
		}
		/* clear the bitmap if all writes complete successfully */
		bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
				r1_bio->sectors,
				!test_bit(R1BIO_Degraded, &r1_bio->state),
				test_bit(R1BIO_BehindIO, &r1_bio->state));
		md_write_end(r1_bio->mddev);
		raid_end_bio_io(r1_bio);
	}
}
@@ -386,7 +387,7 @@ static void raid1_end_write_request(struct bio *bio, int error)
	 * Let's see if all mirrored write operations have finished
	 * already.
	 */
	r1_bio_write_done(r1_bio, bio->bi_vcnt, bio->bi_io_vec, behind);
	r1_bio_write_done(r1_bio);

	if (to_put)
		bio_put(to_put);
@@ -660,37 +661,36 @@ static void unfreeze_array(conf_t *conf)


/* duplicate the data pages for behind I/O
 *
 * On success, attach the copied pages and their count to the r1_bio
 * (behind_pages / behind_page_count) and set R1BIO_BehindIO so the
 * completion path knows to free them.  On any allocation failure,
 * release whatever was allocated and return without setting the bit,
 * which makes the caller fall back to synchronous I/O.
 *
 * The diff rendering had interleaved the pre- and post-patch lines of this
 * function; this is the post-patch (commit af6d7b76) version.
 */
static void alloc_behind_pages(struct bio *bio, r1bio_t *r1_bio)
{
	int i;
	struct bio_vec *bvec;
	/* kzalloc so unused slots are NULL, which the failure path relies on */
	struct page **pages = kzalloc(bio->bi_vcnt * sizeof(struct page*),
					GFP_NOIO);
	if (unlikely(!pages))
		return;

	bio_for_each_segment(bvec, bio, i) {
		pages[i] = alloc_page(GFP_NOIO);
		if (unlikely(!pages[i]))
			goto do_sync_io;
		/* copy the segment's data into the private page */
		memcpy(kmap(pages[i]) + bvec->bv_offset,
			kmap(bvec->bv_page) + bvec->bv_offset, bvec->bv_len);
		kunmap(pages[i]);
		kunmap(bvec->bv_page);
	}

	r1_bio->behind_pages = pages;
	r1_bio->behind_page_count = bio->bi_vcnt;
	set_bit(R1BIO_BehindIO, &r1_bio->state);
	return;

do_sync_io:
	/* pages past the failed slot are still NULL from kzalloc */
	for (i = 0; i < bio->bi_vcnt; i++)
		if (pages[i])
			put_page(pages[i]);
	kfree(pages);
	PRINTK("%dB behind alloc failed, doing sync I/O\n", bio->bi_size);
}

static int make_request(mddev_t *mddev, struct bio * bio)
@@ -702,7 +702,6 @@ static int make_request(mddev_t *mddev, struct bio * bio)
	int i, targets = 0, disks;
	struct bitmap *bitmap;
	unsigned long flags;
	struct bio_vec *behind_pages = NULL;
	const int rw = bio_data_dir(bio);
	const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
	const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA));
@@ -855,9 +854,8 @@ static int make_request(mddev_t *mddev, struct bio * bio)
	if (bitmap &&
	    (atomic_read(&bitmap->behind_writes)
	     < mddev->bitmap_info.max_write_behind) &&
	    !waitqueue_active(&bitmap->behind_wait) &&
	    (behind_pages = alloc_behind_pages(bio)) != NULL)
		set_bit(R1BIO_BehindIO, &r1_bio->state);
	    !waitqueue_active(&bitmap->behind_wait))
		alloc_behind_pages(bio, r1_bio);

	atomic_set(&r1_bio->remaining, 1);
	atomic_set(&r1_bio->behind_remaining, 0);
@@ -878,7 +876,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
		mbio->bi_rw = WRITE | do_flush_fua | do_sync;
		mbio->bi_private = r1_bio;

		if (behind_pages) {
		if (r1_bio->behind_pages) {
			struct bio_vec *bvec;
			int j;

@@ -890,7 +888,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
			 * them all
			 */
			__bio_for_each_segment(bvec, mbio, j, 0)
				bvec->bv_page = behind_pages[j].bv_page;
				bvec->bv_page = r1_bio->behind_pages[j];
			if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags))
				atomic_inc(&r1_bio->behind_remaining);
		}
@@ -900,8 +898,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
		bio_list_add(&conf->pending_bio_list, mbio);
		spin_unlock_irqrestore(&conf->device_lock, flags);
	}
	r1_bio_write_done(r1_bio, bio->bi_vcnt, behind_pages, behind_pages != NULL);
	kfree(behind_pages); /* the behind pages are attached to the bios now */
	r1_bio_write_done(r1_bio);

	/* In case raid1d snuck in to freeze_array */
	wake_up(&conf->wait_barrier);
+3 −1
Original line number Diff line number Diff line
@@ -94,7 +94,9 @@ struct r1bio_s {
	int			read_disk;

	struct list_head	retry_list;
	struct bitmap_update	*bitmap_update;
	/* Next two are only valid when R1BIO_BehindIO is set */
	struct page		**behind_pages;
	int			behind_page_count;
	/*
	 * if the IO is in WRITE direction, then multiple bios are used.
	 * We choose the number when they are allocated.