Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d7bd398e authored by Song Liu, committed by Shaohua Li
Browse files

md/r5cache: handle alloc_page failure



RMW of r5c write back cache uses an extra page to store old data for
prexor. handle_stripe_dirtying() allocates this page by calling
alloc_page(). However, alloc_page() may fail.

To handle alloc_page() failures, this patch adds an extra page to
disk_info. When alloc_page() fails, handle_stripe() tries to use these
pages. When these pages are already in use by another stripe
(R5C_EXTRA_PAGE_IN_USE), the stripe is added to delayed_list.

Signed-off-by: Song Liu <songliubraving@fb.com>
Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
parent 034e33f5
Loading
Loading
Loading
Loading
+26 −1
Original line number Diff line number Diff line
@@ -2326,15 +2326,40 @@ int r5c_try_caching_write(struct r5conf *conf,
 */
void r5c_release_extra_page(struct stripe_head *sh)
{
	struct r5conf *conf = sh->raid_conf;
	/*
	 * Device 0 is used as the indicator for the whole stripe: if its
	 * orig_page is the disk_info extra_page, the stripe is treated as
	 * borrowing the shared per-disk pages rather than owning private
	 * ones.
	 */
	bool borrowed = (sh->dev[0].orig_page == conf->disks[0].extra_page);
	int idx = sh->disks;

	/* Walk the devices from the last one down to device 0. */
	while (idx--) {
		struct page *old = sh->dev[idx].orig_page;

		/* Nothing to undo when orig_page already equals page. */
		if (old == sh->dev[idx].page)
			continue;

		sh->dev[idx].orig_page = sh->dev[idx].page;

		/*
		 * Borrowed pages stay attached to disk_info; only a page
		 * that is not one of those gets its reference dropped.
		 */
		if (borrowed)
			continue;
		put_page(old);
	}

	if (!borrowed)
		return;

	/*
	 * Mark the shared extra pages as free again and wake the md
	 * thread (stripes that found the pages busy are put on the
	 * delayed list by handle_stripe_dirtying()).
	 */
	clear_bit(R5C_EXTRA_PAGE_IN_USE, &conf->cache_state);
	md_wakeup_thread(conf->mddev->thread);
}

/*
 * Switch every device of @sh over to the per-disk extra_page kept in
 * conf->disks[].  If a device currently has an orig_page that differs
 * from its page, the reference on that orig_page is dropped before it
 * is replaced.
 *
 * This function does not touch R5C_EXTRA_PAGE_IN_USE itself; the caller
 * in handle_stripe_dirtying() sets that bit before calling here.
 */
void r5c_use_extra_page(struct stripe_head *sh)
{
	struct r5conf *conf = sh->raid_conf;
	int idx = sh->disks;

	/* Walk the devices from the last one down to device 0. */
	while (idx--) {
		struct r5dev *dev = &sh->dev[idx];

		if (dev->page != dev->orig_page)
			put_page(dev->orig_page);

		dev->orig_page = conf->disks[idx].extra_page;
	}
}

/*
+66 −12
Original line number Diff line number Diff line
@@ -876,6 +876,8 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)

	if (!test_bit(STRIPE_R5C_CACHING, &sh->state)) {
		/* writing out phase */
		if (s->waiting_extra_page)
			return;
		if (r5l_write_stripe(conf->log, sh) == 0)
			return;
	} else {  /* caching phase */
@@ -2007,6 +2009,7 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
		INIT_LIST_HEAD(&sh->batch_list);
		INIT_LIST_HEAD(&sh->lru);
		INIT_LIST_HEAD(&sh->r5c);
		INIT_LIST_HEAD(&sh->log_list);
		atomic_set(&sh->count, 1);
		sh->log_start = MaxSector;
		for (i = 0; i < disks; i++) {
@@ -2253,10 +2256,24 @@ static int resize_stripes(struct r5conf *conf, int newsize)
	 */
	ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO);
	if (ndisks) {
		for (i=0; i<conf->raid_disks; i++)
		for (i = 0; i < conf->pool_size; i++)
			ndisks[i] = conf->disks[i];

		for (i = conf->pool_size; i < newsize; i++) {
			ndisks[i].extra_page = alloc_page(GFP_NOIO);
			if (!ndisks[i].extra_page)
				err = -ENOMEM;
		}

		if (err) {
			for (i = conf->pool_size; i < newsize; i++)
				if (ndisks[i].extra_page)
					put_page(ndisks[i].extra_page);
			kfree(ndisks);
		} else {
			kfree(conf->disks);
			conf->disks = ndisks;
		}
	} else
		err = -ENOMEM;

@@ -3580,7 +3597,7 @@ static void handle_stripe_clean_event(struct r5conf *conf,
		break_stripe_batch_list(head_sh, STRIPE_EXPAND_SYNC_FLAGS);
}

static void handle_stripe_dirtying(struct r5conf *conf,
static int handle_stripe_dirtying(struct r5conf *conf,
				  struct stripe_head *sh,
				  struct stripe_head_state *s,
				  int disks)
@@ -3649,12 +3666,32 @@ static void handle_stripe_dirtying(struct r5conf *conf,
			    dev->page == dev->orig_page &&
			    !test_bit(R5_LOCKED, &sh->dev[sh->pd_idx].flags)) {
				/* alloc page for prexor */
				dev->orig_page = alloc_page(GFP_NOIO);
				struct page *p = alloc_page(GFP_NOIO);

				/* will handle failure in a later patch*/
				BUG_ON(!dev->orig_page);
				if (p) {
					dev->orig_page = p;
					continue;
				}

				/*
				 * alloc_page() failed, try use
				 * disk_info->extra_page
				 */
				if (!test_and_set_bit(R5C_EXTRA_PAGE_IN_USE,
						      &conf->cache_state)) {
					r5c_use_extra_page(sh);
					break;
				}

				/* extra_page in use, add to delayed_list */
				set_bit(STRIPE_DELAYED, &sh->state);
				s->waiting_extra_page = 1;
				return -EAGAIN;
			}
		}

		for (i = disks; i--; ) {
			struct r5dev *dev = &sh->dev[i];
			if ((dev->towrite ||
			     i == sh->pd_idx || i == sh->qd_idx ||
			     test_bit(R5_InJournal, &dev->flags)) &&
@@ -3730,6 +3767,7 @@ static void handle_stripe_dirtying(struct r5conf *conf,
	    (s->locked == 0 && (rcw == 0 || rmw == 0) &&
	     !test_bit(STRIPE_BIT_DELAY, &sh->state)))
		schedule_reconstruction(sh, s, rcw == 0, 0);
	return 0;
}

static void handle_parity_checks5(struct r5conf *conf, struct stripe_head *sh,
@@ -4545,8 +4583,12 @@ static void handle_stripe(struct stripe_head *sh)
			if (ret == -EAGAIN ||
			    /* stripe under reclaim: !caching && injournal */
			    (!test_bit(STRIPE_R5C_CACHING, &sh->state) &&
			     s.injournal > 0))
				handle_stripe_dirtying(conf, sh, &s, disks);
			     s.injournal > 0)) {
				ret = handle_stripe_dirtying(conf, sh, &s,
							     disks);
				if (ret == -EAGAIN)
					goto finish;
			}
		}
	}

@@ -6458,6 +6500,8 @@ static void raid5_free_percpu(struct r5conf *conf)

static void free_conf(struct r5conf *conf)
{
	int i;

	if (conf->log)
		r5l_exit_log(conf->log);
	if (conf->shrinker.nr_deferred)
@@ -6466,6 +6510,9 @@ static void free_conf(struct r5conf *conf)
	free_thread_groups(conf);
	shrink_stripes(conf);
	raid5_free_percpu(conf);
	for (i = 0; i < conf->pool_size; i++)
		if (conf->disks[i].extra_page)
			put_page(conf->disks[i].extra_page);
	kfree(conf->disks);
	kfree(conf->stripe_hashtbl);
	kfree(conf);
@@ -6612,9 +6659,16 @@ static struct r5conf *setup_conf(struct mddev *mddev)

	conf->disks = kzalloc(max_disks * sizeof(struct disk_info),
			      GFP_KERNEL);

	if (!conf->disks)
		goto abort;

	for (i = 0; i < max_disks; i++) {
		conf->disks[i].extra_page = alloc_page(GFP_KERNEL);
		if (!conf->disks[i].extra_page)
			goto abort;
	}

	conf->mddev = mddev;

	if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
+6 −0
Original line number Diff line number Diff line
@@ -276,6 +276,7 @@ struct stripe_head_state {
	struct md_rdev *blocked_rdev;
	int handle_bad_blocks;
	int log_failed;
	int waiting_extra_page;
};

/* Flags for struct r5dev.flags */
@@ -439,6 +440,7 @@ enum {

/*
 * One entry of the conf->disks[] array.
 *
 * NOTE(review): rdev/replacement presumably reference the member device
 * and its replacement device; their semantics are not visible here.
 */
struct disk_info {
	struct md_rdev	*rdev;
	struct md_rdev	*replacement;
	/*
	 * Extra page to use in prexor: the fallback used when alloc_page()
	 * fails in handle_stripe_dirtying().  Allocated in setup_conf() and
	 * resize_stripes(), released in free_conf().
	 */
	struct page	*extra_page;
};

/*
@@ -559,6 +561,9 @@ enum r5_cache_state {
				 * only process stripes that are already
				 * occupying the log
				 */
	R5C_EXTRA_PAGE_IN_USE,	/* a stripe is using disk_info.extra_page
				 * for prexor
				 */
};

struct r5conf {
@@ -765,6 +770,7 @@ extern void
r5c_finish_stripe_write_out(struct r5conf *conf, struct stripe_head *sh,
			    struct stripe_head_state *s);
extern void r5c_release_extra_page(struct stripe_head *sh);
extern void r5c_use_extra_page(struct stripe_head *sh);
extern void r5l_wake_reclaim(struct r5l_log *log, sector_t space);
extern void r5c_handle_cached_data_endio(struct r5conf *conf,
	struct stripe_head *sh, int disks, struct bio_list *return_bi);