Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 48b8d208 authored by Hans Holmberg's avatar Hans Holmberg Committed by Jens Axboe
Browse files

lightnvm: pblk: garbage collect lines with failed writes



Write failures should not happen under normal circumstances,
so in order to bring the chunk back into a known state as soon
as possible, evacuate all the valid data out of the line and let the
firmware judge if the block can be written to in the next reset cycle.

Do this by introducing a new gc list for lines with failed writes,
and ensure that the rate limiter allocates a small portion of
the write bandwidth to get the job done.

The lba list is saved in memory for use during gc as we
cannot guarantee that the emeta data is readable if a write
error occurred.

Signed-off-by: default avatarHans Holmberg <hans.holmberg@cnexlabs.com>
Reviewed-by: default avatarJavier González <javier@cnexlabs.com>
Signed-off-by: default avatarMatias Bjørling <mb@lightnvm.io>
Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent 6a3abf5b
Loading
Loading
Loading
Loading
+43 −2
Original line number Diff line number Diff line
@@ -373,7 +373,13 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)

	lockdep_assert_held(&line->lock);

	if (!vsc) {
	if (line->w_err_gc->has_write_err) {
		if (line->gc_group != PBLK_LINEGC_WERR) {
			line->gc_group = PBLK_LINEGC_WERR;
			move_list = &l_mg->gc_werr_list;
			pblk_rl_werr_line_in(&pblk->rl);
		}
	} else if (!vsc) {
		if (line->gc_group != PBLK_LINEGC_FULL) {
			line->gc_group = PBLK_LINEGC_FULL;
			move_list = &l_mg->gc_full_list;
@@ -1589,8 +1595,13 @@ static void __pblk_line_put(struct pblk *pblk, struct pblk_line *line)
	line->state = PBLK_LINESTATE_FREE;
	line->gc_group = PBLK_LINEGC_NONE;
	pblk_line_free(line);
	spin_unlock(&line->lock);

	if (line->w_err_gc->has_write_err) {
		pblk_rl_werr_line_out(&pblk->rl);
		line->w_err_gc->has_write_err = 0;
	}

	spin_unlock(&line->lock);
	atomic_dec(&gc->pipeline_gc);

	spin_lock(&l_mg->free_lock);
@@ -1753,11 +1764,34 @@ void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line)

	spin_lock(&l_mg->close_lock);
	spin_lock(&line->lock);

	/* Update the in-memory start address for emeta, in case it has
	 * shifted due to write errors
	 */
	if (line->emeta_ssec != line->cur_sec)
		line->emeta_ssec = line->cur_sec;

	list_add_tail(&line->list, &l_mg->emeta_list);
	spin_unlock(&line->lock);
	spin_unlock(&l_mg->close_lock);

	pblk_line_should_sync_meta(pblk);


}

/* Keep an in-memory copy of the line's lba list. A write error makes the
 * on-media emeta unreliable, so gc uses this copy instead when the line
 * has w_err_gc->has_write_err set.
 */
static void pblk_save_lba_list(struct pblk *pblk, struct pblk_line *line)
{
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	unsigned int lba_list_size = lm->emeta_len[2];
	struct pblk_w_err_gc *w_err_gc = line->w_err_gc;
	struct pblk_emeta *emeta = line->emeta;

	w_err_gc->lba_list = pblk_malloc(lba_list_size,
					 l_mg->emeta_alloc_type, GFP_KERNEL);
	if (!w_err_gc->lba_list)
		return;	/* avoid memcpy to NULL; lba_list stays NULL on OOM */

	memcpy(w_err_gc->lba_list, emeta_to_lbas(pblk, emeta->buf),
				lba_list_size);
}

void pblk_line_close_ws(struct work_struct *work)
@@ -1766,6 +1800,13 @@ void pblk_line_close_ws(struct work_struct *work)
									ws);
	struct pblk *pblk = line_ws->pblk;
	struct pblk_line *line = line_ws->line;
	struct pblk_w_err_gc *w_err_gc = line->w_err_gc;

	/* Write errors makes the emeta start address stored in smeta invalid,
	 * so keep a copy of the lba list until we've gc'd the line
	 */
	if (w_err_gc->has_write_err)
		pblk_save_lba_list(pblk, line);

	pblk_line_close(pblk, line);
	mempool_free(line_ws, &pblk->gen_ws_pool);
+65 −37
Original line number Diff line number Diff line
@@ -129,6 +129,53 @@ static void pblk_gc_line_ws(struct work_struct *work)
	kfree(gc_rq_ws);
}

/* Read the line's emeta from media and return a freshly allocated copy of
 * its lba list. Returns NULL if the emeta cannot be read, fails its
 * consistency check, or memory allocation fails. The caller owns the
 * returned buffer and must free it with pblk_mfree().
 */
static __le64 *get_lba_list_from_emeta(struct pblk *pblk,
				       struct pblk_line *line)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line_meta *lm = &pblk->lm;
	unsigned int lba_list_size = lm->emeta_len[2];
	struct line_emeta *emeta_buf;
	__le64 *lba_list = NULL;
	int ret;

	emeta_buf = pblk_malloc(lm->emeta_len[0],
				l_mg->emeta_alloc_type, GFP_KERNEL);
	if (!emeta_buf)
		return NULL;

	ret = pblk_line_read_emeta(pblk, line, emeta_buf);
	if (ret) {
		pr_err("pblk: line %d read emeta failed (%d)\n",
				line->id, ret);
		goto free_emeta;
	}

	/* A failed consistency check means the emeta is corrupted.
	 * For now, leave the line untouched.
	 * TODO: Implement a recovery routine that scans and moves
	 * all sectors on the line.
	 */
	ret = pblk_recov_check_emeta(pblk, emeta_buf);
	if (ret) {
		pr_err("pblk: inconsistent emeta (line %d)\n",
				line->id);
		goto free_emeta;
	}

	lba_list = pblk_malloc(lba_list_size,
			       l_mg->emeta_alloc_type, GFP_KERNEL);
	if (lba_list)
		memcpy(lba_list, emeta_to_lbas(pblk, emeta_buf), lba_list_size);

free_emeta:
	pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
	return lba_list;
}

static void pblk_gc_line_prepare_ws(struct work_struct *work)
{
	struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
@@ -138,46 +185,26 @@ static void pblk_gc_line_prepare_ws(struct work_struct *work)
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_gc *gc = &pblk->gc;
	struct line_emeta *emeta_buf;
	struct pblk_line_ws *gc_rq_ws;
	struct pblk_gc_rq *gc_rq;
	__le64 *lba_list;
	unsigned long *invalid_bitmap;
	int sec_left, nr_secs, bit;
	int ret;

	invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_KERNEL);
	if (!invalid_bitmap)
		goto fail_free_ws;

	emeta_buf = pblk_malloc(lm->emeta_len[0], l_mg->emeta_alloc_type,
								GFP_KERNEL);
	if (!emeta_buf) {
		pr_err("pblk: cannot use GC emeta\n");
		goto fail_free_bitmap;
	}

	ret = pblk_line_read_emeta(pblk, line, emeta_buf);
	if (ret) {
		pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
		goto fail_free_emeta;
	}

	/* If this read fails, it means that emeta is corrupted. For now, leave
	 * the line untouched. TODO: Implement a recovery routine that scans and
	 * moves all sectors on the line.
	 */

	ret = pblk_recov_check_emeta(pblk, emeta_buf);
	if (ret) {
		pr_err("pblk: inconsistent emeta (line %d)\n", line->id);
		goto fail_free_emeta;
	}

	lba_list = emeta_to_lbas(pblk, emeta_buf);
	if (line->w_err_gc->has_write_err) {
		lba_list = line->w_err_gc->lba_list;
		line->w_err_gc->lba_list = NULL;
	} else {
		lba_list = get_lba_list_from_emeta(pblk, line);
		if (!lba_list) {
		pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
		goto fail_free_emeta;
			pr_err("pblk: could not interpret emeta (line %d)\n",
					line->id);
			goto fail_free_ws;
		}
	}

	spin_lock(&line->lock);
@@ -187,14 +214,14 @@ static void pblk_gc_line_prepare_ws(struct work_struct *work)

	if (sec_left < 0) {
		pr_err("pblk: corrupted GC line (%d)\n", line->id);
		goto fail_free_emeta;
		goto fail_free_lba_list;
	}

	bit = -1;
next_rq:
	gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
	if (!gc_rq)
		goto fail_free_emeta;
		goto fail_free_lba_list;

	nr_secs = 0;
	do {
@@ -240,7 +267,7 @@ static void pblk_gc_line_prepare_ws(struct work_struct *work)
		goto next_rq;

out:
	pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
	pblk_mfree(lba_list, l_mg->emeta_alloc_type);
	kfree(line_ws);
	kfree(invalid_bitmap);

@@ -251,9 +278,8 @@ static void pblk_gc_line_prepare_ws(struct work_struct *work)

fail_free_gc_rq:
	kfree(gc_rq);
fail_free_emeta:
	pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
fail_free_bitmap:
fail_free_lba_list:
	pblk_mfree(lba_list, l_mg->emeta_alloc_type);
	kfree(invalid_bitmap);
fail_free_ws:
	kfree(line_ws);
@@ -349,12 +375,14 @@ static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk,
/* Decide whether gc should run: either there are lines with write errors
 * that need to be evacuated, or gc is active and the number of free
 * blocks has dropped below the high threshold.
 *
 * Note: the page scrape left the stale pre-patch return statement in
 * place, making the patched return unreachable; only the patched
 * condition is kept here.
 */
static bool pblk_gc_should_run(struct pblk_gc *gc, struct pblk_rl *rl)
{
	unsigned int nr_blocks_free, nr_blocks_need;
	unsigned int werr_lines = atomic_read(&rl->werr_lines);

	nr_blocks_need = pblk_rl_high_thrs(rl);
	nr_blocks_free = pblk_rl_nr_free_blks(rl);

	/* This is not critical, no need to take lock here */
	return ((werr_lines > 0) ||
		((gc->gc_active) && (nr_blocks_need > nr_blocks_free)));
}

void pblk_gc_free_full_lines(struct pblk *pblk)
+31 −15
Original line number Diff line number Diff line
@@ -493,11 +493,17 @@ static void pblk_line_mg_free(struct pblk *pblk)
	}
}

static void pblk_line_meta_free(struct pblk_line *line)
static void pblk_line_meta_free(struct pblk_line_mgmt *l_mg,
				struct pblk_line *line)
{
	struct pblk_w_err_gc *w_err_gc = line->w_err_gc;

	kfree(line->blk_bitmap);
	kfree(line->erase_bitmap);
	kfree(line->chks);

	pblk_mfree(w_err_gc->lba_list, l_mg->emeta_alloc_type);
	kfree(w_err_gc);
}

static void pblk_lines_free(struct pblk *pblk)
@@ -511,7 +517,7 @@ static void pblk_lines_free(struct pblk *pblk)
		line = &pblk->lines[i];

		pblk_line_free(line);
		pblk_line_meta_free(line);
		pblk_line_meta_free(l_mg, line);
	}
	spin_unlock(&l_mg->free_lock);

@@ -813,22 +819,30 @@ static int pblk_alloc_line_meta(struct pblk *pblk, struct pblk_line *line)
		return -ENOMEM;

	line->erase_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
	if (!line->erase_bitmap) {
		kfree(line->blk_bitmap);
		return -ENOMEM;
	}
	if (!line->erase_bitmap)
		goto free_blk_bitmap;


	line->chks = kmalloc(lm->blk_per_line * sizeof(struct nvm_chk_meta),
								GFP_KERNEL);
	if (!line->chks) {
	if (!line->chks)
		goto free_erase_bitmap;

	line->w_err_gc = kzalloc(sizeof(struct pblk_w_err_gc), GFP_KERNEL);
	if (!line->w_err_gc)
		goto free_chks;

	return 0;

free_chks:
	kfree(line->chks);
free_erase_bitmap:
	kfree(line->erase_bitmap);
free_blk_bitmap:
	kfree(line->blk_bitmap);
	return -ENOMEM;
}

	return 0;
}

static int pblk_line_mg_init(struct pblk *pblk)
{
	struct nvm_tgt_dev *dev = pblk->dev;
@@ -851,12 +865,14 @@ static int pblk_line_mg_init(struct pblk *pblk)
	INIT_LIST_HEAD(&l_mg->gc_mid_list);
	INIT_LIST_HEAD(&l_mg->gc_low_list);
	INIT_LIST_HEAD(&l_mg->gc_empty_list);
	INIT_LIST_HEAD(&l_mg->gc_werr_list);

	INIT_LIST_HEAD(&l_mg->emeta_list);

	l_mg->gc_lists[0] = &l_mg->gc_high_list;
	l_mg->gc_lists[1] = &l_mg->gc_mid_list;
	l_mg->gc_lists[2] = &l_mg->gc_low_list;
	l_mg->gc_lists[0] = &l_mg->gc_werr_list;
	l_mg->gc_lists[1] = &l_mg->gc_high_list;
	l_mg->gc_lists[2] = &l_mg->gc_mid_list;
	l_mg->gc_lists[3] = &l_mg->gc_low_list;

	spin_lock_init(&l_mg->free_lock);
	spin_lock_init(&l_mg->close_lock);
@@ -1063,7 +1079,7 @@ static int pblk_lines_init(struct pblk *pblk)

fail_free_lines:
	while (--i >= 0)
		pblk_line_meta_free(&pblk->lines[i]);
		pblk_line_meta_free(l_mg, &pblk->lines[i]);
	kfree(pblk->lines);
fail_free_chunk_meta:
	kfree(chunk_meta);
+25 −4
Original line number Diff line number Diff line
@@ -73,6 +73,16 @@ void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries)
	pblk_rl_kick_u_timer(rl);
}

/* Account one more line with write errors in the rate limiter; a nonzero
 * werr_lines count makes the rate limiter reserve gc bandwidth for
 * evacuating such lines.
 */
void pblk_rl_werr_line_in(struct pblk_rl *rl)
{
	atomic_inc(&rl->werr_lines);
}

/* A line with write errors has been fully gc'd and freed; drop it from
 * the rate limiter's write-error line count.
 */
void pblk_rl_werr_line_out(struct pblk_rl *rl)
{
	atomic_dec(&rl->werr_lines);
}

void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries)
{
	atomic_add(nr_entries, &rl->rb_gc_cnt);
@@ -99,11 +109,21 @@ static void __pblk_rl_update_rates(struct pblk_rl *rl,
{
	struct pblk *pblk = container_of(rl, struct pblk, rl);
	int max = rl->rb_budget;
	int werr_gc_needed = atomic_read(&rl->werr_lines);

	if (free_blocks >= rl->high) {
		if (werr_gc_needed) {
			/* Allocate a small budget for recovering
			 * lines with write errors
			 */
			rl->rb_gc_max = 1 << rl->rb_windows_pw;
			rl->rb_user_max = max - rl->rb_gc_max;
			rl->rb_state = PBLK_RL_WERR;
		} else {
			rl->rb_user_max = max;
			rl->rb_gc_max = 0;
		rl->rb_state = PBLK_RL_HIGH;
			rl->rb_state = PBLK_RL_OFF;
		}
	} else if (free_blocks < rl->high) {
		int shift = rl->high_pw - rl->rb_windows_pw;
		int user_windows = free_blocks >> shift;
@@ -124,7 +144,7 @@ static void __pblk_rl_update_rates(struct pblk_rl *rl,
		rl->rb_state = PBLK_RL_LOW;
	}

	if (rl->rb_state == (PBLK_RL_MID | PBLK_RL_LOW))
	if (rl->rb_state != PBLK_RL_OFF)
		pblk_gc_should_start(pblk);
	else
		pblk_gc_should_stop(pblk);
@@ -221,6 +241,7 @@ void pblk_rl_init(struct pblk_rl *rl, int budget)
	atomic_set(&rl->rb_user_cnt, 0);
	atomic_set(&rl->rb_gc_cnt, 0);
	atomic_set(&rl->rb_space, -1);
	atomic_set(&rl->werr_lines, 0);

	timer_setup(&rl->u_timer, pblk_rl_u_timer, 0);

+13 −2
Original line number Diff line number Diff line
@@ -173,6 +173,8 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
	int free_line_cnt = 0, closed_line_cnt = 0, emeta_line_cnt = 0;
	int d_line_cnt = 0, l_line_cnt = 0;
	int gc_full = 0, gc_high = 0, gc_mid = 0, gc_low = 0, gc_empty = 0;
	int gc_werr = 0;

	int bad = 0, cor = 0;
	int msecs = 0, cur_sec = 0, vsc = 0, sec_in_line = 0;
	int map_weight = 0, meta_weight = 0;
@@ -237,6 +239,15 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
		gc_empty++;
	}

	list_for_each_entry(line, &l_mg->gc_werr_list, list) {
		if (line->type == PBLK_LINETYPE_DATA)
			d_line_cnt++;
		else if (line->type == PBLK_LINETYPE_LOG)
			l_line_cnt++;
		closed_line_cnt++;
		gc_werr++;
	}

	list_for_each_entry(line, &l_mg->bad_list, list)
		bad++;
	list_for_each_entry(line, &l_mg->corrupt_list, list)
@@ -275,8 +286,8 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
					l_mg->nr_lines);

	sz += snprintf(page + sz, PAGE_SIZE - sz,
		"GC: full:%d, high:%d, mid:%d, low:%d, empty:%d, queue:%d\n",
			gc_full, gc_high, gc_mid, gc_low, gc_empty,
		"GC: full:%d, high:%d, mid:%d, low:%d, empty:%d, werr: %d, queue:%d\n",
			gc_full, gc_high, gc_mid, gc_low, gc_empty, gc_werr,
			atomic_read(&pblk->gc.read_inflight_gc));

	sz += snprintf(page + sz, PAGE_SIZE - sz,
Loading