Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit dd2a4343 authored by Javier González's avatar Javier González Committed by Jens Axboe
Browse files

lightnvm: pblk: sched. metadata on write thread



At the moment, line metadata is persisted on a separate work queue, that
is kicked each time that a line is closed. The assumption when designing
this was that freeing the write thread from creating a new write request
was better than the potential impact of writes colliding on the media
(user I/O and metadata I/O). Experimentation has proven that this
assumption is wrong; collisions can cost up to 25% of bandwidth and
introduce long tail latencies on the write thread, which potentially
cause user write threads to spend more time spinning to get a free entry
on the write buffer.

This patch moves the metadata logic to the write thread. When a line is
closed, remaining metadata is written in memory and is placed on a
metadata queue. The write thread then takes the metadata corresponding
to the previous line, creates the write request and schedules it to
minimize collisions on the media. Using this approach, we see that we
can saturate the media's bandwidth, which helps reduce both write
latencies and the spinning time for user write threads.

Signed-off-by: default avatarJavier González <javier@cnexlabs.com>
Signed-off-by: default avatarMatias Bjørling <matias@cnexlabs.com>
Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent 084ec9ba
Loading
Loading
Loading
Loading
+137 −79
Original line number Diff line number Diff line
@@ -87,7 +87,7 @@ static void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
		spin_unlock(&line->lock);
		return;
	}
	line->vsc--;
	le32_add_cpu(line->vsc, -1);

	if (line->state == PBLK_LINESTATE_CLOSED)
		move_list = pblk_line_gc_list(pblk, line);
@@ -306,28 +306,29 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct list_head *move_list = NULL;
	int vsc = le32_to_cpu(*line->vsc);

	if (!line->vsc) {
	if (!vsc) {
		if (line->gc_group != PBLK_LINEGC_FULL) {
			line->gc_group = PBLK_LINEGC_FULL;
			move_list = &l_mg->gc_full_list;
		}
	} else if (line->vsc < lm->mid_thrs) {
	} else if (vsc < lm->mid_thrs) {
		if (line->gc_group != PBLK_LINEGC_HIGH) {
			line->gc_group = PBLK_LINEGC_HIGH;
			move_list = &l_mg->gc_high_list;
		}
	} else if (line->vsc < lm->high_thrs) {
	} else if (vsc < lm->high_thrs) {
		if (line->gc_group != PBLK_LINEGC_MID) {
			line->gc_group = PBLK_LINEGC_MID;
			move_list = &l_mg->gc_mid_list;
		}
	} else if (line->vsc < line->sec_in_line) {
	} else if (vsc < line->sec_in_line) {
		if (line->gc_group != PBLK_LINEGC_LOW) {
			line->gc_group = PBLK_LINEGC_LOW;
			move_list = &l_mg->gc_low_list;
		}
	} else if (line->vsc == line->sec_in_line) {
	} else if (vsc == line->sec_in_line) {
		if (line->gc_group != PBLK_LINEGC_EMPTY) {
			line->gc_group = PBLK_LINEGC_EMPTY;
			move_list = &l_mg->gc_empty_list;
@@ -337,7 +338,7 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
		line->gc_group = PBLK_LINEGC_NONE;
		move_list =  &l_mg->corrupt_list;
		pr_err("pblk: corrupted vsc for line %d, vsc:%d (%d/%d/%d)\n",
						line->id, line->vsc,
						line->id, vsc,
						line->sec_in_line,
						lm->high_thrs, lm->mid_thrs);
	}
@@ -496,8 +497,20 @@ int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
	return secs_to_sync;
}

static u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line,
			     int nr_secs)
/*
 * Roll back the last nr_secs sector allocations on @line: clear their bits
 * in the line map bitmap and rewind the line's write pointer (cur_sec).
 *
 * NOTE(review): no locking is visible in this function; presumably the
 * caller serializes access to line->map_bitmap and line->cur_sec (e.g.
 * holds line->lock or runs on the single write thread) — confirm.
 */
void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
{
	u64 addr;
	int i;

	/* First unallocated sector in the line; all bits below it are set */
	addr = find_next_zero_bit(line->map_bitmap,
					pblk->lm.sec_per_line, line->cur_sec);
	line->cur_sec = addr - nr_secs;

	/* NOTE(review): cur_sec is decremented while clearing, so the bits
	 * cleared run *downwards* from (addr - nr_secs) and cur_sec ends up
	 * nr_secs below the first cleared bit — verify this is intended
	 * rather than clearing the range [addr - nr_secs, addr).
	 */
	for (i = 0; i < nr_secs; i++, line->cur_sec--)
		WARN_ON(!test_and_clear_bit(line->cur_sec, line->map_bitmap));
}

u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
{
	u64 addr;
	int i;
@@ -532,12 +545,24 @@ u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
	return addr;
}

/*
 * Return the current write pointer of @line: the first sector, at or after
 * line->cur_sec, whose bit is still clear in the line map bitmap.
 * line->lock is taken so the bitmap is read consistently against
 * concurrent mappers.
 */
u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line)
{
	u64 paddr;

	spin_lock(&line->lock);
	paddr = find_next_zero_bit(line->map_bitmap,
					pblk->lm.sec_per_line, line->cur_sec);
	spin_unlock(&line->lock);

	return paddr;
}

/*
 * Submit emeta to one LUN in the raid line at the time to avoid a deadlock when
 * taking the per LUN semaphore.
 */
static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
				     u64 paddr, int dir)
				     void *emeta_buf, u64 paddr, int dir)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
@@ -546,9 +571,8 @@ static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
	struct nvm_rq rqd;
	struct ppa_addr *ppa_list;
	dma_addr_t dma_ppa_list;
	void *emeta = line->emeta;
	int min = pblk->min_write_pgs;
	int left_ppas = lm->emeta_sec;
	int left_ppas = lm->emeta_sec[0];
	int id = line->id;
	int rq_ppas, rq_len;
	int cmd_op, bio_op;
@@ -578,7 +602,7 @@ static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
	rq_len = rq_ppas * geo->sec_size;

	bio = pblk_bio_map_addr(pblk, emeta, rq_ppas, rq_len, GFP_KERNEL);
	bio = pblk_bio_map_addr(pblk, emeta_buf, rq_ppas, rq_len, GFP_KERNEL);
	if (IS_ERR(bio)) {
		ret = PTR_ERR(bio);
		goto free_rqd_dma;
@@ -660,7 +684,7 @@ static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
			pblk_log_read_err(pblk, &rqd);
	}

	emeta += rq_len;
	emeta_buf += rq_len;
	left_ppas -= rq_ppas;
	if (left_ppas)
		goto next_rq;
@@ -701,7 +725,7 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
		bio_op = REQ_OP_WRITE;
		cmd_op = NVM_OP_PWRITE;
		flags = pblk_set_progr_mode(pblk, WRITE);
		lba_list = pblk_line_emeta_to_lbas(line->emeta);
		lba_list = emeta_to_lbas(pblk, line->emeta->buf);
	} else if (dir == READ) {
		bio_op = REQ_OP_READ;
		cmd_op = NVM_OP_PREAD;
@@ -775,9 +799,11 @@ int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line)
	return pblk_line_submit_smeta_io(pblk, line, bpaddr, READ);
}

int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line)
int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line,
			 void *emeta_buf)
{
	return pblk_line_submit_emeta_io(pblk, line, line->emeta_ssec, READ);
	return pblk_line_submit_emeta_io(pblk, line, emeta_buf,
						line->emeta_ssec, READ);
}

static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
@@ -863,18 +889,47 @@ int pblk_line_erase(struct pblk *pblk, struct pblk_line *line)
	return 0;
}

/*
 * Claim a free metadata slot for @line and attach the preallocated
 * smeta/emeta buffers for that slot, zeroed and with the emeta write
 * progress counters reset.
 *
 * Must be called with l_mg->free_lock held: when no slot is free, the
 * lock is dropped around io_schedule() so whoever holds a slot can
 * release it, and then retaken before retrying.
 */
static void pblk_line_setup_metadata(struct pblk_line *line,
				     struct pblk_line_mgmt *l_mg,
				     struct pblk_line_meta *lm)
{
	int meta_line;

retry_meta:
	meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
	if (meta_line == PBLK_DATA_LINES) {
		/* All slots busy: yield with free_lock released, then retry */
		spin_unlock(&l_mg->free_lock);
		io_schedule();
		spin_lock(&l_mg->free_lock);
		goto retry_meta;
	}

	/* Reserve the slot and remember it so it can be freed on close */
	set_bit(meta_line, &l_mg->meta_bitmap);
	line->meta_line = meta_line;

	line->smeta = l_mg->sline_meta[meta_line];
	line->emeta = l_mg->eline_meta[meta_line];

	memset(line->smeta, 0, lm->smeta_len);
	memset(line->emeta->buf, 0, lm->emeta_len[0]);

	/* Fresh emeta: nothing buffered in memory, nothing synced to media */
	line->emeta->mem = 0;
	atomic_set(&line->emeta->sync, 0);
}

/* For now lines are always assumed full lines. Thus, smeta former and current
 * lun bitmaps are omitted.
 */
static int pblk_line_set_metadata(struct pblk *pblk, struct pblk_line *line,
static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line,
				  struct pblk_line *cur)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct line_smeta *smeta = line->smeta;
	struct line_emeta *emeta = line->emeta;
	struct pblk_emeta *emeta = line->emeta;
	struct line_emeta *emeta_buf = emeta->buf;
	struct line_smeta *smeta_buf = (struct line_smeta *)line->smeta;
	int nr_blk_line;

	/* After erasing the line, new bad blocks might appear and we risk
@@ -897,42 +952,44 @@ static int pblk_line_set_metadata(struct pblk *pblk, struct pblk_line *line,
	}

	/* Run-time metadata */
	line->lun_bitmap = ((void *)(smeta)) + sizeof(struct line_smeta);
	line->lun_bitmap = ((void *)(smeta_buf)) + sizeof(struct line_smeta);

	/* Mark LUNs allocated in this line (all for now) */
	bitmap_set(line->lun_bitmap, 0, lm->lun_bitmap_len);

	smeta->header.identifier = cpu_to_le32(PBLK_MAGIC);
	memcpy(smeta->header.uuid, pblk->instance_uuid, 16);
	smeta->header.id = cpu_to_le32(line->id);
	smeta->header.type = cpu_to_le16(line->type);
	smeta->header.version = cpu_to_le16(1);
	smeta_buf->header.identifier = cpu_to_le32(PBLK_MAGIC);
	memcpy(smeta_buf->header.uuid, pblk->instance_uuid, 16);
	smeta_buf->header.id = cpu_to_le32(line->id);
	smeta_buf->header.type = cpu_to_le16(line->type);
	smeta_buf->header.version = cpu_to_le16(1);

	/* Start metadata */
	smeta->seq_nr = cpu_to_le64(line->seq_nr);
	smeta->window_wr_lun = cpu_to_le32(geo->nr_luns);
	smeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
	smeta_buf->window_wr_lun = cpu_to_le32(geo->nr_luns);

	/* Fill metadata among lines */
	if (cur) {
		memcpy(line->lun_bitmap, cur->lun_bitmap, lm->lun_bitmap_len);
		smeta->prev_id = cpu_to_le32(cur->id);
		cur->emeta->next_id = cpu_to_le32(line->id);
		smeta_buf->prev_id = cpu_to_le32(cur->id);
		cur->emeta->buf->next_id = cpu_to_le32(line->id);
	} else {
		smeta->prev_id = cpu_to_le32(PBLK_LINE_EMPTY);
		smeta_buf->prev_id = cpu_to_le32(PBLK_LINE_EMPTY);
	}

	/* All smeta must be set at this point */
	smeta->header.crc = cpu_to_le32(pblk_calc_meta_header_crc(pblk, smeta));
	smeta->crc = cpu_to_le32(pblk_calc_smeta_crc(pblk, smeta));
	smeta_buf->header.crc = cpu_to_le32(
			pblk_calc_meta_header_crc(pblk, &smeta_buf->header));
	smeta_buf->crc = cpu_to_le32(pblk_calc_smeta_crc(pblk, smeta_buf));

	/* End metadata */
	memcpy(&emeta->header, &smeta->header, sizeof(struct line_header));
	emeta->seq_nr = cpu_to_le64(line->seq_nr);
	emeta->nr_lbas = cpu_to_le64(line->sec_in_line);
	emeta->nr_valid_lbas = cpu_to_le64(0);
	emeta->next_id = cpu_to_le32(PBLK_LINE_EMPTY);
	emeta->crc = cpu_to_le32(0);
	emeta->prev_id = smeta->prev_id;
	memcpy(&emeta_buf->header, &smeta_buf->header,
						sizeof(struct line_header));
	emeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
	emeta_buf->nr_lbas = cpu_to_le64(line->sec_in_line);
	emeta_buf->nr_valid_lbas = cpu_to_le64(0);
	emeta_buf->next_id = cpu_to_le32(PBLK_LINE_EMPTY);
	emeta_buf->crc = cpu_to_le32(0);
	emeta_buf->prev_id = smeta_buf->prev_id;

	return 1;
}
@@ -987,8 +1044,8 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
	 * blocks to make sure that there are enough sectors to store emeta
	 */
	bit = lm->sec_per_line;
	off = lm->sec_per_line - lm->emeta_sec;
	bitmap_set(line->invalid_bitmap, off, lm->emeta_sec);
	off = lm->sec_per_line - lm->emeta_sec[0];
	bitmap_set(line->invalid_bitmap, off, lm->emeta_sec[0]);
	while (nr_bb) {
		off -= geo->sec_per_pl;
		if (!test_bit(off, line->invalid_bitmap)) {
@@ -997,9 +1054,11 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
		}
	}

	line->sec_in_line -= lm->emeta_sec;
	line->sec_in_line -= lm->emeta_sec[0];
	line->emeta_ssec = off;
	line->vsc = line->left_ssecs = line->left_msecs = line->sec_in_line;
	line->nr_valid_lbas = 0;
	line->left_ssecs = line->left_msecs = line->sec_in_line;
	*line->vsc = cpu_to_le32(line->sec_in_line);

	if (lm->sec_per_line - line->sec_in_line !=
		bitmap_weight(line->invalid_bitmap, lm->sec_per_line)) {
@@ -1046,6 +1105,8 @@ static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)

	atomic_set(&line->left_eblks, blk_in_line);
	atomic_set(&line->left_seblks, blk_in_line);

	line->meta_distance = lm->meta_distance;
	spin_unlock(&line->lock);

	/* Bad blocks do not need to be erased */
@@ -1170,7 +1231,6 @@ struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line *line;
	int meta_line;
	int is_next = 0;

	spin_lock(&l_mg->free_lock);
@@ -1184,11 +1244,7 @@ struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
	line->type = PBLK_LINETYPE_DATA;
	l_mg->data_line = line;

	meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
	set_bit(meta_line, &l_mg->meta_bitmap);
	line->smeta = l_mg->sline_meta[meta_line].meta;
	line->emeta = l_mg->eline_meta[meta_line].meta;
	line->meta_line = meta_line;
	pblk_line_setup_metadata(line, l_mg, &pblk->lm);

	/* Allocate next line for preparation */
	l_mg->data_next = pblk_line_get(pblk);
@@ -1207,7 +1263,7 @@ struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
		return NULL;

retry_setup:
	if (!pblk_line_set_metadata(pblk, line, NULL)) {
	if (!pblk_line_init_metadata(pblk, line, NULL)) {
		line = pblk_line_retry(pblk, line);
		if (!line)
			return NULL;
@@ -1228,11 +1284,9 @@ struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)

struct pblk_line *pblk_line_replace_data(struct pblk *pblk)
{
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line *cur, *new;
	unsigned int left_seblks;
	int meta_line;
	int is_next = 0;

	cur = l_mg->data_line;
@@ -1263,29 +1317,14 @@ struct pblk_line *pblk_line_replace_data(struct pblk *pblk)
		is_next = 1;
	}

retry_meta:
	meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
	if (meta_line == PBLK_DATA_LINES) {
		spin_unlock(&l_mg->free_lock);
		io_schedule();
		spin_lock(&l_mg->free_lock);
		goto retry_meta;
	}

	set_bit(meta_line, &l_mg->meta_bitmap);
	new->smeta = l_mg->sline_meta[meta_line].meta;
	new->emeta = l_mg->eline_meta[meta_line].meta;
	new->meta_line = meta_line;

	memset(new->smeta, 0, lm->smeta_len);
	memset(new->emeta, 0, lm->emeta_len);
	pblk_line_setup_metadata(new, l_mg, &pblk->lm);
	spin_unlock(&l_mg->free_lock);

	if (is_next)
		pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);

retry_setup:
	if (!pblk_line_set_metadata(pblk, new, cur)) {
	if (!pblk_line_init_metadata(pblk, new, cur)) {
		new = pblk_line_retry(pblk, new);
		if (!new)
			return NULL;
@@ -1311,6 +1350,8 @@ void pblk_line_free(struct pblk *pblk, struct pblk_line *line)
	if (line->invalid_bitmap)
		mempool_free(line->invalid_bitmap, pblk->line_meta_pool);

	*line->vsc = cpu_to_le32(EMPTY_ENTRY);

	line->map_bitmap = NULL;
	line->invalid_bitmap = NULL;
	line->smeta = NULL;
@@ -1386,14 +1427,10 @@ int pblk_line_is_full(struct pblk_line *line)
void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line_meta *lm = &pblk->lm;
	struct list_head *move_list;

	line->emeta->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, line->emeta));

	if (pblk_line_submit_emeta_io(pblk, line, line->cur_sec, WRITE))
		pr_err("pblk: line %d close I/O failed\n", line->id);

	WARN(!bitmap_full(line->map_bitmap, line->sec_in_line),
	WARN(!bitmap_full(line->map_bitmap, lm->sec_per_line),
				"pblk: corrupt closed line %d\n", line->id);

	spin_lock(&l_mg->free_lock);
@@ -1417,6 +1454,27 @@ void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
	spin_unlock(&l_mg->gc_lock);
}

/*
 * Finalize @line's end metadata in memory (vsc list, bad-block bitmap,
 * valid-lba count, CRC) and queue the line on l_mg->emeta_list so the
 * write thread can schedule the actual emeta I/O (see commit message:
 * metadata is persisted from the write thread to avoid media collisions).
 */
void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_emeta *emeta = line->emeta;
	struct line_emeta *emeta_buf = emeta->buf;

	/* No need for exact vsc value; avoid a big line lock and take an
	 * approximation from the global vsc list.
	 */
	memcpy(emeta_to_vsc(pblk, emeta_buf), l_mg->vsc_list, lm->vsc_list_len);
	memcpy(emeta_to_bb(emeta_buf), line->blk_bitmap, lm->blk_bitmap_len);

	emeta_buf->nr_valid_lbas = cpu_to_le64(line->nr_valid_lbas);
	emeta_buf->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, emeta_buf));

	/* Lock order: close_lock, then line->lock */
	spin_lock(&l_mg->close_lock);
	spin_lock(&line->lock);
	list_add_tail(&line->list, &l_mg->emeta_list);
	spin_unlock(&line->lock);
	spin_unlock(&l_mg->close_lock);
}

void pblk_line_close_ws(struct work_struct *work)
{
	struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
@@ -1476,7 +1534,7 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_lun *rlun;
	int lun_id = ppa_list[0].g.ch * geo->luns_per_chnl + ppa_list[0].g.lun;
	int pos = pblk_ppa_to_pos(geo, ppa_list[0]);
	int ret;

	/*
@@ -1493,10 +1551,10 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
	/* If the LUN has been locked for this same request, do no attempt to
	 * lock it again
	 */
	if (test_and_set_bit(lun_id, lun_bitmap))
	if (test_and_set_bit(pos, lun_bitmap))
		return;

	rlun = &pblk->luns[lun_id];
	rlun = &pblk->luns[pos];
	ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000));
	if (ret) {
		switch (ret) {
+21 −20
Original line number Diff line number Diff line
@@ -156,7 +156,8 @@ static void pblk_gc_line_ws(struct work_struct *work)
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line *line = line_ws->line;
	struct pblk_line_meta *lm = &pblk->lm;
	__le64 *lba_list = line_ws->priv;
	struct line_emeta *emeta_buf = line_ws->priv;
	__le64 *lba_list;
	u64 *gc_list;
	int sec_left;
	int nr_ppas, bit;
@@ -164,8 +165,18 @@ static void pblk_gc_line_ws(struct work_struct *work)

	pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id);

	/* If this read fails, it means that emeta is corrupted. For now, leave
	 * the line untouched. TODO: Implement a recovery routine that scans and
	 * moves all sectors on the line.
	 */
	lba_list = pblk_recov_get_lba_list(pblk, emeta_buf);
	if (!lba_list) {
		pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
		goto out;
	}

	spin_lock(&line->lock);
	sec_left = line->vsc;
	sec_left = le32_to_cpu(*line->vsc);
	if (!sec_left) {
		/* Lines are erased before being used (l_mg->data_/log_next) */
		spin_unlock(&line->lock);
@@ -206,7 +217,7 @@ static void pblk_gc_line_ws(struct work_struct *work)

	if (pblk_gc_move_valid_secs(pblk, line, gc_list, nr_ppas)) {
		pr_err("pblk: could not GC all sectors: line:%d (%d/%d/%d)\n",
						line->id, line->vsc,
						line->id, *line->vsc,
						nr_ppas, nr_ppas);
		put_line = 0;
		pblk_put_line_back(pblk, line);
@@ -218,7 +229,7 @@ static void pblk_gc_line_ws(struct work_struct *work)
		goto next_rq;

out:
	pblk_mfree(line->emeta, l_mg->emeta_alloc_type);
	pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
	mempool_free(line_ws, pblk->line_ws_pool);
	atomic_dec(&pblk->gc.inflight_gc);
	if (put_line)
@@ -229,37 +240,27 @@ static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line_meta *lm = &pblk->lm;
	struct line_emeta *emeta_buf;
	struct pblk_line_ws *line_ws;
	__le64 *lba_list;
	int ret;

	line_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
	line->emeta = pblk_malloc(lm->emeta_len, l_mg->emeta_alloc_type,
	emeta_buf = pblk_malloc(lm->emeta_len[0], l_mg->emeta_alloc_type,
								GFP_KERNEL);
	if (!line->emeta) {
	if (!emeta_buf) {
		pr_err("pblk: cannot use GC emeta\n");
		goto fail_free_ws;
	}

	ret = pblk_line_read_emeta(pblk, line);
	ret = pblk_line_read_emeta(pblk, line, emeta_buf);
	if (ret) {
		pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
		goto fail_free_emeta;
	}

	/* If this read fails, it means that emeta is corrupted. For now, leave
	 * the line untouched. TODO: Implement a recovery routine that scans and
	 * moves all sectors on the line.
	 */
	lba_list = pblk_recov_get_lba_list(pblk, line->emeta);
	if (!lba_list) {
		pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
		goto fail_free_emeta;
	}

	line_ws->pblk = pblk;
	line_ws->line = line;
	line_ws->priv = lba_list;
	line_ws->priv = emeta_buf;

	INIT_WORK(&line_ws->ws, pblk_gc_line_ws);
	queue_work(pblk->gc.gc_reader_wq, &line_ws->ws);
@@ -267,7 +268,7 @@ static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line)
	return 0;

fail_free_emeta:
	pblk_mfree(line->emeta, l_mg->emeta_alloc_type);
	pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
fail_free_ws:
	mempool_free(line_ws, pblk->line_ws_pool);
	pblk_put_line_back(pblk, line);
+147 −93

File changed.

Preview size limit exceeded, changes collapsed.

+7 −6
Original line number Diff line number Diff line
@@ -25,9 +25,9 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
			       unsigned int valid_secs)
{
	struct pblk_line *line = pblk_line_get_data(pblk);
	struct line_emeta *emeta = line->emeta;
	struct pblk_emeta *emeta = line->emeta;
	struct pblk_w_ctx *w_ctx;
	__le64 *lba_list = pblk_line_emeta_to_lbas(emeta);
	__le64 *lba_list = emeta_to_lbas(pblk, emeta->buf);
	u64 paddr;
	int nr_secs = pblk->min_write_pgs;
	int i;
@@ -51,7 +51,7 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
			w_ctx->ppa = ppa_list[i];
			meta_list[i].lba = cpu_to_le64(w_ctx->lba);
			lba_list[paddr] = cpu_to_le64(w_ctx->lba);
			le64_add_cpu(&line->emeta->nr_valid_lbas, 1);
			line->nr_valid_lbas++;
		} else {
			u64 addr_empty = cpu_to_le64(ADDR_EMPTY);

@@ -61,9 +61,11 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
	}

	if (pblk_line_is_full(line)) {
		struct pblk_line *prev_line = line;
		line = pblk_line_replace_data(pblk);
		if (!line)
			return;
		pblk_line_close_meta(pblk, prev_line);
	}

	pblk_down_rq(pblk, ppa_list, nr_secs, lun_bitmap);
@@ -104,11 +106,10 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
		pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i],
					lun_bitmap, &meta_list[i], map_secs);

		erase_lun = rqd->ppa_list[i].g.lun * geo->nr_chnls +
							rqd->ppa_list[i].g.ch;

		/* line can change after page map */
		e_line = pblk_line_get_erase(pblk);
		erase_lun = pblk_ppa_to_pos(geo, rqd->ppa_list[i]);

		spin_lock(&e_line->lock);
		if (!test_bit(erase_lun, e_line->erase_bitmap)) {
			set_bit(erase_lun, e_line->erase_bitmap);
+35 −32
Original line number Diff line number Diff line
@@ -120,18 +120,18 @@ int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
	return 0;
}

__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta)
__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta_buf)
{
	u32 crc;

	crc = pblk_calc_emeta_crc(pblk, emeta);
	if (le32_to_cpu(emeta->crc) != crc)
	crc = pblk_calc_emeta_crc(pblk, emeta_buf);
	if (le32_to_cpu(emeta_buf->crc) != crc)
		return NULL;

	if (le32_to_cpu(emeta->header.identifier) != PBLK_MAGIC)
	if (le32_to_cpu(emeta_buf->header.identifier) != PBLK_MAGIC)
		return NULL;

	return pblk_line_emeta_to_lbas(emeta);
	return emeta_to_lbas(pblk, emeta_buf);
}

static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
@@ -139,19 +139,20 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_line_meta *lm = &pblk->lm;
	struct line_emeta *emeta = line->emeta;
	struct pblk_emeta *emeta = line->emeta;
	struct line_emeta *emeta_buf = emeta->buf;
	__le64 *lba_list;
	int data_start;
	int nr_data_lbas, nr_valid_lbas, nr_lbas = 0;
	int i;

	lba_list = pblk_recov_get_lba_list(pblk, emeta);
	lba_list = pblk_recov_get_lba_list(pblk, emeta_buf);
	if (!lba_list)
		return 1;

	data_start = pblk_line_smeta_start(pblk, line) + lm->smeta_sec;
	nr_data_lbas = lm->sec_per_line - lm->emeta_sec;
	nr_valid_lbas = le64_to_cpu(emeta->nr_valid_lbas);
	nr_data_lbas = lm->sec_per_line - lm->emeta_sec[0];
	nr_valid_lbas = le64_to_cpu(emeta_buf->nr_valid_lbas);

	for (i = data_start; i < nr_data_lbas && nr_lbas < nr_valid_lbas; i++) {
		struct ppa_addr ppa;
@@ -169,7 +170,7 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
			if (test_and_set_bit(i, line->invalid_bitmap))
				WARN_ONCE(1, "pblk: rec. double invalidate:\n");
			else
				line->vsc--;
				le32_add_cpu(line->vsc, -1);
			spin_unlock(&line->lock);

			continue;
@@ -181,7 +182,7 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)

	if (nr_valid_lbas != nr_lbas)
		pr_err("pblk: line %d - inconsistent lba list(%llu/%d)\n",
				line->id, line->emeta->nr_valid_lbas, nr_lbas);
				line->id, emeta_buf->nr_valid_lbas, nr_lbas);

	line->left_msecs = 0;

@@ -195,7 +196,7 @@ static int pblk_calc_sec_in_line(struct pblk *pblk, struct pblk_line *line)
	struct pblk_line_meta *lm = &pblk->lm;
	int nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);

	return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec -
	return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec[0] -
				nr_bb * geo->sec_per_blk;
}

@@ -333,7 +334,7 @@ static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
	struct bio *bio;
	void *data;
	dma_addr_t dma_ppa_list, dma_meta_list;
	__le64 *lba_list = pblk_line_emeta_to_lbas(line->emeta);
	__le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf);
	u64 w_ptr = line->cur_sec;
	int left_line_ppas = line->left_msecs;
	int rq_ppas, rq_len;
@@ -770,8 +771,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line *line, *tline, *data_line = NULL;
	struct line_smeta *smeta;
	struct line_emeta *emeta;
	struct pblk_smeta *smeta;
	struct pblk_emeta *emeta;
	struct line_smeta *smeta_buf;
	int found_lines = 0, recovered_lines = 0, open_lines = 0;
	int is_next = 0;
	int meta_line;
@@ -784,8 +786,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
	spin_lock(&l_mg->free_lock);
	meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
	set_bit(meta_line, &l_mg->meta_bitmap);
	smeta = l_mg->sline_meta[meta_line].meta;
	emeta = l_mg->eline_meta[meta_line].meta;
	smeta = l_mg->sline_meta[meta_line];
	emeta = l_mg->eline_meta[meta_line];
	smeta_buf = smeta->buf;
	spin_unlock(&l_mg->free_lock);

	/* Order data lines using their sequence number */
@@ -796,33 +799,33 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)

		memset(smeta, 0, lm->smeta_len);
		line->smeta = smeta;
		line->lun_bitmap = ((void *)(smeta)) +
		line->lun_bitmap = ((void *)(smeta_buf)) +
						sizeof(struct line_smeta);

		/* Lines that cannot be read are assumed as not written here */
		if (pblk_line_read_smeta(pblk, line))
			continue;

		crc = pblk_calc_smeta_crc(pblk, smeta);
		if (le32_to_cpu(smeta->crc) != crc)
		crc = pblk_calc_smeta_crc(pblk, smeta_buf);
		if (le32_to_cpu(smeta_buf->crc) != crc)
			continue;

		if (le32_to_cpu(smeta->header.identifier) != PBLK_MAGIC)
		if (le32_to_cpu(smeta_buf->header.identifier) != PBLK_MAGIC)
			continue;

		if (le16_to_cpu(smeta->header.version) != 1) {
		if (le16_to_cpu(smeta_buf->header.version) != 1) {
			pr_err("pblk: found incompatible line version %u\n",
					smeta->header.version);
					smeta_buf->header.version);
			return ERR_PTR(-EINVAL);
		}

		/* The first valid instance uuid is used for initialization */
		if (!valid_uuid) {
			memcpy(pblk->instance_uuid, smeta->header.uuid, 16);
			memcpy(pblk->instance_uuid, smeta_buf->header.uuid, 16);
			valid_uuid = 1;
		}

		if (memcmp(pblk->instance_uuid, smeta->header.uuid, 16)) {
		if (memcmp(pblk->instance_uuid, smeta_buf->header.uuid, 16)) {
			pr_debug("pblk: ignore line %u due to uuid mismatch\n",
					i);
			continue;
@@ -830,9 +833,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)

		/* Update line metadata */
		spin_lock(&line->lock);
		line->id = le32_to_cpu(line->smeta->header.id);
		line->type = le16_to_cpu(line->smeta->header.type);
		line->seq_nr = le64_to_cpu(line->smeta->seq_nr);
		line->id = le32_to_cpu(smeta_buf->header.id);
		line->type = le16_to_cpu(smeta_buf->header.type);
		line->seq_nr = le64_to_cpu(smeta_buf->seq_nr);
		spin_unlock(&line->lock);

		/* Update general metadata */
@@ -848,7 +851,7 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
		pblk_recov_line_add_ordered(&recov_list, line);
		found_lines++;
		pr_debug("pblk: recovering data line %d, seq:%llu\n",
						line->id, smeta->seq_nr);
						line->id, smeta_buf->seq_nr);
	}

	if (!found_lines) {
@@ -868,15 +871,15 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)

		recovered_lines++;
		/* Calculate where emeta starts based on the line bb */
		off = lm->sec_per_line - lm->emeta_sec;
		off = lm->sec_per_line - lm->emeta_sec[0];
		nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);
		off -= nr_bb * geo->sec_per_pl;

		memset(emeta, 0, lm->emeta_len);
		memset(&emeta->buf, 0, lm->emeta_len[0]);
		line->emeta = emeta;
		line->emeta_ssec = off;

		if (pblk_line_read_emeta(pblk, line)) {
		if (pblk_line_read_emeta(pblk, line, line->emeta->buf)) {
			pblk_recov_l2p_from_oob(pblk, line);
			goto next;
		}
Loading