
Commit 64a741c1 authored by Steve Wise, committed by Christoph Hellwig

nvme-rdma: support up to 4 segments of inline data



Allow up to 4 segments of inline data for NVMF WRITE operations. This
reduces latency for small WRITEs by removing the need for the target to
issue a READ WR for IB, or a REG_MR + READ WR chain for iWARP (see the
sketch after the sign-offs below).

Also cap the number of inline segments actually used based on the
device's limits (a second sketch of this capping follows the diff).

Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
parent 55eb942e
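
As a rough host-side illustration of what the patch enables: the write payload rides in the same RDMA SEND as the command capsule, described by up to four gather entries plus a single SGL descriptor that records only the in-capsule offset and total length, so the target never has to issue an RDMA READ to fetch the data. The sketch below is a minimal stand-alone version of that idea using simplified stand-in types (inline_seg, sgl_desc), not the kernel's struct ib_sge or struct nvme_sgl_desc from the diff.

/*
 * Minimal sketch of building the inline SGL descriptor.
 * Stand-in types only; not the kernel structures changed below.
 */
#include <stdint.h>

#define MAX_INLINE_SEGMENTS 4

struct inline_seg {		/* one DMA-mapped payload segment */
	uint64_t addr;
	uint32_t length;
};

struct sgl_desc {		/* the command's data pointer */
	uint64_t addr;		/* offset into the in-capsule data area */
	uint32_t length;	/* total inline bytes */
};

/*
 * Sum the segment lengths that will ride in the SEND and point the
 * SGL descriptor at the in-capsule data offset (icdoff); no remote
 * key is needed because the target never reads the data back.
 */
static uint32_t build_inline_sgl(const struct inline_seg *segs, int count,
				 uint64_t icdoff, struct sgl_desc *sg)
{
	uint32_t len = 0;

	for (int i = 0; i < count && i < MAX_INLINE_SEGMENTS; i++)
		len += segs[i].length;

	sg->addr = icdoff;
	sg->length = len;
	return len;
}
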
+27 −11
@@ -40,13 +40,14 @@
 
 #define NVME_RDMA_MAX_SEGMENTS		256
 
-#define NVME_RDMA_MAX_INLINE_SEGMENTS	1
+#define NVME_RDMA_MAX_INLINE_SEGMENTS	4
 
 struct nvme_rdma_device {
 	struct ib_device	*dev;
 	struct ib_pd		*pd;
 	struct kref		ref;
 	struct list_head	entry;
+	unsigned int		num_inline_segments;
 };
 
 struct nvme_rdma_qe {
@@ -117,6 +118,7 @@ struct nvme_rdma_ctrl {
 	struct sockaddr_storage src_addr;
 
 	struct nvme_ctrl	ctrl;
+	bool			use_inline_data;
 };
 
 static inline struct nvme_rdma_ctrl *to_rdma_ctrl(struct nvme_ctrl *ctrl)
@@ -249,7 +251,7 @@ static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor)
 	/* +1 for drain */
 	init_attr.cap.max_recv_wr = queue->queue_size + 1;
 	init_attr.cap.max_recv_sge = 1;
-	init_attr.cap.max_send_sge = 1 + NVME_RDMA_MAX_INLINE_SEGMENTS;
+	init_attr.cap.max_send_sge = 1 + dev->num_inline_segments;
 	init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
 	init_attr.qp_type = IB_QPT_RC;
 	init_attr.send_cq = queue->ib_cq;
@@ -374,6 +376,8 @@ nvme_rdma_find_get_device(struct rdma_cm_id *cm_id)
 		goto out_free_pd;
 	}
 
+	ndev->num_inline_segments = min(NVME_RDMA_MAX_INLINE_SEGMENTS,
+					ndev->dev->attrs.max_sge - 1);
 	list_add(&ndev->entry, &device_list);
 out_unlock:
 	mutex_unlock(&device_list_mutex);
@@ -925,6 +929,9 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 	if (ret)
 		goto requeue;
 
+	if (ctrl->ctrl.sgls & (1 << 20))
+		ctrl->use_inline_data = true;
+
 	if (ctrl->ctrl.queue_count > 1) {
 		ret = nvme_rdma_configure_io_queues(ctrl, false);
 		if (ret)
@@ -1090,19 +1097,27 @@ static int nvme_rdma_set_sg_null(struct nvme_command *c)
 }
 
 static int nvme_rdma_map_sg_inline(struct nvme_rdma_queue *queue,
-		struct nvme_rdma_request *req, struct nvme_command *c)
+		struct nvme_rdma_request *req, struct nvme_command *c,
+		int count)
 {
 	struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
+	struct scatterlist *sgl = req->sg_table.sgl;
+	struct ib_sge *sge = &req->sge[1];
+	u32 len = 0;
+	int i;
 
-	req->sge[1].addr = sg_dma_address(req->sg_table.sgl);
-	req->sge[1].length = sg_dma_len(req->sg_table.sgl);
-	req->sge[1].lkey = queue->device->pd->local_dma_lkey;
+	for (i = 0; i < count; i++, sgl++, sge++) {
+		sge->addr = sg_dma_address(sgl);
+		sge->length = sg_dma_len(sgl);
+		sge->lkey = queue->device->pd->local_dma_lkey;
+		len += sge->length;
+	}
 
 	sg->addr = cpu_to_le64(queue->ctrl->ctrl.icdoff);
-	sg->length = cpu_to_le32(sg_dma_len(req->sg_table.sgl));
+	sg->length = cpu_to_le32(len);
 	sg->type = (NVME_SGL_FMT_DATA_DESC << 4) | NVME_SGL_FMT_OFFSET;
 
-	req->num_sge++;
+	req->num_sge += count;
 	return 0;
 }
 
@@ -1195,15 +1210,16 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
 		goto out_free_table;
 	}
 
-	if (count == 1) {
+	if (count <= dev->num_inline_segments) {
 		if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) &&
+		    queue->ctrl->use_inline_data &&
 		    blk_rq_payload_bytes(rq) <=
 				nvme_rdma_inline_data_size(queue)) {
-			ret = nvme_rdma_map_sg_inline(queue, req, c);
+			ret = nvme_rdma_map_sg_inline(queue, req, c, count);
 			goto out;
 		}
 
-		if (dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
+		if (count == 1 && dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
 			ret = nvme_rdma_map_sg_single(queue, req, c);
 			goto out;
 		}
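
To recap the capping described in the commit message in isolation: one send SGE is always reserved for the command capsule itself, so the usable inline segment count is the smaller of the driver's limit (4) and the device's max_sge minus one, and a write only goes inline when the controller advertises support and the payload fits the inline data size. The sketch below is illustrative only, with made-up helper names, not kernel API.

/* Illustrative only: simplified capping/eligibility check, not kernel code. */
#include <stdbool.h>
#include <stddef.h>

#define MAX_INLINE_SEGMENTS 4

/* One send SGE is reserved for the NVMe command capsule itself. */
static unsigned int cap_inline_segments(unsigned int device_max_send_sge)
{
	unsigned int cap = device_max_send_sge - 1;

	return cap < MAX_INLINE_SEGMENTS ? cap : MAX_INLINE_SEGMENTS;
}

/*
 * A write is sent inline only if the controller advertises inline/offset
 * SGL support, the segment count fits the capped limit, and the payload
 * fits the queue's inline data size.
 */
static bool write_is_inline_eligible(bool ctrl_supports_inline,
				     unsigned int count, unsigned int cap,
				     size_t payload_bytes, size_t inline_size)
{
	return ctrl_supports_inline && count <= cap &&
	       payload_bytes <= inline_size;
}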