Commit ac3dd5bd authored by Jens Axboe

NVMe: avoid kmalloc/kfree for smaller IO

Currently we allocate an nvme_iod for each IO, which holds the
sg list, prps, and other IO related info. Set a threshold of
2 pages and/or 8KB of data, below which we can just embed this
in the per-command pdu in blk-mq. For any IO at or below
NVME_INT_PAGES and NVME_INT_BYTES, we save a kmalloc and kfree.

For higher IOPS, this saves up to 1% of CPU time.

Signed-off-by: Jens Axboe <axboe@fb.com>
Reviewed-by: Keith Busch <keith.busch@intel.com>
parent 4ca5829a
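
The change works as follows: blk-mq already allocates a fixed per-command payload (the pdu) for every request, so for IO that needs at most NVME_INT_PAGES segments and NVME_INT_BYTES of data, the iod is carved out of that preallocated area instead of being kmalloc'ed, and bit 0 of iod->private records which case applies so the free path knows whether to kfree. Below is a minimal standalone userspace sketch of that pattern, not the driver code: kmalloc/kfree are replaced by malloc/free, and all names and struct fields are illustrative stand-ins.

/*
 * Standalone sketch of the "embed small descriptors in the per-command
 * pdu" pattern used by this commit. Userspace, illustrative names only.
 */
#include <stdio.h>
#include <stdlib.h>

#define INT_SEGS   2u      /* analogous to NVME_INT_PAGES   */
#define INT_BYTES  8192u   /* analogous to NVME_INT_BYTES() */

struct iod {
	unsigned long private;  /* owner pointer; bit 0 = "embedded" flag */
	unsigned nseg;
	unsigned length;
	/* sg[], PRP list, ... would follow in the real structure */
};

/* Per-command area preallocated by the queueing layer (like the blk-mq pdu).
 * The driver uses a zero-length trailing array sized via nvme_cmd_size();
 * a fixed slot is enough for this sketch. */
struct cmd_info {
	int aborted;
	struct iod iod[1];
};

static void iod_init(struct iod *iod, unsigned len, unsigned nseg,
		     unsigned long private)
{
	iod->private = private;
	iod->length = len;
	iod->nseg = nseg;
}

/* Small IO: reuse the embedded iod and tag bit 0. Large IO: heap allocate. */
static struct iod *alloc_iod(struct cmd_info *cmd, void *owner,
			     unsigned len, unsigned nseg)
{
	if (nseg <= INT_SEGS && len <= INT_BYTES) {
		iod_init(cmd->iod, len, nseg, (unsigned long)owner | 0x01);
		return cmd->iod;
	}

	struct iod *iod = malloc(sizeof(*iod));
	if (iod)
		iod_init(iod, len, nseg, (unsigned long)owner);
	return iod;
}

static void *iod_get_owner(struct iod *iod)
{
	return (void *)(iod->private & ~0x1UL);
}

static void free_iod(struct iod *iod)
{
	if ((iod->private & 0x01) == 0)  /* only heap-allocated iods are freed */
		free(iod);
}

int main(void)
{
	struct cmd_info cmd = { 0 };
	int owner = 42;

	struct iod *small = alloc_iod(&cmd, &owner, 4096, 1);
	struct iod *large = alloc_iod(&cmd, &owner, 65536, 16);

	printf("small embedded: %d, owner ok: %d\n",
	       small == cmd.iod, iod_get_owner(small) == &owner);
	printf("large embedded: %d\n", large == cmd.iod);

	free_iod(small);   /* no-op: lives inside cmd_info */
	free_iod(large);   /* heap allocation is released  */
	return 0;
}

Storing the owner pointer in an unsigned long (as the second file of this diff does for nvme_iod.private) is what makes the low-bit tag well defined: a struct request pointer is aligned well beyond 2 bytes, so bit 0 is always free to use as a flag.
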
+88 −31
@@ -144,8 +144,37 @@ struct nvme_cmd_info {
 	void *ctx;
 	int aborted;
 	struct nvme_queue *nvmeq;
+	struct nvme_iod iod[0];
 };
 
+/*
+ * Max size of iod being embedded in the request payload
+ */
+#define NVME_INT_PAGES		2
+#define NVME_INT_BYTES(dev)	(NVME_INT_PAGES * (dev)->page_size)
+
+/*
+ * Will slightly overestimate the number of pages needed.  This is OK
+ * as it only leads to a small amount of wasted memory for the lifetime of
+ * the I/O.
+ */
+static int nvme_npages(unsigned size, struct nvme_dev *dev)
+{
+	unsigned nprps = DIV_ROUND_UP(size + dev->page_size, dev->page_size);
+	return DIV_ROUND_UP(8 * nprps, PAGE_SIZE - 8);
+}
+
+static unsigned int nvme_cmd_size(struct nvme_dev *dev)
+{
+	unsigned int ret = sizeof(struct nvme_cmd_info);
+
+	ret += sizeof(struct nvme_iod);
+	ret += sizeof(__le64 *) * nvme_npages(NVME_INT_BYTES(dev), dev);
+	ret += sizeof(struct scatterlist) * NVME_INT_PAGES;
+
+	return ret;
+}
+
 static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 				unsigned int hctx_idx)
 {
@@ -217,6 +246,19 @@ static void nvme_set_info(struct nvme_cmd_info *cmd, void *ctx,
 	cmd->aborted = 0;
 }
 
+static void *iod_get_private(struct nvme_iod *iod)
+{
+	return (void *) (iod->private & ~0x1UL);
+}
+
+/*
+ * If bit 0 is set, the iod is embedded in the request payload.
+ */
+static bool iod_should_kfree(struct nvme_iod *iod)
+{
+	return (iod->private & 0x01) == 0;
+}
+
 /* Special values must be less than 0x1000 */
 #define CMD_CTX_BASE		((void *)POISON_POINTER_DELTA)
 #define CMD_CTX_CANCELLED	(0x30C + CMD_CTX_BASE)
@@ -360,35 +402,53 @@ static __le64 **iod_list(struct nvme_iod *iod)
 	return ((void *)iod) + iod->offset;
 }
 
-/*
- * Will slightly overestimate the number of pages needed.  This is OK
- * as it only leads to a small amount of wasted memory for the lifetime of
- * the I/O.
- */
-static int nvme_npages(unsigned size, struct nvme_dev *dev)
+static inline void iod_init(struct nvme_iod *iod, unsigned nbytes,
+			    unsigned nseg, unsigned long private)
 {
-	unsigned nprps = DIV_ROUND_UP(size + dev->page_size, dev->page_size);
-	return DIV_ROUND_UP(8 * nprps, dev->page_size - 8);
+	iod->private = private;
+	iod->offset = offsetof(struct nvme_iod, sg[nseg]);
+	iod->npages = -1;
+	iod->length = nbytes;
+	iod->nents = 0;
 }
 
 static struct nvme_iod *
-nvme_alloc_iod(unsigned nseg, unsigned nbytes, struct nvme_dev *dev, gfp_t gfp)
+__nvme_alloc_iod(unsigned nseg, unsigned bytes, struct nvme_dev *dev,
+		 unsigned long priv, gfp_t gfp)
 {
 	struct nvme_iod *iod = kmalloc(sizeof(struct nvme_iod) +
-				sizeof(__le64 *) * nvme_npages(nbytes, dev) +
+				sizeof(__le64 *) * nvme_npages(bytes, dev) +
 				sizeof(struct scatterlist) * nseg, gfp);
 
-	if (iod) {
-		iod->offset = offsetof(struct nvme_iod, sg[nseg]);
-		iod->npages = -1;
-		iod->length = nbytes;
-		iod->nents = 0;
-		iod->first_dma = 0ULL;
-	}
+	if (iod)
+		iod_init(iod, bytes, nseg, priv);
 
 	return iod;
 }
 
+static struct nvme_iod *nvme_alloc_iod(struct request *rq, struct nvme_dev *dev,
+			               gfp_t gfp)
+{
+	unsigned size = !(rq->cmd_flags & REQ_DISCARD) ? blk_rq_bytes(rq) :
+                                                sizeof(struct nvme_dsm_range);
+	unsigned long mask = 0;
+	struct nvme_iod *iod;
+
+	if (rq->nr_phys_segments <= NVME_INT_PAGES &&
+	    size <= NVME_INT_BYTES(dev)) {
+		struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(rq);
+
+		iod = cmd->iod;
+		mask = 0x01;
+		iod_init(iod, size, rq->nr_phys_segments,
+				(unsigned long) rq | 0x01);
+		return iod;
+	}
+
+	return __nvme_alloc_iod(rq->nr_phys_segments, size, dev,
+				(unsigned long) rq, gfp);
+}
+
 void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod)
 {
 	const int last_prp = dev->page_size / 8 - 1;
@@ -404,6 +464,8 @@ void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod)
 		dma_pool_free(dev->prp_page_pool, prp_list, prp_dma);
 		prp_dma = next_prp_dma;
 	}
-	kfree(iod);
+
+	if (iod_should_kfree(iod))
+		kfree(iod);
 }
 
@@ -423,7 +485,7 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
 						struct nvme_completion *cqe)
 {
 	struct nvme_iod *iod = ctx;
-	struct request *req = iod->private;
+	struct request *req = iod_get_private(iod);
 	struct nvme_cmd_info *cmd_rq = blk_mq_rq_to_pdu(req);
 
 	u16 status = le16_to_cpup(&cqe->status) >> 1;
@@ -579,7 +641,7 @@ static void nvme_submit_flush(struct nvme_queue *nvmeq, struct nvme_ns *ns,
 static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod,
 							struct nvme_ns *ns)
 {
-	struct request *req = iod->private;
+	struct request *req = iod_get_private(iod);
 	struct nvme_command *cmnd;
 	u16 control = 0;
 	u32 dsmgmt = 0;
@@ -620,17 +682,12 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct request *req = bd->rq;
 	struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req);
 	struct nvme_iod *iod;
-	int psegs = req->nr_phys_segments;
 	enum dma_data_direction dma_dir;
-	unsigned size = !(req->cmd_flags & REQ_DISCARD) ? blk_rq_bytes(req) :
-						sizeof(struct nvme_dsm_range);
 
-	iod = nvme_alloc_iod(psegs, size, ns->dev, GFP_ATOMIC);
+	iod = nvme_alloc_iod(req, ns->dev, GFP_ATOMIC);
 	if (!iod)
 		return BLK_MQ_RQ_QUEUE_BUSY;
 
-	iod->private = req;
-
 	if (req->cmd_flags & REQ_DISCARD) {
 		void *range;
 		/*
@@ -645,10 +702,10 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 			goto retry_cmd;
 		iod_list(iod)[0] = (__le64 *)range;
 		iod->npages = 0;
-	} else if (psegs) {
+	} else if (req->nr_phys_segments) {
 		dma_dir = rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
 
-		sg_init_table(iod->sg, psegs);
+		sg_init_table(iod->sg, req->nr_phys_segments);
 		iod->nents = blk_rq_map_sg(req->q, req, iod->sg);
 		if (!iod->nents)
 			goto error_cmd;
@@ -1362,7 +1419,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
 		dev->admin_tagset.queue_depth = NVME_AQ_DEPTH - 1;
 		dev->admin_tagset.timeout = ADMIN_TIMEOUT;
 		dev->admin_tagset.numa_node = dev_to_node(&dev->pci_dev->dev);
-		dev->admin_tagset.cmd_size = sizeof(struct nvme_cmd_info);
+		dev->admin_tagset.cmd_size = nvme_cmd_size(dev);
 		dev->admin_tagset.driver_data = dev;
 
 		if (blk_mq_alloc_tag_set(&dev->admin_tagset))
@@ -1483,7 +1540,7 @@ struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write,
 	}
 
 	err = -ENOMEM;
-	iod = nvme_alloc_iod(count, length, dev, GFP_KERNEL);
+	iod = __nvme_alloc_iod(count, length, dev, 0, GFP_KERNEL);
 	if (!iod)
 		goto put_pages;
 
@@ -2109,7 +2166,7 @@ static int nvme_dev_add(struct nvme_dev *dev)
 	dev->tagset.numa_node = dev_to_node(&dev->pci_dev->dev);
 	dev->tagset.queue_depth =
 				min_t(int, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1;
-	dev->tagset.cmd_size = sizeof(struct nvme_cmd_info);
+	dev->tagset.cmd_size = nvme_cmd_size(dev);
 	dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE;
 	dev->tagset.driver_data = dev;
 
+1 −2
@@ -132,13 +132,12 @@ struct nvme_ns {
  * allocated to store the PRP list.
  */
 struct nvme_iod {
-	void *private;		/* For the use of the submitter of the I/O */
+	unsigned long private;	/* For the use of the submitter of the I/O */
 	int npages;		/* In the PRP list. 0 means small pool in use */
 	int offset;		/* Of PRP list */
 	int nents;		/* Used in scatterlist */
 	int length;		/* Of data, in bytes */
 	dma_addr_t first_dma;
-	struct list_head node;
 	struct scatterlist sg[0];
 };
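
For a sense of how much per-command space the embedded iod costs, here is a hedged userspace re-derivation of the nvme_cmd_size()/nvme_npages() arithmetic from the first hunk, assuming a 4096-byte device page; the struct sizes are made-up stand-ins, since the real values depend on the kernel configuration.

/* Rough sanity check of the per-command sizing arithmetic.
 * Assumes a 4096-byte device page; struct sizes are stand-ins. */
#include <stdio.h>

#define PAGE_SIZE           4096u
#define DIV_ROUND_UP(n, d)  (((n) + (d) - 1) / (d))

#define NVME_INT_PAGES      2u
#define NVME_INT_BYTES      (NVME_INT_PAGES * PAGE_SIZE)   /* 8KB threshold */

/* Mirrors the newly added nvme_npages() in the hunk above. */
static unsigned nvme_npages(unsigned size)
{
	unsigned nprps = DIV_ROUND_UP(size + PAGE_SIZE, PAGE_SIZE);
	return DIV_ROUND_UP(8 * nprps, PAGE_SIZE - 8);
}

int main(void)
{
	/* Stand-in sizes; the kernel's real values differ. */
	unsigned sizeof_cmd_info = 32, sizeof_iod = 48, sizeof_sg = 32;

	unsigned cmd_size = sizeof_cmd_info
		+ sizeof_iod
		+ sizeof(void *) * nvme_npages(NVME_INT_BYTES)  /* PRP-list pointers */
		+ sizeof_sg * NVME_INT_PAGES;                   /* 2 scatterlist slots */

	printf("npages(8KB) = %u, cmd_size = %u bytes\n",
	       nvme_npages(NVME_INT_BYTES), cmd_size);
	return 0;
}

With these assumptions nvme_npages(8KB) evaluates to 1, so each command's pdu grows by one PRP-list pointer plus two scatterlist entries on top of struct nvme_iod: a fixed cost paid once per tag when the tagset is allocated, in exchange for skipping a kmalloc/kfree pair on every small IO.
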
};