
Commit f9da9d07 authored by Jens Axboe

Merge branch 'nvme-4.18' of git://git.infradead.org/nvme into for-linus

Pull NVMe fixes from Christoph:

"Various relatively small fixes, mostly to fix error handling of various
 sorts."

* 'nvme-4.18' of git://git.infradead.org/nvme:
  nvme-pci: limit max IO size and segments to avoid high order allocations
  nvme-pci: move nvme_kill_queues to nvme_remove_dead_ctrl
  nvme-fc: release io queues to allow fast fail
  nvmet: reset keep alive timer in controller enable
  nvme-rdma: don't override opts->queue_size
  nvme-rdma: Fix command completion race at error recovery
  nvme-rdma: fix possible free of a non-allocated async event buffer
  nvme-rdma: fix possible double free condition when failing to create a controller
parents 08ba91ee 943e942e
drivers/nvme/host/core.c +1 −0
@@ -1808,6 +1808,7 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
		u32 max_segments =
			(ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1;

+		max_segments = min_not_zero(max_segments, ctrl->max_segments);
		blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors);
		blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
	}
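
The one added line clamps the segment count computed from max_hw_sectors to the new per-controller cap. min_not_zero() is used rather than a plain min() because only nvme-pci sets ctrl->max_segments in this series; other transports leave the field at zero, and a zero cap must be ignored rather than win the comparison. A standalone userspace sketch of that behaviour (the MIN_NOT_ZERO macro is a simplified stand-in for the kernel helper, and the numeric values are only illustrative):

/*
 * Standalone sketch, not kernel code: MIN_NOT_ZERO mimics the kernel's
 * min_not_zero(); the values below are assumptions for illustration.
 */
#include <stdio.h>

#define MIN_NOT_ZERO(x, y) \
	((x) == 0 ? (y) : ((y) == 0 ? (x) : ((x) < (y) ? (x) : (y))))

int main(void)
{
	unsigned int computed = 129;	/* e.g. max_hw_sectors / (page_size >> 9) + 1 */
	unsigned int pci_cap = 127;	/* nvme-pci sets ctrl->max_segments = NVME_MAX_SEGS */
	unsigned int no_cap = 0;	/* other transports leave the field zero */

	printf("%u\n", MIN_NOT_ZERO(computed, pci_cap));	/* 127: clamped to the cap */
	printf("%u\n", MIN_NOT_ZERO(computed, no_cap));	/* 129: a zero cap is ignored */
	return 0;
}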
drivers/nvme/host/fc.c +3 −3
@@ -2790,6 +2790,9 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
	/* re-enable the admin_q so anything new can fast fail */
	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);

+	/* resume the io queues so that things will fast fail */
+	nvme_start_queues(&ctrl->ctrl);
+
	nvme_fc_ctlr_inactive_on_rport(ctrl);
}

@@ -2804,9 +2807,6 @@ nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl)
	 * waiting for io to terminate
	 */
	nvme_fc_delete_association(ctrl);
-
-	/* resume the io queues so that things will fast fail */
-	nvme_start_queues(nctrl);
}

static void
drivers/nvme/host/nvme.h +1 −0
@@ -170,6 +170,7 @@ struct nvme_ctrl {
	u64 cap;
	u32 page_size;
	u32 max_hw_sectors;
+	u32 max_segments;
	u16 oncs;
	u16 oacs;
	u16 nssa;
drivers/nvme/host/pci.c +38 −6
@@ -38,6 +38,13 @@

#define SGES_PER_PAGE	(PAGE_SIZE / sizeof(struct nvme_sgl_desc))

+/*
+ * These can be higher, but we need to ensure that any command doesn't
+ * require an sg allocation that needs more than a page of data.
+ */
+#define NVME_MAX_KB_SZ	4096
+#define NVME_MAX_SEGS	127
+
static int use_threaded_interrupts;
module_param(use_threaded_interrupts, int, 0);

@@ -100,6 +107,8 @@ struct nvme_dev {
	struct nvme_ctrl ctrl;
	struct completion ioq_wait;

+	mempool_t *iod_mempool;
+
	/* shadow doorbell buffer support: */
	u32 *dbbuf_dbs;
	dma_addr_t dbbuf_dbs_dma_addr;
@@ -477,10 +486,7 @@ static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev)
	iod->use_sgl = nvme_pci_use_sgls(dev, rq);

	if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) {
-		size_t alloc_size = nvme_pci_iod_alloc_size(dev, size, nseg,
-				iod->use_sgl);
-
-		iod->sg = kmalloc(alloc_size, GFP_ATOMIC);
+		iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC);
		if (!iod->sg)
			return BLK_STS_RESOURCE;
	} else {
@@ -526,7 +532,7 @@ static void nvme_free_iod(struct nvme_dev *dev, struct request *req)
	}

	if (iod->sg != iod->inline_sg)
-		kfree(iod->sg);
+		mempool_free(iod->sg, dev->iod_mempool);
}

#ifdef CONFIG_BLK_DEV_INTEGRITY
@@ -2280,6 +2286,7 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
		blk_put_queue(dev->ctrl.admin_q);
	kfree(dev->queues);
	free_opal_dev(dev->ctrl.opal_dev);
+	mempool_destroy(dev->iod_mempool);
	kfree(dev);
}

@@ -2289,6 +2296,7 @@ static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status)

	nvme_get_ctrl(&dev->ctrl);
	nvme_dev_disable(dev, false);
+	nvme_kill_queues(&dev->ctrl);
	if (!queue_work(nvme_wq, &dev->remove_work))
		nvme_put_ctrl(&dev->ctrl);
}
@@ -2333,6 +2341,13 @@ static void nvme_reset_work(struct work_struct *work)
	if (result)
		goto out;

+	/*
+	 * Limit the max command size to prevent iod->sg allocations going
+	 * over a single page.
+	 */
+	dev->ctrl.max_hw_sectors = NVME_MAX_KB_SZ << 1;
+	dev->ctrl.max_segments = NVME_MAX_SEGS;
+
	result = nvme_init_identify(&dev->ctrl);
	if (result)
		goto out;
@@ -2405,7 +2420,6 @@ static void nvme_remove_dead_ctrl_work(struct work_struct *work)
	struct nvme_dev *dev = container_of(work, struct nvme_dev, remove_work);
	struct pci_dev *pdev = to_pci_dev(dev->dev);

-	nvme_kill_queues(&dev->ctrl);
	if (pci_get_drvdata(pdev))
		device_release_driver(&pdev->dev);
	nvme_put_ctrl(&dev->ctrl);
@@ -2509,6 +2523,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
	int node, result = -ENOMEM;
	struct nvme_dev *dev;
	unsigned long quirks = id->driver_data;
+	size_t alloc_size;

	node = dev_to_node(&pdev->dev);
	if (node == NUMA_NO_NODE)
@@ -2546,6 +2561,23 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
	if (result)
		goto release_pools;

+	/*
+	 * Double check that our mempool alloc size will cover the biggest
+	 * command we support.
+	 */
+	alloc_size = nvme_pci_iod_alloc_size(dev, NVME_MAX_KB_SZ,
+						NVME_MAX_SEGS, true);
+	WARN_ON_ONCE(alloc_size > PAGE_SIZE);
+
+	dev->iod_mempool = mempool_create_node(1, mempool_kmalloc,
+						mempool_kfree,
+						(void *) alloc_size,
+						GFP_KERNEL, node);
+	if (!dev->iod_mempool) {
+		result = -ENOMEM;
+		goto release_pools;
+	}
+
	dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));

	nvme_get_ctrl(&dev->ctrl);
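
Taken together, the pci.c hunks replace the per-request kmalloc() of the scatter-gather list with a per-controller mempool: probe sizes the pool for the largest command the driver now advertises (NVME_MAX_KB_SZ and NVME_MAX_SEGS, enforced through max_hw_sectors and max_segments in nvme_reset_work), so an iod->sg allocation never exceeds a single page and an atomic allocation under memory pressure can fall back to the reserved element instead of attempting a high-order kmalloc. A minimal sketch of the same pattern; the names my_dev, my_probe, my_alloc_sg, my_free_sg and my_remove are hypothetical, while the mempool_* calls are the real kernel API:

/* Sketch only: hypothetical driver using the same mempool pattern. */
#include <linux/mempool.h>
#include <linux/slab.h>

struct my_dev {
	mempool_t *iod_mempool;
};

/* At probe: reserve one worst-case element so I/O can always make progress. */
static int my_probe(struct my_dev *dev, size_t worst_case_bytes, int node)
{
	dev->iod_mempool = mempool_create_node(1, mempool_kmalloc,
					       mempool_kfree,
					       (void *)worst_case_bytes,
					       GFP_KERNEL, node);
	return dev->iod_mempool ? 0 : -ENOMEM;
}

/* Per request: may dip into the reserved element instead of failing. */
static void *my_alloc_sg(struct my_dev *dev)
{
	return mempool_alloc(dev->iod_mempool, GFP_ATOMIC);
}

static void my_free_sg(struct my_dev *dev, void *sg)
{
	mempool_free(sg, dev->iod_mempool);
}

/* On remove: tear the pool down once no request can still use it. */
static void my_remove(struct my_dev *dev)
{
	mempool_destroy(dev->iod_mempool);
}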
drivers/nvme/host/rdma.c +37 −36
@@ -560,12 +560,6 @@ static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue)
	if (!test_and_clear_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags))
		return;

-	if (nvme_rdma_queue_idx(queue) == 0) {
-		nvme_rdma_free_qe(queue->device->dev,
-			&queue->ctrl->async_event_sqe,
-			sizeof(struct nvme_command), DMA_TO_DEVICE);
-	}
-
	nvme_rdma_destroy_queue_ib(queue);
	rdma_destroy_id(queue->cm_id);
}
@@ -698,7 +692,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
		set = &ctrl->tag_set;
		memset(set, 0, sizeof(*set));
		set->ops = &nvme_rdma_mq_ops;
-		set->queue_depth = nctrl->opts->queue_size;
+		set->queue_depth = nctrl->sqsize + 1;
		set->reserved_tags = 1; /* fabric connect */
		set->numa_node = NUMA_NO_NODE;
		set->flags = BLK_MQ_F_SHOULD_MERGE;
@@ -734,11 +728,12 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
		bool remove)
{
-	nvme_rdma_stop_queue(&ctrl->queues[0]);
	if (remove) {
		blk_cleanup_queue(ctrl->ctrl.admin_q);
		nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset);
	}
+	nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
+		sizeof(struct nvme_command), DMA_TO_DEVICE);
	nvme_rdma_free_queue(&ctrl->queues[0]);
}

@@ -755,11 +750,16 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,

	ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev);

+	error = nvme_rdma_alloc_qe(ctrl->device->dev, &ctrl->async_event_sqe,
+			sizeof(struct nvme_command), DMA_TO_DEVICE);
+	if (error)
+		goto out_free_queue;
+
	if (new) {
		ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true);
		if (IS_ERR(ctrl->ctrl.admin_tagset)) {
			error = PTR_ERR(ctrl->ctrl.admin_tagset);
-			goto out_free_queue;
+			goto out_free_async_qe;
		}

		ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
@@ -795,12 +795,6 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
	if (error)
		goto out_stop_queue;

-	error = nvme_rdma_alloc_qe(ctrl->queues[0].device->dev,
-			&ctrl->async_event_sqe, sizeof(struct nvme_command),
-			DMA_TO_DEVICE);
-	if (error)
-		goto out_stop_queue;
-
	return 0;

out_stop_queue:
@@ -811,6 +805,9 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
out_free_tagset:
	if (new)
		nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset);
+out_free_async_qe:
+	nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
+		sizeof(struct nvme_command), DMA_TO_DEVICE);
out_free_queue:
	nvme_rdma_free_queue(&ctrl->queues[0]);
	return error;
@@ -819,7 +816,6 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
static void nvme_rdma_destroy_io_queues(struct nvme_rdma_ctrl *ctrl,
		bool remove)
{
-	nvme_rdma_stop_io_queues(ctrl);
	if (remove) {
		blk_cleanup_queue(ctrl->ctrl.connect_q);
		nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
@@ -888,9 +884,9 @@ static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
	list_del(&ctrl->list);
	mutex_unlock(&nvme_rdma_ctrl_mutex);

-	kfree(ctrl->queues);
	nvmf_free_options(nctrl->opts);
free_ctrl:
+	kfree(ctrl->queues);
	kfree(ctrl);
}

@@ -949,6 +945,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
	return;

destroy_admin:
+	nvme_rdma_stop_queue(&ctrl->queues[0]);
	nvme_rdma_destroy_admin_queue(ctrl, false);
requeue:
	dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
@@ -965,12 +962,14 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)

	if (ctrl->ctrl.queue_count > 1) {
		nvme_stop_queues(&ctrl->ctrl);
+		nvme_rdma_stop_io_queues(ctrl);
		blk_mq_tagset_busy_iter(&ctrl->tag_set,
					nvme_cancel_request, &ctrl->ctrl);
		nvme_rdma_destroy_io_queues(ctrl, false);
	}

	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
+	nvme_rdma_stop_queue(&ctrl->queues[0]);
	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
				nvme_cancel_request, &ctrl->ctrl);
	nvme_rdma_destroy_admin_queue(ctrl, false);
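
This hunk, together with the matching nvme_rdma_shutdown_ctrl() hunks below, carries the completion-race fix: stopping a queue (and draining its RDMA queue pair) no longer happens implicitly in the free/destroy helpers but explicitly before blk_mq_tagset_busy_iter() force-completes outstanding requests, so a genuine completion cannot race with nvme_cancel_request() on the same request. A sketch of the required ordering; teardown_io_queues() is a hypothetical helper standing in for the sequence inlined above, not a function in the driver:

/* Sketch: the stop -> cancel -> destroy ordering the recovery path relies on. */
static void teardown_io_queues(struct nvme_rdma_ctrl *ctrl, bool remove)
{
	nvme_stop_queues(&ctrl->ctrl);		/* quiesce the block-layer queues */
	nvme_rdma_stop_io_queues(ctrl);		/* disconnect/drain the RDMA QPs */
	blk_mq_tagset_busy_iter(&ctrl->tag_set,	/* only now force-complete leftovers */
				nvme_cancel_request, &ctrl->ctrl);
	nvme_rdma_destroy_io_queues(ctrl, remove);
}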
@@ -1736,6 +1735,7 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
{
	if (ctrl->ctrl.queue_count > 1) {
		nvme_stop_queues(&ctrl->ctrl);
+		nvme_rdma_stop_io_queues(ctrl);
		blk_mq_tagset_busy_iter(&ctrl->tag_set,
					nvme_cancel_request, &ctrl->ctrl);
		nvme_rdma_destroy_io_queues(ctrl, shutdown);
@@ -1747,6 +1747,7 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
		nvme_disable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);

	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
+	nvme_rdma_stop_queue(&ctrl->queues[0]);
	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
				nvme_cancel_request, &ctrl->ctrl);
	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
@@ -1932,11 +1933,6 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
		goto out_free_ctrl;
	}

-	ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_rdma_ctrl_ops,
-				0 /* no quirks, we're perfect! */);
-	if (ret)
-		goto out_free_ctrl;
-
	INIT_DELAYED_WORK(&ctrl->reconnect_work,
			nvme_rdma_reconnect_ctrl_work);
	INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work);
@@ -1950,14 +1946,19 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
	ctrl->queues = kcalloc(ctrl->ctrl.queue_count, sizeof(*ctrl->queues),
				GFP_KERNEL);
	if (!ctrl->queues)
-		goto out_uninit_ctrl;
+		goto out_free_ctrl;
+
+	ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_rdma_ctrl_ops,
+				0 /* no quirks, we're perfect! */);
+	if (ret)
+		goto out_kfree_queues;

	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING);
	WARN_ON_ONCE(!changed);

	ret = nvme_rdma_configure_admin_queue(ctrl, true);
	if (ret)
-		goto out_kfree_queues;
+		goto out_uninit_ctrl;

	/* sanity check icdoff */
	if (ctrl->ctrl.icdoff) {
@@ -1974,20 +1975,19 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
		goto out_remove_admin_queue;
	}

-	if (opts->queue_size > ctrl->ctrl.maxcmd) {
-		/* warn if maxcmd is lower than queue_size */
-		dev_warn(ctrl->ctrl.device,
-			"queue_size %zu > ctrl maxcmd %u, clamping down\n",
-			opts->queue_size, ctrl->ctrl.maxcmd);
-		opts->queue_size = ctrl->ctrl.maxcmd;
-	}
-
+	/* only warn if argument is too large here, will clamp later */
	if (opts->queue_size > ctrl->ctrl.sqsize + 1) {
-		/* warn if sqsize is lower than queue_size */
		dev_warn(ctrl->ctrl.device,
			"queue_size %zu > ctrl sqsize %u, clamping down\n",
			opts->queue_size, ctrl->ctrl.sqsize + 1);
-		opts->queue_size = ctrl->ctrl.sqsize + 1;
+	}
+
+	/* warn if maxcmd is lower than sqsize+1 */
+	if (ctrl->ctrl.sqsize + 1 > ctrl->ctrl.maxcmd) {
+		dev_warn(ctrl->ctrl.device,
+			"sqsize %u > ctrl maxcmd %u, clamping down\n",
+			ctrl->ctrl.sqsize + 1, ctrl->ctrl.maxcmd);
+		ctrl->ctrl.sqsize = ctrl->ctrl.maxcmd - 1;
	}

	if (opts->nr_io_queues) {
@@ -2013,15 +2013,16 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
	return &ctrl->ctrl;

out_remove_admin_queue:
+	nvme_rdma_stop_queue(&ctrl->queues[0]);
	nvme_rdma_destroy_admin_queue(ctrl, true);
-out_kfree_queues:
-	kfree(ctrl->queues);
out_uninit_ctrl:
	nvme_uninit_ctrl(&ctrl->ctrl);
	nvme_put_ctrl(&ctrl->ctrl);
	if (ret > 0)
		ret = -EIO;
	return ERR_PTR(ret);
+out_kfree_queues:
+	kfree(ctrl->queues);
out_free_ctrl:
	kfree(ctrl);
	return ERR_PTR(ret);
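
The last hunks hold the double-free fix: ctrl->queues is now allocated before nvme_init_ctrl(), and each error label releases only what was actually set up before the failure, so the queues array can no longer be freed both on the error path and again from nvme_rdma_free_ctrl() once the controller reference is dropped. A generic sketch of that ownership rule; obj_register, obj_start, obj_put and struct obj are hypothetical names, not driver code:

/* Sketch: free directly before registration, via the release callback after. */
#include <linux/slab.h>

struct obj {
	unsigned int nr_queues;
	void **queues;
};

int obj_register(struct obj *o);	/* hypothetical: installs a release callback */
int obj_start(struct obj *o);		/* hypothetical */
void obj_put(struct obj *o);		/* hypothetical: release callback kfrees ->queues and o */

static int create_obj(struct obj *o)
{
	int ret;

	o->queues = kcalloc(o->nr_queues, sizeof(*o->queues), GFP_KERNEL);
	if (!o->queues)
		return -ENOMEM;

	ret = obj_register(o);		/* from here the release callback owns ->queues */
	if (ret)
		goto out_free_queues;

	ret = obj_start(o);
	if (ret)
		goto out_put;		/* must NOT also kfree(o->queues) here */

	return 0;

out_put:
	obj_put(o);			/* release path frees ->queues exactly once */
	return ret;
out_free_queues:
	kfree(o->queues);
	return ret;
}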