Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 95c7c09f authored by Jens Axboe's avatar Jens Axboe
Browse files

Merge branch 'nvme-4.18' of git://git.infradead.org/nvme into for-linus

Pull NVMe fixes from Christoph:

"Fix various little regressions introduced in this merge window, plus
 a rework of the fibre channel connect and reconnect path to share the
 code instead of having separate sets of bugs. Last but not least a
 trivial trace point addition from Hannes."

* 'nvme-4.18' of git://git.infradead.org/nvme:
  nvme-fabrics: fix and refine state checks in __nvmf_check_ready
  nvme-fabrics: handle the admin-only case properly in nvmf_check_ready
  nvme-fabrics: refactor queue ready check
  blk-mq: remove blk_mq_tagset_iter
  nvme: remove nvme_reinit_tagset
  nvme-fc: fix nulling of queue data on reconnect
  nvme-fc: remove reinit_request routine
  nvme-fc: change controllers first connect to use reconnect path
  nvme: don't rely on the changed namespace list log
  nvmet: free smart-log buffer after use
  nvme-rdma: fix error flow during mapping request data
  nvme: add bio remapping tracepoint
  nvme: fix NULL pointer dereference in nvme_init_subsystem
parents da661267 35897b92
Loading
Loading
Loading
Loading
+0 −29
Original line number Diff line number Diff line
@@ -311,35 +311,6 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
}
EXPORT_SYMBOL(blk_mq_tagset_busy_iter);

/*
 * blk_mq_tagset_iter - apply @fn to every statically allocated request
 * in @set, walking all hardware queues in order.
 *
 * Returns 0 on success, or the first non-zero value returned by @fn,
 * at which point iteration stops early.  A NULL @fn triggers a one-shot
 * warning and returns 0.
 */
int blk_mq_tagset_iter(struct blk_mq_tag_set *set, void *data,
			 int (fn)(void *, struct request *))
{
	int hw, tag, ret = 0;

	if (WARN_ON_ONCE(!fn))
		return 0;

	for (hw = 0; hw < set->nr_hw_queues; hw++) {
		struct blk_mq_tags *hw_tags = set->tags[hw];

		/* Hardware queues without allocated tags are skipped. */
		if (!hw_tags)
			continue;

		for (tag = 0; tag < hw_tags->nr_tags; tag++) {
			struct request *rq = hw_tags->static_rqs[tag];

			if (!rq)
				continue;

			ret = fn(data, rq);
			if (ret)
				return ret;
		}
	}

	return ret;
}
EXPORT_SYMBOL_GPL(blk_mq_tagset_iter);

void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
		void *priv)
{
+12 −36
Original line number Diff line number Diff line
@@ -2208,7 +2208,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
		 * Verify that the subsystem actually supports multiple
		 * controllers, else bail out.
		 */
		if (!ctrl->opts->discovery_nqn &&
		if (!(ctrl->opts && ctrl->opts->discovery_nqn) &&
		    nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) {
			dev_err(ctrl->device,
				"ignoring ctrl due to duplicate subnqn (%s).\n",
@@ -3197,40 +3197,28 @@ static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl, unsigned nn)
	nvme_remove_invalid_namespaces(ctrl, nn);
}

static bool nvme_scan_changed_ns_log(struct nvme_ctrl *ctrl)
static void nvme_clear_changed_ns_log(struct nvme_ctrl *ctrl)
{
	size_t log_size = NVME_MAX_CHANGED_NAMESPACES * sizeof(__le32);
	__le32 *log;
	int error, i;
	bool ret = false;
	int error;

	log = kzalloc(log_size, GFP_KERNEL);
	if (!log)
		return false;
		return;

	/*
	 * We need to read the log to clear the AEN, but we don't want to rely
	 * on it for the changed namespace information as userspace could have
	 * raced with us in reading the log page, which could cause us to miss
	 * updates.
	 */
	error = nvme_get_log(ctrl, NVME_LOG_CHANGED_NS, log, log_size);
	if (error) {
	if (error)
		dev_warn(ctrl->device,
			"reading changed ns log failed: %d\n", error);
		goto out_free_log;
	}

	if (log[0] == cpu_to_le32(0xffffffff))
		goto out_free_log;

	for (i = 0; i < NVME_MAX_CHANGED_NAMESPACES; i++) {
		u32 nsid = le32_to_cpu(log[i]);

		if (nsid == 0)
			break;
		dev_info(ctrl->device, "rescanning namespace %d.\n", nsid);
		nvme_validate_ns(ctrl, nsid);
	}
	ret = true;

out_free_log:
	kfree(log);
	return ret;
}

static void nvme_scan_work(struct work_struct *work)
@@ -3246,9 +3234,8 @@ static void nvme_scan_work(struct work_struct *work)
	WARN_ON_ONCE(!ctrl->tagset);

	if (test_and_clear_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) {
		if (nvme_scan_changed_ns_log(ctrl))
			goto out_sort_namespaces;
		dev_info(ctrl->device, "rescanning namespaces.\n");
		nvme_clear_changed_ns_log(ctrl);
	}

	if (nvme_identify_ctrl(ctrl, &id))
@@ -3263,7 +3250,6 @@ static void nvme_scan_work(struct work_struct *work)
	nvme_scan_ns_sequential(ctrl, nn);
out_free_id:
	kfree(id);
out_sort_namespaces:
	down_write(&ctrl->namespaces_rwsem);
	list_sort(NULL, &ctrl->namespaces, ns_cmp);
	up_write(&ctrl->namespaces_rwsem);
@@ -3641,16 +3627,6 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
}
EXPORT_SYMBOL_GPL(nvme_start_queues);

/*
 * nvme_reinit_tagset - run the transport's ->reinit_request callback over
 * every request in @set.
 *
 * Transports that do not implement reinit_request have nothing to do, so
 * 0 is returned immediately; otherwise the result of the tag-set walk is
 * propagated to the caller.
 */
int nvme_reinit_tagset(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set)
{
	if (!ctrl->ops->reinit_request)
		return 0;
	return blk_mq_tagset_iter(set, set->driver_data,
				  ctrl->ops->reinit_request);
}
EXPORT_SYMBOL_GPL(nvme_reinit_tagset);

int __init nvme_core_init(void)
{
	int result = -ENOMEM;
+37 −49
Original line number Diff line number Diff line
@@ -536,67 +536,55 @@ static struct nvmf_transport_ops *nvmf_lookup_transport(
	return NULL;
}

blk_status_t nvmf_check_if_ready(struct nvme_ctrl *ctrl, struct request *rq,
		bool queue_live, bool is_connected)
{
	struct nvme_command *cmd = nvme_req(rq)->cmd;

	if (likely(ctrl->state == NVME_CTRL_LIVE && is_connected))
		return BLK_STS_OK;

	switch (ctrl->state) {
	case NVME_CTRL_NEW:
	case NVME_CTRL_CONNECTING:
	case NVME_CTRL_DELETING:
/*
		 * This is the case of starting a new or deleting an association
		 * but connectivity was lost before it was fully created or torn
		 * down. We need to error the commands used to initialize the
		 * controller so the reconnect can go into a retry attempt.  The
		 * commands should all be marked REQ_FAILFAST_DRIVER, which will
		 * hit the reject path below. Anything else will be queued while
		 * the state settles.
 * For something we're not in a state to send to the device the default action
 * is to busy it and retry it after the controller state is recovered.  However,
 * anything marked for failfast or nvme multipath is immediately failed.
 *
 * Note: commands used to initialize the controller will be marked for failfast.
 * Note: nvme cli/ioctl commands are marked for failfast.
 */
		if (!is_connected)
			break;
/*
 * nvmf_fail_nonready_command - dispose of a request the controller cannot
 * accept in its current state.
 *
 * Requests that are retryable and not on the multipath fast path are
 * bounced back with BLK_STS_RESOURCE so the block layer requeues them
 * once the controller recovers.  Everything else (failfast-marked or
 * REQ_NVME_MPATH requests) is failed immediately with an abort status.
 */
blk_status_t nvmf_fail_nonready_command(struct request *rq)
{
	/* Default: busy the request so it is retried after recovery. */
	if (!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
		return BLK_STS_RESOURCE;
	/* Failfast/multipath: terminate now with an NVMe abort status. */
	nvme_req(rq)->status = NVME_SC_ABORT_REQ;
	return BLK_STS_IOERR;
}
EXPORT_SYMBOL_GPL(nvmf_fail_nonready_command);

bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
		bool queue_live)
{
	struct nvme_request *req = nvme_req(rq);

	/*
		 * If queue is live, allow only commands that are internally
		 * generated pass through.  These are commands on the admin
		 * queue to initialize the controller. This will reject any
		 * ioctl admin cmds received while initializing.
	 * If we are in some state of setup or teardown only allow
	 * internally generated commands.
	 */
		if (queue_live && !(nvme_req(rq)->flags & NVME_REQ_USERCMD))
			return BLK_STS_OK;
	if (!blk_rq_is_passthrough(rq) || (req->flags & NVME_REQ_USERCMD))
		return false;

	/*
		 * If the queue is not live, allow only a connect command.  This
		 * will reject any ioctl admin cmd as well as initialization
		 * commands if the controller reverted the queue to non-live.
	 * Only allow commands on a live queue, except for the connect command,
	 * which is required to set the queue live in the appropriate states.
	 */
		if (!queue_live && blk_rq_is_passthrough(rq) &&
		     cmd->common.opcode == nvme_fabrics_command &&
		     cmd->fabrics.fctype == nvme_fabrics_type_connect)
			return BLK_STS_OK;
	switch (ctrl->state) {
	case NVME_CTRL_NEW:
	case NVME_CTRL_CONNECTING:
		if (req->cmd->common.opcode == nvme_fabrics_command &&
		    req->cmd->fabrics.fctype == nvme_fabrics_type_connect)
			return true;
		break;
	default:
		break;
	case NVME_CTRL_DEAD:
		return false;
	}

	/*
	 * Any other new io is something we're not in a state to send to the
	 * device.  Default action is to busy it and retry it after the
	 * controller state is recovered. However, anything marked for failfast
	 * or nvme multipath is immediately failed.  Note: commands used to
	 * initialize the controller will be marked for failfast.
	 * Note: nvme cli/ioctl commands are marked for failfast.
	 */
	if (!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
		return BLK_STS_RESOURCE;
	nvme_req(rq)->status = NVME_SC_ABORT_REQ;
	return BLK_STS_IOERR;
	return queue_live;
}
EXPORT_SYMBOL_GPL(nvmf_check_if_ready);
EXPORT_SYMBOL_GPL(__nvmf_check_ready);

static const match_table_t opt_tokens = {
	{ NVMF_OPT_TRANSPORT,		"transport=%s"		},
+12 −2
Original line number Diff line number Diff line
@@ -162,7 +162,17 @@ void nvmf_unregister_transport(struct nvmf_transport_ops *ops);
void nvmf_free_options(struct nvmf_ctrl_options *opts);
int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size);
bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
blk_status_t nvmf_check_if_ready(struct nvme_ctrl *ctrl,
	struct request *rq, bool queue_live, bool is_connected);
blk_status_t nvmf_fail_nonready_command(struct request *rq);
bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
		bool queue_live);

/*
 * nvmf_check_ready - fast-path readiness check for an incoming request.
 *
 * A controller in the LIVE or ADMIN_ONLY state accepts commands without
 * further inspection; any other state falls through to the out-of-line
 * slow path in __nvmf_check_ready().
 */
static inline bool nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
		bool queue_live)
{
	if (unlikely(ctrl->state != NVME_CTRL_LIVE &&
		     ctrl->state != NVME_CTRL_ADMIN_ONLY))
		return __nvmf_check_ready(ctrl, rq, queue_live);
	return true;
}

#endif /* _NVME_FABRICS_H */
+56 −88
Original line number Diff line number Diff line
@@ -142,6 +142,7 @@ struct nvme_fc_ctrl {
	struct nvme_fc_rport	*rport;
	u32			cnum;

	bool			ioq_live;
	bool			assoc_active;
	u64			association_id;

@@ -1470,21 +1471,6 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)

static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg);

/*
 * nvme_fc_reinit_request - reset the FC-specific per-request state so the
 * request can be reused after a reconnect.
 *
 * The command IU is zeroed and its fixed header fields (scsi_id, fc_id,
 * iu_len in 32-bit words) are re-populated; the response IU is cleared.
 * Always returns 0 so it can serve as a blk_mq_tagset_iter callback.
 */
static int
nvme_fc_reinit_request(void *data, struct request *rq)
{
	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);

	memset(&op->cmd_iu, 0, sizeof(op->cmd_iu));
	op->cmd_iu.scsi_id = NVME_CMD_SCSI_ID;
	op->cmd_iu.fc_id = NVME_CMD_FC_ID;
	op->cmd_iu.iu_len = cpu_to_be16(sizeof(op->cmd_iu) / sizeof(u32));
	memset(&op->rsp_iu, 0, sizeof(op->rsp_iu));

	return 0;
}

static void
__nvme_fc_exit_request(struct nvme_fc_ctrl *ctrl,
		struct nvme_fc_fcp_op *op)
@@ -1893,6 +1879,7 @@ nvme_fc_free_queue(struct nvme_fc_queue *queue)
	 */

	queue->connection_id = 0;
	atomic_set(&queue->csn, 1);
}

static void
@@ -2279,14 +2266,13 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
	struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
	struct nvme_command *sqe = &cmdiu->sqe;
	enum nvmefc_fcp_datadir	io_dir;
	bool queue_ready = test_bit(NVME_FC_Q_LIVE, &queue->flags);
	u32 data_len;
	blk_status_t ret;

	ret = nvmf_check_if_ready(&queue->ctrl->ctrl, rq,
		test_bit(NVME_FC_Q_LIVE, &queue->flags),
		ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE);
	if (unlikely(ret))
		return ret;
	if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE ||
	    !nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
		return nvmf_fail_nonready_command(rq);

	ret = nvme_setup_cmd(ns, rq, sqe);
	if (ret)
@@ -2463,6 +2449,8 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
	if (ret)
		goto out_delete_hw_queues;

	ctrl->ioq_live = true;

	return 0;

out_delete_hw_queues:
@@ -2480,7 +2468,7 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
}

static int
nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl)
{
	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
	unsigned int nr_io_queues;
@@ -2500,12 +2488,6 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
	if (ctrl->ctrl.queue_count == 1)
		return 0;

	nvme_fc_init_io_queues(ctrl);

	ret = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
	if (ret)
		goto out_free_io_queues;

	ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
	if (ret)
		goto out_free_io_queues;
@@ -2603,8 +2585,6 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
	 * Create the admin queue
	 */

	nvme_fc_init_queue(ctrl, 0);

	ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0,
				NVME_AQ_DEPTH);
	if (ret)
@@ -2615,7 +2595,6 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
	if (ret)
		goto out_delete_hw_queue;

	if (ctrl->ctrl.state != NVME_CTRL_NEW)
	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);

	ret = nvmf_connect_admin_queue(&ctrl->ctrl);
@@ -2689,10 +2668,10 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
	 */

	if (ctrl->ctrl.queue_count > 1) {
		if (ctrl->ctrl.state == NVME_CTRL_NEW)
		if (!ctrl->ioq_live)
			ret = nvme_fc_create_io_queues(ctrl);
		else
			ret = nvme_fc_reinit_io_queues(ctrl);
			ret = nvme_fc_recreate_io_queues(ctrl);
		if (ret)
			goto out_term_aen_ops;
	}
@@ -2776,7 +2755,6 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
	 * use blk_mq_tagset_busy_itr() and the transport routine to
	 * terminate the exchanges.
	 */
	if (ctrl->ctrl.state != NVME_CTRL_NEW)
	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
				nvme_fc_terminate_exchange, &ctrl->ctrl);
@@ -2917,7 +2895,6 @@ static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
	.submit_async_event	= nvme_fc_submit_async_event,
	.delete_ctrl		= nvme_fc_delete_ctrl,
	.get_address		= nvmf_get_address,
	.reinit_request		= nvme_fc_reinit_request,
};

static void
@@ -2934,7 +2911,7 @@ nvme_fc_connect_ctrl_work(struct work_struct *work)
		nvme_fc_reconnect_or_delete(ctrl, ret);
	else
		dev_info(ctrl->ctrl.device,
			"NVME-FC{%d}: controller reconnect complete\n",
			"NVME-FC{%d}: controller connect complete\n",
			ctrl->cnum);
}

@@ -2982,7 +2959,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
{
	struct nvme_fc_ctrl *ctrl;
	unsigned long flags;
	int ret, idx, retry;
	int ret, idx;

	if (!(rport->remoteport.port_role &
	    (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) {
@@ -3009,11 +2986,13 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
	}

	ctrl->ctrl.opts = opts;
	ctrl->ctrl.nr_reconnects = 0;
	INIT_LIST_HEAD(&ctrl->ctrl_list);
	ctrl->lport = lport;
	ctrl->rport = rport;
	ctrl->dev = lport->dev;
	ctrl->cnum = idx;
	ctrl->ioq_live = false;
	ctrl->assoc_active = false;
	init_waitqueue_head(&ctrl->ioabort_wait);

@@ -3032,6 +3011,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,

	ctrl->ctrl.sqsize = opts->queue_size - 1;
	ctrl->ctrl.kato = opts->kato;
	ctrl->ctrl.cntlid = 0xffff;

	ret = -ENOMEM;
	ctrl->queues = kcalloc(ctrl->ctrl.queue_count,
@@ -3039,6 +3019,8 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
	if (!ctrl->queues)
		goto out_free_ida;

	nvme_fc_init_queue(ctrl, 0);

	memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set));
	ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops;
	ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
@@ -3081,39 +3063,36 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
	list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list);
	spin_unlock_irqrestore(&rport->lock, flags);

	/*
	 * It's possible that transactions used to create the association
	 * may fail. Examples: CreateAssociation LS or CreateIOConnection
	 * LS gets dropped/corrupted/fails; or a frame gets dropped or a
	 * command times out for one of the actions to init the controller
	 * (Connect, Get/Set_Property, Set_Features, etc). Many of these
	 * transport errors (frame drop, LS failure) inherently must kill
	 * the association. The transport is coded so that any command used
	 * to create the association (prior to a LIVE state transition
	 * while NEW or CONNECTING) will fail if it completes in error or
	 * times out.
	 *
	 * As such: as the connect request was mostly likely due to a
	 * udev event that discovered the remote port, meaning there is
	 * not an admin or script there to restart if the connect
	 * request fails, retry the initial connection creation up to
	 * three times before giving up and declaring failure.
	 */
	for (retry = 0; retry < 3; retry++) {
		ret = nvme_fc_create_association(ctrl);
		if (!ret)
			break;
	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING) ||
	    !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
		dev_err(ctrl->ctrl.device,
			"NVME-FC{%d}: failed to init ctrl state\n", ctrl->cnum);
		goto fail_ctrl;
	}

	if (ret) {
	nvme_get_ctrl(&ctrl->ctrl);

	if (!queue_delayed_work(nvme_wq, &ctrl->connect_work, 0)) {
		nvme_put_ctrl(&ctrl->ctrl);
		dev_err(ctrl->ctrl.device,
			"NVME-FC{%d}: failed to schedule initial connect\n",
			ctrl->cnum);
		goto fail_ctrl;
	}

	flush_delayed_work(&ctrl->connect_work);

	dev_info(ctrl->ctrl.device,
		"NVME-FC{%d}: new ctrl: NQN \"%s\"\n",
		ctrl->cnum, ctrl->ctrl.opts->subsysnqn);

	return &ctrl->ctrl;

fail_ctrl:
	nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING);
	cancel_work_sync(&ctrl->ctrl.reset_work);
	cancel_delayed_work_sync(&ctrl->connect_work);

		/* couldn't schedule retry - fail out */
		dev_err(ctrl->ctrl.device,
			"NVME-FC{%d}: Connect retry failed\n", ctrl->cnum);

	ctrl->ctrl.opts = NULL;

	/* initiate nvme ctrl ref counting teardown */
@@ -3131,18 +3110,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
	 */
	nvme_fc_rport_get(rport);

		if (ret > 0)
			ret = -EIO;
		return ERR_PTR(ret);
	}

	nvme_get_ctrl(&ctrl->ctrl);

	dev_info(ctrl->ctrl.device,
		"NVME-FC{%d}: new ctrl: NQN \"%s\"\n",
		ctrl->cnum, ctrl->ctrl.opts->subsysnqn);

	return &ctrl->ctrl;
	return ERR_PTR(-EIO);

out_cleanup_admin_q:
	blk_cleanup_queue(ctrl->ctrl.admin_q);
Loading