Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c786427f authored by Linus Torvalds
Browse files

Merge tag 'for-linus-20180217' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:

 - NVMe pull request from Keith, with fixes all over the map for nvme.
   From various folks.

 - Classic polling fix, that avoids a latency issue where we still end
   up waiting for an interrupt in some cases. From Nitesh Shetty.

 - Comment typo fix from Minwoo Im.

* tag 'for-linus-20180217' of git://git.kernel.dk/linux-block:
  block: fix a typo in comment of BLK_MQ_POLL_STATS_BKTS
  nvme-rdma: fix sysfs invoked reset_ctrl error flow
  nvmet: Change return code of discard command if not supported
  nvme-pci: Fix timeouts in connecting state
  nvme-pci: Remap CMB SQ entries on every controller reset
  nvme: fix the deadlock in nvme_update_formats
  blk: optimization for classic polling
  nvme: Don't use a stack buffer for keep-alive command
  nvme_fc: cleanup io completion
  nvme_fc: correct abort race condition on resets
  nvme: Fix discard buffer overrun
  nvme: delete NVME_CTRL_LIVE --> NVME_CTRL_CONNECTING transition
  nvme-rdma: use NVME_CTRL_CONNECTING state to mark init process
  nvme: rename NVME_CTRL_RECONNECTING state to NVME_CTRL_CONNECTING
parents fa2139ef 096392e0
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -3164,6 +3164,7 @@ static bool __blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq)
		cpu_relax();
	}

	__set_current_state(TASK_RUNNING);
	return false;
}

+27 −18
Original line number Diff line number Diff line
@@ -120,8 +120,12 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
	int ret;

	ret = nvme_reset_ctrl(ctrl);
	if (!ret)
	if (!ret) {
		flush_work(&ctrl->reset_work);
		if (ctrl->state != NVME_CTRL_LIVE)
			ret = -ENETRESET;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(nvme_reset_ctrl_sync);
@@ -265,7 +269,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
	switch (new_state) {
	case NVME_CTRL_ADMIN_ONLY:
		switch (old_state) {
		case NVME_CTRL_RECONNECTING:
		case NVME_CTRL_CONNECTING:
			changed = true;
			/* FALLTHRU */
		default:
@@ -276,7 +280,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
		switch (old_state) {
		case NVME_CTRL_NEW:
		case NVME_CTRL_RESETTING:
		case NVME_CTRL_RECONNECTING:
		case NVME_CTRL_CONNECTING:
			changed = true;
			/* FALLTHRU */
		default:
@@ -294,9 +298,9 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
			break;
		}
		break;
	case NVME_CTRL_RECONNECTING:
	case NVME_CTRL_CONNECTING:
		switch (old_state) {
		case NVME_CTRL_LIVE:
		case NVME_CTRL_NEW:
		case NVME_CTRL_RESETTING:
			changed = true;
			/* FALLTHRU */
@@ -309,7 +313,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
		case NVME_CTRL_LIVE:
		case NVME_CTRL_ADMIN_ONLY:
		case NVME_CTRL_RESETTING:
		case NVME_CTRL_RECONNECTING:
		case NVME_CTRL_CONNECTING:
			changed = true;
			/* FALLTHRU */
		default:
@@ -518,9 +522,11 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
		u64 slba = nvme_block_nr(ns, bio->bi_iter.bi_sector);
		u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift;

		if (n < segments) {
			range[n].cattr = cpu_to_le32(0);
			range[n].nlb = cpu_to_le32(nlb);
			range[n].slba = cpu_to_le64(slba);
		}
		n++;
	}

@@ -794,13 +800,9 @@ static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status)

static int nvme_keep_alive(struct nvme_ctrl *ctrl)
{
	struct nvme_command c;
	struct request *rq;

	memset(&c, 0, sizeof(c));
	c.common.opcode = nvme_admin_keep_alive;

	rq = nvme_alloc_request(ctrl->admin_q, &c, BLK_MQ_REQ_RESERVED,
	rq = nvme_alloc_request(ctrl->admin_q, &ctrl->ka_cmd, BLK_MQ_REQ_RESERVED,
			NVME_QID_ANY);
	if (IS_ERR(rq))
		return PTR_ERR(rq);
@@ -832,6 +834,8 @@ void nvme_start_keep_alive(struct nvme_ctrl *ctrl)
		return;

	INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
	memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
	ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive;
	schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
}
EXPORT_SYMBOL_GPL(nvme_start_keep_alive);
@@ -1117,14 +1121,19 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,

static void nvme_update_formats(struct nvme_ctrl *ctrl)
{
	struct nvme_ns *ns;
	struct nvme_ns *ns, *next;
	LIST_HEAD(rm_list);

	mutex_lock(&ctrl->namespaces_mutex);
	list_for_each_entry(ns, &ctrl->namespaces, list) {
		if (ns->disk && nvme_revalidate_disk(ns->disk))
			nvme_ns_remove(ns);
		if (ns->disk && nvme_revalidate_disk(ns->disk)) {
			list_move_tail(&ns->list, &rm_list);
		}
	}
	mutex_unlock(&ctrl->namespaces_mutex);

	list_for_each_entry_safe(ns, next, &rm_list, list)
		nvme_ns_remove(ns);
}

static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
@@ -2687,7 +2696,7 @@ static ssize_t nvme_sysfs_show_state(struct device *dev,
		[NVME_CTRL_LIVE]	= "live",
		[NVME_CTRL_ADMIN_ONLY]	= "only-admin",
		[NVME_CTRL_RESETTING]	= "resetting",
		[NVME_CTRL_RECONNECTING]= "reconnecting",
		[NVME_CTRL_CONNECTING]	= "connecting",
		[NVME_CTRL_DELETING]	= "deleting",
		[NVME_CTRL_DEAD]	= "dead",
	};
+5 −4
Original line number Diff line number Diff line
@@ -171,13 +171,14 @@ static inline blk_status_t nvmf_check_init_req(struct nvme_ctrl *ctrl,
	    cmd->common.opcode != nvme_fabrics_command ||
	    cmd->fabrics.fctype != nvme_fabrics_type_connect) {
		/*
		 * Reconnecting state means transport disruption, which can take
		 * a long time and even might fail permanently, fail fast to
		 * give upper layers a chance to failover.
		 * Connecting state means transport disruption or initial
		 * establishment, which can take a long time and even might
		 * fail permanently, fail fast to give upper layers a chance
		 * to failover.
		 * Deleting state means that the ctrl will never accept commands
		 * again, fail it permanently.
		 */
		if (ctrl->state == NVME_CTRL_RECONNECTING ||
		if (ctrl->state == NVME_CTRL_CONNECTING ||
		    ctrl->state == NVME_CTRL_DELETING) {
			nvme_req(rq)->status = NVME_SC_ABORT_REQ;
			return BLK_STS_IOERR;
+36 −121
Original line number Diff line number Diff line
@@ -55,9 +55,7 @@ struct nvme_fc_queue {

enum nvme_fcop_flags {
	FCOP_FLAGS_TERMIO	= (1 << 0),
	FCOP_FLAGS_RELEASED	= (1 << 1),
	FCOP_FLAGS_COMPLETE	= (1 << 2),
	FCOP_FLAGS_AEN		= (1 << 3),
	FCOP_FLAGS_AEN		= (1 << 1),
};

struct nvmefc_ls_req_op {
@@ -532,7 +530,7 @@ nvme_fc_resume_controller(struct nvme_fc_ctrl *ctrl)
{
	switch (ctrl->ctrl.state) {
	case NVME_CTRL_NEW:
	case NVME_CTRL_RECONNECTING:
	case NVME_CTRL_CONNECTING:
		/*
		 * As all reconnects were suppressed, schedule a
		 * connect.
@@ -777,7 +775,7 @@ nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
		}
		break;

	case NVME_CTRL_RECONNECTING:
	case NVME_CTRL_CONNECTING:
		/*
		 * The association has already been terminated and the
		 * controller is attempting reconnects.  No need to do anything
@@ -1470,7 +1468,6 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)

/* *********************** NVME Ctrl Routines **************************** */

static void __nvme_fc_final_op_cleanup(struct request *rq);
static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg);

static int
@@ -1512,13 +1509,19 @@ nvme_fc_exit_request(struct blk_mq_tag_set *set, struct request *rq,
static int
__nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op)
{
	int state;
	unsigned long flags;
	int opstate;

	spin_lock_irqsave(&ctrl->lock, flags);
	opstate = atomic_xchg(&op->state, FCPOP_STATE_ABORTED);
	if (opstate != FCPOP_STATE_ACTIVE)
		atomic_set(&op->state, opstate);
	else if (ctrl->flags & FCCTRL_TERMIO)
		ctrl->iocnt++;
	spin_unlock_irqrestore(&ctrl->lock, flags);

	state = atomic_xchg(&op->state, FCPOP_STATE_ABORTED);
	if (state != FCPOP_STATE_ACTIVE) {
		atomic_set(&op->state, state);
	if (opstate != FCPOP_STATE_ACTIVE)
		return -ECANCELED;
	}

	ctrl->lport->ops->fcp_abort(&ctrl->lport->localport,
					&ctrl->rport->remoteport,
@@ -1532,60 +1535,26 @@ static void
nvme_fc_abort_aen_ops(struct nvme_fc_ctrl *ctrl)
{
	struct nvme_fc_fcp_op *aen_op = ctrl->aen_ops;
	unsigned long flags;
	int i, ret;

	for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) {
		if (atomic_read(&aen_op->state) != FCPOP_STATE_ACTIVE)
			continue;

		spin_lock_irqsave(&ctrl->lock, flags);
		if (ctrl->flags & FCCTRL_TERMIO) {
			ctrl->iocnt++;
			aen_op->flags |= FCOP_FLAGS_TERMIO;
		}
		spin_unlock_irqrestore(&ctrl->lock, flags);

		ret = __nvme_fc_abort_op(ctrl, aen_op);
		if (ret) {
			/*
			 * if __nvme_fc_abort_op failed the io wasn't
			 * active. Thus this call path is running in
			 * parallel to the io complete. Treat as non-error.
			 */
	int i;

			/* back out the flags/counters */
			spin_lock_irqsave(&ctrl->lock, flags);
			if (ctrl->flags & FCCTRL_TERMIO)
				ctrl->iocnt--;
			aen_op->flags &= ~FCOP_FLAGS_TERMIO;
			spin_unlock_irqrestore(&ctrl->lock, flags);
			return;
		}
	}
	for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++)
		__nvme_fc_abort_op(ctrl, aen_op);
}

static inline int
static inline void
__nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
		struct nvme_fc_fcp_op *op)
		struct nvme_fc_fcp_op *op, int opstate)
{
	unsigned long flags;
	bool complete_rq = false;

	if (opstate == FCPOP_STATE_ABORTED) {
		spin_lock_irqsave(&ctrl->lock, flags);
	if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) {
		if (ctrl->flags & FCCTRL_TERMIO) {
			if (!--ctrl->iocnt)
				wake_up(&ctrl->ioabort_wait);
		}
	}
	if (op->flags & FCOP_FLAGS_RELEASED)
		complete_rq = true;
	else
		op->flags |= FCOP_FLAGS_COMPLETE;
		spin_unlock_irqrestore(&ctrl->lock, flags);

	return complete_rq;
	}
}

static void
@@ -1601,6 +1570,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
	__le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1);
	union nvme_result result;
	bool terminate_assoc = true;
	int opstate;

	/*
	 * WARNING:
@@ -1639,11 +1609,12 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
	 * association to be terminated.
	 */

	opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE);

	fc_dma_sync_single_for_cpu(ctrl->lport->dev, op->fcp_req.rspdma,
				sizeof(op->rsp_iu), DMA_FROM_DEVICE);

	if (atomic_read(&op->state) == FCPOP_STATE_ABORTED ||
			op->flags & FCOP_FLAGS_TERMIO)
	if (opstate == FCPOP_STATE_ABORTED)
		status = cpu_to_le16(NVME_SC_ABORT_REQ << 1);
	else if (freq->status)
		status = cpu_to_le16(NVME_SC_INTERNAL << 1);
@@ -1708,7 +1679,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
done:
	if (op->flags & FCOP_FLAGS_AEN) {
		nvme_complete_async_event(&queue->ctrl->ctrl, status, &result);
		__nvme_fc_fcpop_chk_teardowns(ctrl, op);
		__nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
		atomic_set(&op->state, FCPOP_STATE_IDLE);
		op->flags = FCOP_FLAGS_AEN;	/* clear other flags */
		nvme_fc_ctrl_put(ctrl);
@@ -1722,12 +1693,10 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
	if (status &&
	    (blk_queue_dying(rq->q) ||
	     ctrl->ctrl.state == NVME_CTRL_NEW ||
	     ctrl->ctrl.state == NVME_CTRL_RECONNECTING))
	     ctrl->ctrl.state == NVME_CTRL_CONNECTING))
		status |= cpu_to_le16(NVME_SC_DNR << 1);

	if (__nvme_fc_fcpop_chk_teardowns(ctrl, op))
		__nvme_fc_final_op_cleanup(rq);
	else
	__nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
	nvme_end_request(rq, status, result);

check_error:
@@ -2415,46 +2384,16 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg)
}

static void
__nvme_fc_final_op_cleanup(struct request *rq)
nvme_fc_complete_rq(struct request *rq)
{
	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
	struct nvme_fc_ctrl *ctrl = op->ctrl;

	atomic_set(&op->state, FCPOP_STATE_IDLE);
	op->flags &= ~(FCOP_FLAGS_TERMIO | FCOP_FLAGS_RELEASED |
			FCOP_FLAGS_COMPLETE);

	nvme_fc_unmap_data(ctrl, rq, op);
	nvme_complete_rq(rq);
	nvme_fc_ctrl_put(ctrl);

}

static void
nvme_fc_complete_rq(struct request *rq)
{
	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
	struct nvme_fc_ctrl *ctrl = op->ctrl;
	unsigned long flags;
	bool completed = false;

	/*
	 * the core layer, on controller resets after calling
	 * nvme_shutdown_ctrl(), calls complete_rq without our
	 * calling blk_mq_complete_request(), thus there may still
	 * be live i/o outstanding with the LLDD. Means transport has
	 * to track complete calls vs fcpio_done calls to know what
	 * path to take on completes and dones.
	 */
	spin_lock_irqsave(&ctrl->lock, flags);
	if (op->flags & FCOP_FLAGS_COMPLETE)
		completed = true;
	else
		op->flags |= FCOP_FLAGS_RELEASED;
	spin_unlock_irqrestore(&ctrl->lock, flags);

	if (completed)
		__nvme_fc_final_op_cleanup(rq);
}

/*
@@ -2476,35 +2415,11 @@ nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved)
	struct nvme_ctrl *nctrl = data;
	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req);
	unsigned long flags;
	int status;

	if (!blk_mq_request_started(req))
		return;

	spin_lock_irqsave(&ctrl->lock, flags);
	if (ctrl->flags & FCCTRL_TERMIO) {
		ctrl->iocnt++;
		op->flags |= FCOP_FLAGS_TERMIO;
	}
	spin_unlock_irqrestore(&ctrl->lock, flags);

	status = __nvme_fc_abort_op(ctrl, op);
	if (status) {
		/*
		 * if __nvme_fc_abort_op failed the io wasn't
		 * active. Thus this call path is running in
		 * parallel to the io complete. Treat as non-error.
		 */

		/* back out the flags/counters */
		spin_lock_irqsave(&ctrl->lock, flags);
		if (ctrl->flags & FCCTRL_TERMIO)
			ctrl->iocnt--;
		op->flags &= ~FCOP_FLAGS_TERMIO;
		spin_unlock_irqrestore(&ctrl->lock, flags);
		return;
	}
	__nvme_fc_abort_op(ctrl, op);
}


@@ -2943,7 +2858,7 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
	unsigned long recon_delay = ctrl->ctrl.opts->reconnect_delay * HZ;
	bool recon = true;

	if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING)
	if (ctrl->ctrl.state != NVME_CTRL_CONNECTING)
		return;

	if (portptr->port_state == FC_OBJSTATE_ONLINE)
@@ -2991,10 +2906,10 @@ nvme_fc_reset_ctrl_work(struct work_struct *work)
	/* will block while waiting for io to terminate */
	nvme_fc_delete_association(ctrl);

	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
		dev_err(ctrl->ctrl.device,
			"NVME-FC{%d}: error_recovery: Couldn't change state "
			"to RECONNECTING\n", ctrl->cnum);
			"to CONNECTING\n", ctrl->cnum);
		return;
	}

@@ -3195,7 +3110,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
	 * transport errors (frame drop, LS failure) inherently must kill
	 * the association. The transport is coded so that any command used
	 * to create the association (prior to a LIVE state transition
	 * while NEW or RECONNECTING) will fail if it completes in error or
	 * while NEW or CONNECTING) will fail if it completes in error or
	 * times out.
	 *
	 * As such: as the connect request was most likely due to a
+2 −1
Original line number Diff line number Diff line
@@ -123,7 +123,7 @@ enum nvme_ctrl_state {
	NVME_CTRL_LIVE,
	NVME_CTRL_ADMIN_ONLY,    /* Only admin queue live */
	NVME_CTRL_RESETTING,
	NVME_CTRL_RECONNECTING,
	NVME_CTRL_CONNECTING,
	NVME_CTRL_DELETING,
	NVME_CTRL_DEAD,
};
@@ -183,6 +183,7 @@ struct nvme_ctrl {
	struct work_struct scan_work;
	struct work_struct async_event_work;
	struct delayed_work ka_work;
	struct nvme_command ka_cmd;
	struct work_struct fw_act_work;

	/* Power saving configuration */
Loading