Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 219d27d7 authored by Bart Van Assche's avatar Bart Van Assche Committed by Martin K. Petersen
Browse files

scsi: qla2xxx: Fix race conditions in the code for aborting SCSI commands



In the *_done() functions, instead of returning early if sp->ref_count >=
2, only decrement sp->ref_count. In qla2xxx_eh_abort(), instead of deciding
what to do based on the value of sp->ref_count, decide which action to take
depending on the completion status of the firmware abort. Remove srb.cwaitq
and use srb.comp instead. In qla2x00_abort_srb(), call
isp_ops->abort_command() directly instead of calling qla2xxx_eh_abort().

Cc: Himanshu Madhani <hmadhani@marvell.com>
Cc: Giridhar Malavali <gmalavali@marvell.com>
Signed-off-by: default avatarBart Van Assche <bvanassche@acm.org>
Acked-by: default avatarHimanshu Madhani <hmadhani@marvell.com>
Signed-off-by: default avatarMartin K. Petersen <martin.petersen@oracle.com>
parent 982cc4be
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -546,7 +546,6 @@ typedef struct srb {
	int rc;
	int retry_count;
	struct completion *comp;
	wait_queue_head_t *cwaitq;
	union {
		struct srb_iocb iocb_cmd;
		struct bsg_job *bsg_job;
+2 −32
Original line number Diff line number Diff line
@@ -137,8 +137,7 @@ static void qla_nvme_sp_ls_done(void *ptr, int res)
		return;
	}

	if (!atomic_dec_and_test(&sp->ref_count))
		return;
	atomic_dec(&sp->ref_count);

	if (res)
		res = -EINVAL;
@@ -161,8 +160,7 @@ static void qla_nvme_sp_done(void *ptr, int res)
	nvme = &sp->u.iocb_cmd;
	fd = nvme->u.nvme.desc;

	if (!atomic_dec_and_test(&sp->ref_count))
		return;
	atomic_dec(&sp->ref_count);

	if (res == QLA_SUCCESS) {
		fd->rcv_rsplen = nvme->u.nvme.rsp_pyld_len;
@@ -599,34 +597,6 @@ static struct nvme_fc_port_template qla_nvme_fc_transport = {
	.fcprqst_priv_sz = sizeof(struct nvme_private),
};

#define NVME_ABORT_POLLING_PERIOD    2
static int qla_nvme_wait_on_command(srb_t *sp)
{
	int ret = QLA_SUCCESS;

	wait_event_timeout(sp->nvme_ls_waitq, (atomic_read(&sp->ref_count) > 1),
	    NVME_ABORT_POLLING_PERIOD*HZ);

	if (atomic_read(&sp->ref_count) > 1)
		ret = QLA_FUNCTION_FAILED;

	return ret;
}

void qla_nvme_abort(struct qla_hw_data *ha, struct srb *sp, int res)
{
	int rval;

	if (ha->flags.fw_started) {
		rval = ha->isp_ops->abort_command(sp);
		if (!rval && !qla_nvme_wait_on_command(sp))
			ql_log(ql_log_warn, NULL, 0x2112,
			    "timed out waiting on sp=%p\n", sp);
	} else {
		sp->done(sp, res);
	}
}

static void qla_nvme_unregister_remote_port(struct work_struct *work)
{
	struct fc_port *fcport = container_of(work, struct fc_port,
+0 −1
Original line number Diff line number Diff line
@@ -145,7 +145,6 @@ struct pt_ls4_rx_unsol {
int qla_nvme_register_hba(struct scsi_qla_host *);
int  qla_nvme_register_remote(struct scsi_qla_host *, struct fc_port *);
void qla_nvme_delete(struct scsi_qla_host *);
void qla_nvme_abort(struct qla_hw_data *, struct srb *sp, int res);
void qla24xx_nvme_ls4_iocb(struct scsi_qla_host *, struct pt_ls4_request *,
    struct req_que *);
void qla24xx_async_gffid_sp_done(void *, int);
+53 −97
Original line number Diff line number Diff line
@@ -714,7 +714,7 @@ qla2x00_sp_compl(void *ptr, int res)
{
	srb_t *sp = ptr;
	struct scsi_cmnd *cmd = GET_CMD_SP(sp);
	wait_queue_head_t *cwaitq = sp->cwaitq;
	struct completion *comp = sp->comp;

	if (atomic_read(&sp->ref_count) == 0) {
		ql_dbg(ql_dbg_io, sp->vha, 0x3015,
@@ -724,15 +724,15 @@ qla2x00_sp_compl(void *ptr, int res)
			WARN_ON(atomic_read(&sp->ref_count) == 0);
		return;
	}
	if (!atomic_dec_and_test(&sp->ref_count))
		return;

	atomic_dec(&sp->ref_count);

	sp->free(sp);
	cmd->result = res;
	CMD_SP(cmd) = NULL;
	cmd->scsi_done(cmd);
	if (cwaitq)
		wake_up(cwaitq);
	if (comp)
		complete(comp);
	qla2x00_rel_sp(sp);
}

@@ -825,7 +825,7 @@ qla2xxx_qpair_sp_compl(void *ptr, int res)
{
	srb_t *sp = ptr;
	struct scsi_cmnd *cmd = GET_CMD_SP(sp);
	wait_queue_head_t *cwaitq = sp->cwaitq;
	struct completion *comp = sp->comp;

	if (atomic_read(&sp->ref_count) == 0) {
		ql_dbg(ql_dbg_io, sp->fcport->vha, 0x3079,
@@ -835,15 +835,15 @@ qla2xxx_qpair_sp_compl(void *ptr, int res)
			WARN_ON(atomic_read(&sp->ref_count) == 0);
		return;
	}
	if (!atomic_dec_and_test(&sp->ref_count))
		return;

	atomic_dec(&sp->ref_count);

	sp->free(sp);
	cmd->result = res;
	CMD_SP(cmd) = NULL;
	cmd->scsi_done(cmd);
	if (cwaitq)
		wake_up(cwaitq);
	if (comp)
		complete(comp);
	qla2xxx_rel_qpair_sp(sp->qpair, sp);
}

@@ -1286,7 +1286,7 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
	unsigned int id;
	uint64_t lun;
	unsigned long flags;
	int rval, wait = 0;
	int rval;
	struct qla_hw_data *ha = vha->hw;
	struct qla_qpair *qpair;

@@ -1299,7 +1299,6 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
	ret = fc_block_scsi_eh(cmd);
	if (ret != 0)
		return ret;
	ret = SUCCESS;

	sp = (srb_t *) CMD_SP(cmd);
	if (!sp)
@@ -1310,7 +1309,7 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
		return SUCCESS;

	spin_lock_irqsave(qpair->qp_lock_ptr, flags);
	if (!CMD_SP(cmd)) {
	if (sp->type != SRB_SCSI_CMD || GET_CMD_SP(sp) != cmd) {
		/* there's a chance an interrupt could clear
		   the ptr as part of done & free */
		spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
@@ -1331,66 +1330,31 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
	    "Aborting from RISC nexus=%ld:%d:%llu sp=%p cmd=%p handle=%x\n",
	    vha->host_no, id, lun, sp, cmd, sp->handle);

	/* Get a reference to the sp and drop the lock.*/
	rval = ha->isp_ops->abort_command(sp);
	if (rval) {
		if (rval == QLA_FUNCTION_PARAMETER_ERROR)
			ret = SUCCESS;
		else
			ret = FAILED;

	ql_dbg(ql_dbg_taskm, vha, 0x8003,
		    "Abort command mbx failed cmd=%p, rval=%x.\n", cmd, rval);
	} else {
		ql_dbg(ql_dbg_taskm, vha, 0x8004,
		    "Abort command mbx success cmd=%p.\n", cmd);
		wait = 1;
	}

	spin_lock_irqsave(qpair->qp_lock_ptr, flags);
	       "Abort command mbx cmd=%p, rval=%x.\n", cmd, rval);

	switch (rval) {
	case QLA_SUCCESS:
		/*
	 * Releasing of the SRB and associated command resources
	 * is managed through ref_count.
	 * Whether we need to wait for the abort completion or complete
	 * the abort handler should be based on the ref_count.
	 */
	if (atomic_read(&sp->ref_count) > 1) {
		/*
		 * The command is not yet completed. We need to wait for either
		 * command completion or abort completion.
		 * The command has been aborted. That means that the firmware
		 * won't report a completion.
		 */
		DECLARE_WAIT_QUEUE_HEAD_ONSTACK(eh_waitq);
		uint32_t ratov = ha->r_a_tov/10;

		/* Go ahead and release the extra ref_count obtained earlier */
		sp->done(sp, DID_RESET << 16);
		sp->cwaitq = &eh_waitq;

		if (!wait_event_lock_irq_timeout(eh_waitq,
		    CMD_SP(cmd) == NULL, *qpair->qp_lock_ptr,
		    msecs_to_jiffies(4 * ratov * 1000))) {
		sp->done(sp, DID_ABORT << 16);
		ret = SUCCESS;
		break;
	default:
		/*
			 * The abort got dropped, LOGO will be sent and the
			 * original command will be completed with CS_TIMEOUT
			 * completion
		 * Either abort failed or abort and completion raced. Let
		 * the SCSI core retry the abort in the former case.
		 */
			ql_dbg(ql_dbg_taskm, vha, 0xffff,
			    "%s: Abort wait timer (4 * R_A_TOV[%d]) expired\n",
			    __func__, ha->r_a_tov);
			sp->cwaitq = NULL;
		ret = FAILED;
			goto end;
		}
	} else {
		/* Command completed while processing the abort */
		sp->done(sp, DID_RESET << 16);
		break;
	}
end:
	spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);

	ql_log(ql_log_info, vha, 0x801c,
	    "Abort command issued nexus=%ld:%d:%llu --  %d %x.\n",
	    vha->host_no, id, lun, wait, ret);
	    "Abort command issued nexus=%ld:%d:%llu -- %x.\n",
	    vha->host_no, id, lun, ret);

	return ret;
}
@@ -1766,42 +1730,34 @@ static void qla2x00_abort_srb(struct qla_qpair *qp, srb_t *sp, const int res,
	__releases(qp->qp_lock_ptr)
	__acquires(qp->qp_lock_ptr)
{
	DECLARE_COMPLETION_ONSTACK(comp);
	scsi_qla_host_t *vha = qp->vha;
	struct qla_hw_data *ha = vha->hw;
	int rval;

	if (sp_get(sp))
		return;

	if (sp->type == SRB_NVME_CMD || sp->type == SRB_NVME_LS) {
		if (!sp_get(sp)) {
			/* got sp */
	if (sp->type == SRB_NVME_CMD || sp->type == SRB_NVME_LS ||
	    (sp->type == SRB_SCSI_CMD && !ha->flags.eeh_busy &&
	     !test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags) &&
	     !qla2x00_isp_reg_stat(ha))) {
		sp->comp = &comp;
		rval = ha->isp_ops->abort_command(sp);
		spin_unlock_irqrestore(qp->qp_lock_ptr, *flags);
			qla_nvme_abort(ha, sp, res);
			spin_lock_irqsave(qp->qp_lock_ptr, *flags);

		switch (rval) {
		case QLA_SUCCESS:
			sp->done(sp, res);
			break;
		case QLA_FUNCTION_PARAMETER_ERROR:
			wait_for_completion(&comp);
			break;
		}
	} else if (GET_CMD_SP(sp) && !ha->flags.eeh_busy &&
		   !test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags) &&
		   !qla2x00_isp_reg_stat(ha) && sp->type == SRB_SCSI_CMD) {
		/*
		 * Don't abort commands in adapter during EEH recovery as it's
		 * not accessible/responding.
		 *
		 * Get a reference to the sp and drop the lock. The reference
		 * ensures this sp->done() call and not the call in
		 * qla2xxx_eh_abort() ends the SCSI cmd (with result 'res').
		 */
		if (!sp_get(sp)) {
			int status;

			spin_unlock_irqrestore(qp->qp_lock_ptr, *flags);
			status = qla2xxx_eh_abort(GET_CMD_SP(sp));
		spin_lock_irqsave(qp->qp_lock_ptr, *flags);
			/*
			 * Get rid of extra reference caused
			 * by early exit from qla2xxx_eh_abort
			 */
			if (status == FAST_IO_FAIL)
				atomic_dec(&sp->ref_count);
		}
		sp->comp = NULL;
	}
	sp->done(sp, res);
}

static void