Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ad689233, authored by Darrick J. Wong, committed by James Bottomley
Browse files

[SCSI] libsas: Handle SCSI commands that complete with failure codes



This patch moves the code that handles SAS failures out of the main EH
function and into a separate function.  It also detects commands that have
no sas_task (i.e. they completed, but with error data) and sends them into
scsi_error for processing.  This allows us to handle SCSI errors (and
enables auto-spinup as a side effect) instead of dropping them on the
floor and falling into an infinite loop.  It also requires the
implementation of a device reset function, which the SAS failure code has
been modified to employ for REQ_DEVICE_RESET.

Signed-off-by: Darrick J. Wong <djwong@us.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
parent dca84e46
Loading
Loading
Loading
Loading
+104 −44
Original line number Diff line number Diff line
@@ -34,6 +34,7 @@
#include <scsi/scsi_transport_sas.h>
#include "../scsi_sas_internal.h"
#include "../scsi_transport_api.h"
#include "../scsi_priv.h"

#include <linux/err.h>
#include <linux/blkdev.h>
@@ -396,54 +397,80 @@ static int sas_recover_I_T(struct domain_device *dev)
	return res;
}

static int eh_reset_phy_helper(struct sas_phy *phy)
/* Find the sas_phy that's attached to this device */
struct sas_phy *find_local_sas_phy(struct domain_device *dev)
{
	int tmf_resp;
	struct domain_device *pdev = dev->parent;
	struct ex_phy *exphy = NULL;
	int i;

	tmf_resp = sas_phy_reset(phy, 1);
	if (tmf_resp)
		SAS_DPRINTK("Hard reset of phy %d failed 0x%x\n",
			    phy->identify.phy_identifier,
			    tmf_resp);
	/* Directly attached device */
	if (!pdev)
		return dev->port->phy;

	return tmf_resp;
	/* Otherwise look in the expander */
	for (i = 0; i < pdev->ex_dev.num_phys; i++)
		if (!memcmp(dev->sas_addr,
			    pdev->ex_dev.ex_phy[i].attached_sas_addr,
			    SAS_ADDR_SIZE)) {
			exphy = &pdev->ex_dev.ex_phy[i];
			break;
		}

void sas_scsi_recover_host(struct Scsi_Host *shost)
	BUG_ON(!exphy);
	return exphy->phy;
}

/*
 * Attempt to reset the device that @cmd was addressed to.
 *
 * The device is looked up with cmd_to_domain_dev(), its local phy with
 * find_local_sas_phy(), and that phy is reset via sas_phy_reset(phy, 1).
 *
 * Returns SUCCESS when sas_phy_reset() yields TMF_RESP_FUNC_SUCC or
 * TMF_RESP_FUNC_COMPLETE, and FAILED for any other result.
 */
int sas_eh_device_reset_handler(struct scsi_cmnd *cmd)
{
	struct domain_device *dev = cmd_to_domain_dev(cmd);
	struct sas_phy *phy = find_local_sas_phy(dev);
	int res;

	res = sas_phy_reset(phy, 1);
	if (res == TMF_RESP_FUNC_SUCC || res == TMF_RESP_FUNC_COMPLETE)
		return SUCCESS;

	/*
	 * Log only on the path that actually returns FAILED, so that a
	 * non-zero success code is never reported as a failed reset.
	 */
	SAS_DPRINTK("Device reset of %s failed 0x%x\n",
		    phy->dev.kobj.k_name,
		    res);

	return FAILED;
}

/*
 * Ask the host template's device reset handler, if one is installed, to
 * reset the device behind @cmd.  Returns the handler's result, or FAILED
 * when the template provides no eh_device_reset_handler.
 */
static int try_to_reset_cmd_device(struct Scsi_Host *shost,
				   struct scsi_cmnd *cmd)
{
	int (*reset_handler)(struct scsi_cmnd *) =
		shost->hostt->eh_device_reset_handler;

	return reset_handler ? reset_handler(cmd) : FAILED;
}

static int sas_eh_handle_sas_errors(struct Scsi_Host *shost,
				    struct list_head *work_q,
				    struct list_head *done_q)
{
	struct sas_ha_struct *ha = SHOST_TO_SAS_HA(shost);
	unsigned long flags;
	LIST_HEAD(error_q);
	struct scsi_cmnd *cmd, *n;
	enum task_disposition res = TASK_IS_DONE;
	int tmf_resp, need_reset;
	struct sas_internal *i = to_sas_internal(shost->transportt);
	struct sas_phy *task_sas_phy = NULL;

	spin_lock_irqsave(shost->host_lock, flags);
	list_splice_init(&shost->eh_cmd_q, &error_q);
	spin_unlock_irqrestore(shost->host_lock, flags);

	SAS_DPRINTK("Enter %s\n", __FUNCTION__);
	unsigned long flags;
	struct sas_ha_struct *ha = SHOST_TO_SAS_HA(shost);

	/* All tasks on this list were marked SAS_TASK_STATE_ABORTED
	 * by sas_scsi_timed_out() callback.
	 */
Again:
	SAS_DPRINTK("going over list...\n");
	list_for_each_entry_safe(cmd, n, &error_q, eh_entry) {
	list_for_each_entry_safe(cmd, n, work_q, eh_entry) {
		struct sas_task *task = TO_SAS_TASK(cmd);
		list_del_init(&cmd->eh_entry);

		if (!task) {
			SAS_DPRINTK("%s: taskless cmd?!\n", __FUNCTION__);
		if (!task)
			continue;
		}

		list_del_init(&cmd->eh_entry);

		spin_lock_irqsave(&task->task_state_lock, flags);
		need_reset = task->task_state_flags & SAS_TASK_NEED_DEV_RESET;
		if (need_reset)
			task_sas_phy = task->dev->port->phy;
		spin_unlock_irqrestore(&task->task_state_lock, flags);

		SAS_DPRINTK("trying to find task 0x%p\n", task);
@@ -457,14 +484,14 @@ Again:
				    task);
			task->task_done(task);
			if (need_reset)
				eh_reset_phy_helper(task_sas_phy);
				try_to_reset_cmd_device(shost, cmd);
			continue;
		case TASK_IS_ABORTED:
			SAS_DPRINTK("%s: task 0x%p is aborted\n",
				    __FUNCTION__, task);
			task->task_done(task);
			if (need_reset)
				eh_reset_phy_helper(task_sas_phy);
				try_to_reset_cmd_device(shost, cmd);
			continue;
		case TASK_IS_AT_LU:
			SAS_DPRINTK("task 0x%p is at LU: lu recover\n", task);
@@ -476,8 +503,8 @@ Again:
					    cmd->device->lun);
				task->task_done(task);
				if (need_reset)
					eh_reset_phy_helper(task_sas_phy);
				sas_scsi_clear_queue_lu(&error_q, cmd);
					try_to_reset_cmd_device(shost, cmd);
				sas_scsi_clear_queue_lu(work_q, cmd);
				goto Again;
			}
			/* fallthrough */
@@ -491,8 +518,8 @@ Again:
					    SAS_ADDR(task->dev->sas_addr));
				task->task_done(task);
				if (need_reset)
					eh_reset_phy_helper(task_sas_phy);
				sas_scsi_clear_queue_I_T(&error_q, task->dev);
					try_to_reset_cmd_device(shost, cmd);
				sas_scsi_clear_queue_I_T(work_q, task->dev);
				goto Again;
			}
			/* Hammer time :-) */
@@ -506,8 +533,8 @@ Again:
						    "succeeded\n", port->id);
					task->task_done(task);
					if (need_reset)
						eh_reset_phy_helper(task_sas_phy);
					sas_scsi_clear_queue_port(&error_q,
						try_to_reset_cmd_device(shost, cmd);
					sas_scsi_clear_queue_port(work_q,
								  port);
					goto Again;
				}
@@ -520,7 +547,7 @@ Again:
						    "succeeded\n");
					task->task_done(task);
					if (need_reset)
						eh_reset_phy_helper(task_sas_phy);
						try_to_reset_cmd_device(shost, cmd);
					goto out;
				}
			}
@@ -535,21 +562,53 @@ Again:

			task->task_done(task);
			if (need_reset)
				eh_reset_phy_helper(task_sas_phy);
				try_to_reset_cmd_device(shost, cmd);
			goto clear_q;
		}
	}
out:
	scsi_eh_flush_done_q(&ha->eh_done_q);
	SAS_DPRINTK("--- Exit %s\n", __FUNCTION__);
	return;
	return list_empty(work_q);
clear_q:
	SAS_DPRINTK("--- Exit %s -- clear_q\n", __FUNCTION__);
	list_for_each_entry_safe(cmd, n, &error_q, eh_entry) {
	list_for_each_entry_safe(cmd, n, work_q, eh_entry) {
		struct sas_task *task = TO_SAS_TASK(cmd);
		list_del_init(&cmd->eh_entry);
		task->task_done(task);
	}
	return list_empty(work_q);
}

/*
 * Recover a SAS host: take over every command queued on shost->eh_cmd_q,
 * run the SAS-level and then the SCSI-level recovery stages over them, and
 * finally flush the HA's done queue.
 */
void sas_scsi_recover_host(struct Scsi_Host *shost)
{
	struct sas_ha_struct *ha = SHOST_TO_SAS_HA(shost);
	struct list_head *done_q = &ha->eh_done_q;
	unsigned long flags;
	LIST_HEAD(eh_work_q);

	/* Move the pending commands onto a private list under the host lock. */
	spin_lock_irqsave(shost->host_lock, flags);
	list_splice_init(&shost->eh_cmd_q, &eh_work_q);
	spin_unlock_irqrestore(shost->host_lock, flags);

	SAS_DPRINTK("Enter %s\n", __FUNCTION__);

	/*
	 * First deal with commands that still have SAS tasks (i.e. they
	 * didn't complete via the normal sas_task completion mechanism).
	 * A non-zero return means nothing is left on the work queue.
	 */
	if (!sas_eh_handle_sas_errors(shost, &eh_work_q, done_q)) {
		/*
		 * Now deal with SCSI commands that completed ok but have an
		 * error code (and hopefully sense data) attached.  This is
		 * roughly what scsi_unjam_host does, but we skip
		 * scsi_eh_abort_cmds because any command we see here has no
		 * sas_task and is thus unknown to the HA.
		 */
		if (!scsi_eh_get_sense(&eh_work_q, done_q))
			scsi_eh_ready_devs(shost, &eh_work_q, done_q);
	}

	scsi_eh_flush_done_q(done_q);
	SAS_DPRINTK("--- Exit %s\n", __FUNCTION__);
}

enum scsi_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd)
@@ -914,3 +973,4 @@ EXPORT_SYMBOL_GPL(__sas_task_abort);
EXPORT_SYMBOL_GPL(sas_task_abort);
EXPORT_SYMBOL_GPL(sas_phy_reset);
EXPORT_SYMBOL_GPL(sas_phy_enable);
EXPORT_SYMBOL_GPL(sas_eh_device_reset_handler);
+1 −0
Original line number Diff line number Diff line
@@ -660,5 +660,6 @@ void sas_init_dev(struct domain_device *);

void sas_task_abort(struct sas_task *);
int __sas_task_abort(struct sas_task *);
int sas_eh_device_reset_handler(struct scsi_cmnd *cmd);

#endif /* _SASLIB_H_ */