
Commit e8e7f271 authored by Sujit Reddy Thumma, committed by Christoph Hellwig

scsi: ufs: Improve UFS fatal error handling



Error handling in the UFS driver is broken: it resets the host controller
on fatal errors without re-initializing it. Correct the fatal error
handling sequence according to the UFS Host Controller Interface (HCI)
v1.1 specification.

o Processed requests that completed, with or without error, are reported
  to the SCSI layer, and any pending commands that have not started are
  aborted in the controller and re-queued into the SCSI mid-layer queue.

o Upon a fatal error condition the host controller may hang forever until
  a reset is applied. Block the SCSI layer from sending new requests and
  apply the reset in a separate error handling work item, as sketched below.

o SCSI is informed about the Unit Attention exception expected from the
  device on the first command after a reset, so that the SCSI layer takes
  the necessary steps to re-establish communication with the device.
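
The two-stage split described above (latch errors and block I/O in
interrupt context, then recover in deferred work) can be illustrated with
a small self-contained C model. This is illustration only, not driver
code: the struct, helper names, and bit values below are made up, and the
flow merely mirrors ufshcd_check_errors()/ufshcd_err_handler() from the
patch.

  #include <stdbool.h>
  #include <stdint.h>
  #include <stdio.h>

  /* Made-up bit values; only the names echo the patch. */
  #define INT_FATAL_ERRORS	(1u << 0)
  #define UIC_ERROR		(1u << 1)
  #define UIC_DL_PA_INIT_ERROR	(1u << 0)

  struct hba_model {
  	uint32_t errors;	/* errors seen in the current interrupt */
  	uint32_t uic_error;	/* decoded UIC-layer errors */
  	uint32_t saved_err;	/* sticky copies consumed by the handler */
  	uint32_t saved_uic_err;
  	bool blocked;		/* stands in for scsi_block_requests() */
  	bool eh_scheduled;	/* stands in for schedule_work(&eh_work) */
  };

  /* Interrupt-context stage: only latch state and defer the real work. */
  static void check_errors(struct hba_model *hba)
  {
  	bool queue_eh_work = (hba->errors & INT_FATAL_ERRORS) ||
  			     ((hba->errors & UIC_ERROR) && hba->uic_error);

  	if (queue_eh_work) {
  		hba->blocked = true;
  		hba->saved_err |= hba->errors;
  		hba->saved_uic_err |= hba->uic_error;
  		hba->eh_scheduled = true;
  	}
  }

  /* Process-context stage: reset only for truly fatal conditions. */
  static void err_handler(struct hba_model *hba)
  {
  	bool fatal = (hba->saved_err & INT_FATAL_ERRORS) ||
  		     ((hba->saved_err & UIC_ERROR) &&
  		      (hba->saved_uic_err & UIC_DL_PA_INIT_ERROR));

  	if (fatal) {
  		printf("reset and restore, then report bus reset to SCSI\n");
  		hba->saved_err = 0;
  		hba->saved_uic_err = 0;
  	}
  	hba->blocked = false;	/* let the mid-layer send requests again */
  	hba->eh_scheduled = false;
  }

  int main(void)
  {
  	struct hba_model hba = { .errors = INT_FATAL_ERRORS };

  	check_errors(&hba);		/* what the ISR path would do */
  	if (hba.eh_scheduled)
  		err_handler(&hba);	/* what the deferred work would do */
  	return 0;
  }

In the real driver the first stage runs under the host lock in interrupt
context, which is why only flag manipulation happens there; all blocking
work (clearing requests, reset and restore) is done from the work item.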

Signed-off-by: Sujit Reddy Thumma <sthumma@codeaurora.org>
Reviewed-by: Yaniv Gardi <ygardi@codeaurora.org>
Tested-by: Dolev Raviv <draviv@codeaurora.org>
Acked-by: Vinayak Holikatti <vinholikatti@gmail.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
parent 3441da7d
drivers/scsi/ufs/ufshcd.c  +141 −88
@@ -84,6 +84,14 @@ enum {
 	UFSHCD_EH_IN_PROGRESS = (1 << 0),
 };
 
+/* UFSHCD UIC layer error flags */
+enum {
+	UFSHCD_UIC_DL_PA_INIT_ERROR = (1 << 0), /* Data link layer error */
+	UFSHCD_UIC_NL_ERROR = (1 << 1), /* Network layer error */
+	UFSHCD_UIC_TL_ERROR = (1 << 2), /* Transport Layer error */
+	UFSHCD_UIC_DME_ERROR = (1 << 3), /* DME error */
+};
+
 /* Interrupt configuration options */
 enum {
 	UFSHCD_INT_DISABLE,
@@ -100,6 +108,8 @@ enum {
 
 static void ufshcd_tmc_handler(struct ufs_hba *hba);
 static void ufshcd_async_scan(void *data, async_cookie_t cookie);
+static int ufshcd_reset_and_restore(struct ufs_hba *hba);
+static int ufshcd_clear_tm_cmd(struct ufs_hba *hba, int tag);
 
 /*
  * ufshcd_wait_for_register - wait for register value to change
@@ -1735,9 +1745,6 @@ static int ufshcd_make_hba_operational(struct ufs_hba *hba)
 		goto out;
 	}
 
-	if (hba->ufshcd_state == UFSHCD_STATE_RESET)
-		scsi_unblock_requests(hba->host);
-
 out:
 	return err;
 }
@@ -1862,66 +1869,6 @@ static int ufshcd_verify_dev_init(struct ufs_hba *hba)
 	return err;
 }
 
-/**
- * ufshcd_do_reset - reset the host controller
- * @hba: per adapter instance
- *
- * Returns SUCCESS/FAILED
- */
-static int ufshcd_do_reset(struct ufs_hba *hba)
-{
-	struct ufshcd_lrb *lrbp;
-	unsigned long flags;
-	int tag;
-
-	/* block commands from midlayer */
-	scsi_block_requests(hba->host);
-
-	spin_lock_irqsave(hba->host->host_lock, flags);
-	hba->ufshcd_state = UFSHCD_STATE_RESET;
-
-	/* send controller to reset state */
-	ufshcd_hba_stop(hba);
-	spin_unlock_irqrestore(hba->host->host_lock, flags);
-
-	/* abort outstanding commands */
-	for (tag = 0; tag < hba->nutrs; tag++) {
-		if (test_bit(tag, &hba->outstanding_reqs)) {
-			lrbp = &hba->lrb[tag];
-			if (lrbp->cmd) {
-				scsi_dma_unmap(lrbp->cmd);
-				lrbp->cmd->result = DID_RESET << 16;
-				lrbp->cmd->scsi_done(lrbp->cmd);
-				lrbp->cmd = NULL;
-				clear_bit_unlock(tag, &hba->lrb_in_use);
-			}
-		}
-	}
-
-	/* complete device management command */
-	if (hba->dev_cmd.complete)
-		complete(hba->dev_cmd.complete);
-
-	/* clear outstanding request/task bit maps */
-	hba->outstanding_reqs = 0;
-	hba->outstanding_tasks = 0;
-
-	/* Host controller enable */
-	if (ufshcd_hba_enable(hba)) {
-		dev_err(hba->dev,
-			"Reset: Controller initialization failed\n");
-		return FAILED;
-	}
-
-	if (ufshcd_link_startup(hba)) {
-		dev_err(hba->dev,
-			"Reset: Link start-up failed\n");
-		return FAILED;
-	}
-
-	return SUCCESS;
-}
-
 /**
  * ufshcd_slave_alloc - handle initial SCSI device configurations
  * @sdev: pointer to SCSI device
@@ -1939,6 +1886,9 @@ static int ufshcd_slave_alloc(struct scsi_device *sdev)
 	sdev->use_10_for_ms = 1;
 	scsi_set_tag_type(sdev, MSG_SIMPLE_TAG);
 
+	/* allow SCSI layer to restart the device in case of errors */
+	sdev->allow_restart = 1;
+
 	/*
 	 * Inform SCSI Midlayer that the LUN queue depth is same as the
 	 * controller queue depth. If a LUN queue depth is less than the
@@ -2134,6 +2084,9 @@ ufshcd_transfer_rsp_status(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
 	case OCS_ABORTED:
 		result |= DID_ABORT << 16;
 		break;
+	case OCS_INVALID_COMMAND_STATUS:
+		result |= DID_REQUEUE << 16;
+		break;
 	case OCS_INVALID_CMD_TABLE_ATTR:
 	case OCS_INVALID_PRDT_ATTR:
 	case OCS_MISMATCH_DATA_BUF_SIZE:
@@ -2451,46 +2404,146 @@ static void ufshcd_exception_event_handler(struct work_struct *work)
 }
 
 /**
- * ufshcd_fatal_err_handler - handle fatal errors
- * @hba: per adapter instance
+ * ufshcd_err_handler - handle UFS errors that require s/w attention
+ * @work: pointer to work structure
  */
-static void ufshcd_fatal_err_handler(struct work_struct *work)
+static void ufshcd_err_handler(struct work_struct *work)
 {
 	struct ufs_hba *hba;
-	hba = container_of(work, struct ufs_hba, feh_workq);
+	unsigned long flags;
+	u32 err_xfer = 0;
+	u32 err_tm = 0;
+	int err = 0;
+	int tag;
+
+	hba = container_of(work, struct ufs_hba, eh_work);
 
 	pm_runtime_get_sync(hba->dev);
-	/* check if reset is already in progress */
-	if (hba->ufshcd_state != UFSHCD_STATE_RESET)
-		ufshcd_do_reset(hba);
+
+	spin_lock_irqsave(hba->host->host_lock, flags);
+	if (hba->ufshcd_state == UFSHCD_STATE_RESET) {
+		spin_unlock_irqrestore(hba->host->host_lock, flags);
+		goto out;
+	}
+
+	hba->ufshcd_state = UFSHCD_STATE_RESET;
+	ufshcd_set_eh_in_progress(hba);
+
+	/* Complete requests that have door-bell cleared by h/w */
+	ufshcd_transfer_req_compl(hba);
+	ufshcd_tmc_handler(hba);
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
+
+	/* Clear pending transfer requests */
+	for_each_set_bit(tag, &hba->outstanding_reqs, hba->nutrs)
+		if (ufshcd_clear_cmd(hba, tag))
+			err_xfer |= 1 << tag;
+
+	/* Clear pending task management requests */
+	for_each_set_bit(tag, &hba->outstanding_tasks, hba->nutmrs)
+		if (ufshcd_clear_tm_cmd(hba, tag))
+			err_tm |= 1 << tag;
+
+	/* Complete the requests that are cleared by s/w */
+	spin_lock_irqsave(hba->host->host_lock, flags);
+	ufshcd_transfer_req_compl(hba);
+	ufshcd_tmc_handler(hba);
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
+
+	/* Fatal errors need reset */
+	if (err_xfer || err_tm || (hba->saved_err & INT_FATAL_ERRORS) ||
+			((hba->saved_err & UIC_ERROR) &&
+			 (hba->saved_uic_err & UFSHCD_UIC_DL_PA_INIT_ERROR))) {
+		err = ufshcd_reset_and_restore(hba);
+		if (err) {
+			dev_err(hba->dev, "%s: reset and restore failed\n",
+					__func__);
+			hba->ufshcd_state = UFSHCD_STATE_ERROR;
+		}
+		/*
+		 * Inform scsi mid-layer that we did reset and allow to handle
+		 * Unit Attention properly.
+		 */
+		scsi_report_bus_reset(hba->host, 0);
+		hba->saved_err = 0;
+		hba->saved_uic_err = 0;
+	}
+	ufshcd_clear_eh_in_progress(hba);
+
+out:
+	scsi_unblock_requests(hba->host);
 	pm_runtime_put_sync(hba->dev);
 }
 
 /**
- * ufshcd_err_handler - Check for fatal errors
- * @work: pointer to a work queue structure
+ * ufshcd_update_uic_error - check and set fatal UIC error flags.
+ * @hba: per-adapter instance
  */
-static void ufshcd_err_handler(struct ufs_hba *hba)
+static void ufshcd_update_uic_error(struct ufs_hba *hba)
 {
 	u32 reg;
 
+	/* PA_INIT_ERROR is fatal and needs UIC reset */
+	reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_DATA_LINK_LAYER);
+	if (reg & UIC_DATA_LINK_LAYER_ERROR_PA_INIT)
+		hba->uic_error |= UFSHCD_UIC_DL_PA_INIT_ERROR;
+
+	/* UIC NL/TL/DME errors needs software retry */
+	reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_NETWORK_LAYER);
+	if (reg)
+		hba->uic_error |= UFSHCD_UIC_NL_ERROR;
+
+	reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_TRANSPORT_LAYER);
+	if (reg)
+		hba->uic_error |= UFSHCD_UIC_TL_ERROR;
+
+	reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_DME);
+	if (reg)
+		hba->uic_error |= UFSHCD_UIC_DME_ERROR;
+
+	dev_dbg(hba->dev, "%s: UIC error flags = 0x%08x\n",
+			__func__, hba->uic_error);
+}
+
+/**
+ * ufshcd_check_errors - Check for errors that need s/w attention
+ * @hba: per-adapter instance
+ */
+static void ufshcd_check_errors(struct ufs_hba *hba)
+{
+	bool queue_eh_work = false;
+
 	if (hba->errors & INT_FATAL_ERRORS)
-		goto fatal_eh;
+		queue_eh_work = true;
 
 	if (hba->errors & UIC_ERROR) {
-		reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_DATA_LINK_LAYER);
-		if (reg & UIC_DATA_LINK_LAYER_ERROR_PA_INIT)
-			goto fatal_eh;
+		hba->uic_error = 0;
+		ufshcd_update_uic_error(hba);
+		if (hba->uic_error)
+			queue_eh_work = true;
 	}
-	return;
-fatal_eh:
-	/* handle fatal errors only when link is functional */
-	if (hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL) {
-		/* block commands at driver layer until error is handled */
-		hba->ufshcd_state = UFSHCD_STATE_ERROR;
-		schedule_work(&hba->feh_workq);
+
+	if (queue_eh_work) {
+		/* handle fatal errors only when link is functional */
+		if (hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL) {
+			/* block commands from scsi mid-layer */
+			scsi_block_requests(hba->host);
+
+			/* transfer error masks to sticky bits */
+			hba->saved_err |= hba->errors;
+			hba->saved_uic_err |= hba->uic_error;
+
+			hba->ufshcd_state = UFSHCD_STATE_ERROR;
+			schedule_work(&hba->eh_work);
+		}
 	}
+	/*
+	 * if (!queue_eh_work) -
+	 * Other errors are either non-fatal where host recovers
+	 * itself without s/w intervention or errors that will be
+	 * handled by the SCSI core layer.
	 */
 }
 
 /**
  * ufshcd_tmc_handler - handle task management function completion
@@ -2514,7 +2567,7 @@ static void ufshcd_sl_intr(struct ufs_hba *hba, u32 intr_status)
 {
 	hba->errors = UFSHCD_ERROR_MASK & intr_status;
 	if (hba->errors)
-		ufshcd_err_handler(hba);
+		ufshcd_check_errors(hba);
 
 	if (intr_status & UFSHCD_UIC_MASK)
 		ufshcd_uic_cmd_compl(hba, intr_status);
@@ -2889,12 +2942,12 @@ static int ufshcd_eh_host_reset_handler(struct scsi_cmnd *cmd)
 	 */
 	do {
 		spin_lock_irqsave(hba->host->host_lock, flags);
-		if (!(work_pending(&hba->feh_workq) ||
+		if (!(work_pending(&hba->eh_work) ||
 				hba->ufshcd_state == UFSHCD_STATE_RESET))
 			break;
 		spin_unlock_irqrestore(hba->host->host_lock, flags);
 		dev_dbg(hba->dev, "%s: reset in progress\n", __func__);
-		flush_work(&hba->feh_workq);
+		flush_work(&hba->eh_work);
 	} while (1);
 
 	hba->ufshcd_state = UFSHCD_STATE_RESET;
@@ -3130,7 +3183,7 @@ int ufshcd_init(struct device *dev, struct ufs_hba **hba_handle,
 	init_waitqueue_head(&hba->tm_tag_wq);
 
 	/* Initialize work queues */
-	INIT_WORK(&hba->feh_workq, ufshcd_fatal_err_handler);
+	INIT_WORK(&hba->eh_work, ufshcd_err_handler);
 	INIT_WORK(&hba->eeh_work, ufshcd_exception_event_handler);
 
 	/* Initialize UIC command mutex */
drivers/scsi/ufs/ufshcd.h  +8 −2
@@ -183,9 +183,12 @@ struct ufs_dev_cmd {
  * @eh_flags: Error handling flags
  * @intr_mask: Interrupt Mask Bits
  * @ee_ctrl_mask: Exception event control mask
- * @feh_workq: Work queue for fatal controller error handling
+ * @eh_work: Worker to handle UFS errors that require s/w attention
  * @eeh_work: Worker to handle exception events
  * @errors: HBA errors
+ * @uic_error: UFS interconnect layer error status
+ * @saved_err: sticky error mask
+ * @saved_uic_err: sticky UIC error mask
  * @dev_cmd: ufs device management command information
  * @auto_bkops_enabled: to track whether bkops is enabled in device
  */
@@ -233,11 +236,14 @@ struct ufs_hba {
 	u16 ee_ctrl_mask;
 
 	/* Work Queues */
-	struct work_struct feh_workq;
+	struct work_struct eh_work;
 	struct work_struct eeh_work;
 
 	/* HBA Errors */
 	u32 errors;
+	u32 uic_error;
+	u32 saved_err;
+	u32 saved_uic_err;
 
 	/* Device management request data */
 	struct ufs_dev_cmd dev_cmd;