Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 68ccfa71 authored by Linux Build Service Account's avatar Linux Build Service Account Committed by Gerrit - the friendly Code Review server
Browse files

Merge "scsi: ufs: add error recovery after DL NAC error"

parents 2dc93cbe 2b83d2f9
Loading
Loading
Loading
Loading
+2 −0
Original line number Original line Diff line number Diff line
@@ -18,6 +18,8 @@
static struct ufs_card_fix ufs_fixups[] = {
static struct ufs_card_fix ufs_fixups[] = {
	/* UFS cards deviations table */
	/* UFS cards deviations table */
	UFS_FIX(UFS_VENDOR_SAMSUNG, UFS_ANY_MODEL, UFS_DEVICE_NO_VCCQ),
	UFS_FIX(UFS_VENDOR_SAMSUNG, UFS_ANY_MODEL, UFS_DEVICE_NO_VCCQ),
	UFS_FIX(UFS_VENDOR_SAMSUNG, UFS_ANY_MODEL,
		UFS_DEVICE_QUIRK_RECOVERY_FROM_DL_NAC_ERRORS),
	END_FIX
	END_FIX
};
};


+25 −0
Original line number Original line Diff line number Diff line
@@ -74,6 +74,31 @@ struct ufs_card_fix {
 */
 */
#define UFS_DEVICE_NO_VCCQ (1 << 1)
#define UFS_DEVICE_NO_VCCQ (1 << 1)


/*
 * Some vendors' UFS devices send back-to-back NACs for the DL data frames,
 * causing the host controller to raise the DFES error status. Sometimes
 * such UFS devices send back-to-back NACs without waiting for the new
 * retransmitted DL frame from the host, and in such cases the host UniPro
 * may go into a bad state without raising the DFES error interrupt. If this
 * happens, all the pending commands would time out only after their
 * respective SW command timeouts (which are generally too long).
 *
 * We can work around such device behaviour like this:
 * - As soon as SW sees the DL NAC error, it should schedule the error handler.
 * - The error handler sleeps for 50ms to see if any fatal errors are raised
 *   by the UFS controller.
 *    - If there are fatal errors, SW does normal error recovery.
 *    - If there are no fatal errors, SW sends a NOP command to the device
 *      to check if the link is alive.
 *        - If the NOP command times out, SW does normal error recovery.
 *        - If the NOP command succeeds, SW skips the error handling.
 *
 * If the DL NAC error is seen multiple times with some vendor's UFS devices,
 * enable this quirk to initiate quick error recovery and also to silence the
 * related error logs, reducing spamming of the kernel logs.
 */
#define UFS_DEVICE_QUIRK_RECOVERY_FROM_DL_NAC_ERRORS (1 << 2)

struct ufs_hba;
struct ufs_hba;
void ufs_advertise_fixup_device(struct ufs_hba *hba);
void ufs_advertise_fixup_device(struct ufs_hba *hba);
#endif /* UFS_QUIRKS_H_ */
#endif /* UFS_QUIRKS_H_ */
+131 −9
Original line number Original line Diff line number Diff line
@@ -210,9 +210,11 @@ enum {
/* UFSHCD UIC layer error flags */
/* UFSHCD UIC layer error flags */
enum {
enum {
	UFSHCD_UIC_DL_PA_INIT_ERROR = (1 << 0), /* Data link layer error */
	UFSHCD_UIC_DL_PA_INIT_ERROR = (1 << 0), /* Data link layer error */
	UFSHCD_UIC_NL_ERROR = (1 << 1), /* Network layer error */
	UFSHCD_UIC_DL_NAC_RECEIVED_ERROR = (1 << 1), /* Data link layer error */
	UFSHCD_UIC_TL_ERROR = (1 << 2), /* Transport Layer error */
	UFSHCD_UIC_DL_TCx_REPLAY_ERROR = (1 << 2), /* Data link layer error */
	UFSHCD_UIC_DME_ERROR = (1 << 3), /* DME error */
	UFSHCD_UIC_NL_ERROR = (1 << 3), /* Network layer error */
	UFSHCD_UIC_TL_ERROR = (1 << 4), /* Transport Layer error */
	UFSHCD_UIC_DME_ERROR = (1 << 5), /* DME error */
};
};


/* Interrupt configuration options */
/* Interrupt configuration options */
@@ -4580,7 +4582,7 @@ ufshcd_transfer_rsp_status(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
		break;
		break;
	} /* end of switch */
	} /* end of switch */


	if (host_byte(result) != DID_OK) {
	if ((host_byte(result) != DID_OK) && !hba->silence_err_logs) {
		print_prdt = (ocs == OCS_INVALID_PRDT_ATTR ||
		print_prdt = (ocs == OCS_INVALID_PRDT_ATTR ||
			ocs == OCS_MISMATCH_DATA_BUF_SIZE);
			ocs == OCS_MISMATCH_DATA_BUF_SIZE);
		ufshcd_print_trs(hba, 1 << lrbp->task_tag, print_prdt);
		ufshcd_print_trs(hba, 1 << lrbp->task_tag, print_prdt);
@@ -4994,6 +4996,101 @@ static void ufshcd_complete_requests(struct ufs_hba *hba)
	ufshcd_transfer_req_compl(hba);
	ufshcd_transfer_req_compl(hba);
	ufshcd_tmc_handler(hba);
	ufshcd_tmc_handler(hba);
}
}

/**
 * ufshcd_quirk_dl_nac_errors - This function checks if error handling is
 *				to recover from the DL NAC errors or not.
 * @hba: per-adapter instance
 *
 * Inspects hba->saved_err and hba->saved_uic_err and decides whether the
 * caller must go ahead with error recovery. As side effects it may set
 * hba->silence_err_logs, and when a lone DL NAC error turns out to be
 * harmless it clears the NAC bit from hba->saved_uic_err (and UIC_ERROR from
 * hba->saved_err).
 *
 * Locking: the function takes hba->host->host_lock itself and releases it
 * before returning, so it must be called without that lock held. It may
 * sleep: the lock is dropped around msleep() and ufshcd_verify_dev_init().
 * Because the lock is dropped, the saved error fields are re-read after each
 * re-acquisition rather than cached.
 *
 * Returns true if error handling is required, false otherwise
 */
static bool ufshcd_quirk_dl_nac_errors(struct ufs_hba *hba)
{
	unsigned long flags;
	/* default to doing error recovery; only the all-clear path flips it */
	bool err_handling = true;

	spin_lock_irqsave(hba->host->host_lock, flags);
	/*
	 * UFS_DEVICE_QUIRK_RECOVERY_FROM_DL_NAC_ERRORS only works around the
	 * device fatal error and/or DL NAC & REPLAY timeout errors. Controller
	 * or system bus fatal errors get normal recovery with logs untouched.
	 */
	if (hba->saved_err & (CONTROLLER_FATAL_ERROR | SYSTEM_BUS_FATAL_ERROR))
		goto out;

	if ((hba->saved_err & DEVICE_FATAL_ERROR) ||
	    ((hba->saved_err & UIC_ERROR) &&
	     (hba->saved_uic_err & UFSHCD_UIC_DL_TCx_REPLAY_ERROR))) {
		/*
		 * we have to do error recovery but at least silence the error
		 * logs.
		 */
		hba->silence_err_logs = true;
		goto out;
	}

	if ((hba->saved_err & UIC_ERROR) &&
	    (hba->saved_uic_err & UFSHCD_UIC_DL_NAC_RECEIVED_ERROR)) {
		int err;
		/*
		 * wait for 50ms to see if we can get any other errors or not.
		 * The lock is released because msleep() sleeps.
		 */
		spin_unlock_irqrestore(hba->host->host_lock, flags);
		msleep(50);
		spin_lock_irqsave(hba->host->host_lock, flags);

		/*
		 * now check if we have got any other severe errors other than
		 * DL NAC error?
		 *
		 * If so, recovery is required. Logs are silenced only when the
		 * sole fatal interrupt bit set is DEVICE_FATAL_ERROR, or when
		 * some UIC error bit other than DL NAC is set.
		 */
		if ((hba->saved_err & INT_FATAL_ERRORS) ||
		    ((hba->saved_err & UIC_ERROR) &&
		    (hba->saved_uic_err & ~UFSHCD_UIC_DL_NAC_RECEIVED_ERROR))) {
			if (((hba->saved_err & INT_FATAL_ERRORS) ==
				DEVICE_FATAL_ERROR) || (hba->saved_uic_err &
					~UFSHCD_UIC_DL_NAC_RECEIVED_ERROR))
				hba->silence_err_logs = true;
			goto out;
		}

		/*
		 * As DL NAC is the only error received so far, send out NOP
		 * command to confirm if link is still active or not.
		 *   - If we don't get any response then do error recovery.
		 *   - If we get response then clear the DL NAC error bit.
		 */

		/* silence the error logs from NOP command */
		hba->silence_err_logs = true;
		/* drop the lock across the device-init check */
		spin_unlock_irqrestore(hba->host->host_lock, flags);
		err = ufshcd_verify_dev_init(hba);
		spin_lock_irqsave(hba->host->host_lock, flags);
		hba->silence_err_logs = false;

		if (err) {
			/* link check failed: recover, but keep the logs quiet */
			hba->silence_err_logs = true;
			goto out;
		}

		/* Link seems to be alive hence ignore the DL NAC errors */
		if (hba->saved_uic_err == UFSHCD_UIC_DL_NAC_RECEIVED_ERROR)
			hba->saved_err &= ~UIC_ERROR;
		/* clear NAC error */
		hba->saved_uic_err &= ~UFSHCD_UIC_DL_NAC_RECEIVED_ERROR;
		if (!hba->saved_uic_err) {
			/* nothing but the NAC error was pending: skip recovery */
			err_handling = false;
			goto out;
		}
		/*
		 * there seem to be some errors other than NAC (they may have
		 * been recorded while the lock was dropped), so do error
		 * recovery
		 */
		hba->silence_err_logs = true;
	}
out:
	spin_unlock_irqrestore(hba->host->host_lock, flags);
	return err_handling;
}

/**
/**
 * ufshcd_err_handler - handle UFS errors that require s/w attention
 * ufshcd_err_handler - handle UFS errors that require s/w attention
 * @work: pointer to work structure
 * @work: pointer to work structure
@@ -5023,6 +5120,17 @@ static void ufshcd_err_handler(struct work_struct *work)
	/* Complete requests that have door-bell cleared by h/w */
	/* Complete requests that have door-bell cleared by h/w */
	ufshcd_complete_requests(hba);
	ufshcd_complete_requests(hba);


	if (hba->dev_quirks & UFS_DEVICE_QUIRK_RECOVERY_FROM_DL_NAC_ERRORS) {
		bool ret;

		spin_unlock_irqrestore(hba->host->host_lock, flags);
		/* release the lock as ufshcd_quirk_dl_nac_errors() may sleep */
		ret = ufshcd_quirk_dl_nac_errors(hba);
		spin_lock_irqsave(hba->host->host_lock, flags);
		if (!ret)
			goto skip_err_handling;
	}

	/*
	/*
	 * Dump controller state before resetting. Transfer requests state
	 * Dump controller state before resetting. Transfer requests state
	 * will be dump as part of the request completion.
	 * will be dump as part of the request completion.
@@ -5030,17 +5138,21 @@ static void ufshcd_err_handler(struct work_struct *work)
	if (hba->saved_err & (INT_FATAL_ERRORS | UIC_ERROR)) {
	if (hba->saved_err & (INT_FATAL_ERRORS | UIC_ERROR)) {
		dev_err(hba->dev, "%s: saved_err 0x%x saved_uic_err 0x%x",
		dev_err(hba->dev, "%s: saved_err 0x%x saved_uic_err 0x%x",
			__func__, hba->saved_err, hba->saved_uic_err);
			__func__, hba->saved_err, hba->saved_uic_err);
		if (!hba->silence_err_logs) {
			ufshcd_print_host_regs(hba);
			ufshcd_print_host_regs(hba);
			ufshcd_print_pwr_info(hba);
			ufshcd_print_pwr_info(hba);
			ufshcd_print_tmrs(hba, hba->outstanding_tasks);
			ufshcd_print_tmrs(hba, hba->outstanding_tasks);
		}
		}
	}


	if (hba->vops && hba->vops->crypto_engine_get_err)
	if (hba->vops && hba->vops->crypto_engine_get_err)
		crypto_engine_err = hba->vops->crypto_engine_get_err(hba);
		crypto_engine_err = hba->vops->crypto_engine_get_err(hba);


	if ((hba->saved_err & INT_FATAL_ERRORS) || crypto_engine_err ||
	if ((hba->saved_err & INT_FATAL_ERRORS) || crypto_engine_err ||
	    ((hba->saved_err & UIC_ERROR) &&
	    ((hba->saved_err & UIC_ERROR) &&
	    (hba->saved_uic_err & (UFSHCD_UIC_DL_PA_INIT_ERROR))))
	    (hba->saved_uic_err & (UFSHCD_UIC_DL_PA_INIT_ERROR |
				   UFSHCD_UIC_DL_NAC_RECEIVED_ERROR |
				   UFSHCD_UIC_DL_TCx_REPLAY_ERROR))))
		needs_reset = true;
		needs_reset = true;


	/*
	/*
@@ -5125,6 +5237,7 @@ skip_pending_xfer_clear:
			hba->vops->crypto_engine_reset_err(hba);
			hba->vops->crypto_engine_reset_err(hba);
	}
	}


skip_err_handling:
	if (!needs_reset) {
	if (!needs_reset) {
		hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
		hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
		if (hba->saved_err || hba->saved_uic_err)
		if (hba->saved_err || hba->saved_uic_err)
@@ -5132,6 +5245,7 @@ skip_pending_xfer_clear:
			    __func__, hba->saved_err, hba->saved_uic_err);
			    __func__, hba->saved_err, hba->saved_uic_err);
	}
	}


	hba->silence_err_logs = false;
	ufshcd_clear_eh_in_progress(hba);
	ufshcd_clear_eh_in_progress(hba);
out:
out:
	spin_unlock_irqrestore(hba->host->host_lock, flags);
	spin_unlock_irqrestore(hba->host->host_lock, flags);
@@ -5175,8 +5289,16 @@ static void ufshcd_update_uic_error(struct ufs_hba *hba)
	if (reg)
	if (reg)
		ufshcd_update_uic_reg_hist(&hba->ufs_stats.dl_err, reg);
		ufshcd_update_uic_reg_hist(&hba->ufs_stats.dl_err, reg);


	if (reg & UIC_DATA_LINK_LAYER_ERROR_PA_INIT)
	if (reg & UIC_DATA_LINK_LAYER_ERROR_PA_INIT) {
		hba->uic_error |= UFSHCD_UIC_DL_PA_INIT_ERROR;
		hba->uic_error |= UFSHCD_UIC_DL_PA_INIT_ERROR;
	} else if (hba->dev_quirks &
		   UFS_DEVICE_QUIRK_RECOVERY_FROM_DL_NAC_ERRORS) {
		if (reg & UIC_DATA_LINK_LAYER_ERROR_NAC_RECEIVED)
			hba->uic_error |=
				UFSHCD_UIC_DL_NAC_RECEIVED_ERROR;
		else if (reg & UIC_DATA_LINK_LAYER_ERROR_TCx_REPLAY_TIMEOUT)
			hba->uic_error |= UFSHCD_UIC_DL_TCx_REPLAY_ERROR;
	}


	/* UIC NL/TL/DME errors needs software retry */
	/* UIC NL/TL/DME errors needs software retry */
	reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_NETWORK_LAYER);
	reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_NETWORK_LAYER);
+2 −0
Original line number Original line Diff line number Diff line
@@ -173,6 +173,8 @@ enum {
#define UIC_DATA_LINK_LAYER_ERROR		UFS_BIT(31)
#define UIC_DATA_LINK_LAYER_ERROR		UFS_BIT(31)
#define UIC_DATA_LINK_LAYER_ERROR_CODE_MASK	0x7FFF
#define UIC_DATA_LINK_LAYER_ERROR_CODE_MASK	0x7FFF
#define UIC_DATA_LINK_LAYER_ERROR_PA_INIT	0x2000
#define UIC_DATA_LINK_LAYER_ERROR_PA_INIT	0x2000
#define UIC_DATA_LINK_LAYER_ERROR_NAC_RECEIVED	0x0001
#define UIC_DATA_LINK_LAYER_ERROR_TCx_REPLAY_TIMEOUT 0x0002


/* UECN - Host UIC Error Code Network Layer 40h */
/* UECN - Host UIC Error Code Network Layer 40h */
#define UIC_NETWORK_LAYER_ERROR			UFS_BIT(31)
#define UIC_NETWORK_LAYER_ERROR			UFS_BIT(31)
+1 −0
Original line number Original line Diff line number Diff line
@@ -682,6 +682,7 @@ struct ufs_hba {
	u32 uic_error;
	u32 uic_error;
	u32 saved_err;
	u32 saved_err;
	u32 saved_uic_err;
	u32 saved_uic_err;
	bool silence_err_logs;


	/* Device management request data */
	/* Device management request data */
	struct ufs_dev_cmd dev_cmd;
	struct ufs_dev_cmd dev_cmd;