Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 845a0e40 authored by nagalakshmi.nandigama@lsi.com's avatar nagalakshmi.nandigama@lsi.com Committed by James Bottomley
Browse files

[SCSI] mpt2sas: Better handling of DEAD IOC (PCI-E link down) error condition



Detection of Dead IOC has been done in fault_reset_work thread.

If IOC Doorbell is 0xFFFFFFFF, it will be detected as non-operational/DEAD IOC.
When a DEAD IOC is detected, the code is modified to remove that IOC and
all its attached devices from OS.
The PCI layer API pci_remove_bus_device() is called to remove the dead IOC.

Signed-off-by: Nagalakshmi Nandigama <nagalakshmi.nandigama@lsi.com>
Signed-off-by: James Bottomley <JBottomley@Parallels.com>
parent 4053a4be
Loading
Loading
Loading
Loading
+59 −0
Original line number Diff line number Diff line
@@ -57,6 +57,7 @@
#include <linux/sort.h>
#include <linux/io.h>
#include <linux/time.h>
#include <linux/kthread.h>
#include <linux/aer.h>

#include "mpt2sas_base.h"
@@ -120,9 +121,33 @@ _scsih_set_fwfault_debug(const char *val, struct kernel_param *kp)
		ioc->fwfault_debug = mpt2sas_fwfault_debug;
	return 0;
}

module_param_call(mpt2sas_fwfault_debug, _scsih_set_fwfault_debug,
    param_get_int, &mpt2sas_fwfault_debug, 0644);

/**
 * mpt2sas_remove_dead_ioc_func - kthread entry point that removes a dead ioc
 * @arg: opaque thread argument; interpreted as the struct MPT2SAS_ADAPTER
 *	 of the ioc to remove
 *
 * Passes the adapter's pci device to pci_remove_bus_device().
 *
 * Return 0 after pci_remove_bus_device() has been called.
 * Return -1 if @arg is NULL or the adapter has no pci device.
 */
static int mpt2sas_remove_dead_ioc_func(void *arg)
{
	struct MPT2SAS_ADAPTER *adapter = arg;
	struct pci_dev *dev;

	/* Read the pci device once; a missing adapter yields no device. */
	dev = adapter ? adapter->pdev : NULL;
	if (!dev)
		return -1;

	pci_remove_bus_device(dev);
	return 0;
}


/**
 * _base_fault_reset_work - workq handling ioc fault conditions
 * @work: input argument, used to derive ioc
@@ -138,6 +163,7 @@ _base_fault_reset_work(struct work_struct *work)
	unsigned long	 flags;
	u32 doorbell;
	int rc;
	struct task_struct *p;

	spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock, flags);
	if (ioc->shost_recovery)
@@ -145,6 +171,39 @@ _base_fault_reset_work(struct work_struct *work)
	spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags);

	doorbell = mpt2sas_base_get_iocstate(ioc, 0);
	if ((doorbell & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_MASK) {
		printk(MPT2SAS_INFO_FMT "%s : SAS host is non-operational !!!!\n",
			ioc->name, __func__);

		/*
		 * Call _scsih_flush_pending_cmds callback so that we flush all
		 * pending commands back to OS. This call is required to avoid
		 * deadlock at block layer. Dead IOC will fail to do diag reset,
		 * and this call is safe since dead ioc will never return any
		 * command back from HW.
		 */
		ioc->schedule_dead_ioc_flush_running_cmds(ioc);
		/*
		 * Set remove_host flag early since kernel thread will
		 * take some time to execute.
		 */
		ioc->remove_host = 1;
		/*Remove the Dead Host */
		p = kthread_run(mpt2sas_remove_dead_ioc_func, ioc,
		    "mpt2sas_dead_ioc_%d", ioc->id);
		if (IS_ERR(p)) {
			printk(MPT2SAS_ERR_FMT
			"%s: Running mpt2sas_dead_ioc thread failed !!!!\n",
			ioc->name, __func__);
		} else {
		    printk(MPT2SAS_ERR_FMT
			"%s: Running mpt2sas_dead_ioc thread success !!!!\n",
			ioc->name, __func__);
		}

		return; /* don't rearm timer */
	}

	if ((doorbell & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) {
		rc = mpt2sas_base_hard_reset_handler(ioc, CAN_SLEEP,
		    FORCE_BIG_HAMMER);
+3 −0
Original line number Diff line number Diff line
@@ -623,6 +623,7 @@ enum mutex_type {
	TM_MUTEX_ON = 1,
};

typedef void (*MPT2SAS_FLUSH_RUNNING_CMDS)(struct MPT2SAS_ADAPTER *ioc);
/**
 * struct MPT2SAS_ADAPTER - per adapter struct
 * @list: ioc_list
@@ -665,6 +666,7 @@ enum mutex_type {
 * @msix_vector_count: number msix vectors
 * @cpu_msix_table: table for mapping cpus to msix index
 * @cpu_msix_table_sz: table size
 * @schedule_dead_ioc_flush_running_cmds: callback to flush pending commands
 * @scsi_io_cb_idx: shost generated commands
 * @tm_cb_idx: task management commands
 * @scsih_cb_idx: scsih internal commands
@@ -816,6 +818,7 @@ struct MPT2SAS_ADAPTER {
	resource_size_t	**reply_post_host_index;
	u16		cpu_msix_table_sz;
	u32		ioc_reset_count;
	MPT2SAS_FLUSH_RUNNING_CMDS schedule_dead_ioc_flush_running_cmds;

	/* internal commands, callback index */
	u8		scsi_io_cb_idx;
+1 −0
Original line number Diff line number Diff line
@@ -7928,6 +7928,7 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id)
	ioc->tm_tr_volume_cb_idx = tm_tr_volume_cb_idx;
	ioc->tm_sas_control_cb_idx = tm_sas_control_cb_idx;
	ioc->logging_level = logging_level;
	ioc->schedule_dead_ioc_flush_running_cmds = &_scsih_flush_running_cmds;
	/* misc semaphores and spin locks */
	mutex_init(&ioc->reset_in_progress_mutex);
	spin_lock_init(&ioc->ioc_reset_in_progress_lock);