Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a1f6e021, authored by nickcheng(鄭守謙), committed by James Bottomley
Browse files

[SCSI] areca: improve driver stability and compatibility



Description:

    1. Implement the PCI-Express error recovery functions and AER
       capability, with special thanks to Yanmin Zhang for generous
       help with AER

    2. Select ARCMSR_MAX_XFER_SECTORS_B=4096 if the firmware version
       is 1.42 or later

    3. Call arcmsr_done4_abort_postqueue from the arcmsr_iop_reset
       function to improve stability during hot-unplug/plug

    4. Modify the ISR, the arcmsr_interrupt routine, to prevent
       inconsistency with the sg_mod driver when an application calls
       the arcmsr driver directly without passing through the SCSI
       midlayer

Signed-off-by: Nick Cheng <nick.cheng@areca.com.tw>
[jejb: unused variable removal]
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
parent 727eead6
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -48,9 +48,10 @@ struct class_device_attribute;

#define ARCMSR_MAX_OUTSTANDING_CMD 						256
#define ARCMSR_MAX_FREECCB_NUM							288
#define ARCMSR_DRIVER_VERSION				"Driver Version 1.20.00.13"
#define ARCMSR_DRIVER_VERSION				"Driver Version 1.20.00.14"
#define ARCMSR_SCSI_INITIATOR_ID						255
#define ARCMSR_MAX_XFER_SECTORS							512
#define ARCMSR_MAX_XFER_SECTORS_B                                              4096
#define ARCMSR_MAX_TARGETID							 17
#define ARCMSR_MAX_TARGETLUN							  8
#define ARCMSR_MAX_CMD_PERLUN				 ARCMSR_MAX_OUTSTANDING_CMD
@@ -469,4 +470,3 @@ extern void arcmsr_post_Qbuffer(struct AdapterControlBlock *acb);
extern struct class_device_attribute *arcmsr_host_attrs[];
extern int arcmsr_alloc_sysfs_attr(struct AdapterControlBlock *acb);
void arcmsr_free_sysfs_attr(struct AdapterControlBlock *acb);
+423 −33
Original line number Diff line number Diff line
@@ -57,6 +57,7 @@
#include <linux/dma-mapping.h>
#include <linux/timer.h>
#include <linux/pci.h>
#include <linux/aer.h>
#include <asm/dma.h>
#include <asm/io.h>
#include <asm/system.h>
@@ -71,7 +72,7 @@
#include "arcmsr.h"

MODULE_AUTHOR("Erich Chen <erich@areca.com.tw>");
MODULE_DESCRIPTION("ARECA (ARC11xx/12xx) SATA RAID HOST Adapter");
MODULE_DESCRIPTION("ARECA (ARC11xx/12xx/13xx/16xx) SATA/SAS RAID HOST Adapter");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(ARCMSR_DRIVER_VERSION);

@@ -93,7 +94,9 @@ static void arcmsr_flush_adapter_cache(struct AdapterControlBlock *acb);
static uint8_t arcmsr_wait_msgint_ready(struct AdapterControlBlock *acb);
static const char *arcmsr_info(struct Scsi_Host *);
static irqreturn_t arcmsr_interrupt(struct AdapterControlBlock *acb);

static pci_ers_result_t arcmsr_pci_error_detected(struct pci_dev *pdev,
						pci_channel_state_t state);
static pci_ers_result_t arcmsr_pci_slot_reset(struct pci_dev *pdev);
static int arcmsr_adjust_disk_queue_depth(struct scsi_device *sdev, int queue_depth)
{
	if (queue_depth > ARCMSR_MAX_CMD_PERLUN)
@@ -104,7 +107,8 @@ static int arcmsr_adjust_disk_queue_depth(struct scsi_device *sdev, int queue_de

static struct scsi_host_template arcmsr_scsi_host_template = {
	.module			= THIS_MODULE,
	.name			= "ARCMSR ARECA SATA RAID HOST Adapter" ARCMSR_DRIVER_VERSION,
	.name			= "ARCMSR ARECA SATA/SAS RAID HOST Adapter"
							ARCMSR_DRIVER_VERSION,
	.info			= arcmsr_info,
	.queuecommand		= arcmsr_queue_command,
	.eh_abort_handler	= arcmsr_abort,
@@ -119,6 +123,10 @@ static struct scsi_host_template arcmsr_scsi_host_template = {
	.use_clustering		= ENABLE_CLUSTERING,
	.shost_attrs		= arcmsr_host_attrs,
};
/*
 * PCI error-recovery callbacks for this driver: one entry point invoked
 * when a channel error is detected and one invoked after the slot has
 * been reset.
 */
static struct pci_error_handlers arcmsr_pci_error_handlers = {
	.slot_reset	= arcmsr_pci_slot_reset,
	.error_detected	= arcmsr_pci_error_detected,
};

static struct pci_device_id arcmsr_device_id_table[] = {
	{PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1110)},
@@ -144,7 +152,8 @@ static struct pci_driver arcmsr_pci_driver = {
	.id_table		= arcmsr_device_id_table,
	.probe			= arcmsr_probe,
	.remove			= arcmsr_remove,
	.shutdown		= arcmsr_shutdown
	.shutdown		= arcmsr_shutdown,
	.err_handler		= &arcmsr_pci_error_handlers,
};

static irqreturn_t arcmsr_do_interrupt(int irq, void *dev_id)
@@ -328,6 +337,8 @@ static int arcmsr_probe(struct pci_dev *pdev,

	arcmsr_iop_init(acb);
	pci_set_drvdata(pdev, host);
	if (strncmp(acb->firm_version, "V1.42", 5) >= 0)
		host->max_sectors= ARCMSR_MAX_XFER_SECTORS_B;

	error = scsi_add_host(host, &pdev->dev);
	if (error)
@@ -338,6 +349,7 @@ static int arcmsr_probe(struct pci_dev *pdev,
		goto out_free_sysfs;

	scsi_scan_host(host);
	pci_enable_pcie_error_reporting(pdev);
	return 0;
 out_free_sysfs:
 out_free_irq:
@@ -762,7 +774,7 @@ static irqreturn_t arcmsr_interrupt(struct AdapterControlBlock *acb)
				switch(ccb->arcmsr_cdb.DeviceStatus) {
				case ARCMSR_DEV_SELECT_TIMEOUT: {
						acb->devstate[id][lun] = ARECA_RAID_GONE;
						ccb->pcmd->result = DID_TIME_OUT << 16;
						ccb->pcmd->result = DID_NO_CONNECT << 16;
						arcmsr_ccb_complete(ccb, 1);
					}
					break;
@@ -1247,7 +1259,7 @@ static void arcmsr_polling_ccbdone(struct AdapterControlBlock *acb,
			switch(ccb->arcmsr_cdb.DeviceStatus) {
			case ARCMSR_DEV_SELECT_TIMEOUT: {
					acb->devstate[id][lun] = ARECA_RAID_GONE;
					ccb->pcmd->result = DID_TIME_OUT << 16;
					ccb->pcmd->result = DID_NO_CONNECT << 16;
					arcmsr_ccb_complete(ccb, 1);
				}
				break;
@@ -1281,6 +1293,94 @@ static void arcmsr_polling_ccbdone(struct AdapterControlBlock *acb,
		}
	}
}
static void arcmsr_done4_abort_postqueue(struct AdapterControlBlock *acb)
{
	int i = 0, found = 0;
	int id, lun;
	uint32_t flag_ccb, outbound_intstatus;
	struct MessageUnit __iomem *reg = acb->pmu;
	struct CommandControlBlock *ccb;
	/*clear and abort all outbound posted Q*/

	while (((flag_ccb = readl(&reg->outbound_queueport)) != 0xFFFFFFFF) &&
(i++ < 256)){
		ccb = (struct CommandControlBlock *)(acb->vir2phy_offset +
(flag_ccb << 5));
	if (ccb){
		if ((ccb->acb != acb)||(ccb->startdone != \
ARCMSR_CCB_START)){
				printk(KERN_NOTICE "arcmsr%d: polling get \
an illegal ccb" "command done ccb = '0x%p'""ccboutstandingcount = %d \n",
					acb->host->host_no, ccb,
					atomic_read(&acb->ccboutstandingcount));
				continue;
			}

			id = ccb->pcmd->device->id;
			lun = ccb->pcmd->device->lun;
			if (!(flag_ccb & ARCMSR_CCBREPLY_FLAG_ERROR)){
				if (acb->devstate[id][lun] == ARECA_RAID_GONE)
					acb->devstate[id][lun] = ARECA_RAID_GOOD;
				ccb->pcmd->result = DID_OK << 16;
				arcmsr_ccb_complete(ccb, 1);
			}
			else {
				switch(ccb->arcmsr_cdb.DeviceStatus) {
				case ARCMSR_DEV_SELECT_TIMEOUT: {
						acb->devstate[id][lun] = ARECA_RAID_GONE;
						ccb->pcmd->result = DID_NO_CONNECT << 16;
						arcmsr_ccb_complete(ccb, 1);
				}
				break;

				case ARCMSR_DEV_ABORTED:

				case ARCMSR_DEV_INIT_FAIL: {
						acb->devstate[id][lun] =
							ARECA_RAID_GONE;
						ccb->pcmd->result =
							DID_BAD_TARGET << 16;
				arcmsr_ccb_complete(ccb, 1);
				}
				break;

				case ARCMSR_DEV_CHECK_CONDITION: {
						acb->devstate[id][lun] =
							ARECA_RAID_GOOD;
						arcmsr_report_sense_info(ccb);
						arcmsr_ccb_complete(ccb, 1);
				}
				break;

				default:
						printk(KERN_NOTICE
						      "arcmsr%d: scsi id = %d \
							lun = %d""polling and \
							getting command error \
							done""but got unknown \
							DeviceStatus = 0x%x \n",
							acb->host->host_no, id,
					   lun, ccb->arcmsr_cdb.DeviceStatus);
						acb->devstate[id][lun] =
								ARECA_RAID_GONE;
						ccb->pcmd->result =
							DID_BAD_TARGET << 16;
						arcmsr_ccb_complete(ccb, 1);
				break;
			       }
	}
		       found = 1;
	       }
	}
	if (found){
		outbound_intstatus = readl(&reg->outbound_intstatus) & \
			acb->outbound_int_enable;
		writel(outbound_intstatus, &reg->outbound_intstatus);
		/*clear interrupt*/
	}
	return;
}


static void arcmsr_iop_init(struct AdapterControlBlock *acb)
{
@@ -1314,7 +1414,6 @@ static void arcmsr_iop_init(struct AdapterControlBlock *acb)

static void arcmsr_iop_reset(struct AdapterControlBlock *acb)
{
	struct MessageUnit __iomem *reg = acb->pmu;
	struct CommandControlBlock *ccb;
	uint32_t intmask_org;
	int i = 0;
@@ -1327,21 +1426,17 @@ static void arcmsr_iop_reset(struct AdapterControlBlock *acb)
		/* disable all outbound interrupt */
		intmask_org = arcmsr_disable_outbound_ints(acb);
		/* clear all outbound posted Q */
		for (i = 0; i < ARCMSR_MAX_OUTSTANDING_CMD; i++)
			readl(&reg->outbound_queueport);
		arcmsr_done4_abort_postqueue(acb);
		for (i = 0; i < ARCMSR_MAX_FREECCB_NUM; i++) {
			ccb = acb->pccb_pool[i];
			if ((ccb->startdone == ARCMSR_CCB_START) ||
				(ccb->startdone == ARCMSR_CCB_ABORTED)) {
			if (ccb->startdone == ARCMSR_CCB_START) {
				ccb->startdone = ARCMSR_CCB_ABORTED;
				ccb->pcmd->result = DID_ABORT << 16;
				arcmsr_ccb_complete(ccb, 1);
			}
		}
		/* enable all outbound interrupt */
		arcmsr_enable_outbound_ints(acb, intmask_org);
	}
	atomic_set(&acb->ccboutstandingcount, 0);

}

static int arcmsr_bus_reset(struct scsi_cmnd *cmd)
@@ -1390,7 +1485,6 @@ static int arcmsr_abort(struct scsi_cmnd *cmd)
		"arcmsr%d: abort device command of scsi id = %d lun = %d \n",
		acb->host->host_no, cmd->device->id, cmd->device->lun);
	acb->num_aborts++;

	/*
	************************************************
	** the all interrupt service routine is locked
@@ -1451,4 +1545,300 @@ static const char *arcmsr_info(struct Scsi_Host *host)
	return buf;
}

/*
 * arcmsr_pci_slot_reset - rebuild the adapter state after a PCI slot reset.
 * @pdev: PCI device whose slot has been reset
 *
 * Registered as the .slot_reset PCI error handler.  Re-enables the device,
 * allocates and configures a fresh Scsi_Host with its embedded
 * AdapterControlBlock, requests the PCI regions, maps PCI resource 0,
 * allocates the CCB pool, installs the interrupt handler, initialises the
 * IOP, then registers and scans the new host.
 *
 * Returns PCI_ERS_RESULT_RECOVERED on success.  On any failure the
 * resources acquired by this call are released in reverse order, the
 * driver data pointer of @pdev is put back to the value it had on entry,
 * and PCI_ERS_RESULT_DISCONNECT is returned.
 *
 * NOTE(review): the host that was stored in the driver data before this
 * call is not torn down here on success -- confirm who releases it.
 */
static pci_ers_result_t arcmsr_pci_slot_reset(struct pci_dev *pdev)
{
	struct Scsi_Host *host;
	/* remembered so a failed recovery does not leave drvdata dangling */
	struct Scsi_Host *prev_host = pci_get_drvdata(pdev);
	struct AdapterControlBlock *acb;
	uint8_t bus, dev_fun;
	int error;

	error = pci_enable_device(pdev);
	if (error)
		return PCI_ERS_RESULT_DISCONNECT;
	pci_set_master(pdev);

	host = scsi_host_alloc(&arcmsr_scsi_host_template,
			       sizeof(struct AdapterControlBlock));
	if (!host)
		goto out_disable_device;
	acb = (struct AdapterControlBlock *)host->hostdata;
	memset(acb, 0, sizeof(struct AdapterControlBlock));

	/* prefer 64-bit DMA, fall back to 32-bit */
	error = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
	if (error) {
		error = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
		if (error) {
			printk(KERN_WARNING
			       "scsi%d: No suitable DMA mask available\n",
			       host->host_no);
			goto out_host_put;
		}
	}

	bus = pdev->bus->number;
	dev_fun = pdev->devfn;
	acb->pdev = pdev;
	acb->host = host;
	host->max_sectors = ARCMSR_MAX_XFER_SECTORS;
	host->max_lun = ARCMSR_MAX_TARGETLUN;
	host->max_id = ARCMSR_MAX_TARGETID;	/* 16:8 */
	host->max_cmd_len = 16;	/* 16-byte CDBs: 64-bit LBA, over 2 TB */
	host->sg_tablesize = ARCMSR_MAX_SG_ENTRIES;
	host->can_queue = ARCMSR_MAX_FREECCB_NUM; /* max simultaneous cmds */
	host->cmd_per_lun = ARCMSR_MAX_CMD_PERLUN;
	host->this_id = ARCMSR_SCSI_INITIATOR_ID;
	host->unique_id = (bus << 8) | dev_fun;
	host->irq = pdev->irq;

	error = pci_request_regions(pdev, "arcmsr");
	if (error)
		goto out_host_put;

	acb->pmu = ioremap(pci_resource_start(pdev, 0),
			   pci_resource_len(pdev, 0));
	if (!acb->pmu) {
		printk(KERN_NOTICE "arcmsr%d: memory"
			" mapping region fail \n", acb->host->host_no);
		goto out_release_regions;
	}
	acb->acb_flags |= (ACB_F_MESSAGE_WQBUFFER_CLEARED |
			   ACB_F_MESSAGE_RQBUFFER_CLEARED |
			   ACB_F_MESSAGE_WQBUFFER_READED);
	acb->acb_flags &= ~ACB_F_SCSISTOPADAPTER;
	INIT_LIST_HEAD(&acb->ccb_free_list);

	error = arcmsr_alloc_ccb_pool(acb);
	if (error)
		goto out_iounmap;

	error = request_irq(pdev->irq, arcmsr_do_interrupt,
			IRQF_DISABLED | IRQF_SHARED, "arcmsr", acb);
	if (error)
		goto out_iounmap;

	arcmsr_iop_init(acb);
	/* firmware reporting "V1.42" or later gets the larger transfer limit */
	if (strncmp(acb->firm_version, "V1.42", 5) >= 0)
		host->max_sectors = ARCMSR_MAX_XFER_SECTORS_B;

	pci_set_drvdata(pdev, host);

	error = scsi_add_host(host, &pdev->dev);
	if (error)
		goto out_free_irq;

	error = arcmsr_alloc_sysfs_attr(acb);
	if (error)
		goto out_remove_host;

	scsi_scan_host(host);
	return PCI_ERS_RESULT_RECOVERED;

 out_remove_host:
	scsi_remove_host(host);
 out_free_irq:
	/* host is about to be dropped: do not leave drvdata pointing at it */
	pci_set_drvdata(pdev, prev_host);
	free_irq(pdev->irq, acb);
 out_iounmap:
	/*
	 * NOTE(review): a CCB pool obtained from arcmsr_alloc_ccb_pool() is
	 * not released on this path; the matching release routine is not
	 * visible from here -- add the call once confirmed.
	 */
	iounmap(acb->pmu);
 out_release_regions:
	pci_release_regions(pdev);
 out_host_put:
	scsi_host_put(host);
 out_disable_device:
	pci_disable_device(pdev);
	return PCI_ERS_RESULT_DISCONNECT;
}

/*
 * arcmsr_pci_ers_need_reset_forepart - first half of PCI error handling for
 * the case where a slot reset will be requested.
 * @pdev: PCI device that reported the error; its driver data is the
 *        Scsi_Host whose hostdata is the AdapterControlBlock
 *
 * Drains the adapter's outbound post queue and completes every command
 * found there.  The work is identical to arcmsr_done4_abort_postqueue(), so
 * it is delegated to that routine instead of keeping a second copy of the
 * loop.
 */
static void arcmsr_pci_ers_need_reset_forepart(struct pci_dev *pdev)
{
	struct Scsi_Host *host = pci_get_drvdata(pdev);
	struct AdapterControlBlock *acb =
		(struct AdapterControlBlock *)host->hostdata;

	/* clear and abort all outbound posted Q */
	arcmsr_done4_abort_postqueue(acb);
}


/*
 * arcmsr_pci_ers_disconnect_forepart - first half of PCI error handling for
 * the case where the device will be reported as disconnected.
 * @pdev: PCI device that reported the error; its driver data is the
 *        Scsi_Host whose hostdata is the AdapterControlBlock
 *
 * Drains the adapter's outbound post queue and completes every command
 * found there.  The work is identical to arcmsr_done4_abort_postqueue(), so
 * it is delegated to that routine instead of keeping a third copy of the
 * loop.
 */
static void arcmsr_pci_ers_disconnect_forepart(struct pci_dev *pdev)
{
	struct Scsi_Host *host = pci_get_drvdata(pdev);
	struct AdapterControlBlock *acb =
		(struct AdapterControlBlock *)host->hostdata;

	/* clear and abort all outbound posted Q */
	arcmsr_done4_abort_postqueue(acb);
}

/*
 * arcmsr_pci_error_detected - .error_detected PCI error handler.
 * @pdev:  PCI device on which the error was reported
 * @state: channel state passed in by the caller
 *
 * A permanent failure runs the disconnect forepart and returns
 * PCI_ERS_RESULT_DISCONNECT.  A frozen channel runs the need-reset forepart.
 * Every state other than permanent failure returns
 * PCI_ERS_RESULT_NEED_RESET.
 */
static pci_ers_result_t arcmsr_pci_error_detected(struct pci_dev *pdev,
						pci_channel_state_t state)
{
	if (state == pci_channel_io_perm_failure) {
		arcmsr_pci_ers_disconnect_forepart(pdev);
		return PCI_ERS_RESULT_DISCONNECT;
	}

	if (state == pci_channel_io_frozen)
		arcmsr_pci_ers_need_reset_forepart(pdev);

	return PCI_ERS_RESULT_NEED_RESET;
}