Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 0cf07f84 authored by James Smart, committed by Martin K. Petersen
Browse files

scsi: lpfc: Add auto EQ delay logic



Administrator intervention is currently required to get good numbers
when switching from running latency tests to IOPS tests.

The configured interrupt coalescing values will greatly affect the
results of these tests.  Currently, the driver has a single coalescing
value set by values of the module attribute.  This patch changes the
driver to support auto-configuration of the coalescing value based on
the total number of outstanding IOs and average number of CQEs processed
per interrupt for an EQ.  Values are checked every 5 seconds.

The driver defaults to automatic selection. Automatic selection can
be disabled by the new lpfc_auto_imax module parameter.

Older hardware can only change interrupt coalescing by mailbox
command. Newer hardware supports the change via a register. The patch
supports both methods.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
parent 78e1d200
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -756,6 +756,7 @@ struct lpfc_hba {
	uint8_t  nvmet_support;	/* driver supports NVMET */
#define LPFC_NVMET_MAX_PORTS	32
	uint8_t  mds_diags_support;
	uint32_t initial_imax;

	/* HBA Config Parameters */
	uint32_t cfg_ack0;
@@ -777,6 +778,7 @@ struct lpfc_hba {
	uint32_t cfg_poll_tmo;
	uint32_t cfg_task_mgmt_tmo;
	uint32_t cfg_use_msi;
	uint32_t cfg_auto_imax;
	uint32_t cfg_fcp_imax;
	uint32_t cfg_fcp_cpu_map;
	uint32_t cfg_fcp_io_channel;
@@ -1050,6 +1052,7 @@ struct lpfc_hba {

	uint8_t temp_sensor_support;
	/* Fields used for heart beat. */
	unsigned long last_eqdelay_time;
	unsigned long last_completion_time;
	unsigned long skipped_hb;
	struct timer_list hb_tmofunc;
+19 −1
Original line number Diff line number Diff line
@@ -4481,9 +4481,11 @@ lpfc_fcp_imax_store(struct device *dev, struct device_attribute *attr,
		return -EINVAL;

	phba->cfg_fcp_imax = (uint32_t)val;
	phba->initial_imax = phba->cfg_fcp_imax;

	for (i = 0; i < phba->io_channel_irqs; i += LPFC_MAX_EQ_DELAY_EQID_CNT)
		lpfc_modify_hba_eq_delay(phba, i);
		lpfc_modify_hba_eq_delay(phba, i, LPFC_MAX_EQ_DELAY_EQID_CNT,
					 val);

	return strlen(buf);
}
@@ -4538,6 +4540,16 @@ lpfc_fcp_imax_init(struct lpfc_hba *phba, int val)
static DEVICE_ATTR(lpfc_fcp_imax, S_IRUGO | S_IWUSR,
		   lpfc_fcp_imax_show, lpfc_fcp_imax_store);

/*
 * lpfc_auto_imax: Controls Auto-interrupt coalescing values support.
 *       0       No auto_imax support
 *       1       auto imax on
 * Auto imax will change the value of fcp_imax on a per EQ basis, using
 * the EQ Delay Multiplier, depending on the activity for that EQ.
 * Value range [0,1]. Default value is 1.
 *
 * Note: lpfc_get_cfgparam() clears cfg_auto_imax when lpfc_fcp_imax is 0,
 * so auto imax only takes effect with a non-zero lpfc_fcp_imax.
 */
LPFC_ATTR_RW(auto_imax, 1, 0, 1, "Enable Auto imax");

/**
 * lpfc_state_show - Display current driver CPU affinity
 * @dev: class converted to a Scsi_host structure.
@@ -5164,6 +5176,7 @@ struct device_attribute *lpfc_hba_attrs[] = {
	&dev_attr_lpfc_task_mgmt_tmo,
	&dev_attr_lpfc_use_msi,
	&dev_attr_lpfc_nvme_oas,
	&dev_attr_lpfc_auto_imax,
	&dev_attr_lpfc_fcp_imax,
	&dev_attr_lpfc_fcp_cpu_map,
	&dev_attr_lpfc_fcp_io_channel,
@@ -6182,6 +6195,7 @@ lpfc_get_cfgparam(struct lpfc_hba *phba)
	lpfc_enable_SmartSAN_init(phba, lpfc_enable_SmartSAN);
	lpfc_use_msi_init(phba, lpfc_use_msi);
	lpfc_nvme_oas_init(phba, lpfc_nvme_oas);
	lpfc_auto_imax_init(phba, lpfc_auto_imax);
	lpfc_fcp_imax_init(phba, lpfc_fcp_imax);
	lpfc_fcp_cpu_map_init(phba, lpfc_fcp_cpu_map);
	lpfc_enable_hba_reset_init(phba, lpfc_enable_hba_reset);
@@ -6226,6 +6240,10 @@ lpfc_get_cfgparam(struct lpfc_hba *phba)
			phba->cfg_enable_fc4_type |= LPFC_ENABLE_FCP;
	}

	if (phba->cfg_auto_imax && !phba->cfg_fcp_imax)
		phba->cfg_auto_imax = 0;
	phba->initial_imax = phba->cfg_fcp_imax;

	/* A value of 0 means use the number of CPUs found in the system */
	if (phba->cfg_fcp_io_channel == 0)
		phba->cfg_fcp_io_channel = phba->sli4_hba.num_present_cpu;
+2 −2
Original line number Diff line number Diff line
@@ -3265,9 +3265,9 @@ __lpfc_idiag_print_eq(struct lpfc_queue *qp, char *eqtype,

	len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
			"\n%s EQ info: EQ-STAT[max:x%x noE:x%x "
			"bs:x%x proc:x%llx]\n",
			"bs:x%x proc:x%llx eqd %d]\n",
			eqtype, qp->q_cnt_1, qp->q_cnt_2, qp->q_cnt_3,
			(unsigned long long)qp->q_cnt_4);
			(unsigned long long)qp->q_cnt_4, qp->q_mode);
	len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
			"EQID[%02d], QE-CNT[%04d], QE-SZ[%04d], "
			"HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]",
+14 −0
Original line number Diff line number Diff line
@@ -197,6 +197,7 @@ struct lpfc_sli_intf {

/* Delay Multiplier constant */
#define LPFC_DMULT_CONST       651042
#define LPFC_DMULT_MAX         1023

/* Configuration of Interrupts / sec for entire HBA port */
#define LPFC_MIN_IMAX          5000
@@ -657,6 +658,15 @@ struct lpfc_register {
#define LPFC_CTL_PORT_ER1_OFFSET	0x40C
#define LPFC_CTL_PORT_ER2_OFFSET	0x410

#define LPFC_CTL_PORT_EQ_DELAY_OFFSET	0x418
#define lpfc_sliport_eqdelay_delay_SHIFT 16
#define lpfc_sliport_eqdelay_delay_MASK	0xffff
#define lpfc_sliport_eqdelay_delay_WORD	word0
#define lpfc_sliport_eqdelay_id_SHIFT	0
#define lpfc_sliport_eqdelay_id_MASK	0xfff
#define lpfc_sliport_eqdelay_id_WORD	word0
#define LPFC_SEC_TO_USEC		1000000

/* The following Registers apply to SLI4 if_type 0 UCNAs. They typically
 * reside in BAR 2.
 */
@@ -3258,6 +3268,10 @@ struct lpfc_sli4_parameters {
#define cfg_xib_SHIFT				4
#define cfg_xib_MASK				0x00000001
#define cfg_xib_WORD				word19
#define cfg_eqdr_SHIFT				8
#define cfg_eqdr_MASK				0x00000001
#define cfg_eqdr_WORD				word19
#define LPFC_NODELAY_MAX_IO		32
};

#define LPFC_SET_UE_RECOVERY		0x10
+103 −1
Original line number Diff line number Diff line
@@ -1249,6 +1249,12 @@ lpfc_hb_timeout_handler(struct lpfc_hba *phba)
	int retval, i;
	struct lpfc_sli *psli = &phba->sli;
	LIST_HEAD(completions);
	struct lpfc_queue *qp;
	unsigned long time_elapsed;
	uint32_t tick_cqe, max_cqe, val;
	uint64_t tot, data1, data2, data3;
	struct lpfc_register reg_data;
	void __iomem *eqdreg = phba->sli4_hba.u.if_type2.EQDregaddr;

	vports = lpfc_create_vport_work_array(phba);
	if (vports != NULL)
@@ -1263,6 +1269,95 @@ lpfc_hb_timeout_handler(struct lpfc_hba *phba)
		(phba->pport->fc_flag & FC_OFFLINE_MODE))
		return;

	if (phba->cfg_auto_imax) {
		if (!phba->last_eqdelay_time) {
			phba->last_eqdelay_time = jiffies;
			goto skip_eqdelay;
		}
		time_elapsed = jiffies - phba->last_eqdelay_time;
		phba->last_eqdelay_time = jiffies;

		tot = 0xffff;
		/* Check outstanding IO count */
		if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
			if (phba->nvmet_support) {
				spin_lock(&phba->sli4_hba.nvmet_io_lock);
				tot = phba->sli4_hba.nvmet_xri_cnt -
					phba->sli4_hba.nvmet_ctx_cnt;
				spin_unlock(&phba->sli4_hba.nvmet_io_lock);
			} else {
				tot = atomic_read(&phba->fc4NvmeIoCmpls);
				data1 = atomic_read(
					&phba->fc4NvmeInputRequests);
				data2 = atomic_read(
					&phba->fc4NvmeOutputRequests);
				data3 = atomic_read(
					&phba->fc4NvmeControlRequests);
				tot =  (data1 + data2 + data3) - tot;
			}
		}

		/* Interrupts per sec per EQ */
		val = phba->cfg_fcp_imax / phba->io_channel_irqs;
		tick_cqe = val / CONFIG_HZ; /* Per tick per EQ */

		/* Assume 1 CQE/ISR, calc max CQEs allowed for time duration */
		max_cqe = time_elapsed * tick_cqe;

		for (i = 0; i < phba->io_channel_irqs; i++) {
			/* Fast-path EQ */
			qp = phba->sli4_hba.hba_eq[i];
			if (!qp)
				continue;

			/* Use no EQ delay if we don't have many outstanding
			 * IOs, or if we are only processing 1 CQE/ISR or less.
			 * Otherwise, assume we can process up to lpfc_fcp_imax
			 * interrupts per HBA.
			 */
			if (tot < LPFC_NODELAY_MAX_IO ||
			    qp->EQ_cqe_cnt <= max_cqe)
				val = 0;
			else
				val = phba->cfg_fcp_imax;

			if (phba->sli.sli_flag & LPFC_SLI_USE_EQDR) {
				/* Use EQ Delay Register method */

				/* Convert for EQ Delay register */
				if (val) {
					/* First, interrupts per sec per EQ */
					val = phba->cfg_fcp_imax /
						phba->io_channel_irqs;

					/* us delay between each interrupt */
					val = LPFC_SEC_TO_USEC / val;
				}
				if (val != qp->q_mode) {
					reg_data.word0 = 0;
					bf_set(lpfc_sliport_eqdelay_id,
					       &reg_data, qp->queue_id);
					bf_set(lpfc_sliport_eqdelay_delay,
					       &reg_data, val);
					writel(reg_data.word0, eqdreg);
				}
			} else {
				/* Use mbox command method */
				if (val != qp->q_mode)
					lpfc_modify_hba_eq_delay(phba, i,
								 1, val);
			}

			/*
			 * val is cfg_fcp_imax or 0 for mbox delay or us delay
			 * between interrupts for EQDR.
			 */
			qp->q_mode = val;
			qp->EQ_cqe_cnt = 0;
		}
	}

skip_eqdelay:
	spin_lock_irq(&phba->pport->work_port_lock);

	if (time_after(phba->last_completion_time +
@@ -7257,6 +7352,9 @@ lpfc_sli4_bar0_register_memmap(struct lpfc_hba *phba, uint32_t if_type)
			phba->sli4_hba.conf_regs_memmap_p + LPFC_SLI_INTF;
		break;
	case LPFC_SLI_INTF_IF_TYPE_2:
		phba->sli4_hba.u.if_type2.EQDregaddr =
			phba->sli4_hba.conf_regs_memmap_p +
						LPFC_CTL_PORT_EQ_DELAY_OFFSET;
		phba->sli4_hba.u.if_type2.ERR1regaddr =
			phba->sli4_hba.conf_regs_memmap_p +
						LPFC_CTL_PORT_ER1_OFFSET;
@@ -8783,7 +8881,8 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba)
	}

	for (qidx = 0; qidx < io_channel; qidx += LPFC_MAX_EQ_DELAY_EQID_CNT)
		lpfc_modify_hba_eq_delay(phba, qidx);
		lpfc_modify_hba_eq_delay(phba, qidx, LPFC_MAX_EQ_DELAY_EQID_CNT,
					 phba->cfg_fcp_imax);

	return 0;

@@ -10252,6 +10351,9 @@ lpfc_get_sli4_parameters(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
	if (bf_get(cfg_xib, mbx_sli4_parameters) && phba->cfg_suppress_rsp)
		phba->sli.sli_flag |= LPFC_SLI_SUPPRESS_RSP;

	if (bf_get(cfg_eqdr, mbx_sli4_parameters))
		phba->sli.sli_flag |= LPFC_SLI_USE_EQDR;

	/* Make sure that sge_supp_len can be handled by the driver */
	if (sli4_params->sge_supp_len > LPFC_MAX_SGE_SIZE)
		sli4_params->sge_supp_len = LPFC_MAX_SGE_SIZE;
Loading