Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 9467a9b3, authored by Martin Peschke, committed by James Bottomley
Browse files

[SCSI] zfcp: Trace all triggers of error recovery activity



This patch allows any recovery event to be traced back to an exact
cause, e.g. a particular request identified by an id (address).

Signed-off-by: Martin Peschke <mp3@de.ibm.com>
Signed-off-by: Christof Schmitt <christof.schmitt@de.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
parent 698ec016
Loading
Loading
Loading
Loading
+16 −15
Original line number Diff line number Diff line
@@ -1326,10 +1326,10 @@ zfcp_nameserver_enqueue(struct zfcp_adapter *adapter)

#define ZFCP_LOG_AREA                   ZFCP_LOG_AREA_FC

static void
zfcp_fsf_incoming_els_rscn(struct zfcp_adapter *adapter,
			   struct fsf_status_read_buffer *status_buffer)
static void zfcp_fsf_incoming_els_rscn(struct zfcp_fsf_req *fsf_req)
{
	struct fsf_status_read_buffer *status_buffer = (void*)fsf_req->data;
	struct zfcp_adapter *adapter = fsf_req->adapter;
	struct fcp_rscn_head *fcp_rscn_head;
	struct fcp_rscn_element *fcp_rscn_element;
	struct zfcp_port *port;
@@ -1376,7 +1376,8 @@ zfcp_fsf_incoming_els_rscn(struct zfcp_adapter *adapter,
				ZFCP_LOG_INFO("incoming RSCN, trying to open "
					      "port 0x%016Lx\n", port->wwpn);
				zfcp_erp_port_reopen(port,
						     ZFCP_STATUS_COMMON_ERP_FAILED);
						     ZFCP_STATUS_COMMON_ERP_FAILED,
						     82, (u64)fsf_req);
				continue;
			}

@@ -1407,10 +1408,10 @@ zfcp_fsf_incoming_els_rscn(struct zfcp_adapter *adapter,
	}
}

static void
zfcp_fsf_incoming_els_plogi(struct zfcp_adapter *adapter,
			    struct fsf_status_read_buffer *status_buffer)
static void zfcp_fsf_incoming_els_plogi(struct zfcp_fsf_req *fsf_req)
{
	struct fsf_status_read_buffer *status_buffer = (void*)fsf_req->data;
	struct zfcp_adapter *adapter = fsf_req->adapter;
	struct fsf_plogi *els_plogi;
	struct zfcp_port *port;
	unsigned long flags;
@@ -1429,14 +1430,14 @@ zfcp_fsf_incoming_els_plogi(struct zfcp_adapter *adapter,
			       status_buffer->d_id,
			       zfcp_get_busid_by_adapter(adapter));
	} else {
		zfcp_erp_port_forced_reopen(port, 0);
		zfcp_erp_port_forced_reopen(port, 0, 83, (u64)fsf_req);
	}
}

static void
zfcp_fsf_incoming_els_logo(struct zfcp_adapter *adapter,
			   struct fsf_status_read_buffer *status_buffer)
static void zfcp_fsf_incoming_els_logo(struct zfcp_fsf_req *fsf_req)
{
	struct fsf_status_read_buffer *status_buffer = (void*)fsf_req->data;
	struct zfcp_adapter *adapter = fsf_req->adapter;
	struct fcp_logo *els_logo = (struct fcp_logo *) status_buffer->payload;
	struct zfcp_port *port;
	unsigned long flags;
@@ -1454,7 +1455,7 @@ zfcp_fsf_incoming_els_logo(struct zfcp_adapter *adapter,
			       status_buffer->d_id,
			       zfcp_get_busid_by_adapter(adapter));
	} else {
		zfcp_erp_port_forced_reopen(port, 0);
		zfcp_erp_port_forced_reopen(port, 0, 84, (u64)fsf_req);
	}
}

@@ -1481,12 +1482,12 @@ zfcp_fsf_incoming_els(struct zfcp_fsf_req *fsf_req)

	zfcp_san_dbf_event_incoming_els(fsf_req);
	if (els_type == LS_PLOGI)
		zfcp_fsf_incoming_els_plogi(adapter, status_buffer);
		zfcp_fsf_incoming_els_plogi(fsf_req);
	else if (els_type == LS_LOGO)
		zfcp_fsf_incoming_els_logo(adapter, status_buffer);
		zfcp_fsf_incoming_els_logo(fsf_req);
	else if ((els_type & 0xffff0000) == LS_RSCN)
		/* we are only concerned with the command, not the length */
		zfcp_fsf_incoming_els_rscn(adapter, status_buffer);
		zfcp_fsf_incoming_els_rscn(fsf_req);
	else
		zfcp_fsf_incoming_els_unknown(adapter, status_buffer);
}
+6 −6
Original line number Diff line number Diff line
@@ -172,7 +172,7 @@ zfcp_ccw_set_online(struct ccw_device *ccw_device)

	zfcp_erp_modify_adapter_status(adapter, 10, 0,
				       ZFCP_STATUS_COMMON_RUNNING, ZFCP_SET);
	zfcp_erp_adapter_reopen(adapter, ZFCP_STATUS_COMMON_ERP_FAILED);
	zfcp_erp_adapter_reopen(adapter, ZFCP_STATUS_COMMON_ERP_FAILED, 85, 0);
	zfcp_erp_wait(adapter);
	goto out;

@@ -197,7 +197,7 @@ zfcp_ccw_set_offline(struct ccw_device *ccw_device)

	down(&zfcp_data.config_sema);
	adapter = dev_get_drvdata(&ccw_device->dev);
	zfcp_erp_adapter_shutdown(adapter, 0);
	zfcp_erp_adapter_shutdown(adapter, 0, 86, 0);
	zfcp_erp_wait(adapter);
	zfcp_erp_thread_kill(adapter);
	up(&zfcp_data.config_sema);
@@ -224,13 +224,13 @@ zfcp_ccw_notify(struct ccw_device *ccw_device, int event)
		ZFCP_LOG_NORMAL("adapter %s: device gone\n",
				zfcp_get_busid_by_adapter(adapter));
		debug_text_event(adapter->erp_dbf,1,"dev_gone");
		zfcp_erp_adapter_shutdown(adapter, 0);
		zfcp_erp_adapter_shutdown(adapter, 0, 87, 0);
		break;
	case CIO_NO_PATH:
		ZFCP_LOG_NORMAL("adapter %s: no path\n",
				zfcp_get_busid_by_adapter(adapter));
		debug_text_event(adapter->erp_dbf,1,"no_path");
		zfcp_erp_adapter_shutdown(adapter, 0);
		zfcp_erp_adapter_shutdown(adapter, 0, 88, 0);
		break;
	case CIO_OPER:
		ZFCP_LOG_NORMAL("adapter %s: operational again\n",
@@ -240,7 +240,7 @@ zfcp_ccw_notify(struct ccw_device *ccw_device, int event)
					       ZFCP_STATUS_COMMON_RUNNING,
					       ZFCP_SET);
		zfcp_erp_adapter_reopen(adapter,
					ZFCP_STATUS_COMMON_ERP_FAILED);
					ZFCP_STATUS_COMMON_ERP_FAILED, 89, 0);
		break;
	}
	zfcp_erp_wait(adapter);
@@ -272,7 +272,7 @@ zfcp_ccw_shutdown(struct ccw_device *cdev)

	down(&zfcp_data.config_sema);
	adapter = dev_get_drvdata(&cdev->dev);
	zfcp_erp_adapter_shutdown(adapter, 0);
	zfcp_erp_adapter_shutdown(adapter, 0, 90, 0);
	zfcp_erp_wait(adapter);
	up(&zfcp_data.config_sema);
}
+134 −0
Original line number Diff line number Diff line
@@ -523,6 +523,7 @@ static struct debug_view zfcp_hba_dbf_view = {
/*
 * Tag strings for the recovery trace, one per record kind.
 * The table is indexed by the ZFCP_REC_DBF_ID_* record identifiers.
 */
static const char *zfcp_rec_dbf_tags[] = {
	[ZFCP_REC_DBF_ID_THREAD] = "thread",
	[ZFCP_REC_DBF_ID_TARGET] = "target",
	[ZFCP_REC_DBF_ID_TRIGGER] = "trigger",
};

static const char *zfcp_rec_dbf_ids[] = {
@@ -587,6 +588,89 @@ static const char *zfcp_rec_dbf_ids[] = {
	[59]	= "unit access denied open unit",
	[60]	= "shared unit access denied open unit",
	[61]	= "unit access denied fcp",
	[62]	= "request timeout",
	[63]	= "adisc link test reject or timeout",
	[64]	= "adisc link test d_id changed",
	[65]	= "adisc link test failed",
	[66]	= "recovery out of memory",
	[67]	= "adapter recovery repeated after state change",
	[68]	= "port recovery repeated after state change",
	[69]	= "unit recovery repeated after state change",
	[70]	= "port recovery follow-up after successful adapter recovery",
	[71]	= "adapter recovery escalation after failed adapter recovery",
	[72]	= "port recovery follow-up after successful physical port "
		  "recovery",
	[73]	= "adapter recovery escalation after failed physical port "
		  "recovery",
	[74]	= "unit recovery follow-up after successful port recovery",
	[75]	= "physical port recovery escalation after failed port "
		  "recovery",
	[76]	= "port recovery escalation after failed unit recovery",
	[77]	= "recovery opening nameserver port",
	[78]	= "duplicate request id",
	[79]	= "link down",
	[80]	= "exclusive read-only unit access unsupported",
	[81]	= "shared read-write unit access unsupported",
	[82]	= "incoming rscn",
	[83]	= "incoming plogi",
	[84]	= "incoming logo",
	[85]	= "online",
	[86]	= "offline",
	[87]	= "ccw device gone",
	[88]	= "ccw device no path",
	[89]	= "ccw device operational",
	[90]	= "ccw device shutdown",
	[91]	= "sysfs port addition",
	[92]	= "sysfs port removal",
	[93]	= "sysfs adapter recovery",
	[94]	= "sysfs unit addition",
	[95]	= "sysfs unit removal",
	[96]	= "sysfs port recovery",
	[97]	= "sysfs unit recovery",
	[98]	= "sequence number mismatch",
	[99]	= "link up",
	[100]	= "error state",
	[101]	= "status read physical port closed",
	[102]	= "link up status read",
	[103]	= "too many failed status read buffers",
	[104]	= "port handle not valid abort",
	[105]	= "lun handle not valid abort",
	[106]	= "port handle not valid ct",
	[107]	= "port handle not valid close port",
	[108]	= "port handle not valid close physical port",
	[109]	= "port handle not valid open unit",
	[110]	= "port handle not valid close unit",
	[111]	= "lun handle not valid close unit",
	[112]	= "port handle not valid fcp",
	[113]	= "lun handle not valid fcp",
	[114]	= "handle mismatch fcp",
	[115]	= "lun not valid fcp",
	[116]	= "qdio send failed",
	[117]	= "version mismatch",
	[118]	= "incompatible qtcb type",
	[119]	= "unknown protocol status",
	[120]	= "unknown fsf command",
	[121]	= "no recommendation for status qualifier",
	[122]	= "status read physical port closed in error",
	[123]	= "fc service class not supported ct",
	[124]	= "fc service class not supported els",
	[125]	= "need newer zfcp",
	[126]	= "need newer microcode",
	[127]	= "arbitrated loop not supported",
	[128]	= "unknown topology",
	[129]	= "qtcb size mismatch",
	[130]	= "unknown fsf status ecd",
	[131]	= "fcp request too big",
	[132]	= "fc service class not supported fcp",
	[133]	= "data direction not valid fcp",
	[134]	= "command length not valid fcp",
	[135]	= "status read act update",
	[136]	= "status read cfdc update",
	[137]	= "hbaapi port open",
	[138]	= "hbaapi unit open",
	[139]	= "hbaapi unit shutdown",
	[140]	= "qdio error",
	[141]	= "scsi host reset",
};

static int zfcp_rec_dbf_view_format(debug_info_t *id, struct debug_view *view,
@@ -613,6 +697,17 @@ static int zfcp_rec_dbf_view_format(debug_info_t *id, struct debug_view *view,
		zfcp_dbf_out(&p, "wwpn", "0x%016Lx", r->u.target.wwpn);
		zfcp_dbf_out(&p, "fcp_lun", "0x%016Lx", r->u.target.fcp_lun);
		break;
	case ZFCP_REC_DBF_ID_TRIGGER:
		zfcp_dbf_out(&p, "reference", "0x%016Lx", r->u.trigger.ref);
		zfcp_dbf_out(&p, "erp_action", "0x%016Lx", r->u.trigger.action);
		zfcp_dbf_out(&p, "requested", "%d", r->u.trigger.want);
		zfcp_dbf_out(&p, "executed", "%d", r->u.trigger.need);
		zfcp_dbf_out(&p, "wwpn", "0x%016Lx", r->u.trigger.wwpn);
		zfcp_dbf_out(&p, "fcp_lun", "0x%016Lx", r->u.trigger.fcp_lun);
		zfcp_dbf_out(&p, "adapter_status", "0x%08x", r->u.trigger.as);
		zfcp_dbf_out(&p, "port_status", "0x%08x", r->u.trigger.ps);
		zfcp_dbf_out(&p, "unit_status", "0x%08x", r->u.trigger.us);
		break;
	}
	sprintf(p, "\n");
	return (p - buf) + 1;
@@ -727,6 +822,45 @@ void zfcp_rec_dbf_event_unit(u8 id, u64 ref, struct zfcp_unit *unit)
				  unit->fcp_lun);
}

/**
 * zfcp_rec_dbf_event_trigger - record a trigger of error recovery in the trace
 * @id2: identifier naming the cause that triggered error recovery
 * @ref: additional reference for the cause (e.g. address of a request)
 * @want: error recovery action that was originally requested
 * @need: error recovery action that was actually initiated
 * @action: address of the error recovery action struct, 0 if there is none
 * @adapter: adapter the event belongs to; its record buffer and lock are used
 * @port: port involved, may be NULL (port fields are then left zeroed)
 * @unit: unit involved, may be NULL (unit fields are then left zeroed)
 *
 * The record is assembled in the adapter's shared record buffer while
 * holding rec_dbf_lock and is handed to debug_event() before the lock
 * is dropped again.
 */
void zfcp_rec_dbf_event_trigger(u8 id2, u64 ref, u8 want, u8 need, u64 action,
				struct zfcp_adapter *adapter,
				struct zfcp_port *port, struct zfcp_unit *unit)
{
	struct zfcp_rec_dbf_record *rec = &adapter->rec_dbf_buf;
	unsigned long irq_flags;
	int level;

	/* a non-zero action address is traced at level 1, otherwise level 4 */
	if (action)
		level = 1;
	else
		level = 4;

	spin_lock_irqsave(&adapter->rec_dbf_lock, irq_flags);

	/* start from an all-zero record so optional fields default to 0 */
	memset(rec, 0, sizeof(*rec));

	rec->id = ZFCP_REC_DBF_ID_TRIGGER;
	rec->id2 = id2;

	rec->u.trigger.ref = ref;
	rec->u.trigger.want = want;
	rec->u.trigger.need = need;
	rec->u.trigger.action = action;
	rec->u.trigger.as = atomic_read(&adapter->status);

	if (port != NULL) {
		rec->u.trigger.ps = atomic_read(&port->status);
		rec->u.trigger.wwpn = port->wwpn;
	}

	if (unit != NULL) {
		rec->u.trigger.us = atomic_read(&unit->status);
		rec->u.trigger.fcp_lun = unit->fcp_lun;
	}

	debug_event(adapter->rec_dbf, level, rec, sizeof(*rec));

	spin_unlock_irqrestore(&adapter->rec_dbf_lock, irq_flags);
}

static void
_zfcp_san_dbf_event_common_ct(const char *tag, struct zfcp_fsf_req *fsf_req,
			      u32 s_id, u32 d_id, void *buffer, int buflen)
+14 −0
Original line number Diff line number Diff line
@@ -295,18 +295,32 @@ struct zfcp_rec_dbf_record_target {
	u32 erp_count;
} __attribute__ ((packed));

/**
 * struct zfcp_rec_dbf_record_trigger - trace data for an error recovery trigger
 * @want: originally requested error recovery action
 * @need: error recovery action actually initiated
 * @as: adapter status at the time the event was recorded
 * @ps: port status at the time the event was recorded (0 if no port given)
 * @us: unit status at the time the event was recorded (0 if no unit given)
 * @ref: additional reference for the trigger (e.g. a request)
 * @action: address of the error recovery action struct
 * @wwpn: WWPN of the port (0 if no port given)
 * @fcp_lun: FCP LUN of the unit (0 if no unit given)
 *
 * Filled in by zfcp_rec_dbf_event_trigger().
 */
struct zfcp_rec_dbf_record_trigger {
	u8 want;
	u8 need;
	u32 as;
	u32 ps;
	u32 us;
	u64 ref;
	u64 action;
	u64 wwpn;
	u64 fcp_lun;
} __attribute__ ((packed));

/**
 * struct zfcp_rec_dbf_record - one entry of the recovery trace
 * @id: record kind, one of the ZFCP_REC_DBF_ID_* values
 * @id2: secondary identifier; for trigger records this is the identifier of
 *	 the error recovery trigger
 * @u: payload; which member is meaningful depends on the record kind
 *     (trigger records use @u.trigger)
 */
struct zfcp_rec_dbf_record {
	u8 id;
	u8 id2;
	union {
		struct zfcp_rec_dbf_record_thread thread;
		struct zfcp_rec_dbf_record_target target;
		struct zfcp_rec_dbf_record_trigger trigger;
	} u;
} __attribute__ ((packed));

/*
 * Record kinds of the recovery trace. The value is stored in the id field
 * of struct zfcp_rec_dbf_record and is used as index into the table of
 * tag strings (zfcp_rec_dbf_tags).
 */
enum {
	ZFCP_REC_DBF_ID_THREAD,
	ZFCP_REC_DBF_ID_TARGET,
	ZFCP_REC_DBF_ID_TRIGGER,
};

struct zfcp_hba_dbf_record_response {
+108 −105

File changed.

Preview size limit exceeded, changes collapsed.

Loading