Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c2fb3db2 authored by Mikhael Goikhman's avatar Mikhael Goikhman Committed by Saeed Mahameed
Browse files

net/mlx5: Rework handling of port module events



Add explicit HW defined error values. For simplicity, keep counters for all
statuses starting from 0, although currently status=0 is not used.

Additionally, when HW signals an unexpected cable status, it is reported
now rather than ignored. And status counter is now updated on errors.

Signed-off-by: default avatarMikhael Goikhman <migo@mellanox.com>
Signed-off-by: default avatarSaeed Mahameed <saeedm@mellanox.com>
parent 7300375f
Loading
Loading
Loading
Loading
+4 −4
Original line number Diff line number Diff line
@@ -1087,13 +1087,13 @@ static void mlx5e_grp_per_prio_update_stats(struct mlx5e_priv *priv)
}

static const struct counter_desc mlx5e_pme_status_desc[] = {
	{ "module_unplug", 8 },
	{ "module_unplug",       sizeof(u64) * MLX5_MODULE_STATUS_UNPLUGGED },
};

static const struct counter_desc mlx5e_pme_error_desc[] = {
	{ "module_bus_stuck", 16 },       /* bus stuck (I2C or data shorted) */
	{ "module_high_temp", 48 },       /* high temperature */
	{ "module_bad_shorted", 56 },    /* bad or shorted cable/module */
	{ "module_bus_stuck",    sizeof(u64) * MLX5_MODULE_EVENT_ERROR_BUS_STUCK },
	{ "module_high_temp",    sizeof(u64) * MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE },
	{ "module_bad_shorted",  sizeof(u64) * MLX5_MODULE_EVENT_ERROR_BAD_CABLE },
};

#define NUM_PME_STATUS_STATS		ARRAY_SIZE(mlx5e_pme_status_desc)
+52 −31
Original line number Diff line number Diff line
@@ -157,23 +157,43 @@ static int temp_warn(struct notifier_block *nb, unsigned long type, void *data)
}

/* MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
static const char *mlx5_pme_status[MLX5_MODULE_STATUS_NUM] = {
	"Cable plugged",   /* MLX5_MODULE_STATUS_PLUGGED    = 0x1 */
	"Cable unplugged", /* MLX5_MODULE_STATUS_UNPLUGGED  = 0x2 */
	"Cable error",     /* MLX5_MODULE_STATUS_ERROR      = 0x3 */
};
static const char *mlx5_pme_status_to_string(enum port_module_event_status_type status)
{
	switch (status) {
	case MLX5_MODULE_STATUS_PLUGGED:
		return "Cable plugged";
	case MLX5_MODULE_STATUS_UNPLUGGED:
		return "Cable unplugged";
	case MLX5_MODULE_STATUS_ERROR:
		return "Cable error";
	default:
		return "Unknown status";
	}
}

static const char *mlx5_pme_error[MLX5_MODULE_EVENT_ERROR_NUM] = {
	"Power budget exceeded",
	"Long Range for non MLNX cable",
	"Bus stuck(I2C or data shorted)",
	"No EEPROM/retry timeout",
	"Enforce part number list",
	"Unknown identifier",
	"High Temperature",
	"Bad or shorted cable/module",
	"Unknown status",
};
static const char *mlx5_pme_error_to_string(enum port_module_event_error_type error)
{
	switch (error) {
	case MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED:
		return "Power budget exceeded";
	case MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX:
		return "Long Range for non MLNX cable";
	case MLX5_MODULE_EVENT_ERROR_BUS_STUCK:
		return "Bus stuck (I2C or data shorted)";
	case MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT:
		return "No EEPROM/retry timeout";
	case MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST:
		return "Enforce part number list";
	case MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER:
		return "Unknown identifier";
	case MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE:
		return "High Temperature";
	case MLX5_MODULE_EVENT_ERROR_BAD_CABLE:
		return "Bad or shorted cable/module";
	default:
		return "Unknown error";
	}
}

/* type == MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
static int port_module(struct notifier_block *nb, unsigned long type, void *data)
@@ -185,6 +205,7 @@ static int port_module(struct notifier_block *nb, unsigned long type, void *data
	enum port_module_event_status_type module_status;
	enum port_module_event_error_type error_type;
	struct mlx5_eqe_port_module *module_event_eqe;
	const char *status_str, *error_str;
	u8 module_num;

	module_event_eqe = &eqe->data.port_module;
@@ -193,28 +214,28 @@ static int port_module(struct notifier_block *nb, unsigned long type, void *data
			PORT_MODULE_EVENT_MODULE_STATUS_MASK;
	error_type = module_event_eqe->error_type &
		     PORT_MODULE_EVENT_ERROR_TYPE_MASK;
	if (module_status < MLX5_MODULE_STATUS_ERROR) {
		events->pme_stats.status_counters[module_status - 1]++;
	} else if (module_status == MLX5_MODULE_STATUS_ERROR) {
		if (error_type >= MLX5_MODULE_EVENT_ERROR_UNKNOWN)
			/* Unknown error type */
			error_type = MLX5_MODULE_EVENT_ERROR_UNKNOWN;

	if (module_status < MLX5_MODULE_STATUS_NUM)
		events->pme_stats.status_counters[module_status]++;
	status_str = mlx5_pme_status_to_string(module_status);

	if (module_status == MLX5_MODULE_STATUS_ERROR) {
		if (error_type < MLX5_MODULE_EVENT_ERROR_NUM)
			events->pme_stats.error_counters[error_type]++;
		error_str = mlx5_pme_error_to_string(error_type);
	}

	if (!printk_ratelimit())
		return NOTIFY_OK;

	if (module_status < MLX5_MODULE_STATUS_ERROR)
	if (module_status == MLX5_MODULE_STATUS_ERROR)
		mlx5_core_err(events->dev,
			      "Port module event[error]: module %u, %s, %s\n",
			      module_num, status_str, error_str);
	else
		mlx5_core_info(events->dev,
			       "Port module event: module %u, %s\n",
			       module_num, mlx5_pme_status[module_status - 1]);

	else if (module_status == MLX5_MODULE_STATUS_ERROR)
		mlx5_core_info(events->dev,
			       "Port module event[error]: module %u, %s, %s\n",
			       module_num, mlx5_pme_status[module_status - 1],
			       mlx5_pme_error[error_type]);
			       module_num, status_str);

	return NOTIFY_OK;
}
+9 −10
Original line number Diff line number Diff line
@@ -51,19 +51,18 @@ enum port_module_event_status_type {
	MLX5_MODULE_STATUS_PLUGGED   = 0x1,
	MLX5_MODULE_STATUS_UNPLUGGED = 0x2,
	MLX5_MODULE_STATUS_ERROR     = 0x3,
	MLX5_MODULE_STATUS_NUM       = 0x3,
	MLX5_MODULE_STATUS_NUM,
};

enum  port_module_event_error_type {
	MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED,
	MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX_CABLE_MODULE,
	MLX5_MODULE_EVENT_ERROR_BUS_STUCK,
	MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT,
	MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST,
	MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER,
	MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE,
	MLX5_MODULE_EVENT_ERROR_BAD_CABLE,
	MLX5_MODULE_EVENT_ERROR_UNKNOWN,
	MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED    = 0x0,
	MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX  = 0x1,
	MLX5_MODULE_EVENT_ERROR_BUS_STUCK                = 0x2,
	MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT  = 0x3,
	MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST = 0x4,
	MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER       = 0x5,
	MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE         = 0x6,
	MLX5_MODULE_EVENT_ERROR_BAD_CABLE                = 0x7,
	MLX5_MODULE_EVENT_ERROR_NUM,
};