Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 5984be90 authored by Jack Morgenstein, committed by Roland Dreier
Browse files

mlx4_core: Report thermal error events



Print an error message when a thermal error async event is reported by the HW.

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Dotan Barak <dotanb@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
parent e10903b0
Loading
Loading
Loading
Loading
+31 −1
Original line number Original line Diff line number Diff line
@@ -79,7 +79,8 @@ enum {
			       (1ull << MLX4_EVENT_TYPE_SRQ_LIMIT)	    | \
			       (1ull << MLX4_EVENT_TYPE_SRQ_LIMIT)	    | \
			       (1ull << MLX4_EVENT_TYPE_CMD)		    | \
			       (1ull << MLX4_EVENT_TYPE_CMD)		    | \
			       (1ull << MLX4_EVENT_TYPE_COMM_CHANNEL)       | \
			       (1ull << MLX4_EVENT_TYPE_COMM_CHANNEL)       | \
			       (1ull << MLX4_EVENT_TYPE_FLR_EVENT))
			       (1ull << MLX4_EVENT_TYPE_FLR_EVENT)	    | \
			       (1ull << MLX4_EVENT_TYPE_FATAL_WARNING))


static void eq_set_ci(struct mlx4_eq *eq, int req_not)
static void eq_set_ci(struct mlx4_eq *eq, int req_not)
{
{
@@ -443,6 +444,35 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
			queue_work(priv->mfunc.master.comm_wq,
			queue_work(priv->mfunc.master.comm_wq,
				   &priv->mfunc.master.slave_flr_event_work);
				   &priv->mfunc.master.slave_flr_event_work);
			break;
			break;

		case MLX4_EVENT_TYPE_FATAL_WARNING:
			if (eqe->subtype == MLX4_FATAL_WARNING_SUBTYPE_WARMING) {
				if (mlx4_is_master(dev))
					for (i = 0; i < dev->num_slaves; i++) {
						mlx4_dbg(dev, "%s: Sending "
							"MLX4_FATAL_WARNING_SUBTYPE_WARMING"
							" to slave: %d\n", __func__, i);
						if (i == dev->caps.function)
							continue;
						mlx4_slave_event(dev, i, eqe);
					}
				mlx4_err(dev, "Temperature Threshold was reached! "
					"Threshold: %d celsius degrees; "
					"Current Temperature: %d\n",
					be16_to_cpu(eqe->event.warming.warning_threshold),
					be16_to_cpu(eqe->event.warming.current_temperature));
			} else
				mlx4_warn(dev, "Unhandled event FATAL WARNING (%02x), "
					  "subtype %02x on EQ %d at index %u. owner=%x, "
					  "nent=0x%x, slave=%x, ownership=%s\n",
					  eqe->type, eqe->subtype, eq->eqn,
					  eq->cons_index, eqe->owner, eq->nent,
					  eqe->slave_id,
					  !!(eqe->owner & 0x80) ^
					  !!(eq->cons_index & eq->nent) ? "HW" : "SW");

			break;

		case MLX4_EVENT_TYPE_EEC_CATAS_ERROR:
		case MLX4_EVENT_TYPE_EEC_CATAS_ERROR:
		case MLX4_EVENT_TYPE_ECC_DETECT:
		case MLX4_EVENT_TYPE_ECC_DETECT:
		default:
		default:
+4 −0
Original line number Original line Diff line number Diff line
@@ -363,6 +363,10 @@ struct mlx4_eqe {
		struct {
		struct {
			__be32	slave_id;
			__be32	slave_id;
		} __packed flr_event;
		} __packed flr_event;
		struct {
			__be16  current_temperature;
			__be16  warning_threshold;
		} __packed warming;
	}			event;
	}			event;
	u8			slave_id;
	u8			slave_id;
	u8			reserved3[2];
	u8			reserved3[2];
+5 −0
Original line number Original line Diff line number Diff line
@@ -133,6 +133,7 @@ enum mlx4_event {
	MLX4_EVENT_TYPE_CMD		   = 0x0a,
	MLX4_EVENT_TYPE_CMD		   = 0x0a,
	MLX4_EVENT_TYPE_VEP_UPDATE	   = 0x19,
	MLX4_EVENT_TYPE_VEP_UPDATE	   = 0x19,
	MLX4_EVENT_TYPE_COMM_CHANNEL	   = 0x18,
	MLX4_EVENT_TYPE_COMM_CHANNEL	   = 0x18,
	MLX4_EVENT_TYPE_FATAL_WARNING	   = 0x1b,
	MLX4_EVENT_TYPE_FLR_EVENT	   = 0x1c,
	MLX4_EVENT_TYPE_FLR_EVENT	   = 0x1c,
	MLX4_EVENT_TYPE_NONE		   = 0xff,
	MLX4_EVENT_TYPE_NONE		   = 0xff,
};
};
@@ -142,6 +143,10 @@ enum {
	MLX4_PORT_CHANGE_SUBTYPE_ACTIVE	= 4
	MLX4_PORT_CHANGE_SUBTYPE_ACTIVE	= 4
};
};


/*
 * Subtype values for MLX4_EVENT_TYPE_FATAL_WARNING events; the EQ handler
 * compares eqe->subtype against these.
 */
enum {
	/* Temperature threshold reached (thermal warning). */
	MLX4_FATAL_WARNING_SUBTYPE_WARMING = 0,
};

enum {
enum {
	MLX4_PERM_LOCAL_READ	= 1 << 10,
	MLX4_PERM_LOCAL_READ	= 1 << 10,
	MLX4_PERM_LOCAL_WRITE	= 1 << 11,
	MLX4_PERM_LOCAL_WRITE	= 1 << 11,