Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 6d67ee9a authored by Shiju Jose, committed by David S. Miller
Browse files

net: hns3: Add enable and process common ecc errors



This patch adds enabling and processing of ECC errors from the
common HNS blocks: CMDQ (Command Queue),
IMP (Integrated Management Processor) and TQP (Task Queue Pair).

Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 99714195
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -209,6 +209,9 @@ enum hclge_opcode_type {

	/* Led command */
	HCLGE_OPC_LED_STATUS_CFG	= 0xB000,

	/* Error INT commands */
	HCLGE_COMMON_ECC_INT_CFG	= 0x1505,
};

#define HCLGE_TQP_REG_OFFSET		0x80000
+285 −0
Original line number Diff line number Diff line
@@ -3,7 +3,292 @@

#include "hclge_err.h"

/* Log messages for the IMP TCM ECC status bits, one entry per bit 0..15.
 * hclge_process_common_error() checks this table against the low 16 bits
 * of desc[0].data[0]. The empty last entry (NULL .msg) is what stops the
 * walk in hclge_log_error().
 */
static const struct hclge_hw_error hclge_imp_tcm_ecc_int[] = {
	{ .int_msk = BIT(0), .msg = "imp_itcm0_ecc_1bit_err" },
	{ .int_msk = BIT(1), .msg = "imp_itcm0_ecc_mbit_err" },
	{ .int_msk = BIT(2), .msg = "imp_itcm1_ecc_1bit_err" },
	{ .int_msk = BIT(3), .msg = "imp_itcm1_ecc_mbit_err" },
	{ .int_msk = BIT(4), .msg = "imp_itcm2_ecc_1bit_err" },
	{ .int_msk = BIT(5), .msg = "imp_itcm2_ecc_mbit_err" },
	{ .int_msk = BIT(6), .msg = "imp_itcm3_ecc_1bit_err" },
	{ .int_msk = BIT(7), .msg = "imp_itcm3_ecc_mbit_err" },
	{ .int_msk = BIT(8), .msg = "imp_dtcm0_mem0_ecc_1bit_err" },
	{ .int_msk = BIT(9), .msg = "imp_dtcm0_mem0_ecc_mbit_err" },
	{ .int_msk = BIT(10), .msg = "imp_dtcm0_mem1_ecc_1bit_err" },
	{ .int_msk = BIT(11), .msg = "imp_dtcm0_mem1_ecc_mbit_err" },
	{ .int_msk = BIT(12), .msg = "imp_dtcm1_mem0_ecc_1bit_err" },
	{ .int_msk = BIT(13), .msg = "imp_dtcm1_mem0_ecc_mbit_err" },
	{ .int_msk = BIT(14), .msg = "imp_dtcm1_mem1_ecc_1bit_err" },
	{ .int_msk = BIT(15), .msg = "imp_dtcm1_mem1_ecc_mbit_err" },
	{ /* sentinel */ }
};

/* Log messages for the two IMP ITCM4 ECC status bits.
 * hclge_process_common_error() checks this table against
 * desc[0].data[5] masked with HCLGE_IMP_ITCM4_ECC_INT_MASK. The empty
 * last entry (NULL .msg) ends the list.
 */
static const struct hclge_hw_error hclge_imp_itcm4_ecc_int[] = {
	{ .int_msk = BIT(0), .msg = "imp_itcm4_ecc_1bit_err" },
	{ .int_msk = BIT(1), .msg = "imp_itcm4_ecc_mbit_err" },
	{ /* sentinel */ }
};

/* Log messages for the CMDQ NIC memory ECC status bits, one entry per
 * bit 0..15. hclge_process_common_error() checks this table against the
 * low 16 bits of desc[0].data[1]. The empty last entry (NULL .msg) ends
 * the list.
 */
static const struct hclge_hw_error hclge_cmdq_nic_mem_ecc_int[] = {
	{ .int_msk = BIT(0), .msg = "cmdq_nic_rx_depth_ecc_1bit_err" },
	{ .int_msk = BIT(1), .msg = "cmdq_nic_rx_depth_ecc_mbit_err" },
	{ .int_msk = BIT(2), .msg = "cmdq_nic_tx_depth_ecc_1bit_err" },
	{ .int_msk = BIT(3), .msg = "cmdq_nic_tx_depth_ecc_mbit_err" },
	{ .int_msk = BIT(4), .msg = "cmdq_nic_rx_tail_ecc_1bit_err" },
	{ .int_msk = BIT(5), .msg = "cmdq_nic_rx_tail_ecc_mbit_err" },
	{ .int_msk = BIT(6), .msg = "cmdq_nic_tx_tail_ecc_1bit_err" },
	{ .int_msk = BIT(7), .msg = "cmdq_nic_tx_tail_ecc_mbit_err" },
	{ .int_msk = BIT(8), .msg = "cmdq_nic_rx_head_ecc_1bit_err" },
	{ .int_msk = BIT(9), .msg = "cmdq_nic_rx_head_ecc_mbit_err" },
	{ .int_msk = BIT(10), .msg = "cmdq_nic_tx_head_ecc_1bit_err" },
	{ .int_msk = BIT(11), .msg = "cmdq_nic_tx_head_ecc_mbit_err" },
	{ .int_msk = BIT(12), .msg = "cmdq_nic_rx_addr_ecc_1bit_err" },
	{ .int_msk = BIT(13), .msg = "cmdq_nic_rx_addr_ecc_mbit_err" },
	{ .int_msk = BIT(14), .msg = "cmdq_nic_tx_addr_ecc_1bit_err" },
	{ .int_msk = BIT(15), .msg = "cmdq_nic_tx_addr_ecc_mbit_err" },
	{ /* sentinel */ }
};

/* Log messages for the CMDQ RoCEE memory ECC status bits, one entry per
 * bit 0..15. hclge_process_common_error() checks this table against
 * desc[0].data[1] after shifting it right by
 * HCLGE_CMDQ_ROC_ECC_INT_SHIFT, i.e. against the upper half of that
 * word. The empty last entry (NULL .msg) ends the list.
 */
static const struct hclge_hw_error hclge_cmdq_rocee_mem_ecc_int[] = {
	{ .int_msk = BIT(0), .msg = "cmdq_rocee_rx_depth_ecc_1bit_err" },
	{ .int_msk = BIT(1), .msg = "cmdq_rocee_rx_depth_ecc_mbit_err" },
	{ .int_msk = BIT(2), .msg = "cmdq_rocee_tx_depth_ecc_1bit_err" },
	{ .int_msk = BIT(3), .msg = "cmdq_rocee_tx_depth_ecc_mbit_err" },
	{ .int_msk = BIT(4), .msg = "cmdq_rocee_rx_tail_ecc_1bit_err" },
	{ .int_msk = BIT(5), .msg = "cmdq_rocee_rx_tail_ecc_mbit_err" },
	{ .int_msk = BIT(6), .msg = "cmdq_rocee_tx_tail_ecc_1bit_err" },
	{ .int_msk = BIT(7), .msg = "cmdq_rocee_tx_tail_ecc_mbit_err" },
	{ .int_msk = BIT(8), .msg = "cmdq_rocee_rx_head_ecc_1bit_err" },
	{ .int_msk = BIT(9), .msg = "cmdq_rocee_rx_head_ecc_mbit_err" },
	{ .int_msk = BIT(10), .msg = "cmdq_rocee_tx_head_ecc_1bit_err" },
	{ .int_msk = BIT(11), .msg = "cmdq_rocee_tx_head_ecc_mbit_err" },
	{ .int_msk = BIT(12), .msg = "cmdq_rocee_rx_addr_ecc_1bit_err" },
	{ .int_msk = BIT(13), .msg = "cmdq_rocee_rx_addr_ecc_mbit_err" },
	{ .int_msk = BIT(14), .msg = "cmdq_rocee_tx_addr_ecc_1bit_err" },
	{ .int_msk = BIT(15), .msg = "cmdq_rocee_tx_addr_ecc_mbit_err" },
	{ /* sentinel */ }
};

/* Log messages for the TQP interrupt ECC status bits: bits 0..5 are the
 * 1-bit errors and bits 6..11 the multi-bit errors of the same six
 * memories. hclge_process_common_error() checks this table against
 * desc[0].data[3] shifted right by HCLGE_TQP_ECC_INT_SHIFT and masked
 * with HCLGE_TQP_ECC_INT_MASK. The empty last entry (NULL .msg) ends
 * the list.
 */
static const struct hclge_hw_error hclge_tqp_int_ecc_int[] = {
	{ .int_msk = BIT(0), .msg = "tqp_int_cfg_even_ecc_1bit_err" },
	{ .int_msk = BIT(1), .msg = "tqp_int_cfg_odd_ecc_1bit_err" },
	{ .int_msk = BIT(2), .msg = "tqp_int_ctrl_even_ecc_1bit_err" },
	{ .int_msk = BIT(3), .msg = "tqp_int_ctrl_odd_ecc_1bit_err" },
	{ .int_msk = BIT(4), .msg = "tx_que_scan_int_ecc_1bit_err" },
	{ .int_msk = BIT(5), .msg = "rx_que_scan_int_ecc_1bit_err" },
	{ .int_msk = BIT(6), .msg = "tqp_int_cfg_even_ecc_mbit_err" },
	{ .int_msk = BIT(7), .msg = "tqp_int_cfg_odd_ecc_mbit_err" },
	{ .int_msk = BIT(8), .msg = "tqp_int_ctrl_even_ecc_mbit_err" },
	{ .int_msk = BIT(9), .msg = "tqp_int_ctrl_odd_ecc_mbit_err" },
	{ .int_msk = BIT(10), .msg = "tx_que_scan_int_ecc_mbit_err" },
	{ .int_msk = BIT(11), .msg = "rx_que_scan_int_ecc_mbit_err" },
	{ /* sentinel */ }
};

/* hclge_log_error: report every error whose bit is set in a status word
 * @dev: device the warnings are logged against
 * @err_list: error table, terminated by an entry whose msg is NULL
 * @err_sts: status value tested against each entry's int_msk
 *
 * Emits one warning per table entry whose mask overlaps @err_sts; the
 * full status value is printed alongside each message.
 */
static void hclge_log_error(struct device *dev,
			    const struct hclge_hw_error *err_list,
			    u32 err_sts)
{
	const struct hclge_hw_error *entry;

	for (entry = err_list; entry->msg; entry++) {
		if (entry->int_msk & err_sts)
			dev_warn(dev, "%s [error status=0x%x] found\n",
				 entry->msg, err_sts);
	}
}

/* hclge_cmd_query_error: issue a read command for error information
 * @hdev: pointer to struct hclge_dev
 * @desc: descriptor array for the command; two entries are used when
 *        @flag is non-zero, otherwise one
 * @cmd:  command opcode
 * @flag: flag OR-ed into the first descriptor; non-zero selects the
 *        two-descriptor form of the command
 * @w_num: index of the data word in the first descriptor that receives
 *         @int_type; 0 means no word is written
 * @int_type: interrupt type whose error info is to be read
 *            (RAS-CE/RAS-NFE/RAS-FE etc).
 *
 * Returns the result of hclge_cmd_send(); a non-zero result is also
 * logged.
 */
static int hclge_cmd_query_error(struct hclge_dev *hdev,
				 struct hclge_desc *desc, u32 cmd,
				 u16 flag, u8 w_num,
				 enum hclge_err_int_type int_type)
{
	struct device *dev = &hdev->pdev->dev;
	int bd_num = flag ? 2 : 1;
	int status;

	hclge_cmd_setup_basic_desc(&desc[0], cmd, true);
	if (flag) {
		desc[0].flag |= cpu_to_le16(flag);
		hclge_cmd_setup_basic_desc(&desc[1], cmd, true);
	}

	if (w_num)
		desc[0].data[w_num] = cpu_to_le32(int_type);

	status = hclge_cmd_send(&hdev->hw, &desc[0], bd_num);
	if (!status)
		return 0;

	dev_err(dev, "query error cmd failed (%d)\n", status);
	return status;
}

/* hclge_cmd_clear_error: issue a write command that clears error status
 * @hdev: pointer to struct hclge_dev
 * @desc: descriptor array for the command; two entries are used when
 *        @flag is non-zero, otherwise one
 * @desc_src: optional descriptors whose six data words are copied into
 *            @desc; only looked at when @cmd is non-zero
 * @cmd:  command opcode, or 0 to pass @desc to hclge_cmd_reuse_desc()
 *        instead of setting it up afresh
 * @flag: flag OR-ed into the first descriptor; non-zero selects the
 *        two-descriptor form of the command
 *
 * Returns the result of hclge_cmd_send(); a non-zero result is also
 * logged.
 */
static int hclge_cmd_clear_error(struct hclge_dev *hdev,
				 struct hclge_desc *desc,
				 struct hclge_desc *desc_src,
				 u32 cmd, u16 flag)
{
	struct device *dev = &hdev->pdev->dev;
	int bd_num = flag ? 2 : 1;
	int status, word;

	if (!cmd) {
		/* no opcode given: recycle the caller's descriptors */
		hclge_cmd_reuse_desc(&desc[0], false);
		if (flag) {
			desc[0].flag |= cpu_to_le16(flag);
			hclge_cmd_reuse_desc(&desc[1], false);
		}
	} else {
		hclge_cmd_setup_basic_desc(&desc[0], cmd, false);
		if (flag) {
			desc[0].flag |= cpu_to_le16(flag);
			hclge_cmd_setup_basic_desc(&desc[1], cmd, false);
		}

		/* seed the payload from the previous command, if supplied */
		for (word = 0; desc_src && word < 6; word++) {
			desc[0].data[word] = desc_src[0].data[word];
			if (flag)
				desc[1].data[word] = desc_src[1].data[word];
		}
	}

	status = hclge_cmd_send(&hdev->hw, &desc[0], bd_num);
	if (!status)
		return 0;

	dev_err(dev, "clear error cmd failed (%d)\n", status);
	return status;
}

static int hclge_enable_common_error(struct hclge_dev *hdev, bool en)
{
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc[2];
	int ret;

	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_COMMON_ECC_INT_CFG, false);
	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
	hclge_cmd_setup_basic_desc(&desc[1], HCLGE_COMMON_ECC_INT_CFG, false);

	if (en) {
		/* enable COMMON error interrupts */
		desc[0].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN);
		desc[0].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN |
					HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN);
		desc[0].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN);
		desc[0].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN);
		desc[0].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN);
	} else {
		/* disable COMMON error interrupts */
		desc[0].data[0] = 0;
		desc[0].data[2] = 0;
		desc[0].data[3] = 0;
		desc[0].data[4] = 0;
		desc[0].data[5] = 0;
	}
	desc[1].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN_MASK);
	desc[1].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN_MASK |
				HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN_MASK);
	desc[1].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN_MASK);
	desc[1].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN_MASK);
	desc[1].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN_MASK);

	ret = hclge_cmd_send(&hdev->hw, &desc[0], 2);
	if (ret)
		dev_err(dev,
			"failed(%d) to enable/disable COMMON err interrupts\n",
			ret);

	return ret;
}

/* hclge_process_common_error: log and clear the COMMON block errors
 * @hdev: pointer to struct hclge_dev
 * @type: interrupt type; not used by this handler
 *
 * Queries the COMMON ECC status with a two-descriptor command, prints a
 * warning for every status bit that is set, then fills the second
 * descriptor with the clear masks and sends the clear command using the
 * same descriptors. Failures of either command are logged.
 */
static void hclge_process_common_error(struct hclge_dev *hdev,
				       enum hclge_err_int_type type)
{
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc[2];
	u32 imp_tcm_sts, cmdq_sts, tqp_imp_sts, itcm4_sts;
	int status;

	status = hclge_cmd_query_error(hdev, desc, HCLGE_COMMON_ECC_INT_CFG,
				       HCLGE_CMD_FLAG_NEXT, 0, 0);
	if (status) {
		dev_err(dev,
			"failed(=%d) to query COMMON error interrupt status\n",
			status);
		return;
	}

	/* pull the status words out of the first descriptor once */
	imp_tcm_sts = le32_to_cpu(desc[0].data[0]);
	cmdq_sts = le32_to_cpu(desc[0].data[1]);
	tqp_imp_sts = le32_to_cpu(desc[0].data[3]);
	itcm4_sts = le32_to_cpu(desc[0].data[5]);

	hclge_log_error(dev, hclge_imp_tcm_ecc_int,
			imp_tcm_sts & HCLGE_IMP_TCM_ECC_INT_MASK);

	/* NIC status sits in the low half of the word, RoCEE in the high */
	hclge_log_error(dev, hclge_cmdq_nic_mem_ecc_int,
			cmdq_sts & HCLGE_CMDQ_ECC_INT_MASK);
	hclge_log_error(dev, hclge_cmdq_rocee_mem_ecc_int,
			(cmdq_sts >> HCLGE_CMDQ_ROC_ECC_INT_SHIFT) &
			HCLGE_CMDQ_ECC_INT_MASK);

	if (tqp_imp_sts & BIT(0))
		dev_warn(dev, "imp_rd_data_poison_err found\n");

	hclge_log_error(dev, hclge_tqp_int_ecc_int,
			(tqp_imp_sts >> HCLGE_TQP_ECC_INT_SHIFT) &
			HCLGE_TQP_ECC_INT_MASK);

	hclge_log_error(dev, hclge_imp_itcm4_ecc_int,
			itcm4_sts & HCLGE_IMP_ITCM4_ECC_INT_MASK);

	/* load the clear masks into the second descriptor */
	desc[1].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_CLR_MASK);
	desc[1].data[1] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_CLR_MASK |
				      HCLGE_CMDQ_ROCEE_ECC_CLR_MASK);
	desc[1].data[3] = cpu_to_le32(HCLGE_TQP_IMP_ERR_CLR_MASK);
	desc[1].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_CLR_MASK);

	status = hclge_cmd_clear_error(hdev, desc, NULL, 0,
				       HCLGE_CMD_FLAG_NEXT);
	if (!status)
		return;

	dev_err(dev,
		"failed(%d) to clear COMMON error interrupt status\n",
		status);
}

/* Hardware blocks that have error handling in this file. Each entry
 * pairs a block's mask bit and name with its enable and process
 * callbacks; the zeroed last entry marks the end of the table.
 * NOTE(review): the code that walks this table is not visible here.
 */
static const struct hclge_hw_blk hw_blk[] = {
	{ .msk = BIT(5), .name = "COMMON",
	  .enable_error = hclge_enable_common_error,
	  .process_error = hclge_process_common_error, },
	{ /* sentinel */ }
};

+30 −0
Original line number Diff line number Diff line
@@ -11,6 +11,31 @@
#define HCLGE_RAS_REG_NFE_MASK   0xFF00
#define HCLGE_RAS_REG_NFE_SHIFT	8

/* Enable values and their masks for the COMMON ECC interrupt config
 * command: hclge_enable_common_error() writes the *_EN values into the
 * first descriptor and the *_EN_MASK values into the second.
 */
#define HCLGE_IMP_TCM_ECC_ERR_INT_EN	0xFFFF0000
#define HCLGE_IMP_TCM_ECC_ERR_INT_EN_MASK	0xFFFF0000
#define HCLGE_IMP_ITCM4_ECC_ERR_INT_EN	0x300
#define HCLGE_IMP_ITCM4_ECC_ERR_INT_EN_MASK	0x300
#define HCLGE_CMDQ_NIC_ECC_ERR_INT_EN	0xFFFF
#define HCLGE_CMDQ_NIC_ECC_ERR_INT_EN_MASK	0xFFFF
#define HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN	0xFFFF0000
#define HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN_MASK	0xFFFF0000
#define HCLGE_IMP_RD_POISON_ERR_INT_EN	0x0100
#define HCLGE_IMP_RD_POISON_ERR_INT_EN_MASK	0x0100
#define HCLGE_TQP_ECC_ERR_INT_EN	0x0FFF
#define HCLGE_TQP_ECC_ERR_INT_EN_MASK	0x0FFF

/* Masks and shifts hclge_process_common_error() applies to the query
 * response to isolate each block's status bits before logging.
 */
#define HCLGE_IMP_TCM_ECC_INT_MASK	0xFFFF
#define HCLGE_IMP_ITCM4_ECC_INT_MASK	0x3
#define HCLGE_CMDQ_ECC_INT_MASK		0xFFFF
#define HCLGE_CMDQ_ROC_ECC_INT_SHIFT	16
#define HCLGE_TQP_ECC_INT_MASK		0xFFF
#define HCLGE_TQP_ECC_INT_SHIFT		16
/* Values hclge_process_common_error() writes into the second descriptor
 * before sending the clear command.
 */
#define HCLGE_IMP_TCM_ECC_CLR_MASK	0xFFFF
#define HCLGE_IMP_ITCM4_ECC_CLR_MASK	0x3
#define HCLGE_CMDQ_NIC_ECC_CLR_MASK	0xFFFF
#define HCLGE_CMDQ_ROCEE_ECC_CLR_MASK	0xFFFF0000
#define HCLGE_TQP_IMP_ERR_CLR_MASK	0x0FFF0001

enum hclge_err_int_type {
	HCLGE_ERR_INT_MSIX = 0,
	HCLGE_ERR_INT_RAS_CE = 1,
@@ -26,6 +51,11 @@ struct hclge_hw_blk {
			      enum hclge_err_int_type type);
};

/* One entry of a hardware error table: a status bit mask and the
 * message to log when that bit is set. Tables are terminated by an
 * entry whose msg is NULL.
 */
struct hclge_hw_error {
	u32 int_msk;		/* bit(s) tested against the error status */
	const char *msg;	/* text printed when int_msk matches */
};

int hclge_hw_error_set_state(struct hclge_dev *hdev, bool state);
pci_ers_result_t hclge_process_ras_hw_error(struct hnae3_ae_dev *ae_dev);
#endif