Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 760c0f8f authored by qctecmdr Service's avatar qctecmdr Service Committed by Gerrit - the friendly Code Review server
Browse files

Merge "drivers: edac: Add GIC RAM error reporting driver"

parents 068defd1 ab577196
Loading
Loading
Loading
Loading
+40 −0
Original line number Diff line number Diff line
* GIC EDAC node:

GIC EDAC node is defined to describe on-chip error or fault detection and correction
for GIC RAM. GICT(GIC trace and debug) register map describes the syndrome information
and GICT support is available from GIC-600 onwards.

GIC EDAC reports all Single Bit Error and Double Bit Error found in GIC RAM.
Current GIC ECC implementation is interrupt based. GIC scrub feature helps
to avoid potential errors accumulation of particular error location by a write-back
of all valid RAM entries periodically.

GICT identifies software programming errors, correctable and uncorrectable
errors of Shared Peripheral Interrupt(SPI) RAM, Software Generated Interrupt(SGI) RAM,
Private Peripheral Interrupt(PPI) RAM, Locality-Specific Peripheral Interrupt(LPI) RAM
and Interrupt Translation Service (ITS) RAM.
For every error record following information available -
	ERRXSTATUS - Error Record Status Register
	ERRXMISC0 - Error Record Miscellaneous Register 0
	ERRXMISC1 - Error Record Miscellaneous Register 1
	ERRXADDR  - Error Record Address Resister

The following section describes the DT node binding for gic-erp.

Required properties:
- compatible		: "arm,gic-600-erp".
- reg-names		: GIC Trace, GIC Distributor, GIC Redistributor bases
- interrupt-config	: SPI interrupt numbers of Fault and Error interrupts
			  to configure GICT_ERRORQCR0, GICT_ERRORQCR1 registers respectively.
- interrupts		: Interrupts for Fault and Error IRQs

gict: gict@17a20000 {
	compatible = "arm,gic-600-erp";
	reg = <0x17a20000 0x10000>;	/* GICT */
	reg-names = "gict-base";
	interrupt-config = <46, 17>;
	interrupt-names = "gict-fault", "gict-err";
	interrupts = <GIC_SPI 46 IRQ_TYPE_LEVEL_HIGH>,
		     <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>;
};
+33 −0
Original line number Diff line number Diff line
@@ -492,6 +492,39 @@ config EDAC_KRYO_ARM64_PANIC_ON_UE
	   For production builds, you should probably say 'N' here.


config EDAC_GIC
	depends on ARM_GIC_V3
	tristate "GIC ECC"
	help
	   Supports error detection and correction on the
	   GIC RAMs. Reports correctable and uncorrectable errors caught by
	   GIC ECC mechanism. GICT(GIC Trace and debug) register map page
	   describes the syndrome registers GIC and GICT support is available
	   from GIC600 onwards.
	   For debugging issues having to do with stability and overall system
	   health, you should probably say 'Y' here.

config EDAC_GIC_PANIC_ON_CE
	depends on EDAC_GIC
	bool "Panic on correctable errors - GIC"
	help
	   Forcibly cause a kernel panic if an correctable error (CE) is
	   detected, even though the error is (by definition) correctable and
	   would otherwise result in no adverse system effects. This can reduce
	   debugging times on hardware which may be operating at voltages or
	   frequencies outside normal specification.
	   For production builds, you should definitely say 'N' here.

config EDAC_GIC_PANIC_ON_UE
	depends on EDAC_GIC
	bool "Panic on un-correctable errors - GIC"
	help
	   Forcibly cause a kernel panic if an uncorrectable error (UE) is
	   detected. This can reduce debugging times on hardware which may
	   be operating at voltages or frequencies outside
	   normal specification.
	   For production builds, you should definitely say 'N' here.

config EDAC_XGENE
	tristate "APM X-Gene SoC"
	depends on (ARM64 || COMPILE_TEST)
+1 −0
Original line number Diff line number Diff line
@@ -44,6 +44,7 @@ obj-$(CONFIG_EDAC_X38) += x38_edac.o
obj-$(CONFIG_EDAC_I82860)		+= i82860_edac.o
obj-$(CONFIG_EDAC_R82600)		+= r82600_edac.o
obj-$(CONFIG_EDAC_KRYO_ARM64)		+= kryo_arm64_edac.o
obj-$(CONFIG_EDAC_GIC)			+= gic600_edac.o

amd64_edac_mod-y := amd64_edac.o
amd64_edac_mod-$(CONFIG_EDAC_DEBUG) += amd64_edac_dbg.o
+448 −0
Original line number Diff line number Diff line
/* Copyright (c) 2018, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#define pr_fmt(fmt) "GICT: %s(): " fmt, __func__
#include <linux/kernel.h>
#include <linux/edac.h>
#include <linux/mutex.h>
#include <linux/of_device.h>
#include <linux/of_address.h>
#include <linux/platform_device.h>
#include <linux/interrupt.h>
#include <linux/of_irq.h>
#include <soc/qcom/scm.h>

#include "edac_mc.h"
#include "edac_device.h"

#ifdef CONFIG_EDAC_GIC_PANIC_ON_CE
#define GICT_PANIC_ON_CE 1
#else
#define GICT_PANIC_ON_CE 0
#endif

#ifdef CONFIG_EDAC_GIC_PANIC_ON_UE
#define GICT_PANIC_ON_UE 1
#else
#define GICT_PANIC_ON_UE 0
#endif

#define EDAC_NODE		"GICT"
#define RECORD_WIDTH(n)		(64 * n)
#define GICT_ERR_FR(n)		(RECORD_WIDTH(n) + 0x00)
#define GICT_ERR_CTRL(n)	(RECORD_WIDTH(n) + 0x08)
#define GICT_ERR_STATUS(n)	(RECORD_WIDTH(n) + 0x10)
#define GICT_ERR_ADDR(n)	(RECORD_WIDTH(n) + 0x18)
#define GICT_ERR_MISC0(n)	(RECORD_WIDTH(n) + 0x20)
#define GICT_ERR_MISC1(n)	(RECORD_WIDTH(n) + 0x28)
#define GICT_ERRGSR		0xE000
#define GICT_ERRDEVARCH		0xFFBC
#define GICT_ERRIDR		0xFFC8
#define GICT_ERRIRQCR0		0xE800
#define GICT_ERRIRQCR1		(GICT_ERRIRQCR0+0x8)

#define GICD_FCTLR		0x0020
#define GICR_FCTLR		0x0020
#define GICR_OFFSET		0x20000


#define RECORD0		"Software error in GICD programming(UEO)"
#define RECORD1		"Correctable SPI RAM errors(CE)"
#define RECORD2		"Uncorrectable SPI RAM errors(UER)"
#define RECORD3		"Correctable SGI RAM errors(CE)"
#define RECORD4		"Uncorrectable SGI RAM errors(UER)"
#define RECORD5		"Correctable TGT cache errors(CE)"
#define RECORD6		"Correctable TGT cache errors(UER)"
#define RECORD7		"Correctable PPI RAM errors(CE)"
#define RECORD8		"Uncorrectable PPI RAM errors(UER)"
#define RECORD9		"Correctable LPI RAM errors(CE)"
#define RECORD10	"Uncorrectable LPI RAM errors(UER)"
#define RECORD11	"Correctable ITS RAM errors(CE)"
#define RECORD12	"Uncorrectable ITS RAM errors(UEO)"
#define RECORD13	"Uncorrectable 2xITS RAM errors(UER)"
#define RECORD14	"Uncorrectable 2xITS RAM errors(UER)"
#define RECORD(N)	RECORD##N

#define ERR_FR_UI_MASK			GENMASK(5, 4)
#define ERR_FR_FI_MASK			GENMASK(7, 6)
#define ERR_FR_UE_MASK			GENMASK(9, 8)
#define ERR_FR_CFI_MASK			GENMASK(11, 10)

#define ERR_CTRL_UI_MASK		BIT(2)
#define ERR_CTRL_FI_MASK		BIT(3)
#define ERR_CTRL_UE_MASK		BIT(4)
#define ERR_CTRL_CFI_MASK		BIT(8)

#define ERR_STATUS_SERR_MASK		GENMASK(7, 0)
#define ERR_STATUS_IERR_MASK		GENMASK(15, 8)
#define ERR_STATUS_UET_MASK		GENMASK(21, 20)
#define ERR_STATUS_CE_MASK		GENMASK(25, 24)
#define ERR_STATUS_MV_MASK		BIT(26)
#define ERR_STATUS_OF_MASK		BIT(27)
#define ERR_STATUS_ER_MASK		BIT(28)
#define ERR_STATUS_UE_MASK		BIT(29)
#define ERR_STATUS_V_MASK		BIT(30)
#define ERR_STATUS_AV_MASK		BIT(31)

#define ERR_MISC0_COUNT_SHIFT		32
#define ERR_MISC0_COUNT_MASK		GENMASK(39, 32)
#define FAULT_THRESHOLD			0xFF

#define ERR_IRQCR_SPIID_MASK		GENMASK(9, 0)

#define ERR_ADDR_PADDR_MASK		GENMASK(47, 0)
#define ERR_ADDR_NS_MASK		BIT(63)

struct errors_edac {
	const char *const msg;
	void (*func)(struct edac_device_ctl_info *edac_dev,
			int inst_nr, int block_nr, const char *msg);
};
#define GICT_IRQS		2
#define GICT_FAULT_IRQ_IDX	0
#define GICT_ERROR_IRQ_IDX	1
#define GICT_BUFFER_SIZE	32
static u32 gict_buffer[GICT_BUFFER_SIZE] __aligned(PAGE_SIZE);

static void gict_spi_recovery(struct edac_device_ctl_info *edac_dev,
		int inst_nr, int block_nr, const char *msg);

static const struct errors_edac errors[] = {
	{RECORD(0), edac_device_handle_ue },
	{RECORD(1), edac_device_handle_ce },
	{RECORD(2), gict_spi_recovery },
	{RECORD(3), edac_device_handle_ce },
	{RECORD(4), edac_device_handle_ue },
	{RECORD(5), edac_device_handle_ce },
	{RECORD(6), edac_device_handle_ce },
	{RECORD(7), edac_device_handle_ce },
	{RECORD(8), edac_device_handle_ue },
	{RECORD(9), edac_device_handle_ce },
	{RECORD(10), edac_device_handle_ue },
	{RECORD(11), edac_device_handle_ce },
	{RECORD(12), edac_device_handle_ue },
	{RECORD(13), edac_device_handle_ue },
	{RECORD(14), edac_device_handle_ue },
};

struct erp_drvdata {
	struct edac_device_ctl_info *edev_ctl;
	void __iomem *base;
	u32 interrupt_config[GICT_IRQS];
	u32 max_records;
	struct mutex mutex;
};
static struct erp_drvdata *gict_edac;

static inline void gict_write(struct erp_drvdata *drv, u32 val, u32 off)
{
	writel_relaxed(val, drv->base + off);
}

static inline u32 gict_read(struct erp_drvdata *drv, u32 off)
{
	return readl_relaxed(drv->base + off);
}

static inline u64 gict_readq(struct erp_drvdata *drv, u32 off)
{
	return readq_relaxed(drv->base + off);
}

static inline void gict_writeq(struct erp_drvdata *drv, u64 val, u32 off)
{
	writeq_relaxed(val, drv->base + off);
}

#define for_each_bit(i, buf, max) \
	for (i = find_first_bit((unsigned long *)buf, max); \
	     i < max; \
	     i = find_next_bit((unsigned long *)buf, max, i + 1))

#define SCM_SVC_GIC			0x1D
#define GIC_ERROR_RECOVERY_SMC_ID	0x01
#define MAX_IRQS			1023
/* RECORD(2) - Recovery of GIC SPI interrupts */
static void gict_spi_recovery(struct edac_device_ctl_info *edac_dev,
			int inst_nr, int block_nr, const char *msg)
{
	struct scm_desc desc;
	u64 i;
	int ret;

	/*
	 * SPIs that are in the error state can be determined
	 * by reading the GICD_IERRRn register.
	 * Secure side reads GICD_IERRRn register into gict_buffer.
	 */
	memset(&gict_buffer, 0, sizeof(gict_buffer));
	desc.args[0] = virt_to_phys(gict_buffer);
	desc.args[1] = sizeof(gict_buffer);
	desc.arginfo = SCM_ARGS(2, SCM_RW, SCM_VAL);

	ret = scm_call2(SCM_SIP_FNID(SCM_SVC_GIC,
			GIC_ERROR_RECOVERY_SMC_ID), &desc);

	if (ret) {
		pr_warn("Recovery SCM call failed\n");
		return;
	}

	for_each_bit(i, gict_buffer, MAX_IRQS)
		pr_warn("GICT: Corrupted SPI:%d", i);
}

static inline void gict_dump_err_record(int record, u64 errxstatus,
			u64 errxmisc0, u64 errxmisc1, u64 errxaddr)
{
	edac_printk(KERN_INFO, EDAC_NODE, "RECORD: %d", record);
	edac_printk(KERN_INFO, EDAC_NODE, "%s\n", errors[record].msg);
	edac_printk(KERN_INFO, EDAC_NODE, "ERRXSTATUS: %llx\n", errxstatus);
	edac_printk(KERN_INFO, EDAC_NODE, "ERRXMISC0: %llx\n", errxmisc0);
	edac_printk(KERN_INFO, EDAC_NODE, "ERRXMISC1: %llx\n", errxmisc1);
	edac_printk(KERN_INFO, EDAC_NODE, "ERRXADDR: %llx\n", errxaddr);
}

static void handle_record(int record, int level,
		struct edac_device_ctl_info *edev_ctl)
{
	errors[record].func(edev_ctl, raw_smp_processor_id(),
					level, errors[record].msg);
}

static void gict_check_error_records(struct erp_drvdata *drv)
{
	u64 errxstatus, errxaddr, errxmisc0, errxmisc1, errgsr, i;

	errgsr = gict_readq(drv, GICT_ERRGSR);
	/*
	 * Lets first dump all error records
	 * details which are having a valid status.
	 */
	for_each_bit(i, &errgsr, drv->max_records) {
		errxstatus = gict_readq(drv, GICT_ERR_STATUS(i));
		if (errxstatus & ERR_STATUS_V_MASK) {
			errxaddr = gict_readq(drv, GICT_ERR_ADDR(i));
			errxmisc0 = gict_readq(drv, GICT_ERR_MISC0(i));
			errxmisc1 = gict_readq(drv, GICT_ERR_MISC1(i));
			gict_dump_err_record(i, errxstatus,
				errxmisc0, errxmisc1, errxaddr);
		}
	}
}

static void gict_handle_records(struct erp_drvdata *drv)
{
	u64 errxstatus, errgsr, errxstatus_ack, i;

	errgsr = gict_readq(drv, GICT_ERRGSR);

	for_each_bit(i, &errgsr, drv->max_records) {
		errxstatus = gict_readq(drv, GICT_ERR_STATUS(i));
		errxstatus_ack = 0;
		if (errxstatus & ERR_STATUS_V_MASK) {
			handle_record(i, 0, drv->edev_ctl);
			errxstatus_ack = ERR_STATUS_CE_MASK |
				ERR_STATUS_UE_MASK | ERR_STATUS_UET_MASK |
				ERR_STATUS_OF_MASK | ERR_STATUS_MV_MASK |
				ERR_STATUS_AV_MASK | ERR_STATUS_V_MASK;
			gict_write(drv, errxstatus_ack, GICT_ERR_STATUS(i));
		}

	}
}

static void configure_thresholds(struct erp_drvdata *drv)
{
	u64 errxmisc0, i;

	/*
	 * Configure ERRXMISC0 count to 0xFF so that fault
	 * interrupt raised on every error observed.
	 */
	for (i = 0; i <= drv->max_records; i++) {
		errxmisc0 = gict_readq(drv, GICT_ERR_MISC0(i));
		errxmisc0 |= (u64)FAULT_THRESHOLD << ERR_MISC0_COUNT_SHIFT;
		gict_writeq(drv, errxmisc0, GICT_ERR_MISC0(i));
	}

}

static irqreturn_t gict_fault_handler(int irq, void *drvdata)
{
	struct erp_drvdata *drv = drvdata;

	mutex_lock(&drv->mutex);
	gict_check_error_records(drv);
	gict_handle_records(drv);
	configure_thresholds(drv);
	mutex_unlock(&drv->mutex);
	return IRQ_HANDLED;
}

static void configure_ctrl_registers(struct erp_drvdata *drv)
{
	u64 errxfr, errxctrl, i;

	configure_thresholds(drv);

	/* Configure Control Register for CFI, UE, FI and UI faults/errors. */
	for (i = 0; i <= drv->max_records; i++) {
		errxfr = gict_readq(drv, GICT_ERR_FR(i));
		errxctrl = gict_readq(drv, GICT_ERR_CTRL(i));
		/* Configure to enable Correctable Fault Interrupt(CFI) */
		if (errxfr & ERR_FR_CFI_MASK)
			errxctrl = errxctrl | ERR_CTRL_CFI_MASK;
		/* Configure to enable Uncorrectable Error(UE) */
		if (errxfr & ERR_FR_UE_MASK)
			errxctrl = errxctrl | ERR_CTRL_UE_MASK;
		/* Configure to enable Uncorrectable Fault Interrupt(FI) */
		if (errxfr & ERR_FR_FI_MASK)
			errxctrl = errxctrl | ERR_CTRL_FI_MASK;
		/* Configure to enable Uncorrectable Interrupt(UI) */
		if (errxfr & ERR_FR_UI_MASK)
			errxctrl = errxctrl | ERR_CTRL_UI_MASK;

		gict_writeq(drv, errxctrl, GICT_ERR_CTRL(i));
	}
}

static int gic_erp_probe(struct platform_device *pdev)
{
	struct device *dev = &pdev->dev;
	struct erp_drvdata *drv;
	struct resource *res;
	int ret, errirq, faultirq;
	u64 errirqcrx;
	const char *err_irqname = "gict-err";
	const char *fault_irqname = "gict-fault";

	drv = devm_kzalloc(dev, sizeof(*drv), GFP_KERNEL);
	if (!drv)
		return -ENOMEM;

	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "gict-base");
	if (!res)
		return -ENXIO;

	drv->base = devm_ioremap_resource(dev, res);
	if (IS_ERR_OR_NULL(drv->base))
		return -EBUSY;

	ret = of_property_read_u32_array(dev->of_node,
			"interrupt-config", drv->interrupt_config, GICT_IRQS);
	if (ret)
		return -ENXIO;

	errirq = platform_get_irq_byname(pdev, err_irqname);
	if (errirq < 0)
		return errirq;

	faultirq = platform_get_irq_byname(pdev, fault_irqname);
	if (faultirq < 0)
		return faultirq;

	drv->edev_ctl = edac_device_alloc_ctl_info(0, "gic",
					1, "T", 1, 1, NULL, 0,
					edac_device_alloc_index());

	if (!drv->edev_ctl)
		return -ENOMEM;

	drv->edev_ctl->dev = dev;
	drv->edev_ctl->mod_name = dev_name(dev);
	drv->edev_ctl->dev_name = dev_name(dev);
	drv->edev_ctl->ctl_name = "GICT";
	drv->edev_ctl->panic_on_ce = GICT_PANIC_ON_CE;
	drv->edev_ctl->panic_on_ue = GICT_PANIC_ON_UE;
	platform_set_drvdata(pdev, drv);
	gict_edac = drv;

	mutex_init(&drv->mutex);
	ret = edac_device_add_device(drv->edev_ctl);
	if (ret)
		goto out_mem;

	/*
	 * Find no of error records supported by GICT from ERRIDR register
	 * and configure control registers for all records supported.
	 */
	drv->max_records = gict_read(drv, GICT_ERRIDR);
	configure_ctrl_registers(drv);

	/* Configure GICT_ERRIRQCR0 register with fault_interrupt number */
	errirqcrx = gict_readq(drv, GICT_ERRIRQCR0);
	errirqcrx |= (drv->interrupt_config[GICT_FAULT_IRQ_IDX]
				& ERR_IRQCR_SPIID_MASK);
	gict_writeq(drv, errirqcrx, GICT_ERRIRQCR0);

	/* Configure GICT_ERRIRQCR1 register with err_interrupt number */
	errirqcrx = gict_readq(drv, GICT_ERRIRQCR1);
	errirqcrx |= (drv->interrupt_config[GICT_ERROR_IRQ_IDX]
				& ERR_IRQCR_SPIID_MASK);
	gict_writeq(drv, errirqcrx, GICT_ERRIRQCR1);

	ret = devm_request_threaded_irq(&pdev->dev, faultirq,
		NULL, gict_fault_handler,
		IRQF_ONESHOT | IRQF_TRIGGER_HIGH, fault_irqname, drv);
	if (ret) {
		dev_err(dev, "Failed to request %s IRQ %d: %d\n",
					fault_irqname, res->start, ret);
		goto out_dev;
	}

	ret = devm_request_threaded_irq(&pdev->dev, errirq,
		NULL, gict_fault_handler,
		IRQF_ONESHOT | IRQF_TRIGGER_HIGH, err_irqname, drv);
	if (ret) {
		dev_err(dev, "Failed to request %s IRQ %d: %d\n",
					err_irqname, res->start, ret);
		goto out_dev;
	}
	return ret;

out_dev:
	edac_device_del_device(dev);
out_mem:
	edac_device_free_ctl_info(drv->edev_ctl);
	return ret;
}

static int gic_erp_remove(struct platform_device *pdev)
{
	struct erp_drvdata *drv = dev_get_drvdata(&pdev->dev);
	struct edac_device_ctl_info *edac_ctl = drv->edev_ctl;

	edac_device_del_device(edac_ctl->dev);
	edac_device_free_ctl_info(edac_ctl);

	return 0;
}

static const struct of_device_id gic_edac_match[] = {
	{ .compatible = "arm,gic-600-erp", },
	{ }
};
MODULE_DEVICE_TABLE(of, gic_edac_match);

static struct platform_driver gic_edac_driver = {
	.probe = gic_erp_probe,
	.remove = gic_erp_remove,
	.driver = {
		.name = "gic_erp",
		.of_match_table = of_match_ptr(gic_edac_match),
	},
};
module_platform_driver(gic_edac_driver);

MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("GICT driver");