SB1 cache exception handling. (a4b5bd9a) · Commits · e / devices / android_kernel_teracube_emerald

arch/mips/mm/cerr-sb1.c

+46 −8

Original line number	Diff line number	Diff line
		@@ -19,13 +19,19 @@
		#include <linux/sched.h>
		#include <asm/mipsregs.h>
		#include <asm/sibyte/sb1250.h>
		#include <asm/sibyte/sb1250_regs.h>

		#ifndef CONFIG_SIBYTE_BUS_WATCHER
		#if !defined(CONFIG_SIBYTE_BUS_WATCHER) || defined(CONFIG_SIBYTE_BW_TRACE)
		#include <asm/io.h>
		#include <asm/sibyte/sb1250_regs.h>
		#include <asm/sibyte/sb1250_scd.h>
		#endif

		/*
		* We'd like to dump the L2_ECC_TAG register on errors, but errata make
		* that unsafe... So for now we don't. (BCM1250/BCM112x erratum SOC-48.)
		*/
		#undef DUMP_L2_ECC_TAG_ON_ERROR

		/* SB1 definitions */

		/* XXX should come from config1 XXX */
		@@ -139,12 +145,18 @@ static inline void breakout_cerrd(unsigned int val)
		static void check_bus_watcher(void)
		{
		uint32_t status, l2_err, memio_err;
		#ifdef DUMP_L2_ECC_TAG_ON_ERROR
		uint64_t l2_tag;
		#endif

		/* Destructive read, clears register and interrupt */
		status = csr_in32(IOADDR(A_SCD_BUS_ERR_STATUS));
		/* Bit 31 is always on, but there's no #define for that */
		if (status & ~(1UL << 31)) {
		l2_err = csr_in32(IOADDR(A_BUS_L2_ERRORS));
		#ifdef DUMP_L2_ECC_TAG_ON_ERROR
		l2_tag = in64(IO_SPACE_BASE | A_L2_ECC_TAG);
		#endif
		memio_err = csr_in32(IOADDR(A_BUS_MEM_IO_ERRORS));
		prom_printf("Bus watcher error counters: %08x %08x\n", l2_err, memio_err);
		prom_printf("\nLast recorded signature:\n");
		@@ -153,6 +165,9 @@ static void check_bus_watcher(void)
		(int)(G_SCD_BERR_TID(status) >> 6),
		(int)G_SCD_BERR_RID(status),
		(int)G_SCD_BERR_DCODE(status));
		#ifdef DUMP_L2_ECC_TAG_ON_ERROR
		prom_printf("Last L2 tag w/ bad ECC: %016llx\n", l2_tag);
		#endif
		} else {
		prom_printf("Bus watcher indicates no error\n");
		}
		@@ -166,6 +181,16 @@ asmlinkage void sb1_cache_error(void)
		uint64_t cerr_dpa;
		uint32_t errctl, cerr_i, cerr_d, dpalo, dpahi, eepc, res;

		#ifdef CONFIG_SIBYTE_BW_TRACE
		/* Freeze the trace buffer now */
		#if defined(CONFIG_SIBYTE_BCM1x55) || defined(CONFIG_SIBYTE_BCM1x80)
		csr_out32(M_BCM1480_SCD_TRACE_CFG_FREEZE, IO_SPACE_BASE | A_SCD_TRACE_CFG);
		#else
		csr_out32(M_SCD_TRACE_CFG_FREEZE, IO_SPACE_BASE | A_SCD_TRACE_CFG);
		#endif
		prom_printf("Trace buffer frozen\n");
		#endif

		prom_printf("Cache error exception on CPU %x:\n",
		(read_c0_prid() >> 25) & 0x7);

		@@ -229,11 +254,19 @@ asmlinkage void sb1_cache_error(void)

		check_bus_watcher();

		while (1);
		/*
		* This tends to make things get really ugly; let's just stall instead.
		* panic("Can't handle the cache error!");
		* Calling panic() when a fatal cache error occurs scrambles the
		* state of the system (and the cache), making it difficult to
		* investigate after the fact. However, if you just stall the CPU,
		* the other CPU may keep on running, which is typically very
		* undesirable.
		*/
		#ifdef CONFIG_SB1_CERR_STALL
		while (1)
		;
		#else
		panic("unhandled cache error");
		#endif
		}


		@@ -434,7 +467,8 @@ static struct dc_state dc_states[] = {
		};

		#define DC_TAG_VALID(state) \
		(((state) == 0xf) || ((state) == 0x13) || ((state) == 0x19) || ((state == 0x16)) || ((state) == 0x1c))
		(((state) == 0x0) || ((state) == 0xf) || ((state) == 0x13) || \
		((state) == 0x19) || ((state) == 0x16) || ((state) == 0x1c))

		static char *dc_state_str(unsigned char state)
		{
		@@ -505,6 +539,7 @@ static uint32_t extract_dc(unsigned short addr, int data)
		uint64_t datalo;
		uint32_t datalohi, datalolo, datahi;
		int offset;
		char bad_ecc = 0;

		for (offset = 0; offset < 4; offset++) {
		/* Index-load-data-D */
		@@ -525,8 +560,7 @@ static uint32_t extract_dc(unsigned short addr, int data)
		ecc = dc_ecc(datalo);
		if (ecc != datahi) {
		int bits = 0;
		prom_printf(" ** bad ECC (%02x %02x) ->",
		datahi, ecc);
		bad_ecc |= 1 << (3-offset);
		ecc ^= datahi;
		while (ecc) {
		if (ecc & 1) bits++;
		@@ -537,6 +571,10 @@ static uint32_t extract_dc(unsigned short addr, int data)
		prom_printf(" %02X-%016llX", datahi, datalo);
		}
		prom_printf("\n");
		if (bad_ecc)
		prom_printf(" dwords w/ bad ECC: %d %d %d %d\n",
		!!(bad_ecc & 8), !!(bad_ecc & 4),
		!!(bad_ecc & 2), !!(bad_ecc & 1));
		}
		}
		return res;

arch/mips/mm/cex-sb1.S

+5 −0

Original line number	Diff line number	Diff line
		@@ -64,6 +64,10 @@ LEAF(except_vec2_sb1)
		sd k0,0x170($0)
		sd k1,0x178($0)

		#if CONFIG_SB1_CEX_ALWAYS_FATAL
		j handle_vec2_sb1
		nop
		#else
		/*
		* M_ERRCTL_RECOVERABLE is bit 31, which makes it easy to tell
		* if we can fast-path out of here for a h/w-recovered error.
		@@ -134,6 +138,7 @@ unrecoverable:
		/* Unrecoverable Icache or Dcache error; log it and/or fail */
		j handle_vec2_sb1
		nop
		#endif

		END(except_vec2_sb1)

arch/mips/sibyte/Kconfig

+8 −0

Original line number	Diff line number	Diff line
		@@ -102,6 +102,14 @@ config SIMULATION
		Build a kernel suitable for running under the GDB simulator.
		Primarily adjusts the kernel's notion of time.

		# Kconfig prepends CONFIG_ to every symbol when it generates the
		# preprocessor macros, so the symbol itself is declared without it;
		# the sources test CONFIG_SB1_CEX_ALWAYS_FATAL.
		config SB1_CEX_ALWAYS_FATAL
			bool "All cache exceptions considered fatal (no recovery attempted)"
			depends on SIBYTE_SB1xxx_SOC
			help
			  When enabled, the SB1 cache error vector (except_vec2_sb1) jumps
			  straight to the full cache error handler instead of first trying
			  the fast path for errors the hardware reports as recovered.

		# Kconfig prepends CONFIG_ to every symbol when it generates the
		# preprocessor macros, so the symbol itself is declared without it;
		# the sources test CONFIG_SB1_CERR_STALL.
		config SB1_CERR_STALL
			bool "Stall (rather than panic) on fatal cache error"
			depends on SIBYTE_SB1xxx_SOC
			help
			  On a fatal cache error, spin the faulting CPU in place once the
			  diagnostics have been printed instead of calling panic().
			  Calling panic() scrambles the state of the system (and the
			  cache), making it difficult to investigate after the fact.
			  Note that when one CPU is stalled the other CPU may keep on
			  running, which is typically very undesirable.

		config SIBYTE_CFE
		bool "Booting from CFE"
		depends on SIBYTE_SB1xxx_SOC