
Commit 4e9f44ba authored by Ingo Molnar

Merge tag 'mce-recovery-for-tip' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras into x86/mce

Implement MCE recovery for the data load error path and assorted cleanups.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
parents 87f71ae2 5f7b88d5
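
For orientation before the diffs: the recovery path added here works in two stages. do_machine_check() (running in #MC context) records the faulting physical address for the current task and TIF_MCE_NOTIFY is set; mce_notify_process() then runs in process context just before the task returns to userspace and calls memory_failure() with MF_ACTION_REQUIRED. The sketch below is a self-contained user-space model of that flow, not kernel code: the locking, atomics and task handling are deliberately simplified, and the memory_failure() stub is illustrative only.

	/* Simplified user-space model of the action-required recovery flow. */
	#include <stdio.h>
	#include <stdint.h>

	#define MCE_INFO_MAX       16
	#define MF_ACTION_REQUIRED (1 << 1)   /* mirrors enum mf_flags in the mm.h hunk */
	#define PAGE_SHIFT         12         /* 4K pages assumed for the example */

	struct task { int pid; };
	static struct task demo_task = { 42 };
	static struct task *current_task = &demo_task;  /* stand-in for the kernel's "current" */

	/* one slot per in-flight recoverable error, as in the mce_info[] array below */
	struct mce_info {
		int inuse;                    /* the kernel uses atomic_t + atomic_cmpxchg() */
		struct task *t;
		uint64_t paddr;
	} mce_info[MCE_INFO_MAX];

	static int tif_mce_notify;            /* stand-in for TIF_MCE_NOTIFY */

	/* #MC handler side: remember which task hit which poisoned physical address */
	static void mce_save_info(uint64_t paddr)
	{
		for (int i = 0; i < MCE_INFO_MAX; i++) {
			if (!mce_info[i].inuse) {
				mce_info[i].inuse = 1;
				mce_info[i].t = current_task;
				mce_info[i].paddr = paddr;
				/* the kernel sets TIF_MCE_NOTIFY in do_machine_check() right after this */
				tif_mce_notify = 1;
				return;
			}
		}
		/* kernel: mce_panic("Too many concurrent recoverable errors", ...) */
	}

	/* illustrative stub; the real implementation is mm/memory-failure.c */
	static int memory_failure(unsigned long pfn, int trapno, int flags)
	{
		printf("memory_failure(pfn=0x%lx, trapno=%d, flags=%#x)\n", pfn, trapno, flags);
		return 0;                     /* 0 means the page was isolated/recovered */
	}

	/* process-context side: runs before returning to the faulting user task */
	static void mce_notify_process(void)
	{
		if (!tif_mce_notify)
			return;
		tif_mce_notify = 0;

		for (int i = 0; i < MCE_INFO_MAX; i++) {
			if (mce_info[i].inuse && mce_info[i].t == current_task) {
				unsigned long pfn = mce_info[i].paddr >> PAGE_SHIFT;

				if (memory_failure(pfn, 18, MF_ACTION_REQUIRED) < 0)
					printf("not recovered: would force_sig(SIGBUS)\n");
				mce_info[i].inuse = 0;
				return;
			}
		}
		printf("lost physical address: the kernel would panic here\n");
	}

	int main(void)
	{
		mce_save_info(0x1234000);   /* as if #MC found an AR data load error here */
		mce_notify_process();       /* later, with interrupts enabled; allowed to sleep */
		return 0;
	}
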
arch/x86/kernel/cpu/mcheck/mce-severity.c  +15 −1
@@ -54,6 +54,7 @@ static struct severity {
#define  MASK(x, y)	.mask = x, .result = y
#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
#define	MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV)
#define MCACOD 0xffff

	MCESEV(
@@ -102,11 +103,24 @@ static struct severity {
		SER, BITCLR(MCI_STATUS_S)
		),

	/* AR add known MCACODs here */
	MCESEV(
		PANIC, "Action required with lost events",
		SER, BITSET(MCI_STATUS_OVER|MCI_UC_SAR)
		),

	/* known AR MCACODs: */
#ifdef	CONFIG_MEMORY_FAILURE
	MCESEV(
		KEEP, "HT thread notices Action required: data load error",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|0x0134),
		MCGMASK(MCG_STATUS_EIPV, 0)
		),
	MCESEV(
		AR, "Action required: data load error",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|0x0134),
		USER
		),
#endif
	MCESEV(
		PANIC, "Action required: unknown MCACOD",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR)
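
A note on how the entries above are consumed: mce_severity() walks this table and picks the first entry whose masked MCi_STATUS (and, where given, MCG_STATUS) bits equal the stored result, so the new MCI_ADDR/MCACOD terms narrow the "data load error" rules to banks that report a valid address and error code 0x0134. Below is a rough stand-alone illustration of that matching; bit positions follow the architectural MCi_STATUS layout, and the sample status value is invented.

	#include <stdio.h>
	#include <stdint.h>

	#define MCI_STATUS_OVER  (1ULL << 62)
	#define MCI_STATUS_UC    (1ULL << 61)
	#define MCI_STATUS_MISCV (1ULL << 59)
	#define MCI_STATUS_ADDRV (1ULL << 58)
	#define MCI_STATUS_S     (1ULL << 56)
	#define MCI_STATUS_AR    (1ULL << 55)
	#define MCACOD           0xffffULL

	struct sev_rule {
		uint64_t mask;     /* which MCi_STATUS bits to look at */
		uint64_t result;   /* value they must have: (status & mask) == result */
		const char *msg;
	};

	int main(void)
	{
		uint64_t uc_sar   = MCI_STATUS_UC | MCI_STATUS_S | MCI_STATUS_AR;
		uint64_t mci_addr = MCI_STATUS_ADDRV | MCI_STATUS_MISCV;

		/* the "Action required: data load error" entry from the hunk above */
		struct sev_rule rule = {
			.mask   = MCI_STATUS_OVER | uc_sar | mci_addr | MCACOD,
			.result = uc_sar | mci_addr | 0x0134,
			.msg    = "Action required: data load error",
		};

		/* invented status: UC+S+AR set, address valid, MCACOD 0x0134, no overflow */
		uint64_t status = uc_sar | mci_addr | 0x0134;

		if ((status & rule.mask) == rule.result)
			printf("matched: %s\n", rule.msg);
		return 0;
	}
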
arch/x86/kernel/cpu/mcheck/mce.c  +126 −53
@@ -540,6 +540,27 @@ static void mce_report_event(struct pt_regs *regs)
	irq_work_queue(&__get_cpu_var(mce_irq_work));
}

/*
 * Read ADDR and MISC registers.
 */
static void mce_read_aux(struct mce *m, int i)
{
	if (m->status & MCI_STATUS_MISCV)
		m->misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
	if (m->status & MCI_STATUS_ADDRV) {
		m->addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));

		/*
		 * Mask the reported address by the reported granularity.
		 */
		if (mce_ser && (m->status & MCI_STATUS_MISCV)) {
			u8 shift = MCI_MISC_ADDR_LSB(m->misc);
			m->addr >>= shift;
			m->addr <<= shift;
		}
	}
}

DEFINE_PER_CPU(unsigned, mce_poll_count);

/*
@@ -590,10 +611,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
		    (m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)))
			continue;

		if (m.status & MCI_STATUS_MISCV)
			m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
		if (m.status & MCI_STATUS_ADDRV)
			m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
		mce_read_aux(&m, i);

		if (!(flags & MCP_TIMESTAMP))
			m.tsc = 0;
@@ -916,6 +934,49 @@ static void mce_clear_state(unsigned long *toclear)
	}
}

/*
 * Need to save faulting physical address associated with a process
 * in the machine check handler some place where we can grab it back
 * later in mce_notify_process()
 */
#define	MCE_INFO_MAX	16

struct mce_info {
	atomic_t		inuse;
	struct task_struct	*t;
	__u64			paddr;
} mce_info[MCE_INFO_MAX];

static void mce_save_info(__u64 addr)
{
	struct mce_info *mi;

	for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++) {
		if (atomic_cmpxchg(&mi->inuse, 0, 1) == 0) {
			mi->t = current;
			mi->paddr = addr;
			return;
		}
	}

	mce_panic("Too many concurrent recoverable errors", NULL, NULL);
}

static struct mce_info *mce_find_info(void)
{
	struct mce_info *mi;

	for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++)
		if (atomic_read(&mi->inuse) && mi->t == current)
			return mi;
	return NULL;
}

static void mce_clear_info(struct mce_info *mi)
{
	atomic_set(&mi->inuse, 0);
}

/*
 * The actual machine check handler. This only handles real
 * exceptions when something got corrupted coming in through int 18.
@@ -969,7 +1030,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
	barrier();

	/*
	 * When no restart IP must always kill or panic.
	 * When no restart IP might need to kill or panic.
	 * Assume the worst for now, but if we find the
	 * severity is MCE_AR_SEVERITY we have other options.
	 */
	if (!(m.mcgstatus & MCG_STATUS_RIPV))
		kill_it = 1;
@@ -1023,16 +1086,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
			continue;
		}

		/*
		 * Kill on action required.
		 */
		if (severity == MCE_AR_SEVERITY)
			kill_it = 1;

		if (m.status & MCI_STATUS_MISCV)
			m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
		if (m.status & MCI_STATUS_ADDRV)
			m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
		mce_read_aux(&m, i);

		/*
		 * Action optional error. Queue address for later processing.
@@ -1052,6 +1106,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
		}
	}

	/* mce_clear_state will clear *final, save locally for use later */
	m = *final;

	if (!no_way_out)
		mce_clear_state(toclear);

@@ -1063,27 +1120,22 @@ void do_machine_check(struct pt_regs *regs, long error_code)
		no_way_out = worst >= MCE_PANIC_SEVERITY;

	/*
	 * If we have decided that we just CAN'T continue, and the user
	 * has not set tolerant to an insane level, give up and die.
	 *
	 * This is mainly used in the case when the system doesn't
	 * support MCE broadcasting or it has been disabled.
	 */
	if (no_way_out && tolerant < 3)
		mce_panic("Fatal machine check on current CPU", final, msg);

	/*
	 * If the error seems to be unrecoverable, something should be
	 * done.  Try to kill as little as possible.  If we can kill just
	 * one task, do that.  If the user has set the tolerance very
	 * high, don't try to do anything at all.
	 * At insane "tolerant" levels we take no action. Otherwise
	 * we only die if we have no other choice. For less serious
	 * issues we try to recover, or limit damage to the current
	 * process.
	 */

	if (kill_it && tolerant < 3)
		force_sig(SIGBUS, current);

	/* notify userspace ASAP */
	if (tolerant < 3) {
		if (no_way_out)
			mce_panic("Fatal machine check on current CPU", &m, msg);
		if (worst == MCE_AR_SEVERITY) {
			/* schedule action before return to userland */
			mce_save_info(m.addr);
			set_thread_flag(TIF_MCE_NOTIFY);
		} else if (kill_it) {
			force_sig(SIGBUS, current);
		}
	}

	if (worst > 0)
		mce_report_event(regs);
@@ -1094,34 +1146,57 @@ void do_machine_check(struct pt_regs *regs, long error_code)
}
EXPORT_SYMBOL_GPL(do_machine_check);

/* dummy to break dependency. actual code is in mm/memory-failure.c */
void __attribute__((weak)) memory_failure(unsigned long pfn, int vector)
#ifndef CONFIG_MEMORY_FAILURE
int memory_failure(unsigned long pfn, int vector, int flags)
{
	printk(KERN_ERR "Action optional memory failure at %lx ignored\n", pfn);
	/* mce_severity() should not hand us an ACTION_REQUIRED error */
	BUG_ON(flags & MF_ACTION_REQUIRED);
	printk(KERN_ERR "Uncorrected memory error in page 0x%lx ignored\n"
		"Rebuild kernel with CONFIG_MEMORY_FAILURE=y for smarter handling\n", pfn);

	return 0;
}
#endif

/*
 * Called after mce notification in process context. This code
 * is allowed to sleep. Call the high level VM handler to process
 * any corrupted pages.
 * Assume that the work queue code only calls this one at a time
 * per CPU.
 * Note we don't disable preemption, so this code might run on the wrong
 * CPU. In this case the event is picked up by the scheduled work queue.
 * This is merely a fast path to expedite processing in some common
 * cases.
 * Called in process context that interrupted by MCE and marked with
 * TIF_MCE_NOTIFY, just before returning to erroneous userland.
 * This code is allowed to sleep.
 * Attempt possible recovery such as calling the high level VM handler to
 * process any corrupted pages, and kill/signal current process if required.
 * Action required errors are handled here.
 */
void mce_notify_process(void)
{
	unsigned long pfn;
	mce_notify_irq();
	while (mce_ring_get(&pfn))
		memory_failure(pfn, MCE_VECTOR);
	struct mce_info *mi = mce_find_info();

	if (!mi)
		mce_panic("Lost physical address for unconsumed uncorrectable error", NULL, NULL);
	pfn = mi->paddr >> PAGE_SHIFT;

	clear_thread_flag(TIF_MCE_NOTIFY);

	pr_err("Uncorrected hardware memory error in user-access at %llx",
		 mi->paddr);
	if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0) {
		pr_err("Memory error not recovered");
		force_sig(SIGBUS, current);
	}
	mce_clear_info(mi);
}

/*
 * Action optional processing happens here (picking up
 * from the list of faulting pages that do_machine_check()
 * placed into the "ring").
 */
static void mce_process_work(struct work_struct *dummy)
{
	mce_notify_process();
	unsigned long pfn;

	while (mce_ring_get(&pfn))
		memory_failure(pfn, MCE_VECTOR, 0);
}

#ifdef CONFIG_X86_MCE_INTEL
@@ -1211,8 +1286,6 @@ int mce_notify_irq(void)
	/* Not more than two messages every minute */
	static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);

	clear_thread_flag(TIF_MCE_NOTIFY);

	if (test_and_clear_bit(0, &mce_need_notify)) {
		/* wake processes polling /dev/mcelog */
		wake_up_interruptible(&mce_chrdev_wait);
drivers/base/memory.c  +1 −1
@@ -466,7 +466,7 @@ store_hard_offline_page(struct device *dev,
	if (strict_strtoull(buf, 0, &pfn) < 0)
		return -EINVAL;
	pfn >>= PAGE_SHIFT;
	ret = __memory_failure(pfn, 0, 0);
	ret = memory_failure(pfn, 0, 0);
	return ret ? ret : count;
}

include/linux/mm.h  +2 −2
@@ -1598,9 +1598,9 @@ void vmemmap_populate_print_last(void);

enum mf_flags {
	MF_COUNT_INCREASED = 1 << 0,
	MF_ACTION_REQUIRED = 1 << 1,
};
extern void memory_failure(unsigned long pfn, int trapno);
extern int __memory_failure(unsigned long pfn, int trapno, int flags);
extern int memory_failure(unsigned long pfn, int trapno, int flags);
extern void memory_failure_queue(unsigned long pfn, int trapno, int flags);
extern int unpoison_memory(unsigned long pfn);
extern int sysctl_memory_failure_early_kill;
mm/hwpoison-inject.c  +2 −2
@@ -45,7 +45,7 @@ static int hwpoison_inject(void *data, u64 val)
	 * do a racy check with elevated page count, to make sure PG_hwpoison
	 * will only be set for the targeted owner (or on a free page).
	 * We temporarily take page lock for try_get_mem_cgroup_from_page().
	 * __memory_failure() will redo the check reliably inside page lock.
	 * memory_failure() will redo the check reliably inside page lock.
	 */
	lock_page(hpage);
	err = hwpoison_filter(hpage);
@@ -55,7 +55,7 @@ static int hwpoison_inject(void *data, u64 val)

inject:
	printk(KERN_INFO "Injecting memory failure at pfn %lx\n", pfn);
	return __memory_failure(pfn, 18, MF_COUNT_INCREASED);
	return memory_failure(pfn, 18, MF_COUNT_INCREASED);
}

static int hwpoison_unpoison(void *data, u64 val)