Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e8779776 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
* 'x86-mce-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86, mce: Use HW_ERR in MCE handler
  x86, mce: Add HW_ERR printk prefix for hardware error logging
  x86, mce: Fix MSR_IA32_MCI_CTL2 CMCI threshold setup
  x86, mce: Rename MSR_IA32_MCx_CTL2 value
parents 3cf8ad33 a2d7b0d4
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -38,6 +38,10 @@
#define MCM_ADDR_MEM	 3	/* memory address */
#define MCM_ADDR_GENERIC 7	/* generic */

/* CTL2 register defines */
#define MCI_CTL2_CMCI_EN		(1ULL << 30)
#define MCI_CTL2_CMCI_THRESHOLD_MASK	0x7fffULL

#define MCJ_CTX_MASK		3
#define MCJ_CTX(flags)		((flags) & MCJ_CTX_MASK)
#define MCJ_CTX_RANDOM		0    /* inject context: random */
+0 −3
Original line number Diff line number Diff line
@@ -96,9 +96,6 @@
#define MSR_IA32_MC0_CTL2		0x00000280
#define MSR_IA32_MCx_CTL2(x)		(MSR_IA32_MC0_CTL2 + (x))

#define CMCI_EN			(1ULL << 30)
#define CMCI_THRESHOLD_MASK		0xffffULL

#define MSR_P6_PERFCTR0			0x000000c1
#define MSR_P6_PERFCTR1			0x000000c2
#define MSR_P6_EVNTSEL0			0x00000186
+10 −22
Original line number Diff line number Diff line
@@ -107,8 +107,8 @@ EXPORT_SYMBOL_GPL(x86_mce_decoder_chain);
static int default_decode_mce(struct notifier_block *nb, unsigned long val,
			       void *data)
{
	pr_emerg("No human readable MCE decoding support on this CPU type.\n");
	pr_emerg("Run the message through 'mcelog --ascii' to decode.\n");
	pr_emerg(HW_ERR "No human readable MCE decoding support on this CPU type.\n");
	pr_emerg(HW_ERR "Run the message through 'mcelog --ascii' to decode.\n");

	return NOTIFY_STOP;
}
@@ -211,11 +211,11 @@ void mce_log(struct mce *mce)

static void print_mce(struct mce *m)
{
	pr_emerg("CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
	pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n",
	       m->extcpu, m->mcgstatus, m->bank, m->status);

	if (m->ip) {
		pr_emerg("RIP%s %02x:<%016Lx> ",
		pr_emerg(HW_ERR "RIP%s %02x:<%016Lx> ",
			!(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
				m->cs, m->ip);

@@ -224,14 +224,14 @@ static void print_mce(struct mce *m)
		pr_cont("\n");
	}

	pr_emerg("TSC %llx ", m->tsc);
	pr_emerg(HW_ERR "TSC %llx ", m->tsc);
	if (m->addr)
		pr_cont("ADDR %llx ", m->addr);
	if (m->misc)
		pr_cont("MISC %llx ", m->misc);

	pr_cont("\n");
	pr_emerg("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
	pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
		m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid);

	/*
@@ -241,16 +241,6 @@ static void print_mce(struct mce *m)
	atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
}

static void print_mce_head(void)
{
	pr_emerg("\nHARDWARE ERROR\n");
}

static void print_mce_tail(void)
{
	pr_emerg("This is not a software problem!\n");
}

#define PANIC_TIMEOUT 5 /* 5 seconds */

static atomic_t mce_paniced;
@@ -291,7 +281,6 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
		if (atomic_inc_return(&mce_fake_paniced) > 1)
			return;
	}
	print_mce_head();
	/* First print corrected ones that are still unlogged */
	for (i = 0; i < MCE_LOG_LEN; i++) {
		struct mce *m = &mcelog.entry[i];
@@ -322,16 +311,15 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
			apei_err = apei_write_mce(final);
	}
	if (cpu_missing)
		printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n");
	print_mce_tail();
		pr_emerg(HW_ERR "Some CPUs didn't answer in synchronization\n");
	if (exp)
		printk(KERN_EMERG "Machine check: %s\n", exp);
		pr_emerg(HW_ERR "Machine check: %s\n", exp);
	if (!fake_panic) {
		if (panic_timeout == 0)
			panic_timeout = mce_panic_timeout;
		panic(msg);
	} else
		printk(KERN_EMERG "Fake kernel panic: %s\n", msg);
		pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg);
}

/* Support code for software error injection */
@@ -1221,7 +1209,7 @@ int mce_notify_irq(void)
			schedule_work(&mce_trigger_work);

		if (__ratelimit(&ratelimit))
			printk(KERN_INFO "Machine check events logged\n");
			pr_info(HW_ERR "Machine check events logged\n");

		return 1;
	}
+5 −4
Original line number Diff line number Diff line
@@ -95,19 +95,20 @@ static void cmci_discover(int banks, int boot)
		rdmsrl(MSR_IA32_MCx_CTL2(i), val);

		/* Already owned by someone else? */
		if (val & CMCI_EN) {
		if (val & MCI_CTL2_CMCI_EN) {
			if (test_and_clear_bit(i, owned) && !boot)
				print_update("SHD", &hdr, i);
			__clear_bit(i, __get_cpu_var(mce_poll_banks));
			continue;
		}

		val |= CMCI_EN | CMCI_THRESHOLD;
		val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
		val |= MCI_CTL2_CMCI_EN | CMCI_THRESHOLD;
		wrmsrl(MSR_IA32_MCx_CTL2(i), val);
		rdmsrl(MSR_IA32_MCx_CTL2(i), val);

		/* Did the enable bit stick? -- the bank supports CMCI */
		if (val & CMCI_EN) {
		if (val & MCI_CTL2_CMCI_EN) {
			if (!test_and_set_bit(i, owned) && !boot)
				print_update("CMCI", &hdr, i);
			__clear_bit(i, __get_cpu_var(mce_poll_banks));
@@ -155,7 +156,7 @@ void cmci_clear(void)
			continue;
		/* Disable CMCI */
		rdmsrl(MSR_IA32_MCx_CTL2(i), val);
		val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK);
		val &= ~(MCI_CTL2_CMCI_EN|MCI_CTL2_CMCI_THRESHOLD_MASK);
		wrmsrl(MSR_IA32_MCx_CTL2(i), val);
		__clear_bit(i, __get_cpu_var(mce_banks_owned));
	}
+7 −0
Original line number Diff line number Diff line
@@ -252,6 +252,13 @@ extern struct pid *session_of_pgrp(struct pid *pgrp);
#define FW_WARN		"[Firmware Warn]: "
#define FW_INFO		"[Firmware Info]: "

/*
 * HW_ERR
 * Add this to a message for hardware errors, so that user can report
 * it to hardware vendor instead of LKML or software vendor.
 */
#define HW_ERR		"[Hardware Error]: "

#ifdef CONFIG_PRINTK
asmlinkage int vprintk(const char *fmt, va_list args)
	__attribute__ ((format (printf, 1, 0)));