arch/x86/include/asm/mce.h (+51 −12)

@@ -102,15 +102,39 @@ struct mce_log {

 #ifdef __KERNEL__

-#include <linux/percpu.h>
-#include <linux/init.h>
-#include <asm/atomic.h>
-
 extern int mce_disabled;
+extern int mce_p5_enabled;
+
+#include <asm/atomic.h>
+#include <linux/percpu.h>
+
+#ifdef CONFIG_X86_MCE
+void mcheck_init(struct cpuinfo_x86 *c);
+#else
+static inline void mcheck_init(struct cpuinfo_x86 *c) {}
+#endif
+
+#ifdef CONFIG_X86_OLD_MCE
+extern int nr_mce_banks;
+void amd_mcheck_init(struct cpuinfo_x86 *c);
+void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
+void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
+#endif
+
+#ifdef CONFIG_X86_ANCIENT_MCE
+void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
+void winchip_mcheck_init(struct cpuinfo_x86 *c);
+static inline void enable_p5_mce(void) { mce_p5_enabled = 1; }
+#else
+static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {}
+static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
+static inline void enable_p5_mce(void) {}
+#endif

 void mce_setup(struct mce *m);
 void mce_log(struct mce *m);
 DECLARE_PER_CPU(struct sys_device, mce_dev);
-extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);

 /*
  * To support more than 128 would need to escape the predefined

@@ -145,12 +169,8 @@ int mce_available(struct cpuinfo_x86 *c);

 DECLARE_PER_CPU(unsigned, mce_exception_count);
 DECLARE_PER_CPU(unsigned, mce_poll_count);

-void mce_log_therm_throt_event(__u64 status);
-
 extern atomic_t mce_entry;

-void do_machine_check(struct pt_regs *, long);
-
 typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS);
 DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);

@@ -167,13 +187,32 @@ void mce_notify_process(void);

 DECLARE_PER_CPU(struct mce, injectm);
 extern struct file_operations mce_chrdev_ops;

-#ifdef CONFIG_X86_MCE
-void mcheck_init(struct cpuinfo_x86 *c);
-#else
-#define mcheck_init(c) do { } while (0)
-#endif
+/*
+ * Exception handler
+ */
+
+/* Call the installed machine check handler for this CPU setup. */
+extern void (*machine_check_vector)(struct pt_regs *, long error_code);
+void do_machine_check(struct pt_regs *, long);
+
+/*
+ * Threshold handler
+ */
+extern void (*mce_threshold_vector)(void);
+extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
+
+/*
+ * Thermal handler
+ */
+void intel_init_thermal(struct cpuinfo_x86 *c);
+
+#ifdef CONFIG_X86_NEW_MCE
+void mce_log_therm_throt_event(__u64 status);
+#else
+static inline void mce_log_therm_throt_event(__u64 status) {}
+#endif

 #endif /* __KERNEL__ */
 #endif /* _ASM_X86_MCE_H */
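Note on the header rework above: the !CONFIG_X86_MCE fallback for mcheck_init() changes from the old "#define mcheck_init(c) do { } while (0)" macro to a static inline stub, and the new CONFIG_X86_ANCIENT_MCE block uses the same idiom. A stand-alone sketch of why the inline form is preferable (CONFIG_FOO is a made-up stand-in for the Kconfig symbol): the argument keeps full type checking even when the option is compiled out.

/* build with or without -DCONFIG_FOO; both compile, both type-check */
#include <stdio.h>

struct cpuinfo_x86 { int x86_vendor; };

#ifdef CONFIG_FOO
/* "real" implementation, standing in for the one in mce.c */
void mcheck_init(struct cpuinfo_x86 *c)
{
	printf("mcheck_init for vendor %d\n", c->x86_vendor);
}
#else
/* compiled-out stub: unlike the old macro, a caller passing the wrong
 * type is still diagnosed by the compiler */
static inline void mcheck_init(struct cpuinfo_x86 *c) { }
#endif

int main(void)
{
	struct cpuinfo_x86 c = { 0 };

	mcheck_init(&c);		/* fine either way */
	/* mcheck_init("bogus");	   diagnosed even when stubbed out */
	return 0;
}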
arch/x86/include/asm/therm_throt.h (deleted, mode 100644 → 0, +0 −9)

-#ifndef _ASM_X86_THERM_THROT_H
-#define _ASM_X86_THERM_THROT_H
-
-#include <asm/atomic.h>
-
-extern atomic_t therm_throt_en;
-int therm_throt_process(int curr);
-
-#endif /* _ASM_X86_THERM_THROT_H */

arch/x86/kernel/cpu/mcheck/Makefile (+5 −4)

-obj-y				=  mce.o therm_throt.o
+obj-y				=  mce.o

 obj-$(CONFIG_X86_NEW_MCE)	+= mce-severity.o
 obj-$(CONFIG_X86_OLD_MCE)	+= k7.o p4.o p6.o
 obj-$(CONFIG_X86_ANCIENT_MCE)	+= winchip.o p5.o
-obj-$(CONFIG_X86_MCE_P4THERMAL)	+= mce_intel.o
-obj-$(CONFIG_X86_MCE_INTEL)	+= mce_intel_64.o mce_intel.o
-obj-$(CONFIG_X86_MCE_AMD)	+= mce_amd_64.o
+obj-$(CONFIG_X86_MCE_INTEL)	+= mce_intel.o
+obj-$(CONFIG_X86_MCE_AMD)	+= mce_amd.o
 obj-$(CONFIG_X86_MCE_NONFATAL)	+= non-fatal.o
 obj-$(CONFIG_X86_MCE_THRESHOLD)	+= threshold.o
 obj-$(CONFIG_X86_MCE_INJECT)	+= mce-inject.o
+obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o

arch/x86/kernel/cpu/mcheck/k7.c (+1 −2)

@@ -10,10 +10,9 @@

 #include <asm/processor.h>
 #include <asm/system.h>
+#include <asm/mce.h>
 #include <asm/msr.h>

-#include "mce.h"
-
 /* Machine Check Handler For AMD Athlon/Duron: */
 static void k7_machine_check(struct pt_regs *regs, long error_code)
 {
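Aside on the k7.c hunk: the private mcheck/"mce.h" is gone and the shared declarations (machine_check_vector and friends) now come from <asm/mce.h>. The dispatch itself is a plain function pointer that boots pointing at a stub and is overridden by each family's init routine. A minimal user-space model of that pattern (illustration only, not the kernel code):

#include <stdio.h>

struct pt_regs;				/* opaque here */

static void unexpected_machine_check(struct pt_regs *regs, long error_code)
{
	printf("unexpected int18, error_code %ld\n", error_code);
}

/* default handler; family init code installs the real one */
static void (*machine_check_vector)(struct pt_regs *, long) =
						unexpected_machine_check;

static void k7_machine_check(struct pt_regs *regs, long error_code)
{
	printf("K7 handler, error_code %ld\n", error_code);
}

static void amd_mcheck_init(void)
{
	machine_check_vector = k7_machine_check;	/* as k7.c does */
}

int main(void)
{
	machine_check_vector(NULL, 0);	/* stub fires */
	amd_mcheck_init();
	machine_check_vector(NULL, 0);	/* K7 handler fires */
	return 0;
}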
arch/x86/kernel/cpu/mcheck/mce.c (+158 −73)

@@ -44,7 +44,6 @@
 #include <asm/msr.h>

 #include "mce-internal.h"
-#include "mce.h"

 /* Handle unconfigured int18 (should never happen) */
 static void unexpected_machine_check(struct pt_regs *regs, long error_code)

@@ -57,7 +56,7 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code)
 void (*machine_check_vector)(struct pt_regs *, long error_code) =
						unexpected_machine_check;

-int mce_disabled;
+int mce_disabled __read_mostly;

 #ifdef CONFIG_X86_NEW_MCE

@@ -76,21 +75,22 @@ DEFINE_PER_CPU(unsigned, mce_exception_count);
  * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors
  * 3: never panic or SIGBUS, log all errors (for testing only)
  */
-static int tolerant = 1;
-static int banks;
-static u64 *bank;
-static unsigned long notify_user;
-static int rip_msr;
-static int mce_bootlog = -1;
-static int monarch_timeout = -1;
-static int mce_panic_timeout;
-static int mce_dont_log_ce;
-int mce_cmci_disabled;
-int mce_ignore_ce;
-int mce_ser;
-
-static char trigger[128];
-static char *trigger_argv[2] = { trigger, NULL };
+static int tolerant		__read_mostly = 1;
+static int banks		__read_mostly;
+static u64 *bank		__read_mostly;
+static int rip_msr		__read_mostly;
+static int mce_bootlog		__read_mostly = -1;
+static int monarch_timeout	__read_mostly = -1;
+static int mce_panic_timeout	__read_mostly;
+static int mce_dont_log_ce	__read_mostly;
+int mce_cmci_disabled		__read_mostly;
+int mce_ignore_ce		__read_mostly;
+int mce_ser			__read_mostly;
+
+/* User mode helper program triggered by machine check event */
+static unsigned long		mce_need_notify;
+static char			mce_helper[128];
+static char			*mce_helper_argv[2] = { mce_helper, NULL };

 static unsigned long		dont_init_banks;

@@ -180,7 +180,7 @@ void mce_log(struct mce *mce)
 	wmb();

 	mce->finished = 1;
-	set_bit(0, &notify_user);
+	set_bit(0, &mce_need_notify);
 }

 static void print_mce(struct mce *m)

@@ -691,18 +691,21 @@ static atomic_t global_nwo;
  * in the entry order.
  * TBD double check parallel CPU hotunplug
  */
-static int mce_start(int no_way_out, int *order)
+static int mce_start(int *no_way_out)
 {
-	int nwo;
+	int order;
 	int cpus = num_online_cpus();
 	u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC;

-	if (!timeout) {
-		*order = -1;
-		return no_way_out;
-	}
+	if (!timeout)
+		return -1;

-	atomic_add(no_way_out, &global_nwo);
+	atomic_add(*no_way_out, &global_nwo);
+	/*
+	 * global_nwo should be updated before mce_callin
+	 */
+	smp_wmb();
+	order = atomic_add_return(1, &mce_callin);

 	/*
 	 * Wait for everyone.

@@ -710,40 +713,43 @@ static int mce_start(int *no_way_out)
 	while (atomic_read(&mce_callin) != cpus) {
 		if (mce_timed_out(&timeout)) {
 			atomic_set(&global_nwo, 0);
-			*order = -1;
-			return no_way_out;
+			return -1;
 		}
 		ndelay(SPINUNIT);
 	}

 	/*
-	 * Cache the global no_way_out state.
+	 * mce_callin should be read before global_nwo
 	 */
-	nwo = atomic_read(&global_nwo);
+	smp_rmb();

-	if (*order == 1) {
+	if (order == 1) {
 		/*
-		 * Monarch starts executing now, the others wait.
+		 * Monarch: Starts executing now, the others wait.
 		 */
 		atomic_set(&mce_executing, 1);
-		return nwo;
-	}
-
-	/*
-	 * Now start the scanning loop one by one
-	 * in the original callin order.
-	 * This way when there are any shared banks it will
-	 * be only seen by one CPU before cleared, avoiding duplicates.
-	 */
-	while (atomic_read(&mce_executing) < *order) {
-		if (mce_timed_out(&timeout)) {
-			atomic_set(&global_nwo, 0);
-			*order = -1;
-			return no_way_out;
+	} else {
+		/*
+		 * Subject: Now start the scanning loop one by one in
+		 * the original callin order.
+		 * This way when there are any shared banks it will be
+		 * only seen by one CPU before cleared, avoiding duplicates.
+		 */
+		while (atomic_read(&mce_executing) < order) {
+			if (mce_timed_out(&timeout)) {
+				atomic_set(&global_nwo, 0);
+				return -1;
+			}
+			ndelay(SPINUNIT);
 		}
-		ndelay(SPINUNIT);
 	}
-	return nwo;
+
+	/*
+	 * Cache the global no_way_out state.
+	 */
+	*no_way_out = atomic_read(&global_nwo);
+
+	return order;
 }
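The mce_start() rework above does three things: the mce_callin ticketing moves out of do_machine_check() and into mce_start() itself, order becomes the return value instead of an out parameter (with no_way_out now passed by reference), and the update of global_nwo is ordered against mce_callin by an smp_wmb()/smp_rmb() pair, so a CPU's no_way_out contribution is visible before its callin ticket is. The net effect is a rendezvous: the first CPU in (order == 1, the Monarch) scans its banks first, and each Subject waits until mce_executing reaches its own ticket. A user-space simulation of that ordering (pthreads and C11 atomics; timeouts, global_nwo and the NMI context are deliberately omitted, so this is an illustration, not the real code):

/*
 * Simplified model of the mce_start() rendezvous.
 * Build: cc -std=c11 -pthread rendezvous.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NCPUS 4

static atomic_int mce_callin;		/* CPUs that have entered */
static atomic_int mce_executing;	/* whose turn it is to scan */

static void *machine_check(void *arg)
{
	int cpu = (int)(long)arg;
	/* take a ticket in entry order; ticket 1 is the Monarch */
	int order = atomic_fetch_add(&mce_callin, 1) + 1;

	/* wait for everyone to check in */
	while (atomic_load(&mce_callin) != NCPUS)
		;

	if (order == 1)
		atomic_store(&mce_executing, 1);	/* Monarch goes first */
	else
		while (atomic_load(&mce_executing) < order)
			;				/* Subjects wait their turn */

	printf("cpu %d scans banks as #%d\n", cpu, order);

	/* hand off to the next CPU in callin order (mce_end() in the kernel) */
	atomic_fetch_add(&mce_executing, 1);
	return NULL;
}

int main(void)
{
	pthread_t t[NCPUS];
	long i;

	for (i = 0; i < NCPUS; i++)
		pthread_create(&t[i], NULL, machine_check, (void *)i);
	for (i = 0; i < NCPUS; i++)
		pthread_join(t[i], NULL);
	return 0;
}

The output always lists #1 through #4 in ticket order, which is exactly the property the kernel relies on so that a shared bank is seen (and cleared) by only one CPU.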
@@ -863,7 +869,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	 * check handler.
 	 */
 	int order;
-
 	/*
 	 * If no_way_out gets set, there is no safe way to recover from this
 	 * MCE. If tolerant is cranked up, we'll try anyway.

@@ -887,7 +892,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	if (!banks)
 		goto out;

-	order = atomic_add_return(1, &mce_callin);
 	mce_setup(&m);

 	m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);

@@ -909,7 +913,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	 * This way we don't report duplicated events on shared banks
 	 * because the first one to see it will clear it.
 	 */
-	no_way_out = mce_start(no_way_out, &order);
+	order = mce_start(&no_way_out);
 	for (i = 0; i < banks; i++) {
 		__clear_bit(i, toclear);
 		if (!bank[i])

@@ -1118,7 +1122,7 @@ static void mcheck_timer(unsigned long data)

 static void mce_do_trigger(struct work_struct *work)
 {
-	call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT);
+	call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT);
 }

 static DECLARE_WORK(mce_trigger_work, mce_do_trigger);

@@ -1135,7 +1139,7 @@ int mce_notify_irq(void)

 	clear_thread_flag(TIF_MCE_NOTIFY);

-	if (test_and_clear_bit(0, &notify_user)) {
+	if (test_and_clear_bit(0, &mce_need_notify)) {
 		wake_up_interruptible(&mce_wait);

@@ -1143,7 +1147,7 @@ int mce_notify_irq(void)
 		 * work_pending is always cleared before the function is
 		 * executed.
 		 */
-		if (trigger[0] && !work_pending(&mce_trigger_work))
+		if (mce_helper[0] && !work_pending(&mce_trigger_work))
 			schedule_work(&mce_trigger_work);

 	if (__ratelimit(&ratelimit))

@@ -1282,7 +1286,6 @@ static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
 		return;
 	switch (c->x86_vendor) {
 	case X86_VENDOR_INTEL:
-		if (mce_p5_enabled())
-			intel_p5_mcheck_init(c);
+		intel_p5_mcheck_init(c);
 		break;
 	case X86_VENDOR_CENTAUR:

@@ -1609,7 +1612,8 @@ static int mce_resume(struct sys_device *dev)
 static void mce_cpu_restart(void *data)
 {
 	del_timer_sync(&__get_cpu_var(mce_timer));
-	if (mce_available(&current_cpu_data))
-		mce_init();
+	if (!mce_available(&current_cpu_data))
+		return;
+	mce_init();
 	mce_init_timer();
 }

@@ -1620,6 +1624,26 @@ static void mce_restart(void)
 	on_each_cpu(mce_cpu_restart, NULL, 1);
 }

+/* Toggle features for corrected errors */
+static void mce_disable_ce(void *all)
+{
+	if (!mce_available(&current_cpu_data))
+		return;
+	if (all)
+		del_timer_sync(&__get_cpu_var(mce_timer));
+	cmci_clear();
+}
+
+static void mce_enable_ce(void *all)
+{
+	if (!mce_available(&current_cpu_data))
+		return;
+	cmci_reenable();
+	cmci_recheck();
+	if (all)
+		mce_init_timer();
+}
+
 static struct sysdev_class mce_sysclass = {
 	.suspend	= mce_suspend,
 	.shutdown	= mce_shutdown,

@@ -1659,9 +1683,9 @@ static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
 static ssize_t
 show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf)
 {
-	strcpy(buf, trigger);
+	strcpy(buf, mce_helper);
 	strcat(buf, "\n");
-	return strlen(trigger) + 1;
+	return strlen(mce_helper) + 1;
 }

 static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,

@@ -1670,10 +1694,10 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
 	char *p;
 	int len;

-	strncpy(trigger, buf, sizeof(trigger));
-	trigger[sizeof(trigger)-1] = 0;
-	len = strlen(trigger);
-	p = strchr(trigger, '\n');
+	strncpy(mce_helper, buf, sizeof(mce_helper));
+	mce_helper[sizeof(mce_helper)-1] = 0;
+	len = strlen(mce_helper);
+	p = strchr(mce_helper, '\n');

 	if (*p)
 		*p = 0;

@@ -1681,6 +1705,52 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
 	return len;
 }

+static ssize_t set_ignore_ce(struct sys_device *s,
+			     struct sysdev_attribute *attr,
+			     const char *buf, size_t size)
+{
+	u64 new;
+
+	if (strict_strtoull(buf, 0, &new) < 0)
+		return -EINVAL;
+
+	if (mce_ignore_ce ^ !!new) {
+		if (new) {
+			/* disable ce features */
+			on_each_cpu(mce_disable_ce, (void *)1, 1);
+			mce_ignore_ce = 1;
+		} else {
+			/* enable ce features */
+			mce_ignore_ce = 0;
+			on_each_cpu(mce_enable_ce, (void *)1, 1);
+		}
+	}
+	return size;
+}
+
+static ssize_t set_cmci_disabled(struct sys_device *s,
+				 struct sysdev_attribute *attr,
+				 const char *buf, size_t size)
+{
+	u64 new;
+
+	if (strict_strtoull(buf, 0, &new) < 0)
+		return -EINVAL;
+
+	if (mce_cmci_disabled ^ !!new) {
+		if (new) {
+			/* disable cmci */
+			on_each_cpu(mce_disable_ce, NULL, 1);
+			mce_cmci_disabled = 1;
+		} else {
+			/* enable cmci */
+			mce_cmci_disabled = 0;
+			on_each_cpu(mce_enable_ce, NULL, 1);
+		}
+	}
+	return size;
+}
+
 static ssize_t store_int_with_restart(struct sys_device *s,
 				      struct sysdev_attribute *attr,
 				      const char *buf, size_t size)
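Two details of the new setters above are easy to miss. First, "mce_ignore_ce ^ !!new" is a change-detection test: the cross-CPU work runs only when the written value actually flips the current state. Second, the (void *)1 argument to mce_disable_ce()/mce_enable_ce() selects the "all features" path (which also stops or restarts the polling timer), while the cmci_disabled setter passes NULL to touch CMCI alone. With the stock sysdev layout these knobs appear per CPU as /sys/devices/system/machinecheck/machinecheckN/ignore_ce and .../cmci_disabled. A stand-alone illustration of the XOR idiom (hypothetical set_flag(), not kernel code):

#include <stdio.h>

static int cur;				/* stands in for mce_ignore_ce */

static void set_flag(unsigned long long new)
{
	if (cur ^ !!new) {		/* true only on a real transition */
		cur = !!new;
		printf("state -> %d (do the on_each_cpu() work here)\n", cur);
	} else {
		printf("no change, skip the expensive part\n");
	}
}

int main(void)
{
	set_flag(0);	/* 0 -> 0: no change */
	set_flag(42);	/* 0 -> 1: any nonzero value counts as 1 */
	set_flag(1);	/* 1 -> 1: no change */
	return 0;
}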
@@ -1693,6 +1763,7 @@ static ssize_t store_int_with_restart(struct sys_device *s,
 static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
 static SYSDEV_INT_ATTR(tolerant, 0644, tolerant);
 static SYSDEV_INT_ATTR(monarch_timeout, 0644, monarch_timeout);
+static SYSDEV_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce);

 static struct sysdev_ext_attribute attr_check_interval = {
 	_SYSDEV_ATTR(check_interval, 0644, sysdev_show_int,

@@ -1700,9 +1771,24 @@ static struct sysdev_ext_attribute attr_check_interval = {
 	&check_interval
 };

+static struct sysdev_ext_attribute attr_ignore_ce = {
+	_SYSDEV_ATTR(ignore_ce, 0644, sysdev_show_int, set_ignore_ce),
+	&mce_ignore_ce
+};
+
+static struct sysdev_ext_attribute attr_cmci_disabled = {
+	_SYSDEV_ATTR(cmci_disabled, 0644, sysdev_show_int, set_cmci_disabled),
+	&mce_cmci_disabled
+};
+
 static struct sysdev_attribute *mce_attrs[] = {
 	&attr_tolerant.attr, &attr_check_interval.attr, &attr_trigger,
-	&attr_monarch_timeout.attr,
+	&attr_monarch_timeout.attr, &attr_dont_log_ce.attr,
+	&attr_ignore_ce.attr, &attr_cmci_disabled.attr,
 	NULL
 };

@@ -1712,7 +1798,7 @@ static cpumask_var_t mce_dev_initialized;
 static __cpuinit int mce_create_device(unsigned int cpu)
 {
 	int err;
-	int i;
+	int i, j;

 	if (!mce_available(&boot_cpu_data))
 		return -EIO;

@@ -1730,9 +1816,9 @@ static __cpuinit int mce_create_device(unsigned int cpu)
 		if (err)
 			goto error;
 	}
-	for (i = 0; i < banks; i++) {
+	for (j = 0; j < banks; j++) {
 		err = sysdev_create_file(&per_cpu(mce_dev, cpu),
-					&bank_attrs[i]);
+					&bank_attrs[j]);
 		if (err)
 			goto error2;
 	}

@@ -1740,8 +1826,8 @@ static __cpuinit int mce_create_device(unsigned int cpu)
 	return 0;
 error2:
-	while (--i >= 0)
-		sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]);
+	while (--j >= 0)
+		sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[j]);
 error:
 	while (--i >= 0)
 		sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);

@@ -1883,7 +1969,7 @@ static __init int mce_init_device(void)
 	if (!mce_available(&boot_cpu_data))
 		return -EIO;

-	alloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL);
+	zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL);

 	err = mce_init_banks();
 	if (err)

@@ -1915,7 +2001,7 @@ EXPORT_SYMBOL_GPL(nr_mce_banks);	/* non-fatal.o */

 /* This has to be run for each processor */
 void mcheck_init(struct cpuinfo_x86 *c)
 {
-	if (mce_disabled == 1)
+	if (mce_disabled)
 		return;

 	switch (c->x86_vendor) {

@@ -1945,10 +2031,9 @@ void mcheck_init(struct cpuinfo_x86 *c)

 static int __init mcheck_enable(char *str)
 {
-	mce_disabled = -1;
+	mce_p5_enabled = 1;
 	return 1;
 }
 __setup("mce", mcheck_enable);

 #endif /* CONFIG_X86_OLD_MCE */
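The i/j change in mce_create_device() above is a bug fix, not churn: the old code reused i for the bank-attribute loop, so once the error2: unwind had run i down, the fall-through into error: walked mce_attrs[] with a clobbered index. Giving each resource class its own counter lets each label release exactly what its own loop created. A condensed model of the pattern (create()/destroy() are hypothetical helpers):

#include <stdio.h>

#define NATTRS 3
#define NBANKS 4

static int create(const char *what, int idx)
{
	printf("create %s %d\n", what, idx);
	return 0;			/* return nonzero to simulate failure */
}

static void destroy(const char *what, int idx)
{
	printf("destroy %s %d\n", what, idx);
}

static int create_device(void)
{
	int i, j, err = 0;

	for (i = 0; i < NATTRS; i++)
		if ((err = create("attr", i)))
			goto error;
	for (j = 0; j < NBANKS; j++)
		if ((err = create("bank", j)))
			goto error2;
	return 0;

error2:
	while (--j >= 0)
		destroy("bank", j);	/* unwind only this loop's banks... */
error:
	while (--i >= 0)
		destroy("attr", i);	/* ...then the attributes, with i intact */
	return err;
}

int main(void)
{
	return create_device();
}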
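One last detail worth calling out from the hunks above: the switch from alloc_cpumask_var() to zalloc_cpumask_var() in mce_init_device(). With CONFIG_CPUMASK_OFFSTACK=y the plain allocator returns an uninitialized mask, and mce_dev_initialized must start out empty, or stale bits would make CPUs look as though their sysfs devices already existed. The distinction is the same as malloc() versus calloc() in user space:

#include <stdio.h>
#include <stdlib.h>

#define MASK_BYTES (64 / 8)

int main(void)
{
	/* alloc_cpumask_var() analogue: contents indeterminate */
	unsigned char *mask = malloc(MASK_BYTES);
	/* zalloc_cpumask_var() analogue: guaranteed all-clear */
	unsigned char *zmask = calloc(1, MASK_BYTES);

	if (!mask || !zmask)
		return 1;
	printf("zeroed mask, first byte: %u\n", zmask[0]);	/* always 0 */
	free(mask);
	free(zmask);
	return 0;
}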