Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit bd44874d authored by Kyle Yan's avatar Kyle Yan
Browse files

ANDROID: hardlockup: detect hard lockups without NMIs using secondary cpus



This patch adds back the hardlockup detection without NMIs from
<aaf78ec6>("ANDROID: hardlockup: detect hard lockups without NMIs
using secondary cpus") which was reverted after the hard lockup detection
logic was moved to a separate file in <b969a240>("kernel/watchdog.c:
move hardlockup detector to separate file").

Change-Id: If6a24b379d29fa582344fe57d26df4bd2b8cc89f
Signed-off-by: default avatarKyle Yan <kyan@codeaurora.org>
parent f3fbc3a3
Loading
Loading
Loading
Loading
+8 −1
Original line number Diff line number Diff line
@@ -24,6 +24,9 @@
#define NMI_WATCHDOG_ENABLED      (1 << NMI_WATCHDOG_ENABLED_BIT)
#define SOFT_WATCHDOG_ENABLED     (1 << SOFT_WATCHDOG_ENABLED_BIT)

DECLARE_PER_CPU(unsigned long, hrtimer_interrupts);
DECLARE_PER_CPU(unsigned long, hrtimer_interrupts_saved);

/**
 * touch_nmi_watchdog - restart NMI watchdog timeout.
 *
@@ -31,8 +34,11 @@
 * may be used to reset the timeout - for code which intentionally
 * disables interrupts for a long time. This call is stateless.
 */
#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR_NMI)
#include <asm/nmi.h>
#endif

#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
extern void touch_nmi_watchdog(void);
#else
static inline void touch_nmi_watchdog(void)
@@ -130,6 +136,7 @@ extern int sysctl_hardlockup_all_cpu_backtrace;
#define sysctl_softlockup_all_cpu_backtrace 0
#define sysctl_hardlockup_all_cpu_backtrace 0
#endif

extern bool is_hardlockup(void);
struct ctl_table;
extern int proc_watchdog(struct ctl_table *, int ,
+9 −2
Original line number Diff line number Diff line
@@ -81,10 +81,10 @@ static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
static DEFINE_PER_CPU(unsigned int, watchdog_en);
static DEFINE_PER_CPU(bool, softlockup_touch_sync);
static DEFINE_PER_CPU(bool, soft_watchdog_warn);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
static unsigned long soft_lockup_nmi_warn;

unsigned int __read_mostly softlockup_panic =
@@ -254,6 +254,10 @@ void __weak watchdog_nmi_disable(unsigned int cpu)
{
}

void __weak watchdog_check_hardlockup_other_cpu(void)
{
}

static int watchdog_enable_all_cpus(void);
static void watchdog_disable_all_cpus(void);

@@ -271,6 +275,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
	/* kick the hardlockup detector */
	watchdog_interrupt_count();

	/* test for hardlockups on the next cpu */
	watchdog_check_hardlockup_other_cpu();

	/* kick the softlockup detector */
	wake_up_process(__this_cpu_read(softlockup_watchdog));

+110 −0
Original line number Diff line number Diff line
@@ -18,7 +18,11 @@

static DEFINE_PER_CPU(bool, hard_watchdog_warn);
static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
#ifdef CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU
static cpumask_t __read_mostly watchdog_cpus;
#else
static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
#endif

/* boot commands */
/*
@@ -26,7 +30,10 @@ static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
 */
unsigned int __read_mostly hardlockup_panic =
			CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;

#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI
static unsigned long hardlockup_allcpu_dumped;
#endif
/*
 * We may not want to enable hard lockup detection by default in all cases,
 * for example when running the kernel as a guest on a hypervisor. In these
@@ -68,6 +75,108 @@ void touch_nmi_watchdog(void)
}
EXPORT_SYMBOL(touch_nmi_watchdog);

#ifdef CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU
static unsigned int watchdog_next_cpu(unsigned int cpu)
{
	cpumask_t cpus = watchdog_cpus;
	unsigned int next_cpu;

	next_cpu = cpumask_next(cpu, &cpus);
	if (next_cpu >= nr_cpu_ids)
		next_cpu = cpumask_first(&cpus);

	if (next_cpu == cpu)
		return nr_cpu_ids;

	return next_cpu;
}

static int is_hardlockup_other_cpu(unsigned int cpu)
{
	unsigned long hrint = per_cpu(hrtimer_interrupts, cpu);

	if (per_cpu(hrtimer_interrupts_saved, cpu) == hrint)
		return 1;

	per_cpu(hrtimer_interrupts_saved, cpu) = hrint;
	return 0;
}

void watchdog_check_hardlockup_other_cpu(void)
{
	unsigned int next_cpu;

	/*
	 * Test for hardlockups every 3 samples.  The sample period is
	 *  watchdog_thresh * 2 / 5, so 3 samples gets us back to slightly over
	 *  watchdog_thresh (over by 20%).
	 */
	if (__this_cpu_read(hrtimer_interrupts) % 3 != 0)
		return;

	/* check for a hardlockup on the next cpu */
	next_cpu = watchdog_next_cpu(smp_processor_id());
	if (next_cpu >= nr_cpu_ids)
		return;

	smp_rmb();

	if (per_cpu(watchdog_nmi_touch, next_cpu) == true) {
		per_cpu(watchdog_nmi_touch, next_cpu) = false;
		return;
	}

	if (is_hardlockup_other_cpu(next_cpu)) {
		/* only warn once */
		if (per_cpu(hard_watchdog_warn, next_cpu) == true)
			return;

		if (hardlockup_panic)
			panic("Watchdog detected hard LOCKUP on cpu %u",
				next_cpu);
		else
			WARN(1, "Watchdog detected hard LOCKUP on cpu %u",
				next_cpu);

		per_cpu(hard_watchdog_warn, next_cpu) = true;
	} else {
		per_cpu(hard_watchdog_warn, next_cpu) = false;
	}
}

int watchdog_nmi_enable(unsigned int cpu)
{
	/*
	 * The new cpu will be marked online before the first hrtimer interrupt
	 * runs on it.  If another cpu tests for a hardlockup on the new cpu
	 * before it has run its first hrtimer, it will get a false positive.
	 * Touch the watchdog on the new cpu to delay the first check for at
	 * least 3 sampling periods to guarantee one hrtimer has run on the new
	 * cpu.
	 */
	per_cpu(watchdog_nmi_touch, cpu) = true;
	smp_wmb();
	cpumask_set_cpu(cpu, &watchdog_cpus);
	return 0;
}

void watchdog_nmi_disable(unsigned int cpu)
{
	unsigned int next_cpu = watchdog_next_cpu(cpu);

	/*
	 * Offlining this cpu will cause the cpu before this one to start
	 * checking the one after this one.  If this cpu just finished checking
	 * the next cpu and updating hrtimer_interrupts_saved, and then the
	 * previous cpu checks it within one sample period, it will trigger a
	 * false positive.  Touch the watchdog on the next cpu to prevent it.
	 */
	if (next_cpu < nr_cpu_ids)
		per_cpu(watchdog_nmi_touch, next_cpu) = true;
	smp_wmb();
	cpumask_clear_cpu(cpu, &watchdog_cpus);
}
#else
static struct perf_event_attr wd_hw_attr = {
	.type		= PERF_TYPE_HARDWARE,
	.config		= PERF_COUNT_HW_CPU_CYCLES,
@@ -228,3 +337,4 @@ void watchdog_nmi_disable(unsigned int cpu)
		cpu0_err = 0;
	}
}
#endif
+10 −1
Original line number Diff line number Diff line
@@ -779,11 +779,20 @@ config LOCKUP_DETECTOR
	  The frequency of hrtimer and NMI events and the soft and hard lockup
	  thresholds can be controlled through the sysctl watchdog_thresh.

config HARDLOCKUP_DETECTOR
config HARDLOCKUP_DETECTOR_NMI
	def_bool y
	depends on LOCKUP_DETECTOR && !HAVE_NMI_WATCHDOG
	depends on PERF_EVENTS && HAVE_PERF_EVENTS_NMI

config HARDLOCKUP_DETECTOR_OTHER_CPU
       def_bool y
       depends on LOCKUP_DETECTOR && SMP
       depends on !HARDLOCKUP_DETECTOR_NMI && !HAVE_NMI_WATCHDOG

config HARDLOCKUP_DETECTOR
       def_bool y
       depends on HARDLOCKUP_DETECTOR_NMI || HARDLOCKUP_DETECTOR_OTHER_CPU

config BOOTPARAM_HARDLOCKUP_PANIC
	bool "Panic (Reboot) On Hard Lockups"
	depends on HARDLOCKUP_DETECTOR