Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e162b39a authored by Mandeep Singh Baines, committed by Ingo Molnar
Browse files

softlockup: decouple hung tasks check from softlockup detection



Decoupling allows:

* hung tasks check to happen at very low priority

* hung tasks check and softlockup to be enabled/disabled independently
  at compile and/or run-time

* individual panic settings to be enabled/disabled independently
  at compile and/or run-time

* softlockup threshold to be reduced without increasing hung tasks
  poll frequency (hung task check is expensive relative to the softlockup
  watchdog)

* hung task check to be zero over-head when disabled at run-time

Signed-off-by: Mandeep Singh Baines <msb@google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent c903ff83
Loading
Loading
Loading
Loading
+10 −4
Original line number Diff line number Diff line
@@ -297,9 +297,6 @@ extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
				    struct file *filp, void __user *buffer,
				    size_t *lenp, loff_t *ppos);
extern unsigned int  softlockup_panic;
extern unsigned long sysctl_hung_task_check_count;
extern unsigned long sysctl_hung_task_timeout_secs;
extern unsigned long sysctl_hung_task_warnings;
extern int softlockup_thresh;
#else
static inline void softlockup_tick(void)
@@ -316,6 +313,15 @@ static inline void touch_all_softlockup_watchdogs(void)
}
#endif

#ifdef CONFIG_DETECT_HUNG_TASK
extern unsigned int  sysctl_hung_task_panic;
extern unsigned long sysctl_hung_task_check_count;
extern unsigned long sysctl_hung_task_timeout_secs;
extern unsigned long sysctl_hung_task_warnings;
extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
					 struct file *filp, void __user *buffer,
					 size_t *lenp, loff_t *ppos);
#endif

/* Attach to any functions which should be ignored in wchan output. */
#define __sched		__attribute__((__section__(".sched.text")))
@@ -1236,7 +1242,7 @@ struct task_struct {
/* ipc stuff */
	struct sysv_sem sysvsem;
#endif
#ifdef CONFIG_DETECT_SOFTLOCKUP
#ifdef CONFIG_DETECT_HUNG_TASK
/* hung task detection */
	unsigned long last_switch_timestamp;
	unsigned long last_switch_count;
+1 −0
Original line number Diff line number Diff line
@@ -74,6 +74,7 @@ obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
obj-$(CONFIG_SECCOMP) += seccomp.o
obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o

kernel/hung_task.c

0 → 100644
+198 −0
Original line number Diff line number Diff line
/*
 * Detect Hung Task
 *
 * kernel/hung_task.c - kernel thread for detecting tasks stuck in D state
 *
 */

#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/nmi.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/lockdep.h>
#include <linux/module.h>
#include <linux/sysctl.h>

/*
 * Have a reasonable limit on the number of tasks checked:
 */
unsigned long __read_mostly sysctl_hung_task_check_count = 1024;

/*
 * Zero means infinite timeout - no checking done:
 */
unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120;
/*
 * Sleep interval of the watchdog thread, in jiffies. Recomputed by
 * update_poll_jiffies() from sysctl_hung_task_timeout_secs:
 */
static unsigned long __read_mostly hung_task_poll_jiffies;

/*
 * Number of hung task reports still allowed. Decremented on every
 * report; once it reaches zero no further reports are printed:
 */
unsigned long __read_mostly sysctl_hung_task_warnings = 10;

/* Set by the panic notifier; a non-zero value stops the hung task scan: */
static int __read_mostly did_panic;

/*
 * The "khungtaskd" kthread created by hung_task_init(); woken up when
 * the timeout sysctl is written:
 */
static struct task_struct *watchdog_task;

/*
 * Should we panic (and reboot, if panic_timeout= is set) when a
 * hung task is detected. The initial value can be overridden with the
 * "hung_task_panic=" boot parameter:
 */
unsigned int __read_mostly sysctl_hung_task_panic =
				CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE;

/*
 * Boot parameter handler for "hung_task_panic=": parse the numeric
 * argument (base auto-detected) and store it in sysctl_hung_task_panic.
 * Always returns 1.
 */
static int __init hung_task_panic_setup(char *str)
{
	unsigned long requested = simple_strtoul(str, NULL, 0);

	sysctl_hung_task_panic = requested;
	return 1;
}
__setup("hung_task_panic=", hung_task_panic_setup);

/*
 * Panic notifier callback: record that a panic happened so that the
 * hung task scan stops reporting. None of the arguments are used.
 */
static int hung_task_panic(struct notifier_block *this, unsigned long event,
			   void *ptr)
{
	did_panic = 1;
	return NOTIFY_DONE;
}

/* Notifier block that hung_task_init() hooks into panic_notifier_list: */
static struct notifier_block panic_block = {
	.notifier_call	= hung_task_panic,
};

/*
 * Coarse timestamp in units of 2^30 ns (about 1.074 s), i.e. roughly
 * seconds.  Second-level precision is all that is needed here, so a
 * shift is used in place of a full division by 10^9.
 */
static unsigned long get_timestamp(void)
{
	int cpu = raw_smp_processor_id();

	/* 2^30 ~= 10^9 */
	return cpu_clock(cpu) >> 30LL;
}

/*
 * Examine one task and report it if it has been blocked for at least
 * sysctl_hung_task_timeout_secs.
 *
 * @t:   task to examine
 * @now: current timestamp as returned by get_timestamp()
 *
 * Progress is tracked through the task's context switch counters:
 * whenever their sum changed since the last look (or no timestamp has
 * been recorded yet) the per-task timestamp is refreshed and nothing is
 * reported.  Frozen tasks are skipped.  Each report consumes one unit of
 * sysctl_hung_task_warnings; if sysctl_hung_task_panic is set the report
 * is followed by a panic.
 */
static void check_hung_task(struct task_struct *t, unsigned long now)
{
	unsigned long switch_count = t->nvcsw + t->nivcsw;
	/* Work from a local copy so the check and the message agree */
	unsigned long timeout = sysctl_hung_task_timeout_secs;
	long blocked;

	if (t->flags & PF_FROZEN)
		return;

	if (switch_count != t->last_switch_count || !t->last_switch_timestamp) {
		t->last_switch_count = switch_count;
		t->last_switch_timestamp = now;
		return;
	}

	/*
	 * Zero means "no checking".  The watchdog normally sleeps forever
	 * in that case, but the sysctl may be cleared while a scan is
	 * already under way.
	 */
	if (!timeout)
		return;

	/*
	 * Test the sign explicitly: comparing a long directly against the
	 * unsigned long timeout would convert a negative delta into a huge
	 * positive value and report a bogus hang.
	 */
	blocked = (long)(now - t->last_switch_timestamp);
	if (blocked < 0 || (unsigned long)blocked < timeout)
		return;

	if (!sysctl_hung_task_warnings)
		return;
	sysctl_hung_task_warnings--;

	/*
	 * Ok, the task did not get scheduled for longer than the
	 * configured timeout, complain:
	 */
	printk(KERN_ERR "INFO: task %s:%d blocked for more than "
			"%lu seconds.\n", t->comm, t->pid, timeout);
	printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
			" disables this message.\n");
	sched_show_task(t);
	__debug_show_held_locks(t);

	/* Restart the interval so the same task is not reported every scan */
	t->last_switch_timestamp = now;
	touch_nmi_watchdog();

	if (sysctl_hung_task_panic)
		panic("hung_task: blocked tasks");
}

/*
 * Check whether a TASK_UNINTERRUPTIBLE task does not get woken up for
 * a really long time (sysctl_hung_task_timeout_secs). If that happens,
 * print out a warning.
 *
 * The task list is walked under tasklist_lock and at most
 * sysctl_hung_task_check_count threads are visited per scan.
 */
static void check_hung_uninterruptible_tasks(void)
{
	/* Same type as the sysctl, so large limits are not truncated */
	unsigned long max_count = sysctl_hung_task_check_count;
	unsigned long now = get_timestamp();
	struct task_struct *g, *t;

	/*
	 * If the system crashed already then all bets are off,
	 * do not report extra hung tasks:
	 */
	if (test_taint(TAINT_DIE) || did_panic)
		return;

	read_lock(&tasklist_lock);
	do_each_thread(g, t) {
		/*
		 * Post-decrement so that a limit of N visits exactly N
		 * threads and a limit of 0 visits none.
		 */
		if (!max_count--)
			goto unlock;
		/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
		if (t->state == TASK_UNINTERRUPTIBLE)
			check_hung_task(t, now);
	} while_each_thread(g, t);
 unlock:
	read_unlock(&tasklist_lock);
}

/*
 * Recompute the watchdog sleep interval from the timeout sysctl: half
 * the timeout, expressed in jiffies, or MAX_SCHEDULE_TIMEOUT when the
 * timeout is zero.
 */
static void update_poll_jiffies(void)
{
	if (sysctl_hung_task_timeout_secs != 0) {
		hung_task_poll_jiffies = sysctl_hung_task_timeout_secs * HZ / 2;
		return;
	}

	/* timeout of 0 will disable the watchdog */
	hung_task_poll_jiffies = MAX_SCHEDULE_TIMEOUT;
}

/*
 * Process updating of timeout sysctl
 *
 * The value itself is handled by proc_doulongvec_minmax(); its return
 * value is passed back to the caller.  After a successful write the
 * poll interval is recomputed and the watchdog thread is woken up so
 * that it goes back to sleep using the new interval.
 */
int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
				  struct file *filp, void __user *buffer,
				  size_t *lenp, loff_t *ppos)
{
	int ret;

	ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
	if (ret || !write)
		return ret;

	update_poll_jiffies();

	/*
	 * The watchdog thread may not exist if kthread_run() failed at
	 * init time; never hand a NULL or error pointer to
	 * wake_up_process().
	 */
	if (watchdog_task && !IS_ERR(watchdog_task))
		wake_up_process(watchdog_task);

	return ret;
}

/*
 * Body of the kthread that looks for tasks stuck in D state: sleep for
 * hung_task_poll_jiffies, then scan the task list, forever.
 */
static int watchdog(void *dummy)
{
	set_user_nice(current, 0);
	update_poll_jiffies();

	while (1) {
		/*
		 * A non-zero return is treated as "not due yet": sleep
		 * again, picking up the current value of the interval.
		 */
		if (schedule_timeout_interruptible(hung_task_poll_jiffies))
			continue;

		check_hung_uninterruptible_tasks();
	}

	return 0;
}

/*
 * Register the panic notifier and start the "khungtaskd" watchdog
 * thread.
 *
 * Returns 0 on success, or the error reported by kthread_run() if the
 * thread could not be created.  On failure the notifier is removed
 * again and watchdog_task is left NULL rather than holding an error
 * pointer.
 */
static int __init hung_task_init(void)
{
	struct task_struct *task;

	atomic_notifier_chain_register(&panic_notifier_list, &panic_block);

	task = kthread_run(watchdog, NULL, "khungtaskd");
	if (IS_ERR(task)) {
		atomic_notifier_chain_unregister(&panic_notifier_list,
						 &panic_block);
		return PTR_ERR(task);
	}
	watchdog_task = task;

	return 0;
}

module_init(hung_task_init);
+0 −100
Original line number Diff line number Diff line
@@ -165,98 +165,12 @@ void softlockup_tick(void)
		panic("softlockup: hung tasks");
}

/*
 * Have a reasonable limit on the number of tasks checked:
 */
unsigned long __read_mostly sysctl_hung_task_check_count = 1024;

/*
 * Zero means infinite timeout - no checking done:
 */
unsigned long __read_mostly sysctl_hung_task_timeout_secs = 480;

unsigned long __read_mostly sysctl_hung_task_warnings = 10;

/*
 * Only do the hung-tasks check on one CPU:
 */
static int check_cpu __read_mostly = -1;

static void check_hung_task(struct task_struct *t, unsigned long now)
{
	unsigned long switch_count = t->nvcsw + t->nivcsw;

	if (t->flags & PF_FROZEN)
		return;

	if (switch_count != t->last_switch_count || !t->last_switch_timestamp) {
		t->last_switch_count = switch_count;
		t->last_switch_timestamp = now;
		return;
	}
	if ((long)(now - t->last_switch_timestamp) <
					sysctl_hung_task_timeout_secs)
		return;
	if (!sysctl_hung_task_warnings)
		return;
	sysctl_hung_task_warnings--;

	/*
	 * Ok, the task did not get scheduled for more than 2 minutes,
	 * complain:
	 */
	printk(KERN_ERR "INFO: task %s:%d blocked for more than "
			"%ld seconds.\n", t->comm, t->pid,
			sysctl_hung_task_timeout_secs);
	printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
			" disables this message.\n");
	sched_show_task(t);
	__debug_show_held_locks(t);

	t->last_switch_timestamp = now;
	touch_nmi_watchdog();

	if (softlockup_panic)
		panic("softlockup: blocked tasks");
}

/*
 * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
 * a really long time (120 seconds). If that happens, print out
 * a warning.
 */
static void check_hung_uninterruptible_tasks(int this_cpu)
{
	int max_count = sysctl_hung_task_check_count;
	unsigned long now = get_timestamp(this_cpu);
	struct task_struct *g, *t;

	/*
	 * If the system crashed already then all bets are off,
	 * do not report extra hung tasks:
	 */
	if (test_taint(TAINT_DIE) || did_panic)
		return;

	read_lock(&tasklist_lock);
	do_each_thread(g, t) {
		if (!--max_count)
			goto unlock;
		/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
		if (t->state == TASK_UNINTERRUPTIBLE)
			check_hung_task(t, now);
	} while_each_thread(g, t);
 unlock:
	read_unlock(&tasklist_lock);
}

/*
 * The watchdog thread - runs every second and touches the timestamp.
 */
static int watchdog(void *__bind_cpu)
{
	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
	int this_cpu = (long)__bind_cpu;

	sched_setscheduler(current, SCHED_FIFO, &param);

@@ -276,11 +190,6 @@ static int watchdog(void *__bind_cpu)
		if (kthread_should_stop())
			break;

		if (this_cpu == check_cpu) {
			if (sysctl_hung_task_timeout_secs)
				check_hung_uninterruptible_tasks(this_cpu);
		}

		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);
@@ -312,18 +221,9 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
		break;
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		check_cpu = cpumask_any(cpu_online_mask);
		wake_up_process(per_cpu(watchdog_task, hotcpu));
		break;
#ifdef CONFIG_HOTPLUG_CPU
	case CPU_DOWN_PREPARE:
	case CPU_DOWN_PREPARE_FROZEN:
		if (hotcpu == check_cpu) {
			/* Pick any other online cpu. */
			check_cpu = cpumask_any_but(cpu_online_mask, hotcpu);
		}
		break;

	case CPU_UP_CANCELED:
	case CPU_UP_CANCELED_FROZEN:
		if (!per_cpu(watchdog_task, hotcpu))
+14 −1
Original line number Diff line number Diff line
@@ -805,6 +805,19 @@ static struct ctl_table kern_table[] = {
		.extra1		= &neg_one,
		.extra2		= &sixty,
	},
#endif
#ifdef CONFIG_DETECT_HUNG_TASK
	{
		.ctl_name	= CTL_UNNUMBERED,
		.procname	= "hung_task_panic",
		.data		= &sysctl_hung_task_panic,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.strategy	= &sysctl_intvec,
		.extra1		= &zero,
		.extra2		= &one,
	},
	{
		.ctl_name	= CTL_UNNUMBERED,
		.procname	= "hung_task_check_count",
@@ -820,7 +833,7 @@ static struct ctl_table kern_table[] = {
		.data		= &sysctl_hung_task_timeout_secs,
		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
		.proc_handler	= &proc_doulongvec_minmax,
		.proc_handler	= &proc_dohung_task_timeout_secs,
		.strategy	= &sysctl_intvec,
	},
	{
Loading