Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 1f833081 authored by Pavankumar Kondeti's avatar Pavankumar Kondeti
Browse files

sched/tracing: GKI friendly long preempt/irq disable traces



Hook into preemptirq trace points and print the call stacks of
scenarios where preemption/irqs is disabled for more than
a use configured threshold. The threshold tunables are available
under /proc/sys/preemptirq directory. All the units are in nano seconds.

The following commands can be used to enable the trace. The stacktrace
trigger prints the stacktrace when the trace event is printed.

echo 1 > /d/tracing/events/preemptirq_long/enable
echo stacktrace > /d/tracing/events/preemptirq_long/preempt_disable_long/trigger
echo stacktrace > /d/tracing/events/preemptirq_long/irq_disable_long/trigger

Sample output is copied below. Note that this format works with both systrace
and commandline ftrace utilities.

   init-1     [005] d..2   187.905211: irq_disable_long: delta=15244480(ns)
   init-1     [005] d..3   187.905216: <stack trace>

 => test_irq_disable_long
 => trace_irq_enable_rcuidle
 => trace_hardirqs_on
 => console_unlock
 => vprintk_emit
 => devkmsg_emit
 => devkmsg_write
 => do_iter_readv_writev
 => do_iter_write
 => do_writev
 => __arm64_sys_writev
 => el0_svc_common
 => el0_svc_handler
 => el0_svc

   kworker/u16:4-419   [000] .N.2   187.905221: preempt_disable_long: delta=4923646(ns)
   kworker/u16:4-419   [000] .N.3   187.905226: <stack trace>
 => test_preempt_disable_long
 => trace_preempt_enable_rcuidle
 => trace_preempt_on
 => preempt_count_sub
 => perf_event_read
 => __perf_event_read_value
 => perf_event_read_value
 => memlat_monitor_work
 => process_one_work
 => worker_thread
 => kthread
 => ret_from_fork

Change-Id: If87a8d65359632b93da10b691e9b83e37000aeb3
Signed-off-by: default avatarPavankumar Kondeti <pkondeti@codeaurora.org>
parent fcecef89
Loading
Loading
Loading
Loading
+42 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2020 The Linux Foundation. All rights reserved.
 */

#undef TRACE_SYSTEM
#define TRACE_SYSTEM preemptirq_long

#if !defined(_TRACE_PREEMPTIRQ_LONG_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_PREEMPTIRQ_LONG_H

#include <linux/tracepoint.h>

DECLARE_EVENT_CLASS(preemptirq_long_template,

	TP_PROTO(u64 delta),

	TP_ARGS(delta),

	TP_STRUCT__entry(
		__field(u64, delta)
	),

	TP_fast_assign(
		__entry->delta = delta;
	),

	TP_printk("delta=%llu(ns)", __entry->delta)
);

DEFINE_EVENT(preemptirq_long_template, irq_disable_long,
	     TP_PROTO(u64 delta),
	     TP_ARGS(delta));

DEFINE_EVENT(preemptirq_long_template, preempt_disable_long,
	     TP_PROTO(u64 delta),
	     TP_ARGS(delta));

#endif /* _TRACE_PREEMPTIRQ_LONG_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
+2 −0
Original line number Diff line number Diff line
@@ -91,3 +91,5 @@ ifdef CONFIG_DEBUG_FS
obj-$(CONFIG_IPC_LOGGING) += ipc_logging_debug.o
endif
libftrace-y := ftrace.o

obj-$(CONFIG_PREEMPTIRQ_TRACEPOINTS) += preemptirq_long.o
+165 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2020, The Linux Foundation. All rights reserved.
 */

#include <linux/module.h>
#include <linux/ftrace.h>
#include <linux/sched.h>
#include <linux/sysctl.h>
#include <linux/printk.h>
#include <linux/sched/clock.h>

#include <trace/events/preemptirq.h>
#include <trace/events/sched.h>

#define CREATE_TRACE_POINTS
#include <trace/events/preemptirq_long.h>

#define IRQSOFF_SENTINEL 0x0fffDEAD

static unsigned int sysctl_preemptoff_tracing_threshold_ns = 1000000;
static unsigned int sysctl_irqsoff_tracing_threshold_ns = 5000000;
static unsigned int sysctl_irqsoff_dmesg_output_enabled;
static unsigned int sysctl_irqsoff_crash_sentinel_value;
static unsigned int sysctl_irqsoff_crash_threshold_ns = 10000000;

static unsigned int half_million = 500000;
static unsigned int one_hundred_million = 100000000;
static unsigned int one_million = 1000000;

static DEFINE_PER_CPU(u64, irq_disabled_ts);
static DEFINE_PER_CPU(u64, preempt_disabled_ts);

static void note_irq_disable(void *u1, unsigned long u2, unsigned long u3)
{
	if (is_idle_task(current))
		return;

	/*
	 * We just have to note down the time stamp here. We
	 * use stacktrace trigger feature to print the stacktrace.
	 */
	this_cpu_write(irq_disabled_ts, sched_clock());
}

static void test_irq_disable_long(void *u1, unsigned long u2, unsigned long u3)
{
	u64 ts = this_cpu_read(irq_disabled_ts);

	if (!ts)
		return;

	this_cpu_write(irq_disabled_ts, 0);
	ts = sched_clock() - ts;

	if (ts > sysctl_irqsoff_tracing_threshold_ns) {
		trace_irq_disable_long(ts);

		if (sysctl_irqsoff_dmesg_output_enabled == IRQSOFF_SENTINEL)
			printk_deferred("D=%llu C:(%ps<-%ps<-%ps<-%ps)\n",
					ts, CALLER_ADDR2, CALLER_ADDR3,
					CALLER_ADDR4, CALLER_ADDR5);
	}

	if (sysctl_irqsoff_crash_sentinel_value == IRQSOFF_SENTINEL &&
			ts > sysctl_irqsoff_crash_threshold_ns) {
		printk_deferred("delta=%llu(ns) > crash_threshold=%llu(ns) Task=%s\n",
				ts, sysctl_irqsoff_crash_threshold_ns,
				current->comm);
		BUG_ON(1);
	}
}

static void note_preempt_disable(void *u1, unsigned long u2, unsigned long u3)
{
	this_cpu_write(preempt_disabled_ts, sched_clock());
}

static void test_preempt_disable_long(void *u1, unsigned long u2,
				      unsigned long u3)
{
	u64 ts = this_cpu_read(preempt_disabled_ts);

	if (!ts)
		return;

	this_cpu_write(preempt_disabled_ts, 0);
	ts = sched_clock() - ts;

	if (ts > sysctl_preemptoff_tracing_threshold_ns)
		trace_preempt_disable_long(ts);
}

static void note_context_switch(void *u1, bool u2, struct task_struct *u3,
				struct task_struct *next)
{
	/*
	 * Discard false positives during context switch to idle.
	 */
	if (is_idle_task(next))
		this_cpu_write(preempt_disabled_ts, 0);
}

static struct ctl_table preemptirq_long_table[] = {
	{
		.procname       = "preemptoff_tracing_threshold_ns",
		.data           = &sysctl_preemptoff_tracing_threshold_ns,
		.maxlen         = sizeof(unsigned int),
		.mode           = 0644,
		.proc_handler   = proc_dointvec,
	},
	{
		.procname	= "irqsoff_tracing_threshold_ns",
		.data		= &sysctl_irqsoff_tracing_threshold_ns,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_douintvec_minmax,
		.extra1		= &half_million,
		.extra2		= &one_hundred_million,
	},
	{
		.procname	= "irqsoff_dmesg_output_enabled",
		.data		= &sysctl_irqsoff_dmesg_output_enabled,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "irqsoff_crash_sentinel_value",
		.data		= &sysctl_irqsoff_crash_sentinel_value,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "irqsoff_crash_threshold_ns",
		.data		= &sysctl_irqsoff_crash_threshold_ns,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_douintvec_minmax,
		.extra1		= &one_million,
		.extra2		= &one_hundred_million,
	},
	{ }
};

static int preemptirq_long_init(void)
{
	if (!register_sysctl("preemptirq", preemptirq_long_table)) {
		pr_err("Fail to register sysctl table\n");
		return -EPERM;
	}

	register_trace_irq_disable(note_irq_disable, NULL);
	register_trace_irq_enable(test_irq_disable_long, NULL);

	register_trace_preempt_disable(note_preempt_disable, NULL);
	register_trace_preempt_enable(test_preempt_disable_long, NULL);

	register_trace_sched_switch(note_context_switch, NULL);
	return 0;
}

module_init(preemptirq_long_init);
MODULE_LICENSE("GPL v2");