Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit fb34a08c authored by Jason Baron, committed by Frederic Weisbecker
Browse files

tracing: Add trace events for each syscall entry/exit



Layer Frederic's syscall tracer on tracepoints. We create trace events
via hooking into the SYSCALL_DEFINE macros. This allows us to
individually toggle syscall entry and exit points on/off.

Signed-off-by: Jason Baron <jbaron@redhat.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Cc: Jiaying Zhang <jiayingz@google.com>
Cc: Martin Bligh <mbligh@google.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
parent 69fd4f0e
Loading
Loading
Loading
Loading
+59 −2
Original line number Diff line number Diff line
@@ -64,6 +64,7 @@ struct perf_counter_attr;
#include <linux/sem.h>
#include <asm/siginfo.h>
#include <asm/signal.h>
#include <linux/unistd.h>
#include <linux/quota.h>
#include <linux/key.h>
#include <trace/syscall.h>
@@ -112,6 +113,59 @@ struct perf_counter_attr;
#define __SC_STR_TDECL5(t, a, ...)	#t, __SC_STR_TDECL4(__VA_ARGS__)
#define __SC_STR_TDECL6(t, a, ...)	#t, __SC_STR_TDECL5(__VA_ARGS__)


/*
 * SYSCALL_TRACE_ENTER_EVENT(sname) - emit the per-syscall "enter" trace event.
 *
 * Expands to three pieces:
 *  - a forward declaration of event_enter_<sname>, so the init function
 *    below can refer to it before it is defined;
 *  - init_enter_<sname>(), installed as .raw_init: it returns -ENOSYS when
 *    syscall_name_to_nr("sys" #sname) is negative; otherwise it calls
 *    register_ftrace_event(&event_syscall_enter) (the return value is not
 *    checked), initializes the event's fields list, calls init_preds() on
 *    the event and returns 0;
 *  - the definition of event_enter_<sname>, placed in the "_ftrace_events"
 *    section, named "sys_enter" #sname within the "syscalls" system.
 *
 * sname is stringified verbatim. SYSCALL_DEFINE0 passes it with a leading
 * underscore (_##sname), so the strings read "sys_<name>" and
 * "sys_enter_<name>".
 *
 * NOTE(review): .data holds the "sys" #sname string; presumably this is the
 * pointer later handed to .regfunc/.unregfunc, which treat their argument as
 * a syscall name - confirm against the event core.
 */
#define SYSCALL_TRACE_ENTER_EVENT(sname)				\
	static struct ftrace_event_call event_enter_##sname;		\
	static int init_enter_##sname(void)				\
	{								\
		int num;						\
		num = syscall_name_to_nr("sys"#sname);			\
		if (num < 0)						\
			return -ENOSYS;					\
		register_ftrace_event(&event_syscall_enter);		\
		INIT_LIST_HEAD(&event_enter_##sname.fields);		\
		init_preds(&event_enter_##sname);			\
		return 0;						\
	}								\
	static struct ftrace_event_call __used				\
	  __attribute__((__aligned__(4)))				\
	  __attribute__((section("_ftrace_events")))			\
	  event_enter_##sname = {					\
		.name                   = "sys_enter"#sname,		\
		.system                 = "syscalls",			\
		.event                  = &event_syscall_enter,		\
		.raw_init		= init_enter_##sname,		\
		.regfunc		= reg_event_syscall_enter,	\
		.unregfunc		= unreg_event_syscall_enter,	\
		.data			= "sys"#sname,			\
	}

/*
 * SYSCALL_TRACE_EXIT_EVENT(sname) - emit the per-syscall "exit" trace event.
 *
 * Expands to three pieces:
 *  - a forward declaration of event_exit_<sname>, so the init function
 *    below can refer to it before it is defined;
 *  - init_exit_<sname>(), installed as .raw_init: it returns -ENOSYS when
 *    syscall_name_to_nr("sys" #sname) is negative; otherwise it calls
 *    register_ftrace_event(&event_syscall_exit) (the return value is not
 *    checked), initializes the event's fields list, calls init_preds() on
 *    the event and returns 0;
 *  - the definition of event_exit_<sname>, placed in the "_ftrace_events"
 *    section, named "sys_exit" #sname within the "syscalls" system.
 *
 * sname is stringified verbatim. SYSCALL_DEFINE0 passes it with a leading
 * underscore (_##sname), so the strings read "sys_<name>" and
 * "sys_exit_<name>".
 *
 * NOTE(review): .data holds the "sys" #sname string; presumably this is the
 * pointer later handed to .regfunc/.unregfunc, which treat their argument as
 * a syscall name - confirm against the event core.
 */
#define SYSCALL_TRACE_EXIT_EVENT(sname)					\
	static struct ftrace_event_call event_exit_##sname;		\
	static int init_exit_##sname(void)				\
	{								\
		int num;						\
		num = syscall_name_to_nr("sys"#sname);			\
		if (num < 0)						\
			return -ENOSYS;					\
		register_ftrace_event(&event_syscall_exit);		\
		INIT_LIST_HEAD(&event_exit_##sname.fields);		\
		init_preds(&event_exit_##sname);			\
		return 0;						\
	}								\
	static struct ftrace_event_call __used				\
	  __attribute__((__aligned__(4)))				\
	  __attribute__((section("_ftrace_events")))			\
	  event_exit_##sname = {					\
		.name                   = "sys_exit"#sname,		\
		.system                 = "syscalls",			\
		.event                  = &event_syscall_exit,		\
		.raw_init		= init_exit_##sname,		\
		.regfunc		= reg_event_syscall_exit,	\
		.unregfunc		= unreg_event_syscall_exit,	\
		.data			= "sys"#sname,			\
	}

#define SYSCALL_METADATA(sname, nb)				\
	static const struct syscall_metadata __used		\
	  __attribute__((__aligned__(4)))			\
@@ -121,7 +175,9 @@ struct perf_counter_attr;
		.nb_args 	= nb,				\
		.types		= types_##sname,		\
		.args		= args_##sname,			\
	}
	};							\
	SYSCALL_TRACE_ENTER_EVENT(sname);			\
	SYSCALL_TRACE_EXIT_EVENT(sname);

#define SYSCALL_DEFINE0(sname)					\
	static const struct syscall_metadata __used		\
@@ -131,8 +187,9 @@ struct perf_counter_attr;
		.name 		= "sys_"#sname,			\
		.nb_args 	= 0,				\
	};							\
	SYSCALL_TRACE_ENTER_EVENT(_##sname);			\
	SYSCALL_TRACE_EXIT_EVENT(_##sname);			\
	asmlinkage long sys_##sname(void)

#else
#define SYSCALL_DEFINE0(name)	   asmlinkage long sys_##name(void)
#endif
+9 −9
Original line number Diff line number Diff line
@@ -2,6 +2,8 @@
#define _TRACE_SYSCALL_H

#include <linux/tracepoint.h>
#include <linux/unistd.h>
#include <linux/ftrace_event.h>

#include <asm/ptrace.h>

@@ -40,15 +42,13 @@ struct syscall_metadata {

#ifdef CONFIG_FTRACE_SYSCALLS
extern struct syscall_metadata *syscall_nr_to_meta(int nr);
extern void start_ftrace_syscalls(void);
extern void stop_ftrace_syscalls(void);
extern void ftrace_syscall_enter(struct pt_regs *regs);
extern void ftrace_syscall_exit(struct pt_regs *regs);
#else
static inline void start_ftrace_syscalls(void)			{ }
static inline void stop_ftrace_syscalls(void)			{ }
static inline void ftrace_syscall_enter(struct pt_regs *regs)	{ }
static inline void ftrace_syscall_exit(struct pt_regs *regs)	{ }
extern int syscall_name_to_nr(char *name);
extern struct trace_event event_syscall_enter;
extern struct trace_event event_syscall_exit;
extern int reg_event_syscall_enter(void *ptr);
extern void unreg_event_syscall_enter(void *ptr);
extern int reg_event_syscall_exit(void *ptr);
extern void unreg_event_syscall_exit(void *ptr);
#endif

#endif /* _TRACE_SYSCALL_H */
+91 −92
Original line number Diff line number Diff line
#include <trace/syscall.h>
#include <linux/kernel.h>
#include <linux/ftrace.h>
#include <asm/syscall.h>

#include "trace_output.h"
#include "trace.h"

/* Keep a counter of the syscall tracing users */
static int refcount;

/* Prevent from races on thread flags toggling */
static DEFINE_MUTEX(syscall_trace_lock);
static int sys_refcount_enter;
static int sys_refcount_exit;
static DECLARE_BITMAP(enabled_enter_syscalls, FTRACE_SYSCALL_MAX);
static DECLARE_BITMAP(enabled_exit_syscalls, FTRACE_SYSCALL_MAX);

/* Option to display the parameters types */
enum {
@@ -95,53 +96,7 @@ print_syscall_exit(struct trace_iterator *iter, int flags)
	return TRACE_TYPE_HANDLED;
}

/*
 * Take one reference on syscall tracing.
 *
 * The reference count is bumped under syscall_trace_lock. Only the
 * transition to exactly one user walks every thread (with tasklist_lock
 * read-held and interrupts saved) and sets TIF_SYSCALL_FTRACE on it;
 * later callers just account for themselves.
 */
void start_ftrace_syscalls(void)
{
	struct task_struct *grp, *tsk;
	unsigned long irq_flags;

	mutex_lock(&syscall_trace_lock);

	/* First user: flag every existing thread, exactly once */
	if (++refcount == 1) {
		read_lock_irqsave(&tasklist_lock, irq_flags);

		do_each_thread(grp, tsk) {
			set_tsk_thread_flag(tsk, TIF_SYSCALL_FTRACE);
		} while_each_thread(grp, tsk);

		read_unlock_irqrestore(&tasklist_lock, irq_flags);
	}

	mutex_unlock(&syscall_trace_lock);
}

/*
 * Drop one reference on syscall tracing.
 *
 * The reference count is decremented under syscall_trace_lock. Only when
 * it reaches zero is every thread walked (with tasklist_lock read-held and
 * interrupts saved) to clear TIF_SYSCALL_FTRACE; while other users remain
 * the flags are left alone.
 */
void stop_ftrace_syscalls(void)
{
	struct task_struct *grp, *tsk;
	unsigned long irq_flags;

	mutex_lock(&syscall_trace_lock);

	/* Last user gone: clear the flag on every thread */
	if (--refcount == 0) {
		read_lock_irqsave(&tasklist_lock, irq_flags);

		do_each_thread(grp, tsk) {
			clear_tsk_thread_flag(tsk, TIF_SYSCALL_FTRACE);
		} while_each_thread(grp, tsk);

		read_unlock_irqrestore(&tasklist_lock, irq_flags);
	}

	mutex_unlock(&syscall_trace_lock);
}

void ftrace_syscall_enter(struct pt_regs *regs)
void ftrace_syscall_enter(struct pt_regs *regs, long id)
{
	struct syscall_trace_enter *entry;
	struct syscall_metadata *sys_data;
@@ -150,6 +105,8 @@ void ftrace_syscall_enter(struct pt_regs *regs)
	int syscall_nr;

	syscall_nr = syscall_get_nr(current, regs);
	if (!test_bit(syscall_nr, enabled_enter_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
@@ -170,7 +127,7 @@ void ftrace_syscall_enter(struct pt_regs *regs)
	trace_wake_up();
}

void ftrace_syscall_exit(struct pt_regs *regs)
void ftrace_syscall_exit(struct pt_regs *regs, long ret)
{
	struct syscall_trace_exit *entry;
	struct syscall_metadata *sys_data;
@@ -178,6 +135,8 @@ void ftrace_syscall_exit(struct pt_regs *regs)
	int syscall_nr;

	syscall_nr = syscall_get_nr(current, regs);
	if (!test_bit(syscall_nr, enabled_exit_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
@@ -196,54 +155,94 @@ void ftrace_syscall_exit(struct pt_regs *regs)
	trace_wake_up();
}

static int init_syscall_tracer(struct trace_array *tr)
int reg_event_syscall_enter(void *ptr)
{
	start_ftrace_syscalls();

	return 0;
	int ret = 0;
	int num;
	char *name;

	name = (char *)ptr;
	num = syscall_name_to_nr(name);
	if (num < 0 || num >= FTRACE_SYSCALL_MAX)
		return -ENOSYS;
	mutex_lock(&syscall_trace_lock);
	if (!sys_refcount_enter)
		ret = register_trace_syscall_enter(ftrace_syscall_enter);
	if (ret) {
		pr_info("event trace: Could not activate"
				"syscall entry trace point");
	} else {
		set_bit(num, enabled_enter_syscalls);
		sys_refcount_enter++;
	}

static void reset_syscall_tracer(struct trace_array *tr)
{
	stop_ftrace_syscalls();
	tracing_reset_online_cpus(tr);
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

static struct trace_event syscall_enter_event = {
	.type	 	= TRACE_SYSCALL_ENTER,
	.trace		= print_syscall_enter,
};
void unreg_event_syscall_enter(void *ptr)
{
	int num;
	char *name;

static struct trace_event syscall_exit_event = {
	.type	 	= TRACE_SYSCALL_EXIT,
	.trace		= print_syscall_exit,
};
	name = (char *)ptr;
	num = syscall_name_to_nr(name);
	if (num < 0 || num >= FTRACE_SYSCALL_MAX)
		return;
	mutex_lock(&syscall_trace_lock);
	sys_refcount_enter--;
	clear_bit(num, enabled_enter_syscalls);
	if (!sys_refcount_enter)
		unregister_trace_syscall_enter(ftrace_syscall_enter);
	mutex_unlock(&syscall_trace_lock);
}

static struct tracer syscall_tracer __read_mostly = {
	.name	     	= "syscall",
	.init		= init_syscall_tracer,
	.reset		= reset_syscall_tracer,
	.flags		= &syscalls_flags,
};
/*
 * Enable exit tracing for a single syscall.
 *
 * @ptr: syscall name string; it is resolved to a syscall number with
 *       syscall_name_to_nr().
 *
 * Under syscall_trace_lock, the first enabled exit event registers
 * ftrace_syscall_exit through register_trace_syscall_exit(). On success
 * the syscall's bit is set in enabled_exit_syscalls and sys_refcount_exit
 * is incremented.
 *
 * Returns 0 on success, -ENOSYS when the name does not resolve to a number
 * in [0, FTRACE_SYSCALL_MAX), or the error from
 * register_trace_syscall_exit(); in the error cases neither the bitmap nor
 * the reference count is touched.
 */
int reg_event_syscall_exit(void *ptr)
{
	int ret = 0;
	int num;
	char *name;

	name = (char *)ptr;
	num = syscall_name_to_nr(name);
	if (num < 0 || num >= FTRACE_SYSCALL_MAX)
		return -ENOSYS;
	mutex_lock(&syscall_trace_lock);
	/* Only the first user needs to hook ftrace_syscall_exit */
	if (!sys_refcount_exit)
		ret = register_trace_syscall_exit(ftrace_syscall_exit);
	if (ret) {
		/*
		 * Kept as one literal: the previous two-piece string was
		 * concatenated without a separating space and had no
		 * terminating newline.
		 */
		pr_info("event trace: Could not activate syscall exit trace point\n");
	} else {
		set_bit(num, enabled_exit_syscalls);
		sys_refcount_exit++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

__init int register_ftrace_syscalls(void)
void unreg_event_syscall_exit(void *ptr)
{
	int ret;
	int num;
	char *name;

	ret = register_ftrace_event(&syscall_enter_event);
	if (!ret) {
		printk(KERN_WARNING "event %d failed to register\n",
		       syscall_enter_event.type);
		WARN_ON_ONCE(1);
	name = (char *)ptr;
	num = syscall_name_to_nr(name);
	if (num < 0 || num >= FTRACE_SYSCALL_MAX)
		return;
	mutex_lock(&syscall_trace_lock);
	sys_refcount_exit--;
	clear_bit(num, enabled_exit_syscalls);
	if (!sys_refcount_exit)
		unregister_trace_syscall_exit(ftrace_syscall_exit);
	mutex_unlock(&syscall_trace_lock);
}

	ret = register_ftrace_event(&syscall_exit_event);
	if (!ret) {
		printk(KERN_WARNING "event %d failed to register\n",
		       syscall_exit_event.type);
		WARN_ON_ONCE(1);
	}
/*
 * trace_event descriptor referenced by every per-syscall "enter" event
 * (through their .event pointer): type TRACE_SYSCALL_ENTER, with
 * print_syscall_enter as the .trace callback.
 */
struct trace_event event_syscall_enter = {
	.type	= TRACE_SYSCALL_ENTER,
	.trace	= print_syscall_enter,
};

	return register_tracer(&syscall_tracer);
}
device_initcall(register_ftrace_syscalls);
/*
 * trace_event descriptor referenced by every per-syscall "exit" event
 * (through their .event pointer): type TRACE_SYSCALL_EXIT, with
 * print_syscall_exit as the .trace callback.
 */
struct trace_event event_syscall_exit = {
	.type	= TRACE_SYSCALL_EXIT,
	.trace	= print_syscall_exit,
};