Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e12f03d7 authored by Song Liu's avatar Song Liu Committed by Ingo Molnar
Browse files

perf/core: Implement the 'perf_kprobe' PMU



A new PMU type, perf_kprobe is added. Based on attr from perf_event_open(),
perf_kprobe creates a kprobe (or kretprobe) for the perf_event. This
kprobe is private to this perf_event, and thus not added to global
lists, and not available in tracefs.

Two functions, create_local_trace_kprobe() and
destroy_local_trace_kprobe()  are added to created and destroy these
local trace_kprobe.

Signed-off-by: default avatarSong Liu <songliubraving@fb.com>
Signed-off-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: default avatarYonghong Song <yhs@fb.com>
Reviewed-by: default avatarJosef Bacik <jbacik@fb.com>
Cc: <daniel@iogearbox.net>
Cc: <davem@davemloft.net>
Cc: <kernel-team@fb.com>
Cc: <rostedt@goodmis.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20171206224518.3598254-6-songliubraving@fb.com


Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parent 0d8dd67b
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -533,6 +533,10 @@ extern int perf_trace_init(struct perf_event *event);
extern void perf_trace_destroy(struct perf_event *event);
extern int  perf_trace_add(struct perf_event *event, int flags);
extern void perf_trace_del(struct perf_event *event, int flags);
#ifdef CONFIG_KPROBE_EVENTS
extern int  perf_kprobe_init(struct perf_event *event, bool is_retprobe);
extern void perf_kprobe_destroy(struct perf_event *event);
#endif
extern int  ftrace_profile_set_filter(struct perf_event *event, int event_id,
				     char *filter_str);
extern void ftrace_profile_free_filter(struct perf_event *event);
+107 −35
Original line number Diff line number Diff line
@@ -7992,9 +7992,77 @@ static struct pmu perf_tracepoint = {
	.read		= perf_swevent_read,
};

#ifdef CONFIG_KPROBE_EVENTS
/*
 * Flags in config, used by dynamic PMU kprobe and uprobe
 * The flags should match following PMU_FORMAT_ATTR().
 *
 * PERF_PROBE_CONFIG_IS_RETPROBE if set, create kretprobe/uretprobe
 *                               if not set, create kprobe/uprobe
 */
enum perf_probe_config {
	PERF_PROBE_CONFIG_IS_RETPROBE = 1U << 0,  /* [k,u]retprobe */
};

PMU_FORMAT_ATTR(retprobe, "config:0");

static struct attribute *probe_attrs[] = {
	&format_attr_retprobe.attr,
	NULL,
};

static struct attribute_group probe_format_group = {
	.name = "format",
	.attrs = probe_attrs,
};

static const struct attribute_group *probe_attr_groups[] = {
	&probe_format_group,
	NULL,
};

static int perf_kprobe_event_init(struct perf_event *event);
static struct pmu perf_kprobe = {
	.task_ctx_nr	= perf_sw_context,
	.event_init	= perf_kprobe_event_init,
	.add		= perf_trace_add,
	.del		= perf_trace_del,
	.start		= perf_swevent_start,
	.stop		= perf_swevent_stop,
	.read		= perf_swevent_read,
	.attr_groups	= probe_attr_groups,
};

static int perf_kprobe_event_init(struct perf_event *event)
{
	int err;
	bool is_retprobe;

	if (event->attr.type != perf_kprobe.type)
		return -ENOENT;
	/*
	 * no branch sampling for probe events
	 */
	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	is_retprobe = event->attr.config & PERF_PROBE_CONFIG_IS_RETPROBE;
	err = perf_kprobe_init(event, is_retprobe);
	if (err)
		return err;

	event->destroy = perf_kprobe_destroy;

	return 0;
}
#endif /* CONFIG_KPROBE_EVENTS */

static inline void perf_tp_register(void)
{
	perf_pmu_register(&perf_tracepoint, "tracepoint", PERF_TYPE_TRACEPOINT);
#ifdef CONFIG_KPROBE_EVENTS
	perf_pmu_register(&perf_kprobe, "kprobe", -1);
#endif
}

static void perf_event_free_filter(struct perf_event *event)
@@ -8071,13 +8139,28 @@ static void perf_event_free_bpf_handler(struct perf_event *event)
}
#endif

/*
 * returns true if the event is a tracepoint, or a kprobe/upprobe created
 * with perf_event_open()
 */
static inline bool perf_event_is_tracing(struct perf_event *event)
{
	if (event->pmu == &perf_tracepoint)
		return true;
#ifdef CONFIG_KPROBE_EVENTS
	if (event->pmu == &perf_kprobe)
		return true;
#endif
	return false;
}

static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
{
	bool is_kprobe, is_tracepoint, is_syscall_tp;
	struct bpf_prog *prog;
	int ret;

	if (event->attr.type != PERF_TYPE_TRACEPOINT)
	if (!perf_event_is_tracing(event))
		return perf_event_set_bpf_handler(event, prog_fd);

	is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE;
@@ -8116,7 +8199,7 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)

static void perf_event_free_bpf_prog(struct perf_event *event)
{
	if (event->attr.type != PERF_TYPE_TRACEPOINT) {
	if (!perf_event_is_tracing(event)) {
		perf_event_free_bpf_handler(event);
		return;
	}
@@ -8535,47 +8618,36 @@ perf_event_set_addr_filter(struct perf_event *event, char *filter_str)
	return ret;
}

static int
perf_tracepoint_set_filter(struct perf_event *event, char *filter_str)
static int perf_event_set_filter(struct perf_event *event, void __user *arg)
{
	int ret = -EINVAL;
	char *filter_str;

	filter_str = strndup_user(arg, PAGE_SIZE);
	if (IS_ERR(filter_str))
		return PTR_ERR(filter_str);

#ifdef CONFIG_EVENT_TRACING
	if (perf_event_is_tracing(event)) {
		struct perf_event_context *ctx = event->ctx;
	int ret;

		/*
		 * Beware, here be dragons!!
		 *
	 * the tracepoint muck will deadlock against ctx->mutex, but the tracepoint
	 * stuff does not actually need it. So temporarily drop ctx->mutex. As per
	 * perf_event_ctx_lock() we already have a reference on ctx.
		 * the tracepoint muck will deadlock against ctx->mutex, but
		 * the tracepoint stuff does not actually need it. So
		 * temporarily drop ctx->mutex. As per perf_event_ctx_lock() we
		 * already have a reference on ctx.
		 *
	 * This can result in event getting moved to a different ctx, but that
	 * does not affect the tracepoint state.
		 * This can result in event getting moved to a different ctx,
		 * but that does not affect the tracepoint state.
		 */
		mutex_unlock(&ctx->mutex);
		ret = ftrace_profile_set_filter(event, event->attr.config, filter_str);
		mutex_lock(&ctx->mutex);

	return ret;
}

static int perf_event_set_filter(struct perf_event *event, void __user *arg)
{
	char *filter_str;
	int ret = -EINVAL;

	if ((event->attr.type != PERF_TYPE_TRACEPOINT ||
	    !IS_ENABLED(CONFIG_EVENT_TRACING)) &&
	    !has_addr_filter(event))
		return -EINVAL;

	filter_str = strndup_user(arg, PAGE_SIZE);
	if (IS_ERR(filter_str))
		return PTR_ERR(filter_str);

	if (IS_ENABLED(CONFIG_EVENT_TRACING) &&
	    event->attr.type == PERF_TYPE_TRACEPOINT)
		ret = perf_tracepoint_set_filter(event, filter_str);
	else if (has_addr_filter(event))
	} else
#endif
	if (has_addr_filter(event))
		ret = perf_event_set_addr_filter(event, filter_str);

	kfree(filter_str);
+49 −0
Original line number Diff line number Diff line
@@ -8,6 +8,7 @@
#include <linux/module.h>
#include <linux/kprobes.h>
#include "trace.h"
#include "trace_probe.h"

static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS];

@@ -237,6 +238,54 @@ void perf_trace_destroy(struct perf_event *p_event)
	mutex_unlock(&event_mutex);
}

#ifdef CONFIG_KPROBE_EVENTS
int perf_kprobe_init(struct perf_event *p_event, bool is_retprobe)
{
	int ret;
	char *func = NULL;
	struct trace_event_call *tp_event;

	if (p_event->attr.kprobe_func) {
		func = kzalloc(KSYM_NAME_LEN, GFP_KERNEL);
		if (!func)
			return -ENOMEM;
		ret = strncpy_from_user(
			func, u64_to_user_ptr(p_event->attr.kprobe_func),
			KSYM_NAME_LEN);
		if (ret < 0)
			goto out;

		if (func[0] == '\0') {
			kfree(func);
			func = NULL;
		}
	}

	tp_event = create_local_trace_kprobe(
		func, (void *)(unsigned long)(p_event->attr.kprobe_addr),
		p_event->attr.probe_offset, is_retprobe);
	if (IS_ERR(tp_event)) {
		ret = PTR_ERR(tp_event);
		goto out;
	}

	ret = perf_trace_event_init(tp_event, p_event);
	if (ret)
		destroy_local_trace_kprobe(tp_event);
out:
	kfree(func);
	return ret;
}

void perf_kprobe_destroy(struct perf_event *p_event)
{
	perf_trace_event_close(p_event);
	perf_trace_event_unreg(p_event);

	destroy_local_trace_kprobe(p_event->tp_event);
}
#endif /* CONFIG_KPROBE_EVENTS */

int perf_trace_add(struct perf_event *p_event, int flags)
{
	struct trace_event_call *tp_event = p_event->tp_event;
+83 −8
Original line number Diff line number Diff line
@@ -438,6 +438,14 @@ disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
			disable_kprobe(&tk->rp.kp);
		wait = 1;
	}

	/*
	 * if tk is not added to any list, it must be a local trace_kprobe
	 * created with perf_event_open. We don't need to wait for these
	 * trace_kprobes
	 */
	if (list_empty(&tk->list))
		wait = 0;
 out:
	if (wait) {
		/*
@@ -1313,12 +1321,9 @@ static struct trace_event_functions kprobe_funcs = {
	.trace		= print_kprobe_event
};

static int register_kprobe_event(struct trace_kprobe *tk)
static inline void init_trace_event_call(struct trace_kprobe *tk,
					 struct trace_event_call *call)
{
	struct trace_event_call *call = &tk->tp.call;
	int ret;

	/* Initialize trace_event_call */
	INIT_LIST_HEAD(&call->class->fields);
	if (trace_kprobe_is_return(tk)) {
		call->event.funcs = &kretprobe_funcs;
@@ -1327,6 +1332,19 @@ static int register_kprobe_event(struct trace_kprobe *tk)
		call->event.funcs = &kprobe_funcs;
		call->class->define_fields = kprobe_event_define_fields;
	}

	call->flags = TRACE_EVENT_FL_KPROBE;
	call->class->reg = kprobe_register;
	call->data = tk;
}

static int register_kprobe_event(struct trace_kprobe *tk)
{
	struct trace_event_call *call = &tk->tp.call;
	int ret = 0;

	init_trace_event_call(tk, call);

	if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0)
		return -ENOMEM;
	ret = register_trace_event(&call->event);
@@ -1334,9 +1352,6 @@ static int register_kprobe_event(struct trace_kprobe *tk)
		kfree(call->print_fmt);
		return -ENODEV;
	}
	call->flags = TRACE_EVENT_FL_KPROBE;
	call->class->reg = kprobe_register;
	call->data = tk;
	ret = trace_add_event_call(call);
	if (ret) {
		pr_info("Failed to register kprobe event: %s\n",
@@ -1358,6 +1373,66 @@ static int unregister_kprobe_event(struct trace_kprobe *tk)
	return ret;
}

#ifdef CONFIG_PERF_EVENTS
/* create a trace_kprobe, but don't add it to global lists */
struct trace_event_call *
create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
			  bool is_return)
{
	struct trace_kprobe *tk;
	int ret;
	char *event;

	/*
	 * local trace_kprobes are not added to probe_list, so they are never
	 * searched in find_trace_kprobe(). Therefore, there is no concern of
	 * duplicated name here.
	 */
	event = func ? func : "DUMMY_EVENT";

	tk = alloc_trace_kprobe(KPROBE_EVENT_SYSTEM, event, (void *)addr, func,
				offs, 0 /* maxactive */, 0 /* nargs */,
				is_return);

	if (IS_ERR(tk)) {
		pr_info("Failed to allocate trace_probe.(%d)\n",
			(int)PTR_ERR(tk));
		return ERR_CAST(tk);
	}

	init_trace_event_call(tk, &tk->tp.call);

	if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0) {
		ret = -ENOMEM;
		goto error;
	}

	ret = __register_trace_kprobe(tk);
	if (ret < 0)
		goto error;

	return &tk->tp.call;
error:
	free_trace_kprobe(tk);
	return ERR_PTR(ret);
}

void destroy_local_trace_kprobe(struct trace_event_call *event_call)
{
	struct trace_kprobe *tk;

	tk = container_of(event_call, struct trace_kprobe, tp.call);

	if (trace_probe_is_enabled(&tk->tp)) {
		WARN_ON(1);
		return;
	}

	__unregister_trace_kprobe(tk);
	free_trace_kprobe(tk);
}
#endif /* CONFIG_PERF_EVENTS */

/* Make a tracefs interface for controlling probe points */
static __init int init_kprobe_trace(void)
{
+7 −0
Original line number Diff line number Diff line
@@ -404,3 +404,10 @@ store_trace_args(int ent_size, struct trace_probe *tp, struct pt_regs *regs,
}

extern int set_print_fmt(struct trace_probe *tp, bool is_return);

#ifdef CONFIG_PERF_EVENTS
extern struct trace_event_call *
create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
			  bool is_return);
extern void destroy_local_trace_kprobe(struct trace_event_call *event_call);
#endif