Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 2541517c authored by Alexei Starovoitov, committed by Ingo Molnar
Browse files

tracing, perf: Implement BPF programs attached to kprobes



BPF programs, attached to kprobes, provide a safe way to execute
user-defined BPF byte-code programs without being able to crash or
hang the kernel in any way. The BPF engine makes sure that such
programs have a finite execution time and that they cannot break
out of their sandbox.

The user interface is to attach to a kprobe via the perf syscall:

	struct perf_event_attr attr = {
		.type	= PERF_TYPE_TRACEPOINT,
		.config	= event_id,
		...
	};

	event_fd = perf_event_open(&attr,...);
	ioctl(event_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);

'prog_fd' is a file descriptor associated with a previously loaded
BPF program.

'event_id' is an ID of the kprobe created.

Closing 'event_fd':

	close(event_fd);

... automatically detaches the BPF program from it.

BPF programs can call in-kernel helper functions to:

  - lookup/update/delete elements in maps

  - probe_read - wrapper of probe_kernel_read() used to access any
    kernel data structures

BPF programs receive 'struct pt_regs *' as an input ('struct pt_regs' is
architecture dependent) and return 0 to ignore the event and 1 to store
kprobe event into the ring buffer.

Note, kprobes are fundamentally _not_ a stable kernel ABI,
so BPF programs attached to kprobes must be recompiled for
every kernel version, and the user must supply the correct
LINUX_VERSION_CODE in attr.kern_version during the bpf_prog_load() call.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Reviewed-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David S. Miller <davem@davemloft.net>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1427312966-8434-4-git-send-email-ast@plumgrid.com


Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 72cbbc89
Loading
Loading
Loading
Loading
+11 −0
Original line number Original line Diff line number Diff line
@@ -13,6 +13,7 @@ struct trace_array;
struct trace_buffer;
struct trace_buffer;
struct tracer;
struct tracer;
struct dentry;
struct dentry;
struct bpf_prog;


struct trace_print_flags {
struct trace_print_flags {
	unsigned long		mask;
	unsigned long		mask;
@@ -306,6 +307,7 @@ struct ftrace_event_call {
#ifdef CONFIG_PERF_EVENTS
#ifdef CONFIG_PERF_EVENTS
	int				perf_refcount;
	int				perf_refcount;
	struct hlist_head __percpu	*perf_events;
	struct hlist_head __percpu	*perf_events;
	struct bpf_prog			*prog;


	int	(*perf_perm)(struct ftrace_event_call *,
	int	(*perf_perm)(struct ftrace_event_call *,
			     struct perf_event *);
			     struct perf_event *);
@@ -551,6 +553,15 @@ event_trigger_unlock_commit_regs(struct ftrace_event_file *file,
		event_triggers_post_call(file, tt);
		event_triggers_post_call(file, tt);
}
}


/*
 * trace_call_bpf() - hook for running a BPF program against a trace context.
 *
 * With CONFIG_BPF_SYSCALL set, only the prototype is provided here and the
 * definition lives elsewhere.  Without it, the inline stub below ignores both
 * arguments and unconditionally returns 1.
 *
 * NOTE(review): a return of 1 presumably means "keep/record the event", so
 * the stub makes tracing behave as if no program were filtering - confirm
 * against the callers.
 */
#ifdef CONFIG_BPF_SYSCALL
unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx);
#else
static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
{
	return 1;
}
#endif

enum {
enum {
	FILTER_OTHER = 0,
	FILTER_OTHER = 0,
	FILTER_STATIC_STRING,
	FILTER_STATIC_STRING,
+3 −0
Original line number Original line Diff line number Diff line
@@ -118,6 +118,7 @@ enum bpf_map_type {
enum bpf_prog_type {
enum bpf_prog_type {
	BPF_PROG_TYPE_UNSPEC,
	BPF_PROG_TYPE_UNSPEC,
	BPF_PROG_TYPE_SOCKET_FILTER,
	BPF_PROG_TYPE_SOCKET_FILTER,
	BPF_PROG_TYPE_KPROBE,
};
};


/* flags for BPF_MAP_UPDATE_ELEM command */
/* flags for BPF_MAP_UPDATE_ELEM command */
@@ -151,6 +152,7 @@ union bpf_attr {
		__u32		log_level;	/* verbosity level of verifier */
		__u32		log_level;	/* verbosity level of verifier */
		__u32		log_size;	/* size of user buffer */
		__u32		log_size;	/* size of user buffer */
		__aligned_u64	log_buf;	/* user supplied buffer */
		__aligned_u64	log_buf;	/* user supplied buffer */
		__u32		kern_version;	/* checked when prog_type=kprobe */
	};
	};
} __attribute__((aligned(8)));
} __attribute__((aligned(8)));


@@ -162,6 +164,7 @@ enum bpf_func_id {
	BPF_FUNC_map_lookup_elem, /* void *map_lookup_elem(&map, &key) */
	BPF_FUNC_map_lookup_elem, /* void *map_lookup_elem(&map, &key) */
	BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */
	BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */
	BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */
	BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */
	BPF_FUNC_probe_read,      /* int bpf_probe_read(void *dst, int size, void *src) */
	__BPF_FUNC_MAX_ID,
	__BPF_FUNC_MAX_ID,
};
};


+1 −0
Original line number Original line Diff line number Diff line
@@ -381,6 +381,7 @@ struct perf_event_attr {
#define PERF_EVENT_IOC_SET_OUTPUT	_IO ('$', 5)
#define PERF_EVENT_IOC_SET_OUTPUT	_IO ('$', 5)
#define PERF_EVENT_IOC_SET_FILTER	_IOW('$', 6, char *)
#define PERF_EVENT_IOC_SET_FILTER	_IOW('$', 6, char *)
#define PERF_EVENT_IOC_ID		_IOR('$', 7, __u64 *)
#define PERF_EVENT_IOC_ID		_IOR('$', 7, __u64 *)
#define PERF_EVENT_IOC_SET_BPF		_IOW('$', 8, __u32)


enum perf_event_ioc_flags {
enum perf_event_ioc_flags {
	PERF_IOC_FLAG_GROUP		= 1U << 0,
	PERF_IOC_FLAG_GROUP		= 1U << 0,
+6 −1
Original line number Original line Diff line number Diff line
@@ -16,6 +16,7 @@
#include <linux/file.h>
#include <linux/file.h>
#include <linux/license.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/filter.h>
#include <linux/version.h>


static LIST_HEAD(bpf_map_types);
static LIST_HEAD(bpf_map_types);


@@ -467,7 +468,7 @@ struct bpf_prog *bpf_prog_get(u32 ufd)
}
}


/* last field in 'union bpf_attr' used by this command */
/* last field in 'union bpf_attr' used by this command */
#define	BPF_PROG_LOAD_LAST_FIELD log_buf
#define	BPF_PROG_LOAD_LAST_FIELD kern_version


static int bpf_prog_load(union bpf_attr *attr)
static int bpf_prog_load(union bpf_attr *attr)
{
{
@@ -492,6 +493,10 @@ static int bpf_prog_load(union bpf_attr *attr)
	if (attr->insn_cnt >= BPF_MAXINSNS)
	if (attr->insn_cnt >= BPF_MAXINSNS)
		return -EINVAL;
		return -EINVAL;


	if (type == BPF_PROG_TYPE_KPROBE &&
	    attr->kern_version != LINUX_VERSION_CODE)
		return -EINVAL;

	/* plain bpf_prog allocation */
	/* plain bpf_prog allocation */
	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
	if (!prog)
	if (!prog)
+59 −0
Original line number Original line Diff line number Diff line
@@ -42,6 +42,8 @@
#include <linux/module.h>
#include <linux/module.h>
#include <linux/mman.h>
#include <linux/mman.h>
#include <linux/compat.h>
#include <linux/compat.h>
#include <linux/bpf.h>
#include <linux/filter.h>


#include "internal.h"
#include "internal.h"


@@ -3407,6 +3409,7 @@ find_get_context(struct pmu *pmu, struct task_struct *task,
}
}


static void perf_event_free_filter(struct perf_event *event);
static void perf_event_free_filter(struct perf_event *event);
static void perf_event_free_bpf_prog(struct perf_event *event);


static void free_event_rcu(struct rcu_head *head)
static void free_event_rcu(struct rcu_head *head)
{
{
@@ -3416,6 +3419,7 @@ static void free_event_rcu(struct rcu_head *head)
	if (event->ns)
	if (event->ns)
		put_pid_ns(event->ns);
		put_pid_ns(event->ns);
	perf_event_free_filter(event);
	perf_event_free_filter(event);
	perf_event_free_bpf_prog(event);
	kfree(event);
	kfree(event);
}
}


@@ -3928,6 +3932,7 @@ static inline int perf_fget_light(int fd, struct fd *p)
static int perf_event_set_output(struct perf_event *event,
static int perf_event_set_output(struct perf_event *event,
				 struct perf_event *output_event);
				 struct perf_event *output_event);
static int perf_event_set_filter(struct perf_event *event, void __user *arg);
static int perf_event_set_filter(struct perf_event *event, void __user *arg);
static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd);


static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg)
static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg)
{
{
@@ -3981,6 +3986,9 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon
	case PERF_EVENT_IOC_SET_FILTER:
	case PERF_EVENT_IOC_SET_FILTER:
		return perf_event_set_filter(event, (void __user *)arg);
		return perf_event_set_filter(event, (void __user *)arg);


	case PERF_EVENT_IOC_SET_BPF:
		return perf_event_set_bpf_prog(event, arg);

	default:
	default:
		return -ENOTTY;
		return -ENOTTY;
	}
	}
@@ -6455,6 +6463,49 @@ static void perf_event_free_filter(struct perf_event *event)
	ftrace_profile_free_filter(event);
	ftrace_profile_free_filter(event);
}
}


/*
 * Attach the BPF program identified by @prog_fd to the trace event that
 * backs @event (PERF_EVENT_IOC_SET_BPF handler).
 *
 * Returns 0 on success, or:
 *   -EINVAL  if @event is not of type PERF_TYPE_TRACEPOINT, if its trace
 *            event is not flagged TRACE_EVENT_FL_KPROBE, or if the program
 *            is not of type BPF_PROG_TYPE_KPROBE;
 *   -EEXIST  if the trace event already has a program installed;
 *   the error encoded by bpf_prog_get() if @prog_fd cannot be resolved.
 *
 * On success the pointer obtained from bpf_prog_get() is stored in
 * event->tp_event->prog; on a type mismatch it is handed back through
 * bpf_prog_put() before returning.
 */
static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
{
	struct bpf_prog *new_prog;

	/* reject perf events that are not of tracepoint type */
	if (event->attr.type != PERF_TYPE_TRACEPOINT)
		return -EINVAL;

	/* a program is already installed on this trace event */
	if (event->tp_event->prog != NULL)
		return -EEXIST;

	/* bpf programs can only be attached to kprobes */
	if ((event->tp_event->flags & TRACE_EVENT_FL_KPROBE) == 0)
		return -EINVAL;

	new_prog = bpf_prog_get(prog_fd);
	if (IS_ERR(new_prog))
		return PTR_ERR(new_prog);

	if (new_prog->aux->prog_type == BPF_PROG_TYPE_KPROBE) {
		event->tp_event->prog = new_prog;
		return 0;
	}

	/* valid fd, but invalid bpf program type */
	bpf_prog_put(new_prog);
	return -EINVAL;
}

/*
 * Detach and release the BPF program, if any, installed on the trace event
 * behind @event.  Does nothing when @event has no tp_event or when no
 * program is installed.
 */
static void perf_event_free_bpf_prog(struct perf_event *event)
{
	struct bpf_prog *attached;

	if (event->tp_event == NULL)
		return;

	attached = event->tp_event->prog;
	if (attached == NULL)
		return;

	/* clear the slot first, then hand the program to bpf_prog_put() */
	event->tp_event->prog = NULL;
	bpf_prog_put(attached);
}

#else
#else


static inline void perf_tp_register(void)
static inline void perf_tp_register(void)
@@ -6470,6 +6521,14 @@ static void perf_event_free_filter(struct perf_event *event)
{
{
}
}


/*
 * Fallback used in the #else branch of the CONFIG_EVENT_TRACING conditional:
 * attaching a BPF program is not supported here, so the request always
 * fails with -ENOENT and both arguments are ignored.
 */
static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
{
	return -ENOENT;
}

/*
 * Fallback used in the #else branch of the CONFIG_EVENT_TRACING conditional:
 * no program can have been attached in this configuration, so there is
 * nothing to release.
 */
static void perf_event_free_bpf_prog(struct perf_event *event)
{
}
#endif /* CONFIG_EVENT_TRACING */
#endif /* CONFIG_EVENT_TRACING */


#ifdef CONFIG_HAVE_HW_BREAKPOINT
#ifdef CONFIG_HAVE_HW_BREAKPOINT
Loading