Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e87c6bc3 authored by Yonghong Song's avatar Yonghong Song Committed by David S. Miller
Browse files

bpf: permit multiple bpf attachments for a single perf event



This patch enables multiple bpf attachments for a
kprobe/uprobe/tracepoint single trace event.
Each trace_event keeps a list of attached perf events.
When an event happens, all attached bpf programs will
be executed based on the order of attachment.

A global bpf_event_mutex lock is introduced to protect
prog_array attaching and detaching. An alternative will
be introduce a mutex lock in every trace_event_call
structure, but it takes a lot of extra memory.
So a global bpf_event_mutex lock is a good compromise.

The bpf prog detachment involves allocation of memory.
If the allocation fails, a dummy do-nothing program
will replace to-be-detached program in-place.

Signed-off-by: default avatarYonghong Song <yhs@fb.com>
Acked-by: default avatarAlexei Starovoitov <ast@kernel.org>
Acked-by: default avatarMartin KaFai Lau <kafai@fb.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 0b4c6841
Loading
Loading
Loading
Loading
+25 −5
Original line number Diff line number Diff line
@@ -273,18 +273,38 @@ int bpf_prog_array_length(struct bpf_prog_array __rcu *progs);
int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
				__u32 __user *prog_ids, u32 cnt);

#define BPF_PROG_RUN_ARRAY(array, ctx, func)		\
void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs,
				struct bpf_prog *old_prog);
int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
			struct bpf_prog *exclude_prog,
			struct bpf_prog *include_prog,
			struct bpf_prog_array **new_array);

#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null)	\
	({						\
		struct bpf_prog **_prog;		\
		struct bpf_prog **_prog, *__prog;	\
		struct bpf_prog_array *_array;		\
		u32 _ret = 1;				\
		rcu_read_lock();			\
		_prog = rcu_dereference(array)->progs;	\
		for (; *_prog; _prog++)			\
			_ret &= func(*_prog, ctx);	\
		_array = rcu_dereference(array);	\
		if (unlikely(check_non_null && !_array))\
			goto _out;			\
		_prog = _array->progs;			\
		while ((__prog = READ_ONCE(*_prog))) {	\
			_ret &= func(__prog, ctx);	\
			_prog++;			\
		}					\
_out:							\
		rcu_read_unlock();			\
		_ret;					\
	 })

#define BPF_PROG_RUN_ARRAY(array, ctx, func)		\
	__BPF_PROG_RUN_ARRAY(array, ctx, func, false)

#define BPF_PROG_RUN_ARRAY_CHECK(array, ctx, func)	\
	__BPF_PROG_RUN_ARRAY(array, ctx, func, true)

#ifdef CONFIG_BPF_SYSCALL
DECLARE_PER_CPU(int, bpf_prog_active);

+39 −4
Original line number Diff line number Diff line
@@ -271,14 +271,37 @@ struct trace_event_call {
#ifdef CONFIG_PERF_EVENTS
	int				perf_refcount;
	struct hlist_head __percpu	*perf_events;
	struct bpf_prog			*prog;
	struct perf_event		*bpf_prog_owner;
	struct bpf_prog_array __rcu	*prog_array;

	int	(*perf_perm)(struct trace_event_call *,
			     struct perf_event *);
#endif
};

#ifdef CONFIG_PERF_EVENTS
static inline bool bpf_prog_array_valid(struct trace_event_call *call)
{
	/*
	 * This inline function checks whether call->prog_array
	 * is valid or not. The function is called in various places,
	 * outside rcu_read_lock/unlock, as a heuristic to speed up execution.
	 *
	 * If this function returns true, and later call->prog_array
	 * becomes false inside rcu_read_lock/unlock region,
	 * we bail out then. If this function return false,
	 * there is a risk that we might miss a few events if the checking
	 * were delayed until inside rcu_read_lock/unlock region and
	 * call->prog_array happened to become non-NULL then.
	 *
	 * Here, READ_ONCE() is used instead of rcu_access_pointer().
	 * rcu_access_pointer() requires the actual definition of
	 * "struct bpf_prog_array" while READ_ONCE() only needs
	 * a declaration of the same type.
	 */
	return !!READ_ONCE(call->prog_array);
}
#endif

static inline const char *
trace_event_name(struct trace_event_call *call)
{
@@ -435,12 +458,23 @@ trace_trigger_soft_disabled(struct trace_event_file *file)
}

#ifdef CONFIG_BPF_EVENTS
unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx);
unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx);
int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog);
void perf_event_detach_bpf_prog(struct perf_event *event);
#else
static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
{
	return 1;
}

static inline int
perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog)
{
	return -EOPNOTSUPP;
}

static inline void perf_event_detach_bpf_prog(struct perf_event *event) { }

#endif

enum {
@@ -511,6 +545,7 @@ perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type,
{
	perf_tp_event(type, count, raw_data, size, regs, head, rctx, task, event);
}

#endif

#endif /* _LINUX_TRACE_EVENT_H */
+3 −3
Original line number Diff line number Diff line
@@ -34,7 +34,6 @@ perf_trace_##call(void *__data, proto) \
	struct trace_event_call *event_call = __data;			\
	struct trace_event_data_offsets_##call __maybe_unused __data_offsets;\
	struct trace_event_raw_##call *entry;				\
	struct bpf_prog *prog = event_call->prog;			\
	struct pt_regs *__regs;						\
	u64 __count = 1;						\
	struct task_struct *__task = NULL;				\
@@ -46,7 +45,8 @@ perf_trace_##call(void *__data, proto) \
	__data_size = trace_event_get_offsets_##call(&__data_offsets, args); \
									\
	head = this_cpu_ptr(event_call->perf_events);			\
	if (!prog && __builtin_constant_p(!__task) && !__task &&	\
	if (!bpf_prog_array_valid(event_call) &&			\
	    __builtin_constant_p(!__task) && !__task &&			\
	    hlist_empty(head))						\
		return;							\
									\
+81 −0
Original line number Diff line number Diff line
@@ -1394,6 +1394,20 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
}
EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);

static unsigned int __bpf_prog_ret1(const void *ctx,
				    const struct bpf_insn *insn)
{
	return 1;
}

static struct bpf_prog_dummy {
	struct bpf_prog prog;
} dummy_bpf_prog = {
	.prog = {
		.bpf_func = __bpf_prog_ret1,
	},
};

/* to avoid allocating empty bpf_prog_array for cgroups that
 * don't have bpf program attached use one global 'empty_prog_array'
 * It will not be modified the caller of bpf_prog_array_alloc()
@@ -1463,6 +1477,73 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
	return 0;
}

void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs,
				struct bpf_prog *old_prog)
{
	struct bpf_prog **prog = progs->progs;

	for (; *prog; prog++)
		if (*prog == old_prog) {
			WRITE_ONCE(*prog, &dummy_bpf_prog.prog);
			break;
		}
}

int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
			struct bpf_prog *exclude_prog,
			struct bpf_prog *include_prog,
			struct bpf_prog_array **new_array)
{
	int new_prog_cnt, carry_prog_cnt = 0;
	struct bpf_prog **existing_prog;
	struct bpf_prog_array *array;
	int new_prog_idx = 0;

	/* Figure out how many existing progs we need to carry over to
	 * the new array.
	 */
	if (old_array) {
		existing_prog = old_array->progs;
		for (; *existing_prog; existing_prog++) {
			if (*existing_prog != exclude_prog &&
			    *existing_prog != &dummy_bpf_prog.prog)
				carry_prog_cnt++;
			if (*existing_prog == include_prog)
				return -EEXIST;
		}
	}

	/* How many progs (not NULL) will be in the new array? */
	new_prog_cnt = carry_prog_cnt;
	if (include_prog)
		new_prog_cnt += 1;

	/* Do we have any prog (not NULL) in the new array? */
	if (!new_prog_cnt) {
		*new_array = NULL;
		return 0;
	}

	/* +1 as the end of prog_array is marked with NULL */
	array = bpf_prog_array_alloc(new_prog_cnt + 1, GFP_KERNEL);
	if (!array)
		return -ENOMEM;

	/* Fill in the new prog array */
	if (carry_prog_cnt) {
		existing_prog = old_array->progs;
		for (; *existing_prog; existing_prog++)
			if (*existing_prog != exclude_prog &&
			    *existing_prog != &dummy_bpf_prog.prog)
				array->progs[new_prog_idx++] = *existing_prog;
	}
	if (include_prog)
		array->progs[new_prog_idx++] = include_prog;
	array->progs[new_prog_idx] = NULL;
	*new_array = array;
	return 0;
}

static void bpf_prog_free_deferred(struct work_struct *work)
{
	struct bpf_prog_aux *aux;
+8 −18
Original line number Diff line number Diff line
@@ -7954,11 +7954,9 @@ void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx,
			       struct pt_regs *regs, struct hlist_head *head,
			       struct task_struct *task)
{
	struct bpf_prog *prog = call->prog;

	if (prog) {
	if (bpf_prog_array_valid(call)) {
		*(struct pt_regs **)raw_data = regs;
		if (!trace_call_bpf(prog, raw_data) || hlist_empty(head)) {
		if (!trace_call_bpf(call, raw_data) || hlist_empty(head)) {
			perf_swevent_put_recursion_context(rctx);
			return;
		}
@@ -8147,13 +8145,11 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
{
	bool is_kprobe, is_tracepoint, is_syscall_tp;
	struct bpf_prog *prog;
	int ret;

	if (event->attr.type != PERF_TYPE_TRACEPOINT)
		return perf_event_set_bpf_handler(event, prog_fd);

	if (event->tp_event->prog)
		return -EEXIST;

	is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE;
	is_tracepoint = event->tp_event->flags & TRACE_EVENT_FL_TRACEPOINT;
	is_syscall_tp = is_syscall_trace_event(event->tp_event);
@@ -8181,26 +8177,20 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
			return -EACCES;
		}
	}
	event->tp_event->prog = prog;
	event->tp_event->bpf_prog_owner = event;

	return 0;
	ret = perf_event_attach_bpf_prog(event, prog);
	if (ret)
		bpf_prog_put(prog);
	return ret;
}

static void perf_event_free_bpf_prog(struct perf_event *event)
{
	struct bpf_prog *prog;

	if (event->attr.type != PERF_TYPE_TRACEPOINT) {
		perf_event_free_bpf_handler(event);
		return;
	}

	prog = event->tp_event->prog;
	if (prog && event->tp_event->bpf_prog_owner == event) {
		event->tp_event->prog = NULL;
		bpf_prog_put(prog);
	}
	perf_event_detach_bpf_prog(event);
}

#else
Loading