Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 265229d2 authored by Yonghong Song, committed by Connor O'Brien
Browse files

BACKPORT: bpf: permit multiple bpf attachments for a single perf event



This patch enables multiple bpf attachments for a
single kprobe/uprobe/tracepoint trace event.
Each trace_event keeps a list of attached perf events.
When an event happens, all attached bpf programs will
be executed based on the order of attachment.

A global bpf_event_mutex lock is introduced to protect
prog_array attaching and detaching. An alternative would
be to introduce a mutex lock in every trace_event_call
structure, but that would take a lot of extra memory.
So a global bpf_event_mutex lock is a good compromise.

The bpf prog detachment involves allocation of memory.
If the allocation fails, a dummy do-nothing program
will replace the to-be-detached program in-place.

Signed-off-by: Yonghong Song <yhs@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit e87c6bc3852b981e71c757be20771546ce9f76f3)
Signed-off-by: Connor O'Brien <connoro@google.com>
Bug: 121213201
Bug: 138317270
Test: build & boot cuttlefish; attach 2 progs to 1 tracepoint
Change-Id: I390d8c0146888ddb1aed5a6f6e5dae7ef394ebc9
parent ca76cc64
Loading
Loading
Loading
Loading
+25 −5
Original line number Original line Diff line number Diff line
@@ -261,18 +261,38 @@ struct bpf_prog_array {
struct bpf_prog_array __rcu *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
struct bpf_prog_array __rcu *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
void bpf_prog_array_free(struct bpf_prog_array __rcu *progs);
void bpf_prog_array_free(struct bpf_prog_array __rcu *progs);


#define BPF_PROG_RUN_ARRAY(array, ctx, func)		\
void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs,
				struct bpf_prog *old_prog);
int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
			struct bpf_prog *exclude_prog,
			struct bpf_prog *include_prog,
			struct bpf_prog_array **new_array);

#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null)	\
	({						\
	({						\
		struct bpf_prog **_prog;		\
		struct bpf_prog **_prog, *__prog;	\
		struct bpf_prog_array *_array;		\
		u32 _ret = 1;				\
		u32 _ret = 1;				\
		rcu_read_lock();			\
		rcu_read_lock();			\
		_prog = rcu_dereference(array)->progs;	\
		_array = rcu_dereference(array);	\
		for (; *_prog; _prog++)			\
		if (unlikely(check_non_null && !_array))\
			_ret &= func(*_prog, ctx);	\
			goto _out;			\
		_prog = _array->progs;			\
		while ((__prog = READ_ONCE(*_prog))) {	\
			_ret &= func(__prog, ctx);	\
			_prog++;			\
		}					\
_out:							\
		rcu_read_unlock();			\
		rcu_read_unlock();			\
		_ret;					\
		_ret;					\
	 })
	 })


#define BPF_PROG_RUN_ARRAY(array, ctx, func)		\
	__BPF_PROG_RUN_ARRAY(array, ctx, func, false)

#define BPF_PROG_RUN_ARRAY_CHECK(array, ctx, func)	\
	__BPF_PROG_RUN_ARRAY(array, ctx, func, true)

#ifdef CONFIG_BPF_SYSCALL
#ifdef CONFIG_BPF_SYSCALL
DECLARE_PER_CPU(int, bpf_prog_active);
DECLARE_PER_CPU(int, bpf_prog_active);


+39 −4
Original line number Original line Diff line number Diff line
@@ -272,14 +272,37 @@ struct trace_event_call {
#ifdef CONFIG_PERF_EVENTS
#ifdef CONFIG_PERF_EVENTS
	int				perf_refcount;
	int				perf_refcount;
	struct hlist_head __percpu	*perf_events;
	struct hlist_head __percpu	*perf_events;
	struct bpf_prog			*prog;
	struct bpf_prog_array __rcu	*prog_array;
	struct perf_event		*bpf_prog_owner;


	int	(*perf_perm)(struct trace_event_call *,
	int	(*perf_perm)(struct trace_event_call *,
			     struct perf_event *);
			     struct perf_event *);
#endif
#endif
};
};


#ifdef CONFIG_PERF_EVENTS
static inline bool bpf_prog_array_valid(struct trace_event_call *call)
{
	/*
	 * This inline function checks whether call->prog_array
	 * is valid or not. The function is called in various places,
	 * outside rcu_read_lock/unlock, as a heuristic to speed up execution.
	 *
	 * If this function returns true, and later call->prog_array
	 * becomes false inside rcu_read_lock/unlock region,
	 * we bail out then. If this function return false,
	 * there is a risk that we might miss a few events if the checking
	 * were delayed until inside rcu_read_lock/unlock region and
	 * call->prog_array happened to become non-NULL then.
	 *
	 * Here, READ_ONCE() is used instead of rcu_access_pointer().
	 * rcu_access_pointer() requires the actual definition of
	 * "struct bpf_prog_array" while READ_ONCE() only needs
	 * a declaration of the same type.
	 */
	return !!READ_ONCE(call->prog_array);
}
#endif

static inline const char *
static inline const char *
trace_event_name(struct trace_event_call *call)
trace_event_name(struct trace_event_call *call)
{
{
@@ -430,12 +453,23 @@ trace_trigger_soft_disabled(struct trace_event_file *file)
}
}


#ifdef CONFIG_BPF_EVENTS
#ifdef CONFIG_BPF_EVENTS
unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx);
unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx);
int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog);
void perf_event_detach_bpf_prog(struct perf_event *event);
#else
#else
static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
{
{
	return 1;
	return 1;
}
}

/* !CONFIG_BPF_EVENTS stub: attaching a bpf program is unsupported. */
static inline int
perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog)
{
	return -EOPNOTSUPP;
}

/* !CONFIG_BPF_EVENTS stub: no program can be attached, nothing to detach. */
static inline void perf_event_detach_bpf_prog(struct perf_event *event) { }

#endif
#endif


enum {
enum {
@@ -506,6 +540,7 @@ perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type,
{
{
	perf_tp_event(type, count, raw_data, size, regs, head, rctx, task);
	perf_tp_event(type, count, raw_data, size, regs, head, rctx, task);
}
}

#endif
#endif


#endif /* _LINUX_TRACE_EVENT_H */
#endif /* _LINUX_TRACE_EVENT_H */
+3 −3
Original line number Original line Diff line number Diff line
@@ -34,7 +34,6 @@ perf_trace_##call(void *__data, proto) \
	struct trace_event_call *event_call = __data;			\
	struct trace_event_call *event_call = __data;			\
	struct trace_event_data_offsets_##call __maybe_unused __data_offsets;\
	struct trace_event_data_offsets_##call __maybe_unused __data_offsets;\
	struct trace_event_raw_##call *entry;				\
	struct trace_event_raw_##call *entry;				\
	struct bpf_prog *prog = event_call->prog;			\
	struct pt_regs *__regs;						\
	struct pt_regs *__regs;						\
	u64 __count = 1;						\
	u64 __count = 1;						\
	struct task_struct *__task = NULL;				\
	struct task_struct *__task = NULL;				\
@@ -46,7 +45,8 @@ perf_trace_##call(void *__data, proto) \
	__data_size = trace_event_get_offsets_##call(&__data_offsets, args); \
	__data_size = trace_event_get_offsets_##call(&__data_offsets, args); \
									\
									\
	head = this_cpu_ptr(event_call->perf_events);			\
	head = this_cpu_ptr(event_call->perf_events);			\
	if (!prog && __builtin_constant_p(!__task) && !__task &&	\
	if (!bpf_prog_array_valid(event_call) &&			\
	    __builtin_constant_p(!__task) && !__task &&			\
	    hlist_empty(head))						\
	    hlist_empty(head))						\
		return;							\
		return;							\
									\
									\
+81 −0
Original line number Original line Diff line number Diff line
@@ -1069,6 +1069,20 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
}
}
EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);


/*
 * Stand-in bpf_func that unconditionally returns 1 ("pass").  Both
 * parameters are ignored; the signature only has to match the bpf_func
 * calling convention used via prog->bpf_func.
 */
static unsigned int __bpf_prog_ret1(const struct sk_buff *ctx,
				    const struct bpf_insn *insn)
{
	return 1;
}

/*
 * Do-nothing program installed in place of a to-be-detached program when
 * allocating a replacement array fails (see bpf_prog_array_delete_safe());
 * bpf_prog_array_copy() later filters these placeholder slots out.
 */
static struct bpf_prog_dummy {
	struct bpf_prog prog;
} dummy_bpf_prog = {
	.prog = {
		.bpf_func = __bpf_prog_ret1,
	},
};

/* to avoid allocating empty bpf_prog_array for cgroups that
/* to avoid allocating empty bpf_prog_array for cgroups that
 * don't have bpf program attached use one global 'empty_prog_array'
 * don't have bpf program attached use one global 'empty_prog_array'
 * It will not be modified the caller of bpf_prog_array_alloc()
 * It will not be modified the caller of bpf_prog_array_alloc()
@@ -1100,6 +1114,73 @@ void bpf_prog_array_free(struct bpf_prog_array __rcu *progs)
	kfree_rcu(progs, rcu);
	kfree_rcu(progs, rcu);
}
}


/*
 * Replace the first occurrence of @old_prog in @progs with the global
 * do-nothing dummy program, leaving the array length unchanged.  No
 * memory is allocated, so this cannot fail.  The WRITE_ONCE() publishes
 * the swap so concurrent walkers see either the old program or the dummy.
 */
void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs,
				struct bpf_prog *old_prog)
{
	struct bpf_prog **slot;

	for (slot = progs->progs; *slot != NULL; slot++) {
		if (*slot != old_prog)
			continue;
		WRITE_ONCE(*slot, &dummy_bpf_prog.prog);
		break;
	}
}

/*
 * Build a new prog_array from @old_array, omitting @exclude_prog and any
 * dummy placeholder slots, and appending @include_prog (if non-NULL) at
 * the end.  Either @exclude_prog or @include_prog may be NULL.
 *
 * Returns 0 on success with the result stored in *new_array (*new_array
 * is set to NULL when the result would be empty), -EEXIST if
 * @include_prog is already present in @old_array, or -ENOMEM if the new
 * array cannot be allocated.
 *
 * NOTE(review): @old_array is walked without rcu_dereference();
 * presumably callers serialize updates under a mutex — confirm at the
 * call sites.
 */
int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
			struct bpf_prog *exclude_prog,
			struct bpf_prog *include_prog,
			struct bpf_prog_array **new_array)
{
	int new_prog_cnt, carry_prog_cnt = 0;
	struct bpf_prog **existing_prog;
	struct bpf_prog_array *array;
	int new_prog_idx = 0;

	/* Figure out how many existing progs we need to carry over to
	 * the new array.  Dummy entries (left behind by a failed detach)
	 * are dropped here rather than copied.
	 */
	if (old_array) {
		existing_prog = old_array->progs;
		for (; *existing_prog; existing_prog++) {
			if (*existing_prog != exclude_prog &&
			    *existing_prog != &dummy_bpf_prog.prog)
				carry_prog_cnt++;
			if (*existing_prog == include_prog)
				return -EEXIST;
		}
	}

	/* How many progs (not NULL) will be in the new array? */
	new_prog_cnt = carry_prog_cnt;
	if (include_prog)
		new_prog_cnt += 1;

	/* Do we have any prog (not NULL) in the new array? */
	if (!new_prog_cnt) {
		*new_array = NULL;
		return 0;
	}

	/* +1 as the end of prog_array is marked with NULL */
	array = bpf_prog_array_alloc(new_prog_cnt + 1, GFP_KERNEL);
	if (!array)
		return -ENOMEM;

	/* Fill in the new prog array */
	if (carry_prog_cnt) {
		existing_prog = old_array->progs;
		for (; *existing_prog; existing_prog++)
			if (*existing_prog != exclude_prog &&
			    *existing_prog != &dummy_bpf_prog.prog)
				array->progs[new_prog_idx++] = *existing_prog;
	}
	if (include_prog)
		array->progs[new_prog_idx++] = include_prog;
	array->progs[new_prog_idx] = NULL;
	*new_array = array;
	return 0;
}

static void bpf_prog_free_deferred(struct work_struct *work)
static void bpf_prog_free_deferred(struct work_struct *work)
{
{
	struct bpf_prog_aux *aux;
	struct bpf_prog_aux *aux;
+8 −18
Original line number Original line Diff line number Diff line
@@ -7705,11 +7705,9 @@ void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx,
			       struct pt_regs *regs, struct hlist_head *head,
			       struct pt_regs *regs, struct hlist_head *head,
			       struct task_struct *task)
			       struct task_struct *task)
{
{
	struct bpf_prog *prog = call->prog;
	if (bpf_prog_array_valid(call)) {

	if (prog) {
		*(struct pt_regs **)raw_data = regs;
		*(struct pt_regs **)raw_data = regs;
		if (!trace_call_bpf(prog, raw_data) || hlist_empty(head)) {
		if (!trace_call_bpf(call, raw_data) || hlist_empty(head)) {
			perf_swevent_put_recursion_context(rctx);
			perf_swevent_put_recursion_context(rctx);
			return;
			return;
		}
		}
@@ -7894,6 +7892,7 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
{
{
	bool is_kprobe, is_tracepoint;
	bool is_kprobe, is_tracepoint;
	struct bpf_prog *prog;
	struct bpf_prog *prog;
	int ret;


	if (event->attr.type == PERF_TYPE_HARDWARE ||
	if (event->attr.type == PERF_TYPE_HARDWARE ||
	    event->attr.type == PERF_TYPE_SOFTWARE)
	    event->attr.type == PERF_TYPE_SOFTWARE)
@@ -7902,9 +7901,6 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
	if (event->attr.type != PERF_TYPE_TRACEPOINT)
	if (event->attr.type != PERF_TYPE_TRACEPOINT)
		return -EINVAL;
		return -EINVAL;


	if (event->tp_event->prog)
		return -EEXIST;

	is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE;
	is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE;
	is_tracepoint = event->tp_event->flags & TRACE_EVENT_FL_TRACEPOINT;
	is_tracepoint = event->tp_event->flags & TRACE_EVENT_FL_TRACEPOINT;
	if (!is_kprobe && !is_tracepoint)
	if (!is_kprobe && !is_tracepoint)
@@ -7930,26 +7926,20 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
			return -EACCES;
			return -EACCES;
		}
		}
	}
	}
	event->tp_event->prog = prog;
	event->tp_event->bpf_prog_owner = event;


	return 0;
	ret = perf_event_attach_bpf_prog(event, prog);
	if (ret)
		bpf_prog_put(prog);
	return ret;
}
}


static void perf_event_free_bpf_prog(struct perf_event *event)
static void perf_event_free_bpf_prog(struct perf_event *event)
{
{
	struct bpf_prog *prog;

	if (event->attr.type != PERF_TYPE_TRACEPOINT) {
	if (event->attr.type != PERF_TYPE_TRACEPOINT) {
		perf_event_free_bpf_handler(event);
		perf_event_free_bpf_handler(event);
		return;
		return;
	}
	}

	perf_event_detach_bpf_prog(event);
	prog = event->tp_event->prog;
	if (prog && event->tp_event->bpf_prog_owner == event) {
		event->tp_event->prog = NULL;
		bpf_prog_put(prog);
	}
}
}


#else
#else
Loading