Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit f60ad0a0 authored by Alexei Starovoitov's avatar Alexei Starovoitov
Browse files

Merge branch 'bpf_get_stack'



Yonghong Song says:

====================
Currently, stackmap and bpf_get_stackid helper are provided
for bpf program to get the stack trace. This approach has
a limitation though. If two stack traces have the same hash,
only one will get stored in the stackmap table regardless of
whether BPF_F_REUSE_STACKID is specified or not,
so some stack traces may be missing from user perspective.

This patch implements a new helper, bpf_get_stack, will
send stack traces directly to bpf program. The bpf program
is able to see all stack traces, and then can do in-kernel
processing or send stack traces to user space through
shared map or bpf_perf_event_output.

Patches #1 and #2 implemented the core kernel support.
Patch #3 removes two never-hit branches in verifier.
Patches #4 and #5 are two verifier improves to make
bpf programming easier. Patch #6 synced the new helper
to tools headers. Patch #7 moved perf_event polling code
and ksym lookup code from samples/bpf to
tools/testing/selftests/bpf. Patch #8 added a verifier
test in tools/bpf for new verifier change.
Patches #9 and #10 added tests for raw tracepoint prog
and tracepoint prog respectively.

Changelogs:
  v8 -> v9:
    . make function perf_event_mmap (in trace_helpers.c) extern
      to decouple perf_event_mmap and perf_event_poller.
    . add jit enabled handling for kernel stack verification
      in Patch #9. Since we did not have a good way to
      verify jit enabled kernel stack, just return true if
      the kernel stack is not empty.
    . In path #9, using raw_syscalls/sys_enter instead of
      sched/sched_switch, removed calling cmd
      "task 1 dd if=/dev/zero of=/dev/null" which is left
      with dangling process after the program exited.
  v7 -> v8:
    . rebase on top of latest bpf-next
    . simplify BPF_ARSH dst_reg->smin_val/smax_value tracking
    . rewrite the description of bpf_get_stack() in uapi bpf.h
      based on new format.
  v6 -> v7:
    . do perf callchain buffer allocation inside the
      verifier. so if the prog->has_callchain_buf is set,
      it is guaranteed that the buffer has been allocated.
    . change condition "trace_nr <= skip" to "trace_nr < skip"
      so that for zero size buffer, return 0 instead of -EFAULT
  v5 -> v6:
    . after refining return register smax_value and umax_value
      for helpers bpf_get_stack and bpf_probe_read_str,
      bounds and var_off of the return register are further refined.
    . added missing commit message for tools header sync commit.
    . removed one unnecessary empty line.
  v4 -> v5:
    . relied on dst_reg->var_off to refine umin_val/umax_val
      in verifier handling BPF_ARSH value range tracking,
      suggested by Edward.
  v3 -> v4:
    . fixed a bug when meta ptr is set to NULL in check_func_arg.
    . introduced tnum_arshift and added detailed comments for
      the underlying implementation
    . avoided using VLA in tools/bpf test_progs.
  v2 -> v3:
    . used meta to track helper memory size argument
    . implemented range checking for ARSH in verifier
    . moved perf event polling and ksym related functions
      from samples/bpf to tools/bpf
    . added test to compare build id's between bpf_get_stackid
      and bpf_get_stack
  v1 -> v2:
    . fixed compilation error when CONFIG_PERF_EVENTS is not enabled
====================

Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents 2c25fc9a 79b45350
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -692,6 +692,7 @@ extern const struct bpf_func_proto bpf_get_current_comm_proto;
extern const struct bpf_func_proto bpf_skb_vlan_push_proto;
extern const struct bpf_func_proto bpf_skb_vlan_pop_proto;
extern const struct bpf_func_proto bpf_get_stackid_proto;
extern const struct bpf_func_proto bpf_get_stack_proto;
extern const struct bpf_func_proto bpf_sock_map_update_proto;

/* Shared helpers among cBPF and eBPF. */
+2 −1
Original line number Diff line number Diff line
@@ -468,7 +468,8 @@ struct bpf_prog {
				dst_needed:1,	/* Do we need dst entry? */
				blinded:1,	/* Was blinded */
				is_func:1,	/* program is a bpf function */
				kprobe_override:1; /* Do we override a kprobe? */
				kprobe_override:1, /* Do we override a kprobe? */
				has_callchain_buf:1; /* callchain buffer allocated? */
	enum bpf_prog_type	type;		/* Type of BPF program */
	enum bpf_attach_type	expected_attach_type; /* For some prog types */
	u32			len;		/* Number of filter blocks */
+3 −1
Original line number Diff line number Diff line
@@ -23,8 +23,10 @@ struct tnum tnum_range(u64 min, u64 max);
/* Arithmetic and logical ops */
/* Shift a tnum left (by a fixed shift) */
struct tnum tnum_lshift(struct tnum a, u8 shift);
/* Shift a tnum right (by a fixed shift) */
/* Shift (rsh) a tnum right (by a fixed shift) */
struct tnum tnum_rshift(struct tnum a, u8 shift);
/* Shift (arsh) a tnum right (by a fixed min_shift) */
struct tnum tnum_arshift(struct tnum a, u8 min_shift);
/* Add two tnums, return @a + @b */
struct tnum tnum_add(struct tnum a, struct tnum b);
/* Subtract two tnums, return @a - @b */
+40 −2
Original line number Diff line number Diff line
@@ -1767,6 +1767,40 @@ union bpf_attr {
 * 		**CONFIG_XFRM** configuration option.
 * 	Return
 * 		0 on success, or a negative error in case of failure.
 *
 * int bpf_get_stack(struct pt_regs *regs, void *buf, u32 size, u64 flags)
 * 	Description
 *		Return a user or a kernel stack in bpf program provided buffer.
 *		To achieve this, the helper needs *ctx*, which is a pointer
 *		to the context on which the tracing program is executed.
 *		To store the stacktrace, the bpf program provides *buf* with
 *		a nonnegative *size*.
 *
 *		The last argument, *flags*, holds the number of stack frames to
 *		skip (from 0 to 255), masked with
 *		**BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
 *		the following flags:
 *
 *		**BPF_F_USER_STACK**
 *			Collect a user space stack instead of a kernel stack.
 *		**BPF_F_USER_BUILD_ID**
 *			Collect buildid+offset instead of ips for user stack,
 *			only valid if **BPF_F_USER_STACK** is also specified.
 *
 *		**bpf_get_stack**\ () can collect up to
 *		**PERF_MAX_STACK_DEPTH** both kernel and user frames, subject
 *		to sufficient large buffer size. Note that
 *		this limit can be controlled with the **sysctl** program, and
 *		that it should be manually increased in order to profile long
 *		user stacks (such as stacks for Java programs). To do so, use:
 *
 *	::
 *
 *		# sysctl kernel.perf_event_max_stack=<new value>
 *
 * 	Return
 * 		a non-negative value equal to or less than size on success, or
 * 		a negative error in case of failure.
 */
#define __BPF_FUNC_MAPPER(FN)		\
	FN(unspec),			\
@@ -1835,7 +1869,8 @@ union bpf_attr {
	FN(msg_pull_data),		\
	FN(bind),			\
	FN(xdp_adjust_tail),		\
	FN(skb_get_xfrm_state),
	FN(skb_get_xfrm_state),		\
	FN(get_stack),

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
 * function eBPF program intends to call
@@ -1869,11 +1904,14 @@ enum bpf_func_id {
/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
#define BPF_F_TUNINFO_IPV6		(1ULL << 0)

/* BPF_FUNC_get_stackid flags. */
/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */
#define BPF_F_SKIP_FIELD_MASK		0xffULL
#define BPF_F_USER_STACK		(1ULL << 8)
/* flags used by BPF_FUNC_get_stackid only. */
#define BPF_F_FAST_STACK_CMP		(1ULL << 9)
#define BPF_F_REUSE_STACKID		(1ULL << 10)
/* flags used by BPF_FUNC_get_stack only. */
#define BPF_F_USER_BUILD_ID		(1ULL << 11)

/* BPF_FUNC_skb_set_tunnel_key flags. */
#define BPF_F_ZERO_CSUM_TX		(1ULL << 1)
+5 −0
Original line number Diff line number Diff line
@@ -31,6 +31,7 @@
#include <linux/rbtree_latch.h>
#include <linux/kallsyms.h>
#include <linux/rcupdate.h>
#include <linux/perf_event.h>

#include <asm/unaligned.h>

@@ -1722,6 +1723,10 @@ static void bpf_prog_free_deferred(struct work_struct *work)
	aux = container_of(work, struct bpf_prog_aux, work);
	if (bpf_prog_is_dev_bound(aux))
		bpf_prog_offload_destroy(aux->prog);
#ifdef CONFIG_PERF_EVENTS
	if (aux->prog->has_callchain_buf)
		put_callchain_buffers();
#endif
	for (i = 0; i < aux->func_cnt; i++)
		bpf_jit_free(aux->func[i]);
	if (aux->func_cnt) {
Loading