Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit bae77c5e authored by Song Liu, committed by Daniel Borkmann
Browse files

bpf: enable stackmap with build_id in nmi context



Currently, we cannot parse build_id in nmi context because of
up_read(&current->mm->mmap_sem), which makes stackmap with build_id
less useful. This patch enables parsing build_id in nmi by putting
the up_read() call in irq_work. To avoid memory allocation in nmi
context, we use a per-cpu variable for the irq_work. As a result, only
one irq_work per cpu is allowed. If the irq_work is in use, we
fall back to reporting only ips.

Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
parent a84880ef
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -1391,6 +1391,7 @@ config BPF_SYSCALL
	bool "Enable bpf() system call"
	select ANON_INODES
	select BPF
	select IRQ_WORK
	default n
	help
	  Enable the bpf() system call that allows to manipulate eBPF
+53 −6
Original line number Diff line number Diff line
@@ -11,6 +11,7 @@
#include <linux/perf_event.h>
#include <linux/elf.h>
#include <linux/pagemap.h>
#include <linux/irq_work.h>
#include "percpu_freelist.h"

#define STACK_CREATE_FLAG_MASK					\
@@ -32,6 +33,23 @@ struct bpf_stack_map {
	struct stack_map_bucket *buckets[];
};

/*
 * irq_work to run up_read() for build_id lookup in nmi context.
 *
 * Pairs a deferred work item with the semaphore it must release, so the
 * callback can recover the semaphore from the irq_work pointer it is given.
 */
struct stack_map_irq_work {
	/* deferred work item; do_up_read() is installed as its callback */
	struct irq_work irq_work;
	/*
	 * semaphore to release from the callback; set just before the work
	 * is queued and reset to NULL by do_up_read() once released
	 */
	struct rw_semaphore *sem;
};

/*
 * irq_work callback: release the read lock that an nmi-context build_id
 * lookup left held when it queued this work.
 *
 * @entry: the irq_work member embedded in a struct stack_map_irq_work.
 *
 * The semaphore is taken with down_read_trylock() in nmi context but is
 * dropped here, from irq_work context. Because the release does not run
 * in the context that acquired the lock, use up_read_non_owner(): the
 * owner-tracking done by plain up_read() would otherwise be asked to
 * account for a release whose matching acquire it did not record in this
 * context.
 */
static void do_up_read(struct irq_work *entry)
{
	struct stack_map_irq_work *work;

	work = container_of(entry, struct stack_map_irq_work, irq_work);
	up_read_non_owner(work->sem);
	/* nothing is pending release on this slot any more */
	work->sem = NULL;
}

/*
 * One deferred-release slot per CPU. Using a statically defined per-cpu
 * variable means the nmi path never has to allocate memory; the trade-off
 * is that each CPU can have only one up_read() pending at a time.
 */
static DEFINE_PER_CPU(struct stack_map_irq_work, up_read_work);

static inline bool stack_map_use_build_id(struct bpf_map *map)
{
	return (map->map_flags & BPF_F_STACK_BUILD_ID);
@@ -267,17 +285,27 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
{
	int i;
	struct vm_area_struct *vma;
	bool in_nmi_ctx = in_nmi();
	bool irq_work_busy = false;
	struct stack_map_irq_work *work;

	if (in_nmi_ctx) {
		work = this_cpu_ptr(&up_read_work);
		if (work->irq_work.flags & IRQ_WORK_BUSY)
			/* cannot queue more up_read, fallback */
			irq_work_busy = true;
	}

	/*
	 * We cannot do up_read() in nmi context, so build_id lookup is
	 * only supported for non-nmi events. If at some point, it is
	 * possible to run find_vma() without taking the semaphore, we
	 * would like to allow build_id lookup in nmi context.
	 * We cannot do up_read() in nmi context. To do build_id lookup
	 * in nmi context, we need to run up_read() in irq_work. We use
	 * a percpu variable to do the irq_work. If the irq_work is
	 * already used by another lookup, we fall back to report ips.
	 *
	 * Same fallback is used for kernel stack (!user) on a stackmap
	 * with build_id.
	 */
	if (!user || !current || !current->mm || in_nmi() ||
	if (!user || !current || !current->mm || irq_work_busy ||
	    down_read_trylock(&current->mm->mmap_sem) == 0) {
		/* cannot access current->mm, fall back to ips */
		for (i = 0; i < trace_nr; i++) {
@@ -299,7 +327,13 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
			- vma->vm_start;
		id_offs[i].status = BPF_STACK_BUILD_ID_VALID;
	}

	if (!in_nmi_ctx) {
		up_read(&current->mm->mmap_sem);
	} else {
		work->sem = &current->mm->mmap_sem;
		irq_work_queue(&work->irq_work);
	}
}

BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
@@ -575,3 +609,16 @@ const struct bpf_map_ops stack_map_ops = {
	.map_update_elem = stack_map_update_elem,
	.map_delete_elem = stack_map_delete_elem,
};

/*
 * Install do_up_read() as the irq_work callback of the up_read_work slot
 * of every possible CPU. Registered as a subsys initcall.
 *
 * Always returns 0.
 */
static int __init stack_map_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		struct stack_map_irq_work *slot;

		slot = per_cpu_ptr(&up_read_work, cpu);
		init_irq_work(&slot->irq_work, do_up_read);
	}

	return 0;
}
subsys_initcall(stack_map_init);