Commit 1f499d6a authored by David S. Miller

Merge branch 'filter-next'



Alexei Starovoitov says:

====================
internal BPF jit for x64 and JITed seccomp

Internal BPF JIT compiler for x86_64 replaces classic BPF JIT.
Use it in seccomp and in tracing filters (sent as separate patch)
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 9509b1c1 8f577cad
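
For context before the diffs: nothing changes in how a seccomp filter is installed from userspace; a classic BPF program like the one below is what the kernel now converts to internal BPF and, on x86_64, JIT-compiles before it ever runs. A minimal sketch (allow-everything policy, error handling trimmed; the function name is hypothetical, the prctl/BPF_STMT/SECCOMP_* APIs are the real ones):

/* Classic BPF seccomp filter, installed via prctl(); after this
 * merge the kernel converts it to internal BPF and JITs it.
 */
#include <stddef.h>
#include <sys/prctl.h>
#include <linux/filter.h>
#include <linux/seccomp.h>

static int install_allow_all_filter(void)	/* hypothetical name */
{
	struct sock_filter insns[] = {
		/* A = seccomp_data.nr (the syscall number) */
		BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
			 offsetof(struct seccomp_data, nr)),
		/* a real policy would compare A against syscall numbers here */
		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = sizeof(insns) / sizeof(insns[0]),
		.filter = insns,
	};

	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0))
		return -1;
	return prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
}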
Loading
Loading
Loading
Loading
arch/x86/net/bpf_jit.S +18 −59
@@ -12,13 +12,16 @@

/*
 * Calling convention :
- * rdi : skb pointer
+ * rbx : skb pointer (callee saved)
 * esi : offset of byte(s) to fetch in skb (can be scratched)
- * r8  : copy of skb->data
+ * r10 : copy of skb->data
 * r9d : hlen = skb->len - skb->data_len
 */
-#define SKBDATA	%r8
+#define SKBDATA	%r10
#define SKF_MAX_NEG_OFF    $(-0x200000) /* SKF_LL_OFF from filter.h */
+#define MAX_BPF_STACK (512 /* from filter.h */ + \
+	32 /* space for rbx,r13,r14,r15 */ + \
+	8 /* space for skb_copy_bits */)
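
The arithmetic in the new define pins down the frame the JITed prologue builds: 512 bytes of BPF program stack (from filter.h), 32 bytes for the four callee-saved registers, and an 8-byte scratch slot for the skb_copy_bits() slow path, 552 bytes in all. A sketch of the implied layout below %rbp, derived from the offsets used elsewhere in this file (not authoritative):

/*
 *  %rbp - 552 .. %rbp - 521 : saved rbx, r13, r14, r15  (4 * 8 = 32)
 *  %rbp - 520 .. %rbp - 513 : scratch slot filled by skb_copy_bits(),
 *                             i.e. - MAX_BPF_STACK + 32(%rbp)
 *  %rbp - 512 .. %rbp -   1 : 512-byte BPF program stack
 */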

sk_load_word:
	.globl	sk_load_word
@@ -68,53 +71,31 @@ sk_load_byte_positive_offset:
	movzbl	(SKBDATA,%rsi),%eax
	ret

-/**
- * sk_load_byte_msh - BPF_S_LDX_B_MSH helper
- *
- * Implements BPF_S_LDX_B_MSH : ldxb  4*([offset]&0xf)
- * Must preserve A accumulator (%eax)
- * Inputs : %esi is the offset value
- */
-sk_load_byte_msh:
-	.globl	sk_load_byte_msh
-	test	%esi,%esi
-	js	bpf_slow_path_byte_msh_neg
-
-sk_load_byte_msh_positive_offset:
-	.globl	sk_load_byte_msh_positive_offset
-	cmp	%esi,%r9d      /* if (offset >= hlen) goto bpf_slow_path_byte_msh */
-	jle	bpf_slow_path_byte_msh
-	movzbl	(SKBDATA,%rsi),%ebx
-	and	$15,%bl
-	shl	$2,%bl
-	ret

/* rsi contains offset and can be scratched */
#define bpf_slow_path_common(LEN)		\
-	push	%rdi;    /* save skb */		\
+	mov	%rbx, %rdi; /* arg1 == skb */	\
	push	%r9;				\
	push	SKBDATA;			\
/* rsi already has offset */			\
	mov	$LEN,%ecx;	/* len */	\
-	lea	-12(%rbp),%rdx;			\
+	lea	- MAX_BPF_STACK + 32(%rbp),%rdx;	\
	call	skb_copy_bits;			\
	test    %eax,%eax;			\
	pop	SKBDATA;			\
-	pop	%r9;				\
-	pop	%rdi
+	pop	%r9;


bpf_slow_path_word:
	bpf_slow_path_common(4)
	js	bpf_error
-	mov	-12(%rbp),%eax
+	mov	- MAX_BPF_STACK + 32(%rbp),%eax
	bswap	%eax
	ret

bpf_slow_path_half:
	bpf_slow_path_common(2)
	js	bpf_error
-	mov	-12(%rbp),%ax
+	mov	- MAX_BPF_STACK + 32(%rbp),%ax
	rol	$8,%ax
	movzwl	%ax,%eax
	ret
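
In C terms each slow path is now: copy the (possibly non-linear) bytes into the scratch slot with skb_copy_bits(), then fix the byte order (bswap for words, rol $8 plus zero-extend for halfwords). Roughly, for the word case (a sketch, not kernel source; the real code jumps to bpf_error instead of returning a value):

#include <linux/skbuff.h>

static u32 slow_path_word(const struct sk_buff *skb, int offset)
{
	__be32 raw;

	/* skb_copy_bits() returns negative on fault, hence the js above */
	if (skb_copy_bits(skb, offset, &raw, sizeof(raw)) < 0)
		return 0;		/* the asm jumps to bpf_error here */
	return be32_to_cpu(raw);	/* the bswap %eax above */
}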
@@ -122,21 +103,11 @@ bpf_slow_path_half:
bpf_slow_path_byte:
	bpf_slow_path_common(1)
	js	bpf_error
-	movzbl	-12(%rbp),%eax
-	ret
-
-bpf_slow_path_byte_msh:
-	xchg	%eax,%ebx /* dont lose A , X is about to be scratched */
-	bpf_slow_path_common(1)
-	js	bpf_error
-	movzbl	-12(%rbp),%eax
-	and	$15,%al
-	shl	$2,%al
-	xchg	%eax,%ebx
+	movzbl	- MAX_BPF_STACK + 32(%rbp),%eax
	ret

#define sk_negative_common(SIZE)				\
-	push	%rdi;	/* save skb */				\
+	mov	%rbx, %rdi; /* arg1 == skb */			\
	push	%r9;						\
	push	SKBDATA;					\
/* rsi already has offset */					\
@@ -145,10 +116,8 @@ bpf_slow_path_byte_msh:
	test	%rax,%rax;					\
	pop	SKBDATA;					\
	pop	%r9;						\
-	pop	%rdi;						\
	jz	bpf_error


bpf_slow_path_word_neg:
	cmp	SKF_MAX_NEG_OFF, %esi	/* test range */
	jl	bpf_error	/* offset lower -> error  */
@@ -179,22 +148,12 @@ sk_load_byte_negative_offset:
	movzbl	(%rax), %eax
	ret

-bpf_slow_path_byte_msh_neg:
-	cmp	SKF_MAX_NEG_OFF, %esi
-	jl	bpf_error
-sk_load_byte_msh_negative_offset:
-	.globl	sk_load_byte_msh_negative_offset
-	xchg	%eax,%ebx /* dont lose A , X is about to be scratched */
-	sk_negative_common(1)
-	movzbl	(%rax),%eax
-	and	$15,%al
-	shl	$2,%al
-	xchg	%eax,%ebx
-	ret

bpf_error:
# force a return 0 from jit handler
	xor	%eax,%eax
-	mov		-8(%rbp),%rbx
+	mov	- MAX_BPF_STACK(%rbp),%rbx
+	mov	- MAX_BPF_STACK + 8(%rbp),%r13
+	mov	- MAX_BPF_STACK + 16(%rbp),%r14
+	mov	- MAX_BPF_STACK + 24(%rbp),%r15
	leaveq
	ret
arch/x86/net/bpf_jit_comp.c +775 −624 (changes collapsed: preview size limit exceeded)

include/linux/filter.h +3 −0
@@ -207,6 +207,9 @@ void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to);
void sk_filter_charge(struct sock *sk, struct sk_filter *fp);
void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp);

+u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
+void bpf_int_jit_compile(struct sk_filter *fp);

#ifdef CONFIG_BPF_JIT
#include <stdarg.h>
#include <linux/linkage.h>
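
Of the two new declarations, __bpf_call_base is the anchor for internal BPF's BPF_CALL: the converter stores a helper's address in the insn's 32-bit imm field as an offset from __bpf_call_base, and the interpreter or JIT reconstructs the pointer by adding it back. A sketch of that convention (the helper name here is hypothetical):

/* insn->imm was written as (helper_address - __bpf_call_base) */
static void *bpf_call_addr(const struct sock_filter_int *insn)
{
	return (void *)__bpf_call_base + insn->imm;
}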
kernel/seccomp.c +20 −9
@@ -54,8 +54,7 @@
struct seccomp_filter {
	atomic_t usage;
	struct seccomp_filter *prev;
-	unsigned short len;  /* Instruction count */
-	struct sock_filter_int insnsi[];
+	struct sk_filter *prog;
};

/* Limit any path through the tree to 256KB worth of instructions. */
@@ -189,7 +188,8 @@ static u32 seccomp_run_filters(int syscall)
	 * value always takes priority (ignoring the DATA).
	 */
	for (f = current->seccomp.filter; f; f = f->prev) {
-		u32 cur_ret = sk_run_filter_int_seccomp(&sd, f->insnsi);
+		u32 cur_ret = SK_RUN_FILTER(f->prog, (void *)&sd);

		if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
			ret = cur_ret;
	}
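
The win here is that SK_RUN_FILTER is just an indirect call through bpf_func, so seccomp now runs whatever bpf_int_jit_compile() left there: the JIT image if the probe succeeded, the internal BPF interpreter otherwise. Roughly (a sketch of the macro's shape; see include/linux/filter.h for the real definition):

#define SK_RUN_FILTER(filter, ctx) \
	(*(filter)->bpf_func)(ctx, (filter)->insnsi)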
@@ -215,7 +215,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
		return -EINVAL;

	for (filter = current->seccomp.filter; filter; filter = filter->prev)
-		total_insns += filter->len + 4;  /* include a 4 instr penalty */
+		total_insns += filter->prog->len + 4;  /* include a 4 instr penalty */
	if (total_insns > MAX_INSNS_PER_PATH)
		return -ENOMEM;

@@ -256,19 +256,27 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)

	/* Allocate a new seccomp_filter */
	ret = -ENOMEM;
-	filter = kzalloc(sizeof(struct seccomp_filter) +
-			 sizeof(struct sock_filter_int) * new_len,
+	filter = kzalloc(sizeof(struct seccomp_filter),
			 GFP_KERNEL|__GFP_NOWARN);
	if (!filter)
		goto free_prog;

-	ret = sk_convert_filter(fp, fprog->len, filter->insnsi, &new_len);
-	if (ret)
+	filter->prog = kzalloc(sk_filter_size(new_len),
+			       GFP_KERNEL|__GFP_NOWARN);
+	if (!filter->prog)
		goto free_filter;

+	ret = sk_convert_filter(fp, fprog->len, filter->prog->insnsi, &new_len);
+	if (ret)
+		goto free_filter_prog;
	kfree(fp);

	atomic_set(&filter->usage, 1);
-	filter->len = new_len;
+	filter->prog->len = new_len;
+	filter->prog->bpf_func = (void *)sk_run_filter_int_seccomp;
+
+	/* JIT internal BPF into native HW instructions */
+	bpf_int_jit_compile(filter->prog);

	/*
	 * If there is an existing filter, make it the prev and don't drop its
@@ -278,6 +286,8 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
	current->seccomp.filter = filter;
	return 0;

+free_filter_prog:
+	kfree(filter->prog);
free_filter:
	kfree(filter);
free_prog:
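
Worth noting: the program is now allocated separately with sk_filter_size(new_len), which sizes the flexible sk_filter so its insnsi[] array can hold new_len internal BPF instructions; conceptually something like the line below (a sketch of the intent, not the real macro):

	/* room for the sk_filter header plus new_len internal insns */
	size_t size = offsetof(struct sk_filter, insnsi[new_len]);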
@@ -330,6 +340,7 @@ void put_seccomp_filter(struct task_struct *tsk)
	while (orig && atomic_dec_and_test(&orig->usage)) {
		struct seccomp_filter *freeme = orig;
		orig = orig->prev;
+		bpf_jit_free(freeme->prog);
		kfree(freeme);
	}
}
net/core/filter.c +8 −1
@@ -1524,6 +1524,10 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
	return ERR_PTR(err);
}

+void __weak bpf_int_jit_compile(struct sk_filter *prog)
+{
+}

static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
					     struct sock *sk)
{
@@ -1544,9 +1548,12 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
	/* JIT compiler couldn't process this filter, so do the
	 * internal BPF translation for the optimized interpreter.
	 */
-	if (!fp->jited)
+	if (!fp->jited) {
		fp = __sk_migrate_filter(fp, sk);

+		/* Probe if internal BPF can be jit-ed */
+		bpf_int_jit_compile(fp);
+	}
	return fp;
}
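
The __weak stub is what makes this probe safe everywhere: generic code can call bpf_int_jit_compile() unconditionally, and the call only does something on an arch that supplies a strong definition, as arch/x86/net/bpf_jit_comp.c does in this series. A sketch of the pattern (the body comments describe the arch side as I understand it, not the actual implementation):

/* net/core/filter.c: weak default, filter keeps the interpreter */
void __weak bpf_int_jit_compile(struct sk_filter *prog)
{
}

/* arch override: the strong symbol wins at link time */
void bpf_int_jit_compile(struct sk_filter *prog)
{
	/* emit native code, point prog->bpf_func at the image and
	 * mark prog->jited; on failure leave bpf_func untouched so
	 * the internal BPF interpreter keeps running the filter.
	 */
}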