Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit fb40c9dd authored by Alexei Starovoitov's avatar Alexei Starovoitov
Browse files

Merge branch 'bpf-jit-cleanups'



Daniel Borkmann says:

====================
This series follows up mostly with with some minor cleanups on top
of 'Move ld_abs/ld_ind to native BPF' as well as implements better
32/64 bit immediate load into register and saves tail call init on
cBPF for the arm64 JIT. Last but not least we add a couple of test
cases. For details please see individual patches. Thanks!

v1 -> v2:
  - Minor fix in i64_i16_blocks() to remove 24 shift.
  - Added last two patches.
  - Added Acks from prior round.
====================

Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents 53ea24c2 a82d8cd3
Loading
Loading
Loading
Loading
+3 −10
Original line number Diff line number Diff line
@@ -234,18 +234,11 @@ static void jit_fill_hole(void *area, unsigned int size)
#define SCRATCH_SIZE 80

/* total stack size used in JITed code */
#define _STACK_SIZE \
	(ctx->prog->aux->stack_depth + \
	 + SCRATCH_SIZE + \
	 + 4 /* extra for skb_copy_bits buffer */)

#define _STACK_SIZE	(ctx->prog->aux->stack_depth + SCRATCH_SIZE)
#define STACK_SIZE	ALIGN(_STACK_SIZE, STACK_ALIGNMENT)

/* Get the offset of eBPF REGISTERs stored on scratch space. */
#define STACK_VAR(off) (STACK_SIZE-off-4)

/* Offset of skb_copy_bits buffer */
#define SKB_BUFFER STACK_VAR(SCRATCH_SIZE)
#define STACK_VAR(off) (STACK_SIZE - off)

#if __LINUX_ARM_ARCH__ < 7

+69 −46
Original line number Diff line number Diff line
@@ -21,7 +21,6 @@
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/printk.h>
#include <linux/skbuff.h>
#include <linux/slab.h>

#include <asm/byteorder.h>
@@ -80,37 +79,6 @@ static inline void emit(const u32 insn, struct jit_ctx *ctx)
	ctx->idx++;
}

static inline void emit_a64_mov_i64(const int reg, const u64 val,
				    struct jit_ctx *ctx)
{
	u64 tmp = val;
	int shift = 0;

	emit(A64_MOVZ(1, reg, tmp & 0xffff, shift), ctx);
	tmp >>= 16;
	shift += 16;
	while (tmp) {
		if (tmp & 0xffff)
			emit(A64_MOVK(1, reg, tmp & 0xffff, shift), ctx);
		tmp >>= 16;
		shift += 16;
	}
}

static inline void emit_addr_mov_i64(const int reg, const u64 val,
				     struct jit_ctx *ctx)
{
	u64 tmp = val;
	int shift = 0;

	emit(A64_MOVZ(1, reg, tmp & 0xffff, shift), ctx);
	for (;shift < 48;) {
		tmp >>= 16;
		shift += 16;
		emit(A64_MOVK(1, reg, tmp & 0xffff, shift), ctx);
	}
}

static inline void emit_a64_mov_i(const int is64, const int reg,
				  const s32 val, struct jit_ctx *ctx)
{
@@ -122,6 +90,7 @@ static inline void emit_a64_mov_i(const int is64, const int reg,
			emit(A64_MOVN(is64, reg, (u16)~lo, 0), ctx);
		} else {
			emit(A64_MOVN(is64, reg, (u16)~hi, 16), ctx);
			if (lo != 0xffff)
				emit(A64_MOVK(is64, reg, lo, 0), ctx);
		}
	} else {
@@ -131,6 +100,59 @@ static inline void emit_a64_mov_i(const int is64, const int reg,
	}
}

static int i64_i16_blocks(const u64 val, bool inverse)
{
	return (((val >>  0) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
	       (((val >> 16) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
	       (((val >> 32) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
	       (((val >> 48) & 0xffff) != (inverse ? 0xffff : 0x0000));
}

static inline void emit_a64_mov_i64(const int reg, const u64 val,
				    struct jit_ctx *ctx)
{
	u64 nrm_tmp = val, rev_tmp = ~val;
	bool inverse;
	int shift;

	if (!(nrm_tmp >> 32))
		return emit_a64_mov_i(0, reg, (u32)val, ctx);

	inverse = i64_i16_blocks(nrm_tmp, true) < i64_i16_blocks(nrm_tmp, false);
	shift = max(round_down((inverse ? (fls64(rev_tmp) - 1) :
					  (fls64(nrm_tmp) - 1)), 16), 0);
	if (inverse)
		emit(A64_MOVN(1, reg, (rev_tmp >> shift) & 0xffff, shift), ctx);
	else
		emit(A64_MOVZ(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx);
	shift -= 16;
	while (shift >= 0) {
		if (((nrm_tmp >> shift) & 0xffff) != (inverse ? 0xffff : 0x0000))
			emit(A64_MOVK(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx);
		shift -= 16;
	}
}

/*
 * This is an unoptimized 64 immediate emission used for BPF to BPF call
 * addresses. It will always do a full 64 bit decomposition as otherwise
 * more complexity in the last extra pass is required since we previously
 * reserved 4 instructions for the address.
 */
static inline void emit_addr_mov_i64(const int reg, const u64 val,
				     struct jit_ctx *ctx)
{
	u64 tmp = val;
	int shift = 0;

	emit(A64_MOVZ(1, reg, tmp & 0xffff, shift), ctx);
	for (;shift < 48;) {
		tmp >>= 16;
		shift += 16;
		emit(A64_MOVK(1, reg, tmp & 0xffff, shift), ctx);
	}
}

static inline int bpf2a64_offset(int bpf_to, int bpf_from,
				 const struct jit_ctx *ctx)
{
@@ -163,7 +185,7 @@ static inline int epilogue_offset(const struct jit_ctx *ctx)
/* Tail call offset to jump into */
#define PROLOGUE_OFFSET 7

static int build_prologue(struct jit_ctx *ctx)
static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
{
	const struct bpf_prog *prog = ctx->prog;
	const u8 r6 = bpf2a64[BPF_REG_6];
@@ -188,7 +210,7 @@ static int build_prologue(struct jit_ctx *ctx)
	 *                        | ... | BPF prog stack
	 *                        |     |
	 *                        +-----+ <= (BPF_FP - prog->aux->stack_depth)
	 *                        |RSVD | JIT scratchpad
	 *                        |RSVD | padding
	 * current A64_SP =>      +-----+ <= (BPF_FP - ctx->stack_size)
	 *                        |     |
	 *                        | ... | Function call stack
@@ -210,6 +232,7 @@ static int build_prologue(struct jit_ctx *ctx)
	/* Set up BPF prog stack base register */
	emit(A64_MOV(1, fp, A64_SP), ctx);

	if (!ebpf_from_cbpf) {
		/* Initialize tail_call_cnt */
		emit(A64_MOVZ(1, tcc, 0, 0), ctx);

@@ -219,10 +242,9 @@ static int build_prologue(struct jit_ctx *ctx)
				    cur_offset, PROLOGUE_OFFSET);
			return -1;
		}
	}

	/* 4 byte extra for skb_copy_bits buffer */
	ctx->stack_size = prog->aux->stack_depth + 4;
	ctx->stack_size = STACK_ALIGN(ctx->stack_size);
	ctx->stack_size = STACK_ALIGN(prog->aux->stack_depth);

	/* Set up function call stack */
	emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
@@ -786,6 +808,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
	struct bpf_prog *tmp, *orig_prog = prog;
	struct bpf_binary_header *header;
	struct arm64_jit_data *jit_data;
	bool was_classic = bpf_prog_was_classic(prog);
	bool tmp_blinded = false;
	bool extra_pass = false;
	struct jit_ctx ctx;
@@ -840,7 +863,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
		goto out_off;
	}

	if (build_prologue(&ctx)) {
	if (build_prologue(&ctx, was_classic)) {
		prog = orig_prog;
		goto out_off;
	}
@@ -863,7 +886,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
skip_init_ctx:
	ctx.idx = 0;

	build_prologue(&ctx);
	build_prologue(&ctx, was_classic);

	if (build_body(&ctx)) {
		bpf_jit_binary_free(header);
+0 −26
Original line number Diff line number Diff line
@@ -95,7 +95,6 @@ enum reg_val_type {
 * struct jit_ctx - JIT context
 * @skf:		The sk_filter
 * @stack_size:		eBPF stack size
 * @tmp_offset:		eBPF $sp offset to 8-byte temporary memory
 * @idx:		Instruction index
 * @flags:		JIT flags
 * @offsets:		Instruction offsets
@@ -105,7 +104,6 @@ enum reg_val_type {
struct jit_ctx {
	const struct bpf_prog *skf;
	int stack_size;
	int tmp_offset;
	u32 idx;
	u32 flags;
	u32 *offsets;
@@ -293,7 +291,6 @@ static int gen_int_prologue(struct jit_ctx *ctx)
	locals_size = (ctx->flags & EBPF_SEEN_FP) ? MAX_BPF_STACK : 0;

	stack_adjust += locals_size;
	ctx->tmp_offset = locals_size;

	ctx->stack_size = stack_adjust;

@@ -399,7 +396,6 @@ static void gen_imm_to_reg(const struct bpf_insn *insn, int reg,
		emit_instr(ctx, lui, reg, upper >> 16);
		emit_instr(ctx, addiu, reg, reg, lower);
	}

}

static int gen_imm_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
@@ -547,28 +543,6 @@ static int gen_imm_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
	return 0;
}

static void * __must_check
ool_skb_header_pointer(const struct sk_buff *skb, int offset,
		       int len, void *buffer)
{
	return skb_header_pointer(skb, offset, len, buffer);
}

static int size_to_len(const struct bpf_insn *insn)
{
	switch (BPF_SIZE(insn->code)) {
	case BPF_B:
		return 1;
	case BPF_H:
		return 2;
	case BPF_W:
		return 4;
	case BPF_DW:
		return 8;
	}
	return 0;
}

static void emit_const_to_reg(struct jit_ctx *ctx, int dst, u64 value)
{
	if (value >= 0xffffffffffff8000ull || value < 0x8000ull) {
+0 −1
Original line number Diff line number Diff line
@@ -894,7 +894,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
	const int i = insn - ctx->prog->insnsi;
	const s16 off = insn->off;
	const s32 imm = insn->imm;
	u32 *func;

	if (insn->src_reg == BPF_REG_FP)
		ctx->saw_frame_pointer = true;
+14 −15
Original line number Diff line number Diff line
@@ -314,7 +314,7 @@ do { \
	EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */	\
	EMIT1(0xC3);             /* retq */			\
} while (0)
#else
# else /* !CONFIG_X86_64 */
#  define RETPOLINE_EDX_BPF_JIT()				\
do {								\
	EMIT1_off32(0xE8, 7);	 /* call do_rop */		\
@@ -328,12 +328,11 @@ do { \
} while (0)
# endif
#else /* !CONFIG_RETPOLINE */

# ifdef CONFIG_X86_64
#  define RETPOLINE_RAX_BPF_JIT_SIZE	2
#  define RETPOLINE_RAX_BPF_JIT()				\
	EMIT2(0xFF, 0xE0);       /* jmp *%rax */
#else
# else /* !CONFIG_X86_64 */
#  define RETPOLINE_EDX_BPF_JIT()				\
	EMIT2(0xFF, 0xE2)        /* jmp *%edx */
# endif
Loading