Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 30cfe3b4 authored by Daniel Borkmann's avatar Daniel Borkmann
Browse files

Merge branch 'bpf-multi-prog-improvements'



Sandipan Das says:

====================
[1] Support for bpf-to-bpf function calls in the powerpc64 JIT compiler.

[2] Provide a way for resolving function calls because of the way JITed
    images are allocated in powerpc64.

[3] Fix to get JITed instruction dumps for multi-function programs from
    the bpf system call.

[4] Fix for bpftool to show delimited multi-function JITed image dumps.

v4:
 - Incorporate review comments from Jakub.
 - Fix JSON output for bpftool.

v3:
 - Change base tree tag to bpf-next.
 - Incorporate review comments from Alexei, Daniel and Jakub.
 - Make sure that the JITed image does not grow or shrink after
   the last pass, since the instruction sequence used to load a
   callee's address may be optimized.
 - Make additional changes to the bpf system call and bpftool to
   make multi-function JITed dumps easier to correlate.

v2:
 - Incorporate review comments from Jakub.
====================

Signed-off-by: default avatarDaniel Borkmann <daniel@iogearbox.net>
parents a2889a4c f7f62c71
Loading
Loading
Loading
Loading
+89 −21
Original line number Diff line number Diff line
@@ -167,25 +167,37 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)

/* Emit the instruction sequence for calling a kernel helper or another
 * BPF function from JITed powerpc64 code.
 *
 * NOTE(review): this span is rendered from a diff without +/- markers, so
 * it appears to merge the new NOP-padded call sequence with the removed
 * ELF-ABI-v1 function-descriptor sequence — verify against the applied
 * tree before treating it as compilable code.
 */
static void bpf_jit_emit_func_call(u32 *image, struct codegen_context *ctx, u64 func)
{
	/* Remember where this call sequence starts so we can count how many
	 * instructions PPC_LI64() actually emitted.
	 */
	unsigned int i, ctx_idx = ctx->idx;

	/* Load function address into r12 */
	PPC_LI64(12, func);

	/* For bpf-to-bpf function calls, the callee's address is unknown
	 * until the last extra pass. As seen above, we use PPC_LI64() to
	 * load the callee's address, but this may optimize the number of
	 * instructions required based on the nature of the address.
	 *
	 * Since we don't want the number of instructions emitted to change,
	 * we pad the optimized PPC_LI64() call with NOPs to guarantee that
	 * we always have a five-instruction sequence, which is the maximum
	 * that PPC_LI64() can emit.
	 */
	for (i = ctx->idx - ctx_idx; i < 5; i++)
		PPC_NOP();

#ifdef PPC64_ELF_ABI_v1
	/* func points to the function descriptor */
	PPC_LI64(b2p[TMP_REG_2], func);
	/* Load actual entry point from function descriptor */
	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0);
	/* ... and move it to LR */
	PPC_MTLR(b2p[TMP_REG_1]);
	/*
	 * Load TOC from function descriptor at offset 8.
	 * We can clobber r2 since we get called through a
	 * function pointer (so caller will save/restore r2)
	 * and since we don't use a TOC ourselves.
	 */
	PPC_BPF_LL(2, b2p[TMP_REG_2], 8);
#else
	/* We can clobber r12 */
	PPC_FUNC_ADDR(12, func);
	PPC_MTLR(12);
	/* NOTE(review): the two loads below look like leftover descriptor
	 * handling from the removed side of the diff — confirm upstream.
	 */
	PPC_BPF_LL(2, 12, 8);
	/* Load actual entry point from function descriptor */
	PPC_BPF_LL(12, 12, 0);
#endif

	/* Move the callee's address to LR and branch-and-link to it */
	PPC_MTLR(12);
	PPC_BLRL();
}

@@ -256,7 +268,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
/* Assemble the body code between the prologue & epilogue */
static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
			      struct codegen_context *ctx,
			      u32 *addrs)
			      u32 *addrs, bool extra_pass)
{
	const struct bpf_insn *insn = fp->insnsi;
	int flen = fp->len;
@@ -712,10 +724,24 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
			break;

		/*
		 * Call kernel helper
		 * Call kernel helper or bpf function
		 */
		case BPF_JMP | BPF_CALL:
			ctx->seen |= SEEN_FUNC;

			/* bpf function call */
			if (insn[i].src_reg == BPF_PSEUDO_CALL)
				if (!extra_pass)
					func = NULL;
				else if (fp->aux->func && off < fp->aux->func_cnt)
					/* use the subprog id from the off
					 * field to lookup the callee address
					 */
					func = (u8 *) fp->aux->func[off]->bpf_func;
				else
					return -EINVAL;
			/* kernel helper call */
			else
				func = (u8 *) __bpf_call_base + imm;

			bpf_jit_emit_func_call(image, ctx, (u64)func);
@@ -864,6 +890,14 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
	return 0;
}

/* JIT state preserved across calls to bpf_int_jit_compile() so that the
 * extra pass (run once subprog addresses are known) can resume with the
 * image and context produced by the initial passes instead of starting
 * over. Stored in fp->aux->jit_data and freed after the final pass.
 */
struct powerpc64_jit_data {
	struct bpf_binary_header *header;	/* allocated executable image header */
	u32 *addrs;				/* per-insn offsets into the JITed image */
	u8 *image;				/* start of the JITed image */
	u32 proglen;				/* length of the generated code */
	struct codegen_context ctx;		/* codegen state from the last pass */
};

struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
{
	u32 proglen;
@@ -871,6 +905,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
	u8 *image = NULL;
	u32 *code_base;
	u32 *addrs;
	struct powerpc64_jit_data *jit_data;
	struct codegen_context cgctx;
	int pass;
	int flen;
@@ -878,6 +913,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
	struct bpf_prog *org_fp = fp;
	struct bpf_prog *tmp_fp;
	bool bpf_blinded = false;
	bool extra_pass = false;

	if (!fp->jit_requested)
		return org_fp;
@@ -891,11 +927,32 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
		fp = tmp_fp;
	}

	jit_data = fp->aux->jit_data;
	if (!jit_data) {
		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
		if (!jit_data) {
			fp = org_fp;
			goto out;
		}
		fp->aux->jit_data = jit_data;
	}

	flen = fp->len;
	addrs = jit_data->addrs;
	if (addrs) {
		cgctx = jit_data->ctx;
		image = jit_data->image;
		bpf_hdr = jit_data->header;
		proglen = jit_data->proglen;
		alloclen = proglen + FUNCTION_DESCR_SIZE;
		extra_pass = true;
		goto skip_init_ctx;
	}

	addrs = kzalloc((flen+1) * sizeof(*addrs), GFP_KERNEL);
	if (addrs == NULL) {
		fp = org_fp;
		goto out;
		goto out_addrs;
	}

	memset(&cgctx, 0, sizeof(struct codegen_context));
@@ -904,10 +961,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
	cgctx.stack_size = round_up(fp->aux->stack_depth, 16);

	/* Scouting faux-generate pass 0 */
	if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) {
	if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
		/* We hit something illegal or unsupported. */
		fp = org_fp;
		goto out;
		goto out_addrs;
	}

	/*
@@ -925,9 +982,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
			bpf_jit_fill_ill_insns);
	if (!bpf_hdr) {
		fp = org_fp;
		goto out;
		goto out_addrs;
	}

skip_init_ctx:
	code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);

	/* Code generation passes 1-2 */
@@ -935,7 +993,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
		/* Now build the prologue, body code & epilogue for real. */
		cgctx.idx = 0;
		bpf_jit_build_prologue(code_base, &cgctx);
		bpf_jit_build_body(fp, code_base, &cgctx, addrs);
		bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass);
		bpf_jit_build_epilogue(code_base, &cgctx);

		if (bpf_jit_enable > 1)
@@ -961,10 +1019,20 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
	fp->jited_len = alloclen;

	bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));

out:
	if (!fp->is_func || extra_pass) {
out_addrs:
		kfree(addrs);
		kfree(jit_data);
		fp->aux->jit_data = NULL;
	} else {
		jit_data->addrs = addrs;
		jit_data->ctx = cgctx;
		jit_data->proglen = proglen;
		jit_data->image = image;
		jit_data->header = bpf_hdr;
	}

out:
	if (bpf_blinded)
		bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp);

+4 −0
Original line number Diff line number Diff line
@@ -2205,6 +2205,10 @@ struct bpf_prog_info {
	__u32 gpl_compatible:1;
	__u64 netns_dev;
	__u64 netns_ino;
	__u32 nr_jited_ksyms;
	__u32 nr_jited_func_lens;
	__aligned_u64 jited_ksyms;
	__aligned_u64 jited_func_lens;
} __attribute__((aligned(8)));

struct bpf_map_info {
+79 −3
Original line number Diff line number Diff line
@@ -1933,6 +1933,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
	if (!capable(CAP_SYS_ADMIN)) {
		info.jited_prog_len = 0;
		info.xlated_prog_len = 0;
		info.nr_jited_ksyms = 0;
		goto done;
	}

@@ -1969,18 +1970,93 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
	 * for offload.
	 */
	ulen = info.jited_prog_len;
	if (prog->aux->func_cnt) {
		u32 i;

		info.jited_prog_len = 0;
		for (i = 0; i < prog->aux->func_cnt; i++)
			info.jited_prog_len += prog->aux->func[i]->jited_len;
	} else {
		info.jited_prog_len = prog->jited_len;
	}

	if (info.jited_prog_len && ulen) {
		if (bpf_dump_raw_ok()) {
			uinsns = u64_to_user_ptr(info.jited_prog_insns);
			ulen = min_t(u32, info.jited_prog_len, ulen);

			/* for multi-function programs, copy the JITed
			 * instructions for all the functions
			 */
			if (prog->aux->func_cnt) {
				u32 len, free, i;
				u8 *img;

				free = ulen;
				for (i = 0; i < prog->aux->func_cnt; i++) {
					len = prog->aux->func[i]->jited_len;
					len = min_t(u32, len, free);
					img = (u8 *) prog->aux->func[i]->bpf_func;
					if (copy_to_user(uinsns, img, len))
						return -EFAULT;
					uinsns += len;
					free -= len;
					if (!free)
						break;
				}
			} else {
				if (copy_to_user(uinsns, prog->bpf_func, ulen))
					return -EFAULT;
			}
		} else {
			info.jited_prog_insns = 0;
		}
	}

	ulen = info.nr_jited_ksyms;
	info.nr_jited_ksyms = prog->aux->func_cnt;
	if (info.nr_jited_ksyms && ulen) {
		if (bpf_dump_raw_ok()) {
			u64 __user *user_ksyms;
			ulong ksym_addr;
			u32 i;

			/* copy the address of the kernel symbol
			 * corresponding to each function
			 */
			ulen = min_t(u32, info.nr_jited_ksyms, ulen);
			user_ksyms = u64_to_user_ptr(info.jited_ksyms);
			for (i = 0; i < ulen; i++) {
				ksym_addr = (ulong) prog->aux->func[i]->bpf_func;
				ksym_addr &= PAGE_MASK;
				if (put_user((u64) ksym_addr, &user_ksyms[i]))
					return -EFAULT;
			}
		} else {
			info.jited_ksyms = 0;
		}
	}

	ulen = info.nr_jited_func_lens;
	info.nr_jited_func_lens = prog->aux->func_cnt;
	if (info.nr_jited_func_lens && ulen) {
		if (bpf_dump_raw_ok()) {
			u32 __user *user_lens;
			u32 func_len, i;

			/* copy the JITed image lengths for each function */
			ulen = min_t(u32, info.nr_jited_func_lens, ulen);
			user_lens = u64_to_user_ptr(info.jited_func_lens);
			for (i = 0; i < ulen; i++) {
				func_len = prog->aux->func[i]->jited_len;
				if (put_user(func_len, &user_lens[i]))
					return -EFAULT;
			}
		} else {
			info.jited_func_lens = 0;
		}
	}

done:
	if (copy_to_user(uinfo, &info, info_len) ||
	    put_user(info_len, &uattr->info.info_len))
+15 −7
Original line number Diff line number Diff line
@@ -5383,11 +5383,24 @@ static int jit_subprogs(struct bpf_verifier_env *env)
			    insn->src_reg != BPF_PSEUDO_CALL)
				continue;
			subprog = insn->off;
			insn->off = 0;
			insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
				func[subprog]->bpf_func -
				__bpf_call_base;
		}

		/* we use the aux data to keep a list of the start addresses
		 * of the JITed images for each function in the program
		 *
		 * for some architectures, such as powerpc64, the imm field
		 * might not be large enough to hold the offset of the start
		 * address of the callee's JITed image from __bpf_call_base
		 *
		 * in such cases, we can lookup the start address of a callee
		 * by using its subprog id, available from the off field of
		 * the call instruction, as an index for this list
		 */
		func[i]->aux->func = func;
		func[i]->aux->func_cnt = env->subprog_cnt;
	}
	for (i = 0; i < env->subprog_cnt; i++) {
		old_bpf_func = func[i]->bpf_func;
@@ -5413,17 +5426,12 @@ static int jit_subprogs(struct bpf_verifier_env *env)
	 * later look the same as if they were interpreted only.
	 */
	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
		unsigned long addr;

		if (insn->code != (BPF_JMP | BPF_CALL) ||
		    insn->src_reg != BPF_PSEUDO_CALL)
			continue;
		insn->off = env->insn_aux_data[i].call_imm;
		subprog = find_subprog(env, i + insn->off + 1);
		addr  = (unsigned long)func[subprog]->bpf_func;
		addr &= PAGE_MASK;
		insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
			    addr - __bpf_call_base;
		insn->imm = subprog;
	}

	prog->jited = 1;
+96 −1
Original line number Diff line number Diff line
@@ -420,7 +420,11 @@ static int do_show(int argc, char **argv)

static int do_dump(int argc, char **argv)
{
	unsigned long *func_ksyms = NULL;
	struct bpf_prog_info info = {};
	unsigned int *func_lens = NULL;
	unsigned int nr_func_ksyms;
	unsigned int nr_func_lens;
	struct dump_data dd = {};
	__u32 len = sizeof(info);
	unsigned int buf_size;
@@ -496,10 +500,34 @@ static int do_dump(int argc, char **argv)
		return -1;
	}

	nr_func_ksyms = info.nr_jited_ksyms;
	if (nr_func_ksyms) {
		func_ksyms = malloc(nr_func_ksyms * sizeof(__u64));
		if (!func_ksyms) {
			p_err("mem alloc failed");
			close(fd);
			goto err_free;
		}
	}

	nr_func_lens = info.nr_jited_func_lens;
	if (nr_func_lens) {
		func_lens = malloc(nr_func_lens * sizeof(__u32));
		if (!func_lens) {
			p_err("mem alloc failed");
			close(fd);
			goto err_free;
		}
	}

	memset(&info, 0, sizeof(info));

	*member_ptr = ptr_to_u64(buf);
	*member_len = buf_size;
	info.jited_ksyms = ptr_to_u64(func_ksyms);
	info.nr_jited_ksyms = nr_func_ksyms;
	info.jited_func_lens = ptr_to_u64(func_lens);
	info.nr_jited_func_lens = nr_func_lens;

	err = bpf_obj_get_info_by_fd(fd, &info, &len);
	close(fd);
@@ -513,6 +541,16 @@ static int do_dump(int argc, char **argv)
		goto err_free;
	}

	if (info.nr_jited_ksyms > nr_func_ksyms) {
		p_err("too many addresses returned");
		goto err_free;
	}

	if (info.nr_jited_func_lens > nr_func_lens) {
		p_err("too many values returned");
		goto err_free;
	}

	if ((member_len == &info.jited_prog_len &&
	     info.jited_prog_insns == 0) ||
	    (member_len == &info.xlated_prog_len &&
@@ -550,7 +588,57 @@ static int do_dump(int argc, char **argv)
				goto err_free;
		}

		if (info.nr_jited_func_lens && info.jited_func_lens) {
			struct kernel_sym *sym = NULL;
			char sym_name[SYM_MAX_NAME];
			unsigned char *img = buf;
			__u64 *ksyms = NULL;
			__u32 *lens;
			__u32 i;

			if (info.nr_jited_ksyms) {
				kernel_syms_load(&dd);
				ksyms = (__u64 *) info.jited_ksyms;
			}

			if (json_output)
				jsonw_start_array(json_wtr);

			lens = (__u32 *) info.jited_func_lens;
			for (i = 0; i < info.nr_jited_func_lens; i++) {
				if (ksyms) {
					sym = kernel_syms_search(&dd, ksyms[i]);
					if (sym)
						sprintf(sym_name, "%s", sym->name);
					else
						sprintf(sym_name, "0x%016llx", ksyms[i]);
				} else {
					strcpy(sym_name, "unknown");
				}

				if (json_output) {
					jsonw_start_object(json_wtr);
					jsonw_name(json_wtr, "name");
					jsonw_string(json_wtr, sym_name);
					jsonw_name(json_wtr, "insns");
				} else {
					printf("%s:\n", sym_name);
				}

				disasm_print_insn(img, lens[i], opcodes, name);
				img += lens[i];

				if (json_output)
					jsonw_end_object(json_wtr);
				else
					printf("\n");
			}

			if (json_output)
				jsonw_end_array(json_wtr);
		} else {
			disasm_print_insn(buf, *member_len, opcodes, name);
		}
	} else if (visual) {
		if (json_output)
			jsonw_null(json_wtr);
@@ -558,6 +646,9 @@ static int do_dump(int argc, char **argv)
			dump_xlated_cfg(buf, *member_len);
	} else {
		kernel_syms_load(&dd);
		dd.nr_jited_ksyms = info.nr_jited_ksyms;
		dd.jited_ksyms = (__u64 *) info.jited_ksyms;

		if (json_output)
			dump_xlated_json(&dd, buf, *member_len, opcodes);
		else
@@ -566,10 +657,14 @@ static int do_dump(int argc, char **argv)
	}

	free(buf);
	free(func_ksyms);
	free(func_lens);
	return 0;

err_free:
	free(buf);
	free(func_ksyms);
	free(func_lens);
	return -1;
}

Loading