Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 548aacdd authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'bpf-ARG_PTR_TO_RAW_STACK'



Merge branch 'bpf-ARG_PTR_TO_RAW_STACK'

Daniel Borkmann says:

====================
BPF updates

This series adds a new verifier argument type called
ARG_PTR_TO_RAW_STACK and converts related helpers to make
use of it. Basic idea is that we can save init of stack
memory when the helper function is guaranteed to fully
fill out the passed buffer in every path. Series also adds
test cases and converts samples. For more details, please
see individual patches.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 486bdee0 3f2050e2
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -66,6 +66,11 @@ enum bpf_arg_type {
	 * functions that access data on eBPF program stack
	 */
	ARG_PTR_TO_STACK,	/* any pointer to eBPF program stack */
	ARG_PTR_TO_RAW_STACK,	/* any pointer to eBPF program stack, area does not
				 * need to be initialized, helper function must fill
				 * all bytes or clear them in error case.
				 */

	ARG_CONST_STACK_SIZE,	/* number of bytes accessed from stack */
	ARG_CONST_STACK_SIZE_OR_ZERO, /* number of bytes accessed from stack or 0 */

+13 −4
Original line number Diff line number Diff line
@@ -163,17 +163,26 @@ static u64 bpf_get_current_comm(u64 r1, u64 size, u64 r3, u64 r4, u64 r5)
	struct task_struct *task = current;
	char *buf = (char *) (long) r1;

	if (!task)
		return -EINVAL;
	if (unlikely(!task))
		goto err_clear;

	strncpy(buf, task->comm, size);

	strlcpy(buf, task->comm, min_t(size_t, size, sizeof(task->comm)));
	/* Verifier guarantees that size > 0. For task->comm exceeding
	 * size, guarantee that buf is %NUL-terminated. Unconditionally
	 * done here to save the size test.
	 */
	buf[size - 1] = 0;
	return 0;
err_clear:
	memset(buf, 0, size);
	return -EINVAL;
}

const struct bpf_func_proto bpf_get_current_comm_proto = {
	.func		= bpf_get_current_comm,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_STACK,
	.arg1_type	= ARG_PTR_TO_RAW_STACK,
	.arg2_type	= ARG_CONST_STACK_SIZE,
};
+76 −21
Original line number Diff line number Diff line
@@ -205,6 +205,13 @@ struct verifier_env {
#define BPF_COMPLEXITY_LIMIT_INSNS	65536
#define BPF_COMPLEXITY_LIMIT_STACK	1024

/* Per-helper-call scratch state gathered while checking the call's
 * arguments; zeroed (memset) before each helper call is verified.
 */
struct bpf_call_arg_meta {
	struct bpf_map *map_ptr;	/* map passed via ARG_CONST_MAP_PTR, if any */
	bool raw_mode;			/* current arg is ARG_PTR_TO_RAW_STACK:
					 * stack area need not be pre-initialized
					 */
	int regno;			/* register holding the raw-stack pointer */
	int access_size;		/* byte size of the raw-stack access */
};

/* verbose verifier prints what it's seeing
 * bpf_check() is called under lock, so no race to access these global vars
 */
@@ -785,7 +792,8 @@ static int check_xadd(struct verifier_env *env, struct bpf_insn *insn)
 * and all elements of stack are initialized
 */
static int check_stack_boundary(struct verifier_env *env, int regno,
				int access_size, bool zero_size_allowed)
				int access_size, bool zero_size_allowed,
				struct bpf_call_arg_meta *meta)
{
	struct verifier_state *state = &env->cur_state;
	struct reg_state *regs = state->regs;
@@ -811,6 +819,12 @@ static int check_stack_boundary(struct verifier_env *env, int regno,
		return -EACCES;
	}

	if (meta && meta->raw_mode) {
		meta->access_size = access_size;
		meta->regno = regno;
		return 0;
	}

	for (i = 0; i < access_size; i++) {
		if (state->stack_slot_type[MAX_BPF_STACK + off + i] != STACK_MISC) {
			verbose("invalid indirect read from stack off %d+%d size %d\n",
@@ -822,7 +836,8 @@ static int check_stack_boundary(struct verifier_env *env, int regno,
}

static int check_func_arg(struct verifier_env *env, u32 regno,
			  enum bpf_arg_type arg_type, struct bpf_map **mapp)
			  enum bpf_arg_type arg_type,
			  struct bpf_call_arg_meta *meta)
{
	struct reg_state *reg = env->cur_state.regs + regno;
	enum bpf_reg_type expected_type;
@@ -854,7 +869,8 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
		expected_type = CONST_PTR_TO_MAP;
	} else if (arg_type == ARG_PTR_TO_CTX) {
		expected_type = PTR_TO_CTX;
	} else if (arg_type == ARG_PTR_TO_STACK) {
	} else if (arg_type == ARG_PTR_TO_STACK ||
		   arg_type == ARG_PTR_TO_RAW_STACK) {
		expected_type = PTR_TO_STACK;
		/* One exception here. In case function allows for NULL to be
		 * passed in as argument, it's a CONST_IMM type. Final test
@@ -862,6 +878,7 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
		 */
		if (reg->type == CONST_IMM && reg->imm == 0)
			expected_type = CONST_IMM;
		meta->raw_mode = arg_type == ARG_PTR_TO_RAW_STACK;
	} else {
		verbose("unsupported arg_type %d\n", arg_type);
		return -EFAULT;
@@ -875,14 +892,13 @@ static int check_func_arg(struct verifier_env *env, u32 regno,

	if (arg_type == ARG_CONST_MAP_PTR) {
		/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
		*mapp = reg->map_ptr;

		meta->map_ptr = reg->map_ptr;
	} else if (arg_type == ARG_PTR_TO_MAP_KEY) {
		/* bpf_map_xxx(..., map_ptr, ..., key) call:
		 * check that [key, key + map->key_size) are within
		 * stack limits and initialized
		 */
		if (!*mapp) {
		if (!meta->map_ptr) {
			/* in function declaration map_ptr must come before
			 * map_key, so that it's verified and known before
			 * we have to check map_key here. Otherwise it means
@@ -891,19 +907,20 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
			verbose("invalid map_ptr to access map->key\n");
			return -EACCES;
		}
		err = check_stack_boundary(env, regno, (*mapp)->key_size,
					   false);
		err = check_stack_boundary(env, regno, meta->map_ptr->key_size,
					   false, NULL);
	} else if (arg_type == ARG_PTR_TO_MAP_VALUE) {
		/* bpf_map_xxx(..., map_ptr, ..., value) call:
		 * check [value, value + map->value_size) validity
		 */
		if (!*mapp) {
		if (!meta->map_ptr) {
			/* kernel subsystem misconfigured verifier */
			verbose("invalid map_ptr to access map->value\n");
			return -EACCES;
		}
		err = check_stack_boundary(env, regno, (*mapp)->value_size,
					   false);
		err = check_stack_boundary(env, regno,
					   meta->map_ptr->value_size,
					   false, NULL);
	} else if (arg_type == ARG_CONST_STACK_SIZE ||
		   arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) {
		bool zero_size_allowed = (arg_type == ARG_CONST_STACK_SIZE_OR_ZERO);
@@ -918,7 +935,7 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
			return -EACCES;
		}
		err = check_stack_boundary(env, regno - 1, reg->imm,
					   zero_size_allowed);
					   zero_size_allowed, meta);
	}

	return err;
@@ -949,13 +966,31 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
	return 0;
}

static int check_raw_mode(const struct bpf_func_proto *fn)
{
	int count = 0;

	if (fn->arg1_type == ARG_PTR_TO_RAW_STACK)
		count++;
	if (fn->arg2_type == ARG_PTR_TO_RAW_STACK)
		count++;
	if (fn->arg3_type == ARG_PTR_TO_RAW_STACK)
		count++;
	if (fn->arg4_type == ARG_PTR_TO_RAW_STACK)
		count++;
	if (fn->arg5_type == ARG_PTR_TO_RAW_STACK)
		count++;

	return count > 1 ? -EINVAL : 0;
}

static int check_call(struct verifier_env *env, int func_id)
{
	struct verifier_state *state = &env->cur_state;
	const struct bpf_func_proto *fn = NULL;
	struct reg_state *regs = state->regs;
	struct bpf_map *map = NULL;
	struct reg_state *reg;
	struct bpf_call_arg_meta meta;
	int i, err;

	/* find function prototype */
@@ -978,23 +1013,43 @@ static int check_call(struct verifier_env *env, int func_id)
		return -EINVAL;
	}

	memset(&meta, 0, sizeof(meta));

	/* We only support one arg being in raw mode at the moment, which
	 * is sufficient for the helper functions we have right now.
	 */
	err = check_raw_mode(fn);
	if (err) {
		verbose("kernel subsystem misconfigured func %d\n", func_id);
		return err;
	}

	/* check args */
	err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &map);
	err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &meta);
	if (err)
		return err;
	err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &map);
	err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
	if (err)
		return err;
	err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &map);
	err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
	if (err)
		return err;
	err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &map);
	err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &meta);
	if (err)
		return err;
	err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &map);
	err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &meta);
	if (err)
		return err;

	/* Mark slots with STACK_MISC in case of raw mode, stack offset
	 * is inferred from register state.
	 */
	for (i = 0; i < meta.access_size; i++) {
		err = check_mem_access(env, meta.regno, i, BPF_B, BPF_WRITE, -1);
		if (err)
			return err;
	}

	/* reset caller saved regs */
	for (i = 0; i < CALLER_SAVED_REGS; i++) {
		reg = regs + caller_saved[i];
@@ -1013,18 +1068,18 @@ static int check_call(struct verifier_env *env, int func_id)
		 * can check 'value_size' boundary of memory access
		 * to map element returned from bpf_map_lookup_elem()
		 */
		if (map == NULL) {
		if (meta.map_ptr == NULL) {
			verbose("kernel subsystem misconfigured verifier\n");
			return -EINVAL;
		}
		regs[BPF_REG_0].map_ptr = map;
		regs[BPF_REG_0].map_ptr = meta.map_ptr;
	} else {
		verbose("unknown return type %d of func %d\n",
			fn->ret_type, func_id);
		return -EINVAL;
	}

	err = check_map_func_compatibility(map, func_id);
	err = check_map_func_compatibility(meta.map_ptr, func_id);
	if (err)
		return err;

+7 −3
Original line number Diff line number Diff line
@@ -62,17 +62,21 @@ EXPORT_SYMBOL_GPL(trace_call_bpf);
static u64 bpf_probe_read(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
{
	void *dst = (void *) (long) r1;
	int size = (int) r2;
	int ret, size = (int) r2;
	void *unsafe_ptr = (void *) (long) r3;

	return probe_kernel_read(dst, unsafe_ptr, size);
	ret = probe_kernel_read(dst, unsafe_ptr, size);
	if (unlikely(ret < 0))
		memset(dst, 0, size);

	return ret;
}

static const struct bpf_func_proto bpf_probe_read_proto = {
	.func		= bpf_probe_read,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_STACK,
	.arg1_type	= ARG_PTR_TO_RAW_STACK,
	.arg2_type	= ARG_CONST_STACK_SIZE,
	.arg3_type	= ARG_ANYTHING,
};
+40 −17
Original line number Diff line number Diff line
@@ -1409,16 +1409,19 @@ static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
	unsigned int len = (unsigned int) r4;
	void *ptr;

	if (unlikely((u32) offset > 0xffff || len > MAX_BPF_STACK))
		return -EFAULT;
	if (unlikely((u32) offset > 0xffff))
		goto err_clear;

	ptr = skb_header_pointer(skb, offset, len, to);
	if (unlikely(!ptr))
		return -EFAULT;
		goto err_clear;
	if (ptr != to)
		memcpy(to, ptr, len);

	return 0;
err_clear:
	memset(to, 0, len);
	return -EFAULT;
}

static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
@@ -1427,7 +1430,7 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_STACK,
	.arg3_type	= ARG_PTR_TO_RAW_STACK,
	.arg4_type	= ARG_CONST_STACK_SIZE,
};

@@ -1756,12 +1759,19 @@ static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
	struct bpf_tunnel_key *to = (struct bpf_tunnel_key *) (long) r2;
	const struct ip_tunnel_info *info = skb_tunnel_info(skb);
	u8 compat[sizeof(struct bpf_tunnel_key)];
	void *to_orig = to;
	int err;

	if (unlikely(!info || (flags & ~(BPF_F_TUNINFO_IPV6))))
		return -EINVAL;
	if (ip_tunnel_info_af(info) != bpf_tunnel_key_af(flags))
		return -EPROTO;
	if (unlikely(!info || (flags & ~(BPF_F_TUNINFO_IPV6)))) {
		err = -EINVAL;
		goto err_clear;
	}
	if (ip_tunnel_info_af(info) != bpf_tunnel_key_af(flags)) {
		err = -EPROTO;
		goto err_clear;
	}
	if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
		err = -EINVAL;
		switch (size) {
		case offsetof(struct bpf_tunnel_key, tunnel_label):
		case offsetof(struct bpf_tunnel_key, tunnel_ext):
@@ -1771,12 +1781,12 @@ static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
			 * a common path later on.
			 */
			if (ip_tunnel_info_af(info) != AF_INET)
				return -EINVAL;
				goto err_clear;
set_compat:
			to = (struct bpf_tunnel_key *)compat;
			break;
		default:
			return -EINVAL;
			goto err_clear;
		}
	}

@@ -1793,9 +1803,12 @@ static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
	}

	if (unlikely(size != sizeof(struct bpf_tunnel_key)))
		memcpy((void *)(long) r2, to, size);
		memcpy(to_orig, to, size);

	return 0;
err_clear:
	memset(to_orig, 0, size);
	return err;
}

static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
@@ -1803,7 +1816,7 @@ static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_STACK,
	.arg2_type	= ARG_PTR_TO_RAW_STACK,
	.arg3_type	= ARG_CONST_STACK_SIZE,
	.arg4_type	= ARG_ANYTHING,
};
@@ -1813,16 +1826,26 @@ static u64 bpf_skb_get_tunnel_opt(u64 r1, u64 r2, u64 size, u64 r4, u64 r5)
	struct sk_buff *skb = (struct sk_buff *) (long) r1;
	u8 *to = (u8 *) (long) r2;
	const struct ip_tunnel_info *info = skb_tunnel_info(skb);
	int err;

	if (unlikely(!info ||
		     !(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT)))
		return -ENOENT;
	if (unlikely(size < info->options_len))
		return -ENOMEM;
		     !(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT))) {
		err = -ENOENT;
		goto err_clear;
	}
	if (unlikely(size < info->options_len)) {
		err = -ENOMEM;
		goto err_clear;
	}

	ip_tunnel_info_opts_get(to, info);
	if (size > info->options_len)
		memset(to + info->options_len, 0, size - info->options_len);

	return info->options_len;
err_clear:
	memset(to, 0, size);
	return err;
}

static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = {
@@ -1830,7 +1853,7 @@ static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = {
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_STACK,
	.arg2_type	= ARG_PTR_TO_RAW_STACK,
	.arg3_type	= ARG_CONST_STACK_SIZE,
};

Loading