Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit f48a9205 authored by Alexei Starovoitov
Browse files

Merge branch 'fix-fullsock-access-after-bpf_sk_release'



Martin KaFai Lau says:

====================
This set addresses an issue about accessing an invalid
ptr returned from bpf_tcp_sock() and bpf_sk_fullsock()
after bpf_sk_release().

v4:
- Tried the one "id" approach.  It does not work well and the reason is in
  the Patch 1 commit message.
- Rename refcount_id to ref_obj_id.
- With ref_obj_id, resetting reg->id to 0 is fine in mark_ptr_or_null_reg()
  because ref_obj_id is passed to release_reference() instead of reg->id.
- Also reset reg->ref_obj_id in mark_ptr_or_null_reg() when is_null == true
- sk_to_full_sk() is removed from bpf_sk_fullsock() and bpf_tcp_sock().
- bpf_get_listener_sock() is added to do sk_to_full_sk() in Patch 2.
- If tp is from bpf_tcp_sock(sk) and sk is a refcounted ptr,
  bpf_sk_release(tp) is also allowed.

v3:
- reset reg->refcount_id for the is_null case in mark_ptr_or_null_reg()

v2:
- Remove refcount_id arg from release_reference() because
  id == refcount_id
- Add a WARN_ON_ONCE to mark_ptr_or_null_regs() to catch
  an internal verifier bug.
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 6bf21b54 7681e7b2
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -193,7 +193,6 @@ enum bpf_arg_type {

	ARG_PTR_TO_CTX,		/* pointer to context */
	ARG_ANYTHING,		/* any (initialized) argument is ok */
	ARG_PTR_TO_SOCKET,	/* pointer to bpf_sock */
	ARG_PTR_TO_SPIN_LOCK,	/* pointer to bpf_spin_lock */
	ARG_PTR_TO_SOCK_COMMON,	/* pointer to sock_common */
};
+40 −0
Original line number Diff line number Diff line
@@ -66,6 +66,46 @@ struct bpf_reg_state {
	 * same reference to the socket, to determine proper reference freeing.
	 */
	u32 id;
	/* PTR_TO_SOCKET and PTR_TO_TCP_SOCK could be a ptr returned
	 * from a pointer-cast helper, bpf_sk_fullsock() and
	 * bpf_tcp_sock().
	 *
	 * Consider the following where "sk" is a reference counted
	 * pointer returned from "sk = bpf_sk_lookup_tcp();":
	 *
	 * 1: sk = bpf_sk_lookup_tcp();
	 * 2: if (!sk) { return 0; }
	 * 3: fullsock = bpf_sk_fullsock(sk);
	 * 4: if (!fullsock) { bpf_sk_release(sk); return 0; }
	 * 5: tp = bpf_tcp_sock(fullsock);
	 * 6: if (!tp) { bpf_sk_release(sk); return 0; }
	 * 7: bpf_sk_release(sk);
	 * 8: snd_cwnd = tp->snd_cwnd;  // verifier will complain
	 *
	 * After bpf_sk_release(sk) at line 7, both "fullsock" ptr and
	 * "tp" ptr should be invalidated also.  In order to do that,
	 * the reg holding "fullsock" and "sk" need to remember
	 * the original refcounted ptr id (i.e. sk_reg->id) in ref_obj_id
	 * such that the verifier can reset all regs which have
	 * ref_obj_id matching the sk_reg->id.
	 *
	 * sk_reg->ref_obj_id is set to sk_reg->id at line 1.
	 * sk_reg->id will be kept for NULL-marking purpose only.
	 * After NULL-marking is done, sk_reg->id can be reset to 0.
	 *
	 * After "fullsock = bpf_sk_fullsock(sk);" at line 3,
	 * fullsock_reg->ref_obj_id is set to sk_reg->ref_obj_id.
	 *
	 * After "tp = bpf_tcp_sock(fullsock);" at line 5,
	 * tp_reg->ref_obj_id is set to fullsock_reg->ref_obj_id
	 * which is the same as sk_reg->ref_obj_id.
	 *
	 * From the verifier perspective, if sk, fullsock and tp
	 * are not NULL, they are the same ptr with different
	 * reg->type.  In particular, bpf_sk_release(tp) is also
	 * allowed and has the same effect as bpf_sk_release(sk).
	 */
	u32 ref_obj_id;
	/* For scalar types (SCALAR_VALUE), this represents our knowledge of
	 * the actual value.
	 * For pointer types, this represents the variable part of the offset
+10 −1
Original line number Diff line number Diff line
@@ -2366,6 +2366,14 @@ union bpf_attr {
 *             current value is ect (ECN capable). Works with IPv6 and IPv4.
 *     Return
 *             1 if set, 0 if not set.
 *
 * struct bpf_sock *bpf_get_listener_sock(struct bpf_sock *sk)
 *	Description
 *		Return a **struct bpf_sock** pointer in TCP_LISTEN state.
 *		bpf_sk_release() is unnecessary and not allowed.
 *	Return
 *		A **struct bpf_sock** pointer on success, or NULL in
 *		case of failure.
 */
#define __BPF_FUNC_MAPPER(FN)		\
	FN(unspec),			\
@@ -2465,7 +2473,8 @@ union bpf_attr {
	FN(spin_unlock),		\
	FN(sk_fullsock),		\
	FN(tcp_sock),			\
	FN(skb_ecn_set_ce),
	FN(skb_ecn_set_ce),		\
	FN(get_listener_sock),

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
 * function eBPF program intends to call
+74 −57
Original line number Diff line number Diff line
@@ -212,7 +212,7 @@ struct bpf_call_arg_meta {
	int access_size;
	s64 msize_smax_value;
	u64 msize_umax_value;
	int ptr_id;
	int ref_obj_id;
	int func_id;
};

@@ -346,35 +346,15 @@ static bool reg_type_may_be_null(enum bpf_reg_type type)
	       type == PTR_TO_TCP_SOCK_OR_NULL;
}

static bool type_is_refcounted(enum bpf_reg_type type)
{
	return type == PTR_TO_SOCKET;
}

static bool type_is_refcounted_or_null(enum bpf_reg_type type)
{
	return type == PTR_TO_SOCKET || type == PTR_TO_SOCKET_OR_NULL;
}

static bool reg_is_refcounted(const struct bpf_reg_state *reg)
{
	return type_is_refcounted(reg->type);
}

static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
{
	return reg->type == PTR_TO_MAP_VALUE &&
		map_value_has_spin_lock(reg->map_ptr);
}

static bool reg_is_refcounted_or_null(const struct bpf_reg_state *reg)
static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
{
	return type_is_refcounted_or_null(reg->type);
}

static bool arg_type_is_refcounted(enum bpf_arg_type type)
{
	return type == ARG_PTR_TO_SOCKET;
	return type == ARG_PTR_TO_SOCK_COMMON;
}

/* Determine whether the function releases some resources allocated by another
@@ -392,6 +372,12 @@ static bool is_acquire_function(enum bpf_func_id func_id)
		func_id == BPF_FUNC_sk_lookup_udp;
}

static bool is_ptr_cast_function(enum bpf_func_id func_id)
{
	return func_id == BPF_FUNC_tcp_sock ||
		func_id == BPF_FUNC_sk_fullsock;
}

/* string representation of 'enum bpf_reg_type' */
static const char * const reg_type_str[] = {
	[NOT_INIT]		= "?",
@@ -465,7 +451,8 @@ static void print_verifier_state(struct bpf_verifier_env *env,
			if (t == PTR_TO_STACK)
				verbose(env, ",call_%d", func(env, reg)->callsite);
		} else {
			verbose(env, "(id=%d", reg->id);
			verbose(env, "(id=%d ref_obj_id=%d", reg->id,
				reg->ref_obj_id);
			if (t != SCALAR_VALUE)
				verbose(env, ",off=%d", reg->off);
			if (type_is_pkt_pointer(t))
@@ -2414,16 +2401,15 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
		/* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */
		if (!type_is_sk_pointer(type))
			goto err_type;
	} else if (arg_type == ARG_PTR_TO_SOCKET) {
		expected_type = PTR_TO_SOCKET;
		if (type != expected_type)
			goto err_type;
		if (meta->ptr_id || !reg->id) {
			verbose(env, "verifier internal error: mismatched references meta=%d, reg=%d\n",
				meta->ptr_id, reg->id);
		if (reg->ref_obj_id) {
			if (meta->ref_obj_id) {
				verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
					regno, reg->ref_obj_id,
					meta->ref_obj_id);
				return -EFAULT;
			}
		meta->ptr_id = reg->id;
			meta->ref_obj_id = reg->ref_obj_id;
		}
	} else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
		if (meta->func_id == BPF_FUNC_spin_lock) {
			if (process_spin_lock(env, regno, true))
@@ -2740,32 +2726,38 @@ static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
	return true;
}

static bool check_refcount_ok(const struct bpf_func_proto *fn)
static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
{
	int count = 0;

	if (arg_type_is_refcounted(fn->arg1_type))
	if (arg_type_may_be_refcounted(fn->arg1_type))
		count++;
	if (arg_type_is_refcounted(fn->arg2_type))
	if (arg_type_may_be_refcounted(fn->arg2_type))
		count++;
	if (arg_type_is_refcounted(fn->arg3_type))
	if (arg_type_may_be_refcounted(fn->arg3_type))
		count++;
	if (arg_type_is_refcounted(fn->arg4_type))
	if (arg_type_may_be_refcounted(fn->arg4_type))
		count++;
	if (arg_type_is_refcounted(fn->arg5_type))
	if (arg_type_may_be_refcounted(fn->arg5_type))
		count++;

	/* A reference acquiring function cannot acquire
	 * another refcounted ptr.
	 */
	if (is_acquire_function(func_id) && count)
		return false;

	/* We only support one arg being unreferenced at the moment,
	 * which is sufficient for the helper functions we have right now.
	 */
	return count <= 1;
}

static int check_func_proto(const struct bpf_func_proto *fn)
static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
{
	return check_raw_mode_ok(fn) &&
	       check_arg_pair_ok(fn) &&
	       check_refcount_ok(fn) ? 0 : -EINVAL;
	       check_refcount_ok(fn, func_id) ? 0 : -EINVAL;
}

/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
@@ -2799,19 +2791,20 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
}

static void release_reg_references(struct bpf_verifier_env *env,
				   struct bpf_func_state *state, int id)
				   struct bpf_func_state *state,
				   int ref_obj_id)
{
	struct bpf_reg_state *regs = state->regs, *reg;
	int i;

	for (i = 0; i < MAX_BPF_REG; i++)
		if (regs[i].id == id)
		if (regs[i].ref_obj_id == ref_obj_id)
			mark_reg_unknown(env, regs, i);

	bpf_for_each_spilled_reg(i, state, reg) {
		if (!reg)
			continue;
		if (reg_is_refcounted(reg) && reg->id == id)
		if (reg->ref_obj_id == ref_obj_id)
			__mark_reg_unknown(reg);
	}
}
@@ -2820,15 +2813,20 @@ static void release_reg_references(struct bpf_verifier_env *env,
 * resources. Identify all copies of the same pointer and clear the reference.
 */
static int release_reference(struct bpf_verifier_env *env,
			     struct bpf_call_arg_meta *meta)
			     int ref_obj_id)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	int err;
	int i;

	err = release_reference_state(cur_func(env), ref_obj_id);
	if (err)
		return err;

	for (i = 0; i <= vstate->curframe; i++)
		release_reg_references(env, vstate->frame[i], meta->ptr_id);
		release_reg_references(env, vstate->frame[i], ref_obj_id);

	return release_reference_state(cur_func(env), meta->ptr_id);
	return 0;
}

static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
@@ -3047,7 +3045,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
	memset(&meta, 0, sizeof(meta));
	meta.pkt_access = fn->pkt_access;

	err = check_func_proto(fn);
	err = check_func_proto(fn, func_id);
	if (err) {
		verbose(env, "kernel subsystem misconfigured func %s#%d\n",
			func_id_name(func_id), func_id);
@@ -3093,7 +3091,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
			return err;
		}
	} else if (is_release_function(func_id)) {
		err = release_reference(env, &meta);
		err = release_reference(env, meta.ref_obj_id);
		if (err) {
			verbose(env, "func %s#%d reference has not been acquired before\n",
				func_id_name(func_id), func_id);
@@ -3154,8 +3152,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn

			if (id < 0)
				return id;
			/* For release_reference() */
			/* For mark_ptr_or_null_reg() */
			regs[BPF_REG_0].id = id;
			/* For release_reference() */
			regs[BPF_REG_0].ref_obj_id = id;
		} else {
			/* For mark_ptr_or_null_reg() */
			regs[BPF_REG_0].id = ++env->id_gen;
@@ -3170,6 +3170,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
		return -EINVAL;
	}

	if (is_ptr_cast_function(func_id))
		/* For release_reference() */
		regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;

	do_refine_retval_range(regs, fn->ret_type, func_id, &meta);

	err = check_map_func_compatibility(env, meta.map_ptr, func_id);
@@ -4665,11 +4669,19 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
		} else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) {
			reg->type = PTR_TO_TCP_SOCK;
		}
		if (is_null || !(reg_is_refcounted(reg) ||
				 reg_may_point_to_spin_lock(reg))) {
			/* We don't need id from this point onwards anymore,
			 * thus we should better reset it, so that state
			 * pruning has chances to take effect.
		if (is_null) {
			/* We don't need id and ref_obj_id from this point
			 * onwards anymore, thus we should better reset it,
			 * so that state pruning has chances to take effect.
			 */
			reg->id = 0;
			reg->ref_obj_id = 0;
		} else if (!reg_may_point_to_spin_lock(reg)) {
			/* For not-NULL ptr, reg->ref_obj_id will be reset
			 * in release_reg_references().
			 *
			 * reg->id is still used by spin_lock ptr. Other
			 * than spin_lock ptr type, reg->id can be reset.
			 */
			reg->id = 0;
		}
@@ -4684,11 +4696,16 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
{
	struct bpf_func_state *state = vstate->frame[vstate->curframe];
	struct bpf_reg_state *reg, *regs = state->regs;
	u32 ref_obj_id = regs[regno].ref_obj_id;
	u32 id = regs[regno].id;
	int i, j;

	if (reg_is_refcounted_or_null(&regs[regno]) && is_null)
		release_reference_state(state, id);
	if (ref_obj_id && ref_obj_id == id && is_null)
		/* regs[regno] is in the " == NULL" branch.
		 * No one could have freed the reference state before
		 * doing the NULL check.
		 */
		WARN_ON_ONCE(release_reference_state(state, id));

	for (i = 0; i < MAX_BPF_REG; i++)
		mark_ptr_or_null_reg(state, &regs[i], id, is_null);
+22 −5
Original line number Diff line number Diff line
@@ -1796,8 +1796,6 @@ static const struct bpf_func_proto bpf_skb_pull_data_proto = {

BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk)
{
	sk = sk_to_full_sk(sk);

	return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL;
}

@@ -5266,7 +5264,7 @@ static const struct bpf_func_proto bpf_sk_release_proto = {
	.func		= bpf_sk_release,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_SOCKET,
	.arg1_type	= ARG_PTR_TO_SOCK_COMMON,
};

BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
@@ -5407,8 +5405,6 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,

BPF_CALL_1(bpf_tcp_sock, struct sock *, sk)
{
	sk = sk_to_full_sk(sk);

	if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
		return (unsigned long)sk;

@@ -5422,6 +5418,23 @@ static const struct bpf_func_proto bpf_tcp_sock_proto = {
	.arg1_type	= ARG_PTR_TO_SOCK_COMMON,
};

/* Return @sk's full socket if it is in TCP_LISTEN state and is RCU
 * protected (SOCK_RCU_FREE), NULL otherwise.  Per the uapi description,
 * bpf_sk_release() is unnecessary and not allowed on the result.
 */
BPF_CALL_1(bpf_get_listener_sock, struct sock *, sk)
{
	struct sock *full_sk = sk_to_full_sk(sk);

	if (full_sk->sk_state != TCP_LISTEN ||
	    !sock_flag(full_sk, SOCK_RCU_FREE))
		return (unsigned long)NULL;

	return (unsigned long)full_sk;
}

/* Proto for bpf_get_listener_sock(): takes any sock_common pointer and
 * returns a bpf_sock pointer or NULL.  The result is not a refcounted
 * acquisition — bpf_sk_release() is unnecessary and not allowed on it.
 */
static const struct bpf_func_proto bpf_get_listener_sock_proto = {
	.func		= bpf_get_listener_sock,
	.gpl_only	= false,
	.ret_type	= RET_PTR_TO_SOCKET_OR_NULL,
	.arg1_type	= ARG_PTR_TO_SOCK_COMMON,
};

BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb)
{
	unsigned int iphdr_len;
@@ -5607,6 +5620,8 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
#ifdef CONFIG_INET
	case BPF_FUNC_tcp_sock:
		return &bpf_tcp_sock_proto;
	case BPF_FUNC_get_listener_sock:
		return &bpf_get_listener_sock_proto;
	case BPF_FUNC_skb_ecn_set_ce:
		return &bpf_skb_ecn_set_ce_proto;
#endif
@@ -5702,6 +5717,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
		return &bpf_sk_release_proto;
	case BPF_FUNC_tcp_sock:
		return &bpf_tcp_sock_proto;
	case BPF_FUNC_get_listener_sock:
		return &bpf_get_listener_sock_proto;
#endif
	default:
		return bpf_base_func_proto(func_id);
Loading