Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 655a51e5 authored by Martin KaFai Lau's avatar Martin KaFai Lau Committed by Alexei Starovoitov
Browse files

bpf: Add struct bpf_tcp_sock and BPF_FUNC_tcp_sock



This patch adds a helper function BPF_FUNC_tcp_sock and it
is currently available for cg_skb and sched_(cls|act):

struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk);

int cg_skb_foo(struct __sk_buff *skb) {
	struct bpf_tcp_sock *tp;
	struct bpf_sock *sk;
	__u32 snd_cwnd;

	sk = skb->sk;
	if (!sk)
		return 1;

	tp = bpf_tcp_sock(sk);
	if (!tp)
		return 1;

	snd_cwnd = tp->snd_cwnd;
	/* ... */

	return 1;
}

A 'struct bpf_tcp_sock' is also added to the uapi bpf.h to provide
read-only access.  bpf_tcp_sock has all the existing tcp_sock's fields
that has already been exposed by the bpf_sock_ops.
i.e. no new tcp_sock's fields are exposed in bpf.h.

This helper returns a pointer to the tcp_sock.  If it is not a tcp_sock
or it cannot be traced back to a tcp_sock by sk_to_full_sk(), it
returns NULL.  Hence, the caller needs to check for NULL before
accessing it.

The current use case is to expose members from tcp_sock
to allow a cg_skb_bpf_prog to provide per cgroup traffic
policing/shaping.

Acked-by: default avatarAlexei Starovoitov <ast@kernel.org>
Signed-off-by: default avatarMartin KaFai Lau <kafai@fb.com>
Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parent 9b1f3d6e
Loading
Loading
Loading
Loading
+30 −0
Original line number Diff line number Diff line
@@ -204,6 +204,7 @@ enum bpf_return_type {
	RET_PTR_TO_MAP_VALUE,		/* returns a pointer to map elem value */
	RET_PTR_TO_MAP_VALUE_OR_NULL,	/* returns a pointer to map elem value or NULL */
	RET_PTR_TO_SOCKET_OR_NULL,	/* returns a pointer to a socket or NULL */
	RET_PTR_TO_TCP_SOCK_OR_NULL,	/* returns a pointer to a tcp_sock or NULL */
};

/* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
@@ -259,6 +260,8 @@ enum bpf_reg_type {
	PTR_TO_SOCKET_OR_NULL,	 /* reg points to struct bpf_sock or NULL */
	PTR_TO_SOCK_COMMON,	 /* reg points to sock_common */
	PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */
	PTR_TO_TCP_SOCK,	 /* reg points to struct tcp_sock */
	PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
};

/* The information passed from prog-specific *_is_valid_access
@@ -956,4 +959,31 @@ static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
}
#endif

#ifdef CONFIG_INET
bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
				  struct bpf_insn_access_aux *info);

u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
				    const struct bpf_insn *si,
				    struct bpf_insn *insn_buf,
				    struct bpf_prog *prog,
				    u32 *target_size);
#else
static inline bool bpf_tcp_sock_is_valid_access(int off, int size,
						enum bpf_access_type type,
						struct bpf_insn_access_aux *info)
{
	return false;
}

static inline u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
						  const struct bpf_insn *si,
						  struct bpf_insn *insn_buf,
						  struct bpf_prog *prog,
						  u32 *target_size)
{
	return 0;
}
#endif /* CONFIG_INET */

#endif /* _LINUX_BPF_H */
+50 −1
Original line number Diff line number Diff line
@@ -2337,6 +2337,15 @@ union bpf_attr {
 *	Return
 *		A **struct bpf_sock** pointer on success, or NULL in
 *		case of failure.
 *
 * struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk)
 *	Description
 *		This helper gets a **struct bpf_tcp_sock** pointer from a
 *		**struct bpf_sock** pointer.
 *
 *	Return
 *		A **struct bpf_tcp_sock** pointer on success, or NULL in
 *		case of failure.
 */
#define __BPF_FUNC_MAPPER(FN)		\
	FN(unspec),			\
@@ -2434,7 +2443,8 @@ union bpf_attr {
	FN(rc_pointer_rel),		\
	FN(spin_lock),			\
	FN(spin_unlock),		\
	FN(sk_fullsock),
	FN(sk_fullsock),		\
	FN(tcp_sock),

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
 * function eBPF program intends to call
@@ -2616,6 +2626,45 @@ struct bpf_sock {
	__u32 state;
};

struct bpf_tcp_sock {
	__u32 snd_cwnd;		/* Sending congestion window		*/
	__u32 srtt_us;		/* smoothed round trip time << 3 in usecs */
	__u32 rtt_min;
	__u32 snd_ssthresh;	/* Slow start size threshold		*/
	__u32 rcv_nxt;		/* What we want to receive next		*/
	__u32 snd_nxt;		/* Next sequence we send		*/
	__u32 snd_una;		/* First byte we want an ack for	*/
	__u32 mss_cache;	/* Cached effective mss, not including SACKS */
	__u32 ecn_flags;	/* ECN status bits.			*/
	__u32 rate_delivered;	/* saved rate sample: packets delivered */
	__u32 rate_interval_us;	/* saved rate sample: time elapsed */
	__u32 packets_out;	/* Packets which are "in flight"	*/
	__u32 retrans_out;	/* Retransmitted packets out		*/
	__u32 total_retrans;	/* Total retransmits for entire connection */
	__u32 segs_in;		/* RFC4898 tcpEStatsPerfSegsIn
				 * total number of segments in.
				 */
	__u32 data_segs_in;	/* RFC4898 tcpEStatsPerfDataSegsIn
				 * total number of data segments in.
				 */
	__u32 segs_out;		/* RFC4898 tcpEStatsPerfSegsOut
				 * The total number of segments sent.
				 */
	__u32 data_segs_out;	/* RFC4898 tcpEStatsPerfDataSegsOut
				 * total number of data segments sent.
				 */
	__u32 lost_out;		/* Lost packets			*/
	__u32 sacked_out;	/* SACK'd packets			*/
	__u64 bytes_received;	/* RFC4898 tcpEStatsAppHCThruOctetsReceived
				 * sum(delta(rcv_nxt)), or how many bytes
				 * were acked.
				 */
	__u64 bytes_acked;	/* RFC4898 tcpEStatsAppHCThruOctetsAcked
				 * sum(delta(snd_una)), or how many bytes
				 * were acked.
				 */
};

struct bpf_sock_tuple {
	union {
		struct {
+29 −2
Original line number Diff line number Diff line
@@ -334,14 +334,16 @@ static bool type_is_pkt_pointer(enum bpf_reg_type type)
static bool type_is_sk_pointer(enum bpf_reg_type type)
{
	return type == PTR_TO_SOCKET ||
		type == PTR_TO_SOCK_COMMON;
		type == PTR_TO_SOCK_COMMON ||
		type == PTR_TO_TCP_SOCK;
}

static bool reg_type_may_be_null(enum bpf_reg_type type)
{
	return type == PTR_TO_MAP_VALUE_OR_NULL ||
	       type == PTR_TO_SOCKET_OR_NULL ||
	       type == PTR_TO_SOCK_COMMON_OR_NULL;
	       type == PTR_TO_SOCK_COMMON_OR_NULL ||
	       type == PTR_TO_TCP_SOCK_OR_NULL;
}

static bool type_is_refcounted(enum bpf_reg_type type)
@@ -407,6 +409,8 @@ static const char * const reg_type_str[] = {
	[PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
	[PTR_TO_SOCK_COMMON]	= "sock_common",
	[PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
	[PTR_TO_TCP_SOCK]	= "tcp_sock",
	[PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
};

static char slot_type_char[] = {
@@ -1209,6 +1213,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
	case PTR_TO_SOCKET_OR_NULL:
	case PTR_TO_SOCK_COMMON:
	case PTR_TO_SOCK_COMMON_OR_NULL:
	case PTR_TO_TCP_SOCK:
	case PTR_TO_TCP_SOCK_OR_NULL:
		return true;
	default:
		return false;
@@ -1662,6 +1668,9 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
	case PTR_TO_SOCKET:
		valid = bpf_sock_is_valid_access(off, size, t, &info);
		break;
	case PTR_TO_TCP_SOCK:
		valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
		break;
	default:
		valid = false;
	}
@@ -1823,6 +1832,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
	case PTR_TO_SOCK_COMMON:
		pointer_desc = "sock_common ";
		break;
	case PTR_TO_TCP_SOCK:
		pointer_desc = "tcp_sock ";
		break;
	default:
		break;
	}
@@ -3148,6 +3160,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
			/* For mark_ptr_or_null_reg() */
			regs[BPF_REG_0].id = ++env->id_gen;
		}
	} else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) {
		mark_reg_known_zero(env, regs, BPF_REG_0);
		regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL;
		regs[BPF_REG_0].id = ++env->id_gen;
	} else {
		verbose(env, "unknown return type %d of func %s#%d\n",
			fn->ret_type, func_id_name(func_id), func_id);
@@ -3409,6 +3425,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
	case PTR_TO_SOCKET_OR_NULL:
	case PTR_TO_SOCK_COMMON:
	case PTR_TO_SOCK_COMMON_OR_NULL:
	case PTR_TO_TCP_SOCK:
	case PTR_TO_TCP_SOCK_OR_NULL:
		verbose(env, "R%d pointer arithmetic on %s prohibited\n",
			dst, reg_type_str[ptr_reg->type]);
		return -EACCES;
@@ -4644,6 +4662,8 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
			reg->type = PTR_TO_SOCKET;
		} else if (reg->type == PTR_TO_SOCK_COMMON_OR_NULL) {
			reg->type = PTR_TO_SOCK_COMMON;
		} else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) {
			reg->type = PTR_TO_TCP_SOCK;
		}
		if (is_null || !(reg_is_refcounted(reg) ||
				 reg_may_point_to_spin_lock(reg))) {
@@ -5839,6 +5859,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
	case PTR_TO_SOCKET_OR_NULL:
	case PTR_TO_SOCK_COMMON:
	case PTR_TO_SOCK_COMMON_OR_NULL:
	case PTR_TO_TCP_SOCK:
	case PTR_TO_TCP_SOCK_OR_NULL:
		/* Only valid matches are exact, which memcmp() above
		 * would have accepted
		 */
@@ -6161,6 +6183,8 @@ static bool reg_type_mismatch_ok(enum bpf_reg_type type)
	case PTR_TO_SOCKET_OR_NULL:
	case PTR_TO_SOCK_COMMON:
	case PTR_TO_SOCK_COMMON_OR_NULL:
	case PTR_TO_TCP_SOCK:
	case PTR_TO_TCP_SOCK_OR_NULL:
		return false;
	default:
		return true;
@@ -7166,6 +7190,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
		case PTR_TO_SOCK_COMMON:
			convert_ctx_access = bpf_sock_convert_ctx_access;
			break;
		case PTR_TO_TCP_SOCK:
			convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
			break;
		default:
			continue;
		}
+79 −0
Original line number Diff line number Diff line
@@ -5315,6 +5315,79 @@ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = {
	.arg5_type	= ARG_ANYTHING,
};

bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
				  struct bpf_insn_access_aux *info)
{
	if (off < 0 || off >= offsetofend(struct bpf_tcp_sock, bytes_acked))
		return false;

	if (off % size != 0)
		return false;

	switch (off) {
	case offsetof(struct bpf_tcp_sock, bytes_received):
	case offsetof(struct bpf_tcp_sock, bytes_acked):
		return size == sizeof(__u64);
	default:
		return size == sizeof(__u32);
	}
}

u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
				    const struct bpf_insn *si,
				    struct bpf_insn *insn_buf,
				    struct bpf_prog *prog, u32 *target_size)
{
	struct bpf_insn *insn = insn_buf;

#define BPF_TCP_SOCK_GET_COMMON(FIELD)					\
	do {								\
		BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, FIELD) >	\
			     FIELD_SIZEOF(struct bpf_tcp_sock, FIELD));	\
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_sock, FIELD),\
				      si->dst_reg, si->src_reg,		\
				      offsetof(struct tcp_sock, FIELD)); \
	} while (0)

	CONVERT_COMMON_TCP_SOCK_FIELDS(struct bpf_tcp_sock,
				       BPF_TCP_SOCK_GET_COMMON);

	if (insn > insn_buf)
		return insn - insn_buf;

	switch (si->off) {
	case offsetof(struct bpf_tcp_sock, rtt_min):
		BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, rtt_min) !=
			     sizeof(struct minmax));
		BUILD_BUG_ON(sizeof(struct minmax) <
			     sizeof(struct minmax_sample));

		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
				      offsetof(struct tcp_sock, rtt_min) +
				      offsetof(struct minmax_sample, v));
		break;
	}

	return insn - insn_buf;
}

BPF_CALL_1(bpf_tcp_sock, struct sock *, sk)
{
	sk = sk_to_full_sk(sk);

	if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
		return (unsigned long)sk;

	return (unsigned long)NULL;
}

static const struct bpf_func_proto bpf_tcp_sock_proto = {
	.func		= bpf_tcp_sock,
	.gpl_only	= false,
	.ret_type	= RET_PTR_TO_TCP_SOCK_OR_NULL,
	.arg1_type	= ARG_PTR_TO_SOCK_COMMON,
};

#endif /* CONFIG_INET */

bool bpf_helper_changes_pkt_data(void *func)
@@ -5470,6 +5543,10 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
		return &bpf_get_local_storage_proto;
	case BPF_FUNC_sk_fullsock:
		return &bpf_sk_fullsock_proto;
#ifdef CONFIG_INET
	case BPF_FUNC_tcp_sock:
		return &bpf_tcp_sock_proto;
#endif
	default:
		return sk_filter_func_proto(func_id, prog);
	}
@@ -5560,6 +5637,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
		return &bpf_sk_lookup_udp_proto;
	case BPF_FUNC_sk_release:
		return &bpf_sk_release_proto;
	case BPF_FUNC_tcp_sock:
		return &bpf_tcp_sock_proto;
#endif
	default:
		return bpf_base_func_proto(func_id);