Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit fad0c40f authored by Daniel Borkmann's avatar Daniel Borkmann
Browse files

Merge branch 'bpf-sockmap-estab-fixes'



John Fastabend says:

====================
Eric noted that using the close callback is not sufficient
to catch all transitions from ESTABLISHED state to a LISTEN
state. So this series does two things. First, only allow
adding socks in ESTABLISH state and second use unhash callback
to catch tcp_disconnect() transitions.

v2: added check for ESTABLISH state in hash update sockmap as well
v3: Do not release lock from unhash in error path, no lock was
    used in the first place. And drop not so useful code comments
v4: convert,
	if (unhash()) return unhash(); return
     to if (unhash()) unhash(); return;

Thanks for reviewing Yonghong I carried your ACKs forward.
====================

Signed-off-by: default avatarDaniel Borkmann <daniel@iogearbox.net>
parents 080220b6 50280278
Loading
Loading
Loading
Loading
+71 −20
Original line number Diff line number Diff line
@@ -132,6 +132,7 @@ struct smap_psock {
	struct work_struct gc_work;

	struct proto *sk_proto;
	void (*save_unhash)(struct sock *sk);
	void (*save_close)(struct sock *sk, long timeout);
	void (*save_data_ready)(struct sock *sk);
	void (*save_write_space)(struct sock *sk);
@@ -143,6 +144,7 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
			    int offset, size_t size, int flags);
static void bpf_tcp_unhash(struct sock *sk);
static void bpf_tcp_close(struct sock *sk, long timeout);

static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
@@ -184,6 +186,7 @@ static void build_protos(struct proto prot[SOCKMAP_NUM_CONFIGS],
			 struct proto *base)
{
	prot[SOCKMAP_BASE]			= *base;
	prot[SOCKMAP_BASE].unhash		= bpf_tcp_unhash;
	prot[SOCKMAP_BASE].close		= bpf_tcp_close;
	prot[SOCKMAP_BASE].recvmsg		= bpf_tcp_recvmsg;
	prot[SOCKMAP_BASE].stream_memory_read	= bpf_tcp_stream_read;
@@ -217,6 +220,7 @@ static int bpf_tcp_init(struct sock *sk)
		return -EBUSY;
	}

	psock->save_unhash = sk->sk_prot->unhash;
	psock->save_close = sk->sk_prot->close;
	psock->sk_proto = sk->sk_prot;

@@ -305,30 +309,12 @@ static struct smap_psock_map_entry *psock_map_pop(struct sock *sk,
	return e;
}

static void bpf_tcp_close(struct sock *sk, long timeout)
static void bpf_tcp_remove(struct sock *sk, struct smap_psock *psock)
{
	void (*close_fun)(struct sock *sk, long timeout);
	struct smap_psock_map_entry *e;
	struct sk_msg_buff *md, *mtmp;
	struct smap_psock *psock;
	struct sock *osk;

	lock_sock(sk);
	rcu_read_lock();
	psock = smap_psock_sk(sk);
	if (unlikely(!psock)) {
		rcu_read_unlock();
		release_sock(sk);
		return sk->sk_prot->close(sk, timeout);
	}

	/* The psock may be destroyed anytime after exiting the RCU critial
	 * section so by the time we use close_fun the psock may no longer
	 * be valid. However, bpf_tcp_close is called with the sock lock
	 * held so the close hook and sk are still valid.
	 */
	close_fun = psock->save_close;

	if (psock->cork) {
		free_start_sg(psock->sock, psock->cork, true);
		kfree(psock->cork);
@@ -379,6 +365,42 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
		kfree(e);
		e = psock_map_pop(sk, psock);
	}
}

static void bpf_tcp_unhash(struct sock *sk)
{
	void (*unhash_fun)(struct sock *sk);
	struct smap_psock *psock;

	rcu_read_lock();
	psock = smap_psock_sk(sk);
	if (unlikely(!psock)) {
		rcu_read_unlock();
		if (sk->sk_prot->unhash)
			sk->sk_prot->unhash(sk);
		return;
	}
	unhash_fun = psock->save_unhash;
	bpf_tcp_remove(sk, psock);
	rcu_read_unlock();
	unhash_fun(sk);
}

static void bpf_tcp_close(struct sock *sk, long timeout)
{
	void (*close_fun)(struct sock *sk, long timeout);
	struct smap_psock *psock;

	lock_sock(sk);
	rcu_read_lock();
	psock = smap_psock_sk(sk);
	if (unlikely(!psock)) {
		rcu_read_unlock();
		release_sock(sk);
		return sk->sk_prot->close(sk, timeout);
	}
	close_fun = psock->save_close;
	bpf_tcp_remove(sk, psock);
	rcu_read_unlock();
	release_sock(sk);
	close_fun(sk, timeout);
@@ -2097,8 +2119,12 @@ static int sock_map_update_elem(struct bpf_map *map,
		return -EINVAL;
	}

	/* ULPs are currently supported only for TCP sockets in ESTABLISHED
	 * state.
	 */
	if (skops.sk->sk_type != SOCK_STREAM ||
	    skops.sk->sk_protocol != IPPROTO_TCP) {
	    skops.sk->sk_protocol != IPPROTO_TCP ||
	    skops.sk->sk_state != TCP_ESTABLISHED) {
		fput(socket->file);
		return -EOPNOTSUPP;
	}
@@ -2453,6 +2479,16 @@ static int sock_hash_update_elem(struct bpf_map *map,
		return -EINVAL;
	}

	/* ULPs are currently supported only for TCP sockets in ESTABLISHED
	 * state.
	 */
	if (skops.sk->sk_type != SOCK_STREAM ||
	    skops.sk->sk_protocol != IPPROTO_TCP ||
	    skops.sk->sk_state != TCP_ESTABLISHED) {
		fput(socket->file);
		return -EOPNOTSUPP;
	}

	lock_sock(skops.sk);
	preempt_disable();
	rcu_read_lock();
@@ -2543,10 +2579,22 @@ const struct bpf_map_ops sock_hash_ops = {
	.map_check_btf = map_check_no_btf,
};

static bool bpf_is_valid_sock_op(struct bpf_sock_ops_kern *ops)
{
	return ops->op == BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB ||
	       ops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB;
}
BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock,
	   struct bpf_map *, map, void *, key, u64, flags)
{
	WARN_ON_ONCE(!rcu_read_lock_held());

	/* ULPs are currently supported only for TCP sockets in ESTABLISHED
	 * state. This checks that the sock ops triggering the update is
	 * one indicating we are (or will be soon) in an ESTABLISHED state.
	 */
	if (!bpf_is_valid_sock_op(bpf_sock))
		return -EOPNOTSUPP;
	return sock_map_ctx_update_elem(bpf_sock, map, key, flags);
}

@@ -2565,6 +2613,9 @@ BPF_CALL_4(bpf_sock_hash_update, struct bpf_sock_ops_kern *, bpf_sock,
	   struct bpf_map *, map, void *, key, u64, flags)
{
	WARN_ON_ONCE(!rcu_read_lock_held());

	if (!bpf_is_valid_sock_op(bpf_sock))
		return -EOPNOTSUPP;
	return sock_hash_ctx_update_elem(bpf_sock, map, key, flags);
}

+7 −3
Original line number Diff line number Diff line
@@ -580,7 +580,11 @@ static void test_sockmap(int tasks, void *data)
	/* Test update without programs */
	for (i = 0; i < 6; i++) {
		err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
		if (err) {
		if (i < 2 && !err) {
			printf("Allowed update sockmap '%i:%i' not in ESTABLISHED\n",
			       i, sfd[i]);
			goto out_sockmap;
		} else if (i >= 2 && err) {
			printf("Failed noprog update sockmap '%i:%i'\n",
			       i, sfd[i]);
			goto out_sockmap;
@@ -741,7 +745,7 @@ static void test_sockmap(int tasks, void *data)
	}

	/* Test map update elem afterwards fd lives in fd and map_fd */
	for (i = 0; i < 6; i++) {
	for (i = 2; i < 6; i++) {
		err = bpf_map_update_elem(map_fd_rx, &i, &sfd[i], BPF_ANY);
		if (err) {
			printf("Failed map_fd_rx update sockmap %i '%i:%i'\n",
@@ -845,7 +849,7 @@ static void test_sockmap(int tasks, void *data)
	}

	/* Delete the elems without programs */
	for (i = 0; i < 6; i++) {
	for (i = 2; i < 6; i++) {
		err = bpf_map_delete_elem(fd, &i);
		if (err) {
			printf("Failed delete sockmap %i '%i:%i'\n",