Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 90a9631c authored by John Fastabend, committed by David S. Miller
Browse files

bpf: sockmap update/simplify memory accounting scheme



Instead of tracking wmem_queued and sk_mem_charge by incrementing
in the verdict SK_REDIRECT paths and decrementing in the tx work
path, use the skb_set_owner_w and sock_writeable helpers. This solves
a few issues with the current code. First, in SK_REDIRECT the increments
of sk_wmem_queued and sk_mem_charge were being done without the peer's
sock lock being held. Under stress this can result in accounting
errors when tx work and/or multiple verdict decisions are working
on the peer psock.

Additionally, this cleans up the code because we can rely on the
default destructor to decrement memory accounting on kfree_skb(). Also,
this will trigger sk_write_space when space becomes available on
kfree_skb(), which wasn't happening before, and prevent __sk_free
from being called until all in-flight packets are completed.

Fixes: 174a79ff ("bpf: sockmap with sk redirect support")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 250b0f78
Loading
Loading
Loading
Loading
+7 −11
Original line number Diff line number Diff line
@@ -111,7 +111,7 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)

static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
{
	struct sock *sock;
	struct sock *sk;
	int rc;

	/* Because we use per cpu values to feed input from sock redirect
@@ -123,16 +123,16 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
	rc = smap_verdict_func(psock, skb);
	switch (rc) {
	case SK_REDIRECT:
		sock = do_sk_redirect_map();
		sk = do_sk_redirect_map();
		preempt_enable();
		if (likely(sock)) {
			struct smap_psock *peer = smap_psock_sk(sock);
		if (likely(sk)) {
			struct smap_psock *peer = smap_psock_sk(sk);

			if (likely(peer &&
				   test_bit(SMAP_TX_RUNNING, &peer->state) &&
				   sk_stream_memory_free(peer->sock))) {
				peer->sock->sk_wmem_queued += skb->truesize;
				sk_mem_charge(peer->sock, skb->truesize);
				   !sock_flag(sk, SOCK_DEAD) &&
				   sock_writeable(sk))) {
				skb_set_owner_w(skb, sk);
				skb_queue_tail(&peer->rxqueue, skb);
				schedule_work(&peer->tx_work);
				break;
@@ -282,16 +282,12 @@ static void smap_tx_work(struct work_struct *w)
				/* Hard errors break pipe and stop xmit */
				smap_report_sk_error(psock, n ? -n : EPIPE);
				clear_bit(SMAP_TX_RUNNING, &psock->state);
				sk_mem_uncharge(psock->sock, skb->truesize);
				psock->sock->sk_wmem_queued -= skb->truesize;
				kfree_skb(skb);
				goto out;
			}
			rem -= n;
			off += n;
		} while (rem);
		sk_mem_uncharge(psock->sock, skb->truesize);
		psock->sock->sk_wmem_queued -= skb->truesize;
		kfree_skb(skb);
	}
out: