
Commit c377411f authored by Eric Dumazet, committed by David S. Miller

net: sk_add_backlog() take rmem_alloc into account



The current socket backlog limit is not enough to really stop DDOS attacks:
the user thread spends a long time processing a full backlog each round,
and may end up spinning madly on the socket lock.

We should account for both the backlog size and the receive queue size
(aka rmem_alloc) to pace writers, and let the user thread run without
being slowed down too much.

Introduce a sk_rcvqueues_full() helper, to avoid taking the socket lock
in stress situations.

Under huge stress from a multiqueue/RPS enabled NIC, a single-flow UDP
receiver can now process ~200,000 pps (instead of ~100 pps before the
patch) on an 8 core machine.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 6e7676c1
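
For illustration, here is a minimal, self-contained user-space sketch of the admission test this change introduces (simplified, hypothetical names; not the kernel code itself): an incoming packet is refused once the bytes already held in the backlog and the receive queue (rmem_alloc), plus the new skb's truesize, would exceed the socket's sk_rcvbuf limit.

/*
 * Illustrative sketch only: models sk_rcvqueues_full() with plain
 * user-space types. Field and function names are simplified stand-ins
 * for the kernel structures, not the real API.
 */
#include <stdbool.h>
#include <stdio.h>

struct fake_sock {
	unsigned int backlog_len;	/* like sk->sk_backlog.len */
	unsigned int rmem_alloc;	/* like sk->sk_rmem_alloc  */
	unsigned int rcvbuf;		/* like sk->sk_rcvbuf      */
};

/* Mirror of the new check: backlog + receive queue + new skb vs. rcvbuf. */
static bool rcvqueues_full(const struct fake_sock *sk, unsigned int truesize)
{
	unsigned int qsize = sk->backlog_len + sk->rmem_alloc;

	return qsize + truesize > sk->rcvbuf;
}

int main(void)
{
	struct fake_sock sk = {
		.backlog_len = 100000,	/* bytes queued while the socket is owned by user */
		.rmem_alloc  = 80000,	/* bytes already in the receive queue             */
		.rcvbuf      = 212992,	/* a typical rmem_default value                   */
	};

	/* A small skb still fits; a large one is refused up front. */
	printf("2KB skb dropped?  %d\n", rcvqueues_full(&sk, 2048));
	printf("40KB skb dropped? %d\n", rcvqueues_full(&sk, 40960));
	return 0;
}

The point of performing this test before bh_lock_sock(), as the sk_receive_skb() and UDP call sites below do, is that a flooded socket can be detected and the packet dropped without ever contending on the socket lock.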
+11 −2
@@ -256,7 +256,6 @@ struct sock {
 		struct sk_buff *head;
 		struct sk_buff *tail;
 		int len;
-		int limit;
 	} sk_backlog;
 	wait_queue_head_t	*sk_sleep;
 	struct dst_entry	*sk_dst_cache;
@@ -608,10 +607,20 @@ static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb)
 	skb->next = NULL;
 }
 
+/*
+ * Take into account size of receive queue and backlog queue
+ */
+static inline bool sk_rcvqueues_full(const struct sock *sk, const struct sk_buff *skb)
+{
+	unsigned int qsize = sk->sk_backlog.len + atomic_read(&sk->sk_rmem_alloc);
+
+	return qsize + skb->truesize > sk->sk_rcvbuf;
+}
+
 /* The per-socket spinlock must be held here. */
 static inline __must_check int sk_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
-	if (sk->sk_backlog.len >= max(sk->sk_backlog.limit, sk->sk_rcvbuf << 1))
+	if (sk_rcvqueues_full(sk, skb))
 		return -ENOBUFS;
 
 	__sk_add_backlog(sk, skb);
+4 −1
@@ -327,6 +327,10 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)

 	skb->dev = NULL;
 
+	if (sk_rcvqueues_full(sk, skb)) {
+		atomic_inc(&sk->sk_drops);
+		goto discard_and_relse;
+	}
 	if (nested)
 		bh_lock_sock_nested(sk);
 	else
@@ -1885,7 +1889,6 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	sk->sk_allocation	=	GFP_KERNEL;
 	sk->sk_rcvbuf		=	sysctl_rmem_default;
 	sk->sk_sndbuf		=	sysctl_wmem_default;
-	sk->sk_backlog.limit	=	sk->sk_rcvbuf << 1;
 	sk->sk_state		=	TCP_CLOSE;
 	sk_set_socket(sk, sock);

+4 −0
@@ -1372,6 +1372,10 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 			goto drop;
 	}
 
+
+	if (sk_rcvqueues_full(sk, skb))
+		goto drop;
+
 	rc = 0;
 
 	bh_lock_sock(sk);
+8 −0
@@ -584,6 +584,10 @@ static void flush_stack(struct sock **stack, unsigned int count,

 		sk = stack[i];
 		if (skb1) {
+			if (sk_rcvqueues_full(sk, skb)) {
+				kfree_skb(skb1);
+				goto drop;
+			}
 			bh_lock_sock(sk);
 			if (!sock_owned_by_user(sk))
 				udpv6_queue_rcv_skb(sk, skb1);
@@ -759,6 +763,10 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,

 	/* deliver */
 
+	if (sk_rcvqueues_full(sk, skb)) {
+		sock_put(sk);
+		goto discard;
+	}
 	bh_lock_sock(sk);
 	if (!sock_owned_by_user(sk))
 		udpv6_queue_rcv_skb(sk, skb);
+0 −3
@@ -3721,9 +3721,6 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
 	SCTP_DBG_OBJCNT_INC(sock);
 	percpu_counter_inc(&sctp_sockets_allocated);
 
-	/* Set socket backlog limit. */
-	sk->sk_backlog.limit = sysctl_sctp_rmem[1];
-
 	local_bh_disable();
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 	local_bh_enable();