
Commit ca6fb065 authored by Eric Dumazet, committed by David S. Miller

tcp: attach SYNACK messages to request sockets instead of listener



If a listen backlog is very big (to avoid syncookies), then
the listener sk->sk_wmem_alloc is the main source of false
sharing, as we need to touch it twice per SYNACK: once when
it is (re)transmitted and once at TX completion.

(One SYN packet takes the listener lock once, but up to 6 SYNACKs
are generated per request: the initial one plus up to 5 retransmits
with the default tcp_synack_retries of 5.)
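
For context, the two touches come from skb ownership. A simplified
sketch of the pre-patch accounting follows (not the exact kernel
source; the helper name is made up for illustration):

	/* Pre-patch: every SYNACK skb is owned by the listener socket,
	 * so one shared cache line (sk->sk_wmem_alloc) is written on
	 * every transmit and again on every TX completion.
	 */
	static void synack_owner_sketch(struct sk_buff *skb, struct sock *listener)
	{
		skb->sk = listener;
		skb->destructor = sock_wfree;
		/* touch #1: charge the skb at (re)transmit time */
		atomic_add(skb->truesize, &listener->sk_wmem_alloc);
	}
	/* touch #2 happens in sock_wfree() at TX completion:
	 *	atomic_sub(skb->truesize, &skb->sk->sk_wmem_alloc);
	 * often from a different CPU, hence the contention.
	 */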

By attaching the skb to the request socket, we remove this
source of contention.
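
The tcp_output.c side of the change is not part of this excerpt.
Loosely sketched (details may differ from the actual hunk),
tcp_make_synack() now takes the new attach_req flag and makes the
request socket, not the listener, the owner of the SYNACK skb:

	/* Sketch of the new ownership logic in tcp_make_synack() */
	if (attach_req) {
		/* The skb holds a reference on the request socket;
		 * sock_edemux() drops it at TX completion. The
		 * listener's sk_wmem_alloc is never touched.
		 */
		skb->destructor = sock_edemux;
		sock_hold(req_to_sk(req));
		skb->sk = req_to_sk(req);
	} else {
		/* attach_req == false (e.g. the fastopen path below):
		 * keep charging the full socket as before.
		 */
		skb_set_owner_w(skb, (struct sock *)sk);
	}

Request socks are mini sockets without a sk_wmem_alloc field, which is
presumably why the skb takes a plain reference on them instead of doing
wmem accounting.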

Tested:

 listen(fd, 10485760); // single listener (no SO_REUSEPORT)
 16 RX/TX queue NIC
 Sustained a SYN flood attack of ~320,000 SYN per second,
 sending ~1,400,000 SYNACK per second.
 Perf profiles now show the listener spinlock as the next bottleneck:

    20.29%  [kernel]  [k] queued_spin_lock_slowpath
    10.06%  [kernel]  [k] __inet_lookup_established
     5.12%  [kernel]  [k] reqsk_timer_handler
     3.22%  [kernel]  [k] get_next_timer_interrupt
     3.00%  [kernel]  [k] tcp_make_synack
     2.77%  [kernel]  [k] ipt_do_table
     2.70%  [kernel]  [k] run_timer_softirq
     2.50%  [kernel]  [k] ip_finish_output
     2.04%  [kernel]  [k] cascade

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 1b33bc3e

include/net/tcp.h  +4 −2
@@ -462,7 +462,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
 int tcp_connect(struct sock *sk);
 struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
 				struct request_sock *req,
-				struct tcp_fastopen_cookie *foc);
+				struct tcp_fastopen_cookie *foc,
+				bool attach_req);
 int tcp_disconnect(struct sock *sk, int flags);
 
 void tcp_finish_connect(struct sock *sk, struct sk_buff *skb);
@@ -1715,7 +1716,8 @@ struct tcp_request_sock_ops {
 	__u32 (*init_seq)(const struct sk_buff *skb);
 	int (*send_synack)(const struct sock *sk, struct dst_entry *dst,
 			   struct flowi *fl, struct request_sock *req,
-			   u16 queue_mapping, struct tcp_fastopen_cookie *foc);
+			   u16 queue_mapping, struct tcp_fastopen_cookie *foc,
+			   bool attach_req);
 };
 
 #ifdef CONFIG_SYN_COOKIES

net/ipv4/inet_connection_sock.c  +1 −1
@@ -628,7 +628,7 @@ static void reqsk_queue_hash_req(struct request_sock *req,
 	 * are committed to memory and refcnt initialized.
 	 */
 	smp_wmb();
-	atomic_set(&req->rsk_refcnt, 2);
+	atomic_set(&req->rsk_refcnt, 2 + 1);
 }
 
 void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
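
(The bump to 2 + 1 appears to account for the reordering visible in the
tcp_input.c hunk below: the request is now hashed, and therefore visible
to other CPUs, before the SYNACK is sent, so tcp_conn_request() keeps a
reference of its own and releases it with the reqsk_put(req) at the end
of the function.)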

net/ipv4/tcp_fastopen.c  +2 −2
@@ -161,13 +161,13 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
 	tp->snd_wnd = ntohs(tcp_hdr(skb)->window);
 
 	/* Activate the retrans timer so that SYNACK can be retransmitted.
-	 * The request socket is not added to the SYN table of the parent
+	 * The request socket is not added to the ehash
 	 * because it's been added to the accept queue directly.
 	 */
 	inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
 				  TCP_TIMEOUT_INIT, TCP_RTO_MAX);
 
-	atomic_set(&req->rsk_refcnt, 1);
+	atomic_set(&req->rsk_refcnt, 2);
 	/* Add the child socket directly into the accept queue */
 	inet_csk_reqsk_queue_add(sk, req, child);
 

net/ipv4/tcp_input.c  +12 −11
@@ -6120,8 +6120,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	struct request_sock *req;
 	bool want_cookie = false;
 	struct flowi fl;
-	int err;
-
 
 	/* TW buckets are converted to open requests without
 	 * limitations, they conserve resources and peer is
@@ -6230,21 +6228,24 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	tcp_rsk(req)->snt_isn = isn;
 	tcp_rsk(req)->txhash = net_tx_rndhash();
 	tcp_openreq_init_rwin(req, sk, dst);
-	if (!want_cookie)
+	if (!want_cookie) {
 		fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
-	err = af_ops->send_synack(fastopen_sk ?: sk, dst, &fl, req,
-				  skb_get_queue_mapping(skb), &foc);
+		tcp_reqsk_record_syn(sk, req, skb);
+	}
 	if (fastopen_sk) {
+		af_ops->send_synack(fastopen_sk, dst, &fl, req,
+				    skb_get_queue_mapping(skb), &foc, false);
 		sock_put(fastopen_sk);
 	} else {
-		if (err || want_cookie)
-			goto drop_and_free;
-
 		tcp_rsk(req)->tfo_listener = false;
-		inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+		if (!want_cookie)
+			inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+		af_ops->send_synack(sk, dst, &fl, req,
+				    skb_get_queue_mapping(skb), &foc, !want_cookie);
+		if (want_cookie)
+			goto drop_and_free;
 	}
-	tcp_reqsk_record_syn(sk, req, skb);
-
+	reqsk_put(req);
 	return 0;
 
 drop_and_release:

net/ipv4/tcp_ipv4.c  +3 −2
@@ -822,7 +822,8 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
 			      struct flowi *fl,
 			      struct request_sock *req,
 			      u16 queue_mapping,
-			      struct tcp_fastopen_cookie *foc)
+			      struct tcp_fastopen_cookie *foc,
+			      bool attach_req)
 {
 	const struct inet_request_sock *ireq = inet_rsk(req);
 	struct flowi4 fl4;
@@ -833,7 +834,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
 	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
 		return -1;
 
-	skb = tcp_make_synack(sk, dst, req, foc);
+	skb = tcp_make_synack(sk, dst, req, foc, attach_req);
 
 	if (skb) {
 		__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);