Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 27204aaa authored by Wei Wang's avatar Wei Wang Committed by David S. Miller
Browse files

tcp: uniform the set up of sockets after successful connection



Currently in the TCP code, the initialization sequence for cached
metrics, congestion control, BPF, etc, after successful connection
is very inconsistent. This introduces inconsistent bevhavior and is
prone to bugs. The current call sequence is as follows:

(1) for active case (tcp_finish_connect() case):
        tcp_mtup_init(sk);
        icsk->icsk_af_ops->rebuild_header(sk);
        tcp_init_metrics(sk);
        tcp_call_bpf(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
        tcp_init_congestion_control(sk);
        tcp_init_buffer_space(sk);

(2) for passive case (tcp_rcv_state_process() TCP_SYN_RECV case):
        icsk->icsk_af_ops->rebuild_header(sk);
        tcp_call_bpf(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
        tcp_init_congestion_control(sk);
        tcp_mtup_init(sk);
        tcp_init_buffer_space(sk);
        tcp_init_metrics(sk);

(3) for TFO passive case (tcp_fastopen_create_child()):
        inet_csk(child)->icsk_af_ops->rebuild_header(child);
        tcp_init_congestion_control(child);
        tcp_mtup_init(child);
        tcp_init_metrics(child);
        tcp_call_bpf(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
        tcp_init_buffer_space(child);

This commit uniforms the above functions to have the following sequence:
        tcp_mtup_init(sk);
        icsk->icsk_af_ops->rebuild_header(sk);
        tcp_init_metrics(sk);
        tcp_call_bpf(sk, BPF_SOCK_OPS_ACTIVE/PASSIVE_ESTABLISHED_CB);
        tcp_init_congestion_control(sk);
        tcp_init_buffer_space(sk);
This sequence is the same as the (1) active case. We pick this sequence
because this order correctly allows BPF to override the settings
including congestion control module and initial cwnd, etc from
the route, and then allows the CC module to see those settings.

Suggested-by: default avatarNeal Cardwell <ncardwell@google.com>
Tested-by: default avatarNeal Cardwell <ncardwell@google.com>
Signed-off-by: default avatarWei Wang <weiwan@google.com>
Acked-by: default avatarNeal Cardwell <ncardwell@google.com>
Acked-by: default avatarYuchung Cheng <ycheng@google.com>
Acked-by: default avatarEric Dumazet <edumazet@google.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 5820299a
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -416,6 +416,7 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst);
void tcp_disable_fack(struct tcp_sock *tp);
void tcp_close(struct sock *sk, long timeout);
void tcp_init_sock(struct sock *sk);
void tcp_init_transfer(struct sock *sk, int bpf_op);
unsigned int tcp_poll(struct file *file, struct socket *sock,
		      struct poll_table_struct *wait);
int tcp_getsockopt(struct sock *sk, int level, int optname,
+12 −0
Original line number Diff line number Diff line
@@ -456,6 +456,18 @@ void tcp_init_sock(struct sock *sk)
}
EXPORT_SYMBOL(tcp_init_sock);

void tcp_init_transfer(struct sock *sk, int bpf_op)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_mtup_init(sk);
	icsk->icsk_af_ops->rebuild_header(sk);
	tcp_init_metrics(sk);
	tcp_call_bpf(sk, bpf_op);
	tcp_init_congestion_control(sk);
	tcp_init_buffer_space(sk);
}

static void tcp_tx_timestamp(struct sock *sk, u16 tsflags, struct sk_buff *skb)
{
	if (tsflags && skb) {
+1 −6
Original line number Diff line number Diff line
@@ -236,12 +236,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
	refcount_set(&req->rsk_refcnt, 2);

	/* Now finish processing the fastopen child socket. */
	inet_csk(child)->icsk_af_ops->rebuild_header(child);
	tcp_init_congestion_control(child);
	tcp_mtup_init(child);
	tcp_init_metrics(child);
	tcp_call_bpf(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
	tcp_init_buffer_space(child);
	tcp_init_transfer(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);

	tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;

+3 −18
Original line number Diff line number Diff line
@@ -5513,20 +5513,13 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
		security_inet_conn_established(sk, skb);
	}

	/* Make sure socket is routed, for correct metrics.  */
	icsk->icsk_af_ops->rebuild_header(sk);

	tcp_init_metrics(sk);
	tcp_call_bpf(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
	tcp_init_congestion_control(sk);
	tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);

	/* Prevent spurious tcp_cwnd_restart() on first data
	 * packet.
	 */
	tp->lsndtime = tcp_jiffies32;

	tcp_init_buffer_space(sk);

	if (sock_flag(sk, SOCK_KEEPOPEN))
		inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));

@@ -5693,7 +5686,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
		if (tcp_is_sack(tp) && sysctl_tcp_fack)
			tcp_enable_fack(tp);

		tcp_mtup_init(sk);
		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
		tcp_initialize_rcv_mss(sk);

@@ -5920,14 +5912,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
			inet_csk(sk)->icsk_retransmits = 0;
			reqsk_fastopen_remove(sk, req, false);
		} else {
			/* Make sure socket is routed, for correct metrics. */
			icsk->icsk_af_ops->rebuild_header(sk);
			tcp_call_bpf(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
			tcp_init_congestion_control(sk);

			tcp_mtup_init(sk);
			tcp_init_transfer(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
			tp->copied_seq = tp->rcv_nxt;
			tcp_init_buffer_space(sk);
		}
		smp_mb();
		tcp_set_state(sk, TCP_ESTABLISHED);
@@ -5957,8 +5943,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
			 * are sent out.
			 */
			tcp_rearm_rto(sk);
		} else
			tcp_init_metrics(sk);
		}

		if (!inet_csk(sk)->icsk_ca_ops->cong_control)
			tcp_update_pacing_rate(sk);