tcp: TCP Fast Open Server - main code path (168a8f58) · Commits · e / devices / android_kernel_teracube_2e

net/ipv4/tcp_input.c

+58 −13

Original line number	Diff line number	Diff line
		@@ -3127,6 +3127,12 @@ void tcp_rearm_rto(struct sock *sk)
		{
		struct tcp_sock *tp = tcp_sk(sk);

		/* If the retrans timer is currently being used by Fast Open
		* for SYN-ACK retrans purpose, stay put.
		*/
		if (tp->fastopen_rsk)
		return;

		if (!tp->packets_out) {
		inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
		} else {
		@@ -5895,7 +5901,9 @@ static int tcp_rcv_synsent_state_process(struct sock sk, struct sk_buff skb,
		tcp_send_synack(sk);
		#if 0
		/* Note, we could accept data and URG from this segment.
		* There are no obstacles to make this.
		* There are no obstacles to make this (except that we must
		* either change tcp_recvmsg() to prevent it from returning data
		* before 3WHS completes per RFC793, or employ TCP Fast Open).
		*
		* However, if we ignore data in ACKless segments sometimes,
		* we have no reasons to accept it sometimes.
		@@ -5935,6 +5943,7 @@ int tcp_rcv_state_process(struct sock sk, struct sk_buff skb,
		{
		struct tcp_sock *tp = tcp_sk(sk);
		struct inet_connection_sock *icsk = inet_csk(sk);
		struct request_sock *req;
		int queued = 0;

		tp->rx_opt.saw_tstamp = 0;
		@@ -5990,7 +5999,14 @@ int tcp_rcv_state_process(struct sock sk, struct sk_buff skb,
		return 0;
		}

		if (!tcp_validate_incoming(sk, skb, th, 0))
		req = tp->fastopen_rsk;
		if (req != NULL) {
		BUG_ON(sk->sk_state != TCP_SYN_RECV &&
		sk->sk_state != TCP_FIN_WAIT1);

		if (tcp_check_req(sk, skb, req, NULL, true) == NULL)
		goto discard;
		} else if (!tcp_validate_incoming(sk, skb, th, 0))
		return 0;

		/* step 5: check the ACK field */
		@@ -6000,7 +6016,22 @@ int tcp_rcv_state_process(struct sock sk, struct sk_buff skb,
		switch (sk->sk_state) {
		case TCP_SYN_RECV:
		if (acceptable) {
		/* Once we leave TCP_SYN_RECV, we no longer
		* need req so release it.
		*/
		if (req) {
		reqsk_fastopen_remove(sk, req, false);
		} else {
		/* Make sure socket is routed, for
		* correct metrics.
		*/
		icsk->icsk_af_ops->rebuild_header(sk);
		tcp_init_congestion_control(sk);

		tcp_mtup_init(sk);
		tcp_init_buffer_space(sk);
		tp->copied_seq = tp->rcv_nxt;
		}
		smp_mb();
		tcp_set_state(sk, TCP_ESTABLISHED);
		sk->sk_state_change(sk);
		@@ -6022,23 +6053,27 @@ int tcp_rcv_state_process(struct sock sk, struct sk_buff skb,
		if (tp->rx_opt.tstamp_ok)
		tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;

		/* Make sure socket is routed, for
		* correct metrics.
		if (req) {
		/* Re-arm the timer because data may
		* have been sent out. This is similar
		* to the regular data transmission case
		* when new data has just been ack'ed.
		*
		* (TFO) - we could try to be more
		* aggressive and retransmit any data
		* sooner based on when they were sent
		* out.
		*/
		icsk->icsk_af_ops->rebuild_header(sk);

		tcp_rearm_rto(sk);
		} else
		tcp_init_metrics(sk);

		tcp_init_congestion_control(sk);

		/* Prevent spurious tcp_cwnd_restart() on
		* first data packet.
		*/
		tp->lsndtime = tcp_time_stamp;

		tcp_mtup_init(sk);
		tcp_initialize_rcv_mss(sk);
		tcp_init_buffer_space(sk);
		tcp_fast_path_on(tp);
		} else {
		return 1;
		@@ -6046,6 +6081,16 @@ int tcp_rcv_state_process(struct sock sk, struct sk_buff skb,
		break;

		case TCP_FIN_WAIT1:
		/* If we enter the TCP_FIN_WAIT1 state and we are a
		* Fast Open socket and this is the first acceptable
		* ACK we have received, this would have acknowledged
		* our SYNACK so stop the SYNACK timer.
		*/
		if (acceptable && req != NULL) {
		/* We no longer need the request sock. */
		reqsk_fastopen_remove(sk, req, false);
		tcp_rearm_rto(sk);
		}
		if (tp->snd_una == tp->write_seq) {
		struct dst_entry *dst;

net/ipv4/tcp_ipv4.c

+251 −14

Original line number	Diff line number	Diff line
		@@ -352,6 +352,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
		const int code = icmp_hdr(icmp_skb)->code;
		struct sock *sk;
		struct sk_buff *skb;
		struct request_sock *req;
		__u32 seq;
		__u32 remaining;
		int err;
		@@ -394,9 +395,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)

		icsk = inet_csk(sk);
		tp = tcp_sk(sk);
		req = tp->fastopen_rsk;
		seq = ntohl(th->seq);
		if (sk->sk_state != TCP_LISTEN &&
		!between(seq, tp->snd_una, tp->snd_nxt)) {
		!between(seq, tp->snd_una, tp->snd_nxt) &&
		(req == NULL \|\| seq != tcp_rsk(req)->snt_isn)) {
		/* For a Fast Open socket, allow seq to be snt_isn. */
		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
		}
		@@ -435,6 +439,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
		!icsk->icsk_backoff)
		break;

		/* XXX (TFO) - revisit the following logic for TFO */

		if (sock_owned_by_user(sk))
		break;

		@@ -466,6 +472,14 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
		goto out;
		}

		/* XXX (TFO) - if it's a TFO socket and has been accepted, rather
		* than following the TCP_SYN_RECV case and closing the socket,
		* we ignore the ICMP error and keep trying like a fully established
		* socket. Is this the right thing to do?
		*/
		if (req && req->sk == NULL)
		goto out;

		switch (sk->sk_state) {
		struct request_sock req, *prev;
		case TCP_LISTEN:
		@@ -498,7 +512,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)

		case TCP_SYN_SENT:
		case TCP_SYN_RECV: /* Cannot happen.
		It can f.e. if SYNs crossed.
		It can f.e. if SYNs crossed,
		or Fast Open.
		*/
		if (!sock_owned_by_user(sk)) {
		sk->sk_err = err;
		@@ -809,8 +824,12 @@ static void tcp_v4_timewait_ack(struct sock sk, struct sk_buff skb)
		static void tcp_v4_reqsk_send_ack(struct sock sk, struct sk_buff skb,
		struct request_sock *req)
		{
		tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
		tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
		/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
		* sk->sk_state == TCP_SYN_RECV -> for Fast Open.
		*/
		tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
		tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
		tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
		req->ts_recent,
		0,
		tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
		@@ -1272,6 +1291,178 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
		};
		#endif

		static bool tcp_fastopen_check(struct sock sk, struct sk_buff skb,
		struct request_sock *req,
		struct tcp_fastopen_cookie *foc,
		struct tcp_fastopen_cookie *valid_foc)
		{
		bool skip_cookie = false;
		struct fastopen_queue *fastopenq;

		if (likely(!fastopen_cookie_present(foc))) {
		/* See include/net/tcp.h for the meaning of these knobs */
		if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) \|\|
		((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) &&
		(TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1)))
		skip_cookie = true; /* no cookie to validate */
		else
		return false;
		}
		fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq;
		/* A FO option is present; bump the counter. */
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE);

		/* Make sure the listener has enabled fastopen, and we don't
		* exceed the max # of pending TFO requests allowed before trying
		* to validating the cookie in order to avoid burning CPU cycles
		* unnecessarily.
		*
		* XXX (TFO) - The implication of checking the max_qlen before
		* processing a cookie request is that clients can't differentiate
		* between qlen overflow causing Fast Open to be disabled
		* temporarily vs a server not supporting Fast Open at all.
		*/
		if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 \|\|
		fastopenq == NULL \|\| fastopenq->max_qlen == 0)
		return false;

		if (fastopenq->qlen >= fastopenq->max_qlen) {
		struct request_sock *req1;
		spin_lock(&fastopenq->lock);
		req1 = fastopenq->rskq_rst_head;
		if ((req1 == NULL) \|\| time_after(req1->expires, jiffies)) {
		spin_unlock(&fastopenq->lock);
		NET_INC_STATS_BH(sock_net(sk),
		LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
		/* Avoid bumping LINUX_MIB_TCPFASTOPENPASSIVEFAIL*/
		foc->len = -1;
		return false;
		}
		fastopenq->rskq_rst_head = req1->dl_next;
		fastopenq->qlen--;
		spin_unlock(&fastopenq->lock);
		reqsk_free(req1);
		}
		if (skip_cookie) {
		tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
		return true;
		}
		if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) {
		if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) {
		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
		if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) \|\|
		memcmp(&foc->val[0], &valid_foc->val[0],
		TCP_FASTOPEN_COOKIE_SIZE) != 0)
		return false;
		valid_foc->len = -1;
		}
		/* Acknowledge the data received from the peer. */
		tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
		return true;
		} else if (foc->len == 0) { /* Client requesting a cookie */
		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
		NET_INC_STATS_BH(sock_net(sk),
		LINUX_MIB_TCPFASTOPENCOOKIEREQD);
		} else {
		/* Client sent a cookie with wrong size. Treat it
		* the same as invalid and return a valid one.
		*/
		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
		}
		return false;
		}

		/*
		 * tcp_v4_conn_req_fastopen - finish a passive Fast Open request by
		 * creating the child socket directly from the SYN.
		 *
		 * @sk:         listening socket
		 * @skb:        the received SYN (may carry payload)
		 * @skb_synack: pre-built SYN-ACK; transmitted here on success, freed
		 *              here if the child cannot be created
		 * @req:        request sock for this connection
		 * @rvp:        not referenced in this function body
		 *
		 * Returns 0 on success, or -1 when syn_recv_sock() fails to produce a
		 * child (LINUX_MIB_TCPFASTOPENPASSIVEFAIL is bumped in that case).
		 */
		static int tcp_v4_conn_req_fastopen(struct sock *sk,
		struct sk_buff *skb,
		struct sk_buff *skb_synack,
		struct request_sock *req,
		struct request_values *rvp)
		{
		/* tp starts as the listener's tcp_sock but is not used until it is
		* re-pointed at the child further down.
		*/
		struct tcp_sock *tp = tcp_sk(sk);
		struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
		const struct inet_request_sock *ireq = inet_rsk(req);
		struct sock *child;

		req->retrans = 0;
		req->sk = NULL;

		/* Create the child first; the SYN-ACK is only sent if this succeeds. */
		child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
		if (child == NULL) {
		NET_INC_STATS_BH(sock_net(sk),
		LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
		kfree_skb(skb_synack);
		return -1;
		}
		/* The return value of the transmit is deliberately not checked. */
		ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
		ireq->rmt_addr, ireq->opt);
		/* XXX (TFO) - is it ok to ignore error and continue? */

		/* Account for one more pending Fast Open request on the listener. */
		spin_lock(&queue->fastopenq->lock);
		queue->fastopenq->qlen++;
		spin_unlock(&queue->fastopenq->lock);

		/* Initialize the child socket. Have to fix some values to take
		* into account the child is a Fast Open socket and is created
		* only out of the bits carried in the SYN packet.
		*/
		tp = tcp_sk(child);

		tp->fastopen_rsk = req;
		/* Do a hold on the listener sk so that if the listener is being
		* closed, the child that has been accepted can live on and still
		* access listen_lock.
		*/
		sock_hold(sk);
		tcp_rsk(req)->listener = sk;

		/* RFC1323: The window in SYN & SYN/ACK segments is never
		* scaled. So correct it appropriately.
		*/
		tp->snd_wnd = ntohs(tcp_hdr(skb)->window);

		/* Activate the retrans timer so that SYNACK can be retransmitted.
		* The request socket is not added to the SYN table of the parent
		* because it's been added to the accept queue directly.
		*/
		inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
		TCP_TIMEOUT_INIT, TCP_RTO_MAX);

		/* Add the child socket directly into the accept queue */
		inet_csk_reqsk_queue_add(sk, req, child);

		/* Now finish processing the fastopen child socket. */
		inet_csk(child)->icsk_af_ops->rebuild_header(child);
		tcp_init_congestion_control(child);
		tcp_mtup_init(child);
		tcp_init_buffer_space(child);
		tcp_init_metrics(child);

		/* Queue the data carried in the SYN packet. We need to first
		* bump skb's refcnt because the caller will attempt to free it.
		*
		* XXX (TFO) - we honor a zero-payload TFO request for now.
		* (Any reason not to?)
		*/
		if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) {
		/* Don't queue the skb if there is no payload in SYN.
		* XXX (TFO) - How about SYN+FIN?
		*/
		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
		} else {
		/* Take an extra reference, strip the TCP header (doff is in
		* 32-bit words), charge the skb to the child and append it to
		* the child's receive queue.
		*/
		skb = skb_get(skb);
		skb_dst_drop(skb);
		__skb_pull(skb, tcp_hdr(skb)->doff * 4);
		skb_set_owner_r(skb, child);
		__skb_queue_tail(&child->sk_receive_queue, skb);
		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
		}
		/* Invoke the data-ready callback on the listener (sk), not on the
		* child -- presumably to wake a task blocked in accept(); confirm.
		*/
		sk->sk_data_ready(sk, 0);
		/* NOTE(review): assumes syn_recv_sock() returned the child locked
		* and with a reference held that is released here -- confirm.
		*/
		bh_unlock_sock(child);
		sock_put(child);
		/* req->sk was cleared on entry; by now it is expected to be set
		* again (presumably by inet_csk_reqsk_queue_add() -- confirm).
		*/
		WARN_ON(req->sk == NULL);
		return 0;
		}

		int tcp_v4_conn_request(struct sock sk, struct sk_buff skb)
		{
		struct tcp_extend_values tmp_ext;
		@@ -1285,6 +1476,11 @@ int tcp_v4_conn_request(struct sock sk, struct sk_buff skb)
		__be32 daddr = ip_hdr(skb)->daddr;
		__u32 isn = TCP_SKB_CB(skb)->when;
		bool want_cookie = false;
		struct flowi4 fl4;
		struct tcp_fastopen_cookie foc = { .len = -1 };
		struct tcp_fastopen_cookie valid_foc = { .len = -1 };
		struct sk_buff *skb_synack;
		int do_fastopen;

		/* Never answer to SYNs send to broadcast or multicast */
		if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST \| RTCF_MULTICAST))
		@@ -1319,7 +1515,8 @@ int tcp_v4_conn_request(struct sock sk, struct sk_buff skb)
		tcp_clear_options(&tmp_opt);
		tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
		tmp_opt.user_mss = tp->rx_opt.user_mss;
		tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
		tcp_parse_options(skb, &tmp_opt, &hash_location, 0,
		want_cookie ? NULL : &foc);

		if (tmp_opt.cookie_plus > 0 &&
		tmp_opt.saw_tstamp &&
		@@ -1377,8 +1574,6 @@ int tcp_v4_conn_request(struct sock sk, struct sk_buff skb)
		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
		req->cookie_ts = tmp_opt.tstamp_ok;
		} else if (!isn) {
		struct flowi4 fl4;

		/* VJ's idea. We save last timestamp seen
		* from the destination in peer table, when entering
		* state TIME-WAIT, and check against it before
		@@ -1419,14 +1614,52 @@ int tcp_v4_conn_request(struct sock sk, struct sk_buff skb)
		tcp_rsk(req)->snt_isn = isn;
		tcp_rsk(req)->snt_synack = tcp_time_stamp;

		if (tcp_v4_send_synack(sk, dst, req,
		if (dst == NULL) {
		dst = inet_csk_route_req(sk, &fl4, req);
		if (dst == NULL)
		goto drop_and_free;
		}
		do_fastopen = tcp_fastopen_check(sk, skb, req, &foc, &valid_foc);

		/* We don't call tcp_v4_send_synack() directly because we need
		* to make sure a child socket can be created successfully before
		* sending back synack!
		*
		* XXX (TFO) - Ideally one would simply call tcp_v4_send_synack()
		* (or better yet, call tcp_send_synack() in the child context
		* directly, but will have to fix bunch of other code first)
		* after syn_recv_sock() except one will need to first fix the
		* latter to remove its dependency on the current implementation
		* of tcp_v4_send_synack()->tcp_select_initial_window().
		*/
		skb_synack = tcp_make_synack(sk, dst, req,
		(struct request_values *)&tmp_ext,
		skb_get_queue_mapping(skb),
		want_cookie) \|\|
		want_cookie)
		fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL);

		if (skb_synack) {
		__tcp_v4_send_check(skb_synack, ireq->loc_addr, ireq->rmt_addr);
		skb_set_queue_mapping(skb_synack, skb_get_queue_mapping(skb));
		} else
		goto drop_and_free;

		if (likely(!do_fastopen)) {
		int err;
		err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
		ireq->rmt_addr, ireq->opt);
		err = net_xmit_eval(err);
		if (err \|\| want_cookie)
		goto drop_and_free;

		tcp_rsk(req)->listener = NULL;
		/* Add the request_sock to the SYN table */
		inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
		if (fastopen_cookie_present(&foc) && foc.len != 0)
		NET_INC_STATS_BH(sock_net(sk),
		LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
		} else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req,
		(struct request_values *)&tmp_ext))
		goto drop_and_free;

		return 0;

		drop_and_release:
		@@ -1977,6 +2210,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
		tcp_cookie_values_release);
		tp->cookie_values = NULL;
		}
		BUG_ON(tp->fastopen_rsk != NULL);

		/* If socket is aborted during connect operation */
		tcp_free_fastopen_req(tp);
		@@ -2425,6 +2659,7 @@ static void get_tcp4_sock(struct sock sk, struct seq_file f, int i, int *len)
		const struct tcp_sock *tp = tcp_sk(sk);
		const struct inet_connection_sock *icsk = inet_csk(sk);
		const struct inet_sock *inet = inet_sk(sk);
		struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq;
		__be32 dest = inet->inet_daddr;
		__be32 src = inet->inet_rcv_saddr;
		__u16 destp = ntohs(inet->inet_dport);
		@@ -2469,7 +2704,9 @@ static void get_tcp4_sock(struct sock sk, struct seq_file f, int i, int *len)
		jiffies_to_clock_t(icsk->icsk_ack.ato),
		(icsk->icsk_ack.quick << 1) \| icsk->icsk_ack.pingpong,
		tp->snd_cwnd,
		tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
		sk->sk_state == TCP_LISTEN ?
		(fastopenq ? fastopenq->max_qlen : 0) :
		(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh),
		len);
		}