Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit bb4d991a, authored by Yuchung Cheng, committed by David S. Miller
Browse files

tcp: adjust tail loss probe timeout



This patch adjusts the timeout formula used to schedule the TCP tail loss
probe (TLP). The previous formula used 2*SRTT, raised to at least
1.5*SRTT + DelayACKMax if only one packet was in flight. It also kept a
lower bound of 10 msec, which is too large for short-RTT connections
(e.g. within a data center). The new formula is
2*SRTT + (inflight == 1 ? 200ms : 2 ticks), which performs better for
short and fast connections.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent c4b2bf6b
Loading
Loading
Loading
Loading
+1 −2
Original line number Original line Diff line number Diff line
@@ -139,6 +139,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
#endif
#endif
#define TCP_RTO_MAX	((unsigned)(120*HZ))
#define TCP_RTO_MAX	((unsigned)(120*HZ))
#define TCP_RTO_MIN	((unsigned)(HZ/5))
#define TCP_RTO_MIN	((unsigned)(HZ/5))
#define TCP_TIMEOUT_MIN	(2U) /* Min timeout for TCP timers in jiffies */
#define TCP_TIMEOUT_INIT ((unsigned)(1*HZ))	/* RFC6298 2.1 initial RTO value	*/
#define TCP_TIMEOUT_INIT ((unsigned)(1*HZ))	/* RFC6298 2.1 initial RTO value	*/
#define TCP_TIMEOUT_FALLBACK ((unsigned)(3*HZ))	/* RFC 1122 initial RTO value, now
#define TCP_TIMEOUT_FALLBACK ((unsigned)(3*HZ))	/* RFC 1122 initial RTO value, now
						 * used as a fallback RTO for the
						 * used as a fallback RTO for the
@@ -150,8 +151,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
#define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval between probes
#define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval between probes
					                 * for local resources.
					                 * for local resources.
					                 */
					                 */
#define TCP_REO_TIMEOUT_MIN	(2000) /* Min RACK reordering timeout in usec */

#define TCP_KEEPALIVE_TIME	(120*60*HZ)	/* two hours */
#define TCP_KEEPALIVE_TIME	(120*60*HZ)	/* two hours */
#define TCP_KEEPALIVE_PROBES	9		/* Max of 9 keepalive probes	*/
#define TCP_KEEPALIVE_PROBES	9		/* Max of 9 keepalive probes	*/
#define TCP_KEEPALIVE_INTVL	(75*HZ)
#define TCP_KEEPALIVE_INTVL	(75*HZ)
+10 −7
Original line number Original line Diff line number Diff line
@@ -2377,7 +2377,6 @@ bool tcp_schedule_loss_probe(struct sock *sk)
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	u32 timeout, tlp_time_stamp, rto_time_stamp;
	u32 timeout, tlp_time_stamp, rto_time_stamp;
	u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3);


	/* No consecutive loss probes. */
	/* No consecutive loss probes. */
	if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) {
	if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) {
@@ -2406,15 +2405,19 @@ bool tcp_schedule_loss_probe(struct sock *sk)
	     tcp_send_head(sk))
	     tcp_send_head(sk))
		return false;
		return false;


	/* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account
	/* Probe timeout is 2*rtt. Add minimum RTO to account
	 * for delayed ack when there's one outstanding packet. If no RTT
	 * for delayed ack when there's one outstanding packet. If no RTT
	 * sample is available then probe after TCP_TIMEOUT_INIT.
	 * sample is available then probe after TCP_TIMEOUT_INIT.
	 */
	 */
	timeout = rtt << 1 ? : TCP_TIMEOUT_INIT;
	if (tp->srtt_us) {
		timeout = usecs_to_jiffies(tp->srtt_us >> 2);
		if (tp->packets_out == 1)
		if (tp->packets_out == 1)
		timeout = max_t(u32, timeout,
			timeout += TCP_RTO_MIN;
				(rtt + (rtt >> 1) + TCP_DELACK_MAX));
		else
	timeout = max_t(u32, timeout, msecs_to_jiffies(10));
			timeout += TCP_TIMEOUT_MIN;
	} else {
		timeout = TCP_TIMEOUT_INIT;
	}


	/* If RTO is shorter, just schedule TLP in its place. */
	/* If RTO is shorter, just schedule TLP in its place. */
	tlp_time_stamp = tcp_jiffies32 + timeout;
	tlp_time_stamp = tcp_jiffies32 + timeout;
+1 −1
Original line number Original line Diff line number Diff line
@@ -113,7 +113,7 @@ void tcp_rack_mark_lost(struct sock *sk)
	tp->rack.advanced = 0;
	tp->rack.advanced = 0;
	tcp_rack_detect_loss(sk, &timeout);
	tcp_rack_detect_loss(sk, &timeout);
	if (timeout) {
	if (timeout) {
		timeout = usecs_to_jiffies(timeout + TCP_REO_TIMEOUT_MIN);
		timeout = usecs_to_jiffies(timeout) + TCP_TIMEOUT_MIN;
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_REO_TIMEOUT,
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_REO_TIMEOUT,
					  timeout, inet_csk(sk)->icsk_rto);
					  timeout, inet_csk(sk)->icsk_rto);
	}
	}