Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 43e122b0 authored by Eric Dumazet, committed by David S. Miller
Browse files

tcp: refine pacing rate determination



When TCP pacing was added back in linux-3.12, we chose
to apply a fixed ratio of 200 % against current rate,
to allow probing for optimal throughput even during
slow start phase, where cwnd can be doubled every other RTT.

At Google, we found it was better applying a different ratio
while in Congestion Avoidance phase.
This ratio was set to 120 %.

We've used the normal tcp_in_slow_start() helper for a while,
then tuned the condition to select the conservative ratio
as soon as cwnd >= ssthresh/2 :

- After cwnd reduction, it is safer to ramp up more slowly,
  as we approach optimal cwnd.
- Initial ramp up (ssthresh == INFINITY) still allows doubling
  cwnd every other RTT.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 4ec3b28c
Loading
Loading
Loading
Loading
+15 −0
Original line number Diff line number Diff line
@@ -586,6 +586,21 @@ tcp_min_tso_segs - INTEGER
	if available window is too small.
	Default: 2

tcp_pacing_ss_ratio - INTEGER
	sk->sk_pacing_rate is set by TCP stack using a ratio applied
	to current rate. (current_rate = cwnd * mss / srtt)
	If TCP is in slow start, tcp_pacing_ss_ratio is applied
	to let TCP probe for bigger speeds, assuming cwnd can be
	doubled every other RTT.
	Default: 200

tcp_pacing_ca_ratio - INTEGER
	sk->sk_pacing_rate is set by TCP stack using a ratio applied
	to current rate. (current_rate = cwnd * mss / srtt)
	If TCP is in congestion avoidance phase, tcp_pacing_ca_ratio
	is applied to conservatively probe for bigger throughput.
	Default: 120

tcp_tso_win_divisor - INTEGER
	This allows control over what percentage of the congestion window
	can be consumed by a single TSO frame.
+2 −0
Original line number Diff line number Diff line
@@ -281,6 +281,8 @@ extern unsigned int sysctl_tcp_notsent_lowat;
extern int sysctl_tcp_min_tso_segs;
extern int sysctl_tcp_autocorking;
extern int sysctl_tcp_invalid_ratelimit;
extern int sysctl_tcp_pacing_ss_ratio;
extern int sysctl_tcp_pacing_ca_ratio;

extern atomic_long_t tcp_memory_allocated;
extern struct percpu_counter tcp_sockets_allocated;
+19 −0
Original line number Diff line number Diff line
@@ -29,6 +29,7 @@
static int zero;
static int one = 1;
static int four = 4;
static int thousand = 1000;
static int gso_max_segs = GSO_MAX_SEGS;
static int tcp_retr1_max = 255;
static int ip_local_port_range_min[] = { 1, 1 };
@@ -711,6 +712,24 @@ static struct ctl_table ipv4_table[] = {
		.extra1		= &one,
		.extra2		= &gso_max_segs,
	},
	{
		.procname	= "tcp_pacing_ss_ratio",
		.data		= &sysctl_tcp_pacing_ss_ratio,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
		.extra2		= &thousand,
	},
	{
		.procname	= "tcp_pacing_ca_ratio",
		.data		= &sysctl_tcp_pacing_ca_ratio,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
		.extra2		= &thousand,
	},
	{
		.procname	= "tcp_autocorking",
		.data		= &sysctl_tcp_autocorking,
+17 −1
Original line number Diff line number Diff line
@@ -753,13 +753,29 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
 * TCP pacing, to smooth the burst on large writes when packets
 * in flight is significantly lower than cwnd (or rwin)
 */
/* Pacing ratios, in percent of the current rate (cwnd * mss / srtt).
 * tcp_update_pacing_rate() multiplies the rate by the slow-start ratio
 * while snd_cwnd < snd_ssthresh / 2, and by the congestion-avoidance
 * ratio otherwise. Both are exposed as the tcp_pacing_ss_ratio and
 * tcp_pacing_ca_ratio sysctls.
 */
int sysctl_tcp_pacing_ss_ratio __read_mostly = 200;	/* default: 200 % */
int sysctl_tcp_pacing_ca_ratio __read_mostly = 120;	/* default: 120 % */

static void tcp_update_pacing_rate(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	u64 rate;

	/* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */
	rate = (u64)tp->mss_cache * 2 * (USEC_PER_SEC << 3);
	rate = (u64)tp->mss_cache * ((USEC_PER_SEC / 100) << 3);

	/* current rate is (cwnd * mss) / srtt
	 * In Slow Start [1], set sk_pacing_rate to 200 % the current rate.
	 * In Congestion Avoidance phase, set it to 120 % the current rate.
	 *
	 * [1] : Normal Slow Start condition is (tp->snd_cwnd < tp->snd_ssthresh)
	 *	 If snd_cwnd >= (tp->snd_ssthresh / 2), we are approaching
	 *	 end of slow start and should slow down.
	 */
	if (tp->snd_cwnd < tp->snd_ssthresh / 2)
		rate *= sysctl_tcp_pacing_ss_ratio;
	else
		rate *= sysctl_tcp_pacing_ca_ratio;

	rate *= max(tp->snd_cwnd, tp->packets_out);