Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 9f9843a7 authored by Yuchung Cheng's avatar Yuchung Cheng Committed by David S. Miller
Browse files

tcp: properly handle stretch acks in slow start



Slow start now increases cwnd by 1 if an ACK acknowledges some packets,
regardless the number of packets. Consequently slow start performance
is highly dependent on the degree of the stretch ACKs caused by
receiver or network ACK compression mechanisms (e.g., delayed-ACK,
GRO, etc).  But slow start algorithm is to send twice the amount of
packets of packets left so it should process a stretch ACK of degree
N as if N ACKs of degree 1, then exits when cwnd exceeds ssthresh. A
follow up patch will use the remainder of the N (if greater than 1)
to adjust cwnd in the congestion avoidance phase.

In addition this patch retires the experimental limited slow start
(LSS) feature. LSS has multiple drawbacks but questionable benefit. The
fractional cwnd increase in LSS requires a loop in slow start even
though it's rarely used. Configuring such an increase step via a global
sysctl on different BDPS seems hard. Finally and most importantly the
slow start overshoot concern is now better covered by the Hybrid slow
start (hystart) enabled by default.

Signed-off-by: default avatarYuchung Cheng <ycheng@google.com>
Signed-off-by: default avatarNeal Cardwell <ncardwell@google.com>
Signed-off-by: default avatarEric Dumazet <edumazet@google.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 0d41cca4
Loading
Loading
Loading
Loading
+0 −11
Original line number Diff line number Diff line
@@ -267,17 +267,6 @@ tcp_max_orphans - INTEGER
	more aggressively. Let me to remind again: each orphan eats
	up to ~64K of unswappable memory.

tcp_max_ssthresh - INTEGER
	Limited Slow-Start for TCP with large congestion windows (cwnd) defined in
	RFC3742. Limited slow-start is a mechanism to limit growth of the cwnd
	on the region where cwnd is larger than tcp_max_ssthresh. TCP increases cwnd
	by at most tcp_max_ssthresh segments, and by at least tcp_max_ssthresh/2
	segments per RTT when the cwnd is above tcp_max_ssthresh.
	If TCP connection increased cwnd to thousands (or tens of thousands) segments,
	and thousands of packets were being dropped during slow-start, you can set
	tcp_max_ssthresh to improve performance for new TCP connection.
	Default: 0 (off)

tcp_max_syn_backlog - INTEGER
	Maximal number of remembered connection requests, which have not
	received an acknowledgment from connecting client.
+3 −4
Original line number Diff line number Diff line
@@ -275,7 +275,6 @@ extern int sysctl_tcp_mtu_probing;
extern int sysctl_tcp_base_mss;
extern int sysctl_tcp_workaround_signed_windows;
extern int sysctl_tcp_slow_start_after_idle;
extern int sysctl_tcp_max_ssthresh;
extern int sysctl_tcp_thin_linear_timeouts;
extern int sysctl_tcp_thin_dupack;
extern int sysctl_tcp_early_retrans;
@@ -797,7 +796,7 @@ struct tcp_congestion_ops {
	/* lower bound for congestion window (optional) */
	u32 (*min_cwnd)(const struct sock *sk);
	/* do new cwnd calculation (required) */
	void (*cong_avoid)(struct sock *sk, u32 ack, u32 in_flight);
	void (*cong_avoid)(struct sock *sk, u32 ack, u32 acked, u32 in_flight);
	/* call before changing ca_state (optional) */
	void (*set_state)(struct sock *sk, u8 new_state);
	/* call when cwnd event occurs (optional) */
@@ -824,12 +823,12 @@ void tcp_get_available_congestion_control(char *buf, size_t len);
void tcp_get_allowed_congestion_control(char *buf, size_t len);
int tcp_set_allowed_congestion_control(char *allowed);
int tcp_set_congestion_control(struct sock *sk, const char *name);
void tcp_slow_start(struct tcp_sock *tp);
int tcp_slow_start(struct tcp_sock *tp, u32 acked);
void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w);

extern struct tcp_congestion_ops tcp_init_congestion_ops;
u32 tcp_reno_ssthresh(struct sock *sk);
void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight);
void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight);
u32 tcp_reno_min_cwnd(const struct sock *sk);
extern struct tcp_congestion_ops tcp_reno;

+0 −7
Original line number Diff line number Diff line
@@ -700,13 +700,6 @@ static struct ctl_table ipv4_table[] = {
		.mode		= 0644,
		.proc_handler   = proc_allowed_congestion_control,
	},
	{
		.procname	= "tcp_max_ssthresh",
		.data		= &sysctl_tcp_max_ssthresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname       = "tcp_thin_linear_timeouts",
		.data           = &sysctl_tcp_thin_linear_timeouts,
+3 −2
Original line number Diff line number Diff line
@@ -140,7 +140,8 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
		ca->cnt = 1;
}

static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked,
			      u32 in_flight)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bictcp *ca = inet_csk_ca(sk);
@@ -149,7 +150,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
		return;

	if (tp->snd_cwnd <= tp->snd_ssthresh)
		tcp_slow_start(tp);
		tcp_slow_start(tp, acked);
	else {
		bictcp_update(ca, tp->snd_cwnd);
		tcp_cong_avoid_ai(tp, ca->cnt);
+17 −30
Original line number Diff line number Diff line
@@ -15,8 +15,6 @@
#include <linux/gfp.h>
#include <net/tcp.h>

int sysctl_tcp_max_ssthresh = 0;

static DEFINE_SPINLOCK(tcp_cong_list_lock);
static LIST_HEAD(tcp_cong_list);

@@ -299,35 +297,24 @@ bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
}
EXPORT_SYMBOL_GPL(tcp_is_cwnd_limited);

/*
 * Slow start is used when congestion window is less than slow start
 * threshold. This version implements the basic RFC2581 version
 * and optionally supports:
 * 	RFC3742 Limited Slow Start  	  - growth limited to max_ssthresh
 *	RFC3465 Appropriate Byte Counting - growth limited by bytes acknowledged
/* Slow start is used when congestion window is no greater than the slow start
 * threshold. We base on RFC2581 and also handle stretch ACKs properly.
 * We do not implement RFC3465 Appropriate Byte Counting (ABC) per se but
 * something better;) a packet is only considered (s)acked in its entirety to
 * defend the ACK attacks described in the RFC. Slow start processes a stretch
 * ACK of degree N as if N acks of degree 1 are received back to back except
 * ABC caps N to 2. Slow start exits when cwnd grows over ssthresh and
 * returns the leftover acks to adjust cwnd in congestion avoidance mode.
 */
void tcp_slow_start(struct tcp_sock *tp)
int tcp_slow_start(struct tcp_sock *tp, u32 acked)
{
	int cnt; /* increase in packets */
	unsigned int delta = 0;
	u32 snd_cwnd = tp->snd_cwnd;

	if (unlikely(!snd_cwnd)) {
		pr_err_once("snd_cwnd is nul, please report this bug.\n");
		snd_cwnd = 1U;
	}
	u32 cwnd = tp->snd_cwnd + acked;

	if (sysctl_tcp_max_ssthresh > 0 && tp->snd_cwnd > sysctl_tcp_max_ssthresh)
		cnt = sysctl_tcp_max_ssthresh >> 1;	/* limited slow start */
	else
		cnt = snd_cwnd;				/* exponential increase */

	tp->snd_cwnd_cnt += cnt;
	while (tp->snd_cwnd_cnt >= snd_cwnd) {
		tp->snd_cwnd_cnt -= snd_cwnd;
		delta++;
	}
	tp->snd_cwnd = min(snd_cwnd + delta, tp->snd_cwnd_clamp);
	if (cwnd > tp->snd_ssthresh)
		cwnd = tp->snd_ssthresh + 1;
	acked -= cwnd - tp->snd_cwnd;
	tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp);
	return acked;
}
EXPORT_SYMBOL_GPL(tcp_slow_start);

@@ -351,7 +338,7 @@ EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai);
/* This is Jacobson's slow start and congestion avoidance.
 * SIGCOMM '88, p. 328.
 */
void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight)
{
	struct tcp_sock *tp = tcp_sk(sk);

@@ -360,7 +347,7 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)

	/* In "safe" area, increase. */
	if (tp->snd_cwnd <= tp->snd_ssthresh)
		tcp_slow_start(tp);
		tcp_slow_start(tp, acked);
	/* In dangerous area, increase slowly. */
	else
		tcp_cong_avoid_ai(tp, tp->snd_cwnd);
Loading