Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 010b64f7 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'tcp-move-14-sysctls-to-namespaces'



Eric Dumazet says:

====================
tcp: move 14 sysctls to namespaces

Ideally all TCP sysctls should be per netns.
This patch series takes care of 14 of sysctls.
More to come later.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 4dc12ffe af9b69a7
Loading
Loading
Loading
Loading
+14 −0
Original line number Diff line number Diff line
@@ -128,6 +128,20 @@ struct netns_ipv4 {
	int sysctl_tcp_sack;
	int sysctl_tcp_window_scaling;
	int sysctl_tcp_timestamps;
	int sysctl_tcp_early_retrans;
	int sysctl_tcp_recovery;
	int sysctl_tcp_thin_linear_timeouts;
	int sysctl_tcp_slow_start_after_idle;
	int sysctl_tcp_retrans_collapse;
	int sysctl_tcp_stdurg;
	int sysctl_tcp_rfc1337;
	int sysctl_tcp_abort_on_overflow;
	int sysctl_tcp_fack;
	int sysctl_tcp_max_reordering;
	int sysctl_tcp_dsack;
	int sysctl_tcp_app_win;
	int sysctl_tcp_adv_win_scale;
	int sysctl_tcp_frto;
	struct inet_timewait_death_row tcp_death_row;
	int sysctl_max_syn_backlog;
	int sysctl_tcp_fastopen;
+6 −21
Original line number Diff line number Diff line
@@ -243,30 +243,15 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);


/* sysctl variables for tcp */
extern int sysctl_tcp_retrans_collapse;
extern int sysctl_tcp_stdurg;
extern int sysctl_tcp_rfc1337;
extern int sysctl_tcp_abort_on_overflow;
extern int sysctl_tcp_max_orphans;
extern int sysctl_tcp_fack;
extern int sysctl_tcp_reordering;
extern int sysctl_tcp_max_reordering;
extern int sysctl_tcp_dsack;
extern long sysctl_tcp_mem[3];
extern int sysctl_tcp_wmem[3];
extern int sysctl_tcp_rmem[3];
extern int sysctl_tcp_app_win;
extern int sysctl_tcp_adv_win_scale;
extern int sysctl_tcp_frto;
extern int sysctl_tcp_nometrics_save;
extern int sysctl_tcp_moderate_rcvbuf;
extern int sysctl_tcp_tso_win_divisor;
extern int sysctl_tcp_workaround_signed_windows;
extern int sysctl_tcp_slow_start_after_idle;
extern int sysctl_tcp_thin_linear_timeouts;
extern int sysctl_tcp_thin_dupack;
extern int sysctl_tcp_early_retrans;
extern int sysctl_tcp_recovery;

#define TCP_RACK_LOSS_DETECTION  0x1 /* Use RACK to detect losses */

extern int sysctl_tcp_limit_output_bytes;
@@ -1311,7 +1296,7 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk)
	struct tcp_sock *tp = tcp_sk(sk);
	s32 delta;

	if (!sysctl_tcp_slow_start_after_idle || tp->packets_out ||
	if (!sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle || tp->packets_out ||
	    ca_ops->cong_control)
		return;
	delta = tcp_jiffies32 - tp->lsndtime;
@@ -1324,9 +1309,9 @@ void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd,
			       __u32 *window_clamp, int wscale_ok,
			       __u8 *rcv_wscale, __u32 init_rcv_wnd);

static inline int tcp_win_from_space(int space)
static inline int tcp_win_from_space(const struct sock *sk, int space)
{
	int tcp_adv_win_scale = sysctl_tcp_adv_win_scale;
	int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale;

	return tcp_adv_win_scale <= 0 ?
		(space>>(-tcp_adv_win_scale)) :
@@ -1336,13 +1321,13 @@ static inline int tcp_win_from_space(int space)
/* Note: caller must be prepared to deal with negative returns */
static inline int tcp_space(const struct sock *sk)
{
	return tcp_win_from_space(sk->sk_rcvbuf -
	return tcp_win_from_space(sk, sk->sk_rcvbuf -
				  atomic_read(&sk->sk_rmem_alloc));
}

static inline int tcp_full_space(const struct sock *sk)
{
	return tcp_win_from_space(sk->sk_rcvbuf);
	return tcp_win_from_space(sk, sk->sk_rcvbuf);
}

extern void tcp_openreq_init_rwin(struct request_sock *req,
+102 −102
Original line number Diff line number Diff line
@@ -386,13 +386,6 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
}

static struct ctl_table ipv4_table[] = {
	{
		.procname	= "tcp_retrans_collapse",
		.data		= &sysctl_tcp_retrans_collapse,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_max_orphans",
		.data		= &sysctl_tcp_max_orphans,
@@ -400,27 +393,6 @@ static struct ctl_table ipv4_table[] = {
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_abort_on_overflow",
		.data		= &sysctl_tcp_abort_on_overflow,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_stdurg",
		.data		= &sysctl_tcp_stdurg,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_rfc1337",
		.data		= &sysctl_tcp_rfc1337,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "inet_peer_threshold",
		.data		= &inet_peer_threshold,
@@ -442,34 +414,6 @@ static struct ctl_table ipv4_table[] = {
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "tcp_fack",
		.data		= &sysctl_tcp_fack,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_recovery",
		.data		= &sysctl_tcp_recovery,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "tcp_max_reordering",
		.data		= &sysctl_tcp_max_reordering,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_dsack",
		.data		= &sysctl_tcp_dsack,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_mem",
		.maxlen		= sizeof(sysctl_tcp_mem),
@@ -493,29 +437,6 @@ static struct ctl_table ipv4_table[] = {
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &one,
	},
	{
		.procname	= "tcp_app_win",
		.data		= &sysctl_tcp_app_win,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_adv_win_scale",
		.data		= &sysctl_tcp_adv_win_scale,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &tcp_adv_win_scale_min,
		.extra2		= &tcp_adv_win_scale_max,
	},
	{
		.procname	= "tcp_frto",
		.data		= &sysctl_tcp_frto,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_min_rtt_wlen",
		.data		= &sysctl_tcp_min_rtt_wlen,
@@ -578,13 +499,6 @@ static struct ctl_table ipv4_table[] = {
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_slow_start_after_idle",
		.data		= &sysctl_tcp_slow_start_after_idle,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
#ifdef CONFIG_NETLABEL
	{
		.procname	= "cipso_cache_enable",
@@ -627,22 +541,6 @@ static struct ctl_table ipv4_table[] = {
		.mode		= 0644,
		.proc_handler   = proc_allowed_congestion_control,
	},
	{
		.procname       = "tcp_thin_linear_timeouts",
		.data           = &sysctl_tcp_thin_linear_timeouts,
		.maxlen         = sizeof(int),
		.mode           = 0644,
		.proc_handler   = proc_dointvec
	},
	{
		.procname	= "tcp_early_retrans",
		.data		= &sysctl_tcp_early_retrans,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
		.extra2		= &four,
	},
	{
		.procname	= "tcp_min_tso_segs",
		.data		= &sysctl_tcp_min_tso_segs,
@@ -1145,6 +1043,108 @@ static struct ctl_table ipv4_net_table[] = {
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_early_retrans",
		.data		= &init_net.ipv4.sysctl_tcp_early_retrans,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
		.extra2		= &four,
	},
	{
		.procname	= "tcp_recovery",
		.data		= &init_net.ipv4.sysctl_tcp_recovery,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname       = "tcp_thin_linear_timeouts",
		.data           = &init_net.ipv4.sysctl_tcp_thin_linear_timeouts,
		.maxlen         = sizeof(int),
		.mode           = 0644,
		.proc_handler   = proc_dointvec
	},
	{
		.procname	= "tcp_slow_start_after_idle",
		.data		= &init_net.ipv4.sysctl_tcp_slow_start_after_idle,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_retrans_collapse",
		.data		= &init_net.ipv4.sysctl_tcp_retrans_collapse,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_stdurg",
		.data		= &init_net.ipv4.sysctl_tcp_stdurg,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_rfc1337",
		.data		= &init_net.ipv4.sysctl_tcp_rfc1337,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_abort_on_overflow",
		.data		= &init_net.ipv4.sysctl_tcp_abort_on_overflow,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_fack",
		.data		= &init_net.ipv4.sysctl_tcp_fack,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_max_reordering",
		.data		= &init_net.ipv4.sysctl_tcp_max_reordering,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_dsack",
		.data		= &init_net.ipv4.sysctl_tcp_dsack,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_app_win",
		.data		= &init_net.ipv4.sysctl_tcp_app_win,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_adv_win_scale",
		.data		= &init_net.ipv4.sysctl_tcp_adv_win_scale,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &tcp_adv_win_scale_min,
		.extra2		= &tcp_adv_win_scale_max,
	},
	{
		.procname	= "tcp_frto",
		.data		= &init_net.ipv4.sysctl_tcp_frto,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{ }
};

+1 −1
Original line number Diff line number Diff line
@@ -2517,7 +2517,7 @@ static int tcp_repair_options_est(struct sock *sk,
				return -EINVAL;

			tp->rx_opt.sack_ok |= TCP_SACK_SEEN;
			if (sysctl_tcp_fack)
			if (sock_net(sk)->ipv4.sysctl_tcp_fack)
				tcp_enable_fack(tp);
			break;
		case TCPOPT_TIMESTAMP:
+16 −26
Original line number Diff line number Diff line
@@ -79,23 +79,12 @@
#include <linux/unaligned/access_ok.h>
#include <linux/static_key.h>

int sysctl_tcp_fack __read_mostly;
int sysctl_tcp_max_reordering __read_mostly = 300;
int sysctl_tcp_dsack __read_mostly = 1;
int sysctl_tcp_app_win __read_mostly = 31;
int sysctl_tcp_adv_win_scale __read_mostly = 1;
EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);

/* rfc5961 challenge ack rate limiting */
int sysctl_tcp_challenge_ack_limit = 1000;

int sysctl_tcp_stdurg __read_mostly;
int sysctl_tcp_rfc1337 __read_mostly;
int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
int sysctl_tcp_frto __read_mostly = 2;
int sysctl_tcp_min_rtt_wlen __read_mostly = 300;
int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
int sysctl_tcp_early_retrans __read_mostly = 3;
int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;

#define FLAG_DATA		0x01 /* Incoming frame contained data.		*/
@@ -370,8 +359,8 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	/* Optimize this! */
	int truesize = tcp_win_from_space(skb->truesize) >> 1;
	int window = tcp_win_from_space(sysctl_tcp_rmem[2]) >> 1;
	int truesize = tcp_win_from_space(sk, skb->truesize) >> 1;
	int window = tcp_win_from_space(sk, sysctl_tcp_rmem[2]) >> 1;

	while (tp->rcv_ssthresh <= window) {
		if (truesize <= skb->len)
@@ -396,7 +385,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
		/* Check #2. Increase window, if skb with such overhead
		 * will fit to rcvbuf in future.
		 */
		if (tcp_win_from_space(skb->truesize) <= skb->len)
		if (tcp_win_from_space(sk, skb->truesize) <= skb->len)
			incr = 2 * tp->advmss;
		else
			incr = __tcp_grow_window(sk, skb);
@@ -434,6 +423,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
 */
void tcp_init_buffer_space(struct sock *sk)
{
	int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win;
	struct tcp_sock *tp = tcp_sk(sk);
	int maxwin;

@@ -452,14 +442,14 @@ void tcp_init_buffer_space(struct sock *sk)
	if (tp->window_clamp >= maxwin) {
		tp->window_clamp = maxwin;

		if (sysctl_tcp_app_win && maxwin > 4 * tp->advmss)
		if (tcp_app_win && maxwin > 4 * tp->advmss)
			tp->window_clamp = max(maxwin -
					       (maxwin >> sysctl_tcp_app_win),
					       (maxwin >> tcp_app_win),
					       4 * tp->advmss);
	}

	/* Force reservation of one segment. */
	if (sysctl_tcp_app_win &&
	if (tcp_app_win &&
	    tp->window_clamp > 2 * tp->advmss &&
	    tp->window_clamp + tp->advmss > maxwin)
		tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
@@ -636,7 +626,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
		}

		rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
		while (tcp_win_from_space(rcvmem) < tp->advmss)
		while (tcp_win_from_space(sk, rcvmem) < tp->advmss)
			rcvmem += 128;

		rcvbuf = min(rcvwin / tp->advmss * rcvmem, sysctl_tcp_rmem[2]);
@@ -893,7 +883,7 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
		return;

	if (metric > tp->reordering) {
		tp->reordering = min(sysctl_tcp_max_reordering, metric);
		tp->reordering = min(sock_net(sk)->ipv4.sysctl_tcp_max_reordering, metric);

#if FASTRETRANS_DEBUG > 1
		pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
@@ -2035,7 +2025,7 @@ void tcp_enter_loss(struct sock *sk)
	 * falsely raise the receive window, which results in repeated
	 * timeouts and stop-and-go behavior.
	 */
	tp->frto = sysctl_tcp_frto &&
	tp->frto = net->ipv4.sysctl_tcp_frto &&
		   (new_recovery || icsk->icsk_retransmits) &&
		   !inet_csk(sk)->icsk_mtup.probe_size;
}
@@ -2789,7 +2779,7 @@ static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag)
	struct tcp_sock *tp = tcp_sk(sk);

	/* Use RACK to detect loss */
	if (sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) {
	if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) {
		u32 prior_retrans = tp->retrans_out;

		tcp_rack_mark_lost(sk);
@@ -4155,7 +4145,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
	if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
		int mib_idx;

		if (before(seq, tp->rcv_nxt))
@@ -4190,7 +4180,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
		NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
		tcp_enter_quickack_mode(sk);

		if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
		if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
			u32 end_seq = TCP_SKB_CB(skb)->end_seq;

			if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
@@ -4815,7 +4805,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root,
		 *   overlaps to the next one.
		 */
		if (!(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) &&
		    (tcp_win_from_space(skb->truesize) > skb->len ||
		    (tcp_win_from_space(sk, skb->truesize) > skb->len ||
		     before(TCP_SKB_CB(skb)->seq, start))) {
			end_of_skbs = false;
			break;
@@ -5124,7 +5114,7 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
	struct tcp_sock *tp = tcp_sk(sk);
	u32 ptr = ntohs(th->urg_ptr);

	if (ptr && !sysctl_tcp_stdurg)
	if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg)
		ptr--;
	ptr += ntohl(th->seq);

@@ -5723,7 +5713,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
			tp->tcp_header_len = sizeof(struct tcphdr);
		}

		if (tcp_is_sack(tp) && sysctl_tcp_fack)
		if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_fack)
			tcp_enable_fack(tp);

		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
Loading