Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 3dc43e3e authored by Glauber Costa, committed by David S. Miller
Browse files

per-netns ipv4 sysctl_tcp_mem



This patch allows each namespace to independently set up
its levels for tcp memory pressure thresholds. This patch
alone does not buy much: we need to make these values
per group of processes somehow. This is achieved in the
patches that follow in this patchset.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
CC: David S. Miller <davem@davemloft.net>
CC: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent d1a4c0b3
Loading
Loading
Loading
Loading
+1 −0
Original line number Original line Diff line number Diff line
@@ -55,6 +55,7 @@ struct netns_ipv4 {
	int current_rt_cache_rebuild_count;
	int current_rt_cache_rebuild_count;


	unsigned int sysctl_ping_group_range[2];
	unsigned int sysctl_ping_group_range[2];
	long sysctl_tcp_mem[3];


	atomic_t rt_genid;
	atomic_t rt_genid;
	atomic_t dev_addr_genid;
	atomic_t dev_addr_genid;
+0 −1
Original line number Original line Diff line number Diff line
@@ -230,7 +230,6 @@ extern int sysctl_tcp_fack;
extern int sysctl_tcp_reordering;
extern int sysctl_tcp_reordering;
extern int sysctl_tcp_ecn;
extern int sysctl_tcp_ecn;
extern int sysctl_tcp_dsack;
extern int sysctl_tcp_dsack;
extern long sysctl_tcp_mem[3];
extern int sysctl_tcp_wmem[3];
extern int sysctl_tcp_wmem[3];
extern int sysctl_tcp_rmem[3];
extern int sysctl_tcp_rmem[3];
extern int sysctl_tcp_app_win;
extern int sysctl_tcp_app_win;
+2 −0
Original line number Original line Diff line number Diff line
@@ -1672,6 +1672,8 @@ static int __init inet_init(void)
	ip_static_sysctl_init();
	ip_static_sysctl_init();
#endif
#endif


	tcp_prot.sysctl_mem = init_net.ipv4.sysctl_tcp_mem;

	/*
	/*
	 *	Add all the base protocols.
	 *	Add all the base protocols.
	 */
	 */
+44 −7
Original line number Original line Diff line number Diff line
@@ -14,6 +14,7 @@
#include <linux/init.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/slab.h>
#include <linux/nsproxy.h>
#include <linux/nsproxy.h>
#include <linux/swap.h>
#include <net/snmp.h>
#include <net/snmp.h>
#include <net/icmp.h>
#include <net/icmp.h>
#include <net/ip.h>
#include <net/ip.h>
@@ -174,6 +175,36 @@ static int proc_allowed_congestion_control(ctl_table *ctl,
	return ret;
	return ret;
}
}


/*
 * sysctl handler for "tcp_mem": reads or writes the three tcp_mem values
 * stored in the network namespace of the calling task
 * (current->nsproxy->net_ns).
 *
 * @ctl:    table entry this handler is attached to; only its mode is used.
 * @write:  non-zero when userspace is writing new values.
 * @buffer: user buffer holding (write) or receiving (read) the values.
 * @lenp:   in/out length of @buffer, updated by proc_doulongvec_minmax().
 * @ppos:   in/out file position, updated by proc_doulongvec_minmax().
 *
 * Returns 0 on success or the error returned by proc_doulongvec_minmax().
 *
 * All parsing/formatting goes through an on-stack table that points at a
 * local copy of the values:
 *  - @ctl itself is never modified, so concurrent callers that live in
 *    different namespaces cannot redirect each other's ->data pointer;
 *  - the local copy is seeded with the namespace's current values, so a
 *    write that supplies fewer than three numbers keeps the remaining
 *    thresholds unchanged instead of storing uninitialized stack contents.
 */
static int ipv4_tcp_mem(ctl_table *ctl, int write,
			   void __user *buffer, size_t *lenp,
			   loff_t *ppos)
{
	int ret;
	unsigned long vec[3];
	struct net *net = current->nsproxy->net_ns;

	ctl_table tmp = {
		.data = &vec,
		.maxlen = sizeof(vec),
		.mode = ctl->mode,
	};

	/* Snapshot the current per-namespace thresholds. */
	vec[0] = net->ipv4.sysctl_tcp_mem[0];
	vec[1] = net->ipv4.sysctl_tcp_mem[1];
	vec[2] = net->ipv4.sysctl_tcp_mem[2];

	ret = proc_doulongvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (ret || !write)
		return ret;

	/* Publish the (possibly partially) updated triple. */
	net->ipv4.sysctl_tcp_mem[0] = vec[0];
	net->ipv4.sysctl_tcp_mem[1] = vec[1];
	net->ipv4.sysctl_tcp_mem[2] = vec[2];

	return 0;
}

static struct ctl_table ipv4_table[] = {
static struct ctl_table ipv4_table[] = {
	{
	{
		.procname	= "tcp_timestamps",
		.procname	= "tcp_timestamps",
@@ -432,13 +463,6 @@ static struct ctl_table ipv4_table[] = {
		.mode		= 0644,
		.mode		= 0644,
		.proc_handler	= proc_dointvec
		.proc_handler	= proc_dointvec
	},
	},
	{
		.procname	= "tcp_mem",
		.data		= &sysctl_tcp_mem,
		.maxlen		= sizeof(sysctl_tcp_mem),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax
	},
	{
	{
		.procname	= "tcp_wmem",
		.procname	= "tcp_wmem",
		.data		= &sysctl_tcp_wmem,
		.data		= &sysctl_tcp_wmem,
@@ -721,6 +745,12 @@ static struct ctl_table ipv4_net_table[] = {
		.mode		= 0644,
		.mode		= 0644,
		.proc_handler	= ipv4_ping_group_range,
		.proc_handler	= ipv4_ping_group_range,
	},
	},
	{
		.procname	= "tcp_mem",
		.maxlen		= sizeof(init_net.ipv4.sysctl_tcp_mem),
		.mode		= 0644,
		.proc_handler	= ipv4_tcp_mem,
	},
	{ }
	{ }
};
};


@@ -734,6 +764,7 @@ EXPORT_SYMBOL_GPL(net_ipv4_ctl_path);
static __net_init int ipv4_sysctl_init_net(struct net *net)
static __net_init int ipv4_sysctl_init_net(struct net *net)
{
{
	struct ctl_table *table;
	struct ctl_table *table;
	unsigned long limit;


	table = ipv4_net_table;
	table = ipv4_net_table;
	if (!net_eq(net, &init_net)) {
	if (!net_eq(net, &init_net)) {
@@ -769,6 +800,12 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)


	net->ipv4.sysctl_rt_cache_rebuild_count = 4;
	net->ipv4.sysctl_rt_cache_rebuild_count = 4;


	limit = nr_free_buffer_pages() / 8;
	limit = max(limit, 128UL);
	net->ipv4.sysctl_tcp_mem[0] = limit / 4 * 3;
	net->ipv4.sysctl_tcp_mem[1] = limit;
	net->ipv4.sysctl_tcp_mem[2] = net->ipv4.sysctl_tcp_mem[0] * 2;

	net->ipv4.ipv4_hdr = register_net_sysctl_table(net,
	net->ipv4.ipv4_hdr = register_net_sysctl_table(net,
			net_ipv4_ctl_path, table);
			net_ipv4_ctl_path, table);
	if (net->ipv4.ipv4_hdr == NULL)
	if (net->ipv4.ipv4_hdr == NULL)
+2 −9
Original line number Original line Diff line number Diff line
@@ -282,11 +282,9 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
struct percpu_counter tcp_orphan_count;
struct percpu_counter tcp_orphan_count;
EXPORT_SYMBOL_GPL(tcp_orphan_count);
EXPORT_SYMBOL_GPL(tcp_orphan_count);


long sysctl_tcp_mem[3] __read_mostly;
int sysctl_tcp_wmem[3] __read_mostly;
int sysctl_tcp_wmem[3] __read_mostly;
int sysctl_tcp_rmem[3] __read_mostly;
int sysctl_tcp_rmem[3] __read_mostly;


EXPORT_SYMBOL(sysctl_tcp_mem);
EXPORT_SYMBOL(sysctl_tcp_rmem);
EXPORT_SYMBOL(sysctl_tcp_rmem);
EXPORT_SYMBOL(sysctl_tcp_wmem);
EXPORT_SYMBOL(sysctl_tcp_wmem);


@@ -3278,14 +3276,9 @@ void __init tcp_init(void)
	sysctl_tcp_max_orphans = cnt / 2;
	sysctl_tcp_max_orphans = cnt / 2;
	sysctl_max_syn_backlog = max(128, cnt / 256);
	sysctl_max_syn_backlog = max(128, cnt / 256);


	limit = nr_free_buffer_pages() / 8;
	limit = max(limit, 128UL);
	sysctl_tcp_mem[0] = limit / 4 * 3;
	sysctl_tcp_mem[1] = limit;
	sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2;

	/* Set per-socket limits to no more than 1/128 the pressure threshold */
	/* Set per-socket limits to no more than 1/128 the pressure threshold */
	limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7);
	limit = ((unsigned long)init_net.ipv4.sysctl_tcp_mem[1])
		<< (PAGE_SHIFT - 7);
	max_share = min(4UL*1024*1024, limit);
	max_share = min(4UL*1024*1024, limit);


	sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
	sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
Loading