Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d696c7bd authored by Patrick McHardy, committed by David S. Miller
Browse files

netfilter: nf_conntrack: fix hash resizing with namespaces



As noticed by Jon Masters <jonathan@jonmasters.org>, the conntrack hash
size is global and not per namespace, but modifiable at runtime through
/sys/module/nf_conntrack/hashsize. Changing the hash size will only
resize the hash in the current namespace however, so other namespaces
will use an invalid hash size. This can cause crashes when enlarging
the hashsize, or false negative lookups when shrinking it.

Move the hash size into the per-namespace data and only use the global
hash size to initialize the per-namespace value when instantiating a
new namespace. Additionally restrict hash resizing to init_net for
now as other namespaces are not handled currently.

Cc: stable@kernel.org
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 14c7dbe0
Loading
Loading
Loading
Loading
+1 −0
Original line number Original line Diff line number Diff line
@@ -11,6 +11,7 @@ struct nf_conntrack_ecache;
struct netns_ct {
struct netns_ct {
	atomic_t		count;
	atomic_t		count;
	unsigned int		expect_count;
	unsigned int		expect_count;
	unsigned int		htable_size;
	struct kmem_cache	*nf_conntrack_cachep;
	struct kmem_cache	*nf_conntrack_cachep;
	struct hlist_nulls_head	*hash;
	struct hlist_nulls_head	*hash;
	struct hlist_head	*expect_hash;
	struct hlist_head	*expect_hash;
+1 −0
Original line number Original line Diff line number Diff line
@@ -40,6 +40,7 @@ struct netns_ipv4 {
	struct xt_table		*iptable_security;
	struct xt_table		*iptable_security;
	struct xt_table		*nat_table;
	struct xt_table		*nat_table;
	struct hlist_head	*nat_bysource;
	struct hlist_head	*nat_bysource;
	unsigned int		nat_htable_size;
	int			nat_vmalloced;
	int			nat_vmalloced;
#endif
#endif


+1 −1
Original line number Original line Diff line number Diff line
@@ -210,7 +210,7 @@ static ctl_table ip_ct_sysctl_table[] = {
	},
	},
	{
	{
		.procname	= "ip_conntrack_buckets",
		.procname	= "ip_conntrack_buckets",
		.data		= &nf_conntrack_htable_size,
		.data		= &init_net.ct.htable_size,
		.maxlen		= sizeof(unsigned int),
		.maxlen		= sizeof(unsigned int),
		.mode		= 0444,
		.mode		= 0444,
		.proc_handler	= proc_dointvec,
		.proc_handler	= proc_dointvec,
+2 −2
Original line number Original line Diff line number Diff line
@@ -32,7 +32,7 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
	struct hlist_nulls_node *n;
	struct hlist_nulls_node *n;


	for (st->bucket = 0;
	for (st->bucket = 0;
	     st->bucket < nf_conntrack_htable_size;
	     st->bucket < net->ct.htable_size;
	     st->bucket++) {
	     st->bucket++) {
		n = rcu_dereference(net->ct.hash[st->bucket].first);
		n = rcu_dereference(net->ct.hash[st->bucket].first);
		if (!is_a_nulls(n))
		if (!is_a_nulls(n))
@@ -50,7 +50,7 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
	head = rcu_dereference(head->next);
	head = rcu_dereference(head->next);
	while (is_a_nulls(head)) {
	while (is_a_nulls(head)) {
		if (likely(get_nulls_value(head) == st->bucket)) {
		if (likely(get_nulls_value(head) == st->bucket)) {
			if (++st->bucket >= nf_conntrack_htable_size)
			if (++st->bucket >= net->ct.htable_size)
				return NULL;
				return NULL;
		}
		}
		head = rcu_dereference(net->ct.hash[st->bucket].first);
		head = rcu_dereference(net->ct.hash[st->bucket].first);
+9 −13
Original line number Original line Diff line number Diff line
@@ -35,9 +35,6 @@ static DEFINE_SPINLOCK(nf_nat_lock);


static struct nf_conntrack_l3proto *l3proto __read_mostly;
static struct nf_conntrack_l3proto *l3proto __read_mostly;


/* Calculated at init based on memory size */
static unsigned int nf_nat_htable_size __read_mostly;

#define MAX_IP_NAT_PROTO 256
#define MAX_IP_NAT_PROTO 256
static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO]
static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO]
						__read_mostly;
						__read_mostly;
@@ -72,7 +69,7 @@ EXPORT_SYMBOL_GPL(nf_nat_proto_put);


/* We keep an extra hash for each conntrack, for fast searching. */
/* We keep an extra hash for each conntrack, for fast searching. */
static inline unsigned int
static inline unsigned int
hash_by_src(const struct nf_conntrack_tuple *tuple)
hash_by_src(const struct net *net, const struct nf_conntrack_tuple *tuple)
{
{
	unsigned int hash;
	unsigned int hash;


@@ -80,7 +77,7 @@ hash_by_src(const struct nf_conntrack_tuple *tuple)
	hash = jhash_3words((__force u32)tuple->src.u3.ip,
	hash = jhash_3words((__force u32)tuple->src.u3.ip,
			    (__force u32)tuple->src.u.all,
			    (__force u32)tuple->src.u.all,
			    tuple->dst.protonum, 0);
			    tuple->dst.protonum, 0);
	return ((u64)hash * nf_nat_htable_size) >> 32;
	return ((u64)hash * net->ipv4.nat_htable_size) >> 32;
}
}


/* Is this tuple already taken? (not by us) */
/* Is this tuple already taken? (not by us) */
@@ -147,7 +144,7 @@ find_appropriate_src(struct net *net,
		     struct nf_conntrack_tuple *result,
		     struct nf_conntrack_tuple *result,
		     const struct nf_nat_range *range)
		     const struct nf_nat_range *range)
{
{
	unsigned int h = hash_by_src(tuple);
	unsigned int h = hash_by_src(net, tuple);
	const struct nf_conn_nat *nat;
	const struct nf_conn_nat *nat;
	const struct nf_conn *ct;
	const struct nf_conn *ct;
	const struct hlist_node *n;
	const struct hlist_node *n;
@@ -330,7 +327,7 @@ nf_nat_setup_info(struct nf_conn *ct,
	if (have_to_hash) {
	if (have_to_hash) {
		unsigned int srchash;
		unsigned int srchash;


		srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
		srchash = hash_by_src(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
		spin_lock_bh(&nf_nat_lock);
		spin_lock_bh(&nf_nat_lock);
		/* nf_conntrack_alter_reply might re-allocate exntension aera */
		/* nf_conntrack_alter_reply might re-allocate exntension aera */
		nat = nfct_nat(ct);
		nat = nfct_nat(ct);
@@ -679,7 +676,9 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct,


static int __net_init nf_nat_net_init(struct net *net)
static int __net_init nf_nat_net_init(struct net *net)
{
{
	net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size,
	/* Leave them the same for the moment. */
	net->ipv4.nat_htable_size = net->ct.htable_size;
	net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size,
						       &net->ipv4.nat_vmalloced, 0);
						       &net->ipv4.nat_vmalloced, 0);
	if (!net->ipv4.nat_bysource)
	if (!net->ipv4.nat_bysource)
		return -ENOMEM;
		return -ENOMEM;
@@ -703,7 +702,7 @@ static void __net_exit nf_nat_net_exit(struct net *net)
	nf_ct_iterate_cleanup(net, &clean_nat, NULL);
	nf_ct_iterate_cleanup(net, &clean_nat, NULL);
	synchronize_rcu();
	synchronize_rcu();
	nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced,
	nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced,
			     nf_nat_htable_size);
			     net->ipv4.nat_htable_size);
}
}


static struct pernet_operations nf_nat_net_ops = {
static struct pernet_operations nf_nat_net_ops = {
@@ -724,9 +723,6 @@ static int __init nf_nat_init(void)
		return ret;
		return ret;
	}
	}


	/* Leave them the same for the moment. */
	nf_nat_htable_size = nf_conntrack_htable_size;

	ret = register_pernet_subsys(&nf_nat_net_ops);
	ret = register_pernet_subsys(&nf_nat_net_ops);
	if (ret < 0)
	if (ret < 0)
		goto cleanup_extend;
		goto cleanup_extend;
Loading