Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b9e69e12 authored by Florian Westphal, committed by Pablo Neira Ayuso
Browse files

netfilter: xtables: don't hook tables by default



Delay hook registration until the table is being requested inside a
namespace.

Historically, a particular table (iptables mangle, ip6tables filter, etc)
was registered on module load.

When netns support was added to iptables only the ip/ip6tables ruleset was
made namespace aware, not the actual hook points.

This means f.e. that when ipt_filter table/module is loaded on a system,
then each namespace on that system has an (empty) iptables filter ruleset.

In other words, if a namespace sends a packet, such skb is 'caught' by
netfilter machinery and fed to hooking points for that table (i.e. INPUT,
FORWARD, etc).

Thanks to Eric Biederman, hooks are no longer global, but per namespace.

This means that we can avoid allocation of an empty ruleset in a namespace
and defer hook registration until we need the functionality.

We register a table's hook entry points ONLY in the initial namespace.
When an iptables get/setsockopt is issued inside a given namespace, we check
if the table is found in the per-namespace list.

If not, we attempt to find it in the initial namespace, and, if found,
create an empty default table in the requesting namespace and register the
needed hooks.

Hook points are destroyed only once the namespace is deleted; there is no
'usage count' (it makes no sense since there is no 'remove table' operation
in the xtables API).

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
parent a67dd266
Loading
Loading
Loading
Loading
+4 −2
Original line number Original line Diff line number Diff line
@@ -200,6 +200,9 @@ struct xt_table {
	u_int8_t af;		/* address/protocol family */
	u_int8_t af;		/* address/protocol family */
	int priority;		/* hook order */
	int priority;		/* hook order */


	/* called when table is needed in the given netns */
	int (*table_init)(struct net *net);

	/* A unique name... */
	/* A unique name... */
	const char name[XT_TABLE_MAXNAMELEN];
	const char name[XT_TABLE_MAXNAMELEN];
};
};
@@ -408,8 +411,7 @@ xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu)
	return cnt;
	return cnt;
}
}


struct nf_hook_ops *xt_hook_link(const struct xt_table *, nf_hookfn *);
struct nf_hook_ops *xt_hook_ops_alloc(const struct xt_table *, nf_hookfn *);
void xt_hook_unlink(const struct xt_table *, struct nf_hook_ops *);


#ifdef CONFIG_COMPAT
#ifdef CONFIG_COMPAT
#include <net/compat.h>
#include <net/compat.h>
+27 −14
Original line number Original line Diff line number Diff line
@@ -1780,6 +1780,24 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
	return ret;
	return ret;
}
}


/*
 * Tear down an arptables table without touching its netfilter hooks.
 *
 * Unregisters @table via xt_unregister_table(), runs cleanup_entry() on
 * every rule entry in the returned table info, drops one module reference
 * when the table holds more than its initial entries, and finally frees
 * the table info.
 *
 * Hook unregistration is not done here; the caller handles it where needed.
 */
static void __arpt_unregister_table(struct xt_table *table)
{
	struct xt_table_info *private;
	void *loc_cpu_entry;
	/*
	 * Owner is read before xt_unregister_table() is called.
	 * NOTE(review): presumably @table must not be dereferenced after
	 * unregistration — confirm against xt_unregister_table().
	 */
	struct module *table_owner = table->me;
	struct arpt_entry *iter;

	private = xt_unregister_table(table);

	/* Decrease module usage counts and free resources */
	loc_cpu_entry = private->entries;
	xt_entry_foreach(iter, loc_cpu_entry, private->size)
		cleanup_entry(iter);
	/*
	 * More entries than the initial set: release one reference on the
	 * owning module. NOTE(review): presumably pairs with a reference
	 * taken when rules were added — confirm.
	 */
	if (private->number > private->initial_entries)
		module_put(table_owner);
	xt_free_table_info(private);
}

int arpt_register_table(struct net *net,
int arpt_register_table(struct net *net,
			const struct xt_table *table,
			const struct xt_table *table,
			const struct arpt_replace *repl,
			const struct arpt_replace *repl,
@@ -1810,8 +1828,15 @@ int arpt_register_table(struct net *net,
		goto out_free;
		goto out_free;
	}
	}


	/* set res now, will see skbs right after nf_register_net_hooks */
	WRITE_ONCE(*res, new_table);
	WRITE_ONCE(*res, new_table);


	ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks));
	if (ret != 0) {
		__arpt_unregister_table(new_table);
		*res = NULL;
	}

	return ret;
	return ret;


out_free:
out_free:
@@ -1822,20 +1847,8 @@ int arpt_register_table(struct net *net,
void arpt_unregister_table(struct net *net, struct xt_table *table,
void arpt_unregister_table(struct net *net, struct xt_table *table,
			   const struct nf_hook_ops *ops)
			   const struct nf_hook_ops *ops)
{
{
	struct xt_table_info *private;
	nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
	void *loc_cpu_entry;
	__arpt_unregister_table(table);
	struct module *table_owner = table->me;
	struct arpt_entry *iter;

	private = xt_unregister_table(table);

	/* Decrease module usage counts and free resources */
	loc_cpu_entry = private->entries;
	xt_entry_foreach(iter, loc_cpu_entry, private->size)
		cleanup_entry(iter);
	if (private->number > private->initial_entries)
		module_put(table_owner);
	xt_free_table_info(private);
}
}


/* The built-in targets: standard (NULL) and error. */
/* The built-in targets: standard (NULL) and error. */
+17 −12
Original line number Original line Diff line number Diff line
@@ -17,12 +17,15 @@ MODULE_DESCRIPTION("arptables filter table");
#define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \
#define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \
			   (1 << NF_ARP_FORWARD))
			   (1 << NF_ARP_FORWARD))


static int __net_init arptable_filter_table_init(struct net *net);

static const struct xt_table packet_filter = {
static const struct xt_table packet_filter = {
	.name		= "filter",
	.name		= "filter",
	.valid_hooks	= FILTER_VALID_HOOKS,
	.valid_hooks	= FILTER_VALID_HOOKS,
	.me		= THIS_MODULE,
	.me		= THIS_MODULE,
	.af		= NFPROTO_ARP,
	.af		= NFPROTO_ARP,
	.priority	= NF_IP_PRI_FILTER,
	.priority	= NF_IP_PRI_FILTER,
	.table_init	= arptable_filter_table_init,
};
};


/* The work comes in here from netfilter.c */
/* The work comes in here from netfilter.c */
@@ -35,11 +38,14 @@ arptable_filter_hook(void *priv, struct sk_buff *skb,


static struct nf_hook_ops *arpfilter_ops __read_mostly;
static struct nf_hook_ops *arpfilter_ops __read_mostly;


static int __net_init arptable_filter_net_init(struct net *net)
static int __net_init arptable_filter_table_init(struct net *net)
{
{
	struct arpt_replace *repl;
	struct arpt_replace *repl;
	int err;
	int err;


	if (net->ipv4.arptable_filter)
		return 0;

	repl = arpt_alloc_initial_table(&packet_filter);
	repl = arpt_alloc_initial_table(&packet_filter);
	if (repl == NULL)
	if (repl == NULL)
		return -ENOMEM;
		return -ENOMEM;
@@ -51,11 +57,13 @@ static int __net_init arptable_filter_net_init(struct net *net)


static void __net_exit arptable_filter_net_exit(struct net *net)
static void __net_exit arptable_filter_net_exit(struct net *net)
{
{
	if (!net->ipv4.arptable_filter)
		return;
	arpt_unregister_table(net, net->ipv4.arptable_filter, arpfilter_ops);
	arpt_unregister_table(net, net->ipv4.arptable_filter, arpfilter_ops);
	net->ipv4.arptable_filter = NULL;
}
}


static struct pernet_operations arptable_filter_net_ops = {
static struct pernet_operations arptable_filter_net_ops = {
	.init = arptable_filter_net_init,
	.exit = arptable_filter_net_exit,
	.exit = arptable_filter_net_exit,
};
};


@@ -63,26 +71,23 @@ static int __init arptable_filter_init(void)
{
{
	int ret;
	int ret;


	arpfilter_ops = xt_hook_ops_alloc(&packet_filter, arptable_filter_hook);
	if (IS_ERR(arpfilter_ops))
		return PTR_ERR(arpfilter_ops);

	ret = register_pernet_subsys(&arptable_filter_net_ops);
	ret = register_pernet_subsys(&arptable_filter_net_ops);
	if (ret < 0)
	if (ret < 0) {
		kfree(arpfilter_ops);
		return ret;
		return ret;

	arpfilter_ops = xt_hook_link(&packet_filter, arptable_filter_hook);
	if (IS_ERR(arpfilter_ops)) {
		ret = PTR_ERR(arpfilter_ops);
		goto cleanup_table;
	}
	}
	return ret;


cleanup_table:
	unregister_pernet_subsys(&arptable_filter_net_ops);
	return ret;
	return ret;
}
}


static void __exit arptable_filter_fini(void)
static void __exit arptable_filter_fini(void)
{
{
	xt_hook_unlink(&packet_filter, arpfilter_ops);
	unregister_pernet_subsys(&arptable_filter_net_ops);
	unregister_pernet_subsys(&arptable_filter_net_ops);
	kfree(arpfilter_ops);
}
}


module_init(arptable_filter_init);
module_init(arptable_filter_init);
+28 −14
Original line number Original line Diff line number Diff line
@@ -2062,6 +2062,24 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
	return ret;
	return ret;
}
}


/*
 * Tear down an iptables table without touching its netfilter hooks.
 *
 * Unregisters @table via xt_unregister_table(), runs cleanup_entry() on
 * every rule entry in the returned table info (passing @net through),
 * drops one module reference when the table holds more than its initial
 * entries, and finally frees the table info.
 *
 * Hook unregistration is not done here; the caller handles it where needed.
 */
static void __ipt_unregister_table(struct net *net, struct xt_table *table)
{
	struct xt_table_info *private;
	void *loc_cpu_entry;
	/*
	 * Owner is read before xt_unregister_table() is called.
	 * NOTE(review): presumably @table must not be dereferenced after
	 * unregistration — confirm against xt_unregister_table().
	 */
	struct module *table_owner = table->me;
	struct ipt_entry *iter;

	private = xt_unregister_table(table);

	/* Decrease module usage counts and free resources */
	loc_cpu_entry = private->entries;
	xt_entry_foreach(iter, loc_cpu_entry, private->size)
		cleanup_entry(iter, net);
	/*
	 * More entries than the initial set: release one reference on the
	 * owning module. NOTE(review): presumably pairs with a reference
	 * taken when rules were added — confirm.
	 */
	if (private->number > private->initial_entries)
		module_put(table_owner);
	xt_free_table_info(private);
}

int ipt_register_table(struct net *net, const struct xt_table *table,
int ipt_register_table(struct net *net, const struct xt_table *table,
		       const struct ipt_replace *repl,
		       const struct ipt_replace *repl,
		       const struct nf_hook_ops *ops, struct xt_table **res)
		       const struct nf_hook_ops *ops, struct xt_table **res)
@@ -2089,7 +2107,15 @@ int ipt_register_table(struct net *net, const struct xt_table *table,
		goto out_free;
		goto out_free;
	}
	}


	/* set res now, will see skbs right after nf_register_net_hooks */
	WRITE_ONCE(*res, new_table);
	WRITE_ONCE(*res, new_table);

	ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks));
	if (ret != 0) {
		__ipt_unregister_table(net, new_table);
		*res = NULL;
	}

	return ret;
	return ret;


out_free:
out_free:
@@ -2100,20 +2126,8 @@ int ipt_register_table(struct net *net, const struct xt_table *table,
void ipt_unregister_table(struct net *net, struct xt_table *table,
void ipt_unregister_table(struct net *net, struct xt_table *table,
			  const struct nf_hook_ops *ops)
			  const struct nf_hook_ops *ops)
{
{
	struct xt_table_info *private;
	nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
	void *loc_cpu_entry;
	__ipt_unregister_table(net, table);
	struct module *table_owner = table->me;
	struct ipt_entry *iter;

	private = xt_unregister_table(table);

	/* Decrease module usage counts and free resources */
	loc_cpu_entry = private->entries;
	xt_entry_foreach(iter, loc_cpu_entry, private->size)
		cleanup_entry(iter, net);
	if (private->number > private->initial_entries)
		module_put(table_owner);
	xt_free_table_info(private);
}
}


/* Returns 1 if the type and code is matched by the range, 0 otherwise */
/* Returns 1 if the type and code is matched by the range, 0 otherwise */
+24 −11
Original line number Original line Diff line number Diff line
@@ -23,6 +23,7 @@ MODULE_DESCRIPTION("iptables filter table");
#define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \
#define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \
			    (1 << NF_INET_FORWARD) | \
			    (1 << NF_INET_FORWARD) | \
			    (1 << NF_INET_LOCAL_OUT))
			    (1 << NF_INET_LOCAL_OUT))
static int __net_init iptable_filter_table_init(struct net *net);


static const struct xt_table packet_filter = {
static const struct xt_table packet_filter = {
	.name		= "filter",
	.name		= "filter",
@@ -30,6 +31,7 @@ static const struct xt_table packet_filter = {
	.me		= THIS_MODULE,
	.me		= THIS_MODULE,
	.af		= NFPROTO_IPV4,
	.af		= NFPROTO_IPV4,
	.priority	= NF_IP_PRI_FILTER,
	.priority	= NF_IP_PRI_FILTER,
	.table_init	= iptable_filter_table_init,
};
};


static unsigned int
static unsigned int
@@ -48,14 +50,17 @@ iptable_filter_hook(void *priv, struct sk_buff *skb,
static struct nf_hook_ops *filter_ops __read_mostly;
static struct nf_hook_ops *filter_ops __read_mostly;


/* Default to forward because I got too much mail already. */
/* Default to forward because I got too much mail already. */
static bool forward = true;
static bool forward __read_mostly = true;
module_param(forward, bool, 0000);
module_param(forward, bool, 0000);


static int __net_init iptable_filter_net_init(struct net *net)
static int __net_init iptable_filter_table_init(struct net *net)
{
{
	struct ipt_replace *repl;
	struct ipt_replace *repl;
	int err;
	int err;


	if (net->ipv4.iptable_filter)
		return 0;

	repl = ipt_alloc_initial_table(&packet_filter);
	repl = ipt_alloc_initial_table(&packet_filter);
	if (repl == NULL)
	if (repl == NULL)
		return -ENOMEM;
		return -ENOMEM;
@@ -69,9 +74,20 @@ static int __net_init iptable_filter_net_init(struct net *net)
	return err;
	return err;
}
}


/*
 * Per-namespace init for the iptables filter table.
 *
 * The table is created immediately only for the initial namespace or when
 * the "forward" module parameter is false; otherwise nothing is set up
 * here and 0 is returned.
 *
 * Returns the result of iptable_filter_table_init() when it is called,
 * 0 otherwise.
 */
static int __net_init iptable_filter_net_init(struct net *net)
{
	/*
	 * NOTE(review): with forward == false the table presumably must exist
	 * up front so its non-default FORWARD policy takes effect in every
	 * namespace — confirm against iptable_filter_table_init().
	 */
	if (net == &init_net || !forward)
		return iptable_filter_table_init(net);

	return 0;
}

static void __net_exit iptable_filter_net_exit(struct net *net)
static void __net_exit iptable_filter_net_exit(struct net *net)
{
{
	if (!net->ipv4.iptable_filter)
		return;
	ipt_unregister_table(net, net->ipv4.iptable_filter, filter_ops);
	ipt_unregister_table(net, net->ipv4.iptable_filter, filter_ops);
	net->ipv4.iptable_filter = NULL;
}
}


static struct pernet_operations iptable_filter_net_ops = {
static struct pernet_operations iptable_filter_net_ops = {
@@ -83,24 +99,21 @@ static int __init iptable_filter_init(void)
{
{
	int ret;
	int ret;


	filter_ops = xt_hook_ops_alloc(&packet_filter, iptable_filter_hook);
	if (IS_ERR(filter_ops))
		return PTR_ERR(filter_ops);

	ret = register_pernet_subsys(&iptable_filter_net_ops);
	ret = register_pernet_subsys(&iptable_filter_net_ops);
	if (ret < 0)
	if (ret < 0)
		return ret;
		kfree(filter_ops);

	/* Register hooks */
	filter_ops = xt_hook_link(&packet_filter, iptable_filter_hook);
	if (IS_ERR(filter_ops)) {
		ret = PTR_ERR(filter_ops);
		unregister_pernet_subsys(&iptable_filter_net_ops);
	}


	return ret;
	return ret;
}
}


static void __exit iptable_filter_fini(void)
static void __exit iptable_filter_fini(void)
{
{
	xt_hook_unlink(&packet_filter, filter_ops);
	unregister_pernet_subsys(&iptable_filter_net_ops);
	unregister_pernet_subsys(&iptable_filter_net_ops);
	kfree(filter_ops);
}
}


module_init(iptable_filter_init);
module_init(iptable_filter_init);
Loading