Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 8fb91c35 authored by David S. Miller
Browse files

Merge branch 'inet-frags-avoid-possible-races-at-netns-dismantle'



Eric Dumazet says:

====================
inet: frags: avoid possible races at netns dismantle

This patch series fixes a race happening on netns dismantle with
frag queues. While rhashtable_free_and_destroy() is running,
concurrent timers might run inet_frag_kill() and attempt
rhashtable_remove_fast() calls. This is not allowed by
rhashtable logic.

Since I do not want to add expensive synchronize_rcu() calls
in the netns dismantle path, I had to stop embedding the
netns_frags structures inline and allocate them dynamically instead.

The first ten patches do this preparation, so that
the last patch clearly shows the fix.

As this patch series is not exactly trivial, I chose to
target 5.3. We will backport it once it has soaked for a bit.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents ddf6ddb0 3c8fc878
Loading
Loading
Loading
Loading
+34 −14
Original line number Diff line number Diff line
@@ -4,18 +4,22 @@

#include <linux/rhashtable-types.h>

struct netns_frags {
/* Per netns frag queues directory */
struct fqdir {
	/* sysctls */
	long			high_thresh;
	long			low_thresh;
	int			timeout;
	int			max_dist;
	struct inet_frags	*f;
	struct net		*net;
	bool			dead;

	struct rhashtable       rhashtable ____cacheline_aligned_in_smp;

	/* Keep atomic mem on separate cachelines in structs that include it */
	atomic_long_t		mem ____cacheline_aligned_in_smp;
	struct rcu_work		destroy_rwork;
};

/**
@@ -24,11 +28,13 @@ struct netns_frags {
 * @INET_FRAG_FIRST_IN: first fragment has arrived
 * @INET_FRAG_LAST_IN: final fragment has arrived
 * @INET_FRAG_COMPLETE: frag queue has been processed and is due for destruction
 * @INET_FRAG_HASH_DEAD: inet_frag_kill() has not removed fq from rhashtable
 */
enum {
	INET_FRAG_FIRST_IN	= BIT(0),
	INET_FRAG_LAST_IN	= BIT(1),
	INET_FRAG_COMPLETE	= BIT(2),
	INET_FRAG_HASH_DEAD	= BIT(3),
};

struct frag_v4_compare_key {
@@ -64,7 +70,7 @@ struct frag_v6_compare_key {
 * @meat: length of received fragments so far
 * @flags: fragment queue flags
 * @max_size: maximum received fragment size
 * @net: namespace that this frag belongs to
 * @fqdir: pointer to struct fqdir
 * @rcu: rcu head for freeing deferral
 */
struct inet_frag_queue {
@@ -84,7 +90,7 @@ struct inet_frag_queue {
	int			meat;
	__u8			flags;
	u16			max_size;
	struct netns_frags      *net;
	struct fqdir		*fqdir;
	struct rcu_head		rcu;
};

@@ -103,16 +109,30 @@ struct inet_frags {
int inet_frags_init(struct inet_frags *);
void inet_frags_fini(struct inet_frags *);

static inline int inet_frags_init_net(struct netns_frags *nf)
static inline int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f,
			     struct net *net)
{
	atomic_long_set(&nf->mem, 0);
	return rhashtable_init(&nf->rhashtable, &nf->f->rhash_params);
	struct fqdir *fqdir = kzalloc(sizeof(*fqdir), GFP_KERNEL);
	int res;

	if (!fqdir)
		return -ENOMEM;
	fqdir->f = f;
	fqdir->net = net;
	res = rhashtable_init(&fqdir->rhashtable, &fqdir->f->rhash_params);
	if (res < 0) {
		kfree(fqdir);
		return res;
	}
void inet_frags_exit_net(struct netns_frags *nf);
	*fqdirp = fqdir;
	return 0;
}

void fqdir_exit(struct fqdir *fqdir);

void inet_frag_kill(struct inet_frag_queue *q);
void inet_frag_destroy(struct inet_frag_queue *q);
struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key);
struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key);

/* Free all skbs in the queue; return the sum of their truesizes. */
unsigned int inet_frag_rbtree_purge(struct rb_root *root);
@@ -125,19 +145,19 @@ static inline void inet_frag_put(struct inet_frag_queue *q)

/* Memory Tracking Functions. */

static inline long frag_mem_limit(const struct netns_frags *nf)
static inline long frag_mem_limit(const struct fqdir *fqdir)
{
	return atomic_long_read(&nf->mem);
	return atomic_long_read(&fqdir->mem);
}

static inline void sub_frag_mem_limit(struct netns_frags *nf, long val)
static inline void sub_frag_mem_limit(struct fqdir *fqdir, long val)
{
	atomic_long_sub(val, &nf->mem);
	atomic_long_sub(val, &fqdir->mem);
}

static inline void add_frag_mem_limit(struct netns_frags *nf, long val)
static inline void add_frag_mem_limit(struct fqdir *fqdir, long val)
{
	atomic_long_add(val, &nf->mem);
	atomic_long_add(val, &fqdir->mem);
}

/* RFC 3168 support :
+1 −1
Original line number Diff line number Diff line
@@ -16,7 +16,7 @@ struct netns_sysctl_lowpan {

struct netns_ieee802154_lowpan {
	struct netns_sysctl_lowpan sysctl;
	struct netns_frags	frags;
	struct fqdir		*fqdir;
};

#endif
+1 −1
Original line number Diff line number Diff line
@@ -72,7 +72,7 @@ struct netns_ipv4 {

	struct inet_peer_base	*peers;
	struct sock  * __percpu	*tcp_sk;
	struct netns_frags	frags;
	struct fqdir		*fqdir;
#ifdef CONFIG_NETFILTER
	struct xt_table		*iptable_filter;
	struct xt_table		*iptable_mangle;
+2 −2
Original line number Diff line number Diff line
@@ -58,7 +58,7 @@ struct netns_ipv6 {
	struct ipv6_devconf	*devconf_all;
	struct ipv6_devconf	*devconf_dflt;
	struct inet_peer_base	*peers;
	struct netns_frags	frags;
	struct fqdir		*fqdir;
#ifdef CONFIG_NETFILTER
	struct xt_table		*ip6table_filter;
	struct xt_table		*ip6table_mangle;
@@ -116,7 +116,7 @@ struct netns_ipv6 {

#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
struct netns_nf_frag {
	struct netns_frags	frags;
	struct fqdir	*fqdir;
};
#endif

+16 −20
Original line number Diff line number Diff line
@@ -79,7 +79,7 @@ fq_find(struct net *net, const struct lowpan_802154_cb *cb,
	key.src = *src;
	key.dst = *dst;

	q = inet_frag_find(&ieee802154_lowpan->frags, &key);
	q = inet_frag_find(ieee802154_lowpan->fqdir, &key);
	if (!q)
		return NULL;

@@ -139,7 +139,7 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq,
		fq->q.flags |= INET_FRAG_FIRST_IN;

	fq->q.meat += skb->len;
	add_frag_mem_limit(fq->q.net, skb->truesize);
	add_frag_mem_limit(fq->q.fqdir, skb->truesize);

	if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
	    fq->q.meat == fq->q.len) {
@@ -326,23 +326,18 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type)
static struct ctl_table lowpan_frags_ns_ctl_table[] = {
	{
		.procname	= "6lowpanfrag_high_thresh",
		.data		= &init_net.ieee802154_lowpan.frags.high_thresh,
		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
		.extra1		= &init_net.ieee802154_lowpan.frags.low_thresh
	},
	{
		.procname	= "6lowpanfrag_low_thresh",
		.data		= &init_net.ieee802154_lowpan.frags.low_thresh,
		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
		.extra2		= &init_net.ieee802154_lowpan.frags.high_thresh
	},
	{
		.procname	= "6lowpanfrag_time",
		.data		= &init_net.ieee802154_lowpan.frags.timeout,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
@@ -377,17 +372,17 @@ static int __net_init lowpan_frags_ns_sysctl_register(struct net *net)
		if (table == NULL)
			goto err_alloc;

		table[0].data = &ieee802154_lowpan->frags.high_thresh;
		table[0].extra1 = &ieee802154_lowpan->frags.low_thresh;
		table[1].data = &ieee802154_lowpan->frags.low_thresh;
		table[1].extra2 = &ieee802154_lowpan->frags.high_thresh;
		table[2].data = &ieee802154_lowpan->frags.timeout;

		/* Don't export sysctls to unprivileged users */
		if (net->user_ns != &init_user_ns)
			table[0].procname = NULL;
	}

	table[0].data	= &ieee802154_lowpan->fqdir->high_thresh;
	table[0].extra1	= &ieee802154_lowpan->fqdir->low_thresh;
	table[1].data	= &ieee802154_lowpan->fqdir->low_thresh;
	table[1].extra2	= &ieee802154_lowpan->fqdir->high_thresh;
	table[2].data	= &ieee802154_lowpan->fqdir->timeout;

	hdr = register_net_sysctl(net, "net/ieee802154/6lowpan", table);
	if (hdr == NULL)
		goto err_reg;
@@ -454,17 +449,18 @@ static int __net_init lowpan_frags_init_net(struct net *net)
		net_ieee802154_lowpan(net);
	int res;

	ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
	ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH;
	ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT;
	ieee802154_lowpan->frags.f = &lowpan_frags;

	res = inet_frags_init_net(&ieee802154_lowpan->frags);
	res = fqdir_init(&ieee802154_lowpan->fqdir, &lowpan_frags, net);
	if (res < 0)
		return res;

	ieee802154_lowpan->fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH;
	ieee802154_lowpan->fqdir->low_thresh = IPV6_FRAG_LOW_THRESH;
	ieee802154_lowpan->fqdir->timeout = IPV6_FRAG_TIMEOUT;

	res = lowpan_frags_ns_sysctl_register(net);
	if (res < 0)
		inet_frags_exit_net(&ieee802154_lowpan->frags);
		fqdir_exit(ieee802154_lowpan->fqdir);
	return res;
}

@@ -474,7 +470,7 @@ static void __net_exit lowpan_frags_exit_net(struct net *net)
		net_ieee802154_lowpan(net);

	lowpan_frags_ns_sysctl_unregister(net);
	inet_frags_exit_net(&ieee802154_lowpan->frags);
	fqdir_exit(ieee802154_lowpan->fqdir);
}

static struct pernet_operations lowpan_frags_ops = {
Loading