Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 87c418bf authored by Yuval Mintz's avatar Yuval Mintz Committed by David S. Miller
Browse files

ip6mr: Align hash implementation to ipmr



Since commit 8fb472c0 ("ipmr: improve hash scalability") ipmr has
been using rhashtable as a basis for its mfc routes, but ip6mr is
currently still using the old private MFC hash implementation.

Align ip6mr to the current ipmr implementation.

Signed-off-by: default avatarYuval Mintz <yuvalm@mellanox.com>
Acked-by: default avatarNikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 8571ab47
Loading
Loading
Loading
Loading
+16 −14
Original line number Diff line number Diff line
@@ -8,6 +8,7 @@
#include <net/net_namespace.h>
#include <uapi/linux/mroute6.h>
#include <linux/mroute_base.h>
#include <linux/rhashtable.h>

#ifdef CONFIG_IPV6_MROUTE
static inline int ip6_mroute_opt(int opt)
@@ -65,10 +66,20 @@ static inline void ip6_mr_cleanup(void)

#define VIFF_STATIC 0x8000

struct mfc6_cache_cmp_arg {
	struct in6_addr mf6c_mcastgrp;
	struct in6_addr mf6c_origin;
};

struct mfc6_cache {
	struct list_head list;
	struct in6_addr mf6c_mcastgrp;			/* Group the entry belongs to 	*/
	struct in6_addr mf6c_origin;			/* Source of packet 		*/
	struct rhlist_head mnode;
	union {
		struct {
			struct in6_addr mf6c_mcastgrp;
			struct in6_addr mf6c_origin;
		};
		struct mfc6_cache_cmp_arg cmparg;
	};
	mifi_t mf6c_parent;			/* Source interface		*/
	int mfc_flags;				/* Flags on line		*/

@@ -88,22 +99,13 @@ struct mfc6_cache {
			unsigned char ttls[MAXMIFS];	/* TTL thresholds		*/
		} res;
	} mfc_un;
	struct list_head list;
	struct rcu_head rcu;
};

#define MFC_STATIC		1
#define MFC_NOTIFY		2

#define MFC6_LINES		64

#define MFC6_HASH(a, g) (((__force u32)(a)->s6_addr32[0] ^ \
			  (__force u32)(a)->s6_addr32[1] ^ \
			  (__force u32)(a)->s6_addr32[2] ^ \
			  (__force u32)(a)->s6_addr32[3] ^ \
			  (__force u32)(g)->s6_addr32[0] ^ \
			  (__force u32)(g)->s6_addr32[1] ^ \
			  (__force u32)(g)->s6_addr32[2] ^ \
			  (__force u32)(g)->s6_addr32[3]) % MFC6_LINES)

#define MFC_ASSERT_THRESH (3*HZ)		/* Maximal freq. of asserts */

struct rtmsg;
+168 −145
Original line number Diff line number Diff line
@@ -61,8 +61,9 @@ struct mr6_table {
	struct sock __rcu	*mroute6_sk;
	struct timer_list	ipmr_expire_timer;
	struct list_head	mfc6_unres_queue;
	struct list_head	mfc6_cache_array[MFC6_LINES];
	struct vif_device	vif6_table[MAXMIFS];
	struct rhltable		mfc6_hash;
	struct list_head	mfc6_cache_list;
	int			maxvif;
	atomic_t		cache_resolve_queue_len;
	bool			mroute_do_assert;
@@ -299,10 +300,29 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
}
#endif

static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
			  const void *ptr)
{
	const struct mfc6_cache_cmp_arg *cmparg = arg->key;
	struct mfc6_cache *c = (struct mfc6_cache *)ptr;

	return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
	       !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
}

static const struct rhashtable_params ip6mr_rht_params = {
	.head_offset = offsetof(struct mfc6_cache, mnode),
	.key_offset = offsetof(struct mfc6_cache, cmparg),
	.key_len = sizeof(struct mfc6_cache_cmp_arg),
	.nelem_hint = 3,
	.locks_mul = 1,
	.obj_cmpfn = ip6mr_hash_cmp,
	.automatic_shrinking = true,
};

static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
{
	struct mr6_table *mrt;
	unsigned int i;

	mrt = ip6mr_get_table(net, id);
	if (mrt)
@@ -314,10 +334,8 @@ static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
	mrt->id = id;
	write_pnet(&mrt->net, net);

	/* Forwarding cache */
	for (i = 0; i < MFC6_LINES; i++)
		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);

	rhltable_init(&mrt->mfc6_hash, &ip6mr_rht_params);
	INIT_LIST_HEAD(&mrt->mfc6_cache_list);
	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);

	timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0);
@@ -335,6 +353,7 @@ static void ip6mr_free_table(struct mr6_table *mrt)
{
	del_timer_sync(&mrt->ipmr_expire_timer);
	mroute_clean_tables(mrt, true);
	rhltable_destroy(&mrt->mfc6_hash);
	kfree(mrt);
}

@@ -344,7 +363,6 @@ struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mr6_table *mrt;
	struct list_head *cache;
	int ct;
};


@@ -354,14 +372,12 @@ static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
	struct mr6_table *mrt = it->mrt;
	struct mfc6_cache *mfc;

	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
		it->cache = &mrt->mfc6_cache_array[it->ct];
		list_for_each_entry(mfc, it->cache, list)
	rcu_read_lock();
	it->cache = &mrt->mfc6_cache_list;
	list_for_each_entry_rcu(mfc, &mrt->mfc6_cache_list, list)
		if (pos-- == 0)
			return mfc;
	}
	read_unlock(&mrt_lock);
	rcu_read_unlock();

	spin_lock_bh(&mfc_unres_lock);
	it->cache = &mrt->mfc6_unres_queue;
@@ -517,19 +533,9 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
	if (it->cache == &mrt->mfc6_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);

	while (++it->ct < MFC6_LINES) {
		it->cache = &mrt->mfc6_cache_array[it->ct];
		if (list_empty(it->cache))
			continue;
		return list_first_entry(it->cache, struct mfc6_cache, list);
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	rcu_read_unlock();
	it->cache = &mrt->mfc6_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	if (!list_empty(it->cache))
@@ -549,8 +555,8 @@ static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)

	if (it->cache == &mrt->mfc6_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == &mrt->mfc6_cache_array[it->ct])
		read_unlock(&mrt_lock);
	else if (it->cache == &mrt->mfc6_cache_list)
		rcu_read_unlock();
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
@@ -827,11 +833,18 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
	return 0;
}

static inline void ip6mr_cache_free(struct mfc6_cache *c)
static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
{
	struct mfc6_cache *c = container_of(head, struct mfc6_cache, rcu);

	kmem_cache_free(mrt_cachep, c);
}

static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
	call_rcu(&c->rcu, ip6mr_cache_free_rcu);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */
@@ -1002,14 +1015,17 @@ static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
					   const struct in6_addr *origin,
					   const struct in6_addr *mcastgrp)
{
	int line = MFC6_HASH(mcastgrp, origin);
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = *origin,
		.mf6c_mcastgrp = *mcastgrp,
	};
	struct rhlist_head *tmp, *list;
	struct mfc6_cache *c;

	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
	list = rhltable_lookup(&mrt->mfc6_hash, &arg, ip6mr_rht_params);
	rhl_for_each_entry_rcu(c, tmp, list, mnode)
		return c;
	}

	return NULL;
}

@@ -1017,13 +1033,16 @@ static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
						      mifi_t mifi)
{
	int line = MFC6_HASH(&in6addr_any, &in6addr_any);
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = in6addr_any,
		.mf6c_mcastgrp = in6addr_any,
	};
	struct rhlist_head *tmp, *list;
	struct mfc6_cache *c;

	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
		if (ipv6_addr_any(&c->mf6c_origin) &&
		    ipv6_addr_any(&c->mf6c_mcastgrp) &&
		    (c->mfc_un.res.ttls[mifi] < 255))
	list = rhltable_lookup(&mrt->mfc6_hash, &arg, ip6mr_rht_params);
	rhl_for_each_entry_rcu(c, tmp, list, mnode)
		if (c->mfc_un.res.ttls[mifi] < 255)
			return c;

	return NULL;
@@ -1034,21 +1053,23 @@ static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
					       struct in6_addr *mcastgrp,
					       mifi_t mifi)
{
	int line = MFC6_HASH(mcastgrp, &in6addr_any);
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = in6addr_any,
		.mf6c_mcastgrp = *mcastgrp,
	};
	struct rhlist_head *tmp, *list;
	struct mfc6_cache *c, *proxy;

	if (ipv6_addr_any(mcastgrp))
		goto skip;

	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
		if (ipv6_addr_any(&c->mf6c_origin) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) {
	list = rhltable_lookup(&mrt->mfc6_hash, &arg, ip6mr_rht_params);
	rhl_for_each_entry_rcu(c, tmp, list, mnode) {
		if (c->mfc_un.res.ttls[mifi] < 255)
			return c;

		/* It's ok if the mifi is part of the static tree */
			proxy = ip6mr_cache_find_any_parent(mrt,
							    c->mf6c_parent);
		proxy = ip6mr_cache_find_any_parent(mrt, c->mf6c_parent);
		if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
			return c;
	}
@@ -1057,6 +1078,28 @@ static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
	return ip6mr_cache_find_any_parent(mrt, mifi);
}

/* Look for a (S,G,iif) entry if parent != -1 */
static struct mfc6_cache *
ip6mr_cache_find_parent(struct mr6_table *mrt,
			const struct in6_addr *origin,
			const struct in6_addr *mcastgrp,
			int parent)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = *origin,
		.mf6c_mcastgrp = *mcastgrp,
	};
	struct rhlist_head *tmp, *list;
	struct mfc6_cache *c;

	list = rhltable_lookup(&mrt->mfc6_hash, &arg, ip6mr_rht_params);
	rhl_for_each_entry_rcu(c, tmp, list, mnode)
		if (parent == -1 || parent == c->mf6c_parent)
			return c;

	return NULL;
}

/*
 *	Allocate a multicast cache entry
 */
@@ -1296,27 +1339,22 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
			    int parent)
{
	int line;
	struct mfc6_cache *c, *next;

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
	struct mfc6_cache *c;

	list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp,
				    &mfc->mf6cc_mcastgrp.sin6_addr) &&
		    (parent == -1 || parent == c->mf6c_parent)) {
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);
	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
	rcu_read_unlock();
	if (!c)
		return -ENOENT;
	rhltable_remove(&mrt->mfc6_hash, &c->mnode, ip6mr_rht_params);
	list_del_rcu(&c->list);

	mr6_netlink_event(mrt, c, RTM_DELROUTE);
	ip6mr_cache_free(c);
	return 0;
}
	}
	return -ENOENT;
}

static int ip6mr_device_event(struct notifier_block *this,
			      unsigned long event, void *ptr)
@@ -1448,11 +1486,10 @@ void ip6_mr_cleanup(void)
static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
			 struct mf6cctl *mfc, int mrtsock, int parent)
{
	bool found = false;
	int line;
	struct mfc6_cache *uc, *c;
	unsigned char ttls[MAXMIFS];
	int i;
	struct mfc6_cache *uc, *c;
	bool found;
	int i, err;

	if (mfc->mf6cc_parent >= MAXMIFS)
		return -ENFILE;
@@ -1461,22 +1498,14 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;

	}

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp,
				    &mfc->mf6cc_mcastgrp.sin6_addr) &&
		    (parent == -1 || parent == mfc->mf6cc_parent)) {
			found = true;
			break;
		}
	}

	if (found) {
	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
	rcu_read_unlock();
	if (c) {
		write_lock_bh(&mrt_lock);
		c->mf6c_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(mrt, c, ttls);
@@ -1502,12 +1531,16 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	list_add(&c->list, &mrt->mfc6_cache_array[line]);
	write_unlock_bh(&mrt_lock);
	err = rhltable_insert_key(&mrt->mfc6_hash, &c->cmparg, &c->mnode,
				  ip6mr_rht_params);
	if (err) {
		pr_err("ip6mr: rhtable insert error %d\n", err);
		ip6mr_cache_free(c);
		return err;
	}
	list_add_tail_rcu(&c->list, &mrt->mfc6_cache_list);

	/*
	 *	Check to see if we resolved a queued list. If so we
	/* Check to see if we resolved a queued list. If so we
	 * need to send on the frames and tidy up.
	 */
	found = false;
@@ -1539,13 +1572,11 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,

static void mroute_clean_tables(struct mr6_table *mrt, bool all)
{
	int i;
	struct mfc6_cache *c, *tmp;
	LIST_HEAD(list);
	struct mfc6_cache *c, *next;
	int i;

	/*
	 *	Shut down all active vif entries
	 */
	/* Shut down all active vif entries */
	for (i = 0; i < mrt->maxvif; i++) {
		if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC))
			continue;
@@ -1553,25 +1584,19 @@ static void mroute_clean_tables(struct mr6_table *mrt, bool all)
	}
	unregister_netdevice_many(&list);

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < MFC6_LINES; i++) {
		list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
	/* Wipe the cache */
	list_for_each_entry_safe(c, tmp, &mrt->mfc6_cache_list, list) {
		if (!all && (c->mfc_flags & MFC_STATIC))
			continue;
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

		rhltable_remove(&mrt->mfc6_hash, &c->mnode, ip6mr_rht_params);
		list_del_rcu(&c->list);
		mr6_netlink_event(mrt, c, RTM_DELROUTE);
		ip6mr_cache_free(c);
	}
	}

	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
		list_for_each_entry_safe(c, tmp, &mrt->mfc6_unres_queue, list) {
			list_del(&c->list);
			mr6_netlink_event(mrt, c, RTM_DELROUTE);
			ip6mr_destroy_unres(mrt, c);
@@ -1905,19 +1930,19 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		rcu_read_lock();
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);
			rcu_read_unlock();

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
@@ -1979,19 +2004,19 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		rcu_read_lock();
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);
			rcu_read_unlock();

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
@@ -2115,11 +2140,15 @@ static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
		/* For an (*,G) entry, we only check that the incoming
		 * interface is part of the static tree.
		 */
		rcu_read_lock();
		cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
		if (cache_proxy &&
		    cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
		    cache_proxy->mfc_un.res.ttls[true_vifi] < 255) {
			rcu_read_unlock();
			goto forward;
		}
		rcu_read_unlock();
	}

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
@@ -2535,21 +2564,16 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
	struct mr6_table *mrt;
	struct mfc6_cache *mfc;
	unsigned int t = 0, s_t;
	unsigned int h = 0, s_h;
	unsigned int e = 0, s_e;

	s_t = cb->args[0];
	s_h = cb->args[1];
	s_e = cb->args[2];
	s_e = cb->args[1];

	read_lock(&mrt_lock);
	rcu_read_lock();
	ip6mr_for_each_table(mrt, net) {
		if (t < s_t)
			goto next_table;
		if (t > s_t)
			s_h = 0;
		for (h = s_h; h < MFC6_LINES; h++) {
			list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
		list_for_each_entry_rcu(mfc, &mrt->mfc6_cache_list, list) {
			if (e < s_e)
				goto next_entry;
			if (ip6mr_fill_mroute(mrt, skb,
@@ -2561,8 +2585,9 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
next_entry:
			e++;
		}
			e = s_e = 0;
		}
		e = 0;
		s_e = 0;

		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
			if (e < s_e)
@@ -2580,15 +2605,13 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
		}
		spin_unlock_bh(&mfc_unres_lock);
		e = s_e = 0;
		s_h = 0;
next_table:
		t++;
	}
done:
	read_unlock(&mrt_lock);
	rcu_read_unlock();

	cb->args[2] = e;
	cb->args[1] = h;
	cb->args[1] = e;
	cb->args[0] = t;

	return skb->len;