Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit f4b47a59 authored by David S. Miller
Browse files

Merge branch 'ipv4-Move-location-of-pcpu-route-cache-and-exceptions'



David Ahern says:

====================
ipv4: Move location of pcpu route cache and exceptions

This series moves IPv4 pcpu cached routes from fib_nh to fib_nh_common
to make the caches available for IPv6 nexthops (fib6_nh) with IPv4
routes. This allows a fib6_nh struct to be used with both IPv4
and IPv6 routes.

v4
- fixed memleak if encap_type is not set as noticed by Ido

v3
- dropped ipv6 patches for now. Will resubmit those once the existing
  refcnt problem is fixed

v2
- reverted patch 2 to use ifdef CONFIG_IP_ROUTE_CLASSID instead
  of IS_ENABLED(CONFIG_IP_ROUTE_CLASSID) to fix compile issues
  reported by kbuild test robot
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents a734d1f4 a5995e71
Loading
Loading
Loading
Loading
+5 −3
Original line number Diff line number Diff line
@@ -96,6 +96,11 @@ struct fib_nh_common {

	int			nhc_weight;
	atomic_t		nhc_upper_bound;

	/* v4 specific, but allows fib6_nh with v4 routes */
	struct rtable __rcu * __percpu *nhc_pcpu_rth_output;
	struct rtable __rcu     *nhc_rth_input;
	struct fnhe_hash_bucket	__rcu *nhc_exceptions;
};

struct fib_nh {
@@ -107,9 +112,6 @@ struct fib_nh {
#endif
	__be32			nh_saddr;
	int			nh_saddr_genid;
	struct rtable __rcu * __percpu *nh_pcpu_rth_output;
	struct rtable __rcu	*nh_rth_input;
	struct fnhe_hash_bucket	__rcu *nh_exceptions;
#define fib_nh_family		nh_common.nhc_family
#define fib_nh_dev		nh_common.nhc_dev
#define fib_nh_oif		nh_common.nhc_oif
+25 −23
Original line number Diff line number Diff line
@@ -159,12 +159,12 @@ static void rt_fibinfo_free(struct rtable __rcu **rtp)
	dst_release_immediate(&rt->dst);
}

static void free_nh_exceptions(struct fib_nh *nh)
static void free_nh_exceptions(struct fib_nh_common *nhc)
{
	struct fnhe_hash_bucket *hash;
	int i;

	hash = rcu_dereference_protected(nh->nh_exceptions, 1);
	hash = rcu_dereference_protected(nhc->nhc_exceptions, 1);
	if (!hash)
		return;
	for (i = 0; i < FNHE_HASH_SIZE; i++) {
@@ -212,6 +212,9 @@ void fib_nh_common_release(struct fib_nh_common *nhc)
		dev_put(nhc->nhc_dev);

	lwtstate_put(nhc->nhc_lwtstate);
	rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output);
	rt_fibinfo_free(&nhc->nhc_rth_input);
	free_nh_exceptions(nhc);
}
EXPORT_SYMBOL_GPL(fib_nh_common_release);

@@ -222,9 +225,6 @@ void fib_nh_release(struct net *net, struct fib_nh *fib_nh)
		net->ipv4.fib_num_tclassid_users--;
#endif
	fib_nh_common_release(&fib_nh->nh_common);
	free_nh_exceptions(fib_nh);
	rt_fibinfo_free_cpus(fib_nh->nh_pcpu_rth_output);
	rt_fibinfo_free(&fib_nh->nh_rth_input);
}

/* Release a nexthop info record */
@@ -491,23 +491,35 @@ int fib_nh_common_init(struct fib_nh_common *nhc, struct nlattr *encap,
		       u16 encap_type, void *cfg, gfp_t gfp_flags,
		       struct netlink_ext_ack *extack)
{
	int err;

	nhc->nhc_pcpu_rth_output = alloc_percpu_gfp(struct rtable __rcu *,
						    gfp_flags);
	if (!nhc->nhc_pcpu_rth_output)
		return -ENOMEM;

	if (encap) {
		struct lwtunnel_state *lwtstate;
		int err;

		if (encap_type == LWTUNNEL_ENCAP_NONE) {
			NL_SET_ERR_MSG(extack, "LWT encap type not specified");
			return -EINVAL;
			err = -EINVAL;
			goto lwt_failure;
		}
		err = lwtunnel_build_state(encap_type, encap, nhc->nhc_family,
					   cfg, &lwtstate, extack);
		if (err)
			return err;
			goto lwt_failure;

		nhc->nhc_lwtstate = lwtstate_get(lwtstate);
	}

	return 0;

lwt_failure:
	rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output);
	nhc->nhc_pcpu_rth_output = NULL;
	return err;
}
EXPORT_SYMBOL_GPL(fib_nh_common_init);

@@ -515,18 +527,14 @@ int fib_nh_init(struct net *net, struct fib_nh *nh,
		struct fib_config *cfg, int nh_weight,
		struct netlink_ext_ack *extack)
{
	int err = -ENOMEM;
	int err;

	nh->fib_nh_family = AF_INET;

	nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *);
	if (!nh->nh_pcpu_rth_output)
		goto err_out;

	err = fib_nh_common_init(&nh->nh_common, cfg->fc_encap,
				 cfg->fc_encap_type, cfg, GFP_KERNEL, extack);
	if (err)
		goto init_failure;
		return err;

	nh->fib_nh_oif = cfg->fc_oif;
	nh->fib_nh_gw_family = cfg->fc_gw_family;
@@ -546,12 +554,6 @@ int fib_nh_init(struct net *net, struct fib_nh *nh,
	nh->fib_nh_weight = nh_weight;
#endif
	return 0;

init_failure:
	rt_fibinfo_free_cpus(nh->nh_pcpu_rth_output);
	nh->nh_pcpu_rth_output = NULL;
err_out:
	return err;
}

#ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -1711,12 +1713,12 @@ static int call_fib_nh_notifiers(struct fib_nh *nh,
 * - if the new MTU is greater than the PMTU, don't make any change
 * - otherwise, unlock and set PMTU
 */
static void nh_update_mtu(struct fib_nh *nh, u32 new, u32 orig)
static void nh_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig)
{
	struct fnhe_hash_bucket *bucket;
	int i;

	bucket = rcu_dereference_protected(nh->nh_exceptions, 1);
	bucket = rcu_dereference_protected(nhc->nhc_exceptions, 1);
	if (!bucket)
		return;

@@ -1747,7 +1749,7 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)

	hlist_for_each_entry(nh, head, nh_hash) {
		if (nh->fib_nh_dev == dev)
			nh_update_mtu(nh, dev->mtu, orig_mtu);
			nh_update_mtu(&nh->nh_common, dev->mtu, orig_mtu);
	}
}

+34 −41
Original line number Diff line number Diff line
@@ -643,8 +643,9 @@ static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnh
	}
}

static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
				  u32 pmtu, bool lock, unsigned long expires)
static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr,
				  __be32 gw, u32 pmtu, bool lock,
				  unsigned long expires)
{
	struct fnhe_hash_bucket *hash;
	struct fib_nh_exception *fnhe;
@@ -653,17 +654,17 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
	unsigned int i;
	int depth;

	genid = fnhe_genid(dev_net(nh->fib_nh_dev));
	genid = fnhe_genid(dev_net(nhc->nhc_dev));
	hval = fnhe_hashfun(daddr);

	spin_lock_bh(&fnhe_lock);

	hash = rcu_dereference(nh->nh_exceptions);
	hash = rcu_dereference(nhc->nhc_exceptions);
	if (!hash) {
		hash = kcalloc(FNHE_HASH_SIZE, sizeof(*hash), GFP_ATOMIC);
		if (!hash)
			goto out_unlock;
		rcu_assign_pointer(nh->nh_exceptions, hash);
		rcu_assign_pointer(nhc->nhc_exceptions, hash);
	}

	hash += hval;
@@ -715,13 +716,13 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
		 * stale, so anyone caching it rechecks if this exception
		 * applies to them.
		 */
		rt = rcu_dereference(nh->nh_rth_input);
		rt = rcu_dereference(nhc->nhc_rth_input);
		if (rt)
			rt->dst.obsolete = DST_OBSOLETE_KILL;

		for_each_possible_cpu(i) {
			struct rtable __rcu **prt;
			prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i);
			prt = per_cpu_ptr(nhc->nhc_pcpu_rth_output, i);
			rt = rcu_dereference(*prt);
			if (rt)
				rt->dst.obsolete = DST_OBSOLETE_KILL;
@@ -788,10 +789,8 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
		} else {
			if (fib_lookup(net, fl4, &res, 0) == 0) {
				struct fib_nh_common *nhc = FIB_RES_NHC(res);
				struct fib_nh *nh;

				nh = container_of(nhc, struct fib_nh, nh_common);
				update_or_create_fnhe(nh, fl4->daddr, new_gw,
				update_or_create_fnhe(nhc, fl4->daddr, new_gw,
						0, false,
						jiffies + ip_rt_gc_timeout);
			}
@@ -1039,10 +1038,8 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
	rcu_read_lock();
	if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) {
		struct fib_nh_common *nhc = FIB_RES_NHC(res);
		struct fib_nh *nh;

		nh = container_of(nhc, struct fib_nh, nh_common);
		update_or_create_fnhe(nh, fl4->daddr, 0, mtu, lock,
		update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock,
				      jiffies + ip_rt_mtu_expires);
	}
	rcu_read_unlock();
@@ -1328,7 +1325,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
}

static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
static void ip_del_fnhe(struct fib_nh_common *nhc, __be32 daddr)
{
	struct fnhe_hash_bucket *hash;
	struct fib_nh_exception *fnhe, __rcu **fnhe_p;
@@ -1336,7 +1333,7 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)

	spin_lock_bh(&fnhe_lock);

	hash = rcu_dereference_protected(nh->nh_exceptions,
	hash = rcu_dereference_protected(nhc->nhc_exceptions,
					 lockdep_is_held(&fnhe_lock));
	hash += hval;

@@ -1362,9 +1359,10 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
	spin_unlock_bh(&fnhe_lock);
}

static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
static struct fib_nh_exception *find_exception(struct fib_nh_common *nhc,
					       __be32 daddr)
{
	struct fnhe_hash_bucket *hash = rcu_dereference(nh->nh_exceptions);
	struct fnhe_hash_bucket *hash = rcu_dereference(nhc->nhc_exceptions);
	struct fib_nh_exception *fnhe;
	u32 hval;

@@ -1378,7 +1376,7 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
		if (fnhe->fnhe_daddr == daddr) {
			if (fnhe->fnhe_expires &&
			    time_after(jiffies, fnhe->fnhe_expires)) {
				ip_del_fnhe(nh, daddr);
				ip_del_fnhe(nhc, daddr);
				break;
			}
			return fnhe;
@@ -1405,10 +1403,9 @@ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr)
		mtu = fi->fib_mtu;

	if (likely(!mtu)) {
		struct fib_nh *nh = container_of(nhc, struct fib_nh, nh_common);
		struct fib_nh_exception *fnhe;

		fnhe = find_exception(nh, daddr);
		fnhe = find_exception(nhc, daddr);
		if (fnhe && !time_after_eq(jiffies, fnhe->fnhe_expires))
			mtu = fnhe->fnhe_pmtu;
	}
@@ -1469,15 +1466,15 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
	return ret;
}

static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
static bool rt_cache_route(struct fib_nh_common *nhc, struct rtable *rt)
{
	struct rtable *orig, *prev, **p;
	bool ret = true;

	if (rt_is_input_route(rt)) {
		p = (struct rtable **)&nh->nh_rth_input;
		p = (struct rtable **)&nhc->nhc_rth_input;
	} else {
		p = (struct rtable **)raw_cpu_ptr(nh->nh_pcpu_rth_output);
		p = (struct rtable **)raw_cpu_ptr(nhc->nhc_pcpu_rth_output);
	}
	orig = *p;

@@ -1574,7 +1571,6 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,

	if (fi) {
		struct fib_nh_common *nhc = FIB_RES_NHC(*res);
		struct fib_nh *nh;

		if (nhc->nhc_gw_family && nhc->nhc_scope == RT_SCOPE_LINK) {
			rt->rt_gw_family = nhc->nhc_gw_family;
@@ -1587,15 +1583,19 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,

		ip_dst_init_metrics(&rt->dst, fi->fib_metrics);

		nh = container_of(nhc, struct fib_nh, nh_common);
#ifdef CONFIG_IP_ROUTE_CLASSID
		{
			struct fib_nh *nh;

			nh = container_of(nhc, struct fib_nh, nh_common);
			rt->dst.tclassid = nh->nh_tclassid;
		}
#endif
		rt->dst.lwtstate = lwtstate_get(nh->fib_nh_lws);
		rt->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate);
		if (unlikely(fnhe))
			cached = rt_bind_exception(rt, fnhe, daddr, do_cache);
		else if (do_cache)
			cached = rt_cache_route(nh, rt);
			cached = rt_cache_route(nhc, rt);
		if (unlikely(!cached)) {
			/* Routes we intend to cache in nexthop exception or
			 * FIB nexthop have the DST_NOCACHE bit clear.
@@ -1756,7 +1756,6 @@ static int __mkroute_input(struct sk_buff *skb,
	struct net_device *dev = nhc->nhc_dev;
	struct fib_nh_exception *fnhe;
	struct rtable *rth;
	struct fib_nh *nh;
	int err;
	struct in_device *out_dev;
	bool do_cache;
@@ -1804,13 +1803,12 @@ static int __mkroute_input(struct sk_buff *skb,
		}
	}

	nh = container_of(nhc, struct fib_nh, nh_common);
	fnhe = find_exception(nh, daddr);
	fnhe = find_exception(nhc, daddr);
	if (do_cache) {
		if (fnhe)
			rth = rcu_dereference(fnhe->fnhe_rth_input);
		else
			rth = rcu_dereference(nh->nh_rth_input);
			rth = rcu_dereference(nhc->nhc_rth_input);
		if (rt_cache_valid(rth)) {
			skb_dst_set_noref(skb, &rth->dst);
			goto out;
@@ -2105,10 +2103,8 @@ out: return err;
	if (res->fi) {
		if (!itag) {
			struct fib_nh_common *nhc = FIB_RES_NHC(*res);
			struct fib_nh *nh;

			nh = container_of(nhc, struct fib_nh, nh_common);
			rth = rcu_dereference(nh->nh_rth_input);
			rth = rcu_dereference(nhc->nhc_rth_input);
			if (rt_cache_valid(rth)) {
				skb_dst_set_noref(skb, &rth->dst);
				err = 0;
@@ -2139,7 +2135,6 @@ out: return err;

	if (do_cache) {
		struct fib_nh_common *nhc = FIB_RES_NHC(*res);
		struct fib_nh *nh;

		rth->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate);
		if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
@@ -2148,8 +2143,7 @@ out: return err;
			rth->dst.input = lwtunnel_input;
		}

		nh = container_of(nhc, struct fib_nh, nh_common);
		if (unlikely(!rt_cache_route(nh, rth)))
		if (unlikely(!rt_cache_route(nhc, rth)))
			rt_add_uncached_list(rth);
	}
	skb_dst_set(skb, &rth->dst);
@@ -2321,10 +2315,9 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
	do_cache &= fi != NULL;
	if (fi) {
		struct fib_nh_common *nhc = FIB_RES_NHC(*res);
		struct fib_nh *nh = container_of(nhc, struct fib_nh, nh_common);
		struct rtable __rcu **prth;

		fnhe = find_exception(nh, fl4->daddr);
		fnhe = find_exception(nhc, fl4->daddr);
		if (!do_cache)
			goto add;
		if (fnhe) {
@@ -2337,7 +2330,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
				do_cache = false;
				goto add;
			}
			prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
			prth = raw_cpu_ptr(nhc->nhc_pcpu_rth_output);
		}
		rth = rcu_dereference(*prth);
		if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst))