Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit f88d8ea6 authored by David Ahern's avatar David Ahern Committed by David S. Miller
Browse files

ipv6: Plumb support for nexthop object in a fib6_info



Add struct nexthop and nh_list list_head to fib6_info. nh_list is the
fib6_info side of the nexthop <-> fib_info relationship. Since a fib6_info
referencing a nexthop object can not have 'sibling' entries (the old way
of doing multipath routes), the nh_list is a union with fib6_siblings.

Add f6i_list list_head to 'struct nexthop' to track fib6_info entries
using a nexthop instance. Update __remove_nexthop_fib to walk f6_list
and delete fib entries using the nexthop.

Add a few nexthop helpers for use when a nexthop is added to fib6_info:
- nexthop_fib6_nh - return first fib6_nh in a nexthop object
- fib6_info_nh_dev moved to nexthop.h and updated to use nexthop_fib6_nh
  if the fib6_info references a nexthop object
- nexthop_path_fib6_result - similar to ipv4, select a path within a
  multipath nexthop object. If the nexthop is a blackhole, set
  fib6_result type to RTN_BLACKHOLE, and set the REJECT flag

Update the fib6_info references to check for nh and take a different path
as needed:
- rt6_qualify_for_ecmp - if a fib entry uses a nexthop object it can NOT
  be coalesced with other fib entries into a multipath route
- rt6_duplicate_nexthop - use nexthop_cmp if either fib6_info references
  a nexthop
- addrconf (host routes), RA's and info entries (anything configured via
  ndisc) does not use nexthop objects
- fib6_info_destroy_rcu - put reference to nexthop object
- fib6_purge_rt - drop fib6_info from f6i_list
- fib6_select_path - update to use the new nexthop_path_fib6_result when
  fib entry uses a nexthop object
- rt6_device_match - update to catch use of nexthop object as a blackhole
  and set fib6_type and flags.
- ip6_route_info_create - don't add space for fib6_nh if fib entry is
  going to reference a nexthop object, take a reference to nexthop object,
  disallow use of source routing
- rt6_nlmsg_size - add space for RTA_NH_ID
- add rt6_fill_node_nexthop to add nexthop data on a dump

As with ipv4, most of the changes push existing code into the else branch
of whether the fib entry uses a nexthop object.

Update the nexthop code to walk f6i_list on a nexthop deleted to remove
fib entries referencing it.

Signed-off-by: default avatarDavid Ahern <dsahern@gmail.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 4c7e8084
Loading
Loading
Loading
Loading
+5 −6
Original line number Diff line number Diff line
@@ -146,7 +146,10 @@ struct fib6_info {
	 * destination, but not the same gateway. nsiblings is just a cache
	 * to speed up lookup.
	 */
	union {
		struct list_head	fib6_siblings;
		struct list_head	nh_list;
	};
	unsigned int			fib6_nsiblings;

	refcount_t			fib6_ref;
@@ -170,6 +173,7 @@ struct fib6_info {
					unused:3;

	struct rcu_head			rcu;
	struct nexthop			*nh;
	struct fib6_nh			fib6_nh[0];
};

@@ -441,11 +445,6 @@ void rt6_get_prefsrc(const struct rt6_info *rt, struct in6_addr *addr)
	rcu_read_unlock();
}

static inline struct net_device *fib6_info_nh_dev(const struct fib6_info *f6i)
{
	return f6i->fib6_nh->fib_nh_dev;
}

int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
		 struct fib6_config *cfg, gfp_t gfp_flags,
		 struct netlink_ext_ack *extack);
+11 −2
Original line number Diff line number Diff line
@@ -27,6 +27,7 @@ struct route_info {
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/route.h>
#include <net/nexthop.h>

#define RT6_LOOKUP_F_IFACE		0x00000001
#define RT6_LOOKUP_F_REACHABLE		0x00000002
@@ -66,10 +67,13 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr)
		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
}

/* fib entries using a nexthop object can not be coalesced into
 * a multipath route
 */
static inline bool rt6_qualify_for_ecmp(const struct fib6_info *f6i)
{
	/* the RTF_ADDRCONF flag filters out RA's */
	return !(f6i->fib6_flags & RTF_ADDRCONF) &&
	return !(f6i->fib6_flags & RTF_ADDRCONF) && !f6i->nh &&
		f6i->fib6_nh->fib_nh_gw_family;
}

@@ -275,8 +279,13 @@ static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt,

static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *b)
{
	struct fib6_nh *nha = a->fib6_nh, *nhb = b->fib6_nh;
	struct fib6_nh *nha, *nhb;

	if (a->nh || b->nh)
		return nexthop_cmp(a->nh, b->nh);

	nha = a->fib6_nh;
	nhb = b->fib6_nh;
	return nha->fib_nh_dev == nhb->fib_nh_dev &&
	       ipv6_addr_equal(&nha->fib_nh_gw6, &nhb->fib_nh_gw6) &&
	       !lwtunnel_cmp_encap(nha->fib_nh_lws, nhb->fib_nh_lws);
+50 −0
Original line number Diff line number Diff line
@@ -10,6 +10,7 @@
#define __LINUX_NEXTHOP_H

#include <linux/netdevice.h>
#include <linux/route.h>
#include <linux/types.h>
#include <net/ip_fib.h>
#include <net/ip6_fib.h>
@@ -78,6 +79,7 @@ struct nh_group {
struct nexthop {
	struct rb_node		rb_node;    /* entry on netns rbtree */
	struct list_head	fi_list;    /* v4 entries using nh */
	struct list_head	f6i_list;   /* v6 entries using nh */
	struct list_head	grp_list;   /* nh group entries using this nh */
	struct net		*net;

@@ -255,4 +257,52 @@ static inline struct fib_nh *fib_info_nh(struct fib_info *fi, int nhsel)

	return &fi->fib_nh[nhsel];
}

/*
 * IPv6 variants
 */
int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
		       struct netlink_ext_ack *extack);

static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh)
{
	struct nh_info *nhi;

	if (nexthop_is_multipath(nh)) {
		nh = nexthop_mpath_select(nh, 0);
		if (!nh)
			return NULL;
	}

	nhi = rcu_dereference_rtnl(nh->nh_info);
	if (nhi->family == AF_INET6)
		return &nhi->fib6_nh;

	return NULL;
}

static inline struct net_device *fib6_info_nh_dev(struct fib6_info *f6i)
{
	struct fib6_nh *fib6_nh;

	fib6_nh = f6i->nh ? nexthop_fib6_nh(f6i->nh) : f6i->fib6_nh;
	return fib6_nh->fib_nh_dev;
}

static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash)
{
	struct nexthop *nh = res->f6i->nh;
	struct nh_info *nhi;

	nh = nexthop_select_path(nh, hash);

	nhi = rcu_dereference_rtnl(nh->nh_info);
	if (nhi->reject_nh) {
		res->fib6_type = RTN_BLACKHOLE;
		res->fib6_flags |= RTF_REJECT;
		res->nh = nexthop_fib6_nh(nh);
	} else {
		res->nh = &nhi->fib6_nh;
	}
}
#endif
+44 −0
Original line number Diff line number Diff line
@@ -106,6 +106,7 @@ static struct nexthop *nexthop_alloc(void)
	nh = kzalloc(sizeof(struct nexthop), GFP_KERNEL);
	if (nh) {
		INIT_LIST_HEAD(&nh->fi_list);
		INIT_LIST_HEAD(&nh->f6i_list);
		INIT_LIST_HEAD(&nh->grp_list);
	}
	return nh;
@@ -516,6 +517,41 @@ struct nexthop *nexthop_select_path(struct nexthop *nh, int hash)
}
EXPORT_SYMBOL_GPL(nexthop_select_path);

int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
		       struct netlink_ext_ack *extack)
{
	struct nh_info *nhi;

	/* fib6_src is unique to a fib6_info and limits the ability to cache
	 * routes in fib6_nh within a nexthop that is potentially shared
	 * across multiple fib entries. If the config wants to use source
	 * routing it can not use nexthop objects. mlxsw also does not allow
	 * fib6_src on routes.
	 */
	if (!ipv6_addr_any(&cfg->fc_src)) {
		NL_SET_ERR_MSG(extack, "IPv6 routes using source address can not use nexthop objects");
		return -EINVAL;
	}

	if (nh->is_group) {
		struct nh_group *nhg;

		nhg = rtnl_dereference(nh->nh_grp);
		if (nhg->has_v4)
			goto no_v4_nh;
	} else {
		nhi = rtnl_dereference(nh->nh_info);
		if (nhi->family == AF_INET)
			goto no_v4_nh;
	}

	return 0;
no_v4_nh:
	NL_SET_ERR_MSG(extack, "IPv6 routes can not use an IPv4 nexthop");
	return -EINVAL;
}
EXPORT_SYMBOL_GPL(fib6_check_nexthop);

static int nexthop_check_scope(struct nexthop *nh, u8 scope,
			       struct netlink_ext_ack *extack)
{
@@ -658,6 +694,7 @@ static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo)

static void __remove_nexthop_fib(struct net *net, struct nexthop *nh)
{
	struct fib6_info *f6i, *tmp;
	bool do_flush = false;
	struct fib_info *fi;

@@ -667,6 +704,13 @@ static void __remove_nexthop_fib(struct net *net, struct nexthop *nh)
	}
	if (do_flush)
		fib_flush(net);

	/* ip6_del_rt removes the entry from this list hence the _safe */
	list_for_each_entry_safe(f6i, tmp, &nh->f6i_list, nh_list) {
		/* __ip6_del_rt does a release, so do a hold here */
		fib6_info_hold(f6i);
		ipv6_stub->ip6_del_rt(net, f6i);
	}
}

static void __remove_nexthop(struct net *net, struct nexthop *nh,
+5 −0
Original line number Diff line number Diff line
@@ -2421,6 +2421,10 @@ static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
		goto out;

	for_each_fib6_node_rt_rcu(fn) {
		/* prefix routes only use builtin fib6_nh */
		if (rt->nh)
			continue;

		if (rt->fib6_nh->fib_nh_dev->ifindex != dev->ifindex)
			continue;
		if (no_gw && rt->fib6_nh->fib_nh_gw_family)
@@ -6352,6 +6356,7 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val)
	list_for_each_entry(ifa, &idev->addr_list, if_list) {
		spin_lock(&ifa->lock);
		if (ifa->rt) {
			/* host routes only use builtin fib6_nh */
			struct fib6_nh *nh = ifa->rt->fib6_nh;
			int cpu;

Loading