Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 33bd5ac5 authored by David Ahern's avatar David Ahern Committed by David S. Miller
Browse files

net/ipv6: Revert attempt to simplify route replace and append



NetworkManager likes to manage linklocal prefix routes and does so with
the NLM_F_APPEND flag, breaking attempts to simplify the IPv6 route
code and by extension enable multipath routes with device only nexthops.

Revert f34436a4 and these followup patches:
6eba08c3 ("ipv6: Only emit append events for appended routes").
ce45bded ("mlxsw: spectrum_router: Align with new route replace logic")
53b562df ("mlxsw: spectrum_router: Allow appending to dev-only routes")

Update the fib_tests cases to reflect the old behavior.

Fixes: f34436a4 ("net/ipv6: Simplify route replace and appending into multipath route")
Signed-off-by: default avatarDavid Ahern <dsahern@gmail.com>
parent d5a672ac
Loading
Loading
Loading
Loading
+24 −24
Original line number Diff line number Diff line
@@ -4756,6 +4756,12 @@ static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
	kfree(mlxsw_sp_rt6);
}

static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt)
{
	/* RTF_CACHE routes are ignored */
	return (rt->fib6_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY;
}

static struct fib6_info *
mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
{
@@ -4765,11 +4771,11 @@ mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)

static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
				 const struct fib6_info *nrt, bool append)
				 const struct fib6_info *nrt, bool replace)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;

	if (!append)
	if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace)
		return NULL;

	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
@@ -4784,7 +4790,8 @@ mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
			break;
		if (rt->fib6_metric < nrt->fib6_metric)
			continue;
		if (rt->fib6_metric == nrt->fib6_metric)
		if (rt->fib6_metric == nrt->fib6_metric &&
		    mlxsw_sp_fib6_rt_can_mp(rt))
			return fib6_entry;
		if (rt->fib6_metric > nrt->fib6_metric)
			break;
@@ -5163,7 +5170,7 @@ static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
			      const struct fib6_info *nrt, bool replace)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;

	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
		struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
@@ -5172,13 +5179,18 @@ mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
			continue;
		if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
			break;
		if (replace && rt->fib6_metric == nrt->fib6_metric)
		if (replace && rt->fib6_metric == nrt->fib6_metric) {
			if (mlxsw_sp_fib6_rt_can_mp(rt) ==
			    mlxsw_sp_fib6_rt_can_mp(nrt))
				return fib6_entry;
			if (mlxsw_sp_fib6_rt_can_mp(nrt))
				fallback = fallback ?: fib6_entry;
		}
		if (rt->fib6_metric > nrt->fib6_metric)
			return fib6_entry;
			return fallback ?: fib6_entry;
	}

	return NULL;
	return fallback;
}

static int
@@ -5304,8 +5316,7 @@ static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
}

static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
				    struct fib6_info *rt, bool replace,
				    bool append)
				    struct fib6_info *rt, bool replace)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib_node *fib_node;
@@ -5331,7 +5342,7 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
	/* Before creating a new entry, try to append route to an existing
	 * multipath entry.
	 */
	fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, append);
	fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
	if (fib6_entry) {
		err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt);
		if (err)
@@ -5339,14 +5350,6 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
		return 0;
	}

	/* We received an append event, yet did not find any route to
	 * append to.
	 */
	if (WARN_ON(append)) {
		err = -EINVAL;
		goto err_fib6_entry_append;
	}

	fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt);
	if (IS_ERR(fib6_entry)) {
		err = PTR_ERR(fib6_entry);
@@ -5364,7 +5367,6 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
err_fib6_node_entry_link:
	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
err_fib6_entry_create:
err_fib6_entry_append:
err_fib6_entry_nexthop_add:
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
	return err;
@@ -5715,7 +5717,7 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
	struct mlxsw_sp_fib_event_work *fib_work =
		container_of(work, struct mlxsw_sp_fib_event_work, work);
	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
	bool replace, append;
	bool replace;
	int err;

	rtnl_lock();
@@ -5726,10 +5728,8 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
	case FIB_EVENT_ENTRY_APPEND: /* fall through */
	case FIB_EVENT_ENTRY_ADD:
		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
		append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
		err = mlxsw_sp_router_fib6_add(mlxsw_sp,
					       fib_work->fen6_info.rt, replace,
					       append);
					       fib_work->fen6_info.rt, replace);
		if (err)
			mlxsw_sp_router_fib_abort(mlxsw_sp);
		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
+6 −0
Original line number Diff line number Diff line
@@ -66,6 +66,12 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr)
		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
}

static inline bool rt6_qualify_for_ecmp(const struct fib6_info *f6i)
{
	return (f6i->fib6_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) ==
	       RTF_GATEWAY;
}

void ip6_route_input(struct sk_buff *skb);
struct dst_entry *ip6_route_input_lookup(struct net *net,
					 struct net_device *dev,
+86 −70
Original line number Diff line number Diff line
@@ -935,20 +935,19 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
{
	struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
				    lockdep_is_held(&rt->fib6_table->tb6_lock));
	enum fib_event_type event = FIB_EVENT_ENTRY_ADD;
	struct fib6_info *iter = NULL, *match = NULL;
	struct fib6_info *iter = NULL;
	struct fib6_info __rcu **ins;
	struct fib6_info __rcu **fallback_ins = NULL;
	int replace = (info->nlh &&
		       (info->nlh->nlmsg_flags & NLM_F_REPLACE));
	int append = (info->nlh &&
		       (info->nlh->nlmsg_flags & NLM_F_APPEND));
	int add = (!info->nlh ||
		   (info->nlh->nlmsg_flags & NLM_F_CREATE));
	int found = 0;
	bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
	u16 nlflags = NLM_F_EXCL;
	int err;

	if (append)
	if (info->nlh && (info->nlh->nlmsg_flags & NLM_F_APPEND))
		nlflags |= NLM_F_APPEND;

	ins = &fn->leaf;
@@ -970,9 +969,14 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,

			nlflags &= ~NLM_F_EXCL;
			if (replace) {
				if (rt_can_ecmp == rt6_qualify_for_ecmp(iter)) {
					found++;
					break;
				}
				if (rt_can_ecmp)
					fallback_ins = fallback_ins ?: ins;
				goto next_iter;
			}

			if (rt6_duplicate_nexthop(iter, rt)) {
				if (rt->fib6_nsiblings)
@@ -986,51 +990,71 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
				fib6_metric_set(iter, RTAX_MTU, rt->fib6_pmtu);
				return -EEXIST;
			}

			/* first route that matches */
			if (!match)
				match = iter;
			/* If we have the same destination and the same metric,
			 * but not the same gateway, then the route we try to
			 * add is sibling to this route, increment our counter
			 * of siblings, and later we will add our route to the
			 * list.
			 * Only static routes (which don't have flag
			 * RTF_EXPIRES) are used for ECMPv6.
			 *
			 * To avoid long list, we only had siblings if the
			 * route have a gateway.
			 */
			if (rt_can_ecmp &&
			    rt6_qualify_for_ecmp(iter))
				rt->fib6_nsiblings++;
		}

		if (iter->fib6_metric > rt->fib6_metric)
			break;

next_iter:
		ins = &iter->fib6_next;
	}

	if (fallback_ins && !found) {
		/* No ECMP-able route found, replace first non-ECMP one */
		ins = fallback_ins;
		iter = rcu_dereference_protected(*ins,
				    lockdep_is_held(&rt->fib6_table->tb6_lock));
		found++;
	}

	/* Reset round-robin state, if necessary */
	if (ins == &fn->leaf)
		fn->rr_ptr = NULL;

	/* Link this route to others same route. */
	if (append && match) {
	if (rt->fib6_nsiblings) {
		unsigned int fib6_nsiblings;
		struct fib6_info *sibling, *temp_sibling;

		if (rt->fib6_flags & RTF_REJECT) {
			NL_SET_ERR_MSG(extack,
				       "Can not append a REJECT route");
			return -EINVAL;
		} else if (match->fib6_flags & RTF_REJECT) {
			NL_SET_ERR_MSG(extack,
				       "Can not append to a REJECT route");
			return -EINVAL;
		/* Find the first route that have the same metric */
		sibling = leaf;
		while (sibling) {
			if (sibling->fib6_metric == rt->fib6_metric &&
			    rt6_qualify_for_ecmp(sibling)) {
				list_add_tail(&rt->fib6_siblings,
					      &sibling->fib6_siblings);
				break;
			}
			sibling = rcu_dereference_protected(sibling->fib6_next,
				    lockdep_is_held(&rt->fib6_table->tb6_lock));
		}
		event = FIB_EVENT_ENTRY_APPEND;
		rt->fib6_nsiblings = match->fib6_nsiblings;
		list_add_tail(&rt->fib6_siblings, &match->fib6_siblings);
		match->fib6_nsiblings++;

		/* For each sibling in the list, increment the counter of
		 * siblings. BUG() if counters does not match, list of siblings
		 * is broken!
		 */
		fib6_nsiblings = 0;
		list_for_each_entry_safe(sibling, temp_sibling,
					 &match->fib6_siblings, fib6_siblings) {
					 &rt->fib6_siblings, fib6_siblings) {
			sibling->fib6_nsiblings++;
			BUG_ON(sibling->fib6_nsiblings != match->fib6_nsiblings);
			BUG_ON(sibling->fib6_nsiblings != rt->fib6_nsiblings);
			fib6_nsiblings++;
		}

		rt6_multipath_rebalance(match);
		BUG_ON(fib6_nsiblings != rt->fib6_nsiblings);
		rt6_multipath_rebalance(temp_sibling);
	}

	/*
@@ -1043,8 +1067,9 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
add:
		nlflags |= NLM_F_CREATE;

		err = call_fib6_entry_notifiers(info->nl_net, event, rt,
						extack);
		err = call_fib6_entry_notifiers(info->nl_net,
						FIB_EVENT_ENTRY_ADD,
						rt, extack);
		if (err)
			return err;

@@ -1062,7 +1087,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
		}

	} else {
		struct fib6_info *tmp;
		int nsiblings;

		if (!found) {
			if (add)
@@ -1077,57 +1102,48 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
		if (err)
			return err;

		/* if route being replaced has siblings, set tmp to
		 * last one, otherwise tmp is current route. this is
		 * used to set fib6_next for new route
		 */
		if (iter->fib6_nsiblings)
			tmp = list_last_entry(&iter->fib6_siblings,
					      struct fib6_info,
					      fib6_siblings);
		else
			tmp = iter;

		/* insert new route */
		atomic_inc(&rt->fib6_ref);
		rcu_assign_pointer(rt->fib6_node, fn);
		rt->fib6_next = tmp->fib6_next;
		rt->fib6_next = iter->fib6_next;
		rcu_assign_pointer(*ins, rt);

		if (!info->skip_notify)
			inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
		if (!(fn->fn_flags & RTN_RTINFO)) {
			info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
			fn->fn_flags |= RTN_RTINFO;
		}
		nsiblings = iter->fib6_nsiblings;
		iter->fib6_node = NULL;
		fib6_purge_rt(iter, fn, info->nl_net);
		if (rcu_access_pointer(fn->rr_ptr) == iter)
			fn->rr_ptr = NULL;
		fib6_info_release(iter);

		/* delete old route */
		rt = iter;

		if (rt->fib6_nsiblings) {
			struct fib6_info *tmp;

		if (nsiblings) {
			/* Replacing an ECMP route, remove all siblings */
			list_for_each_entry_safe(iter, tmp, &rt->fib6_siblings,
						 fib6_siblings) {
			ins = &rt->fib6_next;
			iter = rcu_dereference_protected(*ins,
				    lockdep_is_held(&rt->fib6_table->tb6_lock));
			while (iter) {
				if (iter->fib6_metric > rt->fib6_metric)
					break;
				if (rt6_qualify_for_ecmp(iter)) {
					*ins = iter->fib6_next;
					iter->fib6_node = NULL;
					fib6_purge_rt(iter, fn, info->nl_net);
					if (rcu_access_pointer(fn->rr_ptr) == iter)
						fn->rr_ptr = NULL;
					fib6_info_release(iter);

				rt->fib6_nsiblings--;
					nsiblings--;
					info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
				} else {
					ins = &iter->fib6_next;
				}
				iter = rcu_dereference_protected(*ins,
					lockdep_is_held(&rt->fib6_table->tb6_lock));
			}
			WARN_ON(nsiblings != 0);
		}

		WARN_ON(rt->fib6_nsiblings != 0);

		rt->fib6_node = NULL;
		fib6_purge_rt(rt, fn, info->nl_net);
		if (rcu_access_pointer(fn->rr_ptr) == rt)
			fn->rr_ptr = NULL;
		fib6_info_release(rt);
	}

	return 0;
+1 −2
Original line number Diff line number Diff line
@@ -3842,7 +3842,7 @@ static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
			lockdep_is_held(&rt->fib6_table->tb6_lock));
	while (iter) {
		if (iter->fib6_metric == rt->fib6_metric &&
		    iter->fib6_nsiblings)
		    rt6_qualify_for_ecmp(iter))
			return iter;
		iter = rcu_dereference_protected(iter->fib6_next,
				lockdep_is_held(&rt->fib6_table->tb6_lock));
@@ -4439,7 +4439,6 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
		 */
		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
						     NLM_F_REPLACE);
		cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_APPEND;
		nhn++;
	}

+0 −41
Original line number Diff line number Diff line
@@ -740,13 +740,6 @@ ipv6_rt_add()
	run_cmd "$IP -6 ro add unreachable 2001:db8:104::/64"
	log_test $? 2 "Attempt to add duplicate route - reject route"

	# iproute2 prepend only sets NLM_F_CREATE
	# - adds a new route; does NOT convert existing route to ECMP
	add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
	run_cmd "$IP -6 ro prepend 2001:db8:104::/64 via 2001:db8:103::2"
	check_route6 "2001:db8:104::/64 via 2001:db8:101::2 dev veth1 metric 1024 2001:db8:104::/64 via 2001:db8:103::2 dev veth3 metric 1024"
	log_test $? 0 "Add new route for existing prefix (w/o NLM_F_EXCL)"

	# route append with same prefix adds a new route
	# - iproute2 sets NLM_F_CREATE | NLM_F_APPEND
	add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
@@ -754,27 +747,6 @@ ipv6_rt_add()
	check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
	log_test $? 0 "Append nexthop to existing route - gw"

	add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
	run_cmd "$IP -6 ro append 2001:db8:104::/64 dev veth3"
	check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop dev veth3 weight 1"
	log_test $? 0 "Append nexthop to existing route - dev only"

	# multipath route can not have a nexthop that is a reject route
	add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
	run_cmd "$IP -6 ro append unreachable 2001:db8:104::/64"
	log_test $? 2 "Append nexthop to existing route - reject route"

	# reject route can not be converted to multipath route
	run_cmd "$IP -6 ro flush 2001:db8:104::/64"
	run_cmd "$IP -6 ro add unreachable 2001:db8:104::/64"
	run_cmd "$IP -6 ro append 2001:db8:104::/64 via 2001:db8:103::2"
	log_test $? 2 "Append nexthop to existing reject route - gw"

	run_cmd "$IP -6 ro flush 2001:db8:104::/64"
	run_cmd "$IP -6 ro add unreachable 2001:db8:104::/64"
	run_cmd "$IP -6 ro append 2001:db8:104::/64 dev veth3"
	log_test $? 2 "Append nexthop to existing reject route - dev only"

	# insert mpath directly
	add_route6 "2001:db8:104::/64" "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
	check_route6  "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
@@ -819,13 +791,6 @@ ipv6_rt_replace_single()
	check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::3 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
	log_test $? 0 "Single path with multipath"

	# single path with reject
	#
	add_initial_route6 "nexthop via 2001:db8:101::2"
	run_cmd "$IP -6 ro replace unreachable 2001:db8:104::/64"
	check_route6 "unreachable 2001:db8:104::/64 dev lo metric 1024"
	log_test $? 0 "Single path with reject route"

	# single path with single path using MULTIPATH attribute
	#
	add_initial_route6 "via 2001:db8:101::2"
@@ -873,12 +838,6 @@ ipv6_rt_replace_mpath()
	check_route6 "2001:db8:104::/64 via 2001:db8:101::3 dev veth1 metric 1024"
	log_test $? 0 "Multipath with single path via multipath attribute"

	# multipath with reject
	add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
	run_cmd "$IP -6 ro replace unreachable 2001:db8:104::/64"
	check_route6 "unreachable 2001:db8:104::/64 dev lo metric 1024"
	log_test $? 0 "Multipath with reject route"

	# route replace fails - invalid nexthop 1
	add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
	run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:111::3 nexthop via 2001:db8:103::3"