Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 27596472 authored by Michal Kubeček, committed by David S. Miller
Browse files

ipv6: fix ECMP route replacement



When replacing an IPv6 multipath route with "ip route replace", i.e.
NLM_F_CREATE | NLM_F_REPLACE, fib6_add_rt2node() replaces only the first
matching route without fixing its siblings, resulting in a corrupted
siblings linked list; removing one of the siblings can then end in an
infinite loop.

The IPv6 ECMP implementation is a bit different from the IPv4 one, so
route replacement cannot work in exactly the same way. This should be a
reasonable approximation:

1. If the new route is ECMP-able and there is already a matching
ECMP-able route, replace it and all its siblings (if any).

2. If the new route is ECMP-able and no matching ECMP-able route exists,
replace the first matching non-ECMP-able route (if any) or just add the
new one.

3. If the new route is not ECMP-able, replace the first matching
non-ECMP-able route (if any) or add the new route.

We also need to remove the NLM_F_REPLACE flag after replacing the old
route(s) with the first nexthop of an ECMP route so that each subsequent
nexthop does not replace the previous one.

Fixes: 51ebd318 ("ipv6: add support of equal cost multipath (ECMP)")
Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 35f1b4e9
Loading
Loading
Loading
Loading
+37 −2
Original line number Diff line number Diff line
@@ -693,6 +693,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
{
	struct rt6_info *iter = NULL;
	struct rt6_info **ins;
	struct rt6_info **fallback_ins = NULL;
	int replace = (info->nlh &&
		       (info->nlh->nlmsg_flags & NLM_F_REPLACE));
	int add = (!info->nlh ||
@@ -716,9 +717,14 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
			    (info->nlh->nlmsg_flags & NLM_F_EXCL))
				return -EEXIST;
			if (replace) {
				if (rt_can_ecmp == rt6_qualify_for_ecmp(iter)) {
					found++;
					break;
				}
				if (rt_can_ecmp)
					fallback_ins = fallback_ins ?: ins;
				goto next_iter;
			}

			if (iter->dst.dev == rt->dst.dev &&
			    iter->rt6i_idev == rt->rt6i_idev &&
@@ -753,9 +759,17 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
		if (iter->rt6i_metric > rt->rt6i_metric)
			break;

next_iter:
		ins = &iter->dst.rt6_next;
	}

	if (fallback_ins && !found) {
		/* No ECMP-able route found, replace first non-ECMP one */
		ins = fallback_ins;
		iter = *ins;
		found++;
	}

	/* Reset round-robin state, if necessary */
	if (ins == &fn->leaf)
		fn->rr_ptr = NULL;
@@ -815,6 +829,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
		}

	} else {
		int nsiblings;

		if (!found) {
			if (add)
				goto add;
@@ -835,8 +851,27 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
			info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
			fn->fn_flags |= RTN_RTINFO;
		}
		nsiblings = iter->rt6i_nsiblings;
		fib6_purge_rt(iter, fn, info->nl_net);
		rt6_release(iter);

		if (nsiblings) {
			/* Replacing an ECMP route, remove all siblings */
			ins = &rt->dst.rt6_next;
			iter = *ins;
			while (iter) {
				if (rt6_qualify_for_ecmp(iter)) {
					*ins = iter->dst.rt6_next;
					fib6_purge_rt(iter, fn, info->nl_net);
					rt6_release(iter);
					nsiblings--;
				} else {
					ins = &iter->dst.rt6_next;
				}
				iter = *ins;
			}
			WARN_ON(nsiblings != 0);
		}
	}

	return 0;
+7 −4
Original line number Diff line number Diff line
@@ -2541,11 +2541,14 @@ static int ip6_route_multipath(struct fib6_config *cfg, int add)
			}
		}
		/* Because each route is added like a single route we remove
		 * this flag after the first nexthop (if there is a collision,
		 * we have already fail to add the first nexthop:
		 * fib6_add_rt2node() has reject it).
		 * these flags after the first nexthop: if there is a collision,
		 * we have already failed to add the first nexthop:
		 * fib6_add_rt2node() has rejected it; when replacing, old
		 * nexthops have been replaced by first new, the rest should
		 * be added to it.
		 */
		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
						     NLM_F_REPLACE);
		rtnh = rtnh_next(rtnh, &remaining);
	}