Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 36c82963 authored by David S. Miller
Browse files

Merge branch 'rt6_pmtu'

Martin KaFai Lau says:

====================
ipv6: Stop /128 route from disappearing after pmtu update

The series is separated from another patch series,
'ipv6: Only create RTF_CACHE route after encountering pmtu exception',
which can be found here:
http://thread.gmane.org/gmane.linux.network/359140

This series focuses on fixing the /128 route issues.  It is currently targeted
for net-next due to the amount of code churn, but it is also applicable
to net (it should apply without conflict).  The originally reported problem can be
found here:
http://thread.gmane.org/gmane.linux.network/348138



Patches 01 and 02 prepare the fib6 search to expect both the
RTF_CACHE clone and its original route to exist at the same fib6_node.

Patch 03 fixes the /128 route disappearing bug.

Patches 04 and 05 stop rt6_info from using the inet_peer's metrics, to
keep the /128 routes (like the /128 clone and its original route)
from stepping on each other's metrics.

The second patch is by 'Steffen Klassert <steffen.klassert@secunet.com>',
which I pulled from netdev.  The third patch is also mostly by
Steffen, with one minor optimization.

Many thanks to Hannes Frederic Sowa <hannes@stressinduktion.org> for
reviewing the patches and giving advice.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 355b590c afc4eef8
Loading
Loading
Loading
Loading
+0 −6
Original line number Diff line number Diff line
@@ -109,7 +109,6 @@ u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old);
extern const u32 dst_default_metrics[];

/* Flag bits carried in the same word as the metrics pointer (dst->_metrics). */
#define DST_METRICS_READ_ONLY		0x1UL
#define DST_METRICS_FORCE_OVERWRITE	0x2UL
/* Mask covering both flag bits above. */
#define DST_METRICS_FLAGS		0x3UL
/* Clear the flag bits from Y and yield the remaining value as a u32 pointer. */
#define __DST_METRICS_PTR(Y)	\
	((u32 *)((Y) & ~DST_METRICS_FLAGS))
@@ -120,11 +119,6 @@ static inline bool dst_metrics_read_only(const struct dst_entry *dst)
	return dst->_metrics & DST_METRICS_READ_ONLY;
}

/* Set the DST_METRICS_FORCE_OVERWRITE flag bit in @dst's metrics word. */
static inline void dst_metrics_set_force_overwrite(struct dst_entry *dst)
{
	dst->_metrics = dst->_metrics | DST_METRICS_FORCE_OVERWRITE;
}

void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old);

static inline void dst_destroy_metrics_generic(struct dst_entry *dst)
+1 −40
Original line number Diff line number Diff line
@@ -121,44 +121,14 @@ struct rt6_info {
	struct rt6key			rt6i_prefsrc;

	struct inet6_dev		*rt6i_idev;
	unsigned long			_rt6i_peer;

	u32				rt6i_metric;
	u32				rt6i_pmtu;
	/* more non-fragment space at head required */
	unsigned short			rt6i_nfheader_len;
	u8				rt6i_protocol;
};

/* Decode @rt's _rt6i_peer word into an inet_peer pointer via inetpeer_ptr(). */
static inline struct inet_peer *rt6_peer_ptr(struct rt6_info *rt)
{
	unsigned long peer_word = rt->_rt6i_peer;

	return inetpeer_ptr(peer_word);
}

/* Report what inetpeer_ptr_is_peer() says about @rt's _rt6i_peer word. */
static inline bool rt6_has_peer(struct rt6_info *rt)
{
	unsigned long peer_word = rt->_rt6i_peer;

	return inetpeer_ptr_is_peer(peer_word);
}

/* Store @peer into @rt's _rt6i_peer word through __inetpeer_ptr_set_peer(). */
static inline void __rt6_set_peer(struct rt6_info *rt, struct inet_peer *peer)
{
	unsigned long *slot = &rt->_rt6i_peer;

	__inetpeer_ptr_set_peer(slot, peer);
}

/*
 * Hand @peer to inetpeer_ptr_set_peer() for @rt's _rt6i_peer word and
 * pass its bool result back to the caller.
 */
static inline bool rt6_set_peer(struct rt6_info *rt, struct inet_peer *peer)
{
	unsigned long *slot = &rt->_rt6i_peer;

	return inetpeer_ptr_set_peer(slot, peer);
}

/* Initialise @rt's _rt6i_peer word from @base via inetpeer_init_ptr(). */
static inline void rt6_init_peer(struct rt6_info *rt, struct inet_peer_base *base)
{
	unsigned long *slot = &rt->_rt6i_peer;

	inetpeer_init_ptr(slot, base);
}

/*
 * Call inetpeer_transfer_peer() with @rt's _rt6i_peer word as the first
 * argument and @ort's as the second.
 */
static inline void rt6_transfer_peer(struct rt6_info *rt, struct rt6_info *ort)
{
	unsigned long *dst_slot = &rt->_rt6i_peer;
	unsigned long *src_slot = &ort->_rt6i_peer;

	inetpeer_transfer_peer(dst_slot, src_slot);
}

static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst)
{
	return ((struct rt6_info *)dst)->rt6i_idev;
@@ -189,15 +159,6 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout)
	rt0->rt6i_flags |= RTF_EXPIRES;
}

/*
 * Link @rt to @from: clear RTF_EXPIRES on @rt, call dst_hold() on @from's
 * dst_entry, and record that entry in rt->dst.from.
 */
static inline void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
{
	struct dst_entry *origin = (struct dst_entry *)from;

	rt->rt6i_flags = rt->rt6i_flags & ~RTF_EXPIRES;
	dst_hold(origin);
	rt->dst.from = origin;
}

static inline void ip6_rt_put(struct rt6_info *rt)
{
	/* dst_release() accepts a NULL parameter.
+2 −0
Original line number Diff line number Diff line
@@ -2121,6 +2121,8 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
	fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0);
	if (!fn)
		goto out;

	noflags |= RTF_CACHE;
	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->dst.dev->ifindex != dev->ifindex)
			continue;
+91 −85
Original line number Diff line number Diff line
@@ -92,6 +92,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
					   struct sk_buff *skb, u32 mtu);
static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
					struct sk_buff *skb);
static void		rt6_dst_from_metrics_check(struct rt6_info *rt);
static int rt6_score_route(struct rt6_info *rt, int oif, int strict);

#ifdef CONFIG_IPV6_ROUTE_INFO
@@ -104,65 +105,14 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *gwaddr, int ifindex);
#endif

/*
 * Look up an inet_peer for @rt's destination address and try to attach it
 * to @rt. Does nothing when inetpeer_base_ptr() yields no base or when the
 * lookup returns no peer. @create is forwarded to inet_getpeer_v6().
 */
static void rt6_bind_peer(struct rt6_info *rt, int create)
{
	struct inet_peer_base *peers = inetpeer_base_ptr(rt->_rt6i_peer);
	struct inet_peer *found;

	if (!peers)
		return;

	found = inet_getpeer_v6(peers, &rt->rt6i_dst.addr, create);
	if (!found)
		return;

	/* rt6_set_peer() returned false: release the peer we just obtained. */
	if (!rt6_set_peer(rt, found))
		inet_putpeer(found);
}

/*
 * Return @rt's peer, calling rt6_bind_peer() first when rt6_has_peer()
 * says none is attached. Returns NULL if there is still no peer after
 * the bind attempt.
 */
static struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create)
{
	if (!rt6_has_peer(rt)) {
		rt6_bind_peer(rt, create);
		if (!rt6_has_peer(rt))
			return NULL;
	}

	return rt6_peer_ptr(rt);
}

/* Shorthand for __rt6_get_peer() with its create argument set to 1. */
static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt)
{
	const int create = 1;

	return __rt6_get_peer(rt, create);
}

/*
 * NOTE(review): this listing appears to come from a diff view whose +/-
 * markers were lost, so lines from the pre-change and post-change versions
 * of this function look interleaved (e.g. the two back-to-back `if` tests
 * below). Confirm against the actual tree before reading it as one
 * coherent body.
 *
 * @dst: entry whose _metrics word may be replaced.
 * @old: the _metrics word value the caller observed (pointer plus flag bits).
 *
 * Returns a u32 metrics pointer, or NULL.
 */
static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet_peer *peer;
	u32 *p = NULL;

	if (!(rt->dst.flags & DST_HOST))
	if (rt->rt6i_flags & RTF_CACHE)
		return NULL;
	else
		return dst_cow_metrics_generic(dst, old);

	peer = rt6_get_peer_create(rt);
	if (peer) {
		u32 *old_p = __DST_METRICS_PTR(old);
		unsigned long prev, new;

		p = peer->metrics;
		/*
		 * Seed the peer's array from the old metrics when
		 * inet_metrics_new() is true for the peer or the
		 * FORCE_OVERWRITE flag bit is set in @old.
		 */
		if (inet_metrics_new(peer) ||
		    (old & DST_METRICS_FORCE_OVERWRITE))
			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);

		new = (unsigned long) p;
		prev = cmpxchg(&dst->_metrics, old, new);

		/*
		 * _metrics no longer held @old, so the swap did not happen:
		 * use the value found there instead, or NULL if that value
		 * carries the READ_ONLY flag bit.
		 */
		if (prev != old) {
			p = __DST_METRICS_PTR(prev);
			if (prev & DST_METRICS_READ_ONLY)
				p = NULL;
		}
	}
	return p;
}

static inline const void *choose_neigh_daddr(struct rt6_info *rt,
@@ -311,7 +261,6 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net,
		struct dst_entry *dst = &rt->dst;

		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
		rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
		INIT_LIST_HEAD(&rt->rt6i_siblings);
	}
	return rt;
@@ -323,7 +272,6 @@ static void ip6_dst_destroy(struct dst_entry *dst)
	struct inet6_dev *idev = rt->rt6i_idev;
	struct dst_entry *from = dst->from;

	if (!(rt->dst.flags & DST_HOST))
	dst_destroy_metrics_generic(dst);

	if (idev) {
@@ -333,11 +281,6 @@ static void ip6_dst_destroy(struct dst_entry *dst)

	dst->from = NULL;
	dst_release(from);

	if (rt6_has_peer(rt)) {
		struct inet_peer *peer = rt6_peer_ptr(rt);
		inet_putpeer(peer);
	}
}

static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -652,15 +595,33 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     u32 metric, int oif, int strict,
				     bool *do_rr)
{
	struct rt6_info *rt, *match;
	struct rt6_info *rt, *match, *cont;
	int mpri = -1;

	match = NULL;
	for (rt = rr_head; rt && rt->rt6i_metric == metric;
	     rt = rt->dst.rt6_next)
	cont = NULL;
	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
	     rt = rt->dst.rt6_next)
	}

	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	if (match || !cont)
		return match;

	for (rt = cont; rt; rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match, do_rr);

	return match;
@@ -959,7 +920,7 @@ static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,

	if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
	else if (!(rt->dst.flags & DST_HOST))
	else if (!(rt->dst.flags & DST_HOST) || !(rt->dst.flags & RTF_LOCAL))
		nrt = rt6_alloc_clone(rt, &fl6->daddr);
	else
		goto out2;
@@ -985,6 +946,7 @@ static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
	goto redo_fib6_lookup_lock;

out2:
	rt6_dst_from_metrics_check(rt);
	rt->dst.lastuse = jiffies;
	rt->dst.__use++;

@@ -1059,7 +1021,6 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
		new = &rt->dst;

		memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
		rt6_init_peer(rt, net->ipv6.peers);

		new->__use = 1;
		new->input = dst_discard;
@@ -1093,6 +1054,13 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
 *	Destination cache support functions
 */

/*
 * If @rt has a dst.from entry and @rt's metrics pointer differs from that
 * entry's, re-point @rt's metrics at the dst.from entry's metrics with
 * dst_init_metrics(..., true).
 */
static void rt6_dst_from_metrics_check(struct rt6_info *rt)
{
	struct dst_entry *origin = rt->dst.from;

	if (!origin)
		return;

	if (dst_metrics_ptr(&rt->dst) == dst_metrics_ptr(origin))
		return;

	dst_init_metrics(&rt->dst, dst_metrics_ptr(origin), true);
}

static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rt6_info *rt;
@@ -1109,6 +1077,8 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
	if (rt6_check_expired(rt))
		return NULL;

	rt6_dst_from_metrics_check(rt);

	return dst;
}

@@ -1154,14 +1124,14 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
	struct rt6_info *rt6 = (struct rt6_info *)dst;

	dst_confirm(dst);
	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
	if (mtu < dst_mtu(dst) && (rt6->rt6i_flags & RTF_CACHE)) {
		struct net *net = dev_net(dst->dev);

		rt6->rt6i_flags |= RTF_MODIFIED;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;

		dst_metric_set(dst, RTAX_MTU, mtu);
		rt6->rt6i_pmtu = mtu;
		rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
	}
}
@@ -1341,9 +1311,14 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)

static unsigned int ip6_mtu(const struct dst_entry *dst)
{
	const struct rt6_info *rt = (const struct rt6_info *)dst;
	unsigned int mtu = rt->rt6i_pmtu;
	struct inet6_dev *idev;
	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);

	if (mtu)
		goto out;

	mtu = dst_metric_raw(dst, RTAX_MTU);
	if (mtu)
		goto out;

@@ -1590,10 +1565,8 @@ int ip6_route_add(struct fib6_config *cfg)

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128) {
	if (rt->rt6i_dst.plen == 128)
		rt->dst.flags |= DST_HOST;
		dst_metrics_set_force_overwrite(&rt->dst);
	}

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
@@ -1785,6 +1758,9 @@ static int ip6_route_del(struct fib6_config *cfg)

	if (fn) {
		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
			if ((rt->rt6i_flags & RTF_CACHE) &&
			    !(cfg->fc_flags & RTF_CACHE))
				continue;
			if (cfg->fc_ifindex &&
			    (!rt->dst.dev ||
			     rt->dst.dev->ifindex != cfg->fc_ifindex))
@@ -1926,11 +1902,26 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
 *	Misc support functions
 */

static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
{
	BUG_ON(from->dst.from);

	rt->rt6i_flags &= ~RTF_EXPIRES;
	dst_hold(&from->dst);
	rt->dst.from = &from->dst;
	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
}

static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
				    const struct in6_addr *dest)
{
	struct net *net = dev_net(ort->dst.dev);
	struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
	struct rt6_info *rt;

	if (ort->rt6i_flags & RTF_CACHE)
		ort = (struct rt6_info *)ort->dst.from;

	rt = ip6_dst_alloc(net, ort->dst.dev, 0,
			   ort->rt6i_table);

	if (rt) {
@@ -1940,7 +1931,6 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,

		rt->rt6i_dst.addr = *dest;
		rt->rt6i_dst.plen = 128;
		dst_copy_metrics(&rt->dst, &ort->dst);
		rt->dst.error = ort->dst.error;
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
@@ -2372,12 +2362,21 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
	   PMTU discovery.
	 */
	if (rt->dst.dev == arg->dev &&
	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
	    (dst_mtu(&rt->dst) >= arg->mtu ||
	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
		if (rt->rt6i_flags & RTF_CACHE) {
			/* For RTF_CACHE with rt6i_pmtu == 0
			 * (i.e. a redirected route),
			 * the metrics of its rt->dst.from has already
			 * been updated.
			 */
			if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
				rt->rt6i_pmtu = arg->mtu;
		} else if (dst_mtu(&rt->dst) >= arg->mtu ||
			   (dst_mtu(&rt->dst) < arg->mtu &&
	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
			    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
		}
	}
	return 0;
}

@@ -2433,6 +2432,9 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
	if (rtm->rtm_type == RTN_LOCAL)
		cfg->fc_flags |= RTF_LOCAL;

	if (rtm->rtm_flags & RTM_F_CLONED)
		cfg->fc_flags |= RTF_CACHE;

	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
	cfg->fc_nlinfo.nlh = nlh;
	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
@@ -2603,6 +2605,7 @@ static int rt6_fill_node(struct net *net,
			 int iif, int type, u32 portid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	u32 metrics[RTAX_MAX];
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
@@ -2716,7 +2719,10 @@ static int rt6_fill_node(struct net *net,
			goto nla_put_failure;
	}

	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
	if (rt->rt6i_pmtu)
		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
	if (rtnetlink_put_metrics(skb, metrics) < 0)
		goto nla_put_failure;

	if (rt->rt6i_flags & RTF_GATEWAY) {
+0 −14
Original line number Diff line number Diff line
@@ -71,13 +71,6 @@ static int xfrm6_get_tos(const struct flowi *fl)
	return 0;
}

/*
 * Treat @xdst as an rt6_info (by cast) and initialise its peer word from
 * net->ipv6.peers via rt6_init_peer().
 */
static void xfrm6_init_dst(struct net *net, struct xfrm_dst *xdst)
{
	rt6_init_peer((struct rt6_info *)xdst, net->ipv6.peers);
}

static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst,
			   int nfheader_len)
{
@@ -106,8 +99,6 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
		return -ENODEV;
	}

	rt6_transfer_peer(&xdst->u.rt6, rt);

	/* Sheit... I remember I did this right. Apparently,
	 * it was magically lost, so this code needs audit */
	xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST |
@@ -255,10 +246,6 @@ static void xfrm6_dst_destroy(struct dst_entry *dst)
	if (likely(xdst->u.rt6.rt6i_idev))
		in6_dev_put(xdst->u.rt6.rt6i_idev);
	dst_destroy_metrics_generic(dst);
	if (rt6_has_peer(&xdst->u.rt6)) {
		struct inet_peer *peer = rt6_peer_ptr(&xdst->u.rt6);
		inet_putpeer(peer);
	}
	xfrm_dst_destroy(xdst);
}

@@ -308,7 +295,6 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
	.get_saddr =		xfrm6_get_saddr,
	.decode_session =	_decode_session6,
	.get_tos =		xfrm6_get_tos,
	.init_dst =		xfrm6_init_dst,
	.init_path =		xfrm6_init_path,
	.fill_dst =		xfrm6_fill_dst,
	.blackhole_route =	ip6_blackhole_route,