Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit f185ede0 authored by Peilin Ye, committed by Greg Kroah-Hartman
Browse files

ip_gre, ip6_gre: Fix race condition on o_seqno in collect_md mode



[ Upstream commit 31c417c948d7f6909cb63f0ac3298f3c38f8ce20 ]

As pointed out by Jakub Kicinski, currently using TUNNEL_SEQ in
collect_md mode is racy for [IP6]GRE[TAP] devices.  Consider the
following sequence of events:

1. An [IP6]GRE[TAP] device is created in collect_md mode using "ip link
   add ... external".  "ip" ignores "[o]seq" if "external" is specified,
   so TUNNEL_SEQ is off, and the device is marked as NETIF_F_LLTX (i.e.
   it uses lockless TX);
2. Someone sets TUNNEL_SEQ on outgoing skb's, using e.g.
   bpf_skb_set_tunnel_key() in an eBPF program attached to this device;
3. gre_fb_xmit() or __gre6_xmit() processes these skb's:

	gre_build_header(skb, tun_hlen,
			 flags, protocol,
			 tunnel_id_to_key32(tun_info->key.tun_id),
			 (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++)
					      : 0);   ^^^^^^^^^^^^^^^^^

Since we are not using the TX lock (&txq->_xmit_lock), multiple CPUs may
try to do this tunnel->o_seqno++ in parallel, which is racy.  Fix it by
making o_seqno atomic_t.

As mentioned by Eric Dumazet in commit b790e01a ("ip_gre: lockless
xmit"), making o_seqno atomic_t increases "chance for packets being out
of order at receiver" when NETIF_F_LLTX is on.

Maybe a better fix would be:

1. Do not ignore "oseq" in external mode.  Users MUST specify "oseq" if
   they want the kernel to allow sequencing of outgoing packets;
2. Reject all outgoing TUNNEL_SEQ packets if the device was not created
   with "oseq".

Unfortunately, that would break userspace.

We could now make [IP6]GRE[TAP] devices always NETIF_F_LLTX, but let us
do it in separate patches to keep this fix minimal.

Suggested-by: Jakub Kicinski <kuba@kernel.org>
Fixes: 77a5196a ("gre: add sequence number for collect md mode.")
Signed-off-by: Peilin Ye <peilin.ye@bytedance.com>
Acked-by: William Tu <u9012063@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Stable-dep-of: d80fc101d2eb ("erspan: get the proto with the md version for collect_md")
Signed-off-by: Sasha Levin <sashal@kernel.org>
parent 0eb3ec0a
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -57,7 +57,7 @@ struct ip6_tnl {

	/* These fields used only by GRE */
	__u32 i_seqno;	/* The last seen seqno	*/
	__u32 o_seqno;	/* The last output seqno */
	atomic_t o_seqno;	/* The last output seqno */
	int hlen;       /* tun_hlen + encap_hlen */
	int tun_hlen;	/* Precalculated header length */
	int encap_hlen; /* Encap header length (FOU,GUE) */
+1 −1
Original line number Diff line number Diff line
@@ -113,7 +113,7 @@ struct ip_tunnel {

	/* These four fields used only by GRE */
	u32		i_seqno;	/* The last seen seqno	*/
	u32		o_seqno;	/* The last output seqno */
	atomic_t	o_seqno;	/* The last output seqno */
	int		tun_hlen;	/* Precalculated header length */

	/* These four fields used only by ERSPAN */
+3 −3
Original line number Diff line number Diff line
@@ -437,7 +437,7 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
	/* Push GRE header. */
	gre_build_header(skb, tunnel->tun_hlen,
			 flags, proto, tunnel->parms.o_key,
			 (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
			 (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);

	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}
@@ -475,7 +475,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
		(TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
	gre_build_header(skb, tunnel_hlen, flags, proto,
			 tunnel_id_to_key32(tun_info->key.tun_id),
			 (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
			 (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);

	ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);

@@ -557,7 +557,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
	}

	gre_build_header(skb, 8, TUNNEL_SEQ,
			 proto, 0, htonl(tunnel->o_seqno++));
			 proto, 0, htonl(atomic_fetch_inc(&tunnel->o_seqno)));

	ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);

+4 −3
Original line number Diff line number Diff line
@@ -754,7 +754,7 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
		gre_build_header(skb, tun_hlen,
				 flags, protocol,
				 tunnel_id_to_key32(tun_info->key.tun_id),
				 (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++)
				 (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno))
						      : 0);

	} else {
@@ -765,7 +765,8 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,

		gre_build_header(skb, tunnel->tun_hlen, flags,
				 protocol, tunnel->parms.o_key,
				 (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
				 (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno))
						      : 0);
	}

	return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
@@ -1048,7 +1049,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
	/* Push GRE header. */
	proto = (t->parms.erspan_ver == 1) ? htons(ETH_P_ERSPAN)
					   : htons(ETH_P_ERSPAN2);
	gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(t->o_seqno++));
	gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(atomic_fetch_inc(&t->o_seqno)));

	/* TooBig packet may have updated dst->dev's mtu */
	if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu)