Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 230583c1 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'udp-fix-early-demux-for-mcast-packets'



Paolo Abeni says:

====================
udp: fix early demux for mcast packets

Currently the early demux callbacks do not perform source address validation.
This is not an issue for TCP or UDP unicast, where the early demux
is only allowed for connected sockets and the source address is validated
for the first packet and never change.

The UDP protocol currently allows early demux also for unconnected multicast
sockets, and we are not currently doing any validation for them, after that
the first packet lands on the socket: beyond ignoring the rp_filter - if
enabled - any kind of martian sources are also allowed.

This series addresses the issue allowing the early demux callback to return an
error code, and performing the proper checks for unconnected UDP multicast
sockets before leveraging the rx dst cache.

Alternatively we could disable the early demux for unconnected mcast sockets,
but that would cause relevant performance regression - around 50% - while with
this series, with full rp_filter in place, we keep the regression to a more
moderate level.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents d41bb33b bc044e8d
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -39,8 +39,8 @@

/* This is used to register protocols. */
struct net_protocol {
	void			(*early_demux)(struct sk_buff *skb);
	void                    (*early_demux_handler)(struct sk_buff *skb);
	int			(*early_demux)(struct sk_buff *skb);
	int			(*early_demux_handler)(struct sk_buff *skb);
	int			(*handler)(struct sk_buff *skb);
	void			(*err_handler)(struct sk_buff *skb, u32 info);
	unsigned int		no_policy:1,
+3 −1
Original line number Diff line number Diff line
@@ -175,7 +175,9 @@ static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4
	fl4->fl4_gre_key = gre_key;
	return ip_route_output_key(net, fl4);
}

int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			  u8 tos, struct net_device *dev,
			  struct in_device *in_dev, u32 *itag);
int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src,
			 u8 tos, struct net_device *devin);
int ip_route_input_rcu(struct sk_buff *skb, __be32 dst, __be32 src,
+1 −1
Original line number Diff line number Diff line
@@ -345,7 +345,7 @@ void tcp_v4_err(struct sk_buff *skb, u32);

void tcp_shutdown(struct sock *sk, int how);

void tcp_v4_early_demux(struct sk_buff *skb);
int tcp_v4_early_demux(struct sk_buff *skb);
int tcp_v4_rcv(struct sk_buff *skb);

int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
+1 −1
Original line number Diff line number Diff line
@@ -259,7 +259,7 @@ static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags,
	return __skb_recv_udp(sk, flags, noblock, &peeked, &off, err);
}

void udp_v4_early_demux(struct sk_buff *skb);
int udp_v4_early_demux(struct sk_buff *skb);
bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst);
int udp_get_port(struct sock *sk, unsigned short snum,
		 int (*saddr_cmp)(const struct sock *,
+15 −10
Original line number Diff line number Diff line
@@ -311,9 +311,10 @@ static inline bool ip_rcv_options(struct sk_buff *skb)
static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct rtable *rt;
	int (*edemux)(struct sk_buff *skb);
	struct net_device *dev = skb->dev;
	void (*edemux)(struct sk_buff *skb);
	struct rtable *rt;
	int err;

	/* if ingress device is enslaved to an L3 master device pass the
	 * skb to its handler for processing
@@ -331,7 +332,9 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)

		ipprot = rcu_dereference(inet_protos[protocol]);
		if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
			edemux(skb);
			err = edemux(skb);
			if (unlikely(err))
				goto drop_error;
			/* must reload iph, skb->head might have changed */
			iph = ip_hdr(skb);
		}
@@ -342,13 +345,10 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
	 *	how the packet travels inside Linux networking.
	 */
	if (!skb_valid_dst(skb)) {
		int err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
		err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
					   iph->tos, dev);
		if (unlikely(err)) {
			if (err == -EXDEV)
				__NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
			goto drop;
		}
		if (unlikely(err))
			goto drop_error;
	}

#ifdef CONFIG_IP_ROUTE_CLASSID
@@ -399,6 +399,11 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
drop:
	kfree_skb(skb);
	return NET_RX_DROP;

drop_error:
	if (err == -EXDEV)
		__NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
	goto drop;
}

/*
Loading