Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 7162f05f authored by Kuniyuki Iwashima's avatar Kuniyuki Iwashima Committed by Greg Kroah-Hartman
Browse files

tcp/udp: Make early_demux back namespacified.



commit 11052589cf5c0bab3b4884d423d5f60c38fcf25d upstream.

Commit e21145a9 ("ipv4: namespacify ip_early_demux sysctl knob") made
it possible to enable/disable early_demux on a per-netns basis.  Then, we
introduced two knobs, tcp_early_demux and udp_early_demux, to switch it for
TCP/UDP in commit dddb64bc ("net: Add sysctl to toggle early demux for
tcp and udp").  However, the .proc_handler() was wrong and actually
disabled us from changing the behaviour in each netns.

We can execute early_demux if net.ipv4.ip_early_demux is on and each proto
.early_demux() handler is not NULL.  When we toggle (tcp|udp)_early_demux,
the change itself is saved in each netns variable, but the .early_demux()
handler is a global variable, so the handler is switched based on the
init_net's sysctl variable.  Thus, netns (tcp|udp)_early_demux knobs have
nothing to do with the logic.  Whether we CAN execute proto .early_demux()
is always decided by init_net's sysctl knob, and whether we DO it or not is
by each netns ip_early_demux knob.

This patch namespacifies (tcp|udp)_early_demux again.  For now, the users
of the .early_demux() handler are TCP and UDP only, and they are called
directly to avoid retpoline.  So, we can remove the .early_demux() handler
from inet6?_protos and need not dereference them in ip6?_rcv_finish_core().
If another proto needs .early_demux(), we can restore it at that time.

Fixes: dddb64bc ("net: Add sysctl to toggle early demux for tcp and udp")
Signed-off-by: default avatarKuniyuki Iwashima <kuniyu@amazon.com>
Link: https://lore.kernel.org/r/20220713175207.7727-1-kuniyu@amazon.com


Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
parent febaa6aa
Loading
Loading
Loading
Loading
+0 −4
Original line number Diff line number Diff line
@@ -39,8 +39,6 @@

/* This is used to register protocols. */
struct net_protocol {
	int			(*early_demux)(struct sk_buff *skb);
	int			(*early_demux_handler)(struct sk_buff *skb);
	int			(*handler)(struct sk_buff *skb);
	void			(*err_handler)(struct sk_buff *skb, u32 info);
	unsigned int		no_policy:1,
@@ -54,8 +52,6 @@ struct net_protocol {

#if IS_ENABLED(CONFIG_IPV6)
struct inet6_protocol {
	void	(*early_demux)(struct sk_buff *skb);
	void    (*early_demux_handler)(struct sk_buff *skb);
	int	(*handler)(struct sk_buff *skb);

	void	(*err_handler)(struct sk_buff *skb,
+2 −0
Original line number Diff line number Diff line
@@ -901,6 +901,8 @@ static inline int tcp_v6_sdif(const struct sk_buff *skb)
#endif
	return 0;
}

void tcp_v6_early_demux(struct sk_buff *skb);
#endif

static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb)
+1 −0
Original line number Diff line number Diff line
@@ -173,6 +173,7 @@ typedef struct sock *(*udp_lookup_t)(struct sk_buff *skb, __be16 sport,
struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
				struct udphdr *uh, udp_lookup_t lookup);
int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
void udp_v6_early_demux(struct sk_buff *skb);

struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
				  netdev_features_t features);
+2 −12
Original line number Diff line number Diff line
@@ -1678,12 +1678,7 @@ static const struct net_protocol igmp_protocol = {
};
#endif

/* thinking of making this const? Don't.
 * early_demux can change based on sysctl.
 */
static struct net_protocol tcp_protocol = {
	.early_demux	=	tcp_v4_early_demux,
	.early_demux_handler =  tcp_v4_early_demux,
static const struct net_protocol tcp_protocol = {
	.handler	=	tcp_v4_rcv,
	.err_handler	=	tcp_v4_err,
	.no_policy	=	1,
@@ -1691,12 +1686,7 @@ static struct net_protocol tcp_protocol = {
	.icmp_strict_tag_validation = 1,
};

/* thinking of making this const? Don't.
 * early_demux can change based on sysctl.
 */
static struct net_protocol udp_protocol = {
	.early_demux =	udp_v4_early_demux,
	.early_demux_handler =	udp_v4_early_demux,
static const struct net_protocol udp_protocol = {
	.handler =	udp_rcv,
	.err_handler =	udp_err,
	.no_policy =	1,
+21 −11
Original line number Diff line number Diff line
@@ -306,29 +306,39 @@ static inline bool ip_rcv_options(struct sk_buff *skb, struct net_device *dev)
	return true;
}

int udp_v4_early_demux(struct sk_buff *);
int tcp_v4_early_demux(struct sk_buff *);
static int ip_rcv_finish_core(struct net *net, struct sock *sk,
			      struct sk_buff *skb, struct net_device *dev)
{
	const struct iphdr *iph = ip_hdr(skb);
	int (*edemux)(struct sk_buff *skb);
	struct rtable *rt;
	int err;

	if (net->ipv4.sysctl_ip_early_demux &&
	if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) &&
	    !skb_dst(skb) &&
	    !skb->sk &&
	    !ip_is_fragment(iph)) {
		const struct net_protocol *ipprot;
		int protocol = iph->protocol;
		switch (iph->protocol) {
		case IPPROTO_TCP:
			if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux)) {
				tcp_v4_early_demux(skb);

		ipprot = rcu_dereference(inet_protos[protocol]);
		if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
			err = edemux(skb);
				/* must reload iph, skb->head might have changed */
				iph = ip_hdr(skb);
			}
			break;
		case IPPROTO_UDP:
			if (READ_ONCE(net->ipv4.sysctl_udp_early_demux)) {
				err = udp_v4_early_demux(skb);
				if (unlikely(err))
					goto drop_error;

				/* must reload iph, skb->head might have changed */
				iph = ip_hdr(skb);
			}
			break;
		}
	}

	/*
Loading