Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit dddb64bc authored by Subash Abhinov Kasiviswanathan, committed by David S. Miller
Browse files

net: Add sysctl to toggle early demux for tcp and udp



Certain systems process significant unconnected UDP workload.
It would be preferable to disable UDP early demux for those systems
and enable it for TCP only.

By disabling UDP demux, we see these slight gains on an ARM64 system-
782 -> 788Mbps unconnected single stream UDPv4
633 -> 654Mbps unconnected UDPv4 different sources

The performance impact can change based on CPU architecture and cache
sizes. There will not be much difference seen if the entire UDP hash
table is in cache.

Both sysctls are enabled by default to preserve existing behavior.

v1->v2: Change function pointer instead of adding conditional as
suggested by Stephen.

v2->v3: Read once in callers to avoid issues due to compiler
optimizations. Also update commit message with the tests.

v3->v4: Store and use read once result instead of querying pointer
again incorrectly.

v4->v5: Refactor to avoid errors due to compilation with IPV6={m,n}

Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Suggested-by: Eric Dumazet <edumazet@google.com>
Cc: Stephen Hemminger <stephen@networkplumber.org>
Cc: Tom Herbert <tom@herbertland.com>
Cc: David Miller <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 8fa96e3b
Loading
Loading
Loading
Loading
+10 −1
Original line number Diff line number Diff line
@@ -856,12 +856,21 @@ ip_dynaddr - BOOLEAN
ip_early_demux - BOOLEAN
	Optimize input packet processing down to one demux for
	certain kinds of local sockets.  Currently we only do this
	for established TCP sockets.
	for established TCP and connected UDP sockets.

	It may add an additional cost for pure routing workloads that
	reduces overall throughput, in such case you should disable it.
	Default: 1

tcp_early_demux - BOOLEAN
	Enable early demux for established TCP sockets.
	Default: 1

udp_early_demux - BOOLEAN
	Enable early demux for connected UDP sockets. Disable this if
	your system could experience more unconnected load.
	Default: 1

icmp_echo_ignore_all - BOOLEAN
	If set non-zero, then the kernel will ignore all ICMP ECHO
	requests sent to it.
+2 −0
Original line number Diff line number Diff line
@@ -95,6 +95,8 @@ struct netns_ipv4 {
	/* Shall we try to damage output packets if routing dev changes? */
	int sysctl_ip_dynaddr;
	int sysctl_ip_early_demux;
	int sysctl_tcp_early_demux;
	int sysctl_udp_early_demux;

	int sysctl_fwmark_reflect;
	int sysctl_tcp_fwmark_accept;
+4 −3
Original line number Diff line number Diff line
@@ -40,6 +40,7 @@
/* This is used to register protocols. */
struct net_protocol {
	void			(*early_demux)(struct sk_buff *skb);
	void                    (*early_demux_handler)(struct sk_buff *skb);
	int			(*handler)(struct sk_buff *skb);
	void			(*err_handler)(struct sk_buff *skb, u32 info);
	unsigned int		no_policy:1,
@@ -54,7 +55,7 @@ struct net_protocol {
#if IS_ENABLED(CONFIG_IPV6)
struct inet6_protocol {
	void	(*early_demux)(struct sk_buff *skb);

	void    (*early_demux_handler)(struct sk_buff *skb);
	int	(*handler)(struct sk_buff *skb);

	void	(*err_handler)(struct sk_buff *skb,
@@ -92,12 +93,12 @@ struct inet_protosw {
#define INET_PROTOSW_PERMANENT 0x02  /* Permanent protocols are unremovable. */
#define INET_PROTOSW_ICSK      0x04  /* Is this an inet_connection_sock? */

extern const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS];
extern struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS];
extern const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS];
extern const struct net_offload __rcu *inet6_offloads[MAX_INET_PROTOS];

#if IS_ENABLED(CONFIG_IPV6)
extern const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS];
extern struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS];
#endif

int inet_add_protocol(const struct net_protocol *prot, unsigned char num);
+1 −0
Original line number Diff line number Diff line
@@ -372,4 +372,5 @@ void udp_encap_enable(void);
#if IS_ENABLED(CONFIG_IPV6)
void udpv6_encap_enable(void);
#endif

#endif	/* _UDP_H */
+6 −2
Original line number Diff line number Diff line
@@ -1599,8 +1599,9 @@ static const struct net_protocol igmp_protocol = {
};
#endif

static const struct net_protocol tcp_protocol = {
static struct net_protocol tcp_protocol = {
	.early_demux	=	tcp_v4_early_demux,
	.early_demux_handler =  tcp_v4_early_demux,
	.handler	=	tcp_v4_rcv,
	.err_handler	=	tcp_v4_err,
	.no_policy	=	1,
@@ -1608,8 +1609,9 @@ static const struct net_protocol tcp_protocol = {
	.icmp_strict_tag_validation = 1,
};

static const struct net_protocol udp_protocol = {
static struct net_protocol udp_protocol = {
	.early_demux =	udp_v4_early_demux,
	.early_demux_handler =	udp_v4_early_demux,
	.handler =	udp_rcv,
	.err_handler =	udp_err,
	.no_policy =	1,
@@ -1720,6 +1722,8 @@ static __net_init int inet_init_net(struct net *net)
	net->ipv4.sysctl_ip_default_ttl = IPDEFTTL;
	net->ipv4.sysctl_ip_dynaddr = 0;
	net->ipv4.sysctl_ip_early_demux = 1;
	net->ipv4.sysctl_udp_early_demux = 1;
	net->ipv4.sysctl_tcp_early_demux = 1;
#ifdef CONFIG_SYSCTL
	net->ipv4.sysctl_ip_prot_sock = PROT_SOCK;
#endif
Loading