Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 6462de8c authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'l3mdev-accept'



David Ahern says:

====================
net: Allow accepted sockets to be bound to l3mdev domain

Allow accepted sockets to derive their sk_bound_dev_if setting from the
l3mdev domain in which the packets originated. This version adds a sysctl
to control whether the setting is inherited, making the functionality
similar to sk_mark and its sysctl_tcp_fwmark_accept setting.

This effectively allow a process to have a "VRF-global" listen socket,
with child sockets bound to the VRF device in which the packet originated.
A similar behavior can be achieved using sk_mark, but a solution using marks
is incomplete as it does not handle duplicate addresses in different L3
domains/VRFs. Allowing sockets to inherit the sk_bound_dev_if from l3mdev
domain provides a complete solution.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents cc9da6cc 6dd9a14e
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -335,6 +335,14 @@ tcp_keepalive_intvl - INTEGER
	after probes started. Default value: 75sec i.e. connection
	will be aborted after ~11 minutes of retries.

tcp_l3mdev_accept - BOOLEAN
	Enables child sockets to inherit the L3 master device index.
	Enabling this option allows a "global" listen socket to work
	across L3 master domains (e.g., VRFs) with connected sockets
	derived from the listen socket to be bound to the L3 domain in
	which the packets originated. Only valid when the kernel was
	compiled with CONFIG_NET_L3_MASTER_DEV.

tcp_low_latency - BOOLEAN
	If set, the TCP stack makes decisions that prefer lower
	latency as opposed to higher throughput.  By default, this
+14 −0
Original line number Diff line number Diff line
@@ -28,6 +28,7 @@
#include <net/request_sock.h>
#include <net/netns/hash.h>
#include <net/tcp_states.h>
#include <net/l3mdev.h>

/** struct ip_options - IP Options
 *
@@ -113,6 +114,19 @@ static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb)
	return sk->sk_mark;
}

static inline int inet_request_bound_dev_if(const struct sock *sk,
					    struct sk_buff *skb)
{
#ifdef CONFIG_NET_L3_MASTER_DEV
	struct net *net = sock_net(sk);

	if (!sk->sk_bound_dev_if && net->ipv4.sysctl_tcp_l3mdev_accept)
		return l3mdev_master_ifindex_by_index(net, skb->skb_iif);
#endif

	return sk->sk_bound_dev_if;
}

struct inet_cork {
	unsigned int		flags;
	__be32			addr;
+23 −0
Original line number Diff line number Diff line
@@ -51,6 +51,24 @@ static inline int l3mdev_master_ifindex(struct net_device *dev)
	return ifindex;
}

static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex)
{
	struct net_device *dev;
	int rc = 0;

	if (likely(ifindex)) {
		rcu_read_lock();

		dev = dev_get_by_index_rcu(net, ifindex);
		if (dev)
			rc = l3mdev_master_ifindex_rcu(dev);

		rcu_read_unlock();
	}

	return rc;
}

/* get index of an interface to use for FIB lookups. For devices
 * enslaved to an L3 master device FIB lookups are based on the
 * master index
@@ -167,6 +185,11 @@ static inline int l3mdev_master_ifindex(struct net_device *dev)
	return 0;
}

static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex)
{
	return 0;
}

static inline int l3mdev_fib_oif_rcu(struct net_device *dev)
{
	return dev ? dev->ifindex : 0;
+3 −0
Original line number Diff line number Diff line
@@ -86,6 +86,9 @@ struct netns_ipv4 {

	int sysctl_fwmark_reflect;
	int sysctl_tcp_fwmark_accept;
#ifdef CONFIG_NET_L3_MASTER_DEV
	int sysctl_tcp_l3mdev_accept;
#endif
	int sysctl_tcp_mtu_probing;
	int sysctl_tcp_base_mss;
	int sysctl_tcp_probe_threshold;
+2 −2
Original line number Diff line number Diff line
@@ -351,7 +351,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
	treq->snt_synack.v64	= 0;
	treq->tfo_listener	= false;

	ireq->ir_iif = sk->sk_bound_dev_if;
	ireq->ir_iif = inet_request_bound_dev_if(sk, skb);

	/* We throwed the options of the initial SYN away, so we hope
	 * the ACK carries the same options again (see RFC1122 4.2.3.8)
@@ -371,7 +371,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
	 * hasn't changed since we received the original syn, but I see
	 * no easy way to do this.
	 */
	flowi4_init_output(&fl4, sk->sk_bound_dev_if, ireq->ir_mark,
	flowi4_init_output(&fl4, ireq->ir_iif, ireq->ir_mark,
			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP,
			   inet_sk_flowi_flags(sk),
			   opt->srr ? opt->faddr : ireq->ir_rmt_addr,
Loading