Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 3c82a21f authored by Robert Shearman's avatar Robert Shearman Committed by David S. Miller
Browse files

net: allow binding socket in a VRF when there's an unbound socket



Change the inet socket lookup to avoid packets arriving on a device
enslaved to an l3mdev from matching unbound sockets by removing the
wildcard for non sk_bound_dev_if and instead relying on check against
the secondary device index, which will be 0 when the input device is
not enslaved to an l3mdev and so match against an unbound socket and
not match when the input device is enslaved.

Change the socket binding to take the l3mdev into account to allow an
unbound socket to not conflict sockets bound to an l3mdev given the
datapath isolation now guaranteed.

Signed-off-by: default avatarRobert Shearman <rshearma@vyatta.att-mail.com>
Signed-off-by: default avatarMike Manning <mmanning@vyatta.att-mail.com>
Reviewed-by: default avatarDavid Ahern <dsahern@gmail.com>
Tested-by: default avatarDavid Ahern <dsahern@gmail.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent f601a85b
Loading
Loading
Loading
Loading
+5 −4
Original line number Diff line number Diff line
@@ -103,6 +103,11 @@ VRF device:

or to specify the output device using cmsg and IP_PKTINFO.

By default the scope of the port bindings for unbound sockets is
limited to the default VRF. That is, it will not be matched by packets
arriving on interfaces enslaved to an l3mdev and processes may bind to
the same port if they bind to an l3mdev.

TCP & UDP services running in the default VRF context (ie., not bound
to any VRF device) can work across all VRF domains by enabling the
tcp_l3mdev_accept and udp_l3mdev_accept sysctl options:
@@ -112,10 +117,6 @@ tcp_l3mdev_accept and udp_l3mdev_accept sysctl options:
netfilter rules on the VRF device can be used to limit access to services
running in the default VRF context as well.

The default VRF does not have limited scope with respect to port bindings.
That is, if a process does a wildcard bind to a port in the default VRF it
owns the port across all VRF domains within the network namespace.

################################################################################

Using iproute2 for VRFs
+2 −3
Original line number Diff line number Diff line
@@ -115,8 +115,7 @@ int inet6_hash(struct sock *sk);
	 ((__sk)->sk_family == AF_INET6)			&&	\
	 ipv6_addr_equal(&(__sk)->sk_v6_daddr, (__saddr))		&&	\
	 ipv6_addr_equal(&(__sk)->sk_v6_rcv_saddr, (__daddr))	&&	\
	 (!(__sk)->sk_bound_dev_if	||				\
	   ((__sk)->sk_bound_dev_if == (__dif))	||			\
	 (((__sk)->sk_bound_dev_if == (__dif))	||			\
	  ((__sk)->sk_bound_dev_if == (__sdif)))		&&	\
	 net_eq(sock_net(__sk), (__net)))

+6 −7
Original line number Diff line number Diff line
@@ -79,6 +79,7 @@ struct inet_ehash_bucket {

struct inet_bind_bucket {
	possible_net_t		ib_net;
	int			l3mdev;
	unsigned short		port;
	signed char		fastreuse;
	signed char		fastreuseport;
@@ -191,7 +192,7 @@ static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo)
struct inet_bind_bucket *
inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net,
			struct inet_bind_hashbucket *head,
			const unsigned short snum);
			const unsigned short snum, int l3mdev);
void inet_bind_bucket_destroy(struct kmem_cache *cachep,
			      struct inet_bind_bucket *tb);

@@ -282,8 +283,7 @@ static inline struct sock *inet_lookup_listener(struct net *net,
#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \
	(((__sk)->sk_portpair == (__ports))			&&	\
	 ((__sk)->sk_addrpair == (__cookie))			&&	\
	 (!(__sk)->sk_bound_dev_if	||				\
	   ((__sk)->sk_bound_dev_if == (__dif))			||	\
	 (((__sk)->sk_bound_dev_if == (__dif))			||	\
	  ((__sk)->sk_bound_dev_if == (__sdif)))		&&	\
	 net_eq(sock_net(__sk), (__net)))
#else /* 32-bit arch */
@@ -294,8 +294,7 @@ static inline struct sock *inet_lookup_listener(struct net *net,
	(((__sk)->sk_portpair == (__ports))		&&		\
	 ((__sk)->sk_daddr	== (__saddr))		&&		\
	 ((__sk)->sk_rcv_saddr	== (__daddr))		&&		\
	 (!(__sk)->sk_bound_dev_if	||				\
	   ((__sk)->sk_bound_dev_if == (__dif))		||		\
	 (((__sk)->sk_bound_dev_if == (__dif))		||		\
	  ((__sk)->sk_bound_dev_if == (__sdif)))	&&		\
	 net_eq(sock_net(__sk), (__net)))
#endif /* 64-bit arch */
+13 −0
Original line number Diff line number Diff line
@@ -130,6 +130,19 @@ static inline int inet_request_bound_dev_if(const struct sock *sk,
	return sk->sk_bound_dev_if;
}

static inline int inet_sk_bound_l3mdev(const struct sock *sk)
{
#ifdef CONFIG_NET_L3_MASTER_DEV
	struct net *net = sock_net(sk);

	if (!net->ipv4.sysctl_tcp_l3mdev_accept)
		return l3mdev_master_ifindex_by_index(net,
						      sk->sk_bound_dev_if);
#endif

	return 0;
}

struct inet_cork {
	unsigned int		flags;
	__be32			addr;
+10 −3
Original line number Diff line number Diff line
@@ -183,7 +183,9 @@ inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, int *
	int i, low, high, attempt_half;
	struct inet_bind_bucket *tb;
	u32 remaining, offset;
	int l3mdev;

	l3mdev = inet_sk_bound_l3mdev(sk);
	attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0;
other_half_scan:
	inet_get_local_port_range(net, &low, &high);
@@ -219,7 +221,8 @@ inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, int *
						  hinfo->bhash_size)];
		spin_lock_bh(&head->lock);
		inet_bind_bucket_for_each(tb, &head->chain)
			if (net_eq(ib_net(tb), net) && tb->port == port) {
			if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
			    tb->port == port) {
				if (!inet_csk_bind_conflict(sk, tb, false, false))
					goto success;
				goto next_port;
@@ -293,6 +296,9 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
	struct net *net = sock_net(sk);
	struct inet_bind_bucket *tb = NULL;
	kuid_t uid = sock_i_uid(sk);
	int l3mdev;

	l3mdev = inet_sk_bound_l3mdev(sk);

	if (!port) {
		head = inet_csk_find_open_port(sk, &tb, &port);
@@ -306,11 +312,12 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
					  hinfo->bhash_size)];
	spin_lock_bh(&head->lock);
	inet_bind_bucket_for_each(tb, &head->chain)
		if (net_eq(ib_net(tb), net) && tb->port == port)
		if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
		    tb->port == port)
			goto tb_found;
tb_not_found:
	tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
				     net, head, port);
				     net, head, port, l3mdev);
	if (!tb)
		goto fail_unlock;
tb_found:
Loading