Merge branch 'vrf-allow-simultaneous-service-instances-in-default-and-other-VRFs' (7e225619) · Commits · e / devices / android_kernel_fairphone_FP5

Documentation/networking/ip-sysctl.txt

+12 −0

Original line number	Diff line number	Diff line
		@@ -370,6 +370,7 @@ tcp_l3mdev_accept - BOOLEAN
		derived from the listen socket to be bound to the L3 domain in
		which the packets originated. Only valid when the kernel was
		compiled with CONFIG_NET_L3_MASTER_DEV.
		Default: 0 (disabled)

		tcp_low_latency - BOOLEAN
		This is a legacy option; it no longer has any effect.
		@@ -773,6 +774,7 @@ udp_l3mdev_accept - BOOLEAN
		being received regardless of the L3 domain in which they
		originated. Only valid when the kernel was compiled with
		CONFIG_NET_L3_MASTER_DEV.
		Default: 0 (disabled)

		udp_mem - vector of 3 INTEGERs: min, pressure, max
		Number of pages allowed for queueing by all UDP sockets.
		@@ -799,6 +801,16 @@ udp_wmem_min - INTEGER
		total pages of UDP sockets exceed udp_mem pressure. The unit is byte.
		Default: 4K

		RAW variables:

		raw_l3mdev_accept - BOOLEAN
		Enabling this option allows a "global" bound socket to work
		across L3 master domains (e.g., VRFs) with packets capable of
		being received regardless of the L3 domain in which they
		originated. Only valid when the kernel was compiled with
		CONFIG_NET_L3_MASTER_DEV.
		Default: 1 (enabled)

		CIPSOv4 Variables:

		cipso_cache_enable - BOOLEAN

Documentation/networking/vrf.txt

+18 −4

Original line number	Diff line number	Diff line
		@@ -103,19 +103,33 @@ VRF device:

		or to specify the output device using cmsg and IP_PKTINFO.

		By default the scope of the port bindings for unbound sockets is
		limited to the default VRF. That is, such a socket will not be matched by
		packets arriving on interfaces enslaved to an l3mdev, and processes may
		bind to the same port if they bind to an l3mdev.

		TCP & UDP services running in the default VRF context (i.e., not bound
		to any VRF device) can work across all VRF domains by enabling the
		tcp_l3mdev_accept and udp_l3mdev_accept sysctl options:

		sysctl -w net.ipv4.tcp_l3mdev_accept=1
		sysctl -w net.ipv4.udp_l3mdev_accept=1

		These options are disabled by default so that a socket in a VRF is only
		selected for packets in that VRF. There is a similar option for RAW
		sockets, which is enabled by default for reasons of backwards compatibility.
		It allows the output device to be specified with cmsg and IP_PKTINFO while
		using a socket that is not bound to the corresponding VRF. This allows, e.g.,
		older ping implementations to be run by specifying the device, without
		executing them in the VRF. This option can be disabled so that packets
		received in a VRF context are only handled by a raw socket bound to the VRF,
		and packets in the default VRF are only handled by a socket not bound to any VRF:

		sysctl -w net.ipv4.raw_l3mdev_accept=0

		netfilter rules on the VRF device can be used to limit access to services
		running in the default VRF context as well.

		The default VRF does not have limited scope with respect to port bindings.
		That is, if a process does a wildcard bind to a port in the default VRF it
		owns the port across all VRF domains within the network namespace.

		################################################################################

		Using iproute2 for VRFs

drivers/net/vrf.c

+9 −10

Original line number	Diff line number	Diff line
		@@ -981,24 +981,23 @@ static struct sk_buff vrf_ip6_rcv(struct net_device vrf_dev,
		struct sk_buff *skb)
		{
		int orig_iif = skb->skb_iif;
		bool need_strict;
		bool need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr);
		bool is_ndisc = ipv6_ndisc_frame(skb);

		/* loopback traffic; do not push through packet taps again.
		* Reset pkt_type for upper layers to process skb
		/* loopback, multicast & non-ND link-local traffic; do not push through
		* packet taps again. Reset pkt_type for upper layers to process skb
		*/
		if (skb->pkt_type == PACKET_LOOPBACK) {
		if (skb->pkt_type == PACKET_LOOPBACK \|\| (need_strict && !is_ndisc)) {
		skb->dev = vrf_dev;
		skb->skb_iif = vrf_dev->ifindex;
		IP6CB(skb)->flags \|= IP6SKB_L3SLAVE;
		if (skb->pkt_type == PACKET_LOOPBACK)
		skb->pkt_type = PACKET_HOST;
		goto out;
		}

		/* if packet is NDISC or addressed to multicast or link-local
		* then keep the ingress interface
		*/
		need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr);
		if (!ipv6_ndisc_frame(skb) && !need_strict) {
		/* if packet is NDISC then keep the ingress interface */
		if (!is_ndisc) {
		vrf_rx_stats(vrf_dev, skb->len);
		skb->dev = vrf_dev;
		skb->skb_iif = vrf_dev->ifindex;

include/net/inet6_hashtables.h

+2 −3

Original line number	Diff line number	Diff line
		@@ -115,8 +115,7 @@ int inet6_hash(struct sock *sk);
		((__sk)->sk_family == AF_INET6) && \
		ipv6_addr_equal(&(__sk)->sk_v6_daddr, (__saddr)) && \
		ipv6_addr_equal(&(__sk)->sk_v6_rcv_saddr, (__daddr)) && \
		(!(__sk)->sk_bound_dev_if \|\| \
		((__sk)->sk_bound_dev_if == (__dif)) \|\| \
		(((__sk)->sk_bound_dev_if == (__dif)) \|\| \
		((__sk)->sk_bound_dev_if == (__sdif))) && \
		net_eq(sock_net(__sk), (__net)))

include/net/inet_hashtables.h

+17 −7

Original line number	Diff line number	Diff line
		@@ -79,6 +79,7 @@ struct inet_ehash_bucket {

		struct inet_bind_bucket {
		possible_net_t ib_net;
		int l3mdev;
		unsigned short port;
		signed char fastreuse;
		signed char fastreuseport;
		@@ -188,10 +189,21 @@ static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo)
		hashinfo->ehash_locks = NULL;
		}

		/*
		 * Check a socket's bound device against @dif / @sdif by delegating to
		 * inet_bound_dev_eq(). The first argument passed to that helper is
		 * @net's tcp_l3mdev_accept sysctl (normalized to a bool) when
		 * CONFIG_NET_L3_MASTER_DEV is enabled, and unconditionally true
		 * otherwise.
		 */
		static inline bool inet_sk_bound_dev_eq(struct net *net, int bound_dev_if,
							int dif, int sdif)
		{
			/* Without L3 master device support the flag is always true. */
			bool l3mdev_accept = true;

		#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
			l3mdev_accept = !!net->ipv4.sysctl_tcp_l3mdev_accept;
		#endif
			return inet_bound_dev_eq(l3mdev_accept, bound_dev_if, dif, sdif);
		}

		struct inet_bind_bucket *
		inet_bind_bucket_create(struct kmem_cache cachep, struct net net,
		struct inet_bind_hashbucket *head,
		const unsigned short snum);
		const unsigned short snum, int l3mdev);
		void inet_bind_bucket_destroy(struct kmem_cache *cachep,
		struct inet_bind_bucket *tb);

		@@ -282,8 +294,7 @@ static inline struct sock inet_lookup_listener(struct net net,
		#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \
		(((__sk)->sk_portpair == (__ports)) && \
		((__sk)->sk_addrpair == (__cookie)) && \
		(!(__sk)->sk_bound_dev_if \|\| \
		((__sk)->sk_bound_dev_if == (__dif)) \|\| \
		(((__sk)->sk_bound_dev_if == (__dif)) \|\| \
		((__sk)->sk_bound_dev_if == (__sdif))) && \
		net_eq(sock_net(__sk), (__net)))
		#else /* 32-bit arch */
		@@ -294,8 +305,7 @@ static inline struct sock inet_lookup_listener(struct net net,
		(((__sk)->sk_portpair == (__ports)) && \
		((__sk)->sk_daddr == (__saddr)) && \
		((__sk)->sk_rcv_saddr == (__daddr)) && \
		(!(__sk)->sk_bound_dev_if \|\| \
		((__sk)->sk_bound_dev_if == (__dif)) \|\| \
		(((__sk)->sk_bound_dev_if == (__dif)) \|\| \
		((__sk)->sk_bound_dev_if == (__sdif))) && \
		net_eq(sock_net(__sk), (__net)))
		#endif /* 64-bit arch */