Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c5f40c90 authored by Lorenzo Colitti, committed by Ian Maund
Browse files

net: support marking accepting TCP sockets



When using mark-based routing, sockets returned from accept()
may need to be marked differently depending on the incoming
connection request.

This is the case, for example, if different socket marks identify
different networks: a listening socket may want to accept
connections from all networks, but each connection should be
marked with the network that the request came in on, so that
subsequent packets are sent on the correct network.

This patch adds a sysctl to mark TCP sockets based on the fwmark
of the incoming SYN packet. If enabled, and an unmarked socket
receives a SYN, then the SYN packet's fwmark is written to the
connection's inet_request_sock, and later written back to the
accepted socket when the connection is established.  If the
socket already has a nonzero mark, then the behaviour is the same
as it is today, i.e., the listening socket's fwmark is used.

Black-box tested using user-mode linux:

- IPv4/IPv6 SYN+ACK, FIN, etc. packets are routed based on the
  mark of the incoming SYN packet.
- The socket returned by accept() is marked with the mark of the
  incoming SYN packet.
- Tested with syncookies=1 and syncookies=2.

Change-Id: I26bc1eceefd2c588d73b921865ab70e4645ade57
Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
Git-commit: 6ba3a0e3b112bdb47858e97aa763706ba26ca5ea
Git-repo: https://android.googlesource.com/kernel/common.git


Signed-off-by: Ian Maund <imaund@codeaurora.org>
parent 9366fad7
Loading
Loading
Loading
Loading
+10 −0
Original line number Original line Diff line number Diff line
@@ -477,6 +477,16 @@ tcp_fastopen - INTEGER


	See include/net/tcp.h and the code for more details.
	See include/net/tcp.h and the code for more details.


tcp_fwmark_accept - BOOLEAN
	If set, incoming connections to listening sockets that do not have a
	socket mark will set the mark of the accepting socket to the fwmark of
	the incoming SYN packet. This will cause all packets on that connection
	(starting from the first SYNACK) to be sent with that fwmark. The
	listening socket's mark is unchanged. Listening sockets that already
	have a fwmark set via setsockopt(SOL_SOCKET, SO_MARK, ...) are
	unaffected.
	Default: 0

tcp_syn_retries - INTEGER
tcp_syn_retries - INTEGER
	Number of times initial SYNs for an active TCP connection attempt
	Number of times initial SYNs for an active TCP connection attempt
	will be retransmitted. Should not be higher than 255. Default value
	will be retransmitted. Should not be higher than 255. Default value
+9 −0
Original line number Original line Diff line number Diff line
@@ -88,6 +88,7 @@ struct inet_request_sock {
				acked	   : 1,
				acked	   : 1,
				no_srccheck: 1;
				no_srccheck: 1;
	kmemcheck_bitfield_end(flags);
	kmemcheck_bitfield_end(flags);
	u32                     ir_mark;
	struct ip_options_rcu	*opt;
	struct ip_options_rcu	*opt;
};
};


@@ -96,6 +97,14 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
	return (struct inet_request_sock *)sk;
	return (struct inet_request_sock *)sk;
}
}


/*
 * Pick the mark to record for an incoming connection request.
 *
 * The listening socket's own mark (sk->sk_mark) is used whenever it is
 * nonzero. Only when the socket carries no mark and the per-namespace
 * sysctl_tcp_fwmark_accept setting is nonzero does the mark of the
 * incoming packet (skb->mark) take its place.
 *
 * @sk:  socket the request arrived on
 * @skb: packet whose mark may be used
 *
 * Returns the selected mark value.
 */
static inline u32 inet_request_mark(struct sock *sk, struct sk_buff *skb)
{
	u32 mark = sk->sk_mark;

	/* Fall back to the packet's mark only for an unmarked socket. */
	if (mark == 0 && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept)
		mark = skb->mark;

	return mark;
}

struct inet_cork {
struct inet_cork {
	unsigned int		flags;
	unsigned int		flags;
	__be32			addr;
	__be32			addr;
+1 −0
Original line number Original line Diff line number Diff line
@@ -65,6 +65,7 @@ struct netns_ipv4 {
	int sysctl_tcp_ecn;
	int sysctl_tcp_ecn;


	int sysctl_fwmark_reflect;
	int sysctl_fwmark_reflect;
	int sysctl_tcp_fwmark_accept;


	kgid_t sysctl_ping_group_range[2];
	kgid_t sysctl_ping_group_range[2];
	long sysctl_tcp_mem[3];
	long sysctl_tcp_mem[3];
+4 −2
Original line number Original line Diff line number Diff line
@@ -417,7 +417,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
	struct net *net = sock_net(sk);
	struct net *net = sock_net(sk);
	int flags = inet_sk_flowi_flags(sk);
	int flags = inet_sk_flowi_flags(sk);


	flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
	flowi4_init_output(fl4, sk->sk_bound_dev_if, ireq->ir_mark,
			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
			   sk->sk_protocol,
			   sk->sk_protocol,
			   flags,
			   flags,
@@ -454,7 +454,7 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk,


	rcu_read_lock();
	rcu_read_lock();
	opt = rcu_dereference(newinet->inet_opt);
	opt = rcu_dereference(newinet->inet_opt);
	flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
	flowi4_init_output(fl4, sk->sk_bound_dev_if, inet_rsk(req)->ir_mark,
			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
			   sk->sk_protocol, inet_sk_flowi_flags(sk),
			   sk->sk_protocol, inet_sk_flowi_flags(sk),
			   (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr,
			   (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr,
@@ -688,6 +688,8 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
		inet_sk(newsk)->inet_sport = inet_rsk(req)->loc_port;
		inet_sk(newsk)->inet_sport = inet_rsk(req)->loc_port;
		newsk->sk_write_space = sk_stream_write_space;
		newsk->sk_write_space = sk_stream_write_space;


		newsk->sk_mark = inet_rsk(req)->ir_mark;

		newicsk->icsk_retransmits = 0;
		newicsk->icsk_retransmits = 0;
		newicsk->icsk_backoff	  = 0;
		newicsk->icsk_backoff	  = 0;
		newicsk->icsk_probes_out  = 0;
		newicsk->icsk_probes_out  = 0;
+2 −1
Original line number Original line Diff line number Diff line
@@ -312,6 +312,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
	ireq->rmt_port		= th->source;
	ireq->rmt_port		= th->source;
	ireq->loc_addr		= ip_hdr(skb)->daddr;
	ireq->loc_addr		= ip_hdr(skb)->daddr;
	ireq->rmt_addr		= ip_hdr(skb)->saddr;
	ireq->rmt_addr		= ip_hdr(skb)->saddr;
	ireq->ir_mark		= inet_request_mark(sk, skb);
	ireq->ecn_ok		= ecn_ok;
	ireq->ecn_ok		= ecn_ok;
	ireq->snd_wscale	= tcp_opt.snd_wscale;
	ireq->snd_wscale	= tcp_opt.snd_wscale;
	ireq->sack_ok		= tcp_opt.sack_ok;
	ireq->sack_ok		= tcp_opt.sack_ok;
@@ -348,7 +349,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
	 * hasn't changed since we received the original syn, but I see
	 * hasn't changed since we received the original syn, but I see
	 * no easy way to do this.
	 * no easy way to do this.
	 */
	 */
	flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark,
	flowi4_init_output(&fl4, sk->sk_bound_dev_if, ireq->ir_mark,
			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP,
			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP,
			   inet_sk_flowi_flags(sk),
			   inet_sk_flowi_flags(sk),
			   (opt && opt->srr) ? opt->faddr : ireq->rmt_addr,
			   (opt && opt->srr) ? opt->faddr : ireq->rmt_addr,
Loading