
Commit 9f2dbdd9 authored by David S. Miller

Merge branch 'listener_refactor_part_11'



Eric Dumazet says:

====================
inet: tcp listener refactoring, part 11

Before inserting request sockets into the general (ehash) table,
we need to prepare netfilter to cope with them, as they are
not full sockets (a sketch of the full-socket check follows the changelog).

I'll later change xt_socket to get full support, including for
request sockets (NEW_SYN_RECV).

Save 8 bytes in inet_request_sock on 64-bit arches (a layout sketch of
the saving follows the changelog). We'll soon add a pointer to the
listener socket.

I included two TCP changes in this patch series.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents c2497395 7970ddc8
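
Request sockets in the new TCP_NEW_SYN_RECV state are, like time-wait sockets, minisockets that do not carry full struct sock state. That is why the two netfilter hunks at the end of this merge replace explicit TCP_TIME_WAIT comparisons with sk_fullsock(). The standalone userspace sketch below illustrates the idea behind such a full-socket check; the stub types, the stub_fullsock() helper and the enum values are assumptions made for illustration, not the in-tree sk_fullsock() definition.

/*
 * Illustration only (userspace, not kernel code): once request sockets sit
 * in the ehash table, "not time-wait" no longer implies "full socket".
 */
#include <stdbool.h>
#include <stdio.h>

enum {
	TCP_ESTABLISHED	 = 1,	/* a full socket */
	TCP_TIME_WAIT	 = 6,	/* minisocket: time-wait sock */
	TCP_NEW_SYN_RECV = 12,	/* minisocket: request sock */
};

struct sock_stub { int sk_state; };

/* Only sockets outside the two minisocket states carry full state
 * (sk_callback_lock, sk_socket, dst ops, ...). */
static bool stub_fullsock(const struct sock_stub *sk)
{
	return sk->sk_state != TCP_TIME_WAIT &&
	       sk->sk_state != TCP_NEW_SYN_RECV;
}

int main(void)
{
	struct sock_stub est   = { TCP_ESTABLISHED };
	struct sock_stub twait = { TCP_TIME_WAIT };
	struct sock_stub req   = { TCP_NEW_SYN_RECV };

	/* The old "!= TCP_TIME_WAIT" test would wrongly accept req here. */
	printf("established=%d time_wait=%d new_syn_recv=%d\n",
	       stub_fullsock(&est), stub_fullsock(&twait),
	       stub_fullsock(&req));
	return 0;
}

Callers such as nf_log_dump_sk_uid_gid() in the last hunk can then bail out early on anything that is not a full socket.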
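
On the 8-byte saving: the first hunk below moves ir_mark from after the union of pointers to before it. On 64-bit, the pointer union is 8-byte aligned, so a u32 placed after it costs both a hole in front of the union and trailing padding behind the u32; placed before the union, the u32 fills the hole instead. A minimal userspace sketch of the effect, with a deliberately simplified field set that is an assumption for illustration rather than the real inet_request_sock layout:

/* Userspace sketch of the padding argument; not kernel code (needs C11
 * for the anonymous union). */
#include <stdint.h>
#include <stdio.h>

struct layout_before {
	uint16_t tstamp_ok : 1, sack_ok : 1, wscale_ok : 1,
		 ecn_ok : 1, acked : 1, no_srccheck : 1;
	union {				/* pointer union: 8-byte aligned, */
		void *opt;		/* so a 6-byte hole opens before it */
		void *pktopts;
	};
	uint32_t ir_mark;		/* ...and 4 bytes of tail padding follow */
};

struct layout_after {
	uint16_t tstamp_ok : 1, sack_ok : 1, wscale_ok : 1,
		 ecn_ok : 1, acked : 1, no_srccheck : 1;
	uint32_t ir_mark;		/* fills most of the former hole */
	union {
		void *opt;
		void *pktopts;
	};				/* no tail padding needed */
};

int main(void)
{
	/* On LP64 this typically prints 24 vs 16 bytes. */
	printf("before: %zu bytes, after: %zu bytes\n",
	       sizeof(struct layout_before), sizeof(struct layout_after));
	return 0;
}

Compiled and run on an LP64 machine, the two sizes should differ by exactly the 8 bytes the changelog mentions.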
+5 −6
@@ -94,11 +94,11 @@ struct inet_request_sock {
 				acked	   : 1,
 				no_srccheck: 1;
 	kmemcheck_bitfield_end(flags);
+	u32                     ir_mark;
 	union {
 		struct ip_options_rcu	*opt;
 		struct sk_buff		*pktopts;
 	};
-	u32                     ir_mark;
 };
 
 static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
@@ -106,14 +106,13 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
 	return (struct inet_request_sock *)sk;
 }
 
-static inline u32 inet_request_mark(struct sock *sk, struct sk_buff *skb)
+static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb)
 {
-	if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept) {
+	if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept)
 		return skb->mark;
-	} else {
-		return sk->sk_mark;
-	}
+
+	return sk->sk_mark;
 }
 
 struct inet_cork {
 	unsigned int		flags;
+2 −55
@@ -1137,31 +1137,6 @@ static inline int tcp_full_space(const struct sock *sk)
 	return tcp_win_from_space(sk->sk_rcvbuf);
 }
 
-static inline void tcp_openreq_init(struct request_sock *req,
-				    struct tcp_options_received *rx_opt,
-				    struct sk_buff *skb, struct sock *sk)
-{
-	struct inet_request_sock *ireq = inet_rsk(req);
-
-	req->rcv_wnd = 0;		/* So that tcp_send_synack() knows! */
-	req->cookie_ts = 0;
-	tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
-	tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
-	tcp_rsk(req)->snt_synack = tcp_time_stamp;
-	tcp_rsk(req)->last_oow_ack_time = 0;
-	req->mss = rx_opt->mss_clamp;
-	req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
-	ireq->tstamp_ok = rx_opt->tstamp_ok;
-	ireq->sack_ok = rx_opt->sack_ok;
-	ireq->snd_wscale = rx_opt->snd_wscale;
-	ireq->wscale_ok = rx_opt->wscale_ok;
-	ireq->acked = 0;
-	ireq->ecn_ok = 0;
-	ireq->ir_rmt_port = tcp_hdr(skb)->source;
-	ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
-	ireq->ir_mark = inet_request_mark(sk, skb);
-}
-
 extern void tcp_openreq_init_rwin(struct request_sock *req,
 				  struct sock *sk, struct dst_entry *dst);

@@ -1241,36 +1216,8 @@ static inline bool tcp_paws_reject(const struct tcp_options_received *rx_opt,
 	return true;
 }
 
-/* Return true if we're currently rate-limiting out-of-window ACKs and
- * thus shouldn't send a dupack right now. We rate-limit dupacks in
- * response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS
- * attacks that send repeated SYNs or ACKs for the same connection. To
- * do this, we do not send a duplicate SYNACK or ACK if the remote
- * endpoint is sending out-of-window SYNs or pure ACKs at a high rate.
- */
-static inline bool tcp_oow_rate_limited(struct net *net,
-					const struct sk_buff *skb,
-					int mib_idx, u32 *last_oow_ack_time)
-{
-	/* Data packets without SYNs are not likely part of an ACK loop. */
-	if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
-	    !tcp_hdr(skb)->syn)
-		goto not_rate_limited;
-
-	if (*last_oow_ack_time) {
-		s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
-
-		if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
-			NET_INC_STATS_BH(net, mib_idx);
-			return true;	/* rate-limited: don't send yet! */
-		}
-	}
-
-	*last_oow_ack_time = tcp_time_stamp;
-
-not_rate_limited:
-	return false;	/* not rate-limited: go ahead, send dupack now! */
-}
+bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
+			  int mib_idx, u32 *last_oow_ack_time);
 
 static inline void tcp_mib_init(struct net *net)
 {
+55 −0
@@ -3321,6 +3321,36 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
 	return flag;
 }
 
+/* Return true if we're currently rate-limiting out-of-window ACKs and
+ * thus shouldn't send a dupack right now. We rate-limit dupacks in
+ * response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS
+ * attacks that send repeated SYNs or ACKs for the same connection. To
+ * do this, we do not send a duplicate SYNACK or ACK if the remote
+ * endpoint is sending out-of-window SYNs or pure ACKs at a high rate.
+ */
+bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
+			  int mib_idx, u32 *last_oow_ack_time)
+{
+	/* Data packets without SYNs are not likely part of an ACK loop. */
+	if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
+	    !tcp_hdr(skb)->syn)
+		goto not_rate_limited;
+
+	if (*last_oow_ack_time) {
+		s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
+
+		if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
+			NET_INC_STATS_BH(net, mib_idx);
+			return true;	/* rate-limited: don't send yet! */
+		}
+	}
+
+	*last_oow_ack_time = tcp_time_stamp;
+
+not_rate_limited:
+	return false;	/* not rate-limited: go ahead, send dupack now! */
+}
+
 /* RFC 5961 7 [ACK Throttling] */
 static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
 {
@@ -5912,6 +5942,31 @@ static void tcp_ecn_create_request(struct request_sock *req,
 		inet_rsk(req)->ecn_ok = 1;
 }
 
+static void tcp_openreq_init(struct request_sock *req,
+			     const struct tcp_options_received *rx_opt,
+			     struct sk_buff *skb, const struct sock *sk)
+{
+	struct inet_request_sock *ireq = inet_rsk(req);
+
+	req->rcv_wnd = 0;		/* So that tcp_send_synack() knows! */
+	req->cookie_ts = 0;
+	tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
+	tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
+	tcp_rsk(req)->snt_synack = tcp_time_stamp;
+	tcp_rsk(req)->last_oow_ack_time = 0;
+	req->mss = rx_opt->mss_clamp;
+	req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
+	ireq->tstamp_ok = rx_opt->tstamp_ok;
+	ireq->sack_ok = rx_opt->sack_ok;
+	ireq->snd_wscale = rx_opt->snd_wscale;
+	ireq->wscale_ok = rx_opt->wscale_ok;
+	ireq->acked = 0;
+	ireq->ecn_ok = 0;
+	ireq->ir_rmt_port = tcp_hdr(skb)->source;
+	ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
+	ireq->ir_mark = inet_request_mark(sk, skb);
+}
+
 int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		     const struct tcp_request_sock_ops *af_ops,
 		     struct sock *sk, struct sk_buff *skb)
+1 −1
@@ -209,7 +209,7 @@ static inline void maybe_update_pmtu(int skb_af, struct sk_buff *skb, int mtu)
 	struct sock *sk = skb->sk;
 	struct rtable *ort = skb_rtable(skb);
 
-	if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT)
+	if (!skb->dev && sk && sk_fullsock(sk))
 		ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
 }

+1 −1
@@ -133,7 +133,7 @@ EXPORT_SYMBOL_GPL(nf_log_dump_tcp_header);

 void nf_log_dump_sk_uid_gid(struct nf_log_buf *m, struct sock *sk)
 {
-	if (!sk || sk->sk_state == TCP_TIME_WAIT)
+	if (!sk || !sk_fullsock(sk))
 		return;
 
 	read_lock_bh(&sk->sk_callback_lock);