Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b1394967 authored by David S. Miller
Browse files

Merge branch 'tcp-second-round-for-EDT-conversion'



Eric Dumazet says:

====================
tcp: second round for EDT conversion

First round of EDT patches left TCP stack in a non-optimal state.

- High speed flows suffered from loss of performance, addressed
  by the first patch of this series.

- Second patch brings pacing to the current state of networking,
  since we now reach ~100 Gbit on a single TCP flow.

- Third patch implements a mitigation for scheduling delays,
  like the one we did in sch_fq in the past.

- Fourth patch removes one special case in sch_fq for ACK packets.

- Fifth patch removes a serious performance cost for TCP internal
  pacing. We should set up the high resolution timer only if
  really needed.

- Sixth patch fixes a typo in BBR.

- Last patch is one minor change in cdg congestion control.

Neal Cardwell also has a patch series fixing BBR after
EDT adoption.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 1a3aea25 825e1c52
Loading
Loading
Loading
Loading
+1 −0
Original line number Original line Diff line number Diff line
@@ -249,6 +249,7 @@ struct tcp_sock {
	u32	tlp_high_seq;	/* snd_nxt at the time of TLP retransmit. */
	u32	tlp_high_seq;	/* snd_nxt at the time of TLP retransmit. */


	u64	tcp_wstamp_ns;	/* departure time for next sent data packet */
	u64	tcp_wstamp_ns;	/* departure time for next sent data packet */
	u64	tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */


/* RTT measurement */
/* RTT measurement */
	u64	tcp_mstamp;	/* most recent packet received/sent */
	u64	tcp_mstamp;	/* most recent packet received/sent */
+2 −2
Original line number Original line Diff line number Diff line
@@ -422,8 +422,8 @@ struct sock {
	struct timer_list	sk_timer;
	struct timer_list	sk_timer;
	__u32			sk_priority;
	__u32			sk_priority;
	__u32			sk_mark;
	__u32			sk_mark;
	u32			sk_pacing_rate; /* bytes per second */
	unsigned long		sk_pacing_rate; /* bytes per second */
	u32			sk_max_pacing_rate;
	unsigned long		sk_max_pacing_rate;
	struct page_frag	sk_frag;
	struct page_frag	sk_frag;
	netdev_features_t	sk_route_caps;
	netdev_features_t	sk_route_caps;
	netdev_features_t	sk_route_nocaps;
	netdev_features_t	sk_route_nocaps;
+2 −2
Original line number Original line Diff line number Diff line
@@ -3927,8 +3927,8 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
			sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
			sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
			sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
			sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
			break;
			break;
		case SO_MAX_PACING_RATE:
		case SO_MAX_PACING_RATE: /* 32bit version */
			sk->sk_max_pacing_rate = val;
			sk->sk_max_pacing_rate = (val == ~0U) ? ~0UL : val;
			sk->sk_pacing_rate = min(sk->sk_pacing_rate,
			sk->sk_pacing_rate = min(sk->sk_pacing_rate,
						 sk->sk_max_pacing_rate);
						 sk->sk_max_pacing_rate);
			break;
			break;
+5 −4
Original line number Original line Diff line number Diff line
@@ -998,7 +998,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
			cmpxchg(&sk->sk_pacing_status,
			cmpxchg(&sk->sk_pacing_status,
				SK_PACING_NONE,
				SK_PACING_NONE,
				SK_PACING_NEEDED);
				SK_PACING_NEEDED);
		sk->sk_max_pacing_rate = val;
		sk->sk_max_pacing_rate = (val == ~0U) ? ~0UL : val;
		sk->sk_pacing_rate = min(sk->sk_pacing_rate,
		sk->sk_pacing_rate = min(sk->sk_pacing_rate,
					 sk->sk_max_pacing_rate);
					 sk->sk_max_pacing_rate);
		break;
		break;
@@ -1336,7 +1336,8 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
#endif
#endif


	case SO_MAX_PACING_RATE:
	case SO_MAX_PACING_RATE:
		v.val = sk->sk_max_pacing_rate;
		/* 32bit version */
		v.val = min_t(unsigned long, sk->sk_max_pacing_rate, ~0U);
		break;
		break;


	case SO_INCOMING_CPU:
	case SO_INCOMING_CPU:
@@ -2810,8 +2811,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
	sk->sk_ll_usec		=	sysctl_net_busy_read;
	sk->sk_ll_usec		=	sysctl_net_busy_read;
#endif
#endif


	sk->sk_max_pacing_rate = ~0U;
	sk->sk_max_pacing_rate = ~0UL;
	sk->sk_pacing_rate = ~0U;
	sk->sk_pacing_rate = ~0UL;
	sk->sk_pacing_shift = 10;
	sk->sk_pacing_shift = 10;
	sk->sk_incoming_cpu = -1;
	sk->sk_incoming_cpu = -1;


+5 −5
Original line number Original line Diff line number Diff line
@@ -3111,10 +3111,10 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
{
{
	const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */
	const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */
	const struct inet_connection_sock *icsk = inet_csk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);
	unsigned long rate;
	u32 now;
	u32 now;
	u64 rate64;
	u64 rate64;
	bool slow;
	bool slow;
	u32 rate;


	memset(info, 0, sizeof(*info));
	memset(info, 0, sizeof(*info));
	if (sk->sk_type != SOCK_STREAM)
	if (sk->sk_type != SOCK_STREAM)
@@ -3124,11 +3124,11 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)


	/* Report meaningful fields for all TCP states, including listeners */
	/* Report meaningful fields for all TCP states, including listeners */
	rate = READ_ONCE(sk->sk_pacing_rate);
	rate = READ_ONCE(sk->sk_pacing_rate);
	rate64 = rate != ~0U ? rate : ~0ULL;
	rate64 = (rate != ~0UL) ? rate : ~0ULL;
	info->tcpi_pacing_rate = rate64;
	info->tcpi_pacing_rate = rate64;


	rate = READ_ONCE(sk->sk_max_pacing_rate);
	rate = READ_ONCE(sk->sk_max_pacing_rate);
	rate64 = rate != ~0U ? rate : ~0ULL;
	rate64 = (rate != ~0UL) ? rate : ~0ULL;
	info->tcpi_max_pacing_rate = rate64;
	info->tcpi_max_pacing_rate = rate64;


	info->tcpi_reordering = tp->reordering;
	info->tcpi_reordering = tp->reordering;
@@ -3254,8 +3254,8 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
	const struct tcp_sock *tp = tcp_sk(sk);
	const struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *stats;
	struct sk_buff *stats;
	struct tcp_info info;
	struct tcp_info info;
	unsigned long rate;
	u64 rate64;
	u64 rate64;
	u32 rate;


	stats = alloc_skb(tcp_opt_stats_get_size(), GFP_ATOMIC);
	stats = alloc_skb(tcp_opt_stats_get_size(), GFP_ATOMIC);
	if (!stats)
	if (!stats)
@@ -3274,7 +3274,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
			  tp->total_retrans, TCP_NLA_PAD);
			  tp->total_retrans, TCP_NLA_PAD);


	rate = READ_ONCE(sk->sk_pacing_rate);
	rate = READ_ONCE(sk->sk_pacing_rate);
	rate64 = rate != ~0U ? rate : ~0ULL;
	rate64 = (rate != ~0UL) ? rate : ~0ULL;
	nla_put_u64_64bit(stats, TCP_NLA_PACING_RATE, rate64, TCP_NLA_PAD);
	nla_put_u64_64bit(stats, TCP_NLA_PACING_RATE, rate64, TCP_NLA_PAD);


	rate64 = tcp_compute_delivery_rate(tp);
	rate64 = tcp_compute_delivery_rate(tp);
Loading