Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a89f96c9 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'tcp_sack_rttm'



Kenneth Klette Jonassen says:

====================
tcp: SACK RTTM changes for congestion control

This patch series improves SACK RTT measurements for congestion control:
  o Picks the latest sequence SACKed for RTT, i.e. most accurate delay
    signal.
  o Calls the congestion control's pkts_acked hook with SACK RTTMs
    even when not sequentially ACKing new data.

V2: amend misleading comment
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 10308220 138998fd
Loading
Loading
Loading
Loading
+43 −43
Original line number Diff line number Diff line
@@ -1130,7 +1130,12 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
struct tcp_sacktag_state {
	int	reord;
	int	fack_count;
	long	rtt_us; /* RTT measured by SACKing never-retransmitted data */
	/* Timestamps for earliest and latest never-retransmitted segment
	 * that was SACKed. RTO needs the earliest RTT to stay conservative,
	 * but congestion control should still get an accurate delay signal.
	 */
	struct skb_mstamp first_sackt;
	struct skb_mstamp last_sackt;
	int	flag;
};

@@ -1233,14 +1238,9 @@ static u8 tcp_sacktag_one(struct sock *sk,
							   state->reord);
				if (!after(end_seq, tp->high_seq))
					state->flag |= FLAG_ORIG_SACK_ACKED;
				/* Pick the earliest sequence sacked for RTT */
				if (state->rtt_us < 0) {
					struct skb_mstamp now;

					skb_mstamp_get(&now);
					state->rtt_us = skb_mstamp_us_delta(&now,
								xmit_time);
				}
				if (state->first_sackt.v64 == 0)
					state->first_sackt = *xmit_time;
				state->last_sackt = *xmit_time;
			}

			if (sacked & TCPCB_LOST) {
@@ -1634,7 +1634,7 @@ static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_bl

static int
tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
			u32 prior_snd_una, long *sack_rtt_us)
			u32 prior_snd_una, struct tcp_sacktag_state *state)
{
	struct tcp_sock *tp = tcp_sk(sk);
	const unsigned char *ptr = (skb_transport_header(ack_skb) +
@@ -1642,7 +1642,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
	struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
	struct tcp_sack_block sp[TCP_NUM_SACKS];
	struct tcp_sack_block *cache;
	struct tcp_sacktag_state state;
	struct sk_buff *skb;
	int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
	int used_sacks;
@@ -1650,9 +1649,8 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
	int i, j;
	int first_sack_index;

	state.flag = 0;
	state.reord = tp->packets_out;
	state.rtt_us = -1L;
	state->flag = 0;
	state->reord = tp->packets_out;

	if (!tp->sacked_out) {
		if (WARN_ON(tp->fackets_out))
@@ -1663,7 +1661,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
	found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
					 num_sacks, prior_snd_una);
	if (found_dup_sack)
		state.flag |= FLAG_DSACKING_ACK;
		state->flag |= FLAG_DSACKING_ACK;

	/* Eliminate too old ACKs, but take into
	 * account more or less fresh ones, they can
@@ -1728,7 +1726,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
	}

	skb = tcp_write_queue_head(sk);
	state.fack_count = 0;
	state->fack_count = 0;
	i = 0;

	if (!tp->sacked_out) {
@@ -1762,10 +1760,10 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,

			/* Head todo? */
			if (before(start_seq, cache->start_seq)) {
				skb = tcp_sacktag_skip(skb, sk, &state,
				skb = tcp_sacktag_skip(skb, sk, state,
						       start_seq);
				skb = tcp_sacktag_walk(skb, sk, next_dup,
						       &state,
						       state,
						       start_seq,
						       cache->start_seq,
						       dup_sack);
@@ -1776,7 +1774,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
				goto advance_sp;

			skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
						       &state,
						       state,
						       cache->end_seq);

			/* ...tail remains todo... */
@@ -1785,12 +1783,12 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
				skb = tcp_highest_sack(sk);
				if (!skb)
					break;
				state.fack_count = tp->fackets_out;
				state->fack_count = tp->fackets_out;
				cache++;
				goto walk;
			}

			skb = tcp_sacktag_skip(skb, sk, &state, cache->end_seq);
			skb = tcp_sacktag_skip(skb, sk, state, cache->end_seq);
			/* Check overlap against next cached too (past this one already) */
			cache++;
			continue;
@@ -1800,12 +1798,12 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
			skb = tcp_highest_sack(sk);
			if (!skb)
				break;
			state.fack_count = tp->fackets_out;
			state->fack_count = tp->fackets_out;
		}
		skb = tcp_sacktag_skip(skb, sk, &state, start_seq);
		skb = tcp_sacktag_skip(skb, sk, state, start_seq);

walk:
		skb = tcp_sacktag_walk(skb, sk, next_dup, &state,
		skb = tcp_sacktag_walk(skb, sk, next_dup, state,
				       start_seq, end_seq, dup_sack);

advance_sp:
@@ -1820,9 +1818,9 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
	for (j = 0; j < used_sacks; j++)
		tp->recv_sack_cache[i++] = sp[j];

	if ((state.reord < tp->fackets_out) &&
	if ((state->reord < tp->fackets_out) &&
	    ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
		tcp_update_reordering(sk, tp->fackets_out - state.reord, 0);
		tcp_update_reordering(sk, tp->fackets_out - state->reord, 0);

	tcp_mark_lost_retrans(sk);
	tcp_verify_left_out(tp);
@@ -1834,8 +1832,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
	WARN_ON((int)tp->retrans_out < 0);
	WARN_ON((int)tcp_packets_in_flight(tp) < 0);
#endif
	*sack_rtt_us = state.rtt_us;
	return state.flag;
	return state->flag;
}

/* Limits sacked_out so that sum with lost_out isn't ever larger than
@@ -3052,7 +3049,8 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
 * arrived at the other end.
 */
static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
			       u32 prior_snd_una, long sack_rtt_us)
			       u32 prior_snd_una,
			       struct tcp_sacktag_state *sack)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct skb_mstamp first_ackt, last_ackt, now;
@@ -3060,8 +3058,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
	u32 prior_sacked = tp->sacked_out;
	u32 reord = tp->packets_out;
	bool fully_acked = true;
	long ca_seq_rtt_us = -1L;
	long sack_rtt_us = -1L;
	long seq_rtt_us = -1L;
	long ca_rtt_us = -1L;
	struct sk_buff *skb;
	u32 pkts_acked = 0;
	bool rtt_update;
@@ -3150,15 +3149,16 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
	skb_mstamp_get(&now);
	if (likely(first_ackt.v64)) {
		seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt);
		ca_seq_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
		ca_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
	}
	if (sack->first_sackt.v64) {
		sack_rtt_us = skb_mstamp_us_delta(&now, &sack->first_sackt);
		ca_rtt_us = skb_mstamp_us_delta(&now, &sack->last_sackt);
	}

	rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us);

	if (flag & FLAG_ACKED) {
		const struct tcp_congestion_ops *ca_ops
			= inet_csk(sk)->icsk_ca_ops;

		tcp_rearm_rto(sk);
		if (unlikely(icsk->icsk_mtup.probe_size &&
			     !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
@@ -3181,11 +3181,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,

		tp->fackets_out -= min(pkts_acked, tp->fackets_out);

		if (ca_ops->pkts_acked) {
			long rtt_us = min_t(ulong, ca_seq_rtt_us, sack_rtt_us);
			ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
		}

	} else if (skb && rtt_update && sack_rtt_us >= 0 &&
		   sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) {
		/* Do not re-arm RTO if the sack RTT is measured from data sent
@@ -3195,6 +3190,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
		tcp_rearm_rto(sk);
	}

	if (icsk->icsk_ca_ops->pkts_acked)
		icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked, ca_rtt_us);

#if FASTRETRANS_DEBUG > 0
	WARN_ON((int)tp->sacked_out < 0);
	WARN_ON((int)tp->lost_out < 0);
@@ -3459,6 +3457,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_sacktag_state sack_state;
	u32 prior_snd_una = tp->snd_una;
	u32 ack_seq = TCP_SKB_CB(skb)->seq;
	u32 ack = TCP_SKB_CB(skb)->ack_seq;
@@ -3467,7 +3466,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
	int prior_packets = tp->packets_out;
	const int prior_unsacked = tp->packets_out - tp->sacked_out;
	int acked = 0; /* Number of packets newly acked */
	long sack_rtt_us = -1L;

	sack_state.first_sackt.v64 = 0;

	/* We very likely will need to access write queue head. */
	prefetchw(sk->sk_write_queue.next);
@@ -3531,7 +3531,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)

		if (TCP_SKB_CB(skb)->sacked)
			flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
							&sack_rtt_us);
							&sack_state);

		if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
			flag |= FLAG_ECE;
@@ -3556,7 +3556,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
	/* See if we can take anything off of the retransmit queue. */
	acked = tp->packets_out;
	flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una,
				    sack_rtt_us);
				    &sack_state);
	acked -= tp->packets_out;

	/* Advance cwnd if state allows */
@@ -3608,7 +3608,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
	 */
	if (TCP_SKB_CB(skb)->sacked) {
		flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
						&sack_rtt_us);
						&sack_state);
		tcp_fastretrans_alert(sk, acked, prior_unsacked,
				      is_dupack, flag);
	}