
Commit e114a710 authored by Eric Dumazet, committed by David S. Miller

tcp: fix cwnd limited checking to improve congestion control

Yuchung discovered that tcp_is_cwnd_limited() was returning false in
the slow start phase even when the application had filled the socket
write queue.

All congestion control modules consult tcp_is_cwnd_limited() before
increasing cwnd, so this behavior prevents slow start from probing the
bandwidth at full speed.
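
As an illustration, here is a minimal userspace sketch of that gating
pattern; the field and function names mirror the kernel's, but this is
a simplified stand-in rather than the actual module code:

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-in for the fields a congestion module looks at. */
struct tp_model {
	unsigned int snd_cwnd;     /* congestion window, in packets */
	unsigned int snd_ssthresh; /* slow start threshold */
	bool cwnd_limited;         /* result of tcp_is_cwnd_limited() */
};

/* Reno-style cong_avoid: the module bails out early when the flow is
 * not cwnd-limited, so cwnd only grows when it is the real bottleneck.
 */
static void cong_avoid(struct tp_model *tp, unsigned int acked)
{
	if (!tp->cwnd_limited)
		return;                /* application limited: do not grow */
	if (tp->snd_cwnd <= tp->snd_ssthresh)
		tp->snd_cwnd += acked; /* slow start: +1 per ACKed packet */
	else
		tp->snd_cwnd += 1;     /* congestion avoidance: ~1 per RTT */
}

int main(void)
{
	struct tp_model tp = { .snd_cwnd = 10, .snd_ssthresh = 64,
			       .cwnd_limited = false };

	cong_avoid(&tp, 2);            /* gate closed: cwnd stays at 10 */
	printf("cwnd=%u\n", tp.snd_cwnd);
	tp.cwnd_limited = true;
	cong_avoid(&tp, 2);            /* gate open: cwnd grows to 12 */
	printf("cwnd=%u\n", tp.snd_cwnd);
	return 0;
}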

The problem is that even when the write queue is full (i.e. we are
_not_ application limited), cwnd can be underutilized if TSO
auto-deferral or TCP Small Queues decides to hold packets back.

As a result, in_flight can stay at a small value, and we can reach the
point where tcp_is_cwnd_limited() returns false.
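
To make the failure mode concrete, here is a hedged userspace model of
the pre-patch check; TSO_WIN_DIVISOR, MAX_TSO_DEFER_MSS and
XMIT_SIZE_GOAL are illustrative stand-ins for
sysctl_tcp_tso_win_divisor, tcp_max_tso_deferred_mss() and
tp->xmit_size_goal_segs:

#include <stdbool.h>
#include <stdio.h>

#define TSO_WIN_DIVISOR   3 /* stand-in for sysctl_tcp_tso_win_divisor */
#define MAX_TSO_DEFER_MSS 3 /* stand-in for tcp_max_tso_deferred_mss() */
#define XMIT_SIZE_GOAL    4 /* stand-in for tp->xmit_size_goal_segs */

/* Model of the old tcp_is_cwnd_limited(): it only compares in_flight
 * against cwnd plus GSO heuristics, with no knowledge of packets that
 * TSO deferral or TSQ are holding back.
 */
static bool old_is_cwnd_limited(unsigned int cwnd, unsigned int in_flight)
{
	unsigned int left;

	if (in_flight >= cwnd)
		return true;
	left = cwnd - in_flight;
	if (left * TSO_WIN_DIVISOR < cwnd && left < XMIT_SIZE_GOAL)
		return true;
	return left <= MAX_TSO_DEFER_MSS;
}

int main(void)
{
	/* Write queue full, but TSQ holds packets back: only 5 of
	 * cwnd=10 are in flight, so the old check reports "not limited".
	 */
	printf("limited=%d\n", old_is_cwnd_limited(10, 5)); /* prints 0 */
	return 0;
}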

With TCP Small Queues and FQ/pacing, this issue is more visible.

We fix this by having tcp_cwnd_validate(), which is supposed to track
such things, take into account unsent_segs, the number of segments we
are not sending at the moment due to TSO or TSQ but intend to send
very soon. Then, when we are cwnd-limited, we remember this fact while
processing the window of ACKs that comes back.

For example, suppose we have a brand new connection with cwnd=10; we
are in slow start, and we send a flight of 9 packets. By the time we
have received ACKs for all 9 packets we want our cwnd to be 18.
We implement this by setting tp->lsnd_pending to 9, and
considering ourselves to be cwnd-limited while cwnd is less than
twice tp->lsnd_pending (2*9 -> 18).
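
The arithmetic can be checked with a few lines of standalone C (a
model of the new check, not the kernel code itself):

#include <stdio.h>

int main(void)
{
	unsigned int snd_cwnd = 10;    /* IW10 */
	unsigned int lsnd_pending = 9; /* flight of 9 packets sent */
	unsigned int acks;

	/* Process the 9 returning ACKs; in slow start each ACKed packet
	 * grows cwnd by one, but only while we are considered
	 * cwnd-limited, i.e. while snd_cwnd < 2 * lsnd_pending.
	 */
	for (acks = 0; acks < 9; acks++) {
		if (snd_cwnd < 2 * lsnd_pending)
			snd_cwnd++;
	}
	printf("cwnd=%u\n", snd_cwnd); /* prints 18 == 2 * 9 */
	return 0;
}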

This makes tcp_is_cwnd_limited() more understandable by removing the
GSO/TSO kludge that tried to work around the issue.

Note: the in_flight parameter can be removed in a follow-up cleanup
patch.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 4e8bbb81
include/linux/tcp.h  +1 −0
@@ -230,6 +230,7 @@ struct tcp_sock {
 	u32	snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */
 	u32	snd_cwnd_used;
 	u32	snd_cwnd_stamp;
+	u32	lsnd_pending;	/* packets inflight or unsent since last xmit */
 	u32	prior_cwnd;	/* Congestion window at start of Recovery. */
 	u32	prr_delivered;	/* Number of newly delivered packets to
 				 * receiver in Recovery. */
include/net/tcp.h  +21 −1
@@ -974,7 +974,27 @@ static inline u32 tcp_wnd_end(const struct tcp_sock *tp)
 {
 	return tp->snd_una + tp->snd_wnd;
 }
-bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight);
+
+/* We follow the spirit of RFC2861 to validate cwnd but implement a more
+ * flexible approach. The RFC suggests cwnd should not be raised unless
+ * it was fully used previously. But we allow cwnd to grow as long as the
+ * application has used half the cwnd.
+ * Example :
+ *    cwnd is 10 (IW10), but application sends 9 frames.
+ *    We allow cwnd to reach 18 when all frames are ACKed.
+ * This check is safe because it's as aggressive as slow start which already
+ * risks 100% overshoot. The advantage is that we discourage application to
+ * either send more filler packets or data to artificially blow up the cwnd
+ * usage, and allow application-limited process to probe bw more aggressively.
+ *
+ * TODO: remove in_flight once we can fix all callers, and their callers...
+ */
+static inline bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
+{
+	const struct tcp_sock *tp = tcp_sk(sk);
+
+	return tp->snd_cwnd < 2 * tp->lsnd_pending;
+}
 
 static inline void tcp_check_probe_timer(struct sock *sk)
 {
net/ipv4/tcp_cong.c  +0 −20
@@ -276,26 +276,6 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
 	return err;
 }
 
-/* RFC2861 Check whether we are limited by application or congestion window
- * This is the inverse of cwnd check in tcp_tso_should_defer
- */
-bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
-{
-	const struct tcp_sock *tp = tcp_sk(sk);
-	u32 left;
-
-	if (in_flight >= tp->snd_cwnd)
-		return true;
-
-	left = tp->snd_cwnd - in_flight;
-	if (sk_can_gso(sk) &&
-	    left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd &&
-	    left < tp->xmit_size_goal_segs)
-		return true;
-	return left <= tcp_max_tso_deferred_mss(tp);
-}
-EXPORT_SYMBOL_GPL(tcp_is_cwnd_limited);
-
 /* Slow start is used when congestion window is no greater than the slow start
  * threshold. We base on RFC2581 and also handle stretch ACKs properly.
  * We do not implement RFC3465 Appropriate Byte Counting (ABC) per se but
net/ipv4/tcp_output.c  +14 −7
@@ -1402,12 +1402,13 @@ static void tcp_cwnd_application_limited(struct sock *sk)
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
-/* Congestion window validation. (RFC2861) */
-static void tcp_cwnd_validate(struct sock *sk)
+static void tcp_cwnd_validate(struct sock *sk, u32 unsent_segs)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (tp->packets_out >= tp->snd_cwnd) {
+	tp->lsnd_pending = tp->packets_out + unsent_segs;
+
+	if (tcp_is_cwnd_limited(sk, 0)) {
 		/* Network is feed fully. */
 		tp->snd_cwnd_used = 0;
 		tp->snd_cwnd_stamp = tcp_time_stamp;
@@ -1880,7 +1881,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
-	unsigned int tso_segs, sent_pkts;
+	unsigned int tso_segs, sent_pkts, unsent_segs = 0;
 	int cwnd_quota;
 	int result;
 
@@ -1924,7 +1925,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 				break;
 		} else {
 			if (!push_one && tcp_tso_should_defer(sk, skb))
-				break;
+				goto compute_unsent_segs;
 		}
 
 		/* TCP Small Queues :
@@ -1949,9 +1950,15 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 			 * there is no smp_mb__after_set_bit() yet
 			 */
 			smp_mb__after_clear_bit();
-			if (atomic_read(&sk->sk_wmem_alloc) > limit)
+			if (atomic_read(&sk->sk_wmem_alloc) > limit) {
+				u32 unsent_bytes;
+
+compute_unsent_segs:
+				unsent_bytes = tp->write_seq - tp->snd_nxt;
+				unsent_segs = DIV_ROUND_UP(unsent_bytes, mss_now);
 				break;
+			}
 		}
 
 		limit = mss_now;
 		if (tso_segs > 1 && !tcp_urg_mode(tp))
@@ -1990,7 +1997,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 		/* Send one loss probe per tail loss episode. */
 		if (push_one != 2)
 			tcp_schedule_loss_probe(sk);
-		tcp_cwnd_validate(sk);
+		tcp_cwnd_validate(sk, unsent_segs);
 		return false;
 	}
 	return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk));
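
For reference, the unsent_segs bookkeeping in the hunk above boils down
to the following arithmetic; the sequence numbers and MSS here are
made-up values for illustration:

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	unsigned int write_seq = 150000; /* hypothetical: last byte queued */
	unsigned int snd_nxt   = 137000; /* hypothetical: next byte to send */
	unsigned int mss_now   = 1448;   /* hypothetical MSS */
	unsigned int packets_out = 9;    /* packets currently in flight */

	/* Segments queued but held back by TSO deferral or TSQ... */
	unsigned int unsent_bytes = write_seq - snd_nxt;
	unsigned int unsent_segs  = DIV_ROUND_UP(unsent_bytes, mss_now);

	/* ...all count toward lsnd_pending, so tcp_is_cwnd_limited()
	 * still sees the sender as cwnd-limited even though in_flight
	 * alone looks small.
	 */
	unsigned int lsnd_pending = packets_out + unsent_segs;

	printf("unsent_segs=%u lsnd_pending=%u\n", unsent_segs, lsnd_pending);
	return 0;
}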