Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ffd177de authored by Yuchung Cheng's avatar Yuchung Cheng Committed by David S. Miller
Browse files

tcp: refactor DCTCP ECN ACK handling

DCTCP has two parts - a new ECN signalling mechanism and the response
function to it. The first part can be used by other congestion
control for DCTCP-ECN deployed networks. This patch moves that part
into a separate tcp_dctcp.h to be used by other congestion control
module (like how Yeah uses Vegas algorithmas). For example, BBR is
experimenting such ECN signal currently
https://tinyurl.com/ietf-102-iccrg-bbr2



Signed-off-by: default avatarYuchung Cheng <ycheng@google.com>
Signed-off-by: default avatarYousuk Seung <ysseung@google.com>
Signed-off-by: default avatarNeal Cardwell <ncardwell@google.com>
Signed-off-by: default avatarEric Dumazet <edumazet@google.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent ed792e28
Loading
Loading
Loading
Loading
+4 −51
Original line number Diff line number Diff line
@@ -44,6 +44,7 @@
#include <linux/mm.h>
#include <net/tcp.h>
#include <linux/inet_diag.h>
#include "tcp_dctcp.h"

#define DCTCP_MAX_ALPHA	1024U

@@ -118,54 +119,6 @@ static u32 dctcp_ssthresh(struct sock *sk)
	return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U);
}

/* Minimal DCTP CE state machine:
 *
 * S:	0 <- last pkt was non-CE
 *	1 <- last pkt was CE
 */

static void dctcp_ce_state_0_to_1(struct sock *sk)
{
	struct dctcp *ca = inet_csk_ca(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	if (!ca->ce_state) {
		/* State has changed from CE=0 to CE=1, force an immediate
		 * ACK to reflect the new CE state. If an ACK was delayed,
		 * send that first to reflect the prior CE state.
		 */
		if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
			__tcp_send_ack(sk, ca->prior_rcv_nxt);
		inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
	}

	ca->prior_rcv_nxt = tp->rcv_nxt;
	ca->ce_state = 1;

	tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
}

static void dctcp_ce_state_1_to_0(struct sock *sk)
{
	struct dctcp *ca = inet_csk_ca(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	if (ca->ce_state) {
		/* State has changed from CE=1 to CE=0, force an immediate
		 * ACK to reflect the new CE state. If an ACK was delayed,
		 * send that first to reflect the prior CE state.
		 */
		if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
			__tcp_send_ack(sk, ca->prior_rcv_nxt);
		inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
	}

	ca->prior_rcv_nxt = tp->rcv_nxt;
	ca->ce_state = 0;

	tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
}

static void dctcp_update_alpha(struct sock *sk, u32 flags)
{
	const struct tcp_sock *tp = tcp_sk(sk);
@@ -230,12 +183,12 @@ static void dctcp_state(struct sock *sk, u8 new_state)

static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
{
	struct dctcp *ca = inet_csk_ca(sk);

	switch (ev) {
	case CA_EVENT_ECN_IS_CE:
		dctcp_ce_state_0_to_1(sk);
		break;
	case CA_EVENT_ECN_NO_CE:
		dctcp_ce_state_1_to_0(sk);
		dctcp_ece_ack_update(sk, ev, &ca->prior_rcv_nxt, &ca->ce_state);
		break;
	default:
		/* Don't care for the rest. */

net/ipv4/tcp_dctcp.h

0 → 100644
+40 −0
Original line number Diff line number Diff line
#ifndef _TCP_DCTCP_H
#define _TCP_DCTCP_H

static inline void dctcp_ece_ack_cwr(struct sock *sk, u32 ce_state)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (ce_state == 1)
		tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
	else
		tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
}

/* Minimal DCTP CE state machine:
 *
 * S:	0 <- last pkt was non-CE
 *	1 <- last pkt was CE
 */
static inline void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt,
					u32 *prior_rcv_nxt, u32 *ce_state)
{
	u32 new_ce_state = (evt == CA_EVENT_ECN_IS_CE) ? 1 : 0;

	if (*ce_state != new_ce_state) {
		/* CE state has changed, force an immediate ACK to
		 * reflect the new CE state. If an ACK was delayed,
		 * send that first to reflect the prior CE state.
		 */
		if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) {
			dctcp_ece_ack_cwr(sk, *ce_state);
			__tcp_send_ack(sk, *prior_rcv_nxt);
		}
		inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
	}
	*prior_rcv_nxt = tcp_sk(sk)->rcv_nxt;
	*ce_state = new_ce_state;
	dctcp_ece_ack_cwr(sk, new_ce_state);
}

#endif