Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c3a8d947 authored by Daniel Borkmann, committed by David S. Miller
Browse files

tcp: use dctcp if enabled on the route to the initiator



Currently, the following case doesn't use DCTCP, even if it should:
A responder has, for example, Cubic as the system-wide default, but for a
specific route to the initiating host, DCTCP is set in RTAX_CC_ALGO. The
initiating host then uses DCTCP as congestion control, but since the
initiator sets ECT(0), tcp_ecn_create_request() doesn't set ecn_ok,
and we have to fall back to Reno after the 3WHS completes.

We were thinking about how to solve this in a minimal, non-intrusive
way without bloating tcp_ecn_create_request() needlessly: let's cache
the CA ECN option flag in RTAX_FEATURES. In other words, when ECT(0)
is set on the SYN packet, set ecn_ok=1 iff the route's RTAX_FEATURES
contains the unexposed (internal-only) DST_FEATURE_ECN_CA. This allows
doing only a single metric feature lookup inside tcp_ecn_create_request().

Joint work with Florian Westphal.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent b8d3e416
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -207,6 +207,12 @@ static inline void dst_metric_set(struct dst_entry *dst, int metric, u32 val)
		p[metric-1] = val;
}

/* Kernel-internal feature bits that are unallocated in user space.
 *
 * DST_FEATURE_ECN_CA is stored in the RTAX_FEATURES metric next to the
 * user-visible RTAX_FEATURE_* bits, so it must never overlap with
 * RTAX_FEATURE_MASK. The constant is unsigned on purpose: shifting a
 * signed 1 into bit 31 overflows int, which is undefined behaviour in C
 * and would sign-extend if the value were ever widened.
 */
#define DST_FEATURE_ECN_CA	(1U << 31)

#define DST_FEATURE_MASK	(DST_FEATURE_ECN_CA)
#define DST_FEATURE_ECN_MASK	(DST_FEATURE_ECN_CA | RTAX_FEATURE_ECN)

static inline u32
dst_feature(const struct dst_entry *dst, u32 feature)
{
+1 −1
Original line number Diff line number Diff line
@@ -888,7 +888,7 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked);
extern struct tcp_congestion_ops tcp_reno;

struct tcp_congestion_ops *tcp_ca_find_key(u32 key);
u32 tcp_ca_get_key_by_name(const char *name);
u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca);
#ifdef CONFIG_INET
char *tcp_ca_get_name_by_key(u32 key, char *buffer);
#else
+6 −0
Original line number Diff line number Diff line
@@ -678,6 +678,12 @@ int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)
					continue;
				if (nla_put_string(skb, i + 1, name))
					goto nla_put_failure;
			} else if (i == RTAX_FEATURES - 1) {
				u32 user_features = metrics[i] & RTAX_FEATURE_MASK;

				BUILD_BUG_ON(RTAX_FEATURE_MASK & DST_FEATURE_MASK);
				if (nla_put_u32(skb, i + 1, user_features))
					goto nla_put_failure;
			} else {
				if (nla_put_u32(skb, i + 1, metrics[i]))
					goto nla_put_failure;
+5 −1
Original line number Diff line number Diff line
@@ -879,6 +879,7 @@ static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc)
static int
fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
{
	bool ecn_ca = false;
	struct nlattr *nla;
	int remaining;

@@ -898,7 +899,7 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
			char tmp[TCP_CA_NAME_MAX];

			nla_strlcpy(tmp, nla, sizeof(tmp));
			val = tcp_ca_get_key_by_name(tmp);
			val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
			if (val == TCP_CA_UNSPEC)
				return -EINVAL;
		} else {
@@ -913,6 +914,9 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
		fi->fib_metrics[type - 1] = val;
	}

	if (ecn_ca)
		fi->fib_metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;

	return 0;
}

+6 −3
Original line number Diff line number Diff line
@@ -114,16 +114,19 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca)
}
EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);

u32 tcp_ca_get_key_by_name(const char *name)
u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca)
{
	const struct tcp_congestion_ops *ca;
	u32 key;
	u32 key = TCP_CA_UNSPEC;

	might_sleep();

	rcu_read_lock();
	ca = __tcp_ca_find_autoload(name);
	key = ca ? ca->key : TCP_CA_UNSPEC;
	if (ca) {
		key = ca->key;
		*ecn_ca = ca->flags & TCP_CONG_NEEDS_ECN;
	}
	rcu_read_unlock();

	return key;
Loading