Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a918eb9f authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'rt_cong_ctrl'

Daniel Borkmann says:

====================
net: allow setting congctl via routing table

This is the second part of our work and allows for setting the congestion
control algorithm via routing table. For details, please see individual
patches.

Since patch 1 is a bug fix, we suggest applying patch 1 to net, and then
merging net into net-next, for example, and following up with the remaining
feature patches wrt dependencies.

Joint work with Florian Westphal, suggested by Hannes Frederic Sowa.

Patch for iproute2 is available under [1], but will be reposted with along
with the man-page update when this set hits net-next.

  [1] http://patchwork.ozlabs.org/patch/418149/



Thanks!

v2 -> v3:
 - Added module auto-loading as suggested by David Miller, thanks!
  - Added patch 2 for handling possible sleeps in fib6
  - While working on this, we discovered a bug, hence fix in patch 1
  - Added auto-loading to patch 4
 - Rebased, retested, rest the same.
v1 -> v2:
 - Very sorry, I noticed I had decnet disabled during testing.
   Added missing header include in decnet, rest as is.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 6cb69742 81164413
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -98,7 +98,8 @@ struct inet_connection_sock {
	const struct tcp_congestion_ops *icsk_ca_ops;
	const struct inet_connection_sock_af_ops *icsk_af_ops;
	unsigned int		  (*icsk_sync_mss)(struct sock *sk, u32 pmtu);
	__u8			  icsk_ca_state;
	__u8			  icsk_ca_state:7,
				  icsk_ca_dst_locked:1;
	__u8			  icsk_retransmits;
	__u8			  icsk_pending;
	__u8			  icsk_backoff;
+7 −3
Original line number Diff line number Diff line
@@ -74,6 +74,11 @@ struct fib6_node {
#define FIB6_SUBTREE(fn)	((fn)->subtree)
#endif

struct mx6_config {
	const u32 *mx;
	DECLARE_BITMAP(mx_valid, RTAX_MAX);
};

/*
 *	routing information
 *
@@ -291,9 +296,8 @@ struct fib6_node *fib6_locate(struct fib6_node *root,
void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg),
		    void *arg);

int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info,
	     struct nlattr *mx, int mx_len);

int fib6_add(struct fib6_node *root, struct rt6_info *rt,
	     struct nl_info *info, struct mx6_config *mxc);
int fib6_del(struct rt6_info *rt, struct nl_info *info);

void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info);
+21 −1
Original line number Diff line number Diff line
@@ -448,6 +448,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
struct sock *tcp_create_openreq_child(struct sock *sk,
				      struct request_sock *req,
				      struct sk_buff *skb);
void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst);
struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst);
@@ -636,6 +637,11 @@ static inline u32 tcp_rto_min_us(struct sock *sk)
	return jiffies_to_usecs(tcp_rto_min(sk));
}

static inline bool tcp_ca_dst_locked(const struct dst_entry *dst)
{
	return dst_metric_locked(dst, RTAX_CC_ALGO);
}

/* Compute the actual receive window we are currently advertising.
 * Rcv_nxt can be after the window if our peer push more data
 * than the offered window.
@@ -787,6 +793,8 @@ enum tcp_ca_ack_event_flags {
#define TCP_CA_MAX	128
#define TCP_CA_BUF_MAX	(TCP_CA_NAME_MAX*TCP_CA_MAX)

#define TCP_CA_UNSPEC	0

/* Algorithm can be set on socket without CAP_NET_ADMIN privileges */
#define TCP_CONG_NON_RESTRICTED 0x1
/* Requires ECN/ECT set on all packets */
@@ -794,7 +802,8 @@ enum tcp_ca_ack_event_flags {

struct tcp_congestion_ops {
	struct list_head	list;
	unsigned long flags;
	u32 key;
	u32 flags;

	/* initialize private data (optional) */
	void (*init)(struct sock *sk);
@@ -841,6 +850,17 @@ u32 tcp_reno_ssthresh(struct sock *sk);
void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked);
extern struct tcp_congestion_ops tcp_reno;

struct tcp_congestion_ops *tcp_ca_find_key(u32 key);
u32 tcp_ca_get_key_by_name(const char *name);
#ifdef CONFIG_INET
char *tcp_ca_get_name_by_key(u32 key, char *buffer);
#else
static inline char *tcp_ca_get_name_by_key(u32 key, char *buffer)
{
	return NULL;
}
#endif

static inline bool tcp_ca_needs_ecn(const struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
+2 −0
Original line number Diff line number Diff line
@@ -389,6 +389,8 @@ enum {
#define RTAX_INITRWND RTAX_INITRWND
	RTAX_QUICKACK,
#define RTAX_QUICKACK RTAX_QUICKACK
	RTAX_CC_ALGO,
#define RTAX_CC_ALGO RTAX_CC_ALGO
	__RTAX_MAX
};

+13 −2
Original line number Diff line number Diff line
@@ -50,6 +50,7 @@
#include <net/arp.h>
#include <net/route.h>
#include <net/udp.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <net/fib_rules.h>
@@ -669,10 +670,20 @@ int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)

	for (i = 0; i < RTAX_MAX; i++) {
		if (metrics[i]) {
			valid++;
			if (i == RTAX_CC_ALGO - 1) {
				char tmp[TCP_CA_NAME_MAX], *name;

				name = tcp_ca_get_name_by_key(metrics[i], tmp);
				if (!name)
					continue;
				if (nla_put_string(skb, i + 1, name))
					goto nla_put_failure;
			} else {
				if (nla_put_u32(skb, i + 1, metrics[i]))
					goto nla_put_failure;
			}
			valid++;
		}
	}

	if (!valid) {
Loading