Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 53e20678 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'tcp-remove-non-GSO-code'



Eric Dumazet says:

====================
tcp: remove non GSO code

Switching TCP to GSO mode, relying on core networking layers
to perform eventual adaptation for dumb devices was overdue.

1) Most TCP developments are done with TSO in mind.
2) Less high-resolution timers needs to be armed for TCP-pacing
3) GSO can benefit of xmit_more hint
4) Receiver GRO is more effective (as if TSO was used for real on sender)
   -> less ACK packets and overhead.
5) Write queues have less overhead (one skb holds about 64KB of payload)
6) SACK coalescing just works. (no payload in skb->head)
7) rtx rb-tree contains less packets, SACK is cheaper.
8) Removal of legacy code. Less maintenance hassles.

Note that I have left the sendpage/zerocopy paths, but they probably can
benefit from the same strategy.

Thanks to Oleksandr Natalenko for reporting a performance issue for BBR/fq_codel,
which was the main reason I worked on this patch series.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 960103ff 98be9b12
Loading
Loading
Loading
Loading
+1 −9
Original line number Diff line number Diff line
@@ -417,6 +417,7 @@ struct sock {
	struct page_frag	sk_frag;
	netdev_features_t	sk_route_caps;
	netdev_features_t	sk_route_nocaps;
	netdev_features_t	sk_route_forced_caps;
	int			sk_gso_type;
	unsigned int		sk_gso_max_size;
	gfp_t			sk_allocation;
@@ -1862,15 +1863,6 @@ static inline void sk_nocaps_add(struct sock *sk, netdev_features_t flags)
	sk->sk_route_caps &= ~flags;
}

static inline bool sk_check_csum_caps(struct sock *sk)
{
	return (sk->sk_route_caps & NETIF_F_HW_CSUM) ||
	       (sk->sk_family == PF_INET &&
		(sk->sk_route_caps & NETIF_F_IP_CSUM)) ||
	       (sk->sk_family == PF_INET6 &&
		(sk->sk_route_caps & NETIF_F_IPV6_CSUM));
}

static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb,
					   struct iov_iter *from, char *to,
					   int copy, int offset)
+1 −1
Original line number Diff line number Diff line
@@ -1777,7 +1777,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
	u32 max_segs = 1;

	sk_dst_set(sk, dst);
	sk->sk_route_caps = dst->dev->features;
	sk->sk_route_caps = dst->dev->features | sk->sk_route_forced_caps;
	if (sk->sk_route_caps & NETIF_F_GSO)
		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
	sk->sk_route_caps &= ~sk->sk_route_nocaps;
+15 −42
Original line number Diff line number Diff line
@@ -453,6 +453,7 @@ void tcp_init_sock(struct sock *sk)
	sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];

	sk_sockets_allocated_inc(sk);
	sk->sk_route_forced_caps = NETIF_F_GSO;
}
EXPORT_SYMBOL(tcp_init_sock);

@@ -897,7 +898,7 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
	struct tcp_sock *tp = tcp_sk(sk);
	u32 new_size_goal, size_goal;

	if (!large_allowed || !sk_can_gso(sk))
	if (!large_allowed)
		return mss_now;

	/* Note : tcp_tso_autosize() will eventually split this later */
@@ -1062,8 +1063,7 @@ EXPORT_SYMBOL_GPL(do_tcp_sendpages);
int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
			size_t size, int flags)
{
	if (!(sk->sk_route_caps & NETIF_F_SG) ||
	    !sk_check_csum_caps(sk))
	if (!(sk->sk_route_caps & NETIF_F_SG))
		return sock_no_sendpage_locked(sk, page, offset, size, flags);

	tcp_rate_check_app_limited(sk);  /* is sending application-limited? */
@@ -1102,27 +1102,11 @@ static int linear_payload_sz(bool first_skb)
	return 0;
}

static int select_size(const struct sock *sk, bool sg, bool first_skb, bool zc)
static int select_size(bool first_skb, bool zc)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	int tmp = tp->mss_cache;

	if (sg) {
	if (zc)
		return 0;

		if (sk_can_gso(sk)) {
			tmp = linear_payload_sz(first_skb);
		} else {
			int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER);

			if (tmp >= pgbreak &&
			    tmp <= pgbreak + (MAX_SKB_FRAGS - 1) * PAGE_SIZE)
				tmp = pgbreak;
		}
	}

	return tmp;
	return linear_payload_sz(first_skb);
}

void tcp_free_fastopen_req(struct tcp_sock *tp)
@@ -1187,7 +1171,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
	int flags, err, copied = 0;
	int mss_now = 0, size_goal, copied_syn = 0;
	bool process_backlog = false;
	bool sg, zc = false;
	bool zc = false;
	long timeo;

	flags = msg->msg_flags;
@@ -1205,7 +1189,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
			goto out_err;
		}

		zc = sk_check_csum_caps(sk) && sk->sk_route_caps & NETIF_F_SG;
		zc = sk->sk_route_caps & NETIF_F_SG;
		if (!zc)
			uarg->zerocopy = 0;
	}
@@ -1268,18 +1252,12 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
		goto do_error;

	sg = !!(sk->sk_route_caps & NETIF_F_SG);

	while (msg_data_left(msg)) {
		int copy = 0;
		int max = size_goal;

		skb = tcp_write_queue_tail(sk);
		if (skb) {
			if (skb->ip_summed == CHECKSUM_NONE)
				max = mss_now;
			copy = max - skb->len;
		}
		if (skb)
			copy = size_goal - skb->len;

		if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) {
			bool first_skb;
@@ -1297,22 +1275,17 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
				goto restart;
			}
			first_skb = tcp_rtx_and_write_queues_empty(sk);
			linear = select_size(sk, sg, first_skb, zc);
			linear = select_size(first_skb, zc);
			skb = sk_stream_alloc_skb(sk, linear, sk->sk_allocation,
						  first_skb);
			if (!skb)
				goto wait_for_memory;

			process_backlog = true;
			/*
			 * Check whether we can use HW checksum.
			 */
			if (sk_check_csum_caps(sk))
			skb->ip_summed = CHECKSUM_PARTIAL;

			skb_entail(sk, skb);
			copy = size_goal;
			max = size_goal;

			/* All packets are restored as if they have
			 * already been sent. skb_mstamp isn't set to
@@ -1343,7 +1316,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				if (i >= sysctl_max_skb_frags || !sg) {
				if (i >= sysctl_max_skb_frags) {
					tcp_mark_push(tp, skb);
					goto new_segment;
				}
@@ -1396,7 +1369,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
			goto out;
		}

		if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair))
		if (skb->len < size_goal || (flags & MSG_OOB) || unlikely(tp->repair))
			continue;

		if (forced_push(tp)) {
+0 −3
Original line number Diff line number Diff line
@@ -1358,9 +1358,6 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
	int len;
	int in_sack;

	if (!sk_can_gso(sk))
		goto fallback;

	/* Normally R but no L won't result in plain S */
	if (!dup_sack &&
	    (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS)
+3 −10
Original line number Diff line number Diff line
@@ -561,16 +561,9 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct tcphdr *th = tcp_hdr(skb);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
	th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
	skb->csum_start = skb_transport_header(skb) - skb->head;
	skb->csum_offset = offsetof(struct tcphdr, check);
	} else {
		th->check = tcp_v4_check(skb->len, saddr, daddr,
					 csum_partial(th,
						      th->doff << 2,
						      skb->csum));
	}
}

/* This routine computes an IPv4 TCP checksum. */
Loading