
Commit 570d6320 authored by David S. Miller

Merge branch 'tcp-preempt'



Eric Dumazet says:

====================
net: make TCP preemptible

Most of the TCP stack assumed it was running from a BH handler.

This is great for most things, as TCP behavior is very sensitive
to scheduling artifacts.

However, prequeue and backlog processing are problematic,
as they need to be flushed with BH blocked.

To cope with modern needs, TCP sockets have large sk_rcvbuf values,
on the order of 16 MB, and soon 32 MB.
This means the backlog can hold thousands of packets, and operations
like TCP coalescing or collapsing on that many packets can
lead to insane latency spikes, since BH is blocked for too long.

It is time to make UDP/TCP stacks preemptible.

Note that the fast path still runs from the BH handler.

v2: Added "tcp: make tcp_sendmsg() aware of socket backlog"
    to reduce latency problems of large sends.

v3: Fixed a typo in tcp_cdg.c
====================
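
For context on the backlog mentioned above: when a packet arrives in softirq while a process currently owns the socket, the skb cannot be handled right away, so it is appended to sk->sk_backlog and processed later, when the owner releases the socket (the amount queued this way is bounded roughly by sk_rcvbuf plus sk_sndbuf, hence the "thousands of packets"). The snippet below is a rough user-space model of that hand-off, not kernel code; fake_sock, fake_rcv and the mutex/flag standing in for the socket spinlock and sock_owned_by_user() are all invented for illustration.

/*
 * Rough user-space model of the backlog hand-off (illustration only, not
 * kernel code): the receive side either processes a packet immediately or,
 * if a process currently owns the socket, defers it to a backlog list that
 * the owner drains when it releases the socket.
 */
#include <pthread.h>
#include <stdlib.h>

struct pkt { struct pkt *next; };

struct fake_sock {
	pthread_mutex_t slock;          /* stand-in for sk->sk_lock.slock */
	int owned_by_user;              /* stand-in for sock_owned_by_user() */
	struct pkt *backlog_head, *backlog_tail;
};

static void process_now(struct pkt *p) /* "fast path": protocol handler stand-in */
{
	free(p);
}

static void fake_rcv(struct fake_sock *sk, struct pkt *p) /* "softirq" side */
{
	pthread_mutex_lock(&sk->slock);
	if (!sk->owned_by_user) {
		process_now(p);                 /* nobody owns the socket: handle now */
	} else {                                /* owner busy: append for later */
		p->next = NULL;
		if (sk->backlog_tail)
			sk->backlog_tail->next = p;
		else
			sk->backlog_head = p;
		sk->backlog_tail = p;
	}
	pthread_mutex_unlock(&sk->slock);
}

int main(void)
{
	static struct fake_sock sk;     /* zero-initialized */

	pthread_mutex_init(&sk.slock, NULL);
	fake_rcv(&sk, calloc(1, sizeof(struct pkt)));   /* processed immediately */
	sk.owned_by_user = 1;
	fake_rcv(&sk, calloc(1, sizeof(struct pkt)));   /* lands on the backlog */
	free(sk.backlog_head);
	pthread_mutex_destroy(&sk.slock);
	return 0;
}
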

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 5e59c83f d41a69f1
include/net/sock.h  +11 −0
@@ -926,6 +926,17 @@ void sk_stream_kill_queues(struct sock *sk);
 void sk_set_memalloc(struct sock *sk);
 void sk_clear_memalloc(struct sock *sk);
 
+void __sk_flush_backlog(struct sock *sk);
+
+static inline bool sk_flush_backlog(struct sock *sk)
+{
+	if (unlikely(READ_ONCE(sk->sk_backlog.tail))) {
+		__sk_flush_backlog(sk);
+		return true;
+	}
+	return false;
+}
+
 int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb);
 
 struct request_sock_ops;
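
The helper added above is deliberately split in two: the inline sk_flush_backlog() only does an unlocked READ_ONCE() of the backlog tail, so a hot loop can poll it almost for free, and it falls through to the out-of-line __sk_flush_backlog() (added in net/core/sock.c below) only when something is actually queued; the boolean result lets the caller re-check socket state after a flush. Per the v2 note in the cover letter, tcp_sendmsg() is the intended user. Below is a user-space analogue of the same check-then-flush shape; the C11 atomic load stands in for READ_ONCE() and all names are invented for illustration.

/*
 * User-space analogue of the check-then-flush pattern (illustration only):
 * a cheap unlocked read of the queue tail decides whether to pay for the
 * locked flush, so an empty queue costs a single load.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static _Atomic(void *) queue_tail;      /* NULL when the deferred queue is empty */

static void locked_flush(void)          /* plays the role of __sk_flush_backlog() */
{
	pthread_mutex_lock(&lock);
	/* ... drain the deferred queue here ... */
	atomic_store(&queue_tail, NULL);
	pthread_mutex_unlock(&lock);
}

static inline bool maybe_flush(void)    /* mirrors the shape of sk_flush_backlog() */
{
	if (atomic_load(&queue_tail) != NULL) {   /* cheap check, no lock taken */
		locked_flush();
		return true;
	}
	return false;
}

int main(void)
{
	return maybe_flush() ? 1 : 0;   /* queue empty: returns 0 without locking */
}
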
net/core/sock.c  +15 −14
@@ -2019,33 +2019,27 @@ static void __release_sock(struct sock *sk)
 	__releases(&sk->sk_lock.slock)
 	__acquires(&sk->sk_lock.slock)
 {
-	struct sk_buff *skb = sk->sk_backlog.head;
+	struct sk_buff *skb, *next;
 
-	do {
+	while ((skb = sk->sk_backlog.head) != NULL) {
 		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
-		bh_unlock_sock(sk);
 
-		do {
-			struct sk_buff *next = skb->next;
+		spin_unlock_bh(&sk->sk_lock.slock);
 
+		do {
+			next = skb->next;
 			prefetch(next);
 			WARN_ON_ONCE(skb_dst_is_noref(skb));
 			skb->next = NULL;
 			sk_backlog_rcv(sk, skb);
 
-			/*
-			 * We are in process context here with softirqs
-			 * disabled, use cond_resched_softirq() to preempt.
-			 * This is safe to do because we've taken the backlog
-			 * queue private:
-			 */
-			cond_resched_softirq();
+			cond_resched();
 
 			skb = next;
 		} while (skb != NULL);
 
-		bh_lock_sock(sk);
-	} while ((skb = sk->sk_backlog.head) != NULL);
+		spin_lock_bh(&sk->sk_lock.slock);
+	}
 
 	/*
 	 * Doing the zeroing here guarantee we can not loop forever
@@ -2054,6 +2048,13 @@ static void __release_sock(struct sock *sk)
 	sk->sk_backlog.len = 0;
 }
 
+void __sk_flush_backlog(struct sock *sk)
+{
+	spin_lock_bh(&sk->sk_lock.slock);
+	__release_sock(sk);
+	spin_unlock_bh(&sk->sk_lock.slock);
+}
+
 /**
  * sk_wait_data - wait for data to arrive at sk_receive_queue
  * @sk:    sock to wait on
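
Two things change in the rework above. First, the drain loop now drops the socket spinlock with spin_unlock_bh(), so BH is re-enabled while each batch is processed; that is why the plain cond_resched() is enough and the old comment about cond_resched_softirq() could go. Second, the same loop is now reachable from process context through the new __sk_flush_backlog(), so a long-running caller can drain the backlog mid-operation instead of only at release_sock() time. The snippet below is a user-space model of that drain pattern, not kernel code; the mutex and sched_yield() are stand-ins for the slock and cond_resched().

/*
 * User-space model of the drain loop above (illustration only, not kernel
 * code): take the whole pending list private, drop the lock so producers can
 * keep queueing, process the batch with occasional rescheduling, then re-take
 * the lock and look for anything that arrived meanwhile.
 */
#include <pthread.h>
#include <sched.h>
#include <stdlib.h>

struct item { struct item *next; };

static pthread_mutex_t slock = PTHREAD_MUTEX_INITIALIZER; /* stand-in for sk->sk_lock.slock */
static struct item *backlog_head;       /* producers append here under slock */

static void drain_backlog(void)         /* plays the role of __sk_flush_backlog() */
{
	struct item *it, *next;

	pthread_mutex_lock(&slock);
	while ((it = backlog_head) != NULL) {
		backlog_head = NULL;                  /* queue taken private */
		pthread_mutex_unlock(&slock);         /* producers may run again */

		for (; it; it = next) {
			next = it->next;
			free(it);                     /* stand-in for sk_backlog_rcv() */
			sched_yield();                /* stand-in for cond_resched() */
		}

		pthread_mutex_lock(&slock);           /* anything new arrived? */
	}
	pthread_mutex_unlock(&slock);
}

int main(void)
{
	drain_backlog();        /* empty backlog: terminates immediately */
	return 0;
}

Taking the list private before dropping the lock is what bounds each batch to whatever had accumulated, which is the same structure as the kernel loop.
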
net/dccp/input.c  +1 −1
@@ -359,7 +359,7 @@ static int __dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
 		goto discard;
 	}
 
-	__DCCP_INC_STATS(DCCP_MIB_INERRS);
+	DCCP_INC_STATS(DCCP_MIB_INERRS);
 discard:
 	__kfree_skb(skb);
 	return 0;
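
This hunk, and the two identical ones in the files below, swap the statistics macro rather than restructure anything: the double-underscore __DCCP_INC_STATS() variant assumes it cannot be preempted while it bumps the per-CPU SNMP counter, which was fine while these paths only ran with BH disabled, whereas DCCP_INC_STATS() is safe from preemptible context. As a loose user-space analogy (not how the kernel's per-CPU counters actually work), the program below shows how an unprotected read-modify-write increment loses updates when two contexts race, while an atomic increment stays exact.

/*
 * Loose user-space analogy for why the preempt-safe counter variant matters:
 * a plain read-modify-write increment can lose updates when two contexts
 * race, whereas an atomic increment cannot.  This models the hazard only;
 * kernel per-CPU SNMP counters are implemented differently.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define LOOPS 1000000

static long plain_counter;              /* like the non-preempt-safe increment */
static atomic_long safe_counter;        /* like the preempt-safe increment */

static void *worker(void *arg)
{
	(void)arg;
	for (int i = 0; i < LOOPS; i++) {
		plain_counter++;                        /* racy: updates can be lost */
		atomic_fetch_add(&safe_counter, 1);     /* safe under concurrency */
	}
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, worker, NULL);
	pthread_create(&b, NULL, worker, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);

	/* plain_counter usually ends up below 2*LOOPS; safe_counter is exact */
	printf("plain=%ld safe=%ld expected=%d\n",
	       plain_counter, atomic_load(&safe_counter), 2 * LOOPS);
	return 0;
}
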
net/dccp/ipv4.c  +2 −2
@@ -533,8 +533,8 @@ static void dccp_v4_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb)
 	bh_unlock_sock(ctl_sk);
 
 	if (net_xmit_eval(err) == 0) {
-		__DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
-		__DCCP_INC_STATS(DCCP_MIB_OUTRSTS);
+		DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
+		DCCP_INC_STATS(DCCP_MIB_OUTRSTS);
 	}
 out:
 	 dst_release(dst);
net/dccp/ipv6.c  +2 −2
@@ -277,8 +277,8 @@ static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb)
 	if (!IS_ERR(dst)) {
 		skb_dst_set(skb, dst);
 		ip6_xmit(ctl_sk, skb, &fl6, NULL, 0);
-		__DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
-		__DCCP_INC_STATS(DCCP_MIB_OUTRSTS);
+		DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
+		DCCP_INC_STATS(DCCP_MIB_OUTRSTS);
 		return;
 	}
 