Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit cec451ce authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'tcp-improving-RACK-cpu-performance'



Yuchung Cheng says:

====================
tcp: improving RACK cpu performance

This patch set improves the CPU consumption of the RACK TCP loss
recovery algorithm, in particular for high-speed networks. Currently,
for every ACK in recovery RACK can potentially iterate over all sent
packets in the write queue. On large BDP networks with non-trivial
losses the RACK write queue walk CPU usage becomes unreasonably high.

This patch introduces a new queue in TCP that keeps only skbs sent and
not yet (s)acked or marked lost, in time order instead of sequence
order.  With that, RACK can examine this time-sorted list and only
check packets that were sent recently, within the reordering window,
per ACK. This is the fastest way without any write queue walks. The
number of skbs examined per ACK is reduced by orders of magnitude.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents b1fb67fa bef06223
Loading
Loading
Loading
Loading
+9 −2
Original line number Diff line number Diff line
@@ -617,6 +617,7 @@ typedef unsigned char *sk_buff_data_t;
 *	@nf_trace: netfilter packet trace flag
 *	@protocol: Packet protocol from driver
 *	@destructor: Destruct function
 *	@tcp_tsorted_anchor: list structure for TCP (tp->tsorted_sent_queue)
 *	@_nfct: Associated connection, if any (with nfctinfo bits)
 *	@nf_bridge: Saved data about a bridged frame - see br_netfilter.c
 *	@skb_iif: ifindex of device we arrived on
@@ -686,8 +687,14 @@ struct sk_buff {
	 */
	char			cb[48] __aligned(8);

	union {
		struct {
			unsigned long	_skb_refdst;
			void		(*destructor)(struct sk_buff *skb);
		};
		struct list_head	tcp_tsorted_anchor;
	};

#ifdef CONFIG_XFRM
	struct	sec_path	*sp;
#endif
+1 −0
Original line number Diff line number Diff line
@@ -191,6 +191,7 @@ struct tcp_sock {
	u32	tsoffset;	/* timestamp offset */

	struct list_head tsq_node; /* anchor in tsq_tasklet.head list */
	struct list_head tsorted_sent_queue; /* time-sorted sent but un-SACKed skbs */

	u32	snd_wl1;	/* Sequence for window update		*/
	u32	snd_wnd;	/* The window we expect to receive	*/
+23 −1
Original line number Diff line number Diff line
@@ -1589,14 +1589,34 @@ enum tcp_chrono {
void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type);
void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type);

/* This helper is needed, because skb->tcp_tsorted_anchor uses
 * the same memory storage than skb->destructor/_skb_refdst
 */
static inline void tcp_skb_tsorted_anchor_cleanup(struct sk_buff *skb)
{
	skb->destructor = NULL;
	skb->_skb_refdst = 0UL;
}

#define tcp_skb_tsorted_save(skb) {		\
	unsigned long _save = skb->_skb_refdst;	\
	skb->_skb_refdst = 0UL;

#define tcp_skb_tsorted_restore(skb)		\
	skb->_skb_refdst = _save;		\
}

/* write queue abstraction */
static inline void tcp_write_queue_purge(struct sock *sk)
{
	struct sk_buff *skb;

	tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
	while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL)
	while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		tcp_skb_tsorted_anchor_cleanup(skb);
		sk_wmem_free_skb(sk, skb);
	}
	INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue);
	sk_mem_reclaim(sk);
	tcp_clear_all_retrans_hints(tcp_sk(sk));
}
@@ -1711,6 +1731,8 @@ static inline void tcp_insert_write_queue_before(struct sk_buff *new,

static inline void tcp_unlink_write_queue(struct sk_buff *skb, struct sock *sk)
{
	list_del(&skb->tcp_tsorted_anchor);
	tcp_skb_tsorted_anchor_cleanup(skb);
	__skb_unlink(skb, &sk->sk_write_queue);
}

+2 −0
Original line number Diff line number Diff line
@@ -415,6 +415,7 @@ void tcp_init_sock(struct sock *sk)
	tp->out_of_order_queue = RB_ROOT;
	tcp_init_xmit_timers(sk);
	INIT_LIST_HEAD(&tp->tsq_node);
	INIT_LIST_HEAD(&tp->tsorted_sent_queue);

	icsk->icsk_rto = TCP_TIMEOUT_INIT;
	tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
@@ -881,6 +882,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
			 * available to the caller, no more, no less.
			 */
			skb->reserved_tailroom = skb->end - skb->tail - size;
			INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
			return skb;
		}
		__kfree_skb(skb);
+7 −2
Original line number Diff line number Diff line
@@ -1593,6 +1593,8 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
						tcp_skb_pcount(skb),
						skb->skb_mstamp);
			tcp_rate_skb_delivered(sk, skb, state->rate);
			if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
				list_del_init(&skb->tcp_tsorted_anchor);

			if (!before(TCP_SKB_CB(skb)->seq,
				    tcp_highest_sack_seq(tp)))
@@ -3054,8 +3056,11 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,

	shinfo = skb_shinfo(skb);
	if (!before(shinfo->tskey, prior_snd_una) &&
	    before(shinfo->tskey, tcp_sk(sk)->snd_una))
	    before(shinfo->tskey, tcp_sk(sk)->snd_una)) {
		tcp_skb_tsorted_save(skb) {
			__skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
		} tcp_skb_tsorted_restore(skb);
	}
}

/* Remove acknowledged frames from the retransmission queue. If our packet
Loading