Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit bffa72cf authored by Eric Dumazet, committed by David S. Miller
Browse files

net: sk_buff rbnode reorg



skb->rbnode shares space with skb->next, skb->prev and skb->tstamp

Current uses (TCP receive ofo queue and netem) need to save/restore
tstamp, while skb->dev is either NULL (TCP) or a constant for a given
queue (netem).

Since we plan to use an RB tree for the TCP retransmit queue to speed up
SACK processing with large BDP, this patch exchanges skb->dev and
skb->tstamp.

This saves some overhead in both TCP and netem.

v2: removes the swtstamp field from struct tcp_skb_cb

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Soheil Hassas Yeganeh <soheil@google.com>
Cc: Wei Wang <weiwan@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent a38b2fa3
Loading
Loading
Loading
Loading
+8 −8
Original line number Diff line number Diff line
@@ -661,8 +661,12 @@ struct sk_buff {
			struct sk_buff		*prev;

			union {
				ktime_t		tstamp;
				u64		skb_mstamp;
				struct net_device	*dev;
				/* Some protocols might use this space to store information,
				 * while device pointer would be NULL.
				 * UDP receive path is one user.
				 */
				unsigned long		dev_scratch;
			};
		};
		struct rb_node	rbnode; /* used in netem & tcp stack */
@@ -670,12 +674,8 @@ struct sk_buff {
	struct sock		*sk;

	union {
		struct net_device	*dev;
		/* Some protocols might use this space to store information,
		 * while device pointer would be NULL.
		 * UDP receive path is one user.
		 */
		unsigned long		dev_scratch;
		ktime_t		tstamp;
		u64		skb_mstamp;
	};
	/*
	 * This is the control buffer. It is free to use for every
+0 −6
Original line number Diff line number Diff line
@@ -797,12 +797,6 @@ struct tcp_skb_cb {
			u16	tcp_gso_segs;
			u16	tcp_gso_size;
		};

		/* Used to stash the receive timestamp while this skb is in the
		 * out of order queue, as skb->tstamp is overwritten by the
		 * rbnode.
		 */
		ktime_t		swtstamp;
	};
	__u8		tcp_flags;	/* TCP header flags. (tcp[13])	*/

+5 −22
Original line number Diff line number Diff line
@@ -4266,11 +4266,6 @@ static void tcp_sack_remove(struct tcp_sock *tp)
	tp->rx_opt.num_sacks = num_sacks;
}

enum tcp_queue {
	OOO_QUEUE,
	RCV_QUEUE,
};

/**
 * tcp_try_coalesce - try to merge skb to prior one
 * @sk: socket
@@ -4286,7 +4281,6 @@ enum tcp_queue {
 * Returns true if caller should free @from instead of queueing it
 */
static bool tcp_try_coalesce(struct sock *sk,
			     enum tcp_queue dest,
			     struct sk_buff *to,
			     struct sk_buff *from,
			     bool *fragstolen)
@@ -4311,9 +4305,6 @@ static bool tcp_try_coalesce(struct sock *sk,

	if (TCP_SKB_CB(from)->has_rxtstamp) {
		TCP_SKB_CB(to)->has_rxtstamp = true;
		if (dest == OOO_QUEUE)
			TCP_SKB_CB(to)->swtstamp = TCP_SKB_CB(from)->swtstamp;
		else
		to->tstamp = from->tstamp;
	}

@@ -4351,9 +4342,6 @@ static void tcp_ofo_queue(struct sock *sk)
		}
		p = rb_next(p);
		rb_erase(&skb->rbnode, &tp->out_of_order_queue);
		/* Replace tstamp which was stomped by rbnode */
		if (TCP_SKB_CB(skb)->has_rxtstamp)
			skb->tstamp = TCP_SKB_CB(skb)->swtstamp;

		if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
			SOCK_DEBUG(sk, "ofo packet was already received\n");
@@ -4365,8 +4353,7 @@ static void tcp_ofo_queue(struct sock *sk)
			   TCP_SKB_CB(skb)->end_seq);

		tail = skb_peek_tail(&sk->sk_receive_queue);
		eaten = tail && tcp_try_coalesce(sk, RCV_QUEUE,
						 tail, skb, &fragstolen);
		eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
		tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
		fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
		if (!eaten)
@@ -4420,10 +4407,6 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
		return;
	}

	/* Stash tstamp to avoid being stomped on by rbnode */
	if (TCP_SKB_CB(skb)->has_rxtstamp)
		TCP_SKB_CB(skb)->swtstamp = skb->tstamp;

	/* Disable header prediction. */
	tp->pred_flags = 0;
	inet_csk_schedule_ack(sk);
@@ -4451,7 +4434,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
	/* In the typical case, we are adding an skb to the end of the list.
	 * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
	 */
	if (tcp_try_coalesce(sk, OOO_QUEUE, tp->ooo_last_skb,
	if (tcp_try_coalesce(sk, tp->ooo_last_skb,
			     skb, &fragstolen)) {
coalesce_done:
		tcp_grow_window(sk, skb);
@@ -4502,7 +4485,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
				__kfree_skb(skb1);
				goto merge_right;
			}
		} else if (tcp_try_coalesce(sk, OOO_QUEUE, skb1,
		} else if (tcp_try_coalesce(sk, skb1,
					    skb, &fragstolen)) {
			goto coalesce_done;
		}
@@ -4554,7 +4537,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int

	__skb_pull(skb, hdrlen);
	eaten = (tail &&
		 tcp_try_coalesce(sk, RCV_QUEUE, tail,
		 tcp_try_coalesce(sk, tail,
				  skb, fragstolen)) ? 1 : 0;
	tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
	if (!eaten) {
+4 −3
Original line number Diff line number Diff line
@@ -146,7 +146,6 @@ struct netem_sched_data {
 */
struct netem_skb_cb {
	psched_time_t	time_to_send;
	ktime_t		tstamp_save;
};


@@ -561,7 +560,6 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		}

		cb->time_to_send = now + delay;
		cb->tstamp_save = skb->tstamp;
		++q->counter;
		tfifo_enqueue(skb, sch);
	} else {
@@ -629,7 +627,10 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
			qdisc_qstats_backlog_dec(sch, skb);
			skb->next = NULL;
			skb->prev = NULL;
			skb->tstamp = netem_skb_cb(skb)->tstamp_save;
			/* skb->dev shares skb->rbnode area,
			 * we need to restore its value.
			 */
			skb->dev = qdisc_dev(sch);

#ifdef CONFIG_NET_CLS_ACT
			/*