Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d66280b1 authored by Peter Oskolkov's avatar Peter Oskolkov Committed by David S. Miller
Browse files

net: netem: use a list in addition to rbtree



When testing high-bandwidth TCP streams with large windows,
high latency, and low jitter, netem consumes a lot of CPU cycles
doing rbtree rebalancing.

This patch uses a linear list/queue in addition to the rbtree:
if an incoming packet is past the tail of the linear queue, it is
added there, otherwise it is inserted into the rbtree.

Without this patch, perf shows netem_enqueue, netem_dequeue,
and rb_* functions among the top offenders. With this patch,
only netem_enqueue is noticeable if jitter is low/absent.

Suggested-by: default avatarEric Dumazet <edumazet@google.com>
Signed-off-by: default avatarPeter Oskolkov <posk@google.com>
Reviewed-by: default avatarEric Dumazet <edumazet@google.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 932c4417
Loading
Loading
Loading
Loading
+69 −20
Original line number Diff line number Diff line
@@ -77,6 +77,10 @@ struct netem_sched_data {
	/* internal t(ime)fifo qdisc uses t_root and sch->limit */
	struct rb_root t_root;

	/* a linear queue; reduces rbtree rebalancing when jitter is low */
	struct sk_buff	*t_head;
	struct sk_buff	*t_tail;

	/* optional qdisc for classful handling (NULL at netem init) */
	struct Qdisc	*qdisc;

@@ -369,12 +373,24 @@ static void tfifo_reset(struct Qdisc *sch)
		rb_erase(&skb->rbnode, &q->t_root);
		rtnl_kfree_skbs(skb, skb);
	}

	rtnl_kfree_skbs(q->t_head, q->t_tail);
	q->t_head = NULL;
	q->t_tail = NULL;
}

static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	u64 tnext = netem_skb_cb(nskb)->time_to_send;

	if (!q->t_tail || tnext >= netem_skb_cb(q->t_tail)->time_to_send) {
		if (q->t_tail)
			q->t_tail->next = nskb;
		else
			q->t_head = nskb;
		q->t_tail = nskb;
	} else {
		struct rb_node **p = &q->t_root.rb_node, *parent = NULL;

		while (*p) {
@@ -389,6 +405,7 @@ static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
		}
		rb_link_node(&nskb->rbnode, parent, p);
		rb_insert_color(&nskb->rbnode, &q->t_root);
	}
	sch->q.qlen++;
}

@@ -530,9 +547,16 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
				t_skb = skb_rb_last(&q->t_root);
				t_last = netem_skb_cb(t_skb);
				if (!last ||
				    t_last->time_to_send > last->time_to_send) {
				    t_last->time_to_send > last->time_to_send)
					last = t_last;
			}
			if (q->t_tail) {
				struct netem_skb_cb *t_last =
					netem_skb_cb(q->t_tail);

				if (!last ||
				    t_last->time_to_send > last->time_to_send)
					last = t_last;
			}

			if (last) {
@@ -611,11 +635,38 @@ static void get_slot_next(struct netem_sched_data *q, u64 now)
	q->slot.bytes_left = q->slot_config.max_bytes;
}

static struct sk_buff *netem_peek(struct netem_sched_data *q)
{
	struct sk_buff *skb = skb_rb_first(&q->t_root);
	u64 t1, t2;

	if (!skb)
		return q->t_head;
	if (!q->t_head)
		return skb;

	t1 = netem_skb_cb(skb)->time_to_send;
	t2 = netem_skb_cb(q->t_head)->time_to_send;
	if (t1 < t2)
		return skb;
	return q->t_head;
}

static void netem_erase_head(struct netem_sched_data *q, struct sk_buff *skb)
{
	if (skb == q->t_head) {
		q->t_head = skb->next;
		if (!q->t_head)
			q->t_tail = NULL;
	} else {
		rb_erase(&skb->rbnode, &q->t_root);
	}
}

static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	struct rb_node *p;

tfifo_dequeue:
	skb = __qdisc_dequeue_head(&sch->q);
@@ -625,20 +676,18 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
		qdisc_bstats_update(sch, skb);
		return skb;
	}
	p = rb_first(&q->t_root);
	if (p) {
	skb = netem_peek(q);
	if (skb) {
		u64 time_to_send;
		u64 now = ktime_get_ns();

		skb = rb_to_skb(p);

		/* if more time remaining? */
		time_to_send = netem_skb_cb(skb)->time_to_send;
		if (q->slot.slot_next && q->slot.slot_next < time_to_send)
			get_slot_next(q, now);

		if (time_to_send <= now && q->slot.slot_next <= now) {
			rb_erase(p, &q->t_root);
			netem_erase_head(q, skb);
			sch->q.qlen--;
			qdisc_qstats_backlog_dec(sch, skb);
			skb->next = NULL;