
Commit 029ac211 authored by David S. Miller

Merge branch 'net-sched-singly-linked-list'



Florian Westphal says:

====================
sched: convert queues to single-linked list

During Netfilter Workshop 2016, Eric Dumazet pointed out that qdisc
schedulers use doubly-linked lists, even though a singly-linked
list would be enough.

The doubly-linked skb lists incur one extra write per enqueue/dequeue
operation (to update the ->prev pointer of the next list element).

This series converts the qdiscs to a singly-linked version; the list
head maintains pointers to the first skb (for dequeue) and the last
skb (for enqueue).

Most qdiscs don't queue at all and instead use a leaf qdisc (typically
pfifo_fast), so only a few schedulers needed changes.

I briefly tested netem and htb and they seemed fine.

UDP_STREAM netperf with 64-byte packets via veth+pfifo_fast shows
a small (~2%) improvement.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 106323b9 48da34b7
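
The core of the series is the qdisc_skb_head layout shown in the diff below: a singly-linked FIFO that tracks both ends explicitly, so enqueue only writes the old tail's ->next and dequeue only advances ->head. As a minimal standalone sketch of that layout (illustrative names only; a plain struct item stands in for struct sk_buff, and the kernel code's stats, locking, and likely() hints are omitted):

#include <assert.h>
#include <stddef.h>

/* Stand-in for struct sk_buff: only a forward link, no ->prev. */
struct item {
	struct item *next;
	int id;
};

/* Stand-in for struct qdisc_skb_head: head for dequeue, tail for enqueue. */
struct slist_head {
	struct item *head;
	struct item *tail;
	unsigned int qlen;
};

static void slist_init(struct slist_head *qh)
{
	qh->head = NULL;
	qh->tail = NULL;
	qh->qlen = 0;
}

/* Append at the tail: one write to the old tail's ->next; a doubly-linked
 * list would additionally have to set the new element's ->prev. */
static void slist_enqueue_tail(struct slist_head *qh, struct item *it)
{
	it->next = NULL;
	if (qh->tail)
		qh->tail->next = it;
	else
		qh->head = it;
	qh->tail = it;
	qh->qlen++;
}

/* Remove from the head; clear the tail when the list becomes empty. */
static struct item *slist_dequeue_head(struct slist_head *qh)
{
	struct item *it = qh->head;

	if (it) {
		qh->head = it->next;
		if (!qh->head)
			qh->tail = NULL;
		it->next = NULL;
		qh->qlen--;
	}
	return it;
}

int main(void)
{
	struct slist_head q;
	struct item a = { .id = 1 }, b = { .id = 2 };

	slist_init(&q);
	slist_enqueue_tail(&q, &a);
	slist_enqueue_tail(&q, &b);
	assert(slist_dequeue_head(&q) == &a);
	assert(slist_dequeue_head(&q) == &b);
	assert(q.qlen == 0 && !q.head && !q.tail);
	return 0;
}

This is the write the cover letter refers to: with ->prev gone, FIFO order is kept purely by the head/tail pair, at the cost of O(1) removal from the middle of the list, which these queues never need.
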
include/net/sch_generic.h +56 −16
@@ -36,6 +36,14 @@ struct qdisc_size_table {
	u16			data[];
};

+/* similar to sk_buff_head, but skb->prev pointer is undefined. */
+struct qdisc_skb_head {
+	struct sk_buff	*head;
+	struct sk_buff	*tail;
+	__u32		qlen;
+	spinlock_t	lock;
+};
+
struct Qdisc {
	int 			(*enqueue)(struct sk_buff *skb,
					   struct Qdisc *sch,
@@ -76,7 +84,7 @@ struct Qdisc {
	 * For performance sake on SMP, we put highly modified fields at the end
	 */
	struct sk_buff		*gso_skb ____cacheline_aligned_in_smp;
-	struct sk_buff_head	q;
+	struct qdisc_skb_head	q;
	struct gnet_stats_basic_packed bstats;
	seqcount_t		running;
	struct gnet_stats_queue	qstats;
@@ -600,10 +608,27 @@ static inline void qdisc_qstats_overlimit(struct Qdisc *sch)
	sch->qstats.overlimits++;
}

+static inline void qdisc_skb_head_init(struct qdisc_skb_head *qh)
+{
+	qh->head = NULL;
+	qh->tail = NULL;
+	qh->qlen = 0;
+}
+
static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
-				       struct sk_buff_head *list)
+				       struct qdisc_skb_head *qh)
{
-	__skb_queue_tail(list, skb);
+	struct sk_buff *last = qh->tail;
+
+	if (last) {
+		skb->next = NULL;
+		last->next = skb;
+		qh->tail = skb;
+	} else {
+		qh->tail = skb;
+		qh->head = skb;
+	}
+	qh->qlen++;
	qdisc_qstats_backlog_inc(sch, skb);

	return NET_XMIT_SUCCESS;
@@ -614,14 +639,16 @@ static inline int qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch)
	return __qdisc_enqueue_tail(skb, sch, &sch->q);
}

-static inline struct sk_buff *__qdisc_dequeue_head(struct Qdisc *sch,
-						   struct sk_buff_head *list)
+static inline struct sk_buff *__qdisc_dequeue_head(struct qdisc_skb_head *qh)
{
-	struct sk_buff *skb = __skb_dequeue(list);
+	struct sk_buff *skb = qh->head;

	if (likely(skb != NULL)) {
-		qdisc_qstats_backlog_dec(sch, skb);
-		qdisc_bstats_update(sch, skb);
+		qh->head = skb->next;
+		qh->qlen--;
+		if (qh->head == NULL)
+			qh->tail = NULL;
+		skb->next = NULL;
	}

	return skb;
@@ -629,7 +656,14 @@ static inline struct sk_buff *__qdisc_dequeue_head(struct Qdisc *sch,

static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch)
{
-	return __qdisc_dequeue_head(sch, &sch->q);
+	struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);
+
+	if (likely(skb != NULL)) {
+		qdisc_qstats_backlog_dec(sch, skb);
+		qdisc_bstats_update(sch, skb);
+	}
+
+	return skb;
}

/* Instead of calling kfree_skb() while root qdisc lock is held,
@@ -642,10 +676,10 @@ static inline void __qdisc_drop(struct sk_buff *skb, struct sk_buff **to_free)
}

static inline unsigned int __qdisc_queue_drop_head(struct Qdisc *sch,
-						   struct sk_buff_head *list,
+						   struct qdisc_skb_head *qh,
						   struct sk_buff **to_free)
{
-	struct sk_buff *skb = __skb_dequeue(list);
+	struct sk_buff *skb = __qdisc_dequeue_head(qh);

	if (likely(skb != NULL)) {
		unsigned int len = qdisc_pkt_len(skb);
@@ -666,7 +700,9 @@ static inline unsigned int qdisc_queue_drop_head(struct Qdisc *sch,

static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch)
{
-	return skb_peek(&sch->q);
+	const struct qdisc_skb_head *qh = &sch->q;
+
+	return qh->head;
}

/* generic pseudo peek method for non-work-conserving qdisc */
@@ -701,15 +737,19 @@ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch)
	return skb;
}

-static inline void __qdisc_reset_queue(struct sk_buff_head *list)
+static inline void __qdisc_reset_queue(struct qdisc_skb_head *qh)
{
	/*
	 * We do not know the backlog in bytes of this list, it
	 * is up to the caller to correct it
	 */
-	if (!skb_queue_empty(list)) {
-		rtnl_kfree_skbs(list->next, list->prev);
-		__skb_queue_head_init(list);
+	ASSERT_RTNL();
+	if (qh->qlen) {
+		rtnl_kfree_skbs(qh->head, qh->tail);
+
+		qh->head = NULL;
+		qh->tail = NULL;
+		qh->qlen = 0;
	}
}

net/sched/sch_codel.c +2 −2
@@ -69,7 +69,7 @@ struct codel_sched_data {
static struct sk_buff *dequeue_func(struct codel_vars *vars, void *ctx)
{
	struct Qdisc *sch = ctx;
-	struct sk_buff *skb = __skb_dequeue(&sch->q);
+	struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);

	if (skb)
		sch->qstats.backlog -= qdisc_pkt_len(skb);
@@ -172,7 +172,7 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt)

	qlen = sch->q.qlen;
	while (sch->q.qlen > sch->limit) {
-		struct sk_buff *skb = __skb_dequeue(&sch->q);
+		struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);

		dropped += qdisc_pkt_len(skb);
		qdisc_qstats_backlog_dec(sch, skb);
net/sched/sch_fifo.c +2 −2
@@ -31,7 +31,7 @@ static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
			 struct sk_buff **to_free)
{
-	if (likely(skb_queue_len(&sch->q) < sch->limit))
+	if (likely(sch->q.qlen < sch->limit))
		return qdisc_enqueue_tail(skb, sch);

	return qdisc_drop(skb, sch, to_free);
@@ -42,7 +42,7 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch,
{
	unsigned int prev_backlog;

-	if (likely(skb_queue_len(&sch->q) < sch->limit))
+	if (likely(sch->q.qlen < sch->limit))
		return qdisc_enqueue_tail(skb, sch);

	prev_backlog = sch->qstats.backlog;
net/sched/sch_generic.c +17 −11
@@ -466,7 +466,7 @@ static const u8 prio2band[TC_PRIO_MAX + 1] = {
 */
struct pfifo_fast_priv {
	u32 bitmap;
-	struct sk_buff_head q[PFIFO_FAST_BANDS];
+	struct qdisc_skb_head q[PFIFO_FAST_BANDS];
};

/*
@@ -477,7 +477,7 @@ struct pfifo_fast_priv {
 */
static const int bitmap2band[] = {-1, 0, 1, 0, 2, 0, 1, 0};

-static inline struct sk_buff_head *band2list(struct pfifo_fast_priv *priv,
+static inline struct qdisc_skb_head *band2list(struct pfifo_fast_priv *priv,
					     int band)
{
	return priv->q + band;
@@ -486,10 +486,10 @@ static inline struct sk_buff_head *band2list(struct pfifo_fast_priv *priv,
static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
			      struct sk_buff **to_free)
{
-	if (skb_queue_len(&qdisc->q) < qdisc_dev(qdisc)->tx_queue_len) {
+	if (qdisc->q.qlen < qdisc_dev(qdisc)->tx_queue_len) {
		int band = prio2band[skb->priority & TC_PRIO_MAX];
		struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
-		struct sk_buff_head *list = band2list(priv, band);
+		struct qdisc_skb_head *list = band2list(priv, band);

		priv->bitmap |= (1 << band);
		qdisc->q.qlen++;
@@ -505,11 +505,16 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
	int band = bitmap2band[priv->bitmap];

	if (likely(band >= 0)) {
-		struct sk_buff_head *list = band2list(priv, band);
-		struct sk_buff *skb = __qdisc_dequeue_head(qdisc, list);
+		struct qdisc_skb_head *qh = band2list(priv, band);
+		struct sk_buff *skb = __qdisc_dequeue_head(qh);
+
+		if (likely(skb != NULL)) {
+			qdisc_qstats_backlog_dec(qdisc, skb);
+			qdisc_bstats_update(qdisc, skb);
+		}

		qdisc->q.qlen--;
-		if (skb_queue_empty(list))
+		if (qh->qlen == 0)
			priv->bitmap &= ~(1 << band);

		return skb;
@@ -524,9 +529,9 @@ static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc)
	int band = bitmap2band[priv->bitmap];

	if (band >= 0) {
-		struct sk_buff_head *list = band2list(priv, band);
+		struct qdisc_skb_head *qh = band2list(priv, band);

-		return skb_peek(list);
+		return qh->head;
	}

	return NULL;
@@ -564,7 +569,7 @@ static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
-		__skb_queue_head_init(band2list(priv, prio));
+		qdisc_skb_head_init(band2list(priv, prio));

	/* Can by-pass the queue discipline */
	qdisc->flags |= TCQ_F_CAN_BYPASS;
@@ -612,7 +617,8 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
		sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
		sch->padded = (char *) sch - (char *) p;
	}
-	skb_queue_head_init(&sch->q);
+	qdisc_skb_head_init(&sch->q);
+	spin_lock_init(&sch->q.lock);

	spin_lock_init(&sch->busylock);
	lockdep_set_class(&sch->busylock,
net/sched/sch_htb.c +20 −4
@@ -162,7 +162,7 @@ struct htb_sched {
	struct work_struct	work;

	/* non shaped skbs; let them go directly thru */
-	struct sk_buff_head	direct_queue;
+	struct qdisc_skb_head	direct_queue;
	long			direct_pkts;

	struct qdisc_watchdog	watchdog;
@@ -570,6 +570,22 @@ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
	list_del_init(&cl->un.leaf.drop_list);
}

+static void htb_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
+			     struct qdisc_skb_head *qh)
+{
+	struct sk_buff *last = qh->tail;
+
+	if (last) {
+		skb->next = NULL;
+		last->next = skb;
+		qh->tail = skb;
+	} else {
+		qh->tail = skb;
+		qh->head = skb;
+	}
+	qh->qlen++;
+}
+
static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
@@ -580,7 +596,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
	if (cl == HTB_DIRECT) {
		/* enqueue to helper queue */
		if (q->direct_queue.qlen < q->direct_qlen) {
-			__skb_queue_tail(&q->direct_queue, skb);
+			htb_enqueue_tail(skb, sch, &q->direct_queue);
			q->direct_pkts++;
		} else {
			return qdisc_drop(skb, sch, to_free);
@@ -888,7 +904,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
	unsigned long start_at;

	/* try to dequeue direct packets as high prio (!) to minimize cpu work */
-	skb = __skb_dequeue(&q->direct_queue);
+	skb = __qdisc_dequeue_head(&q->direct_queue);
	if (skb != NULL) {
ok:
		qdisc_bstats_update(sch, skb);
@@ -1019,7 +1035,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)

	qdisc_watchdog_init(&q->watchdog, sch);
	INIT_WORK(&q->work, htb_work_func);
-	__skb_queue_head_init(&q->direct_queue);
+	qdisc_skb_head_init(&q->direct_queue);

	if (tb[TCA_HTB_DIRECT_QLEN])
		q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);