Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 520ac30f authored by Eric Dumazet's avatar Eric Dumazet Committed by David S. Miller
Browse files

net_sched: drop packets after root qdisc lock is released



Qdisc performance suffers when packets are dropped at enqueue()
time because drops (kfree_skb()) are done while qdisc lock is held,
delaying a dequeue() draining the queue.

Nominal throughput can be reduced by 50 % when this happens,
at a time we would like the dequeue() to proceed as fast as possible.

Even FQ is vulnerable to this problem, while one of FQ goals was
to provide some flow isolation.

This patch adds a 'struct sk_buff **to_free' parameter to all
qdisc->enqueue(), and in qdisc_drop() helper.

I measured a performance increase of up to 12 %, but this patch
is a prereq so that future batches in enqueue() can fly.

Signed-off-by: default avatarEric Dumazet <edumazet@google.com>
Acked-by: default avatarJesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 36195d86
Loading
Loading
Loading
Loading
+30 −11
Original line number Diff line number Diff line
@@ -37,8 +37,10 @@ struct qdisc_size_table {
};

struct Qdisc {
	int 			(*enqueue)(struct sk_buff *skb, struct Qdisc *dev);
	struct sk_buff *	(*dequeue)(struct Qdisc *dev);
	int 			(*enqueue)(struct sk_buff *skb,
					   struct Qdisc *sch,
					   struct sk_buff **to_free);
	struct sk_buff *	(*dequeue)(struct Qdisc *sch);
	unsigned int		flags;
#define TCQ_F_BUILTIN		1
#define TCQ_F_INGRESS		2
@@ -160,7 +162,9 @@ struct Qdisc_ops {
	char			id[IFNAMSIZ];
	int			priv_size;

	int 			(*enqueue)(struct sk_buff *, struct Qdisc *);
	int 			(*enqueue)(struct sk_buff *skb,
					   struct Qdisc *sch,
					   struct sk_buff **to_free);
	struct sk_buff *	(*dequeue)(struct Qdisc *);
	struct sk_buff *	(*peek)(struct Qdisc *);

@@ -498,10 +502,11 @@ static inline void qdisc_calculate_pkt_len(struct sk_buff *skb,
#endif
}

static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
				struct sk_buff **to_free)
{
	qdisc_calculate_pkt_len(skb, sch);
	return sch->enqueue(skb, sch);
	return sch->enqueue(skb, sch, to_free);
}

static inline bool qdisc_is_percpu_stats(const struct Qdisc *q)
@@ -626,24 +631,36 @@ static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch)
	return __qdisc_dequeue_head(sch, &sch->q);
}

/* Instead of calling kfree_skb() while root qdisc lock is held,
 * queue the skb for future freeing at end of __dev_xmit_skb()
 */
static inline void __qdisc_drop(struct sk_buff *skb, struct sk_buff **to_free)
{
	skb->next = *to_free;
	*to_free = skb;
}

static inline unsigned int __qdisc_queue_drop_head(struct Qdisc *sch,
					      struct sk_buff_head *list)
						   struct sk_buff_head *list,
						   struct sk_buff **to_free)
{
	struct sk_buff *skb = __skb_dequeue(list);

	if (likely(skb != NULL)) {
		unsigned int len = qdisc_pkt_len(skb);

		qdisc_qstats_backlog_dec(sch, skb);
		kfree_skb(skb);
		__qdisc_drop(skb, to_free);
		return len;
	}

	return 0;
}

static inline unsigned int qdisc_queue_drop_head(struct Qdisc *sch)
static inline unsigned int qdisc_queue_drop_head(struct Qdisc *sch,
						 struct sk_buff **to_free)
{
	return __qdisc_queue_drop_head(sch, &sch->q);
	return __qdisc_queue_drop_head(sch, &sch->q, to_free);
}

static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch)
@@ -724,9 +741,11 @@ static inline void rtnl_qdisc_drop(struct sk_buff *skb, struct Qdisc *sch)
	qdisc_qstats_drop(sch);
}

static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch)

static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch,
			     struct sk_buff **to_free)
{
	kfree_skb(skb);
	__qdisc_drop(skb, to_free);
	qdisc_qstats_drop(sch);

	return NET_XMIT_DROP;
+5 −2
Original line number Diff line number Diff line
@@ -3070,6 +3070,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
				 struct netdev_queue *txq)
{
	spinlock_t *root_lock = qdisc_lock(q);
	struct sk_buff *to_free = NULL;
	bool contended;
	int rc;

@@ -3086,7 +3087,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,

	spin_lock(root_lock);
	if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
		kfree_skb(skb);
		__qdisc_drop(skb, &to_free);
		rc = NET_XMIT_DROP;
	} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
		   qdisc_run_begin(q)) {
@@ -3109,7 +3110,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,

		rc = NET_XMIT_SUCCESS;
	} else {
		rc = q->enqueue(skb, q) & NET_XMIT_MASK;
		rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
		if (qdisc_run_begin(q)) {
			if (unlikely(contended)) {
				spin_unlock(&q->busylock);
@@ -3119,6 +3120,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
		}
	}
	spin_unlock(root_lock);
	if (unlikely(to_free))
		kfree_skb_list(to_free);
	if (unlikely(contended))
		spin_unlock(&q->busylock);
	return rc;
+5 −4
Original line number Diff line number Diff line
@@ -357,7 +357,8 @@ static struct tcf_proto __rcu **atm_tc_find_tcf(struct Qdisc *sch,

/* --------------------------- Qdisc operations ---------------------------- */

static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
			  struct sk_buff **to_free)
{
	struct atm_qdisc_data *p = qdisc_priv(sch);
	struct atm_flow_data *flow;
@@ -398,10 +399,10 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
			kfree_skb(skb);
			__qdisc_drop(skb, to_free);
			return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
		case TC_ACT_SHOT:
			kfree_skb(skb);
			__qdisc_drop(skb, to_free);
			goto drop;
		case TC_ACT_RECLASSIFY:
			if (flow->excess)
@@ -413,7 +414,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
#endif
	}

	ret = qdisc_enqueue(skb, flow->q);
	ret = qdisc_enqueue(skb, flow->q, to_free);
	if (ret != NET_XMIT_SUCCESS) {
drop: __maybe_unused
		if (net_xmit_drop_count(ret)) {
+3 −2
Original line number Diff line number Diff line
@@ -17,9 +17,10 @@
#include <linux/skbuff.h>
#include <net/pkt_sched.h>

static int blackhole_enqueue(struct sk_buff *skb, struct Qdisc *sch)
static int blackhole_enqueue(struct sk_buff *skb, struct Qdisc *sch,
			     struct sk_buff **to_free)
{
	qdisc_drop(skb, sch);
	qdisc_drop(skb, sch, to_free);
	return NET_XMIT_SUCCESS;
}

+4 −3
Original line number Diff line number Diff line
@@ -358,7 +358,8 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
}

static int
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
	    struct sk_buff **to_free)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	int uninitialized_var(ret);
@@ -370,11 +371,11 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
	if (cl == NULL) {
		if (ret & __NET_XMIT_BYPASS)
			qdisc_qstats_drop(sch);
		kfree_skb(skb);
		__qdisc_drop(skb, to_free);
		return ret;
	}

	ret = qdisc_enqueue(skb, cl->q);
	ret = qdisc_enqueue(skb, cl->q, to_free);
	if (ret == NET_XMIT_SUCCESS) {
		sch->q.qlen++;
		cbq_mark_toplevel(q, cl);
Loading