Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 225d9b89 authored by Eric Dumazet, committed by David S. Miller
Browse files

sch_sfq: rehash queues in perturb timer



A known Out Of Order (OOO) problem hurts SFQ when the timer changes the
perturbation value, since all new packets delivered to SFQ enqueue might
end up in different slots than previous in-flight packets.

With round robin delivery, we can thus deliver packets in a different
order.

Since SFQ is limited to a small number of in-flight packets, we can rehash
packets so that this OOO problem is fixed.

This rehashing is performed only if the internal flow classifier is in use.

We now store the "struct flow_keys" in skb->cb[] so that we don't call
skb_flow_dissect() again while rehashing.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 4e68ea26
Loading
Loading
Loading
Loading
+81 −6
Original line number Diff line number Diff line
@@ -136,16 +136,30 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index
	return &q->dep[val - SFQ_SLOTS];
}

/*
 * Per-packet SFQ state kept in skb->cb[].
 *
 * The flow keys are saved alongside the skb so that the queue can be
 * rehashed quickly when the timer changes q->perturbation.
 */
struct sfq_skb_cb {
	struct flow_keys keys;	/* flow keys used as input to sfq_hash() */
};

static inline struct sfq_skb_cb *sfq_skb_cb(const struct sk_buff *skb)
{
       BUILD_BUG_ON(sizeof(skb->cb) <
               sizeof(struct qdisc_skb_cb) + sizeof(struct sfq_skb_cb));
       return (struct sfq_skb_cb *)qdisc_skb_cb(skb)->data;
}

/*
 * Compute the hash bucket for @skb under the current q->perturbation.
 *
 * The flow keys are read from skb->cb[] (see sfq_skb_cb()) instead of being
 * dissected here, so the caller must have stored them beforehand;
 * sfq_classify() does this with skb_flow_dissect() before calling us.
 * Because no dissection happens here, the same skb can be hashed again
 * later with a new perturbation value.
 *
 * Returns the jhash of (dst, src ^ ip_proto, ports) seeded with
 * q->perturbation and masked with (q->divisor - 1).
 * NOTE(review): the mask presumably relies on q->divisor being a power of
 * two - confirm where the divisor is configured.
 */
static unsigned int sfq_hash(const struct sfq_sched_data *q,
			     const struct sk_buff *skb)
{
	const struct flow_keys *keys = &sfq_skb_cb(skb)->keys;
	unsigned int hash;

	hash = jhash_3words((__force u32)keys->dst,
			    (__force u32)keys->src ^ keys->ip_proto,
			    (__force u32)keys->ports, q->perturbation);
	return hash & (q->divisor - 1);
}

@@ -161,8 +175,10 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
	    TC_H_MIN(skb->priority) <= q->divisor)
		return TC_H_MIN(skb->priority);

	if (!q->filter_list)
	if (!q->filter_list) {
		skb_flow_dissect(skb, &sfq_skb_cb(skb)->keys);
		return sfq_hash(q, skb) + 1;
	}

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	result = tc_classify(skb, q->filter_list, &res);
@@ -423,12 +439,71 @@ sfq_reset(struct Qdisc *sch)
		kfree_skb(skb);
}

/*
 * When q->perturbation is changed, we rehash all queued skbs
 * to avoid OOO (Out Of Order) effects.
 * We dont use sfq_dequeue()/sfq_enqueue() because we dont want to change
 * counters.
 */
static void sfq_rehash(struct sfq_sched_data *q)
{
	struct sk_buff *skb;
	int i;
	struct sfq_slot *slot;
	struct sk_buff_head list;

	__skb_queue_head_init(&list);

	for (i = 0; i < SFQ_SLOTS; i++) {
		slot = &q->slots[i];
		if (!slot->qlen)
			continue;
		while (slot->qlen) {
			skb = slot_dequeue_head(slot);
			sfq_dec(q, i);
			__skb_queue_tail(&list, skb);
		}
		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
	}
	q->tail = NULL;

	while ((skb = __skb_dequeue(&list)) != NULL) {
		unsigned int hash = sfq_hash(q, skb);
		sfq_index x = q->ht[hash];

		slot = &q->slots[x];
		if (x == SFQ_EMPTY_SLOT) {
			x = q->dep[0].next; /* get a free slot */
			q->ht[hash] = x;
			slot = &q->slots[x];
			slot->hash = hash;
		}
		slot_queue_add(slot, skb);
		sfq_inc(q, x);
		if (slot->qlen == 1) {		/* The flow is new */
			if (q->tail == NULL) {	/* It is the first flow */
				slot->next = x;
			} else {
				slot->next = q->tail->next;
				q->tail->next = x;
			}
			q->tail = slot;
			slot->allot = q->scaled_quantum;
		}
	}
}

static void sfq_perturbation(unsigned long arg)
{
	struct Qdisc *sch = (struct Qdisc *)arg;
	struct sfq_sched_data *q = qdisc_priv(sch);
	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));

	spin_lock(root_lock);
	q->perturbation = net_random();
	if (!q->filter_list && q->tail)
		sfq_rehash(q);
	spin_unlock(root_lock);

	if (q->perturb_period)
		mod_timer(&q->perturb_timer, jiffies + q->perturb_period);