Merge master.kernel.org:/pub/scm/linux/kernel/git/acme/net-2.6 (8e33ba49) · Commits · e / devices / android_kernel_xiaomi_markw

include/linux/pkt_sched.h

+24 −26

Original line number	Original line	Diff line number	Diff line
	@@ -93,6 +93,7 @@ struct tc_fifo_qopt
	/* PRIO section */		/* PRIO section */

	#define TCQ_PRIO_BANDS 16		#define TCQ_PRIO_BANDS 16
			#define TCQ_MIN_PRIO_BANDS 2

	struct tc_prio_qopt		struct tc_prio_qopt
	{		{
	@@ -169,6 +170,7 @@ struct tc_red_qopt
	unsigned char Scell_log; /* cell size for idle damping */		unsigned char Scell_log; /* cell size for idle damping */
	unsigned char flags;		unsigned char flags;
	#define TC_RED_ECN 1		#define TC_RED_ECN 1
			#define TC_RED_HARDDROP 2
	};		};

	struct tc_red_xstats		struct tc_red_xstats
	@@ -194,15 +196,11 @@ enum

	#define TCA_GRED_MAX (__TCA_GRED_MAX - 1)		#define TCA_GRED_MAX (__TCA_GRED_MAX - 1)

	#define TCA_SET_OFF TCA_GRED_PARMS
	struct tc_gred_qopt		struct tc_gred_qopt
	{		{
	__u32 limit; /* HARD maximal queue length (bytes)		__u32 limit; /* HARD maximal queue length (bytes) */
	*/		__u32 qth_min; /* Min average length threshold (bytes) */
	__u32 qth_min; /* Min average length threshold (bytes)		__u32 qth_max; /* Max average length threshold (bytes) */
	*/
	__u32 qth_max; /* Max average length threshold (bytes)
	*/
	__u32 DP; /* upto 2^32 DPs */		__u32 DP; /* upto 2^32 DPs */
	__u32 backlog;		__u32 backlog;
	__u32 qave;		__u32 qave;
	@@ -210,22 +208,22 @@ struct tc_gred_qopt
	__u32 early;		__u32 early;
	__u32 other;		__u32 other;
	__u32 pdrop;		__u32 pdrop;
			__u8 Wlog; /* log(W) */
	unsigned char Wlog; /* log(W) */		__u8 Plog; /* log(P_max/(qth_max-qth_min)) */
	unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */		__u8 Scell_log; /* cell size for idle damping */
	unsigned char Scell_log; /* cell size for idle damping */
	__u8 prio; /* prio of this VQ */		__u8 prio; /* prio of this VQ */
	__u32 packets;		__u32 packets;
	__u32 bytesin;		__u32 bytesin;
	};		};

	/* gred setup */		/* gred setup */
	struct tc_gred_sopt		struct tc_gred_sopt
	{		{
	__u32 DPs;		__u32 DPs;
	__u32 def_DP;		__u32 def_DP;
	__u8 grio;		__u8 grio;
	__u8 pad1;		__u8 flags;
	__u16 pad2;		__u16 pad1;
	};		};

	/* HTB section */		/* HTB section */

include/linux/skbuff.h

+30 −8

Original line number	Original line	Diff line number	Diff line
	@@ -603,29 +603,46 @@ static inline void skb_queue_head_init(struct sk_buff_head *list)
	*/		*/

	/**		/**
	* __skb_queue_head - queue a buffer at the list head		* __skb_queue_after - queue a buffer after a given buffer
	* @list: list to use		* @list: list to use
			* @prev: place after this buffer
	* @newsk: buffer to queue		* @newsk: buffer to queue
	*		*
	* Queue a buffer at the start of a list. This function takes no locks		* Queue a buffer in the middle of a list. This function takes no locks
	* and you must therefore hold required locks before calling it.		* and you must therefore hold required locks before calling it.
	*		*
	* A buffer cannot be placed on two lists at the same time.		* A buffer cannot be placed on two lists at the same time.
	*/		*/
	extern void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk);		static inline void __skb_queue_after(struct sk_buff_head *list,
	static inline void __skb_queue_head(struct sk_buff_head *list,		struct sk_buff *prev,
	struct sk_buff *newsk)		struct sk_buff *newsk)
	{		{
	struct sk_buff *prev, *next;		struct sk_buff *next;

	list->qlen++;		list->qlen++;
	prev = (struct sk_buff *)list;
	next = prev->next;		next = prev->next;
	newsk->next = next;		newsk->next = next;
	newsk->prev = prev;		newsk->prev = prev;
	next->prev = prev->next = newsk;		next->prev = prev->next = newsk;
	}		}

			/**
			 * __skb_queue_head - queue a buffer at the list head
			 * @list: list to use
			 * @newsk: buffer to queue
			 *
			 * Queue a buffer at the start of a list. This function takes no locks
			 * and you must therefore hold required locks before calling it.
			 *
			 * The insertion is delegated to __skb_queue_after(), with the list head
			 * itself (cast to a struct sk_buff pointer) used as the buffer to
			 * insert after.
			 *
			 * A buffer cannot be placed on two lists at the same time.
			 */
			extern void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk);
			static inline void __skb_queue_head(struct sk_buff_head *list,
							    struct sk_buff *newsk)
			{
				__skb_queue_after(list, (struct sk_buff *)list, newsk);
			}

	/**		/**
	* __skb_queue_tail - queue a buffer at the list tail		* __skb_queue_tail - queue a buffer at the list tail
	* @list: list to use		* @list: list to use
	@@ -1203,6 +1220,11 @@ static inline void kunmap_skb_frag(void *vaddr)
	prefetch(skb->next), (skb != (struct sk_buff *)(queue)); \		prefetch(skb->next), (skb != (struct sk_buff *)(queue)); \
	skb = skb->next)		skb = skb->next)

			/*
			 * skb_queue_reverse_walk - iterate over a queue from its tail backwards
			 * @queue: list head; iteration starts at its ->prev link
			 * @skb: cursor variable, assigned each visited buffer in turn
			 *
			 * Follows ->prev links and stops once the cursor equals the list head
			 * itself (compared as a struct sk_buff pointer).  prefetch() is issued
			 * on the cursor's ->prev before each termination test.
			 */
			#define skb_queue_reverse_walk(queue, skb) \
			for (skb = (queue)->prev; \
			prefetch(skb->prev), (skb != (struct sk_buff *)(queue)); \
			skb = skb->prev)


	extern struct sk_buff skb_recv_datagram(struct sock sk, unsigned flags,		extern struct sk_buff skb_recv_datagram(struct sock sk, unsigned flags,
	int noblock, int *err);		int noblock, int *err);

include/net/inet_ecn.h

+24 −4

Original line number	Original line	Diff line number	Diff line
	@@ -2,6 +2,7 @@
	#define _INET_ECN_H_		#define _INET_ECN_H_

	#include <linux/ip.h>		#include <linux/ip.h>
			#include <linux/skbuff.h>
	#include <net/dsfield.h>		#include <net/dsfield.h>

	enum {		enum {
	@@ -48,7 +49,7 @@ static inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner)
	(label) |= __constant_htons(INET_ECN_ECT_0 << 4); \		(label) |= __constant_htons(INET_ECN_ECT_0 << 4); \
	} while (0)		} while (0)

	static inline void IP_ECN_set_ce(struct iphdr *iph)		static inline int IP_ECN_set_ce(struct iphdr *iph)
	{		{
	u32 check = iph->check;		u32 check = iph->check;
	u32 ecn = (iph->tos + 1) & INET_ECN_MASK;		u32 ecn = (iph->tos + 1) & INET_ECN_MASK;
	@@ -61,7 +62,7 @@ static inline void IP_ECN_set_ce(struct iphdr *iph)
	* INET_ECN_CE => 00		* INET_ECN_CE => 00
	*/		*/
	if (!(ecn & 2))		if (!(ecn & 2))
	return;		return !ecn;

	/*		/*
	* The following gives us:		* The following gives us:
	@@ -72,6 +73,7 @@ static inline void IP_ECN_set_ce(struct iphdr *iph)

	iph->check = check + (check>=0xFFFF);		iph->check = check + (check>=0xFFFF);
	iph->tos |= INET_ECN_CE;		iph->tos |= INET_ECN_CE;
			return 1;
	}		}

	static inline void IP_ECN_clear(struct iphdr *iph)		static inline void IP_ECN_clear(struct iphdr *iph)
	@@ -87,11 +89,12 @@ static inline void ipv4_copy_dscp(struct iphdr outer, struct iphdr inner)

	struct ipv6hdr;		struct ipv6hdr;

	static inline void IP6_ECN_set_ce(struct ipv6hdr *iph)		static inline int IP6_ECN_set_ce(struct ipv6hdr *iph)
	{		{
	if (INET_ECN_is_not_ect(ipv6_get_dsfield(iph)))		if (INET_ECN_is_not_ect(ipv6_get_dsfield(iph)))
	return;		return 0;
	*(u32*)iph |= htonl(INET_ECN_CE << 20);		*(u32*)iph |= htonl(INET_ECN_CE << 20);
			return 1;
	}		}

	static inline void IP6_ECN_clear(struct ipv6hdr *iph)		static inline void IP6_ECN_clear(struct ipv6hdr *iph)
	@@ -105,4 +108,21 @@ static inline void ipv6_copy_dscp(struct ipv6hdr outer, struct ipv6hdr inner)
	ipv6_change_dsfield(inner, INET_ECN_MASK, dscp);		ipv6_change_dsfield(inner, INET_ECN_MASK, dscp);
	}		}

			/*
			 * INET_ECN_set_ce - set the ECN "congestion experienced" mark on a packet
			 * @skb: buffer whose network header should be marked
			 *
			 * Dispatches on skb->protocol.  For ETH_P_IP and ETH_P_IPV6 the matching
			 * header must lie completely before skb->tail; if it does, the result of
			 * IP_ECN_set_ce() or IP6_ECN_set_ce() is returned.  In every other case
			 * (unknown protocol or header not fully present) the function returns 0
			 * and leaves the packet untouched.
			 */
			static inline int INET_ECN_set_ce(struct sk_buff *skb)
			{
				if (skb->protocol == __constant_htons(ETH_P_IP)) {
					/* Header would run past the data end: nothing to mark. */
					if (skb->tail < skb->nh.raw + sizeof(struct iphdr))
						return 0;
					return IP_ECN_set_ce(skb->nh.iph);
				}

				if (skb->protocol == __constant_htons(ETH_P_IPV6)) {
					if (skb->tail < skb->nh.raw + sizeof(struct ipv6hdr))
						return 0;
					return IP6_ECN_set_ce(skb->nh.ipv6h);
				}

				return 0;
			}

	#endif		#endif

include/net/inet_hashtables.h

+0 −2

Original line number	Original line	Diff line number	Diff line
	@@ -125,9 +125,7 @@ struct inet_hashinfo {
	rwlock_t lhash_lock ____cacheline_aligned;		rwlock_t lhash_lock ____cacheline_aligned;
	atomic_t lhash_users;		atomic_t lhash_users;
	wait_queue_head_t lhash_wait;		wait_queue_head_t lhash_wait;
	spinlock_t portalloc_lock;
	kmem_cache_t *bind_bucket_cachep;		kmem_cache_t *bind_bucket_cachep;
	int port_rover;
	};		};

	static inline unsigned int inet_ehashfn(const __u32 laddr, const __u16 lport,		static inline unsigned int inet_ehashfn(const __u32 laddr, const __u16 lport,

include/net/red.h

0 → 100644

+325 −0

Original line number	Original line	Diff line number	Diff line
			#ifndef __NET_SCHED_RED_H
			#define __NET_SCHED_RED_H

			#include <linux/config.h>
			#include <linux/types.h>
			#include <net/pkt_sched.h>
			#include <net/inet_ecn.h>
			#include <net/dsfield.h>

			/* Random Early Detection (RED) algorithm.
			=======================================

			Source: Sally Floyd and Van Jacobson, "Random Early Detection Gateways
			for Congestion Avoidance", 1993, IEEE/ACM Transactions on Networking.

			This file codes a "divisionless" version of RED algorithm
			as written down in Fig.17 of the paper.

			Short description.
			------------------

			When a new packet arrives we calculate the average queue length:

			avg = (1-W)*avg + W*current_queue_len,

			W is the filter time constant (chosen as 2^(-Wlog)), it controls
			the inertia of the algorithm. To allow larger bursts, W should be
			decreased.

			if (avg > th_max) -> packet marked (dropped).
			if (avg < th_min) -> packet passes.
			if (th_min < avg < th_max) we calculate probability:

			Pb = max_P * (avg - th_min)/(th_max-th_min)

			and mark (drop) packet with this probability.
			Pb changes from 0 (at avg==th_min) to max_P (avg==th_max).
			max_P should be small (not 1), usually 0.01..0.02 is good value.

			max_P is chosen so that max_P/(th_max-th_min) is a negative
			power of two, which lets the arithmetic be done with shifts
			only.


			Parameters, settable by user:
			-----------------------------

			qth_min - bytes (should be < qth_max/2)
			qth_max - bytes (should be at least 2*qth_min and less than limit)
			Wlog - bits (<32) log(1/W).
			Plog - bits (<32)

			Plog is related to max_P by formula:

			max_P = (qth_max-qth_min)/2^Plog;

			F.e. if qth_max=128K and qth_min=32K, then Plog=22
			corresponds to max_P=0.02

			Scell_log
			Stab

			Lookup table for log((1-W)^(t/t_ave)).


			NOTES:

			Upper bound on W.
			-----------------

			If you want to allow bursts of L packets of size S,
			you should choose W:

			L + 1 - th_min/S < (1-(1-W)^L)/W

			th_min/S = 32 th_min/S = 4

			log(W) L
			-1 33
			-2 35
			-3 39
			-4 46
			-5 57
			-6 75
			-7 101
			-8 135
			-9 190
			etc.
			*/

			/* Number of entries in the red_parms.Stab lookup table. */
			#define RED_STAB_SIZE 256
			/* Mask that wraps an index into the range of the Stab table. */
			#define RED_STAB_MASK (RED_STAB_SIZE - 1)

			/*
			 * Counters describing what happened to packets handled by a RED queue.
			 * Nothing in this header updates them.
			 */
			struct red_stats
			{
			u32 prob_drop; /* Early probability drops */
			u32 prob_mark; /* Early probability marks */
			u32 forced_drop; /* Forced drops, qavg > max_thresh */
			u32 forced_mark; /* Forced marks, qavg > max_thresh */
			u32 pdrop; /* Drops due to queue limits */
			u32 other; /* Drops due to drop() calls */
			/* NOTE(review): undocumented here; presumably the queue backlog - confirm */
			u32 backlog;
			};

			/*
			 * Configuration and running state of one RED instance.  The fields under
			 * "Parameters" are written by red_set_parms(); the fields under
			 * "Variables" change while packets are being processed.
			 */
			struct red_parms
			{
			/* Parameters */
			u32 qth_min; /* Min avg length threshold, stored shifted left by Wlog */
			u32 qth_max; /* Max avg length threshold, stored shifted left by Wlog */
			u32 Scell_max; /* Set to 255 << Scell_log; passed as the bound to PSCHED_TDIFF_SAFE */
			u32 Rmask; /* Cached random mask, see red_rmask */
			u8 Scell_log; /* Shift applied to the idle time before indexing Stab */
			u8 Wlog; /* log(W) */
			u8 Plog; /* random number bits */
			u8 Stab[RED_STAB_SIZE]; /* Shift counts used to decay qavg after an idle period */

			/* Variables */
			int qcount; /* Number of packets since last random
			number generation; -1 outside the marking zone */
			u32 qR; /* Cached random number */

			unsigned long qavg; /* Average queue length, fixed point with point at Wlog */
			psched_time_t qidlestart; /* Start of current idle period */
			};

			/*
			 * red_rmask - build the mask applied to random numbers
			 * @Plog: number of low-order random bits to keep
			 *
			 * Returns a value with the low @Plog bits set, or with all 32 bits set
			 * when @Plog is 32 or larger.  The shift operates on an unsigned
			 * constant so that Plog == 31 does not shift a one into the sign bit of
			 * a signed int, which would be undefined behaviour.
			 */
			static inline u32 red_rmask(u8 Plog)
			{
				return Plog < 32 ? ((1U << Plog) - 1) : ~0U;
			}

			/*
			 * red_set_parms - load a new configuration into a RED instance
			 * @p: instance to configure
			 * @qth_min: lower average-length threshold (stored shifted left by @Wlog)
			 * @qth_max: upper average-length threshold (stored shifted left by @Wlog)
			 * @Wlog: log of the filter constant, also the fixed-point position of qavg
			 * @Plog: number of random bits, used to derive the cached random mask
			 * @Scell_log: shift applied to idle time; also determines Scell_max
			 * @stab: table of RED_STAB_SIZE bytes copied into the instance
			 *
			 * Besides storing the parameters this also clears the running average
			 * and sets qcount to -1.
			 */
			static inline void red_set_parms(struct red_parms *p,
							 u32 qth_min, u32 qth_max, u8 Wlog, u8 Plog,
							 u8 Scell_log, u8 *stab)
			{
				/* Plain copies of the caller's settings. */
				p->Wlog = Wlog;
				p->Plog = Plog;
				p->Scell_log = Scell_log;

				/* Values derived from those settings. */
				p->qth_min = qth_min << Wlog;
				p->qth_max = qth_max << Wlog;
				p->Rmask = red_rmask(Plog);
				p->Scell_max = (255 << Scell_log);

				/*
				 * The average queue length is strictly bound to the parameters
				 * above, so start again from zero.  Resetting hurts a bit, but
				 * keeping the old value might leave qavg unreasonable for a
				 * while. --TGR
				 */
				p->qavg = 0;
				p->qcount = -1;

				memcpy(p->Stab, stab, sizeof(p->Stab));
			}

			/*
			 * red_is_idling - test whether an idle period is currently recorded
			 * @p: RED instance
			 *
			 * Returns 0 when PSCHED_IS_PASTPERFECT() holds for qidlestart, which is
			 * the state left behind by red_end_of_idle_period(), and 1 otherwise.
			 */
			static inline int red_is_idling(struct red_parms *p)
			{
				if (PSCHED_IS_PASTPERFECT(p->qidlestart))
					return 0;

				return 1;
			}

			/*
			 * red_start_of_idle_period - note that the queue has just become idle
			 * @p: RED instance
			 *
			 * Hands qidlestart to PSCHED_GET_TIME(); presumably this stores the
			 * current scheduler time in it - confirm against the macro definition.
			 */
			static inline void red_start_of_idle_period(struct red_parms *p)
			{
			PSCHED_GET_TIME(p->qidlestart);
			}

			/*
			 * red_end_of_idle_period - note that the queue is no longer idle
			 * @p: RED instance
			 *
			 * Applies PSCHED_SET_PASTPERFECT() to qidlestart.  red_is_idling() tests
			 * the same field with PSCHED_IS_PASTPERFECT() and reports "not idling"
			 * when that test holds.
			 */
			static inline void red_end_of_idle_period(struct red_parms *p)
			{
			PSCHED_SET_PASTPERFECT(p->qidlestart);
			}

			/*
			 * red_restart - bring the running state back to its initial values
			 * @p: RED instance
			 *
			 * Clears the average queue length, sets qcount to -1 and ends any
			 * recorded idle period.  The configured parameters are left untouched.
			 */
			static inline void red_restart(struct red_parms *p)
			{
				p->qcount = -1;
				p->qavg = 0;
				red_end_of_idle_period(p);
			}

			static inline unsigned long red_calc_qavg_from_idle_time(struct red_parms *p)
			{
			psched_time_t now;
			long us_idle;
			int shift;

			PSCHED_GET_TIME(now);
			us_idle = PSCHED_TDIFF_SAFE(now, p->qidlestart, p->Scell_max);

			/*
			* The problem: ideally, average length queue recalcultion should
			* be done over constant clock intervals. This is too expensive, so
			* that the calculation is driven by outgoing packets.
			* When the queue is idle we have to model this clock by hand.
			*
			* SF+VJ proposed to "generate":
			*
			* m = idletime / (average_pkt_size / bandwidth)
			*
			* dummy packets as a burst after idle time, i.e.
			*
			* p->qavg *= (1-W)^m
			*
			* This is an apparently overcomplicated solution (f.e. we have to
			* precompute a table to make this calculation in reasonable time)
			* I believe that a simpler model may be used here,
			* but it is field for experiments.
			*/

			shift = p->Stab[(us_idle >> p->Scell_log) & RED_STAB_MASK];

			if (shift)
			return p->qavg >> shift;
			else {
			/* Approximate initial part of exponent with linear function:
			*
			* (1-W)^m ~= 1-mW + ...
			*
			* Seems, it is the best solution to
			* problem of too coarse exponent tabulation.
			*/
			us_idle = (p->qavg * us_idle) >> p->Scell_log;

			if (us_idle < (p->qavg >> 1))
			return p->qavg - us_idle;
			else
			return p->qavg >> 1;
			}
			}

			/*
			 * red_calc_qavg_no_idle_time - fold the current backlog into the average
			 * @p: RED instance
			 * @backlog: current queue length
			 *
			 * Returns the updated average.  It does not store it in @p.
			 *
			 * NOTE: p->qavg is a fixed point number with point at Wlog.  The
			 * expression below is equivalent to the floating point version:
			 *
			 *	qavg = qavg*(1-W) + backlog*W;
			 *
			 * --ANK (980924)
			 */
			static inline unsigned long red_calc_qavg_no_idle_time(struct red_parms *p,
									       unsigned int backlog)
			{
				unsigned long unscaled_avg = p->qavg >> p->Wlog;

				return p->qavg + (backlog - unscaled_avg);
			}

			/*
			 * red_calc_qavg - compute the next average queue length
			 * @p: RED instance
			 * @backlog: current queue length; ignored while the queue is idling
			 *
			 * Uses the idle-time decay when red_is_idling() reports an idle period,
			 * and the regular backlog-driven update otherwise.  Returns the new
			 * average without storing it.
			 */
			static inline unsigned long red_calc_qavg(struct red_parms *p,
								  unsigned int backlog)
			{
				if (red_is_idling(p))
					return red_calc_qavg_from_idle_time(p);

				return red_calc_qavg_no_idle_time(p, backlog);
			}

			/*
			 * red_random - draw a random number limited by the cached mask
			 * @p: RED instance supplying Rmask
			 *
			 * Returns net_random() with every bit outside p->Rmask cleared.
			 */
			static inline u32 red_random(struct red_parms *p)
			{
				u32 rnd = net_random();

				return rnd & p->Rmask;
			}

			/*
			 * red_mark_probability - decide whether the current packet gets marked
			 * @p: RED instance (uses qth_min, Wlog, qcount and the cached qR)
			 * @qavg: average queue length, fixed point at Wlog
			 *
			 * Returns 1 when the packet should be marked and 0 otherwise.
			 *
			 * The formula used below causes questions.
			 *
			 * OK. qR is a random number in the interval 0..Rmask, i.e. 0..(2^Plog).
			 * If we used floating point arithmetic, it would be (2^Plog)*rnd_num,
			 * where rnd_num is less than 1.
			 *
			 * Taking into account that qavg has its fixed point at Wlog, and that
			 * Plog is related to max_P by max_P = (qth_max-qth_min)/2^Plog, the
			 * comparison below has the following floating point equivalent for
			 * "do not mark":
			 *
			 *	max_P*(qavg - qth_min)/(qth_max-qth_min) < rnd/qcount
			 *
			 * Any questions? --ANK (980924)
			 */
			static inline int red_mark_probability(struct red_parms *p, unsigned long qavg)
			{
				unsigned long excess = (qavg - p->qth_min) >> p->Wlog;

				return excess * p->qcount >= p->qR;
			}

			/* Result of red_cmp_thresh(): where qavg lies relative to the thresholds. */
			enum {
			RED_BELOW_MIN_THRESH, /* qavg < qth_min */
			RED_BETWEEN_TRESH, /* qth_min <= qavg < qth_max */
			RED_ABOVE_MAX_TRESH, /* qavg >= qth_max */
			};

			/*
			 * red_cmp_thresh - classify an average against the two thresholds
			 * @p: RED instance supplying qth_min and qth_max
			 * @qavg: average queue length to classify
			 *
			 * The lower threshold is tested first, so an average below qth_min is
			 * always reported as RED_BELOW_MIN_THRESH.  An average at or above
			 * qth_max gives RED_ABOVE_MAX_TRESH; anything else is
			 * RED_BETWEEN_TRESH.
			 */
			static inline int red_cmp_thresh(struct red_parms *p, unsigned long qavg)
			{
				if (qavg < p->qth_min)
					return RED_BELOW_MIN_THRESH;

				if (qavg >= p->qth_max)
					return RED_ABOVE_MAX_TRESH;

				return RED_BETWEEN_TRESH;
			}

			/* Verdicts returned by red_action(). */
			enum {
			RED_DONT_MARK, /* leave the packet alone */
			RED_PROB_MARK, /* chosen by the probability test between the thresholds */
			RED_HARD_MARK, /* average is at or above the upper threshold */
			};

			/*
			 * red_action - decide what to do with a packet for a given average
			 * @p: RED instance; qcount and qR are updated
			 * @qavg: average queue length to act on
			 *
			 * Below the lower threshold and at or above the upper one, qcount is
			 * reset to -1 and the verdict is RED_DONT_MARK or RED_HARD_MARK
			 * respectively.  Between the thresholds qcount is incremented.  If that
			 * brings it to zero, a fresh random number is drawn and the packet is
			 * not marked.  Otherwise the probability test decides; on a hit qcount
			 * goes back to zero, a new random number is drawn and RED_PROB_MARK is
			 * returned.
			 */
			static inline int red_action(struct red_parms *p, unsigned long qavg)
			{
				int zone = red_cmp_thresh(p, qavg);

				if (zone == RED_BELOW_MIN_THRESH) {
					p->qcount = -1;
					return RED_DONT_MARK;
				}

				if (zone == RED_ABOVE_MAX_TRESH) {
					p->qcount = -1;
					return RED_HARD_MARK;
				}

				if (zone == RED_BETWEEN_TRESH) {
					p->qcount++;

					/* First packet in the zone: only seed the random number. */
					if (p->qcount == 0) {
						p->qR = red_random(p);
						return RED_DONT_MARK;
					}

					if (!red_mark_probability(p, qavg))
						return RED_DONT_MARK;

					p->qcount = 0;
					p->qR = red_random(p);
					return RED_PROB_MARK;
				}

				/* red_cmp_thresh() returned a value outside its enum. */
				BUG();
				return RED_DONT_MARK;
			}

			#endif