Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 22e0f8b9 authored by John Fastabend's avatar John Fastabend Committed by David S. Miller
Browse files

net: sched: make bstats per cpu and estimator RCU safe



In order to run qdiscs without locking, statistics and estimators
need to be handled correctly.

To resolve bstats, make the statistics per-CPU. Because this is
only needed for qdiscs that run without locks — which will not be
the case for most qdiscs in the near future — only create per-CPU
stats when a qdisc sets the TCQ_F_CPUSTATS flag.

Next, because estimators use the bstats to calculate packets per
second and bytes per second, the estimator code paths are updated
to use the per-CPU statistics.

Signed-off-by: default avatarJohn Fastabend <john.r.fastabend@intel.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 79cf79ab
Loading
Loading
Loading
Loading
+11 −0
Original line number Diff line number Diff line
@@ -6,6 +6,11 @@
#include <linux/rtnetlink.h>
#include <linux/pkt_sched.h>

/* Per-CPU instance of the basic byte/packet counters.  The embedded
 * u64_stats_sync lets readers take a consistent (torn-free) snapshot
 * of the 64-bit counters on 32-bit hosts without a lock.
 */
struct gnet_stats_basic_cpu {
	struct gnet_stats_basic_packed bstats;
	struct u64_stats_sync syncp;
};

struct gnet_dump {
	spinlock_t *      lock;
	struct sk_buff *  skb;
@@ -27,6 +32,10 @@ int gnet_stats_start_copy_compat(struct sk_buff *skb, int type,
				 spinlock_t *lock, struct gnet_dump *d);

int gnet_stats_copy_basic(struct gnet_dump *d,
			  struct gnet_stats_basic_cpu __percpu *cpu,
			  struct gnet_stats_basic_packed *b);
void __gnet_stats_copy_basic(struct gnet_stats_basic_packed *bstats,
			     struct gnet_stats_basic_cpu __percpu *cpu,
			     struct gnet_stats_basic_packed *b);
int gnet_stats_copy_rate_est(struct gnet_dump *d,
			     const struct gnet_stats_basic_packed *b,
@@ -37,11 +46,13 @@ int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len);
int gnet_stats_finish_copy(struct gnet_dump *d);

int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
		      struct gnet_stats_basic_cpu __percpu *cpu_bstats,
		      struct gnet_stats_rate_est64 *rate_est,
		      spinlock_t *stats_lock, struct nlattr *opt);
void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
			struct gnet_stats_rate_est64 *rate_est);
int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
			  struct gnet_stats_basic_cpu __percpu *cpu_bstats,
			  struct gnet_stats_rate_est64 *rate_est,
			  spinlock_t *stats_lock, struct nlattr *opt);
bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats,
+21 −1
Original line number Diff line number Diff line
@@ -6,6 +6,7 @@
#include <linux/rcupdate.h>
#include <linux/pkt_sched.h>
#include <linux/pkt_cls.h>
#include <linux/percpu.h>
#include <net/gen_stats.h>
#include <net/rtnetlink.h>

@@ -58,6 +59,7 @@ struct Qdisc {
				      * multiqueue device.
				      */
#define TCQ_F_WARN_NONWC	(1 << 16)
#define TCQ_F_CPUSTATS		0x20 /* run using percpu statistics */
	u32			limit;
	const struct Qdisc_ops	*ops;
	struct qdisc_size_table	__rcu *stab;
@@ -83,7 +85,10 @@ struct Qdisc {
	 */
	unsigned long		state;
	struct sk_buff_head	q;
	union {
		struct gnet_stats_basic_packed bstats;
		struct gnet_stats_basic_cpu __percpu *cpu_bstats;
	} __packed;
	unsigned int		__state;
	struct gnet_stats_queue	qstats;
	struct rcu_head		rcu_head;
@@ -487,6 +492,10 @@ static inline int qdisc_enqueue_root(struct sk_buff *skb, struct Qdisc *sch)
	return qdisc_enqueue(skb, sch) & NET_XMIT_MASK;
}

/* Report whether @q keeps its basic statistics in per-CPU storage,
 * i.e. whether TCQ_F_CPUSTATS is set in its flags.
 */
static inline bool qdisc_is_percpu_stats(const struct Qdisc *q)
{
	return (q->flags & TCQ_F_CPUSTATS) != 0;
}

static inline void bstats_update(struct gnet_stats_basic_packed *bstats,
				 const struct sk_buff *skb)
@@ -495,6 +504,17 @@ static inline void bstats_update(struct gnet_stats_basic_packed *bstats,
	bstats->packets += skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
}

/* Account @skb into @sch's per-CPU basic counters.  The update is
 * bracketed by u64_stats_update_begin/end so concurrent readers can
 * detect and retry a torn snapshot.
 */
static inline void qdisc_bstats_update_cpu(struct Qdisc *sch,
					   const struct sk_buff *skb)
{
	struct gnet_stats_basic_cpu *b = this_cpu_ptr(sch->cpu_bstats);

	u64_stats_update_begin(&b->syncp);
	bstats_update(&b->bstats, skb);
	u64_stats_update_end(&b->syncp);
}

static inline void qdisc_bstats_update(struct Qdisc *sch,
				       const struct sk_buff *skb)
{
+18 −11
Original line number Diff line number Diff line
@@ -91,6 +91,8 @@ struct gen_estimator
	u32			avpps;
	struct rcu_head		e_rcu;
	struct rb_node		node;
	struct gnet_stats_basic_cpu __percpu *cpu_bstats;
	struct rcu_head		head;
};

struct gen_estimator_head
@@ -115,9 +117,8 @@ static void est_timer(unsigned long arg)

	rcu_read_lock();
	list_for_each_entry_rcu(e, &elist[idx].list, list) {
		u64 nbytes;
		struct gnet_stats_basic_packed b = {0};
		u64 brate;
		u32 npackets;
		u32 rate;

		spin_lock(e->stats_lock);
@@ -125,15 +126,15 @@ static void est_timer(unsigned long arg)
		if (e->bstats == NULL)
			goto skip;

		nbytes = e->bstats->bytes;
		npackets = e->bstats->packets;
		brate = (nbytes - e->last_bytes)<<(7 - idx);
		e->last_bytes = nbytes;
		__gnet_stats_copy_basic(&b, e->cpu_bstats, e->bstats);

		brate = (b.bytes - e->last_bytes)<<(7 - idx);
		e->last_bytes = b.bytes;
		e->avbps += (brate >> e->ewma_log) - (e->avbps >> e->ewma_log);
		e->rate_est->bps = (e->avbps+0xF)>>5;

		rate = (npackets - e->last_packets)<<(12 - idx);
		e->last_packets = npackets;
		rate = (b.packets - e->last_packets)<<(12 - idx);
		e->last_packets = b.packets;
		e->avpps += (rate >> e->ewma_log) - (e->avpps >> e->ewma_log);
		e->rate_est->pps = (e->avpps+0x1FF)>>10;
skip:
@@ -203,12 +204,14 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats
 *
 */
int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
		      struct gnet_stats_basic_cpu __percpu *cpu_bstats,
		      struct gnet_stats_rate_est64 *rate_est,
		      spinlock_t *stats_lock,
		      struct nlattr *opt)
{
	struct gen_estimator *est;
	struct gnet_estimator *parm = nla_data(opt);
	struct gnet_stats_basic_packed b = {0};
	int idx;

	if (nla_len(opt) < sizeof(*parm))
@@ -221,15 +224,18 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
	if (est == NULL)
		return -ENOBUFS;

	__gnet_stats_copy_basic(&b, cpu_bstats, bstats);

	idx = parm->interval + 2;
	est->bstats = bstats;
	est->rate_est = rate_est;
	est->stats_lock = stats_lock;
	est->ewma_log = parm->ewma_log;
	est->last_bytes = bstats->bytes;
	est->last_bytes = b.bytes;
	est->avbps = rate_est->bps<<5;
	est->last_packets = bstats->packets;
	est->last_packets = b.packets;
	est->avpps = rate_est->pps<<10;
	est->cpu_bstats = cpu_bstats;

	spin_lock_bh(&est_tree_lock);
	if (!elist[idx].timer.function) {
@@ -290,11 +296,12 @@ EXPORT_SYMBOL(gen_kill_estimator);
 * Returns 0 on success or a negative error code.
 */
/**
 * gen_replace_estimator - replace a rate estimator configuration
 * @bstats: basic statistics the estimator samples
 * @cpu_bstats: per-CPU basic statistics, or NULL when @bstats is the
 *	single authoritative copy
 * @rate_est: rate estimator statistics to fill in
 * @stats_lock: lock serializing access to the statistics
 * @opt: rate estimator configuration TLV
 *
 * Replaces any existing estimator for @bstats/@rate_est with a new one
 * built from @opt.  Returns 0 on success or a negative error code.
 *
 * NOTE(review): the rendered diff carried both the old and the new call
 * to gen_new_estimator(), producing two return statements; only the
 * post-commit call (with @cpu_bstats threaded through) is kept here.
 */
int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
			  struct gnet_stats_basic_cpu __percpu *cpu_bstats,
			  struct gnet_stats_rate_est64 *rate_est,
			  spinlock_t *stats_lock, struct nlattr *opt)
{
	gen_kill_estimator(bstats, rate_est);
	return gen_new_estimator(bstats, cpu_bstats, rate_est, stats_lock, opt);
}
EXPORT_SYMBOL(gen_replace_estimator);

+48 −5
Original line number Diff line number Diff line
@@ -97,6 +97,43 @@ gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock,
}
EXPORT_SYMBOL(gnet_stats_start_copy);

/*
 * __gnet_stats_copy_basic_cpu - fold per-CPU basic counters into @bstats
 * @bstats: accumulator to add into (caller-initialized)
 * @cpu: per-CPU counter array to sum
 *
 * For each possible CPU, take a consistent snapshot of the 64-bit
 * byte/packet counters inside the u64_stats fetch/retry loop, then
 * accumulate that snapshot.
 */
static void
__gnet_stats_copy_basic_cpu(struct gnet_stats_basic_packed *bstats,
			    struct gnet_stats_basic_cpu __percpu *cpu)
{
	int i;

	for_each_possible_cpu(i) {
		struct gnet_stats_basic_cpu *bcpu = per_cpu_ptr(cpu, i);
		unsigned int start;
		__u64 bytes;
		__u32 packets;

		do {
			start = u64_stats_fetch_begin_irq(&bcpu->syncp);
			bytes = bcpu->bstats.bytes;
			packets = bcpu->bstats.packets;
		} while (u64_stats_fetch_retry_irq(&bcpu->syncp, start));

		/* BUGFIX: accumulate the snapshot taken under the seq
		 * counter.  The original re-read bcpu->bstats.* here,
		 * outside the retry loop, leaving the snapshot locals
		 * unused and permitting torn 64-bit reads on 32-bit SMP.
		 */
		bstats->bytes += bytes;
		bstats->packets += packets;
	}
}

/*
 * __gnet_stats_copy_basic - snapshot basic statistics into @bstats
 * @bstats: destination counters
 * @cpu: per-CPU counters, or NULL when the qdisc keeps a single copy
 * @b: single-copy counters, consulted only when @cpu is NULL
 *
 * Per-CPU counters, when present, take precedence over @b.
 */
void
__gnet_stats_copy_basic(struct gnet_stats_basic_packed *bstats,
			struct gnet_stats_basic_cpu __percpu *cpu,
			struct gnet_stats_basic_packed *b)
{
	if (cpu) {
		__gnet_stats_copy_basic_cpu(bstats, cpu);
		return;
	}

	bstats->bytes = b->bytes;
	bstats->packets = b->packets;
}
EXPORT_SYMBOL(__gnet_stats_copy_basic);

/**
 * gnet_stats_copy_basic - copy basic statistics into statistic TLV
 * @d: dumping handle
@@ -109,19 +146,25 @@ EXPORT_SYMBOL(gnet_stats_start_copy);
 * if the room in the socket buffer was not sufficient.
 */
int
gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_packed *b)
gnet_stats_copy_basic(struct gnet_dump *d,
		      struct gnet_stats_basic_cpu __percpu *cpu,
		      struct gnet_stats_basic_packed *b)
{
	struct gnet_stats_basic_packed bstats = {0};

	__gnet_stats_copy_basic(&bstats, cpu, b);

	if (d->compat_tc_stats) {
		d->tc_stats.bytes = b->bytes;
		d->tc_stats.packets = b->packets;
		d->tc_stats.bytes = bstats.bytes;
		d->tc_stats.packets = bstats.packets;
	}

	if (d->tail) {
		struct gnet_stats_basic sb;

		memset(&sb, 0, sizeof(sb));
		sb.bytes = b->bytes;
		sb.packets = b->packets;
		sb.bytes = bstats.bytes;
		sb.packets = bstats.packets;
		return gnet_stats_copy(d, TCA_STATS_BASIC, &sb, sizeof(sb));
	}
	return 0;
+1 −1
Original line number Diff line number Diff line
@@ -136,7 +136,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
	cfg.est.interval	= info->interval;
	cfg.est.ewma_log	= info->ewma_log;

	ret = gen_new_estimator(&est->bstats, &est->rstats,
	ret = gen_new_estimator(&est->bstats, NULL, &est->rstats,
				&est->lock, &cfg.opt);
	if (ret < 0)
		goto err2;
Loading