
Commit 52e7ec28 authored by Satya Durga Srinivasu Prabhala

sched: sched_avg: Add snapshot of sched_avg



This snapshot is taken from msm-4.14 as of commit 43980276
("sched/sched_avg: make number of running tasks conservative").

Change-Id: Ia5f608fe1c1715489e7c0fc6e5736b80bee5bae7
Signed-off-by: Satya Durga Srinivasu Prabhala <satyap@codeaurora.org>
parent 7ebdf76d
+4 −0
@@ -23,9 +23,13 @@ extern unsigned long nr_iowait_cpu(int cpu);
extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load);

#ifdef CONFIG_SMP
extern void sched_update_nr_prod(int cpu, long delta, bool inc);
extern unsigned int sched_get_cpu_util(int cpu);
extern u64 sched_get_cpu_last_busy_time(int cpu);
#else
static inline void sched_update_nr_prod(int cpu, long delta, bool inc)
{
}
static inline unsigned int sched_get_cpu_util(int cpu)
{
	return 0;
+27 −0
@@ -1067,6 +1067,33 @@ TRACE_EVENT(sched_overutilized,
		__entry->overutilized)
);

/*
 * Tracepoint for sched_get_nr_running_avg
 */
TRACE_EVENT(sched_get_nr_running_avg,

	TP_PROTO(int cpu, int nr, int nr_misfit, int nr_max),

	TP_ARGS(cpu, nr, nr_misfit, nr_max),

	TP_STRUCT__entry(
		__field(int, cpu)
		__field(int, nr)
		__field(int, nr_misfit)
		__field(int, nr_max)
	),

	TP_fast_assign(
		__entry->cpu = cpu;
		__entry->nr = nr;
		__entry->nr_misfit = nr_misfit;
		__entry->nr_max = nr_max;
	),

	TP_printk("cpu=%d nr=%d nr_misfit=%d nr_max=%d",
		__entry->cpu, __entry->nr, __entry->nr_misfit, __entry->nr_max)
);

#include "walt.h"

#endif /* CONFIG_SMP */
+1 −2
@@ -19,8 +19,7 @@ endif
obj-y += core.o loadavg.o clock.o cputime.o
obj-y += idle.o fair.o rt.o deadline.o
obj-y += wait.o wait_bit.o swait.o completion.o

obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o
obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o sched_avg.o
obj-$(CONFIG_SCHED_WALT) += walt.o boost.o
obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
obj-$(CONFIG_SCHEDSTATS) += stats.o
+10 −0
@@ -1870,6 +1870,7 @@ static inline void add_nr_running(struct rq *rq, unsigned count)
{
	unsigned prev_nr = rq->nr_running;

	sched_update_nr_prod(cpu_of(rq), count, true);
	rq->nr_running = prev_nr + count;

	if (prev_nr < 2 && rq->nr_running >= 2) {
@@ -1884,6 +1885,7 @@ static inline void add_nr_running(struct rq *rq, unsigned count)

static inline void sub_nr_running(struct rq *rq, unsigned count)
{
	sched_update_nr_prod(cpu_of(rq), count, false);
	rq->nr_running -= count;
	/* Check if we still need preemption */
	sched_update_tick_dependency(rq);
@@ -3251,3 +3253,11 @@ static inline bool energy_aware(void)
	return 0;
}
#endif


struct sched_avg_stats {
	int nr;
	int nr_misfit;
	int nr_max;
};
extern void sched_get_nr_running_avg(struct sched_avg_stats *stats);
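For context, a minimal sketch of how a client could consume this interface (the client itself is hypothetical and not part of this commit): reserve one sched_avg_stats slot per possible CPU and poll sched_get_nr_running_avg() from a single context, since the function may not be called concurrently with itself.

static struct sched_avg_stats nr_stats[NR_CPUS];

static void poll_nr_running_avg(void)
{
	int cpu;

	/* Fills nr_stats[] for every possible CPU and resets the window. */
	sched_get_nr_running_avg(nr_stats);

	for_each_possible_cpu(cpu)
		pr_debug("cpu%d: nr=%d nr_misfit=%d nr_max=%d\n", cpu,
			 nr_stats[cpu].nr, nr_stats[cpu].nr_misfit,
			 nr_stats[cpu].nr_max);
}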
+189 −0
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2012, 2015-2018, The Linux Foundation. All rights reserved.
 */

/*
 * Scheduler hook for average runqueue determination
 */
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/hrtimer.h>
#include <linux/sched.h>
#include <linux/math64.h>

#include "sched.h"
#include "walt.h"
#include <trace/events/sched.h>

static DEFINE_PER_CPU(u64, nr_prod_sum);
static DEFINE_PER_CPU(u64, last_time);
static DEFINE_PER_CPU(u64, nr_big_prod_sum);
static DEFINE_PER_CPU(u64, nr);
static DEFINE_PER_CPU(u64, nr_max);

static DEFINE_PER_CPU(unsigned long, iowait_prod_sum);
static DEFINE_PER_CPU(spinlock_t, nr_lock) = __SPIN_LOCK_UNLOCKED(nr_lock);
static s64 last_get_time;

static DEFINE_PER_CPU(atomic64_t, last_busy_time) = ATOMIC64_INIT(0);

#define NR_THRESHOLD_PCT		15

/**
 * sched_get_nr_running_avg
 * @stats: per-CPU array to be filled with the average nr_running,
 *	   average misfit (big) task count and peak nr_running seen
 *	   on each CPU since the last poll.
 *
 * Obtains the average nr_running value since the last poll.
 * This function may not be called concurrently with itself.
 */
void sched_get_nr_running_avg(struct sched_avg_stats *stats)
{
	int cpu;
	u64 curr_time = sched_clock();
	u64 period = curr_time - last_get_time;
	u64 tmp_nr, tmp_misfit;

	if (!period)
		return;

	/* read and reset nr_running counts */
	for_each_possible_cpu(cpu) {
		unsigned long flags;
		u64 diff;

		spin_lock_irqsave(&per_cpu(nr_lock, cpu), flags);
		curr_time = sched_clock();
		diff = curr_time - per_cpu(last_time, cpu);
		BUG_ON((s64)diff < 0);

		tmp_nr = per_cpu(nr_prod_sum, cpu);
		tmp_nr += per_cpu(nr, cpu) * diff;
		tmp_nr = div64_u64((tmp_nr * 100), period);

		tmp_misfit = per_cpu(nr_big_prod_sum, cpu);
		tmp_misfit += walt_big_tasks(cpu) * diff;
		tmp_misfit = div64_u64((tmp_misfit * 100), period);

		/*
		 * NR_THRESHOLD_PCT rounds the average up to the next
		 * whole task only when the fractional part is at least
		 * 85%, to compensate for any overestimation in the
		 * accounting above.
		 */
		stats[cpu].nr = (int)div64_u64((tmp_nr + NR_THRESHOLD_PCT),
								100);
		stats[cpu].nr_misfit = (int)div64_u64((tmp_misfit +
						NR_THRESHOLD_PCT), 100);
		stats[cpu].nr_max = per_cpu(nr_max, cpu);

		trace_sched_get_nr_running_avg(cpu, stats[cpu].nr,
				stats[cpu].nr_misfit, stats[cpu].nr_max);

		per_cpu(last_time, cpu) = curr_time;
		per_cpu(nr_prod_sum, cpu) = 0;
		per_cpu(nr_big_prod_sum, cpu) = 0;
		per_cpu(iowait_prod_sum, cpu) = 0;
		per_cpu(nr_max, cpu) = per_cpu(nr, cpu);

		spin_unlock_irqrestore(&per_cpu(nr_lock, cpu), flags);
	}

	last_get_time = curr_time;

}
EXPORT_SYMBOL(sched_get_nr_running_avg);
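
/*
 * Worked example of the window math above, with illustrative numbers
 * (using ms for readability; the ratio is unit-independent): over a
 * 100 ms poll period, a CPU that had two runnable tasks for 90 ms and
 * one for 10 ms accumulates nr_prod_sum = 2 * 90 + 1 * 10 = 190, so
 * tmp_nr = 190 * 100 / 100 = 190, i.e. an average of 1.90 tasks scaled
 * by 100. Adding NR_THRESHOLD_PCT (15) and dividing by 100 gives
 * stats[cpu].nr = (190 + 15) / 100 = 2; the fraction is rounded up
 * only when it is at least 0.85.
 */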

#define BUSY_NR_RUN		3
#define BUSY_LOAD_FACTOR	10
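
/*
 * On an asymmetric (hmp_capable()) system, last_busy_time is refreshed
 * for a non-minimum-capacity CPU when either trigger below fires: the
 * runqueue drops from at least BUSY_NR_RUN (3) runnable tasks to fewer
 * than that, or a task is dequeued while cpu_util() still exceeds
 * 1/BUSY_LOAD_FACTOR (10%) of the CPU's original capacity.
 */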
static inline void update_last_busy_time(int cpu, bool dequeue,
				unsigned long prev_nr_run, u64 curr_time)
{
	bool nr_run_trigger = false, load_trigger = false;

	if (!hmp_capable() || is_min_capacity_cpu(cpu))
		return;

	if (prev_nr_run >= BUSY_NR_RUN && per_cpu(nr, cpu) < BUSY_NR_RUN)
		nr_run_trigger = true;

	if (dequeue && (cpu_util(cpu) * BUSY_LOAD_FACTOR) >
			capacity_orig_of(cpu))
		load_trigger = true;

	if (nr_run_trigger || load_trigger)
		atomic64_set(&per_cpu(last_busy_time, cpu), curr_time);
}

/**
 * sched_update_nr_prod
 * @cpu: The CPU whose nr_running count is being updated.
 * @delta: Adjust nr by 'delta' amount.
 * @inc: Whether we are increasing or decreasing the count.
 *
 * Update the running average with the latest nr_running value for the CPU.
 */
void sched_update_nr_prod(int cpu, long delta, bool inc)
{
	u64 diff;
	u64 curr_time;
	unsigned long flags, nr_running;

	spin_lock_irqsave(&per_cpu(nr_lock, cpu), flags);
	nr_running = per_cpu(nr, cpu);
	curr_time = sched_clock();
	diff = curr_time - per_cpu(last_time, cpu);
	BUG_ON((s64)diff < 0);
	per_cpu(last_time, cpu) = curr_time;
	per_cpu(nr, cpu) = nr_running + (inc ? delta : -delta);

	BUG_ON((s64)per_cpu(nr, cpu) < 0);

	if (per_cpu(nr, cpu) > per_cpu(nr_max, cpu))
		per_cpu(nr_max, cpu) = per_cpu(nr, cpu);

	update_last_busy_time(cpu, !inc, nr_running, curr_time);

	per_cpu(nr_prod_sum, cpu) += nr_running * diff;
	per_cpu(nr_big_prod_sum, cpu) += walt_big_tasks(cpu) * diff;
	per_cpu(iowait_prod_sum, cpu) += nr_iowait_cpu(cpu) * diff;
	spin_unlock_irqrestore(&per_cpu(nr_lock, cpu), flags);
}
EXPORT_SYMBOL(sched_update_nr_prod);

/*
 * Returns the CPU utilization % in the last window.
 */
unsigned int sched_get_cpu_util(int cpu)
{
	struct rq *rq = cpu_rq(cpu);
	u64 util;
	unsigned long capacity, flags;
	unsigned int busy;

	raw_spin_lock_irqsave(&rq->lock, flags);

	util = rq->cfs.avg.util_avg;
	capacity = capacity_orig_of(cpu);

#ifdef CONFIG_SCHED_WALT
	if (!walt_disabled && sysctl_sched_use_walt_cpu_util) {
		util = rq->prev_runnable_sum + rq->grp_time.prev_runnable_sum;
		util = div64_u64(util,
				 sched_ravg_window >> SCHED_CAPACITY_SHIFT);
	}
#endif
	raw_spin_unlock_irqrestore(&rq->lock, flags);

	util = (util >= capacity) ? capacity : util;
	busy = div64_ul((util * 100), capacity);
	return busy;
}
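
/*
 * Example with illustrative numbers, assuming WALT is in use with the
 * default 20 ms window: sched_ravg_window >> SCHED_CAPACITY_SHIFT is
 * 20000000 >> 10 ~= 19531, so 10 ms of windowed demand
 * (prev_runnable_sum = 10000000) yields util ~= 512. On a CPU with
 * capacity_orig_of() == 1024 that reports busy = 512 * 100 / 1024 = 50.
 * Since util is clamped to capacity, the result never exceeds 100.
 */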

u64 sched_get_cpu_last_busy_time(int cpu)
{
	return atomic64_read(&per_cpu(last_busy_time, cpu));
}