
Commit 9dd14571 authored by qctecmdr, committed by Gerrit Code Review

Merge "sched/fair: Add snapshot of load-balancing changes"

parents 18b2bd02 e8b54c76
+18 −0
@@ -20,11 +20,17 @@ extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
enum { sysctl_hung_task_timeout_secs = 0 };
#endif

#define MAX_CLUSTERS 3
/* MAX_MARGIN_LEVELS is one less than MAX_CLUSTERS: one margin per boundary between adjacent clusters */
#define MAX_MARGIN_LEVELS (MAX_CLUSTERS - 1)

extern unsigned int sysctl_sched_latency;
extern unsigned int sysctl_sched_min_granularity;
extern unsigned int sysctl_sched_wakeup_granularity;
extern unsigned int sysctl_sched_child_runs_first;
#ifdef CONFIG_SCHED_WALT
extern unsigned int __weak sysctl_sched_capacity_margin_up[MAX_MARGIN_LEVELS];
extern unsigned int __weak sysctl_sched_capacity_margin_down[MAX_MARGIN_LEVELS];
extern unsigned int __weak sysctl_sched_user_hint;
extern const int __weak sched_user_hint_max;
extern unsigned int __weak sysctl_sched_cpu_high_irqload;
@@ -55,12 +61,24 @@ walt_proc_user_hint_handler(struct ctl_table *table, int write,
			void __user *buffer, size_t *lenp,
			loff_t *ppos);

extern int __weak
sched_updown_migrate_handler(struct ctl_table *table, int write,
			void __user *buffer, size_t *lenp,
			loff_t *ppos);

extern int __weak
sched_ravg_window_handler(struct ctl_table *table, int write,
			void __user *buffer, size_t *lenp,
			loff_t *ppos);
#endif

#if defined(CONFIG_PREEMPT_TRACER) || defined(CONFIG_DEBUG_PREEMPT)
extern unsigned int sysctl_preemptoff_tracing_threshold_ns;
#endif
#if defined(CONFIG_PREEMPTIRQ_EVENTS) && defined(CONFIG_IRQSOFF_TRACER)
extern unsigned int sysctl_irqsoff_tracing_threshold_ns;
#endif

enum sched_tunable_scaling {
	SCHED_TUNABLESCALING_NONE,
	SCHED_TUNABLESCALING_LOG,
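
The __weak declarations in this hunk keep the WALT knobs optional: in the kernel, __weak expands to __attribute__((weak)), so the link succeeds even when no strong definition is built in. A minimal stand-alone sketch of the weak-override behavior (hypothetical user-space example, reusing one symbol name from above for familiarity):

#include <stdio.h>

/* Weak default: replaced at link time if any other object file
 * supplies a strong definition of the same symbol. */
unsigned int __attribute__((weak)) sysctl_sched_user_hint;

int main(void)
{
	/* Prints 0 unless a strong definition overrode the weak one. */
	printf("sysctl_sched_user_hint = %u\n", sysctl_sched_user_hint);
	return 0;
}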
+3 −0
@@ -73,6 +73,9 @@ extern int proc_do_large_bitmap(struct ctl_table *, int,
extern int proc_do_static_key(struct ctl_table *table, int write,
			      void __user *buffer, size_t *lenp,
			      loff_t *ppos);
extern int proc_douintvec_capacity(struct ctl_table *table, int write,
			      void __user *buffer, size_t *lenp,
			      loff_t *ppos);
extern int proc_douintvec_ravg_window(struct ctl_table *table, int write,
			      void __user *buffer, size_t *lenp,
			      loff_t *ppos);
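
Handlers such as proc_douintvec_ravg_window get attached to a sysctl through a ctl_table entry in kernel/sysctl.c. A sketch of that wiring, where the procname and backing variable are illustrative assumptions and only the .proc_handler field comes from the declaration above:

static unsigned int sysctl_sched_ravg_window_assumed;	/* assumed backing store */

static struct ctl_table sched_walt_table[] = {
	{
		.procname	= "sched_ravg_window_ns",	/* assumed name */
		.data		= &sysctl_sched_ravg_window_assumed,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_douintvec_ravg_window,
	},
	{ }	/* table terminator */
};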
+28 −0
@@ -62,6 +62,34 @@ DEFINE_EVENT(preemptirq_template, preempt_enable,
#define trace_preempt_disable_rcuidle(...)
#endif

TRACE_EVENT(irqs_disable,

	TP_PROTO(u64 delta, unsigned long caddr0, unsigned long caddr1,
				unsigned long caddr2, unsigned long caddr3),

	TP_ARGS(delta, caddr0, caddr1, caddr2, caddr3),

	TP_STRUCT__entry(
		__field(u64, delta)
		__field(void*, caddr0)
		__field(void*, caddr1)
		__field(void*, caddr2)
		__field(void*, caddr3)
	),

	TP_fast_assign(
		__entry->delta = delta;
		__entry->caddr0 = (void *)caddr0;
		__entry->caddr1 = (void *)caddr1;
		__entry->caddr2 = (void *)caddr2;
		__entry->caddr3 = (void *)caddr3;
	),

	TP_printk("delta=%llu(ns) Callers:(%ps<-%ps<-%ps<-%ps)", __entry->delta,
					__entry->caddr0, __entry->caddr1,
					__entry->caddr2, __entry->caddr3)
);

#endif /* _TRACE_PREEMPTIRQ_H */

#include <trace/define_trace.h>
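
TRACE_EVENT(irqs_disable) generates a trace_irqs_disable() function for the irqs-off tracking path to call once interrupts are re-enabled. A presumed call site, mirroring the preempt-disable hook added later in this commit (the tracking struct pointed to by "is", with its timestamp ts and saved caller addresses caddr[], is an assumption):

	u64 delta = sched_clock() - is->ts;	/* time spent with irqs off */

	if (delta > sysctl_irqsoff_tracing_threshold_ns)
		trace_irqs_disable(delta, is->caddr[0], is->caddr[1],
				   is->caddr[2], is->caddr[3]);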
+460 −0
@@ -8,6 +8,7 @@
#include <linux/sched/numa_balancing.h>
#include <linux/tracepoint.h>
#include <linux/binfmts.h>
#include <linux/sched/idle.h>

/*
 * Tracepoint for calling kthread_stop, performed to end a kthread:
@@ -259,6 +260,233 @@ TRACE_EVENT(sched_migrate_task,
		  __entry->orig_cpu, __entry->dest_cpu)
);

/*
 * Tracepoint for load balancing:
 */
#ifdef CONFIG_SMP
#if NR_CPUS > BITS_PER_LONG
#define trace_sched_load_balance_sg_stats(...)
#define trace_sched_load_balance_stats(...)
#define trace_sched_load_balance(...)
#define trace_sched_load_balance_nohz_kick(...)
#else
TRACE_EVENT(sched_load_balance,

	TP_PROTO(int cpu, enum cpu_idle_type idle, int balance,
		unsigned long group_mask, int busiest_nr_running,
		unsigned long imbalance, unsigned int env_flags, int ld_moved,
		unsigned int balance_interval, int active_balance),

	TP_ARGS(cpu, idle, balance, group_mask, busiest_nr_running,
		imbalance, env_flags, ld_moved, balance_interval,
		active_balance),

	TP_STRUCT__entry(
		__field(int,                    cpu)
		__field(enum cpu_idle_type,     idle)
		__field(int,                    balance)
		__field(unsigned long,          group_mask)
		__field(int,                    busiest_nr_running)
		__field(unsigned long,          imbalance)
		__field(unsigned int,           env_flags)
		__field(int,                    ld_moved)
		__field(unsigned int,           balance_interval)
		__field(int,                    active_balance)
	),

	TP_fast_assign(
		__entry->cpu                    = cpu;
		__entry->idle                   = idle;
		__entry->balance                = balance;
		__entry->group_mask             = group_mask;
		__entry->busiest_nr_running     = busiest_nr_running;
		__entry->imbalance              = imbalance;
		__entry->env_flags              = env_flags;
		__entry->ld_moved               = ld_moved;
		__entry->balance_interval       = balance_interval;
		__entry->active_balance		= active_balance;
	),

	TP_printk("cpu=%d state=%s balance=%d group=%#lx busy_nr=%d imbalance=%ld flags=%#x ld_moved=%d bal_int=%d active_balance=%d",
		__entry->cpu,
		__entry->idle == CPU_IDLE ? "idle" :
		(__entry->idle == CPU_NEWLY_IDLE ? "newly_idle" : "busy"),
		__entry->balance,
		__entry->group_mask, __entry->busiest_nr_running,
		__entry->imbalance, __entry->env_flags, __entry->ld_moved,
		__entry->balance_interval, __entry->active_balance)
);

TRACE_EVENT(sched_load_balance_nohz_kick,

	TP_PROTO(int cpu, int kick_cpu),

	TP_ARGS(cpu, kick_cpu),

	TP_STRUCT__entry(
		__field(int,		cpu)
		__field(unsigned int,	cpu_nr)
		__field(unsigned long,	misfit_task_load)
		__field(int,		cpu_overutil)
		__field(int,		kick_cpu)
		__field(unsigned long,	nohz_flags)
	),

	TP_fast_assign(
		__entry->cpu	          = cpu;
		__entry->cpu_nr		  = cpu_rq(cpu)->nr_running;
		__entry->misfit_task_load = cpu_rq(cpu)->misfit_task_load;
		__entry->cpu_overutil	  = cpu_overutilized(cpu);
		__entry->kick_cpu	  = kick_cpu;
		__entry->nohz_flags	  = atomic_read(nohz_flags(kick_cpu));
	),

	TP_printk("cpu=%d nr_run=%u misfit_task_load=%lu overutilized=%d kick_cpu=%d nohz_flags=0x%lx",
			__entry->cpu, __entry->cpu_nr,
			__entry->misfit_task_load, __entry->cpu_overutil,
			__entry->kick_cpu, __entry->nohz_flags)

);

TRACE_EVENT(sched_load_balance_sg_stats,

	TP_PROTO(unsigned long sg_cpus, int group_type, unsigned int idle_cpus,
		unsigned int sum_nr_running, unsigned long group_load,
		unsigned long group_capacity, unsigned long group_util,
		int group_no_capacity, unsigned long load_per_task,
		unsigned long misfit_load, unsigned long busiest),

	TP_ARGS(sg_cpus, group_type, idle_cpus, sum_nr_running, group_load,
		group_capacity, group_util, group_no_capacity, load_per_task,
		misfit_load, busiest),

	TP_STRUCT__entry(
		__field(unsigned long,		group_mask)
		__field(int,			group_type)
		__field(unsigned int,		group_idle_cpus)
		__field(unsigned int,		sum_nr_running)
		__field(unsigned long,		group_load)
		__field(unsigned long,		group_capacity)
		__field(unsigned long,		group_util)
		__field(int,			group_no_capacity)
		__field(unsigned long,		load_per_task)
		__field(unsigned long,		misfit_task_load)
		__field(unsigned long,		busiest)
	),

	TP_fast_assign(
		__entry->group_mask			= sg_cpus;
		__entry->group_type			= group_type;
		__entry->group_idle_cpus		= idle_cpus;
		__entry->sum_nr_running			= sum_nr_running;
		__entry->group_load			= group_load;
		__entry->group_capacity			= group_capacity;
		__entry->group_util			= group_util;
		__entry->group_no_capacity		= group_no_capacity;
		__entry->load_per_task			= load_per_task;
		__entry->misfit_task_load		= misfit_load;
		__entry->busiest			= busiest;
	),

	TP_printk("sched_group=%#lx type=%d idle_cpus=%u sum_nr_run=%u group_load=%lu capacity=%lu util=%lu no_capacity=%d lpt=%lu misfit_tload=%lu busiest_group=%#lx",
		__entry->group_mask, __entry->group_type,
		__entry->group_idle_cpus, __entry->sum_nr_running,
		__entry->group_load, __entry->group_capacity,
		__entry->group_util, __entry->group_no_capacity,
		__entry->load_per_task, __entry->misfit_task_load,
		__entry->busiest)
);

TRACE_EVENT(sched_load_balance_stats,

	TP_PROTO(unsigned long busiest, int bgroup_type,
		unsigned long bavg_load, unsigned long bload_per_task,
		unsigned long local, int lgroup_type, unsigned long lavg_load,
		unsigned long lload_per_task, unsigned long sds_avg_load,
		unsigned long imbalance),

	TP_ARGS(busiest, bgroup_type, bavg_load, bload_per_task, local,
		lgroup_type, lavg_load, lload_per_task, sds_avg_load,
		imbalance),

	TP_STRUCT__entry(
		__field(unsigned long,		busiest)
		__field(int,			bgp_type)
		__field(unsigned long,		bavg_load)
		__field(unsigned long,		blpt)
		__field(unsigned long,		local)
		__field(int,			lgp_type)
		__field(unsigned long,		lavg_load)
		__field(unsigned long,		llpt)
		__field(unsigned long,		sds_avg)
		__field(unsigned long,		imbalance)
	),

	TP_fast_assign(
		__entry->busiest			= busiest;
		__entry->bgp_type			= bgroup_type;
		__entry->bavg_load			= bavg_load;
		__entry->blpt				= bload_per_task;
		__entry->local				= local;
		__entry->lgp_type			= lgroup_type;
		__entry->lavg_load			= lavg_load;
		__entry->llpt				= lload_per_task;
		__entry->sds_avg			= sds_avg_load;
		__entry->imbalance			= imbalance;
	),

	TP_printk("busiest_group=%#lx busiest_type=%d busiest_avg_load=%ld busiest_lpt=%ld local_group=%#lx local_type=%d local_avg_load=%ld local_lpt=%ld domain_avg_load=%ld imbalance=%ld",
		__entry->busiest, __entry->bgp_type, __entry->bavg_load,
		__entry->blpt, __entry->local, __entry->lgp_type,
		__entry->lavg_load, __entry->llpt, __entry->sds_avg,
		__entry->imbalance)
);
#endif /* NR_CPUS > BITS_PER_LONG */
#endif /* CONFIG_SMP */
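
Each TRACE_EVENT above generates a trace_<name>() hook for fair.c. Presumed call sites in load_balance() and kick_ilb(); the argument sources below are assumptions inferred from the field names, not the literal hook-up:

	/* End of load_balance(): one event summarizing the whole attempt. */
	trace_sched_load_balance(this_cpu, idle, *continue_balancing,
				 sched_group_span(sd->groups)->bits[0],
				 busiest ? busiest->nr_running : 0,
				 env.imbalance, env.flags, ld_moved,
				 sd->balance_interval, active_balance);

	/* kick_ilb(): record which idle CPU is being kicked. */
	trace_sched_load_balance_nohz_kick(cpu, ilb_cpu);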

#ifdef CONFIG_SCHED_WALT
TRACE_EVENT(sched_load_balance_skip_tasks,

	TP_PROTO(int scpu, int dcpu, int grp_type, int pid,
		unsigned long h_load, unsigned long task_util,
		unsigned long affinity),

	TP_ARGS(scpu, dcpu, grp_type, pid, h_load, task_util, affinity),

	TP_STRUCT__entry(
		__field(int,            scpu)
		__field(unsigned long,  src_util_cum)
		__field(int,            grp_type)
		__field(int,            dcpu)
		__field(unsigned long,  dst_util_cum)
		__field(int,            pid)
		__field(unsigned long,  affinity)
		__field(unsigned long,  task_util)
		__field(unsigned long,  h_load)
	),

	TP_fast_assign(
		__entry->scpu           = scpu;
		__entry->src_util_cum   =
					cpu_rq(scpu)->cum_window_demand_scaled;
		__entry->grp_type       = grp_type;
		__entry->dcpu           = dcpu;
		__entry->dst_util_cum   =
					cpu_rq(dcpu)->cum_window_demand_scaled;
		__entry->pid            = pid;
		__entry->affinity       = affinity;
		__entry->task_util      = task_util;
		__entry->h_load         = h_load;
	),

	TP_printk("source_cpu=%d util_cum=%lu group_type=%d dest_cpu=%d util_cum=%lu pid=%d affinity=%#lx task_util=%lu task_h_load=%lu",
		__entry->scpu, __entry->src_util_cum, __entry->grp_type,
		__entry->dcpu, __entry->dst_util_cum, __entry->pid,
		__entry->affinity, __entry->task_util, __entry->h_load)
);
#endif
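
A presumed emission point for the skip event, when detach_tasks() declines to migrate a task during load balancing (env, p, and grp_type are assumptions based on the field names):

	trace_sched_load_balance_skip_tasks(env->src_cpu, env->dst_cpu,
					grp_type, p->pid, task_h_load(p),
					task_util(p),
					cpumask_bits(&p->cpus_mask)[0]);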

DECLARE_EVENT_CLASS(sched_process_template,

	TP_PROTO(struct task_struct *p),
@@ -671,6 +899,238 @@ DECLARE_TRACE(sched_overutilized_tp,
	TP_PROTO(struct root_domain *rd, bool overutilized),
	TP_ARGS(rd, overutilized));

TRACE_EVENT(sched_cpu_util,

	TP_PROTO(int cpu),

	TP_ARGS(cpu),

	TP_STRUCT__entry(
		__field(unsigned int,	cpu)
		__field(unsigned int,	nr_running)
		__field(long,		cpu_util)
		__field(long,		cpu_util_cum)
		__field(unsigned int,	capacity_curr)
		__field(unsigned int,	capacity)
		__field(unsigned int,	capacity_orig)
		__field(int,		idle_state)
		__field(u64,		irqload)
		__field(int,		online)
		__field(int,		isolated)
		__field(int,		reserved)
		__field(int,		high_irq_load)
	),

	TP_fast_assign(
		__entry->cpu                = cpu;
		__entry->nr_running         = cpu_rq(cpu)->nr_running;
		__entry->cpu_util           = cpu_util(cpu);
		__entry->cpu_util_cum       = cpu_util_cum(cpu, 0);
		__entry->capacity_curr      = capacity_curr_of(cpu);
		__entry->capacity           = capacity_of(cpu);
		__entry->capacity_orig      = capacity_orig_of(cpu);
		__entry->idle_state         = idle_get_state_idx(cpu_rq(cpu));
		__entry->irqload            = sched_irqload(cpu);
		__entry->online             = cpu_online(cpu);
		__entry->isolated           = cpu_isolated(cpu);
		__entry->reserved           = is_reserved(cpu);
		__entry->high_irq_load      = sched_cpu_high_irqload(cpu);
	),

	TP_printk("cpu=%d nr_running=%d cpu_util=%ld cpu_util_cum=%ld capacity_curr=%u capacity=%u capacity_orig=%u idle_state=%d irqload=%llu online=%u, isolated=%u, reserved=%u, high_irq_load=%u",
		__entry->cpu, __entry->nr_running, __entry->cpu_util,
		__entry->cpu_util_cum, __entry->capacity_curr,
		__entry->capacity, __entry->capacity_orig,
		__entry->idle_state, __entry->irqload, __entry->online,
		__entry->isolated, __entry->reserved, __entry->high_irq_load)
);

TRACE_EVENT(sched_compute_energy,

	TP_PROTO(struct task_struct *p, int eval_cpu,
		unsigned long eval_energy,
		unsigned long prev_energy,
		unsigned long best_energy,
		int best_energy_cpu),

	TP_ARGS(p, eval_cpu, eval_energy, prev_energy, best_energy,
		best_energy_cpu),

	TP_STRUCT__entry(
		__field(int,            pid)
		__array(char,           comm, TASK_COMM_LEN)
		__field(unsigned long,  util)
		__field(int,            prev_cpu)
		__field(unsigned long,  prev_energy)
		__field(int,            eval_cpu)
		__field(unsigned long,  eval_energy)
		__field(int,            best_energy_cpu)
		__field(unsigned long,  best_energy)
	),

	TP_fast_assign(
		__entry->pid                    = p->pid;
		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
		__entry->util                   = task_util(p);
		__entry->prev_cpu               = task_cpu(p);
		__entry->prev_energy            = prev_energy;
		__entry->eval_cpu               = eval_cpu;
		__entry->eval_energy            = eval_energy;
		__entry->best_energy_cpu        = best_energy_cpu;
		__entry->best_energy            = best_energy;
	),

	TP_printk("pid=%d comm=%s util=%lu prev_cpu=%d prev_energy=%llu eval_cpu=%d eval_energy=%llu best_energy_cpu=%d best_energy=%llu",
		__entry->pid, __entry->comm, __entry->util, __entry->prev_cpu,
		__entry->prev_energy, __entry->eval_cpu, __entry->eval_energy,
		__entry->best_energy_cpu, __entry->best_energy)
);

TRACE_EVENT(sched_task_util,

	TP_PROTO(struct task_struct *p, unsigned long candidates,
		int best_energy_cpu, bool sync, bool need_idle, int fastpath,
		bool placement_boost, u64 start_t,
		bool stune_boosted, bool is_rtg, bool rtg_skip_min,
		int start_cpu),

	TP_ARGS(p, candidates, best_energy_cpu, sync, need_idle, fastpath,
		placement_boost, start_t, stune_boosted, is_rtg, rtg_skip_min,
		start_cpu),

	TP_STRUCT__entry(
		__field(int,            pid)
		__array(char,           comm, TASK_COMM_LEN)
		__field(unsigned long,  util)
		__field(unsigned long,  candidates)
		__field(int,            prev_cpu)
		__field(int,            best_energy_cpu)
		__field(bool,           sync)
		__field(bool,           need_idle)
		__field(int,            fastpath)
		__field(int,            placement_boost)
		__field(u64,            latency)
		__field(bool,           stune_boosted)
		__field(bool,           is_rtg)
		__field(bool,           rtg_skip_min)
		__field(int,            start_cpu)
		__field(int,            unfilter)
		__field(unsigned long,	cpus_allowed)
	),

	TP_fast_assign(
		__entry->pid                    = p->pid;
		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
		__entry->util                   = task_util(p);
		__entry->prev_cpu               = task_cpu(p);
		__entry->candidates             = candidates;
		__entry->best_energy_cpu        = best_energy_cpu;
		__entry->sync                   = sync;
		__entry->need_idle              = need_idle;
		__entry->fastpath               = fastpath;
		__entry->placement_boost        = placement_boost;
		__entry->latency                = (sched_clock() - start_t);
		__entry->stune_boosted          = stune_boosted;
		__entry->is_rtg                 = is_rtg;
		__entry->rtg_skip_min           = rtg_skip_min;
		__entry->start_cpu              = start_cpu;
#ifdef CONFIG_SCHED_WALT
		__entry->unfilter               = p->unfilter;
#else
		__entry->unfilter               = 0;
#endif
		__entry->cpus_allowed		=
					cpumask_bits(&p->cpus_mask)[0];
	),

	TP_printk("pid=%d comm=%s util=%lu prev_cpu=%d candidates=%#lx best_energy_cpu=%d sync=%d need_idle=%d fastpath=%d placement_boost=%d latency=%llu stune_boosted=%d is_rtg=%d rtg_skip_min=%d start_cpu=%d unfilter=%d affinity=%lx",
		__entry->pid, __entry->comm, __entry->util, __entry->prev_cpu,
		__entry->candidates, __entry->best_energy_cpu, __entry->sync,
		__entry->need_idle, __entry->fastpath, __entry->placement_boost,
		__entry->latency, __entry->stune_boosted,
		__entry->is_rtg, __entry->rtg_skip_min, __entry->start_cpu,
		__entry->unfilter, __entry->cpus_allowed)
);

/*
 * Tracepoint for find_best_target
 */
TRACE_EVENT(sched_find_best_target,

	TP_PROTO(struct task_struct *tsk,
		 unsigned long min_util, int start_cpu,
		 int best_idle, int best_active, int most_spare_cap,
		 int target, int backup),

	TP_ARGS(tsk, min_util, start_cpu,
		best_idle, best_active, most_spare_cap,
		target, backup),

	TP_STRUCT__entry(
		__array(char,		comm, TASK_COMM_LEN)
		__field(pid_t,		pid)
		__field(unsigned long,	min_util)
		__field(int,		start_cpu)
		__field(int,		best_idle)
		__field(int,		best_active)
		__field(int,		most_spare_cap)
		__field(int,		target)
		__field(int,		backup)
		),

	TP_fast_assign(
		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
		__entry->pid            = tsk->pid;
		__entry->min_util       = min_util;
		__entry->start_cpu      = start_cpu;
		__entry->best_idle      = best_idle;
		__entry->best_active    = best_active;
		__entry->most_spare_cap = most_spare_cap;
		__entry->target         = target;
		__entry->backup         = backup;
		),

	TP_printk("pid=%d comm=%s start_cpu=%d best_idle=%d best_active=%d most_spare_cap=%d target=%d backup=%d",
		  __entry->pid, __entry->comm,
		  __entry->start_cpu,
		  __entry->best_idle, __entry->best_active,
		  __entry->most_spare_cap,
		  __entry->target, __entry->backup)
);
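
And the presumed hook-up at the end of find_best_target(), once the candidate CPUs have been chosen (the local variable names here are assumptions):

	trace_sched_find_best_target(p, min_util, start_cpu,
				     best_idle_cpu, best_active_cpu,
				     most_spare_cap_cpu, target_cpu,
				     backup_cpu);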

TRACE_EVENT(sched_preempt_disable,

	TP_PROTO(u64 delta, bool irqs_disabled,
			unsigned long caddr0, unsigned long caddr1,
			unsigned long caddr2, unsigned long caddr3),

	TP_ARGS(delta, irqs_disabled, caddr0, caddr1, caddr2, caddr3),

	TP_STRUCT__entry(
		__field(u64, delta)
		__field(bool, irqs_disabled)
		__field(void*, caddr0)
		__field(void*, caddr1)
		__field(void*, caddr2)
		__field(void*, caddr3)
	),

	TP_fast_assign(
		__entry->delta = delta;
		__entry->irqs_disabled = irqs_disabled;
		__entry->caddr0 = (void *)caddr0;
		__entry->caddr1 = (void *)caddr1;
		__entry->caddr2 = (void *)caddr2;
		__entry->caddr3 = (void *)caddr3;
	),

	TP_printk("delta=%llu(ns) irqs_d=%d Callers:(%ps<-%ps<-%ps<-%ps)",
				__entry->delta, __entry->irqs_disabled,
				__entry->caddr0, __entry->caddr1,
				__entry->caddr2, __entry->caddr3)
);

#endif /* _TRACE_SCHED_H */

/* This part must be outside protection */
+56 −1
@@ -3652,6 +3652,8 @@ unsigned long long task_sched_runtime(struct task_struct *p)
	return ns;
}

unsigned int capacity_margin_freq = 1280; /* ~20% margin */

/*
 * This function gets called by the timer code, with HZ frequency.
 * We call it with interrupts disabled.
@@ -3847,17 +3849,55 @@ static inline void sched_tick_stop(int cpu) { }

#if defined(CONFIG_PREEMPTION) && (defined(CONFIG_DEBUG_PREEMPT) || \
				defined(CONFIG_TRACE_PREEMPT_TOGGLE))
/*
 * preemptoff stack tracing threshold in ns.
 * default: 1ms
 */
unsigned int sysctl_preemptoff_tracing_threshold_ns = 1000000UL;

struct preempt_store {
	u64 ts;
	unsigned long caddr[4];
	bool irqs_disabled;
};

DEFINE_PER_CPU(struct preempt_store, the_ps);

/*
 * This is only called from __schedule() upon context switch.
 *
 * schedule() calls __schedule() with preemption disabled.
 * If we entered idle and are now exiting it, reset the preemption
 * tracking; otherwise we may think preemption was disabled the whole
 * time, since the non-idle task re-enables preemption in schedule().
 */
static inline void preempt_latency_reset(void)
{
	if (is_idle_task(this_rq()->curr))
		this_cpu_ptr(&the_ps)->ts = 0;
}

/*
 * If the value passed in is equal to the current preempt count
 * then we just disabled preemption. Start timing the latency.
 */
static inline void preempt_latency_start(int val)
{
	int cpu = raw_smp_processor_id();
	struct preempt_store *ps = &per_cpu(the_ps, cpu);

	if (preempt_count() == val) {
		unsigned long ip = get_lock_parent_ip();
#ifdef CONFIG_DEBUG_PREEMPT
		current->preempt_disable_ip = ip;
#endif
		ps->ts = sched_clock();
		ps->caddr[0] = CALLER_ADDR0;
		ps->caddr[1] = CALLER_ADDR1;
		ps->caddr[2] = CALLER_ADDR2;
		ps->caddr[3] = CALLER_ADDR3;
		ps->irqs_disabled = irqs_disabled();

		trace_preempt_off(CALLER_ADDR0, ip);
	}
}
@@ -3890,9 +3930,22 @@ NOKPROBE_SYMBOL(preempt_count_add);
 */
static inline void preempt_latency_stop(int val)
{
-	if (preempt_count() == val)
+	if (preempt_count() == val) {
		struct preempt_store *ps = &per_cpu(the_ps,
				raw_smp_processor_id());
		u64 delta = ps->ts ? (sched_clock() - ps->ts) : 0;

		/*
		 * Trace preempt disable stack if preemption
		 * is disabled for more than the threshold.
		 */
		if (delta > sysctl_preemptoff_tracing_threshold_ns)
			trace_sched_preempt_disable(delta, ps->irqs_disabled,
						ps->caddr[0], ps->caddr[1],
						ps->caddr[2], ps->caddr[3]);
		trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip());
	}
}

void preempt_count_sub(int val)
{
@@ -3919,6 +3972,7 @@ NOKPROBE_SYMBOL(preempt_count_sub);
#else
static inline void preempt_latency_start(int val) { }
static inline void preempt_latency_stop(int val) { }
static inline void preempt_latency_reset(void) { }
#endif

static inline unsigned long get_preempt_disable_ip(struct task_struct *p)
@@ -4153,6 +4207,7 @@ static void __sched notrace __schedule(bool preempt)
			prev->last_sleep_ts = wallclock;
#endif

		preempt_latency_reset();
		walt_update_task_ravg(prev, rq, PUT_PREV_TASK, wallclock, 0);
		walt_update_task_ravg(next, rq, PICK_NEXT_TASK, wallclock, 0);
		rq->nr_switches++;