Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit dcbbf13d authored by Lingutla Chandrasekhar
Browse files

Sched: Improve load balancer tracing support



Add new trace events to improve load balancer tracing support,
which helps to debug load balance issues.

Change-Id: I1d537c1441b4cbe8cbad4168c356b54dd6d45bbf
Signed-off-by: Lingutla Chandrasekhar <clingutla@codeaurora.org>
parent 9ad222c2
Loading
Loading
Loading
Loading
+111 −4
Original line number Diff line number Diff line
@@ -257,10 +257,10 @@ TRACE_EVENT(sched_load_balance,
	TP_PROTO(int cpu, enum cpu_idle_type idle, int balance,
		unsigned long group_mask, int busiest_nr_running,
		unsigned long imbalance, unsigned int env_flags, int ld_moved,
		unsigned int balance_interval),
		unsigned int balance_interval, int active_balance),

	TP_ARGS(cpu, idle, balance, group_mask, busiest_nr_running,
		imbalance, env_flags, ld_moved, balance_interval),
		imbalance, env_flags, ld_moved, balance_interval, active_balance),

	TP_STRUCT__entry(
		__field(        int,                    cpu)
@@ -272,6 +272,7 @@ TRACE_EVENT(sched_load_balance,
		__field(        unsigned int,           env_flags)
		__field(        int,                    ld_moved)
		__field(        unsigned int,           balance_interval)
		__field(        int,                    active_balance)
	),

	TP_fast_assign(
@@ -284,16 +285,122 @@ TRACE_EVENT(sched_load_balance,
		__entry->env_flags              = env_flags;
		__entry->ld_moved               = ld_moved;
		__entry->balance_interval       = balance_interval;
		__entry->active_balance		= active_balance;
	),

	TP_printk("cpu=%d state=%s balance=%d group=%#lx busy_nr=%d imbalance=%ld flags=%#x ld_moved=%d bal_int=%d",
	TP_printk("cpu=%d state=%s balance=%d group=%#lx busy_nr=%d imbalance=%ld flags=%#x ld_moved=%d bal_int=%d active_balance=%d",
		__entry->cpu,
		__entry->idle == CPU_IDLE ? "idle" :
		(__entry->idle == CPU_NEWLY_IDLE ? "newly_idle" : "busy"),
		__entry->balance,
		__entry->group_mask, __entry->busiest_nr_running,
		__entry->imbalance, __entry->env_flags, __entry->ld_moved,
		__entry->balance_interval)
		__entry->balance_interval, __entry->active_balance)
);

/*
 * sched_load_balance_nohz_kick - emitted just before this CPU sends a
 * reschedule IPI to @kick_cpu so it performs the nohz idle load balance.
 *
 * @cpu:      the CPU issuing the kick (smp_processor_id() at the call site)
 * @kick_cpu: the idle CPU chosen to run the nohz balance
 *
 * Beyond the two arguments, the event samples extra scheduler state at
 * record time: @cpu's nr_running and misfit_task_load, whether @cpu is
 * over-utilized, and @kick_cpu's nohz flags word.
 *
 * NOTE: field names/order here are userspace-visible via the event's
 * format file — do not rename or reorder them.
 */
TRACE_EVENT(sched_load_balance_nohz_kick,

	TP_PROTO(int cpu, int kick_cpu),

	TP_ARGS(cpu, kick_cpu),

	TP_STRUCT__entry(
		__field(int,		cpu			)
		__field(unsigned int,	cpu_nr			)
		__field(unsigned long,	misfit_task_load	)
		__field(int,		cpu_overutil		)
		__field(int,		kick_cpu		)
		__field(unsigned long,	nohz_flags		)
	),

	TP_fast_assign(
		__entry->cpu			= cpu;
		__entry->cpu_nr			= cpu_rq(cpu)->nr_running;	/* runnable count on the kicking CPU */
		__entry->misfit_task_load	= cpu_rq(cpu)->misfit_task_load;
		__entry->cpu_overutil		= cpu_overutilized(cpu);	/* helper defined elsewhere in sched */
		__entry->kick_cpu		= kick_cpu;
		__entry->nohz_flags		= *nohz_flags(kick_cpu);	/* target CPU's nohz state bits */
	),

	TP_printk("cpu=%d nr_run=%u misfit_task_load=%lu overutilized=%d kick_cpu=%d nohz_flags=0x%lx",
			__entry->cpu, __entry->cpu_nr, __entry->misfit_task_load, __entry->cpu_overutil,
			__entry->kick_cpu, __entry->nohz_flags)

);

/*
 * sched_load_balance_sg_stats - per-sched_group statistics snapshot taken
 * while update_sd_lb_stats() walks the groups of a sched domain.
 *
 * @sg_cpus:           bitmask (first word) of CPUs in the group being scanned
 * @group_type:        the group's classification (group_type enum value)
 * @idle_cpus:         number of idle CPUs observed in the group
 * @sum_nr_running:    total runnable tasks across the group
 * @group_load:        accumulated load of the group
 * @group_capacity:    total compute capacity of the group
 * @group_util:        accumulated utilization of the group
 * @group_no_capacity: non-zero when the group has no spare capacity
 * @load_per_task:     average load per runnable task in the group
 * @misfit_load:       largest misfit task load seen in the group
 * @busiest:           cpumask word of the current busiest group (0 if none yet)
 *
 * NOTE: field names/order are userspace-visible ABI via the event format
 * file — do not rename or reorder them.
 */
TRACE_EVENT(sched_load_balance_sg_stats,

	TP_PROTO(unsigned long sg_cpus, int group_type, unsigned int idle_cpus, unsigned int sum_nr_running, unsigned long group_load, unsigned long group_capacity, unsigned long group_util, int group_no_capacity, unsigned long load_per_task, unsigned long misfit_load, unsigned long busiest),

	TP_ARGS(sg_cpus, group_type, idle_cpus, sum_nr_running, group_load, group_capacity, group_util, group_no_capacity, load_per_task, misfit_load, busiest),

	TP_STRUCT__entry(
		__field(unsigned long,		group_mask		)
		__field(int,			group_type		)
		__field(unsigned int,		group_idle_cpus		)
		__field(unsigned int,		sum_nr_running		)
		__field(unsigned long,		group_load		)
		__field(unsigned long,		group_capacity		)
		__field(unsigned long,		group_util		)
		__field(int,			group_no_capacity	)
		__field(unsigned long,		load_per_task		)
		__field(unsigned long,		misfit_task_load	)
		__field(unsigned long,		busiest			)
	),

	TP_fast_assign(
		__entry->group_mask			= sg_cpus;
		__entry->group_type			= group_type;
		__entry->group_idle_cpus		= idle_cpus;
		__entry->sum_nr_running			= sum_nr_running;
		__entry->group_load			= group_load;
		__entry->group_capacity			= group_capacity;
		__entry->group_util			= group_util;
		__entry->group_no_capacity		= group_no_capacity;
		__entry->load_per_task			= load_per_task;
		__entry->misfit_task_load		= misfit_load;
		__entry->busiest			= busiest;
	),

	TP_printk("sched_group=%#lx type=%d idle_cpus=%u sum_nr_run=%u group_load=%lu capacity=%lu util=%lu no_capacity=%d lpt=%lu misfit_tload=%lu busiest_group=%#lx",
		__entry->group_mask, __entry->group_type, __entry->group_idle_cpus, __entry->sum_nr_running, __entry->group_load, __entry->group_capacity, __entry->group_util, __entry->group_no_capacity, __entry->load_per_task, __entry->misfit_task_load, __entry->busiest)
);

/*
 * sched_load_balance_stats - summary of the busiest vs. local group
 * comparison, emitted by find_busiest_group() right after
 * calculate_imbalance() has produced env->imbalance.
 *
 * @busiest/@local:               cpumask word of the busiest / local group
 * @bgroup_type/@lgroup_type:     group_type classification of each group
 * @bavg_load/@lavg_load:         average load of each group
 * @bload_per_task/@lload_per_task: per-task load of each group
 * @sds_avg_load:                 domain-wide average load
 * @imbalance:                    amount of load to move to even things out
 *
 * NOTE: field names/order are userspace-visible ABI via the event format
 * file — do not rename or reorder them.
 */
TRACE_EVENT(sched_load_balance_stats,

	TP_PROTO(unsigned long busiest, int bgroup_type, unsigned long bavg_load, unsigned long bload_per_task, unsigned long local, int lgroup_type, unsigned long lavg_load, unsigned long lload_per_task, unsigned long sds_avg_load, unsigned long imbalance),

	TP_ARGS(busiest, bgroup_type, bavg_load, bload_per_task, local, lgroup_type, lavg_load, lload_per_task, sds_avg_load, imbalance),

	TP_STRUCT__entry(
		__field(unsigned long,		busiest			)
		__field(int,			bgp_type		)
		__field(unsigned long,		bavg_load		)
		__field(unsigned long,		blpt			)
		__field(unsigned long,		local			)
		__field(int,			lgp_type		)
		__field(unsigned long,		lavg_load		)
		__field(unsigned long,		llpt			)
		__field(unsigned long,		sds_avg			)
		__field(unsigned long,		imbalance		)
	),

	TP_fast_assign(
		__entry->busiest			= busiest;
		__entry->bgp_type			= bgroup_type;
		__entry->bavg_load			= bavg_load;
		__entry->blpt				= bload_per_task;
		/* a duplicate bgp_type assignment (copy-paste slip) was removed here */
		__entry->local				= local;
		__entry->lgp_type			= lgroup_type;
		__entry->lavg_load			= lavg_load;
		__entry->llpt				= lload_per_task;
		__entry->sds_avg			= sds_avg_load;
		__entry->imbalance			= imbalance;
	),

	/* %lu for the unsigned long fields — they were mismatched as %ld */
	TP_printk("busiest_group=%#lx busiest_type=%d busiest_avg_load=%lu busiest_lpt=%lu local_group=%#lx local_type=%d local_avg_load=%lu local_lpt=%lu domain_avg_load=%lu imbalance=%lu",
		__entry->busiest, __entry->bgp_type, __entry->bavg_load, __entry->blpt, __entry->local, __entry->lgp_type, __entry->lavg_load, __entry->llpt, __entry->sds_avg, __entry->imbalance)
);

DECLARE_EVENT_CLASS(sched_process_template,
+34 −0
Original line number Diff line number Diff line
@@ -442,6 +442,40 @@ TRACE_EVENT(sched_set_boost,
	TP_printk("type %d", __entry->type)
);

/*
 * sched_load_balance_skip_tasks - emitted from detach_tasks() when a task
 * is NOT migrated from the source to the destination CPU (the "next:"
 * skip path), recording why-relevant state for the skipped task.
 *
 * @scpu:      source CPU the balancer tried to pull from
 * @dcpu:      destination CPU the task would have moved to
 * @grp_type:  source group's classification (env->src_grp_type at call site)
 * @pid:       pid of the skipped task
 * @h_load:    the task's hierarchical load (may be 0 if never computed)
 * @task_util: the task's utilization
 * @affinity:  first word of the task's cpus_allowed mask
 *
 * The cum_window_demand_scaled samples are taken from both runqueues at
 * record time (WALT cumulative window demand — per the call sites; confirm
 * against the rq definition).
 *
 * NOTE: field names/order are userspace-visible ABI via the event format
 * file — do not rename or reorder them.
 */
TRACE_EVENT(sched_load_balance_skip_tasks,

	TP_PROTO(int scpu, int dcpu, int grp_type, int pid, unsigned long h_load, unsigned long task_util, unsigned long affinity),

	TP_ARGS(scpu, dcpu, grp_type, pid, h_load, task_util, affinity),

	TP_STRUCT__entry(
		__field(int,		scpu		)
		__field(unsigned long,	src_util_cum	)
		__field(int,		grp_type	)
		__field(int,		dcpu		)
		__field(unsigned long,	dst_util_cum	)
		__field(int,		pid		)
		__field(unsigned long,	affinity	)
		__field(unsigned long,	task_util	)
		__field(unsigned long,	h_load		)
	),

	TP_fast_assign(
		__entry->scpu			= scpu;
		__entry->src_util_cum		= cpu_rq(scpu)->cum_window_demand_scaled;
		__entry->grp_type		= grp_type;
		__entry->dcpu			= dcpu;
		__entry->dst_util_cum		= cpu_rq(dcpu)->cum_window_demand_scaled;
		__entry->pid			= pid;
		__entry->affinity		= affinity;
		__entry->task_util		= task_util;
		__entry->h_load			= h_load;
	),

	TP_printk("source_cpu=%d util_cum=%lu group_type=%d dest_cpu=%d util_cum=%lu pid=%d affinity=%#lx task_util=%lu task_h_load=%lu",
		__entry->scpu, __entry->src_util_cum, __entry->grp_type, __entry->dcpu, __entry->dst_util_cum, __entry->pid, __entry->affinity, __entry->task_util, __entry->h_load)
);

DECLARE_EVENT_CLASS(sched_cpu_load,

	TP_PROTO(struct rq *rq, int idle, u64 irqload, unsigned int power_cost),
+19 −2
Original line number Diff line number Diff line
@@ -9151,7 +9151,7 @@ static int detach_tasks(struct lb_env *env)
{
	struct list_head *tasks = &env->src_rq->cfs_tasks;
	struct task_struct *p;
	unsigned long load;
	unsigned long load = 0;
	int detached = 0;
	int orig_loop = env->loop;

@@ -9225,6 +9225,9 @@ static int detach_tasks(struct lb_env *env)

		continue;
next:
		trace_sched_load_balance_skip_tasks(env->src_cpu, env->dst_cpu,
				env->src_grp_type, p->pid, load, task_util(p),
				cpumask_bits(&p->cpus_allowed)[0]);
		list_move_tail(&p->se.group_node, tasks);
	}

@@ -10146,6 +10149,14 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
		sds->total_capacity += sgs->group_capacity;
		sds->total_util += sgs->group_util;

		trace_sched_load_balance_sg_stats(sg->cpumask[0], sgs->group_type,
					sgs->idle_cpus, sgs->sum_nr_running,
					sgs->group_load, sgs->group_capacity,
					sgs->group_util, sgs->group_no_capacity,
					sgs->load_per_task,
					sgs->group_misfit_task_load,
					sds->busiest ? sds->busiest->cpumask[0] : 0);

		sg = sg->next;
	} while (sg != env->sd->groups);

@@ -10573,6 +10584,11 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
	/* Looks like there is an imbalance. Compute it */
	env->src_grp_type = busiest->group_type;
	calculate_imbalance(env, &sds);
	trace_sched_load_balance_stats(sds.busiest->cpumask[0], busiest->group_type,
				busiest->avg_load, busiest->load_per_task,
				sds.local->cpumask[0], local->group_type,
				local->avg_load, local->load_per_task,
				sds.avg_load, env->imbalance);
	return sds.busiest;

out_balanced:
@@ -11078,7 +11094,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
				 group ? group->cpumask[0] : 0,
				 busiest ? busiest->nr_running : 0,
				 env.imbalance, env.flags, ld_moved,
				 sd->balance_interval);
				 sd->balance_interval, active_balance);
	return ld_moved;
}

@@ -11460,6 +11476,7 @@ static void nohz_balancer_kick(bool only_update)
	 * is idle. And the softirq performing nohz idle load balance
	 * will be run before returning from the IPI.
	 */
	trace_sched_load_balance_nohz_kick(smp_processor_id(), ilb_cpu);
	smp_send_reschedule(ilb_cpu);
	return;
}