include/trace/events/sched.h  +111 −4

@@ -257,10 +257,10 @@ TRACE_EVENT(sched_load_balance,

 	TP_PROTO(int cpu, enum cpu_idle_type idle, int balance,
 		unsigned long group_mask, int busiest_nr_running,
 		unsigned long imbalance, unsigned int env_flags, int ld_moved,
-		unsigned int balance_interval),
+		unsigned int balance_interval, int active_balance),

 	TP_ARGS(cpu, idle, balance, group_mask, busiest_nr_running,
-		imbalance, env_flags, ld_moved, balance_interval),
+		imbalance, env_flags, ld_moved, balance_interval, active_balance),

 	TP_STRUCT__entry(
 		__field(int,		cpu)
@@ -272,6 +272,7 @@ TRACE_EVENT(sched_load_balance,
 		__field(unsigned int,	env_flags)
 		__field(int,		ld_moved)
 		__field(unsigned int,	balance_interval)
+		__field(int,		active_balance)
 	),

 	TP_fast_assign(
@@ -284,16 +285,122 @@ TRACE_EVENT(sched_load_balance,
 		__entry->env_flags		= env_flags;
 		__entry->ld_moved		= ld_moved;
 		__entry->balance_interval	= balance_interval;
+		__entry->active_balance		= active_balance;
 	),

-	TP_printk("cpu=%d state=%s balance=%d group=%#lx busy_nr=%d imbalance=%ld flags=%#x ld_moved=%d bal_int=%d",
+	TP_printk("cpu=%d state=%s balance=%d group=%#lx busy_nr=%d imbalance=%ld flags=%#x ld_moved=%d bal_int=%d active_balance=%d",
 		__entry->cpu,
 		__entry->idle == CPU_IDLE ? "idle" :
 		(__entry->idle == CPU_NEWLY_IDLE ? "newly_idle" : "busy"),
 		__entry->balance, __entry->group_mask,
 		__entry->busiest_nr_running, __entry->imbalance,
 		__entry->env_flags, __entry->ld_moved,
-		__entry->balance_interval)
+		__entry->balance_interval, __entry->active_balance)
 );

+TRACE_EVENT(sched_load_balance_nohz_kick,
+
+	TP_PROTO(int cpu, int kick_cpu),
+
+	TP_ARGS(cpu, kick_cpu),
+
+	TP_STRUCT__entry(
+		__field(int,		cpu)
+		__field(unsigned int,	cpu_nr)
+		__field(unsigned long,	misfit_task_load)
+		__field(int,		cpu_overutil)
+		__field(int,		kick_cpu)
+		__field(unsigned long,	nohz_flags)
+	),
+
+	TP_fast_assign(
+		__entry->cpu			= cpu;
+		__entry->cpu_nr			= cpu_rq(cpu)->nr_running;
+		__entry->misfit_task_load	= cpu_rq(cpu)->misfit_task_load;
+		__entry->cpu_overutil		= cpu_overutilized(cpu);
+		__entry->kick_cpu		= kick_cpu;
+		__entry->nohz_flags		= *nohz_flags(kick_cpu);
+	),
+
+	TP_printk("cpu=%d nr_run=%u misfit_task_load=%lu overutilized=%d kick_cpu=%d nohz_flags=0x%lx",
+		__entry->cpu, __entry->cpu_nr, __entry->misfit_task_load,
+		__entry->cpu_overutil, __entry->kick_cpu, __entry->nohz_flags)
+);
+
+TRACE_EVENT(sched_load_balance_sg_stats,
+
+	TP_PROTO(unsigned long sg_cpus, int group_type, unsigned int idle_cpus,
+		unsigned int sum_nr_running, unsigned long group_load,
+		unsigned long group_capacity, unsigned long group_util,
+		int group_no_capacity, unsigned long load_per_task,
+		unsigned long misfit_load, unsigned long busiest),
+
+	TP_ARGS(sg_cpus, group_type, idle_cpus, sum_nr_running, group_load,
+		group_capacity, group_util, group_no_capacity, load_per_task,
+		misfit_load, busiest),
+
+	TP_STRUCT__entry(
+		__field(unsigned long,	group_mask)
+		__field(int,		group_type)
+		__field(unsigned int,	group_idle_cpus)
+		__field(unsigned int,	sum_nr_running)
+		__field(unsigned long,	group_load)
+		__field(unsigned long,	group_capacity)
+		__field(unsigned long,	group_util)
+		__field(int,		group_no_capacity)
+		__field(unsigned long,	load_per_task)
+		__field(unsigned long,	misfit_task_load)
+		__field(unsigned long,	busiest)
+	),
+
+	TP_fast_assign(
+		__entry->group_mask		= sg_cpus;
+		__entry->group_type		= group_type;
+		__entry->group_idle_cpus	= idle_cpus;
+		__entry->sum_nr_running		= sum_nr_running;
+		__entry->group_load		= group_load;
+		__entry->group_capacity		= group_capacity;
+		__entry->group_util		= group_util;
+		__entry->group_no_capacity	= group_no_capacity;
+		__entry->load_per_task		= load_per_task;
+		__entry->misfit_task_load	= misfit_load;
+		__entry->busiest		= busiest;
+	),
+
+	TP_printk("sched_group=%#lx type=%d idle_cpus=%u sum_nr_run=%u group_load=%lu capacity=%lu util=%lu no_capacity=%d lpt=%lu misfit_tload=%lu busiest_group=%#lx",
+		__entry->group_mask, __entry->group_type,
+		__entry->group_idle_cpus, __entry->sum_nr_running,
+		__entry->group_load, __entry->group_capacity,
+		__entry->group_util, __entry->group_no_capacity,
+		__entry->load_per_task, __entry->misfit_task_load,
+		__entry->busiest)
+);
+
+TRACE_EVENT(sched_load_balance_stats,
+
+	TP_PROTO(unsigned long busiest, int bgroup_type, unsigned long bavg_load,
+		unsigned long bload_per_task, unsigned long local,
+		int lgroup_type, unsigned long lavg_load,
+		unsigned long lload_per_task, unsigned long sds_avg_load,
+		unsigned long imbalance),
+
+	TP_ARGS(busiest, bgroup_type, bavg_load, bload_per_task, local,
+		lgroup_type, lavg_load, lload_per_task, sds_avg_load,
+		imbalance),
+
+	TP_STRUCT__entry(
+		__field(unsigned long,	busiest)
+		__field(int,		bgp_type)
+		__field(unsigned long,	bavg_load)
+		__field(unsigned long,	blpt)
+		__field(unsigned long,	local)
+		__field(int,		lgp_type)
+		__field(unsigned long,	lavg_load)
+		__field(unsigned long,	llpt)
+		__field(unsigned long,	sds_avg)
+		__field(unsigned long,	imbalance)
+	),
+
+	TP_fast_assign(
+		__entry->busiest	= busiest;
+		__entry->bgp_type	= bgroup_type;
+		__entry->bavg_load	= bavg_load;
+		__entry->blpt		= bload_per_task;
+		__entry->local		= local;
+		__entry->lgp_type	= lgroup_type;
+		__entry->lavg_load	= lavg_load;
+		__entry->llpt		= lload_per_task;
+		__entry->sds_avg	= sds_avg_load;
+		__entry->imbalance	= imbalance;
+	),
+
+	TP_printk("busiest_group=%#lx busiest_type=%d busiest_avg_load=%ld busiest_lpt=%ld local_group=%#lx local_type=%d local_avg_load=%ld local_lpt=%ld domain_avg_load=%ld imbalance=%ld",
+		__entry->busiest, __entry->bgp_type, __entry->bavg_load,
+		__entry->blpt, __entry->local, __entry->lgp_type,
+		__entry->lavg_load, __entry->llpt, __entry->sds_avg,
+		__entry->imbalance)
+);
+
 DECLARE_EVENT_CLASS(sched_process_template,

include/trace/events/walt.h  +34 −0

@@ -442,6 +442,40 @@ TRACE_EVENT(sched_set_boost,
 	TP_printk("type %d", __entry->type)
 );

+TRACE_EVENT(sched_load_balance_skip_tasks,
+
+	TP_PROTO(int scpu, int dcpu, int grp_type, int pid,
+		unsigned long h_load, unsigned long task_util,
+		unsigned long affinity),
+
+	TP_ARGS(scpu, dcpu, grp_type, pid, h_load, task_util, affinity),
+
+	TP_STRUCT__entry(
+		__field(int,		scpu)
+		__field(unsigned long,	src_util_cum)
+		__field(int,		grp_type)
+		__field(int,		dcpu)
+		__field(unsigned long,	dst_util_cum)
+		__field(int,		pid)
+		__field(unsigned long,	affinity)
+		__field(unsigned long,	task_util)
+		__field(unsigned long,	h_load)
+	),
+
+	TP_fast_assign(
+		__entry->scpu		= scpu;
+		__entry->src_util_cum	= cpu_rq(scpu)->cum_window_demand_scaled;
+		__entry->grp_type	= grp_type;
+		__entry->dcpu		= dcpu;
+		__entry->dst_util_cum	= cpu_rq(dcpu)->cum_window_demand_scaled;
+		__entry->pid		= pid;
+		__entry->affinity	= affinity;
+		__entry->task_util	= task_util;
+		__entry->h_load		= h_load;
+	),
+
+	TP_printk("source_cpu=%d util_cum=%lu group_type=%d dest_cpu=%d util_cum=%lu pid=%d affinity=%#lx task_util=%lu task_h_load=%lu",
+		__entry->scpu, __entry->src_util_cum, __entry->grp_type,
+		__entry->dcpu, __entry->dst_util_cum, __entry->pid,
+		__entry->affinity, __entry->task_util, __entry->h_load)
+);
+
 DECLARE_EVENT_CLASS(sched_cpu_load,

 	TP_PROTO(struct rq *rq, int idle, u64 irqload, unsigned int power_cost),

kernel/sched/fair.c  +19 −2

@@ -9152,7 +9152,7 @@ static int detach_tasks(struct lb_env *env)
 {
 	struct list_head *tasks = &env->src_rq->cfs_tasks;
 	struct task_struct *p;
-	unsigned long load;
+	unsigned long load = 0;
 	int detached = 0;
 	int orig_loop = env->loop;

@@ -9226,6 +9226,9 @@ static int detach_tasks(struct lb_env *env)
 		continue;
 next:
+		trace_sched_load_balance_skip_tasks(env->src_cpu, env->dst_cpu,
+				env->src_grp_type, p->pid, load, task_util(p),
+				cpumask_bits(&p->cpus_allowed)[0]);
 		list_move_tail(&p->se.group_node, tasks);
 	}

@@ -10146,6 +10149,14 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
 		sds->total_capacity += sgs->group_capacity;
 		sds->total_util += sgs->group_util;

+		trace_sched_load_balance_sg_stats(sg->cpumask[0],
+				sgs->group_type, sgs->idle_cpus,
+				sgs->sum_nr_running, sgs->group_load,
+				sgs->group_capacity, sgs->group_util,
+				sgs->group_no_capacity, sgs->load_per_task,
+				sgs->group_misfit_task_load,
+				sds->busiest ? sds->busiest->cpumask[0] : 0);
+
 		sg = sg->next;
 	} while (sg != env->sd->groups);

@@ -10573,6 +10584,11 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
 	/* Looks like there is an imbalance. Compute it */
 	env->src_grp_type = busiest->group_type;
 	calculate_imbalance(env, &sds);
+	trace_sched_load_balance_stats(sds.busiest->cpumask[0],
+			busiest->group_type, busiest->avg_load,
+			busiest->load_per_task, sds.local->cpumask[0],
+			local->group_type, local->avg_load,
+			local->load_per_task, sds.avg_load, env->imbalance);
 	return sds.busiest;

 out_balanced:

@@ -11078,7 +11094,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 			group ? group->cpumask[0] : 0,
 			busiest ? busiest->nr_running : 0,
 			env.imbalance, env.flags, ld_moved,
-			sd->balance_interval);
+			sd->balance_interval, active_balance);

 	return ld_moved;
 }

@@ -11460,6 +11476,7 @@ static void nohz_balancer_kick(bool only_update)
 	 * is idle. And the softirq performing nohz idle load balance
 	 * will be run before returning from the IPI.
 	 */
+	trace_sched_load_balance_nohz_kick(smp_processor_id(), ilb_cpu);
 	smp_send_reschedule(ilb_cpu);
 	return;
 }
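
For reference, here is a minimal userspace sketch (not part of the patch) for exercising the new events once the kernel is built with them. It assumes root, a tracefs mount at /sys/kernel/tracing (older setups use /sys/kernel/debug/tracing), and that all five events register under the "sched" event group; the group for sched_load_balance_skip_tasks follows the TRACE_SYSTEM declared in walt.h, so adjust that path if it differs in your tree.

/*
 * Enable the new load-balance tracepoints via tracefs and stream
 * the resulting records. Run as root; Ctrl-C to stop.
 */
#include <stdio.h>

static int enable_event(const char *name)
{
	char path[256];
	FILE *f;

	/* Each event has an "enable" file under its group directory. */
	snprintf(path, sizeof(path),
		 "/sys/kernel/tracing/events/sched/%s/enable", name);
	f = fopen(path, "w");
	if (!f) {
		perror(path);
		return -1;
	}
	fputs("1\n", f);
	fclose(f);
	return 0;
}

int main(void)
{
	static const char *events[] = {
		"sched_load_balance",
		"sched_load_balance_nohz_kick",
		"sched_load_balance_sg_stats",
		"sched_load_balance_stats",
		"sched_load_balance_skip_tasks",
	};
	char line[1024];
	FILE *pipe;
	size_t i;

	for (i = 0; i < sizeof(events) / sizeof(events[0]); i++)
		enable_event(events[i]);

	/* trace_pipe blocks and yields records as they are emitted. */
	pipe = fopen("/sys/kernel/tracing/trace_pipe", "r");
	if (!pipe) {
		perror("trace_pipe");
		return 1;
	}
	while (fgets(line, sizeof(line), pipe))
		fputs(line, stdout);
	fclose(pipe);
	return 0;
}

These events fire on every load-balance pass, so on a busy system the standard ftrace per-event "filter" files (for example, restricting sched_load_balance to one cpu) are useful for cutting the volume.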