Loading Documentation/scheduler/sched-hmp.txt +17 −0 Original line number Diff line number Diff line Loading @@ -1220,6 +1220,23 @@ This tunable is a percentage. Configure the minimum demand of big sync waker task. Scheduler places small wakee tasks woken up by big sync waker on the waker's cluster. *** 7.19 sched_prefer_sync_wakee_to_waker Appears at: /proc/sys/kernel/sched_prefer_sync_wakee_to_waker Default value: 0 The default sync wakee policy has a preference to select an idle CPU in the waker cluster compared to the waker CPU running only 1 task. By selecting an idle CPU, it eliminates the chance of waker migrating to a different CPU after the wakee preempts it. This policy is also not susceptible to the incorrect "sync" usage i.e. the waker does not go to sleep after waking up the wakee. However, the LPM exit latency associated with an idle CPU outweighs the above benefits on some targets. When this knob is turned on, the waker CPU is selected if it has only 1 runnable task. ========================= 8. 
HMP SCHEDULER TRACE POINTS ========================= Loading include/linux/sched/sysctl.h +1 −0 Original line number Diff line number Diff line Loading @@ -64,6 +64,7 @@ extern unsigned int sysctl_sched_pred_alert_freq; extern unsigned int sysctl_sched_freq_aggregate; extern unsigned int sysctl_sched_enable_thread_grouping; extern unsigned int sysctl_sched_freq_aggregate_threshold_pct; extern unsigned int sysctl_sched_prefer_sync_wakee_to_waker; #else /* CONFIG_SCHED_HMP */ Loading kernel/sched/fair.c +25 −8 Original line number Diff line number Diff line Loading @@ -2590,6 +2590,7 @@ static u32 __compute_runnable_contrib(u64 n) #define SBC_FLAG_COST_CSTATE_PREV_CPU_TIE_BREAKER 0x80 #define SBC_FLAG_CSTATE_LOAD 0x100 #define SBC_FLAG_BEST_SIBLING 0x200 #define SBC_FLAG_WAKER_CPU 0x400 /* Cluster selection flag */ #define SBC_FLAG_COLOC_CLUSTER 0x10000 Loading Loading @@ -3060,6 +3061,15 @@ wake_to_waker_cluster(struct cpu_select_env *env) task_load(env->p) < sched_small_wakee_task_load; } static inline bool bias_to_waker_cpu(struct task_struct *p, int cpu) { return sysctl_sched_prefer_sync_wakee_to_waker && cpu_rq(cpu)->nr_running == 1 && cpumask_test_cpu(cpu, tsk_cpus_allowed(p)) && cpu_active(cpu) && !cpu_isolated(cpu); } static inline int cluster_allowed(struct task_struct *p, struct sched_cluster *cluster) { Loading @@ -3080,6 +3090,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason, struct cluster_cpu_stats stats; struct related_thread_group *grp; unsigned int sbc_flag = 0; int cpu = raw_smp_processor_id(); struct cpu_select_env env = { .p = p, Loading Loading @@ -3111,14 +3122,20 @@ static int select_best_cpu(struct task_struct *p, int target, int reason, else env.rtg = grp; } else { cluster = cpu_rq(smp_processor_id())->cluster; if (wake_to_waker_cluster(&env) && cluster_allowed(p, cluster)) { cluster = cpu_rq(cpu)->cluster; if (wake_to_waker_cluster(&env)) { if (bias_to_waker_cpu(p, cpu)) { target = cpu; sbc_flag = 
SBC_FLAG_WAKER_CLUSTER | SBC_FLAG_WAKER_CPU; goto out; } else if (cluster_allowed(p, cluster)) { env.need_waker_cluster = 1; bitmap_zero(env.candidate_list, NR_CPUS); __set_bit(cluster->id, env.candidate_list); env.sbc_best_cluster_flag = SBC_FLAG_WAKER_CLUSTER; env.sbc_best_cluster_flag = SBC_FLAG_WAKER_CLUSTER; } } else if (bias_to_prev_cpu(&env, &stats)) { sbc_flag = SBC_FLAG_PREV_CPU; goto out; Loading kernel/sched/hmp.c +7 −0 Original line number Diff line number Diff line Loading @@ -896,6 +896,13 @@ unsigned int __read_mostly sysctl_sched_big_waker_task_load_pct = 25; unsigned int __read_mostly sched_spill_load; unsigned int __read_mostly sysctl_sched_spill_load_pct = 100; /* * Prefer the waker CPU for sync wakee task, if the CPU has only 1 runnable * task. This eliminates the LPM exit latency associated with the idle * CPUs in the waker cluster. */ unsigned int __read_mostly sysctl_sched_prefer_sync_wakee_to_waker; /* * Tasks whose bandwidth consumption on a cpu is more than * sched_upmigrate are considered "big" tasks. Big tasks will be Loading kernel/sysctl.c +9 −0 Original line number Diff line number Diff line Loading @@ -421,6 +421,15 @@ static struct ctl_table kern_table[] = { .extra1 = &zero, .extra2 = &one_hundred, }, { .procname = "sched_prefer_sync_wakee_to_waker", .data = &sysctl_sched_prefer_sync_wakee_to_waker, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &zero, .extra2 = &one, }, { .procname = "sched_enable_thread_grouping", .data = &sysctl_sched_enable_thread_grouping, Loading Loading
Documentation/scheduler/sched-hmp.txt +17 −0 Original line number Diff line number Diff line Loading @@ -1220,6 +1220,23 @@ This tunable is a percentage. Configure the minimum demand of big sync waker task. Scheduler places small wakee tasks woken up by big sync waker on the waker's cluster. *** 7.19 sched_prefer_sync_wakee_to_waker Appears at: /proc/sys/kernel/sched_prefer_sync_wakee_to_waker Default value: 0 The default sync wakee policy has a preference to select an idle CPU in the waker cluster compared to the waker CPU running only 1 task. By selecting an idle CPU, it eliminates the chance of waker migrating to a different CPU after the wakee preempts it. This policy is also not susceptible to the incorrect "sync" usage i.e. the waker does not go to sleep after waking up the wakee. However, the LPM exit latency associated with an idle CPU outweighs the above benefits on some targets. When this knob is turned on, the waker CPU is selected if it has only 1 runnable task. ========================= 8. HMP SCHEDULER TRACE POINTS ========================= Loading
include/linux/sched/sysctl.h +1 −0 Original line number Diff line number Diff line Loading @@ -64,6 +64,7 @@ extern unsigned int sysctl_sched_pred_alert_freq; extern unsigned int sysctl_sched_freq_aggregate; extern unsigned int sysctl_sched_enable_thread_grouping; extern unsigned int sysctl_sched_freq_aggregate_threshold_pct; extern unsigned int sysctl_sched_prefer_sync_wakee_to_waker; #else /* CONFIG_SCHED_HMP */ Loading
kernel/sched/fair.c +25 −8 Original line number Diff line number Diff line Loading @@ -2590,6 +2590,7 @@ static u32 __compute_runnable_contrib(u64 n) #define SBC_FLAG_COST_CSTATE_PREV_CPU_TIE_BREAKER 0x80 #define SBC_FLAG_CSTATE_LOAD 0x100 #define SBC_FLAG_BEST_SIBLING 0x200 #define SBC_FLAG_WAKER_CPU 0x400 /* Cluster selection flag */ #define SBC_FLAG_COLOC_CLUSTER 0x10000 Loading Loading @@ -3060,6 +3061,15 @@ wake_to_waker_cluster(struct cpu_select_env *env) task_load(env->p) < sched_small_wakee_task_load; } static inline bool bias_to_waker_cpu(struct task_struct *p, int cpu) { return sysctl_sched_prefer_sync_wakee_to_waker && cpu_rq(cpu)->nr_running == 1 && cpumask_test_cpu(cpu, tsk_cpus_allowed(p)) && cpu_active(cpu) && !cpu_isolated(cpu); } static inline int cluster_allowed(struct task_struct *p, struct sched_cluster *cluster) { Loading @@ -3080,6 +3090,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason, struct cluster_cpu_stats stats; struct related_thread_group *grp; unsigned int sbc_flag = 0; int cpu = raw_smp_processor_id(); struct cpu_select_env env = { .p = p, Loading Loading @@ -3111,14 +3122,20 @@ static int select_best_cpu(struct task_struct *p, int target, int reason, else env.rtg = grp; } else { cluster = cpu_rq(smp_processor_id())->cluster; if (wake_to_waker_cluster(&env) && cluster_allowed(p, cluster)) { cluster = cpu_rq(cpu)->cluster; if (wake_to_waker_cluster(&env)) { if (bias_to_waker_cpu(p, cpu)) { target = cpu; sbc_flag = SBC_FLAG_WAKER_CLUSTER | SBC_FLAG_WAKER_CPU; goto out; } else if (cluster_allowed(p, cluster)) { env.need_waker_cluster = 1; bitmap_zero(env.candidate_list, NR_CPUS); __set_bit(cluster->id, env.candidate_list); env.sbc_best_cluster_flag = SBC_FLAG_WAKER_CLUSTER; env.sbc_best_cluster_flag = SBC_FLAG_WAKER_CLUSTER; } } else if (bias_to_prev_cpu(&env, &stats)) { sbc_flag = SBC_FLAG_PREV_CPU; goto out; Loading
kernel/sched/hmp.c +7 −0 Original line number Diff line number Diff line Loading @@ -896,6 +896,13 @@ unsigned int __read_mostly sysctl_sched_big_waker_task_load_pct = 25; unsigned int __read_mostly sched_spill_load; unsigned int __read_mostly sysctl_sched_spill_load_pct = 100; /* * Prefer the waker CPU for sync wakee task, if the CPU has only 1 runnable * task. This eliminates the LPM exit latency associated with the idle * CPUs in the waker cluster. */ unsigned int __read_mostly sysctl_sched_prefer_sync_wakee_to_waker; /* * Tasks whose bandwidth consumption on a cpu is more than * sched_upmigrate are considered "big" tasks. Big tasks will be Loading
kernel/sysctl.c +9 −0 Original line number Diff line number Diff line Loading @@ -421,6 +421,15 @@ static struct ctl_table kern_table[] = { .extra1 = &zero, .extra2 = &one_hundred, }, { .procname = "sched_prefer_sync_wakee_to_waker", .data = &sysctl_sched_prefer_sync_wakee_to_waker, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &zero, .extra2 = &one, }, { .procname = "sched_enable_thread_grouping", .data = &sysctl_sched_enable_thread_grouping, Loading