Documentation/scheduler/sched-hmp.txt  +25 −2

@@ -557,8 +557,13 @@ both tasks and CPUs to aid in the placement of tasks.
 /proc/sys/kernel/sched_upmigrate
 
-This value is a percentage. If a task consumes more than this much of a
-particular CPU, that CPU will be considered too small for the task.
+This value is a percentage. If a task consumes more than this much of a
+particular CPU, that CPU will be considered too small for the task. The task
+will thus be seen as a "big" task on the cpu and will be reflected in the
+nr_big_tasks statistics maintained for that cpu. Note that certain tasks
+(whose nice value exceeds the sched_upmigrate_min_nice value, or that belong
+to a cgroup whose upmigrate_discourage flag is set) will never be classified
+as big tasks despite their high demand.
 
 - mostly_idle
@@ -1096,6 +1101,8 @@ A task whose nice value is greater than this tunable value will never
 be considered as a "big" task (it will not be allowed to run on a
 high-performance CPU).
 
+See also the notes on the 'cpu.upmigrate_discourage' tunable.
+
 *** 7.10 sched_enable_power_aware
 
 Appears at: /proc/sys/kernel/sched_enable_power_aware
@@ -1284,6 +1291,22 @@ account of energy awareness reasons. The same logic also applies to the load
 balancer path to avoid frequent migrations due to energy awareness.
 
+*** 7.25 cpu.upmigrate_discourage
+
+Default value: 0
+
+This is a cgroup attribute supported by the cpu resource controller. It
+normally appears at [root_cpu]/[name1]/../[name2]/cpu.upmigrate_discourage.
+Here "root_cpu" is the mount point for the cgroup (cpu resource control)
+filesystem, and name1, name2 etc. are names of cgroups that form a hierarchy.
+
+Setting this flag to 1 discourages upmigration for all tasks of the cgroup.
+High-demand tasks of such a cgroup will never be classified as big tasks and
+hence will not be upmigrated. Tasks of the cgroup are allowed to upmigrate
+only in an overcommitted scenario. See the notes on sched_spill_nr_run and
+sched_spill_load for how the overcommitment threshold is defined, and also
+the notes on the 'sched_upmigrate_min_nice' tunable.
+
 =========================
 8. HMP SCHEDULER TRACE POINTS
 =========================
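For reference, a minimal userspace sketch of how the new attribute might be exercised. It assumes the cpu controller is mounted at /sys/fs/cgroup/cpu and that a child group named "background" already exists; the mount point, group name, and pid are illustrative, not part of this patch.

/* Toy sketch: discourage upmigration for one cgroup's tasks.
 * Paths, the "background" group, and pid 1234 are assumptions. */
#include <stdio.h>
#include <stdlib.h>

static int write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return -1;
	}
	if (fputs(val, f) == EOF) {
		perror(path);
		fclose(f);
		return -1;
	}
	return fclose(f);
}

int main(void)
{
	/* Mark the group: its tasks will no longer be classified as big. */
	if (write_str("/sys/fs/cgroup/cpu/background/cpu.upmigrate_discourage",
		      "1"))
		return EXIT_FAILURE;

	/* Move an existing task (pid 1234 is illustrative) into the group. */
	if (write_str("/sys/fs/cgroup/cpu/background/tasks", "1234"))
		return EXIT_FAILURE;

	return EXIT_SUCCESS;
}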
kernel/sched/core.c  +46 −0

@@ -9813,6 +9813,45 @@ static int cpu_notify_on_migrate_write_u64(struct cgroup *cgrp,
 	return 0;
 }
 
+#ifdef CONFIG_SCHED_HMP
+
+static u64 cpu_upmigrate_discourage_read_u64(struct cgroup *cgrp,
+					     struct cftype *cft)
+{
+	struct task_group *tg = cgroup_tg(cgrp);
+
+	return tg->upmigrate_discouraged;
+}
+
+static int cpu_upmigrate_discourage_write_u64(struct cgroup *cgrp,
+				struct cftype *cft, u64 upmigrate_discourage)
+{
+	struct task_group *tg = cgroup_tg(cgrp);
+	int discourage = upmigrate_discourage > 0;
+
+	if (tg->upmigrate_discouraged == discourage)
+		return 0;
+
+	/*
+	 * Revisit big-task classification for tasks of this cgroup. It would
+	 * have been efficient to walk tasks of just this cgroup in running
+	 * state, but we don't have easy means to do that. Walk all tasks in
+	 * running state on all cpus instead and re-visit their big task
+	 * classification.
+	 */
+	get_online_cpus();
+	pre_big_small_task_count_change(cpu_online_mask);
+
+	tg->upmigrate_discouraged = discourage;
+
+	post_big_small_task_count_change(cpu_online_mask);
+	put_online_cpus();
+
+	return 0;
+}
+
+#endif /* CONFIG_SCHED_HMP */
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 static int cpu_shares_write_u64(struct cgroup *cgrp, struct cftype *cftype,
 				u64 shareval)
@@ -10096,6 +10135,13 @@ static struct cftype cpu_files[] = {
 		.read_u64 = cpu_notify_on_migrate_read_u64,
 		.write_u64 = cpu_notify_on_migrate_write_u64,
 	},
+#ifdef CONFIG_SCHED_HMP
+	{
+		.name = "upmigrate_discourage",
+		.read_u64 = cpu_upmigrate_discourage_read_u64,
+		.write_u64 = cpu_upmigrate_discourage_write_u64,
+	},
+#endif
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	{
 		.name = "shares",
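The comment in cpu_upmigrate_discourage_write_u64() carries the key design point: flipping the flag silently would leave every CPU's nr_big_tasks counter stale, so the write handler brackets the change with pre_big_small_task_count_change()/post_big_small_task_count_change() across all online CPUs. The toy model below is plain userspace C, not kernel code; the task list, demand percentages, and 60% threshold are invented purely to show why the count is rebuilt around the flag change rather than patched incrementally.

/* Toy model (not kernel code): nr_big_tasks must be recomputed when a
 * group's upmigrate_discourage flag changes. All values are illustrative. */
#include <stdio.h>
#include <stdbool.h>

struct toy_task {
	const char *name;
	int demand_pct;		/* % of the cpu the task consumes */
	bool group_discouraged;	/* mirrors task_group(p)->upmigrate_discouraged */
};

static struct toy_task tasks[] = {
	{ "render",  85, false },
	{ "decoder", 70, false },
	{ "logger",  15, false },
};

static const int upmigrate_pct = 60;	/* stand-in for sched_upmigrate */

/* Mirrors the shape of is_big_task(): a discouraged task is never big. */
static bool is_big(const struct toy_task *t)
{
	if (t->group_discouraged)
		return false;
	return t->demand_pct > upmigrate_pct;
}

static int count_big(void)
{
	int i, n = 0;

	for (i = 0; i < 3; i++)
		n += is_big(&tasks[i]);
	return n;
}

int main(void)
{
	/* "pre" pass: counters reflect the old classification */
	printf("nr_big_tasks before: %d\n", count_big());

	/* flip the flag for the whole group, as the write handler does */
	tasks[0].group_discouraged = true;
	tasks[1].group_discouraged = true;

	/* "post" pass: counters are rebuilt under the new classification */
	printf("nr_big_tasks after:  %d\n", count_big());
	return 0;
}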
kernel/sched/fair.c  +20 −6

@@ -1499,14 +1499,29 @@ u64 scale_load_to_cpu(u64 task_load, int cpu)
 	return task_load;
 }
 
+#ifdef CONFIG_CGROUP_SCHED
+
+static inline int upmigrate_discouraged(struct task_struct *p)
+{
+	return task_group(p)->upmigrate_discouraged;
+}
+
+#else
+
+static inline int upmigrate_discouraged(struct task_struct *p)
+{
+	return 0;
+}
+
+#endif
+
 /* Is a task "big" on its current cpu */
 static inline int is_big_task(struct task_struct *p)
 {
 	u64 load = task_load(p);
 	int nice = TASK_NICE(p);
 
-	/* Todo: Provide cgroup-based control as well? */
-	if (nice > sched_upmigrate_min_nice)
+	if (nice > sched_upmigrate_min_nice || upmigrate_discouraged(p))
 		return 0;
 
 	load = scale_load_to_cpu(load, task_cpu(p));
@@ -1693,8 +1708,7 @@ static int task_will_fit(struct task_struct *p, int cpu)
 		if (rq->capacity > prev_rq->capacity)
 			return 1;
 	} else {
-		/* Todo: Provide cgroup-based control as well? */
-		if (nice > sched_upmigrate_min_nice)
+		if (nice > sched_upmigrate_min_nice || upmigrate_discouraged(p))
 			return 1;
 
 		load = scale_load_to_cpu(task_load(p), cpu);
@@ -2671,8 +2685,8 @@ static inline int migration_needed(struct rq *rq, struct task_struct *p)
 	if (is_small_task(p))
 		return 0;
 
-	/* Todo: cgroup-based control? */
-	if (nice > sched_upmigrate_min_nice && rq->capacity > min_capacity)
+	if ((nice > sched_upmigrate_min_nice || upmigrate_discouraged(p)) &&
+			rq->capacity > min_capacity)
 		return MOVE_TO_LITTLE_CPU;
 
 	if (!task_will_fit(p, cpu_of(rq)))
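Taken together, these three hunks route every placement decision through the same question: is this task allowed to count as "big" on this CPU? A condensed, standalone sketch of that decision is shown below; the threshold values and task fields are made up, and the real code additionally scales demand per-CPU via scale_load_to_cpu() and consults the sched_upmigrate/sched_downmigrate tunables.

/* Condensed sketch of the placement question the fair.c hunks answer.
 * Thresholds, fields, and helpers are illustrative, not the kernel's. */
#include <stdio.h>
#include <stdbool.h>

struct sketch_task {
	int nice;
	bool upmigrate_discouraged;	/* cgroup flag added by this patch */
	int load_pct;			/* demand as % of the candidate cpu */
};

static const int sched_upmigrate_min_nice = 15;
static const int sched_upmigrate = 60;

/* Mirrors the reworked condition: a high nice value OR a discouraged
 * cgroup keeps the task on the little cpu regardless of its demand. */
static bool fits_on_little_cpu(const struct sketch_task *t)
{
	if (t->nice > sched_upmigrate_min_nice || t->upmigrate_discouraged)
		return true;
	return t->load_pct <= sched_upmigrate;
}

int main(void)
{
	struct sketch_task heavy    = { .nice = 0, .upmigrate_discouraged = false,
					.load_pct = 80 };
	struct sketch_task heavy_bg = { .nice = 0, .upmigrate_discouraged = true,
					.load_pct = 80 };

	printf("heavy fits little cpu:    %s\n",
	       fits_on_little_cpu(&heavy) ? "yes" : "no (upmigrate)");
	printf("heavy_bg fits little cpu: %s\n",
	       fits_on_little_cpu(&heavy_bg) ? "yes" : "no (upmigrate)");
	return 0;
}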
kernel/sched/sched.h  +3 −0

@@ -137,6 +137,9 @@ struct task_group {
 	struct cgroup_subsys_state css;
 	bool notify_on_migrate;
+#ifdef CONFIG_SCHED_HMP
+	bool upmigrate_discouraged;
+#endif
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	/* schedulable entities of this group on each cpu */
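Because both this struct field and the cftype entry in core.c are guarded by CONFIG_SCHED_HMP, the attribute simply does not exist on kernels built without HMP support. A small probe sketch along these lines could be used to detect that; the mount point is again an assumption rather than anything defined by this patch.

/* Probe sketch: cpu.upmigrate_discourage is only exposed when the kernel
 * was built with CONFIG_SCHED_HMP. The mount point is an assumption. */
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *attr = "/sys/fs/cgroup/cpu/cpu.upmigrate_discourage";

	if (access(attr, F_OK) == 0)
		printf("HMP cgroup control available: %s\n", attr);
	else
		printf("cpu.upmigrate_discourage not exposed "
		       "(kernel likely built without CONFIG_SCHED_HMP)\n");
	return 0;
}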