Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ae71030f authored by Patrick Bellasi's avatar Patrick Bellasi Committed by Dmitry Shmidt
Browse files

ANDROID: sched/tune: add initial support for CGroups based boosting



To support task performance boosting, the usage of a single knob has the
advantage to be a simple solution, both from the implementation and the
usability standpoint.  However, on a real system it can be difficult to
identify a single value for the knob which fits the needs of multiple
different tasks. For example, some kernel threads and/or user-space
background services should be better managed the "standard" way while we
still want to be able to boost the performance of specific workloads.

In order to improve the flexibility of the task boosting mechanism, this
patch is the first of a small series which extends the previous
implementation to introduce "per task group" support.
This first patch introduces just the basic CGroups support: a new
"schedtune" CGroups controller is added which allows configuring
different boost values for different groups of tasks.
To keep the implementation simple but still effective for a boosting
strategy, the new controller:
  1. allows only a two layer hierarchy
  2. supports only a limited number of boost groups

A two layer hierarchy allows to place each task either:
  a) in the root control group
     thus being subject to a system-wide boosting value
  b) in a child of the root group
     thus being subject to the specific boost value defined by that
     "boost group"

The limited number of "boost groups" supported is mainly motivated by
the observation that in a real system it could be useful to have only
few classes of tasks which deserve different treatment.
For example, background vs foreground or interactive vs low-priority.
As an additional benefit, a limited number of boost groups allows also
to have a simpler implementation especially for the code required to
compute the boost value for CPUs which have runnable tasks belonging to
different boost groups.

cc: Tejun Heo <tj@kernel.org>
cc: Li Zefan <lizefan@huawei.com>
cc: Johannes Weiner <hannes@cmpxchg.org>
cc: Ingo Molnar <mingo@redhat.com>
cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
Signed-off-by: Andres Oportus <andresoportus@google.com>
parent caa24e4c
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -20,6 +20,10 @@ SUBSYS(cpu)
SUBSYS(cpuacct)
#endif

/* SchedTune task boosting controller, only with CONFIG_CGROUP_SCHEDTUNE */
#if IS_ENABLED(CONFIG_CGROUP_SCHEDTUNE)
SUBSYS(schedtune)
#endif

#if IS_ENABLED(CONFIG_BLK_CGROUP)
SUBSYS(io)
#endif
+76 −0
Original line number Diff line number Diff line
@@ -971,6 +971,82 @@ menuconfig CGROUPS

if CGROUPS

config CGROUP_DEBUG
	bool "Example debug cgroup subsystem"
	default n
	help
	  This option enables a simple cgroup subsystem that
	  exports useful debugging information about the cgroups
	  framework.

	  Say N if unsure.

config CGROUP_FREEZER
	bool "Freezer cgroup subsystem"
	help
	  Provides a way to freeze and unfreeze all tasks in a
	  cgroup.

config CGROUP_PIDS
	bool "PIDs cgroup subsystem"
	help
	  Provides enforcement of process number limits in the scope of a
	  cgroup. Any attempt to fork more processes than is allowed in the
	  cgroup will fail. PIDs are fundamentally a global resource because it
	  is fairly trivial to reach PID exhaustion before you reach even a
	  conservative kmemcg limit. As a result, it is possible to grind a
	  system to halt without being limited by other cgroup policies. The
	  PIDs cgroup subsystem is designed to stop this from happening.

	  It should be noted that organisational operations (such as attaching
	  to a cgroup hierarchy) will *not* be blocked by the PIDs subsystem,
	  since the PIDs limit only affects a process's ability to fork, not to
	  attach to a cgroup.

config CGROUP_DEVICE
	bool "Device controller for cgroups"
	help
	  Provides a cgroup implementing whitelists for devices which
	  a process in the cgroup can mknod or open.

config CPUSETS
	bool "Cpuset support"
	help
	  This option will let you create and manage CPUSETs which
	  allow dynamically partitioning a system into sets of CPUs and
	  Memory Nodes and assigning tasks to run only within those sets.
	  This is primarily useful on large SMP or NUMA systems.

	  Say N if unsure.

config PROC_PID_CPUSET
	bool "Include legacy /proc/<pid>/cpuset file"
	depends on CPUSETS
	default y

config CGROUP_CPUACCT
	bool "Simple CPU accounting cgroup subsystem"
	help
	  Provides a simple Resource Controller for monitoring the
	  total CPU consumed by the tasks in a cgroup.

config CGROUP_SCHEDTUNE
	bool "CFS tasks boosting cgroup subsystem (EXPERIMENTAL)"
	depends on SCHED_TUNE
	help
	  This option provides the "schedtune" controller which improves the
	  flexibility of the task boosting mechanism by introducing the support
	  to define "per task" boost values.

	  This new controller:
	  1. allows only a two-layer hierarchy, where the root defines the
	     system-wide boost value and each of its direct children defines
	     a different "class of tasks" to be boosted with a different value
	  2. supports only a limited number of task classes (the root group
	     plus a few child groups, bounded by BOOSTGROUPS_COUNT in the
	     schedtune code), each of which can be configured with a
	     different boost value

	  Say N if unsure.

config PAGE_COUNTER
       bool

+223 −0
Original line number Diff line number Diff line
#include <linux/cgroup.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/printk.h>
#include <linux/slab.h>

#include "sched.h"

/*
 * System-wide boost value backing the "sched_cfs_boost" sysctl entry.
 * With CONFIG_CGROUP_SCHEDTUNE, boost_write() copies the root group's
 * boost value here whenever the root "boost" attribute is written.
 */
unsigned int sysctl_sched_cfs_boost __read_mostly;

#ifdef CONFIG_CGROUP_SCHEDTUNE

/*
 * EAS scheduler tunables for task groups.
 */

/*
 * SchedTune tunables for a group of tasks.
 * The embedded css is what css_st() uses (via container_of) to get from
 * a cgroup_subsys_state back to the enclosing schedtune.
 */
struct schedtune {
	/* SchedTune CGroup subsystem */
	struct cgroup_subsys_state css;

	/* Boost group allocated ID: index of this group in allocated_group[] */
	int idx;

	/* Boost value for tasks on that SchedTune CGroup (0..100, see boost_write) */
	int boost;

};

/*
 * Map a cgroup subsystem state to the schedtune structure embedding it.
 * A NULL @css yields NULL.
 */
static inline struct schedtune *css_st(struct cgroup_subsys_state *css)
{
	if (!css)
		return NULL;

	return container_of(css, struct schedtune, css);
}

/* Return the schedtune group of @tsk, looked up through its schedtune css. */
static inline struct schedtune *task_schedtune(struct task_struct *tsk)
{
	struct cgroup_subsys_state *tsk_css = task_css(tsk, schedtune_cgrp_id);

	return css_st(tsk_css);
}

/*
 * Return the schedtune group owning the parent css of @st, or NULL when
 * @st has no parent css.
 */
static inline struct schedtune *parent_st(struct schedtune *st)
{
	struct cgroup_subsys_state *parent_css = st->css.parent;

	return css_st(parent_css);
}

/*
 * SchedTune root control group
 * The root control group is used to define a system-wide boosting tuning,
 * which is applied to all tasks in the system.
 * Task specific boost tuning could be specified by creating and
 * configuring a child control group under the root one.
 * By default, system-wide boosting is disabled, i.e. no boosting is applied
 * to tasks which are not in a child control group.
 */
static struct schedtune
root_schedtune = {
	.boost	= 0,
};

/*
 * Maximum number of boost groups to support
 * When per-task boosting is used we still allow only limited number of
 * boost groups for two main reasons:
 * 1. on a real system we usually have only few classes of workloads which
 *    make sense to boost with different values (e.g. background vs foreground
 *    tasks, interactive vs low-priority tasks)
 * 2. a limited number allows for a simpler and more memory/time efficient
 *    implementation especially for the computation of the per-CPU boost
 *    value
 *
 * The count includes the root group, which permanently occupies slot 0 of
 * allocated_group[]; child groups are allocated from slot 1 upwards.
 */
#define BOOSTGROUPS_COUNT 4

/*
 * Array of configured boostgroups, indexed by schedtune::idx.
 * Slot 0 is always the root group; a NULL entry marks a free slot that
 * schedtune_css_alloc() may hand out to a new child group.
 */
static struct schedtune *allocated_group[BOOSTGROUPS_COUNT] = {
	&root_schedtune,
	NULL,
};

/*
 * SchedTune boost groups
 * Keep track of all the boost groups which impact on CPU, for example when a
 * CPU has two RUNNABLE tasks belonging to two different boost groups and thus
 * likely with different boost values.
 * Since on each system we expect only a limited number of boost groups, here
 * we use a simple array to keep track of the metrics required to compute the
 * maximum per-CPU boosting value.
 */
struct boost_groups {
	/* Maximum boost value for all RUNNABLE tasks on a CPU */
	unsigned boost_max;
	/* One entry per boost group slot (BOOSTGROUPS_COUNT of them) */
	struct {
		/* The boost for tasks on that boost group */
		unsigned boost;
		/* Count of RUNNABLE tasks on that boost group */
		unsigned tasks;
	} group[BOOSTGROUPS_COUNT];
};

/*
 * Boost groups affecting each CPU in the system.
 * Each per-CPU instance is zeroed by schedtune_init().
 */
DEFINE_PER_CPU(struct boost_groups, cpu_boost_groups);

static u64
boost_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
	struct schedtune *st = css_st(css);

	return st->boost;
}

/*
 * Write handler for the "boost" cgroup attribute.
 *
 * @css:   subsystem state of the group being configured
 * @cft:   attribute descriptor (unused)
 * @boost: requested boost value, accepted range is 0..100
 *
 * Stores @boost in the schedtune group behind @css. When the root group is
 * written, the value is also copied into sysctl_sched_cfs_boost.
 *
 * Returns 0 on success, -EINVAL when @boost is above 100.
 */
static int
boost_write(struct cgroup_subsys_state *css, struct cftype *cft,
	    u64 boost)
{
	struct schedtune *st = css_st(css);

	/*
	 * @boost is unsigned, so it can never be negative: only the upper
	 * bound needs to be validated.
	 */
	if (boost > 100)
		return -EINVAL;

	st->boost = boost;

	/* The root group carries the system-wide boost value */
	if (css == &root_schedtune.css)
		sysctl_sched_cfs_boost = boost;

	return 0;
}

/*
 * Control files exposed by the schedtune controller: a single "boost"
 * attribute backed by boost_read()/boost_write().
 */
static struct cftype files[] = {
	{
		.name = "boost",
		.read_u64 = boost_read,
		.write_u64 = boost_write,
	},
	{ }	/* terminate */
};

/*
 * Register @st in the table of allocated boost groups, at the slot given
 * by st->idx. Always returns 0.
 */
static int schedtune_boostgroup_init(struct schedtune *st)
{
	struct schedtune **slot = &allocated_group[st->idx];

	/* Mark the slot as taken by this group */
	*slot = st;

	return 0;
}

/*
 * Zero the per-CPU boost group bookkeeping of every possible CPU and log
 * the number of supported boost groups. Always returns 0.
 */
static int schedtune_init(void)
{
	int cpu;

	/* Start every CPU with no boosted groups accounted */
	for_each_possible_cpu(cpu) {
		struct boost_groups *cpu_bg = &per_cpu(cpu_boost_groups, cpu);

		memset(cpu_bg, 0, sizeof(*cpu_bg));
	}

	pr_info("  schedtune configured to support %d boost groups\n",
		BOOSTGROUPS_COUNT);

	return 0;
}

/*
 * Allocate the subsystem state for a new schedtune cgroup.
 *
 * - With no parent, this is the root group: the per-CPU data is initialised
 *   and the statically allocated root_schedtune is returned.
 * - Only direct children of the root are accepted.
 * - A child takes the first free slot of allocated_group[], starting from
 *   slot 1.
 *
 * Returns the new css, or an ERR_PTR():
 *   -ENOMEM  the parent is not the root group, or allocation/initialisation
 *            of the group failed
 *   -ENOSPC  all boost group slots are already in use
 *
 * NOTE(review): -ENOMEM for a rejected nested group looks like an odd errno
 * for a policy violation; kept as is since userspace may observe it.
 */
static struct cgroup_subsys_state *
schedtune_css_alloc(struct cgroup_subsys_state *parent_css)
{
	struct schedtune *group;
	int slot = 1;

	/* Root group: statically allocated, just set up the per-CPU state */
	if (!parent_css) {
		schedtune_init();
		return &root_schedtune.css;
	}

	/* Allow only single level hierarchies */
	if (parent_css != &root_schedtune.css) {
		pr_err("Nested SchedTune boosting groups not allowed\n");
		return ERR_PTR(-ENOMEM);
	}

	/* Look for the first free boost group slot (slot 0 is the root) */
	while (slot < BOOSTGROUPS_COUNT && allocated_group[slot])
		slot++;

	if (slot == BOOSTGROUPS_COUNT) {
		pr_err("Trying to create more than %d SchedTune boosting groups\n",
		       BOOSTGROUPS_COUNT);
		return ERR_PTR(-ENOSPC);
	}

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return ERR_PTR(-ENOMEM);

	/* Claim the slot and register the group */
	group->idx = slot;
	if (schedtune_boostgroup_init(group)) {
		kfree(group);
		return ERR_PTR(-ENOMEM);
	}

	return &group->css;
}

/*
 * Drop @st from the table of allocated boost groups, making its slot
 * available again.
 */
static void schedtune_boostgroup_release(struct schedtune *st)
{
	struct schedtune **slot = &allocated_group[st->idx];

	/* Mark the slot as free */
	*slot = NULL;
}

/*
 * Release the schedtune group behind @css: give back its boost group slot,
 * then free the structure itself.
 */
static void schedtune_css_free(struct cgroup_subsys_state *css)
{
	struct schedtune *group = css_st(css);

	schedtune_boostgroup_release(group);
	kfree(group);
}

/*
 * "schedtune" cgroup controller descriptor: wires the css allocation and
 * release callbacks and the "boost" control file into the cgroup core.
 */
struct cgroup_subsys schedtune_cgrp_subsys = {
	.css_alloc	= schedtune_css_alloc,
	.css_free	= schedtune_css_free,
	.legacy_cftypes	= files,
	/* NOTE(review): presumably requests initialisation early at boot; confirm against the cgroup core */
	.early_init	= 1,
};

#endif /* CONFIG_CGROUP_SCHEDTUNE */

int
sysctl_sched_cfs_boost_handler(struct ctl_table *table, int write,
			       void __user *buffer, size_t *lenp,
+4 −0
Original line number Diff line number Diff line
@@ -454,7 +454,11 @@ static struct ctl_table kern_table[] = {
		.procname	= "sched_cfs_boost",
		.data		= &sysctl_sched_cfs_boost,
		.maxlen		= sizeof(sysctl_sched_cfs_boost),
#ifdef CONFIG_CGROUP_SCHEDTUNE
		.mode		= 0444,
#else
		.mode		= 0644,
#endif
		.proc_handler	= &sysctl_sched_cfs_boost_handler,
		.extra1		= &zero,
		.extra2		= &one_hundred,