Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 22714a2b authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull cgroup updates from Tejun Heo:
 "Cgroup2 cpu controller support is finally merged.

   - Basic cpu statistics support to allow monitoring by default without
     the CPU controller enabled.

   - cgroup2 cpu controller support.

   - /sys/kernel/cgroup files to help dealing with new / optional
     features"

* 'for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup: export list of cgroups v2 features using sysfs
  cgroup: export list of delegatable control files using sysfs
  cgroup: mark @cgrp __maybe_unused in cpu_stat_show()
  MAINTAINERS: relocate cpuset.c
  cgroup, sched: Move basic cpu stats from cgroup.stat to cpu.stat
  sched: Implement interface for cgroup unified hierarchy
  sched: Misc preps for cgroup unified hierarchy interface
  sched/cputime: Add dummy cputime_adjust() implementation for CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
  cgroup: statically initialize init_css_set->dfl_cgrp
  cgroup: Implement cgroup2 basic CPU usage accounting
  cpuacct: Introduce cgroup_account_cputime[_field]()
  sched/cputime: Expose cputime_adjust()
parents 766ec76a 5f2e6734
Loading
Loading
Loading
Loading
+17 −25
Original line number Diff line number Diff line
@@ -893,10 +893,6 @@ Controllers
CPU
---

.. note::

	The interface for the cpu controller hasn't been merged yet

The "cpu" controller regulates distribution of CPU cycles.  This
controller implements weight and absolute bandwidth limit models for
normal scheduling policy and absolute bandwidth allocation model for
@@ -910,12 +906,16 @@ All time durations are in microseconds.

  cpu.stat
	A read-only flat-keyed file which exists on non-root cgroups.
	This file exists whether the controller is enabled or not.

	It reports the following six stats:
	It always reports the following three stats:

	- usage_usec
	- user_usec
	- system_usec

	and the following three when the controller is enabled:

	- nr_periods
	- nr_throttled
	- throttled_usec
@@ -926,6 +926,18 @@ All time durations are in microseconds.

	The weight in the range [1, 10000].

  cpu.weight.nice
	A read-write single value file which exists on non-root
	cgroups.  The default is "0".

	The nice value is in the range [-20, 19].

	This interface file is an alternative interface for
	"cpu.weight" and allows reading and setting weight using the
	same values used by nice(2).  Because the range is smaller and
	granularity is coarser for the nice values, the read value is
	the closest approximation of the current weight.

  cpu.max
	A read-write two value file which exists on non-root cgroups.
	The default is "max 100000".
@@ -938,26 +950,6 @@ All time durations are in microseconds.
	$PERIOD duration.  "max" for $MAX indicates no limit.  If only
	one number is written, $MAX is updated.

  cpu.rt.max
	.. note::

	   The semantics of this file is still under discussion and the
	   interface hasn't been merged yet

	A read-write two value file which exists on all cgroups.
	The default is "0 100000".

	The maximum realtime runtime allocation.  Over-committing
	configurations are disallowed and process migrations are
	rejected if not enough bandwidth is available.  It's in the
	following format::

	  $MAX $PERIOD

	which indicates that the group may consume up to $MAX in each
	$PERIOD duration.  If only one number is written, $MAX is
	updated.


Memory
------
+1 −1
Original line number Diff line number Diff line
@@ -3592,7 +3592,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git
S:	Maintained
F:	Documentation/cgroup-v1/cpusets.txt
F:	include/linux/cpuset.h
F:	kernel/cpuset.c
F:	kernel/cgroup/cpuset.c

CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG)
M:	Johannes Weiner <hannes@cmpxchg.org>
+59 −0
Original line number Diff line number Diff line
@@ -17,6 +17,7 @@
#include <linux/refcount.h>
#include <linux/percpu-refcount.h>
#include <linux/percpu-rwsem.h>
#include <linux/u64_stats_sync.h>
#include <linux/workqueue.h>
#include <linux/bpf-cgroup.h>

@@ -255,6 +256,57 @@ struct css_set {
	struct rcu_head rcu_head;
};

/*
 * cgroup basic resource usage statistics.  Accounting is done per-cpu in
 * cgroup_cpu_stat which is then lazily propagated up the hierarchy on
 * reads.  Each cgroup owns one instance per cpu, reached through its
 * __percpu ->cpu_stat pointer.
 *
 * When a stat gets updated, the cgroup_cpu_stat and its ancestors are
 * linked into the updated tree.  On the following read, propagation only
 * considers and consumes the updated tree.  This makes reading O(the
 * number of descendants which have been active since last read) instead of
 * O(the total number of descendants).
 *
 * This is important because there can be a lot of (draining) cgroups which
 * aren't active and stat may be read frequently.  The combination can
 * become very expensive.  By propagating selectively, increasing reading
 * frequency decreases the cost of each read.
 */
struct cgroup_cpu_stat {
	/*
	 * ->sync protects all the current counters.  These are the only
	 * fields which get updated in the hot path.
	 */
	struct u64_stats_sync sync;
	struct task_cputime cputime;	/* current per-cpu counters */

	/*
	 * Snapshots at the last reading.  These are used to calculate the
	 * deltas to propagate to the global counters, i.e. the amount
	 * ->cputime has advanced since the previous read.
	 */
	struct task_cputime last_cputime;

	/*
	 * Child cgroups with stat updates on this cpu since the last read
	 * are linked on the parent's ->updated_children through
	 * ->updated_next.
	 *
	 * In addition to being more compact, singly-linked list pointing
	 * to the cgroup makes it unnecessary for each per-cpu struct to
	 * point back to the associated cgroup.
	 *
	 * Because the list is terminated by the owning cgroup rather than
	 * by NULL, a NULL ->updated_next can be used to mean "not on any
	 * list".
	 *
	 * Protected by per-cpu cgroup_cpu_stat_lock.
	 */
	struct cgroup *updated_children;	/* terminated by self cgroup */
	struct cgroup *updated_next;		/* NULL iff not on the list */
};

/*
 * Per-cgroup (not per-cpu) statistics.  struct cgroup embeds two of these:
 * ->stat and ->pending_stat, the latter holding what is pending from
 * children.
 */
struct cgroup_stat {
	/* per-cpu statistics are collected into the following global counters */
	struct task_cputime cputime;
	/*
	 * NOTE(review): presumably the state handed to cputime_adjust() as
	 * its @prev argument when the counters are reported - confirm
	 * against the reader.
	 */
	struct prev_cputime prev_cputime;
};

struct cgroup {
	/* self css with NULL ->ss, points back to this cgroup */
	struct cgroup_subsys_state self;
@@ -354,6 +406,11 @@ struct cgroup {
	 */
	struct cgroup *dom_cgrp;

	/* cgroup basic resource statistics */
	struct cgroup_cpu_stat __percpu *cpu_stat;
	struct cgroup_stat pending_stat;	/* pending from children */
	struct cgroup_stat stat;

	/*
	 * list of pidlists, up to two for each namespace (one for procs, one
	 * for tasks); created on demand.
@@ -513,6 +570,8 @@ struct cgroup_subsys {
	void (*css_released)(struct cgroup_subsys_state *css);
	void (*css_free)(struct cgroup_subsys_state *css);
	void (*css_reset)(struct cgroup_subsys_state *css);
	int (*css_extra_stat_show)(struct seq_file *seq,
				   struct cgroup_subsys_state *css);

	int (*can_attach)(struct cgroup_taskset *tset);
	void (*cancel_attach)(struct cgroup_taskset *tset);
+58 −0
Original line number Diff line number Diff line
@@ -23,6 +23,7 @@
#include <linux/nsproxy.h>
#include <linux/user_namespace.h>
#include <linux/refcount.h>
#include <linux/kernel_stat.h>

#include <linux/cgroup-defs.h>

@@ -689,6 +690,63 @@ static inline void cgroup_path_from_kernfs_id(const union kernfs_node_id *id,
	char *buf, size_t buflen) {}
#endif /* !CONFIG_CGROUPS */

/*
 * Basic resource stats.
 */
#ifdef CONFIG_CGROUPS

/*
 * cpuacct hooks used by cgroup_account_cputime[_field]() below.  With
 * CONFIG_CGROUP_CPUACCT set they are only declared here and defined
 * elsewhere; otherwise they collapse into empty inlines so that callers
 * need no #ifdef of their own.
 */
#ifdef CONFIG_CGROUP_CPUACCT
void cpuacct_charge(struct task_struct *tsk, u64 cputime);
void cpuacct_account_field(struct task_struct *tsk, int index, u64 val);
#else
/* !CONFIG_CGROUP_CPUACCT: no-op stubs */
static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
static inline void cpuacct_account_field(struct task_struct *tsk, int index,
					 u64 val) {}
#endif

/*
 * Out-of-line halves of cgroup_account_cputime[_field]().  The inline
 * wrappers in this header call them under rcu_read_lock() and only for
 * cgroups which have a parent.
 */
void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec);
void __cgroup_account_cputime_field(struct cgroup *cgrp,
				    enum cpu_usage_stat index, u64 delta_exec);

/**
 * cgroup_account_cputime - charge execution time to a task's cgroups
 * @task: task the time is charged for
 * @delta_exec: amount to charge; handed unchanged to cpuacct_charge() and
 *	__cgroup_account_cputime()
 *
 * The cpuacct hook is always invoked.  The default-hierarchy cgroup of
 * @task is charged only when it has a parent; a cgroup without a parent
 * is left alone.
 */
static inline void cgroup_account_cputime(struct task_struct *task,
					  u64 delta_exec)
{
	struct cgroup *dfl_cgrp;

	cpuacct_charge(task, delta_exec);

	/* the cgroup lookup and its use stay inside one RCU read section */
	rcu_read_lock();

	dfl_cgrp = task_dfl_cgroup(task);
	if (!cgroup_parent(dfl_cgrp))
		goto out_unlock;	/* parentless cgroup: nothing to charge */

	__cgroup_account_cputime(dfl_cgrp, delta_exec);

out_unlock:
	rcu_read_unlock();
}

/**
 * cgroup_account_cputime_field - charge time to one cpu usage stat field
 * @task: task the time is charged for
 * @index: which cpu_usage_stat field to charge
 * @delta_exec: amount to charge; handed unchanged to
 *	cpuacct_account_field() and __cgroup_account_cputime_field()
 *
 * The cpuacct hook is always invoked.  The default-hierarchy cgroup of
 * @task is charged only when it has a parent; a cgroup without a parent
 * is left alone.
 */
static inline void cgroup_account_cputime_field(struct task_struct *task,
						enum cpu_usage_stat index,
						u64 delta_exec)
{
	struct cgroup *dfl_cgrp;

	cpuacct_account_field(task, index, delta_exec);

	/* the cgroup lookup and its use stay inside one RCU read section */
	rcu_read_lock();

	dfl_cgrp = task_dfl_cgroup(task);
	if (!cgroup_parent(dfl_cgrp))
		goto out_unlock;	/* parentless cgroup: nothing to charge */

	__cgroup_account_cputime_field(dfl_cgrp, index, delta_exec);

out_unlock:
	rcu_read_unlock();
}

#else	/* CONFIG_CGROUPS */

/*
 * !CONFIG_CGROUPS: the accounting entry points are empty inlines.  Note
 * that the cpuacct hooks are not invoked in this configuration either.
 */
static inline void cgroup_account_cputime(struct task_struct *task,
					  u64 delta_exec) {}
static inline void cgroup_account_cputime_field(struct task_struct *task,
						enum cpu_usage_stat index,
						u64 delta_exec) {}

#endif	/* CONFIG_CGROUPS */

/*
 * sock->sk_cgrp_data handling.  For more info, see sock_cgroup_data
 * definition in cgroup-defs.h.
+2 −1
Original line number Diff line number Diff line
@@ -54,7 +54,8 @@ static inline void task_cputime_scaled(struct task_struct *t,

extern void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st);
extern void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st);

/*
 * NOTE(review): the definition is not visible from this header.
 * Presumably @curr holds the raw times, @prev carries state kept across
 * calls, and the adjusted user and system times are returned through @ut
 * and @st - confirm against the implementation.
 */
extern void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev,
			   u64 *ut, u64 *st);

/*
 * Thread group CPU time accounting.
Loading