Merge branch 'for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup (22714a2b) · Commits · e / devices / android_kernel_teracube_emerald

Documentation/cgroup-v2.txt

+17 −25

Original line number	Diff line number	Diff line
		@@ -893,10 +893,6 @@ Controllers
		CPU
		---

		.. note::

		The interface for the cpu controller hasn't been merged yet

		The "cpu" controller regulates distribution of CPU cycles. This
		controller implements weight and absolute bandwidth limit models for
		normal scheduling policy and absolute bandwidth allocation model for
		@@ -910,12 +906,16 @@ All time durations are in microseconds.

		cpu.stat
		A read-only flat-keyed file which exists on non-root cgroups.
		This file exists whether the controller is enabled or not.

		It reports the following six stats:
		It always reports the following three stats:

		- usage_usec
		- user_usec
		- system_usec

		and the following three when the controller is enabled:

		- nr_periods
		- nr_throttled
		- throttled_usec
		@@ -926,6 +926,18 @@ All time durations are in microseconds.

		The weight in the range [1, 10000].

		cpu.weight.nice
		A read-write single value file which exists on non-root
		cgroups. The default is "0".

		The nice value is in the range [-20, 19].

		This interface file is an alternative interface for
		"cpu.weight" and allows reading and setting weight using the
		same values used by nice(2). Because the range is smaller and
		granularity is coarser for the nice values, the read value is
		the closest approximation of the current weight.

		cpu.max
		A read-write two value file which exists on non-root cgroups.
		The default is "max 100000".
		@@ -938,26 +950,6 @@ All time durations are in microseconds.
		$PERIOD duration. "max" for $MAX indicates no limit. If only
		one number is written, $MAX is updated.

		cpu.rt.max
		.. note::

		The semantics of this file is still under discussion and the
		interface hasn't been merged yet

		A read-write two value file which exists on all cgroups.
		The default is "0 100000".

		The maximum realtime runtime allocation. Over-committing
		configurations are disallowed and process migrations are
		rejected if not enough bandwidth is available. It's in the
		following format::

		$MAX $PERIOD

		which indicates that the group may consume up to $MAX in each
		$PERIOD duration. If only one number is written, $MAX is
		updated.


		Memory
		------

MAINTAINERS

+1 −1

Original line number	Diff line number	Diff line
		@@ -3592,7 +3592,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git
		S: Maintained
		F: Documentation/cgroup-v1/cpusets.txt
		F: include/linux/cpuset.h
		F: kernel/cpuset.c
		F: kernel/cgroup/cpuset.c

		CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG)
		M: Johannes Weiner <hannes@cmpxchg.org>

include/linux/cgroup-defs.h

+59 −0

Original line number	Diff line number	Diff line
		@@ -17,6 +17,7 @@
		#include <linux/refcount.h>
		#include <linux/percpu-refcount.h>
		#include <linux/percpu-rwsem.h>
		#include <linux/u64_stats_sync.h>
		#include <linux/workqueue.h>
		#include <linux/bpf-cgroup.h>

		@@ -255,6 +256,57 @@ struct css_set {
		struct rcu_head rcu_head;
		};

		/*
		* cgroup basic resource usage statistics. Accounting is done per-cpu in
		* cgroup_cpu_stat which is then lazily propagated up the hierarchy on
		* reads.
		*
		* When a stat gets updated, the cgroup_cpu_stat and its ancestors are
		* linked into the updated tree. On the following read, propagation only
		* considers and consumes the updated tree. This makes reading O(the
		* number of descendants which have been active since last read) instead of
		* O(the total number of descendants).
		*
		* This is important because there can be a lot of (draining) cgroups which
		* aren't active and stat may be read frequently. The combination can
		* become very expensive. By propagating selectively, increasing reading
		* frequency decreases the cost of each read.
		*/
		struct cgroup_cpu_stat {
			/*
			 * ->sync protects all the current counters.  These are the only
			 * fields which get updated in the hot path.
			 */
			struct u64_stats_sync sync;
			struct task_cputime cputime;

			/*
			 * Snapshots at the last reading.  These are used to calculate the
			 * deltas to propagate to the global counters.
			 */
			struct task_cputime last_cputime;

			/*
			 * Child cgroups with stat updates on this cpu since the last read
			 * are linked on the parent's ->updated_children through
			 * ->updated_next.
			 *
			 * In addition to being more compact, singly-linked list pointing
			 * to the cgroup makes it unnecessary for each per-cpu struct to
			 * point back to the associated cgroup.
			 *
			 * Both links must be pointers: struct cgroup is only defined
			 * further down and itself refers to this struct through
			 * ->cpu_stat, so it cannot be embedded here by value.
			 *
			 * Protected by per-cpu cgroup_cpu_stat_lock.
			 */
			struct cgroup *updated_children;	/* terminated by self cgroup */
			struct cgroup *updated_next;		/* NULL iff not on the list */
		};

		/*
		 * Global (not per-cpu) resource usage counters of a cgroup.
		 */
		struct cgroup_stat {
			/* the per-cpu statistics are collected into these global counters */
			struct task_cputime	cputime;
			struct prev_cputime	prev_cputime;
		};

		struct cgroup {
		/* self css with NULL ->ss, points back to this cgroup */
		struct cgroup_subsys_state self;
		@@ -354,6 +406,11 @@ struct cgroup {
		*/
		struct cgroup *dom_cgrp;

		/* cgroup basic resource statistics */
		struct cgroup_cpu_stat __percpu *cpu_stat;
		struct cgroup_stat pending_stat; /* pending from children */
		struct cgroup_stat stat;

		/*
		* list of pidlists, up to two for each namespace (one for procs, one
		* for tasks); created on demand.
		@@ -513,6 +570,8 @@ struct cgroup_subsys {
		void (css_released)(struct cgroup_subsys_state css);
		void (css_free)(struct cgroup_subsys_state css);
		void (css_reset)(struct cgroup_subsys_state css);
		int (css_extra_stat_show)(struct seq_file seq,
		struct cgroup_subsys_state *css);

		int (can_attach)(struct cgroup_taskset tset);
		void (cancel_attach)(struct cgroup_taskset tset);

include/linux/cgroup.h

+58 −0

Original line number	Diff line number	Diff line
		@@ -23,6 +23,7 @@
		#include <linux/nsproxy.h>
		#include <linux/user_namespace.h>
		#include <linux/refcount.h>
		#include <linux/kernel_stat.h>

		#include <linux/cgroup-defs.h>

		@@ -689,6 +690,63 @@ static inline void cgroup_path_from_kernfs_id(const union kernfs_node_id *id,
		char *buf, size_t buflen) {}
		#endif /* !CONFIG_CGROUPS */

		/*
		* Basic resource stats.
		*/
		#ifdef CONFIG_CGROUPS

		/*
		 * cpuacct hooks.  With CONFIG_CGROUP_CPUACCT they are only declared here;
		 * without it they collapse to empty inline stubs so that callers need no
		 * #ifdef of their own.
		 */
		#ifdef CONFIG_CGROUP_CPUACCT
		void cpuacct_charge(struct task_struct *tsk, u64 cputime);
		void cpuacct_account_field(struct task_struct *tsk, int index, u64 val);
		#else
		static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime)
		{
		}

		static inline void cpuacct_account_field(struct task_struct *tsk, int index,
							 u64 val)
		{
		}
		#endif /* CONFIG_CGROUP_CPUACCT */

		/*
		 * Out-of-line accounting helpers.  They are invoked by
		 * cgroup_account_cputime() and cgroup_account_cputime_field() with the
		 * cgroup those wrappers looked up for the task.
		 */
		void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec);
		void __cgroup_account_cputime_field(struct cgroup *cgrp,
						    enum cpu_usage_stat index,
						    u64 delta_exec);

		/*
		 * Account @delta_exec of execution time for @task.
		 *
		 * The time is always handed to cpuacct_charge() first.  Then, inside an
		 * RCU read-side section, the cgroup returned by task_dfl_cgroup() is
		 * looked up and charged through __cgroup_account_cputime(), but only
		 * when cgroup_parent() is non-NULL for it (presumably: every cgroup
		 * except the root - confirm against cgroup_parent()).
		 */
		static inline void cgroup_account_cputime(struct task_struct *task,
							  u64 delta_exec)
		{
			struct cgroup *dfl_cgrp;

			cpuacct_charge(task, delta_exec);

			rcu_read_lock();

			dfl_cgrp = task_dfl_cgroup(task);
			if (cgroup_parent(dfl_cgrp)) {
				__cgroup_account_cputime(dfl_cgrp, delta_exec);
			}

			rcu_read_unlock();
		}

		/*
		 * Account @delta_exec for @task against the usage category @index.
		 *
		 * @index and @delta_exec are passed unchanged to cpuacct_account_field()
		 * and then, inside an RCU read-side section, to
		 * __cgroup_account_cputime_field() for the cgroup returned by
		 * task_dfl_cgroup().  The latter call is skipped when cgroup_parent()
		 * returns NULL for that cgroup (presumably the root - confirm).
		 */
		static inline void cgroup_account_cputime_field(struct task_struct *task,
								enum cpu_usage_stat index,
								u64 delta_exec)
		{
			struct cgroup *dfl_cgrp;

			cpuacct_account_field(task, index, delta_exec);

			rcu_read_lock();

			dfl_cgrp = task_dfl_cgroup(task);
			if (cgroup_parent(dfl_cgrp)) {
				__cgroup_account_cputime_field(dfl_cgrp, index, delta_exec);
			}

			rcu_read_unlock();
		}

		#else /* CONFIG_CGROUPS */

		/*
		 * !CONFIG_CGROUPS: cputime accounting is compiled out, both entry points
		 * are empty inline functions.
		 */
		static inline void cgroup_account_cputime(struct task_struct *task,
							  u64 delta_exec)
		{
		}

		static inline void cgroup_account_cputime_field(struct task_struct *task,
								enum cpu_usage_stat index,
								u64 delta_exec)
		{
		}

		#endif /* CONFIG_CGROUPS */

		/*
		* sock->sk_cgrp_data handling. For more info, see sock_cgroup_data
		* definition in cgroup-defs.h.

include/linux/sched/cputime.h

+2 −1

Original line number	Diff line number	Diff line
		@@ -54,7 +54,8 @@ static inline void task_cputime_scaled(struct task_struct *t,

		/*
		 * Adjusted cputime accessors.  All results are returned through the
		 * @ut / @st output pointers, and the task / cputime structures are
		 * passed by reference, never by value.
		 */
		extern void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st);
		extern void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st);

		extern void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev,
					   u64 *ut, u64 *st);

		/*
		* Thread group CPU time accounting.