Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ae1bcb88 authored by Johannes Weiner, committed by Patrick Daly
Browse files

psi: cgroup support

On a system that executes multiple cgrouped jobs and independent
workloads, we don't just care about the health of the overall system, but
also that of individual jobs, so that we can ensure individual job health,
fairness between jobs, or prioritize some jobs over others.

This patch implements pressure stall tracking for cgroups.  In kernels
with CONFIG_PSI=y, cgroup2 groups will have cpu.pressure, memory.pressure,
and io.pressure files that track aggregate pressure stall times for only
the tasks inside the cgroup.

Link: http://lkml.kernel.org/r/20180828172258.3185-10-hannes@cmpxchg.org


Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Daniel Drake <drake@endlessm.com>
Tested-by: Suren Baghdasaryan <surenb@google.com>
Cc: Christopher Lameter <cl@linux.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Johannes Weiner <jweiner@fb.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Enderborg <peter.enderborg@sony.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Vinayak Menon <vinmenon@codeaurora.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Change-Id: Ic32dd53343c498a45f8931d8af2ccadce8006947
Git-commit: 2ce7135adc9ad081aa3c49744144376ac74fea60
Git-repo: https://source.codeaurora.org/quic/la/kernel/msm-4.19


Signed-off-by: Patrick Daly <pdaly@codeaurora.org>
parent 4ab663af
Loading
Loading
Loading
Loading
+9 −0
Original line number Diff line number Diff line
@@ -62,3 +62,12 @@ well as medium and long term trends. The total absolute stall time is
tracked and exported as well, to allow detection of latency spikes
which wouldn't necessarily make a dent in the time averages, or to
average trends over custom time frames.

Cgroup2 interface
=================

In a system with a CONFIG_CGROUPS=y kernel and the cgroup2 filesystem
mounted, pressure stall information is also tracked for tasks grouped
into cgroups. Each subdirectory in the cgroupfs mountpoint contains
cpu.pressure, memory.pressure, and io.pressure files; the format is
the same as the /proc/pressure/ files.
+18 −0
Original line number Diff line number Diff line
@@ -966,6 +966,12 @@ All time durations are in microseconds.
	$PERIOD duration.  "max" for $MAX indicates no limit.  If only
	one number is written, $MAX is updated.

  cpu.pressure
	A read-only nested-key file which exists on non-root cgroups.

	Shows pressure stall information for CPU. See
	Documentation/accounting/psi.txt for details.


Memory
------
@@ -1271,6 +1277,12 @@ PAGE_SIZE multiple when read back.
	higher than the limit for an extended period of time.  This
	reduces the impact on the workload and memory management.

  memory.pressure
	A read-only nested-key file which exists on non-root cgroups.

	Shows pressure stall information for memory. See
	Documentation/accounting/psi.txt for details.


Usage Guidelines
~~~~~~~~~~~~~~~~
@@ -1408,6 +1420,12 @@ IO Interface Files

	  8:16 rbps=2097152 wbps=max riops=max wiops=max

  io.pressure
	A read-only nested-key file which exists on non-root cgroups.

	Shows pressure stall information for IO. See
	Documentation/accounting/psi.txt for details.


Writeback
~~~~~~~~~
+4 −0
Original line number Diff line number Diff line
@@ -20,6 +20,7 @@
#include <linux/u64_stats_sync.h>
#include <linux/workqueue.h>
#include <linux/bpf-cgroup.h>
#include <linux/psi_types.h>

#ifdef CONFIG_CGROUPS

@@ -436,6 +437,9 @@ struct cgroup {
	/* used to schedule release agent */
	struct work_struct release_agent_work;

	/* used to track pressure stalls */
	struct psi_group psi;

	/* used to store eBPF programs */
	struct cgroup_bpf bpf;

+15 −0
Original line number Diff line number Diff line
@@ -657,6 +657,11 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp)
	pr_cont_kernfs_path(cgrp->kn);
}

/*
 * cgroup_psi - look up the pressure stall state of a cgroup
 * @cgrp: cgroup of interest
 *
 * Returns the address of the psi_group embedded in @cgrp.
 */
static inline struct psi_group *cgroup_psi(struct cgroup *cgrp)
{
	struct psi_group *group = &cgrp->psi;

	return group;
}

static inline void cgroup_init_kthreadd(void)
{
	/*
@@ -710,6 +715,16 @@ static inline union kernfs_node_id *cgroup_get_kernfs_id(struct cgroup *cgrp)
	return NULL;
}

/*
 * Stub variant of cgroup_parent(): it ignores @cgrp and always reports
 * "no parent" by returning NULL.
 * NOTE(review): presumably this sits in the !CONFIG_CGROUPS branch of the
 * header; the enclosing #ifdef is not visible here - confirm.
 */
static inline struct cgroup *cgroup_parent(struct cgroup *cgrp)
{
	struct cgroup *parent = NULL;

	return parent;
}

/*
 * Stub variant of cgroup_psi(): it ignores @cgrp and always returns NULL,
 * i.e. there is no psi_group to hand out.
 * NOTE(review): presumably this sits in the !CONFIG_CGROUPS branch of the
 * header; the enclosing #ifdef is not visible here - confirm.
 */
static inline struct psi_group *cgroup_psi(struct cgroup *cgrp)
{
	struct psi_group *group = NULL;

	return group;
}

static inline bool task_under_cgroup_hierarchy(struct task_struct *task,
					       struct cgroup *ancestor)
{
+25 −0
Original line number Diff line number Diff line
@@ -4,6 +4,9 @@
#include <linux/psi_types.h>
#include <linux/sched.h>

struct seq_file;
struct css_set;

#ifdef CONFIG_PSI

extern bool psi_disabled;
@@ -16,6 +19,14 @@ void psi_memstall_tick(struct task_struct *task, int cpu);
void psi_memstall_enter(unsigned long *flags);
void psi_memstall_leave(unsigned long *flags);

/*
 * Emit pressure stall information for resource @res of @group into @s.
 * NOTE(review): the implementation is not visible here; presumably this
 * backs both the /proc/pressure/ files and the per-cgroup *.pressure files,
 * which are documented as sharing one format - confirm.
 */
int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res);

#ifdef CONFIG_CGROUPS
/*
 * Per-cgroup PSI hooks. The definitions live outside this header.
 * NOTE(review): judging by the names, alloc/free set up and tear down a
 * cgroup's pressure stall state, and cgroup_move_task() switches task @p
 * to css_set @to - confirm against the implementation.
 */
int psi_cgroup_alloc(struct cgroup *cgrp);
void psi_cgroup_free(struct cgroup *cgrp);
void cgroup_move_task(struct task_struct *p, struct css_set *to);
#endif

#else /* CONFIG_PSI */

static inline void psi_init(void) {}
@@ -23,6 +34,20 @@ static inline void psi_init(void) {}
static inline void psi_memstall_enter(unsigned long *flags) {}
static inline void psi_memstall_leave(unsigned long *flags) {}

#ifdef CONFIG_CGROUPS
/*
 * CONFIG_PSI=n stub: does no work for @cgrp and always reports success (0).
 */
static inline int psi_cgroup_alloc(struct cgroup *cgrp)
{
	const int ret = 0;	/* nothing is set up, so this cannot fail */

	return ret;
}
/* CONFIG_PSI=n stub: intentionally a no-op. */
static inline void psi_cgroup_free(struct cgroup *cgrp) {}
/*
 * CONFIG_PSI=n variant of cgroup_move_task(): the only work done is to
 * publish @to as @p's css_set pointer (p->cgroups) via rcu_assign_pointer().
 */
static inline void cgroup_move_task(struct task_struct *p, struct css_set *to)
{
	rcu_assign_pointer(p->cgroups, to);
}
#endif

#endif /* CONFIG_PSI */

#endif /* _LINUX_PSI_H */
Loading