Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit f075e0f6 authored by Linus Torvalds
Browse files
Pull cgroup updates from Tejun Heo:
 "The bulk of changes are cleanups and preparations for the upcoming
  kernfs conversion.

   - cgroup_event mechanism which is and will be used only by memcg is
     moved to memcg.

   - pidlist handling is updated so that it can be served by seq_file.

     Also, the list is not sorted if sane_behavior.  cgroup
     documentation explicitly states that the file is not sorted but it
     has been for quite some time.

   - All cgroup file handling now happens on top of seq_file.  This is
     to prepare for kernfs conversion.  In addition, all operations are
     restructured so that they map 1-1 to kernfs operations.

   - Other cleanups and low-pri fixes"

* 'for-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: (40 commits)
  cgroup: trivial style updates
  cgroup: remove stray references to css_id
  doc: cgroups: Fix typo in doc/cgroups
  cgroup: fix fail path in cgroup_load_subsys()
  cgroup: fix missing unlock on error in cgroup_load_subsys()
  cgroup: remove for_each_root_subsys()
  cgroup: implement for_each_css()
  cgroup: factor out cgroup_subsys_state creation into create_css()
  cgroup: combine css handling loops in cgroup_create()
  cgroup: reorder operations in cgroup_create()
  cgroup: make for_each_subsys() useable under cgroup_root_mutex
  cgroup: css iterations and css_from_dir() are safe under cgroup_mutex
  cgroup: unify pidlist and other file handling
  cgroup: replace cftype->read_seq_string() with cftype->seq_show()
  cgroup: attach cgroup_open_file to all cgroup files
  cgroup: generalize cgroup_pidlist_open_file
  cgroup: unify read path so that seq_file is always used
  cgroup: unify cgroup_write_X64() and cgroup_write_string()
  cgroup: remove cftype->read(), ->read_map() and ->write()
  hugetlb_cgroup: convert away from cftype->read()
  ...
parents 5cb7398c dd4b0a46
Loading
Loading
Loading
Loading
+0 −20
Original line number Diff line number Diff line
@@ -24,7 +24,6 @@ CONTENTS:
  2.1 Basic Usage
  2.2 Attaching processes
  2.3 Mounting hierarchies by name
  2.4 Notification API
3. Kernel API
  3.1 Overview
  3.2 Synchronization
@@ -472,25 +471,6 @@ you give a subsystem a name.
The name of the subsystem appears as part of the hierarchy description
in /proc/mounts and /proc/<pid>/cgroups.

2.4 Notification API
--------------------

There is mechanism which allows to get notifications about changing
status of a cgroup.

To register a new notification handler you need to:
 - create a file descriptor for event notification using eventfd(2);
 - open a control file to be monitored (e.g. memory.usage_in_bytes);
 - write "<event_fd> <control_fd> <args>" to cgroup.event_control.
   Interpretation of args is defined by control file implementation;

eventfd will be woken up by control file implementation or when the
cgroup is removed.

To unregister a notification handler just close eventfd.

NOTE: Support of notifications should be implemented for the control
file. See documentation for the subsystem.

3. Kernel API
=============
+2 −2
Original line number Diff line number Diff line
@@ -577,7 +577,7 @@ Each memcg's numa_stat file includes "total", "file", "anon" and "unevictable"
per-node page counts including "hierarchical_<counter>" which sums up all
hierarchical children's values in addition to the memcg's own value.

The ouput format of memory.numa_stat is:
The output format of memory.numa_stat is:

total=<total pages> N0=<node 0 pages> N1=<node 1 pages> ...
file=<total file pages> N0=<node 0 pages> N1=<node 1 pages> ...
@@ -670,7 +670,7 @@ page tables.

8.1 Interface

This feature is disabled by default. It can be enabledi (and disabled again) by
This feature is disabled by default. It can be enabled (and disabled again) by
writing to memory.move_charge_at_immigrate of the destination cgroup.

If you want to enable it:
+2 −2
Original line number Diff line number Diff line
@@ -97,8 +97,8 @@ to work with it.
		(struct res_counter *rc, struct res_counter *top,
		 unsigned long val)

	Almost same as res_cunter_uncharge() but propagation of uncharge
	stops when rc == top. This is useful when kill a res_coutner in
	Almost same as res_counter_uncharge() but propagation of uncharge
	stops when rc == top. This is useful when kill a res_counter in
	child cgroup.

 2.1 Other accounting routines
+15 −20
Original line number Diff line number Diff line
@@ -1303,13 +1303,10 @@ static u64 tg_prfill_cpu_rwstat(struct seq_file *sf,
	return __blkg_prfill_rwstat(sf, pd, &rwstat);
}

static int tg_print_cpu_rwstat(struct cgroup_subsys_state *css,
			       struct cftype *cft, struct seq_file *sf)
static int tg_print_cpu_rwstat(struct seq_file *sf, void *v)
{
	struct blkcg *blkcg = css_to_blkcg(css);

	blkcg_print_blkgs(sf, blkcg, tg_prfill_cpu_rwstat, &blkcg_policy_throtl,
			  cft->private, true);
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_cpu_rwstat,
			  &blkcg_policy_throtl, seq_cft(sf)->private, true);
	return 0;
}

@@ -1335,19 +1332,17 @@ static u64 tg_prfill_conf_uint(struct seq_file *sf, struct blkg_policy_data *pd,
	return __blkg_prfill_u64(sf, pd, v);
}

static int tg_print_conf_u64(struct cgroup_subsys_state *css,
			     struct cftype *cft, struct seq_file *sf)
static int tg_print_conf_u64(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(css), tg_prfill_conf_u64,
			  &blkcg_policy_throtl, cft->private, false);
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_conf_u64,
			  &blkcg_policy_throtl, seq_cft(sf)->private, false);
	return 0;
}

static int tg_print_conf_uint(struct cgroup_subsys_state *css,
			      struct cftype *cft, struct seq_file *sf)
static int tg_print_conf_uint(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(css), tg_prfill_conf_uint,
			  &blkcg_policy_throtl, cft->private, false);
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_conf_uint,
			  &blkcg_policy_throtl, seq_cft(sf)->private, false);
	return 0;
}

@@ -1428,40 +1423,40 @@ static struct cftype throtl_files[] = {
	{
		.name = "throttle.read_bps_device",
		.private = offsetof(struct throtl_grp, bps[READ]),
		.read_seq_string = tg_print_conf_u64,
		.seq_show = tg_print_conf_u64,
		.write_string = tg_set_conf_u64,
		.max_write_len = 256,
	},
	{
		.name = "throttle.write_bps_device",
		.private = offsetof(struct throtl_grp, bps[WRITE]),
		.read_seq_string = tg_print_conf_u64,
		.seq_show = tg_print_conf_u64,
		.write_string = tg_set_conf_u64,
		.max_write_len = 256,
	},
	{
		.name = "throttle.read_iops_device",
		.private = offsetof(struct throtl_grp, iops[READ]),
		.read_seq_string = tg_print_conf_uint,
		.seq_show = tg_print_conf_uint,
		.write_string = tg_set_conf_uint,
		.max_write_len = 256,
	},
	{
		.name = "throttle.write_iops_device",
		.private = offsetof(struct throtl_grp, iops[WRITE]),
		.read_seq_string = tg_print_conf_uint,
		.seq_show = tg_print_conf_uint,
		.write_string = tg_set_conf_uint,
		.max_write_len = 256,
	},
	{
		.name = "throttle.io_service_bytes",
		.private = offsetof(struct tg_stats_cpu, service_bytes),
		.read_seq_string = tg_print_cpu_rwstat,
		.seq_show = tg_print_cpu_rwstat,
	},
	{
		.name = "throttle.io_serviced",
		.private = offsetof(struct tg_stats_cpu, serviced),
		.read_seq_string = tg_print_cpu_rwstat,
		.seq_show = tg_print_cpu_rwstat,
	},
	{ }	/* terminate */
};
+58 −73
Original line number Diff line number Diff line
@@ -1632,11 +1632,11 @@ static u64 cfqg_prfill_weight_device(struct seq_file *sf,
	return __blkg_prfill_u64(sf, pd, cfqg->dev_weight);
}

static int cfqg_print_weight_device(struct cgroup_subsys_state *css,
				    struct cftype *cft, struct seq_file *sf)
static int cfqg_print_weight_device(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(css), cfqg_prfill_weight_device,
			  &blkcg_policy_cfq, 0, false);
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  cfqg_prfill_weight_device, &blkcg_policy_cfq,
			  0, false);
	return 0;
}

@@ -1650,26 +1650,23 @@ static u64 cfqg_prfill_leaf_weight_device(struct seq_file *sf,
	return __blkg_prfill_u64(sf, pd, cfqg->dev_leaf_weight);
}

static int cfqg_print_leaf_weight_device(struct cgroup_subsys_state *css,
					 struct cftype *cft,
					 struct seq_file *sf)
static int cfqg_print_leaf_weight_device(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(css), cfqg_prfill_leaf_weight_device,
			  &blkcg_policy_cfq, 0, false);
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  cfqg_prfill_leaf_weight_device, &blkcg_policy_cfq,
			  0, false);
	return 0;
}

static int cfq_print_weight(struct cgroup_subsys_state *css, struct cftype *cft,
			    struct seq_file *sf)
static int cfq_print_weight(struct seq_file *sf, void *v)
{
	seq_printf(sf, "%u\n", css_to_blkcg(css)->cfq_weight);
	seq_printf(sf, "%u\n", css_to_blkcg(seq_css(sf))->cfq_weight);
	return 0;
}

static int cfq_print_leaf_weight(struct cgroup_subsys_state *css,
				 struct cftype *cft, struct seq_file *sf)
static int cfq_print_leaf_weight(struct seq_file *sf, void *v)
{
	seq_printf(sf, "%u\n", css_to_blkcg(css)->cfq_leaf_weight);
	seq_printf(sf, "%u\n", css_to_blkcg(seq_css(sf))->cfq_leaf_weight);
	return 0;
}

@@ -1762,23 +1759,17 @@ static int cfq_set_leaf_weight(struct cgroup_subsys_state *css,
	return __cfq_set_weight(css, cft, val, true);
}

static int cfqg_print_stat(struct cgroup_subsys_state *css, struct cftype *cft,
			   struct seq_file *sf)
static int cfqg_print_stat(struct seq_file *sf, void *v)
{
	struct blkcg *blkcg = css_to_blkcg(css);

	blkcg_print_blkgs(sf, blkcg, blkg_prfill_stat, &blkcg_policy_cfq,
			  cft->private, false);
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_stat,
			  &blkcg_policy_cfq, seq_cft(sf)->private, false);
	return 0;
}

static int cfqg_print_rwstat(struct cgroup_subsys_state *css,
			     struct cftype *cft, struct seq_file *sf)
static int cfqg_print_rwstat(struct seq_file *sf, void *v)
{
	struct blkcg *blkcg = css_to_blkcg(css);

	blkcg_print_blkgs(sf, blkcg, blkg_prfill_rwstat, &blkcg_policy_cfq,
			  cft->private, true);
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat,
			  &blkcg_policy_cfq, seq_cft(sf)->private, true);
	return 0;
}

@@ -1798,23 +1789,19 @@ static u64 cfqg_prfill_rwstat_recursive(struct seq_file *sf,
	return __blkg_prfill_rwstat(sf, pd, &sum);
}

static int cfqg_print_stat_recursive(struct cgroup_subsys_state *css,
				     struct cftype *cft, struct seq_file *sf)
static int cfqg_print_stat_recursive(struct seq_file *sf, void *v)
{
	struct blkcg *blkcg = css_to_blkcg(css);

	blkcg_print_blkgs(sf, blkcg, cfqg_prfill_stat_recursive,
			  &blkcg_policy_cfq, cft->private, false);
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  cfqg_prfill_stat_recursive, &blkcg_policy_cfq,
			  seq_cft(sf)->private, false);
	return 0;
}

static int cfqg_print_rwstat_recursive(struct cgroup_subsys_state *css,
				       struct cftype *cft, struct seq_file *sf)
static int cfqg_print_rwstat_recursive(struct seq_file *sf, void *v)
{
	struct blkcg *blkcg = css_to_blkcg(css);

	blkcg_print_blkgs(sf, blkcg, cfqg_prfill_rwstat_recursive,
			  &blkcg_policy_cfq, cft->private, true);
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  cfqg_prfill_rwstat_recursive, &blkcg_policy_cfq,
			  seq_cft(sf)->private, true);
	return 0;
}

@@ -1835,13 +1822,11 @@ static u64 cfqg_prfill_avg_queue_size(struct seq_file *sf,
}

/* print avg_queue_size */
static int cfqg_print_avg_queue_size(struct cgroup_subsys_state *css,
				     struct cftype *cft, struct seq_file *sf)
static int cfqg_print_avg_queue_size(struct seq_file *sf, void *v)
{
	struct blkcg *blkcg = css_to_blkcg(css);

	blkcg_print_blkgs(sf, blkcg, cfqg_prfill_avg_queue_size,
			  &blkcg_policy_cfq, 0, false);
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  cfqg_prfill_avg_queue_size, &blkcg_policy_cfq,
			  0, false);
	return 0;
}
#endif	/* CONFIG_DEBUG_BLK_CGROUP */
@@ -1851,14 +1836,14 @@ static struct cftype cfq_blkcg_files[] = {
	{
		.name = "weight_device",
		.flags = CFTYPE_ONLY_ON_ROOT,
		.read_seq_string = cfqg_print_leaf_weight_device,
		.seq_show = cfqg_print_leaf_weight_device,
		.write_string = cfqg_set_leaf_weight_device,
		.max_write_len = 256,
	},
	{
		.name = "weight",
		.flags = CFTYPE_ONLY_ON_ROOT,
		.read_seq_string = cfq_print_leaf_weight,
		.seq_show = cfq_print_leaf_weight,
		.write_u64 = cfq_set_leaf_weight,
	},

@@ -1866,26 +1851,26 @@ static struct cftype cfq_blkcg_files[] = {
	{
		.name = "weight_device",
		.flags = CFTYPE_NOT_ON_ROOT,
		.read_seq_string = cfqg_print_weight_device,
		.seq_show = cfqg_print_weight_device,
		.write_string = cfqg_set_weight_device,
		.max_write_len = 256,
	},
	{
		.name = "weight",
		.flags = CFTYPE_NOT_ON_ROOT,
		.read_seq_string = cfq_print_weight,
		.seq_show = cfq_print_weight,
		.write_u64 = cfq_set_weight,
	},

	{
		.name = "leaf_weight_device",
		.read_seq_string = cfqg_print_leaf_weight_device,
		.seq_show = cfqg_print_leaf_weight_device,
		.write_string = cfqg_set_leaf_weight_device,
		.max_write_len = 256,
	},
	{
		.name = "leaf_weight",
		.read_seq_string = cfq_print_leaf_weight,
		.seq_show = cfq_print_leaf_weight,
		.write_u64 = cfq_set_leaf_weight,
	},

@@ -1893,114 +1878,114 @@ static struct cftype cfq_blkcg_files[] = {
	{
		.name = "time",
		.private = offsetof(struct cfq_group, stats.time),
		.read_seq_string = cfqg_print_stat,
		.seq_show = cfqg_print_stat,
	},
	{
		.name = "sectors",
		.private = offsetof(struct cfq_group, stats.sectors),
		.read_seq_string = cfqg_print_stat,
		.seq_show = cfqg_print_stat,
	},
	{
		.name = "io_service_bytes",
		.private = offsetof(struct cfq_group, stats.service_bytes),
		.read_seq_string = cfqg_print_rwstat,
		.seq_show = cfqg_print_rwstat,
	},
	{
		.name = "io_serviced",
		.private = offsetof(struct cfq_group, stats.serviced),
		.read_seq_string = cfqg_print_rwstat,
		.seq_show = cfqg_print_rwstat,
	},
	{
		.name = "io_service_time",
		.private = offsetof(struct cfq_group, stats.service_time),
		.read_seq_string = cfqg_print_rwstat,
		.seq_show = cfqg_print_rwstat,
	},
	{
		.name = "io_wait_time",
		.private = offsetof(struct cfq_group, stats.wait_time),
		.read_seq_string = cfqg_print_rwstat,
		.seq_show = cfqg_print_rwstat,
	},
	{
		.name = "io_merged",
		.private = offsetof(struct cfq_group, stats.merged),
		.read_seq_string = cfqg_print_rwstat,
		.seq_show = cfqg_print_rwstat,
	},
	{
		.name = "io_queued",
		.private = offsetof(struct cfq_group, stats.queued),
		.read_seq_string = cfqg_print_rwstat,
		.seq_show = cfqg_print_rwstat,
	},

	/* the same statistics which cover the cfqg and its descendants */
	{
		.name = "time_recursive",
		.private = offsetof(struct cfq_group, stats.time),
		.read_seq_string = cfqg_print_stat_recursive,
		.seq_show = cfqg_print_stat_recursive,
	},
	{
		.name = "sectors_recursive",
		.private = offsetof(struct cfq_group, stats.sectors),
		.read_seq_string = cfqg_print_stat_recursive,
		.seq_show = cfqg_print_stat_recursive,
	},
	{
		.name = "io_service_bytes_recursive",
		.private = offsetof(struct cfq_group, stats.service_bytes),
		.read_seq_string = cfqg_print_rwstat_recursive,
		.seq_show = cfqg_print_rwstat_recursive,
	},
	{
		.name = "io_serviced_recursive",
		.private = offsetof(struct cfq_group, stats.serviced),
		.read_seq_string = cfqg_print_rwstat_recursive,
		.seq_show = cfqg_print_rwstat_recursive,
	},
	{
		.name = "io_service_time_recursive",
		.private = offsetof(struct cfq_group, stats.service_time),
		.read_seq_string = cfqg_print_rwstat_recursive,
		.seq_show = cfqg_print_rwstat_recursive,
	},
	{
		.name = "io_wait_time_recursive",
		.private = offsetof(struct cfq_group, stats.wait_time),
		.read_seq_string = cfqg_print_rwstat_recursive,
		.seq_show = cfqg_print_rwstat_recursive,
	},
	{
		.name = "io_merged_recursive",
		.private = offsetof(struct cfq_group, stats.merged),
		.read_seq_string = cfqg_print_rwstat_recursive,
		.seq_show = cfqg_print_rwstat_recursive,
	},
	{
		.name = "io_queued_recursive",
		.private = offsetof(struct cfq_group, stats.queued),
		.read_seq_string = cfqg_print_rwstat_recursive,
		.seq_show = cfqg_print_rwstat_recursive,
	},
#ifdef CONFIG_DEBUG_BLK_CGROUP
	{
		.name = "avg_queue_size",
		.read_seq_string = cfqg_print_avg_queue_size,
		.seq_show = cfqg_print_avg_queue_size,
	},
	{
		.name = "group_wait_time",
		.private = offsetof(struct cfq_group, stats.group_wait_time),
		.read_seq_string = cfqg_print_stat,
		.seq_show = cfqg_print_stat,
	},
	{
		.name = "idle_time",
		.private = offsetof(struct cfq_group, stats.idle_time),
		.read_seq_string = cfqg_print_stat,
		.seq_show = cfqg_print_stat,
	},
	{
		.name = "empty_time",
		.private = offsetof(struct cfq_group, stats.empty_time),
		.read_seq_string = cfqg_print_stat,
		.seq_show = cfqg_print_stat,
	},
	{
		.name = "dequeue",
		.private = offsetof(struct cfq_group, stats.dequeue),
		.read_seq_string = cfqg_print_stat,
		.seq_show = cfqg_print_stat,
	},
	{
		.name = "unaccounted_time",
		.private = offsetof(struct cfq_group, stats.unaccounted_time),
		.read_seq_string = cfqg_print_stat,
		.seq_show = cfqg_print_stat,
	},
#endif	/* CONFIG_DEBUG_BLK_CGROUP */
	{ }	/* terminate */
Loading