mm: /proc/sys/vm/stat_refresh to force vmstat update (52b6f46b) · Commits · e / devices / android_kernel_fairphone_FP4

Documentation/sysctl/vm.txt

+14 −0

Original line number	Diff line number	Diff line
		@@ -57,6 +57,7 @@ Currently, these files are in /proc/sys/vm:
		- panic_on_oom
		- percpu_pagelist_fraction
		- stat_interval
		- stat_refresh
		- swappiness
		- user_reserve_kbytes
		- vfs_cache_pressure
		@@ -755,6 +756,19 @@ is 1 second.

		==============================================================

		stat_refresh

		Any read or write (by root only) flushes all the per-cpu vm statistics
		into their global totals, for more accurate reports when testing
		e.g. cat /proc/sys/vm/stat_refresh /proc/meminfo

		As a side-effect, it also checks for negative totals (elsewhere reported
		as 0) and "fails" with EINVAL if any are found, with a warning in dmesg.
		(At time of writing, a few stats are known sometimes to be found negative,
		with no ill effects: errors and warnings on these stats are suppressed.)

		==============================================================

		swappiness

		This control is used to define how aggressive the kernel will swap

include/linux/vmstat.h

+4 −0

Original line number	Diff line number	Diff line
		@@ -193,6 +193,10 @@ void quiet_vmstat(void);
		void cpu_vm_stats_fold(int cpu);
		void refresh_zone_stat_thresholds(void);

		struct ctl_table;
		int vmstat_refresh(struct ctl_table *, int write,
		void __user buffer, size_t lenp, loff_t *ppos);

		/* Both arguments are passed by pointer (the '*' declarators were lost). */
		void drain_zonestat(struct zone *zone, struct per_cpu_pageset *);

		int calculate_pressure_threshold(struct zone *zone);

kernel/sysctl.c

+7 −0

Original line number	Diff line number	Diff line
		@@ -1521,6 +1521,13 @@ static struct ctl_table vm_table[] = {
		.mode = 0644,
		.proc_handler = proc_dointvec_jiffies,
		},
		{
		.procname = "stat_refresh",
		.data = NULL,
		.maxlen = 0,
		.mode = 0600,
		.proc_handler = vmstat_refresh,
		},
		#endif
		#ifdef CONFIG_MMU
		{

mm/vmstat.c

+60 −0

Original line number	Diff line number	Diff line
		@@ -1379,6 +1379,66 @@ static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
		int sysctl_stat_interval __read_mostly = HZ;
		static cpumask_var_t cpu_stat_off;

		#ifdef CONFIG_PROC_FS
		/*
		 * Work callback that vmstat_refresh() hands to schedule_on_each_cpu().
		 * It simply calls refresh_cpu_vm_stats(true); @work itself is not used.
		 */
		static void refresh_vm_stats(struct work_struct *work)
		{
			refresh_cpu_vm_stats(true);
		}

		int vmstat_refresh(struct ctl_table *table, int write,
		void __user buffer, size_t lenp, loff_t *ppos)
		{
		long val;
		int err;
		int i;

		/*
		* The regular update, every sysctl_stat_interval, may come later
		* than expected: leaving a significant amount in per_cpu buckets.
		* This is particularly misleading when checking a quantity of HUGE
		* pages, immediately after running a test. /proc/sys/vm/stat_refresh,
		* which can equally be echo'ed to or cat'ted from (by root),
		* can be used to update the stats just before reading them.
		*
		* Oh, and since global_page_state() etc. are so careful to hide
		* transiently negative values, report an error here if any of
		* the stats is negative, so we know to go looking for imbalance.
		*/
		err = schedule_on_each_cpu(refresh_vm_stats);
		if (err)
		return err;
		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
		val = atomic_long_read(&vm_stat[i]);
		if (val < 0) {
		switch (i) {
		case NR_ALLOC_BATCH:
		case NR_PAGES_SCANNED:
		/*
		* These are often seen to go negative in
		* recent kernels, but not to go permanently
		* negative. Whilst it would be nicer not to
		* have exceptions, rooting them out would be
		* another task, of rather low priority.
		*/
		break;
		default:
		pr_warn("%s: %s %ld\n",
		__func__, vmstat_text[i], val);
		err = -EINVAL;
		break;
		}
		}
		}
		if (err)
		return err;
		if (write)
		ppos += lenp;
		else
		*lenp = 0;
		return 0;
		}
		#endif /* CONFIG_PROC_FS */

		static void vmstat_update(struct work_struct *w)
		{
		if (refresh_cpu_vm_stats(true)) {