Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ca78f6ba authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge master.kernel.org:/pub/scm/linux/kernel/git/davej/cpufreq

* master.kernel.org:/pub/scm/linux/kernel/git/davej/cpufreq:
  Move workqueue exports to where the functions are defined.
  [CPUFREQ] Misc cleanups in ondemand.
  [CPUFREQ] Make ondemand sampling per CPU and remove the mutex usage in sampling path.
  [CPUFREQ] Add queue_delayed_work_on() interface for workqueues.
  [CPUFREQ] Remove slowdown from ondemand sampling path.
parents 7ad7153b ae90dd5d
Loading
Loading
Loading
Loading
+91 −169
Original line number Diff line number Diff line
@@ -12,22 +12,11 @@

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/ctype.h>
#include <linux/cpufreq.h>
#include <linux/sysctl.h>
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/sysfs.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/kmod.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>
#include <linux/kernel_stat.h>
#include <linux/percpu.h>
#include <linux/mutex.h>

/*
@@ -56,16 +45,15 @@ static unsigned int def_sampling_rate;
#define MIN_SAMPLING_RATE			(def_sampling_rate / MIN_SAMPLING_RATE_RATIO)
#define MAX_SAMPLING_RATE			(500 * def_sampling_rate)
#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER	(1000)
#define DEF_SAMPLING_DOWN_FACTOR		(1)
#define MAX_SAMPLING_DOWN_FACTOR		(10)
#define TRANSITION_LATENCY_LIMIT		(10 * 1000)

static void do_dbs_timer(void *data);

struct cpu_dbs_info_s {
	cputime64_t prev_cpu_idle;
	cputime64_t prev_cpu_wall;
	struct cpufreq_policy *cur_policy;
	unsigned int prev_cpu_idle_up;
	unsigned int prev_cpu_idle_down;
 	struct work_struct work;
	unsigned int enable;
};
static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);
@@ -81,30 +69,31 @@ static unsigned int dbs_enable; /* number of CPUs using this policy */
 * is recursive for the same process. -Venki
 */
static DEFINE_MUTEX(dbs_mutex);
static DECLARE_WORK	(dbs_work, do_dbs_timer, NULL);

static struct workqueue_struct *dbs_workq;
static struct workqueue_struct	*kondemand_wq;

struct dbs_tuners {
	unsigned int sampling_rate;
	unsigned int sampling_down_factor;
	unsigned int up_threshold;
	unsigned int ignore_nice;
};

static struct dbs_tuners dbs_tuners_ins = {
	.up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
	.sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR,
	.ignore_nice = 0,
};

static inline unsigned int get_cpu_idle_time(unsigned int cpu)
static inline cputime64_t get_cpu_idle_time(unsigned int cpu)
{
	return	kstat_cpu(cpu).cpustat.idle +
		kstat_cpu(cpu).cpustat.iowait +
		( dbs_tuners_ins.ignore_nice ?
		  kstat_cpu(cpu).cpustat.nice :
		  0);
	cputime64_t retval;

	retval = cputime64_add(kstat_cpu(cpu).cpustat.idle,
			kstat_cpu(cpu).cpustat.iowait);

	if (dbs_tuners_ins.ignore_nice)
		retval = cputime64_add(retval, kstat_cpu(cpu).cpustat.nice);

	return retval;
}

/************************** sysfs interface ************************/
@@ -133,29 +122,9 @@ static ssize_t show_##file_name \
	return sprintf(buf, "%u\n", dbs_tuners_ins.object);		\
}
show_one(sampling_rate, sampling_rate);
show_one(sampling_down_factor, sampling_down_factor);
show_one(up_threshold, up_threshold);
show_one(ignore_nice_load, ignore_nice);

static ssize_t store_sampling_down_factor(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf (buf, "%u", &input);
	if (ret != 1 )
		return -EINVAL;

	if (input > MAX_SAMPLING_DOWN_FACTOR || input < 1)
		return -EINVAL;

	mutex_lock(&dbs_mutex);
	dbs_tuners_ins.sampling_down_factor = input;
	mutex_unlock(&dbs_mutex);

	return count;
}

static ssize_t store_sampling_rate(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
@@ -217,12 +186,12 @@ static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy,
	}
	dbs_tuners_ins.ignore_nice = input;

	/* we need to re-evaluate prev_cpu_idle_up and prev_cpu_idle_down */
	/* we need to re-evaluate prev_cpu_idle */
	for_each_online_cpu(j) {
		struct cpu_dbs_info_s *j_dbs_info;
		j_dbs_info = &per_cpu(cpu_dbs_info, j);
		j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(j);
		j_dbs_info->prev_cpu_idle_down = j_dbs_info->prev_cpu_idle_up;
		struct cpu_dbs_info_s *dbs_info;
		dbs_info = &per_cpu(cpu_dbs_info, j);
		dbs_info->prev_cpu_idle = get_cpu_idle_time(j);
		dbs_info->prev_cpu_wall = get_jiffies_64();
	}
	mutex_unlock(&dbs_mutex);

@@ -234,7 +203,6 @@ static struct freq_attr _name = \
__ATTR(_name, 0644, show_##_name, store_##_name)

define_one_rw(sampling_rate);
define_one_rw(sampling_down_factor);
define_one_rw(up_threshold);
define_one_rw(ignore_nice_load);

@@ -242,7 +210,6 @@ static struct attribute * dbs_attributes[] = {
	&sampling_rate_max.attr,
	&sampling_rate_min.attr,
	&sampling_rate.attr,
	&sampling_down_factor.attr,
	&up_threshold.attr,
	&ignore_nice_load.attr,
	NULL
@@ -255,26 +222,27 @@ static struct attribute_group dbs_attr_group = {

/************************** sysfs end ************************/

static void dbs_check_cpu(int cpu)
static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
{
	unsigned int idle_ticks, up_idle_ticks, total_ticks;
	unsigned int freq_next;
	unsigned int freq_down_sampling_rate;
	static int down_skip[NR_CPUS];
	struct cpu_dbs_info_s *this_dbs_info;
	unsigned int idle_ticks, total_ticks;
	unsigned int load;
	cputime64_t cur_jiffies;

	struct cpufreq_policy *policy;
	unsigned int j;

	this_dbs_info = &per_cpu(cpu_dbs_info, cpu);
	if (!this_dbs_info->enable)
		return;

	policy = this_dbs_info->cur_policy;
	cur_jiffies = jiffies64_to_cputime64(get_jiffies_64());
	total_ticks = (unsigned int) cputime64_sub(cur_jiffies,
			this_dbs_info->prev_cpu_wall);
	this_dbs_info->prev_cpu_wall = cur_jiffies;
	/*
	 * Every sampling_rate, we check, if current idle time is less
	 * than 20% (default), then we try to increase frequency
	 * Every sampling_rate*sampling_down_factor, we look for a the lowest
	 * Every sampling_rate, we look for a the lowest
	 * frequency which can sustain the load while keeping idle time over
	 * 30%. If such a frequency exist, we try to decrease to this frequency.
	 *
@@ -283,36 +251,26 @@ static void dbs_check_cpu(int cpu)
	 * 5% (default) of current frequency
	 */

	/* Check for frequency increase */
	/* Get Idle Time */
	idle_ticks = UINT_MAX;
	for_each_cpu_mask(j, policy->cpus) {
		unsigned int tmp_idle_ticks, total_idle_ticks;
		cputime64_t total_idle_ticks;
		unsigned int tmp_idle_ticks;
		struct cpu_dbs_info_s *j_dbs_info;

		j_dbs_info = &per_cpu(cpu_dbs_info, j);
		total_idle_ticks = get_cpu_idle_time(j);
		tmp_idle_ticks = total_idle_ticks -
			j_dbs_info->prev_cpu_idle_up;
		j_dbs_info->prev_cpu_idle_up = total_idle_ticks;
		tmp_idle_ticks = (unsigned int) cputime64_sub(total_idle_ticks,
				j_dbs_info->prev_cpu_idle);
		j_dbs_info->prev_cpu_idle = total_idle_ticks;

		if (tmp_idle_ticks < idle_ticks)
			idle_ticks = tmp_idle_ticks;
	}
	load = (100 * (total_ticks - idle_ticks)) / total_ticks;

	/* Scale idle ticks by 100 and compare with up and down ticks */
	idle_ticks *= 100;
	up_idle_ticks = (100 - dbs_tuners_ins.up_threshold) *
			usecs_to_jiffies(dbs_tuners_ins.sampling_rate);

	if (idle_ticks < up_idle_ticks) {
		down_skip[cpu] = 0;
		for_each_cpu_mask(j, policy->cpus) {
			struct cpu_dbs_info_s *j_dbs_info;

			j_dbs_info = &per_cpu(cpu_dbs_info, j);
			j_dbs_info->prev_cpu_idle_down =
					j_dbs_info->prev_cpu_idle_up;
		}
	/* Check for frequency increase */
	if (load > dbs_tuners_ins.up_threshold) {
		/* if we are already at full speed then break out early */
		if (policy->cur == policy->max)
			return;
@@ -323,83 +281,49 @@ static void dbs_check_cpu(int cpu)
	}

	/* Check for frequency decrease */
	down_skip[cpu]++;
	if (down_skip[cpu] < dbs_tuners_ins.sampling_down_factor)
		return;

	idle_ticks = UINT_MAX;
	for_each_cpu_mask(j, policy->cpus) {
		unsigned int tmp_idle_ticks, total_idle_ticks;
		struct cpu_dbs_info_s *j_dbs_info;

		j_dbs_info = &per_cpu(cpu_dbs_info, j);
		/* Check for frequency decrease */
		total_idle_ticks = j_dbs_info->prev_cpu_idle_up;
		tmp_idle_ticks = total_idle_ticks -
			j_dbs_info->prev_cpu_idle_down;
		j_dbs_info->prev_cpu_idle_down = total_idle_ticks;

		if (tmp_idle_ticks < idle_ticks)
			idle_ticks = tmp_idle_ticks;
	}

	down_skip[cpu] = 0;
	/* if we cannot reduce the frequency anymore, break out early */
	if (policy->cur == policy->min)
		return;

	/* Compute how many ticks there are between two measurements */
	freq_down_sampling_rate = dbs_tuners_ins.sampling_rate *
		dbs_tuners_ins.sampling_down_factor;
	total_ticks = usecs_to_jiffies(freq_down_sampling_rate);

	/*
	 * The optimal frequency is the frequency that is the lowest that
	 * can support the current CPU usage without triggering the up
	 * policy. To be safe, we focus 10 points under the threshold.
	 */
	freq_next = ((total_ticks - idle_ticks) * 100) / total_ticks;
	freq_next = (freq_next * policy->cur) /
	if (load < (dbs_tuners_ins.up_threshold - 10)) {
		unsigned int freq_next;
		freq_next = (policy->cur * load) /
			(dbs_tuners_ins.up_threshold - 10);

	if (freq_next < policy->min)
		freq_next = policy->min;

	if (freq_next <= ((policy->cur * 95) / 100))
		__cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L);
	}
}

static void do_dbs_timer(void *data)
{
	int i;
	lock_cpu_hotplug();
	mutex_lock(&dbs_mutex);
	for_each_online_cpu(i)
		dbs_check_cpu(i);
	queue_delayed_work(dbs_workq, &dbs_work,
	unsigned int cpu = smp_processor_id();
	struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, cpu);

	dbs_check_cpu(dbs_info);
	queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work,
			usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
	mutex_unlock(&dbs_mutex);
	unlock_cpu_hotplug();
}

static inline void dbs_timer_init(void)
static inline void dbs_timer_init(unsigned int cpu)
{
	INIT_WORK(&dbs_work, do_dbs_timer, NULL);
	if (!dbs_workq)
		dbs_workq = create_singlethread_workqueue("ondemand");
	if (!dbs_workq) {
		printk(KERN_ERR "ondemand: Cannot initialize kernel thread\n");
		return;
	}
	queue_delayed_work(dbs_workq, &dbs_work,
	struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, cpu);

	INIT_WORK(&dbs_info->work, do_dbs_timer, 0);
	queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work,
			usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
	return;
}

static inline void dbs_timer_exit(void)
static inline void dbs_timer_exit(unsigned int cpu)
{
	if (dbs_workq)
		cancel_rearming_delayed_workqueue(dbs_workq, &dbs_work);
	struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, cpu);

	cancel_rearming_delayed_workqueue(kondemand_wq, &dbs_info->work);
}

static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
@@ -413,8 +337,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,

	switch (event) {
	case CPUFREQ_GOV_START:
		if ((!cpu_online(cpu)) ||
		    (!policy->cur))
		if ((!cpu_online(cpu)) || (!policy->cur))
			return -EINVAL;

		if (policy->cpuinfo.transition_latency >
@@ -427,18 +350,26 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
			break;

		mutex_lock(&dbs_mutex);
		dbs_enable++;
		if (dbs_enable == 1) {
			kondemand_wq = create_workqueue("kondemand");
			if (!kondemand_wq) {
				printk(KERN_ERR "Creation of kondemand failed\n");
				dbs_enable--;
				mutex_unlock(&dbs_mutex);
				return -ENOSPC;
			}
		}
		for_each_cpu_mask(j, policy->cpus) {
			struct cpu_dbs_info_s *j_dbs_info;
			j_dbs_info = &per_cpu(cpu_dbs_info, j);
			j_dbs_info->cur_policy = policy;

			j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(j);
			j_dbs_info->prev_cpu_idle_down
				= j_dbs_info->prev_cpu_idle_up;
			j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j);
			j_dbs_info->prev_cpu_wall = get_jiffies_64();
		}
		this_dbs_info->enable = 1;
		sysfs_create_group(&policy->kobj, &dbs_attr_group);
		dbs_enable++;
		/*
		 * Start the timerschedule work, when this governor
		 * is used for first time
@@ -457,23 +388,20 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
				def_sampling_rate = MIN_STAT_SAMPLING_RATE;

			dbs_tuners_ins.sampling_rate = def_sampling_rate;
			dbs_timer_init();
		}
		dbs_timer_init(policy->cpu);

		mutex_unlock(&dbs_mutex);
		break;

	case CPUFREQ_GOV_STOP:
		mutex_lock(&dbs_mutex);
		dbs_timer_exit(policy->cpu);
		this_dbs_info->enable = 0;
		sysfs_remove_group(&policy->kobj, &dbs_attr_group);
		dbs_enable--;
		/*
		 * Stop the timerschedule work, when this governor
		 * is used for first time
		 */
		if (dbs_enable == 0)
			dbs_timer_exit();
			destroy_workqueue(kondemand_wq);

		mutex_unlock(&dbs_mutex);

@@ -483,13 +411,13 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
		lock_cpu_hotplug();
		mutex_lock(&dbs_mutex);
		if (policy->max < this_dbs_info->cur_policy->cur)
			__cpufreq_driver_target(
					this_dbs_info->cur_policy,
					policy->max, CPUFREQ_RELATION_H);
			__cpufreq_driver_target(this_dbs_info->cur_policy,
			                        policy->max,
			                        CPUFREQ_RELATION_H);
		else if (policy->min > this_dbs_info->cur_policy->cur)
			__cpufreq_driver_target(
					this_dbs_info->cur_policy,
					policy->min, CPUFREQ_RELATION_L);
			__cpufreq_driver_target(this_dbs_info->cur_policy,
			                        policy->min,
			                        CPUFREQ_RELATION_L);
		mutex_unlock(&dbs_mutex);
		unlock_cpu_hotplug();
		break;
@@ -510,18 +438,12 @@ static int __init cpufreq_gov_dbs_init(void)

static void __exit cpufreq_gov_dbs_exit(void)
{
	/* Make sure that the scheduled work is indeed not running.
	   Assumes the timer has been cancelled first. */
	if (dbs_workq) {
		flush_workqueue(dbs_workq);
		destroy_workqueue(dbs_workq);
	}

	cpufreq_unregister_governor(&cpufreq_gov_dbs);
}


MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>");
MODULE_AUTHOR("Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>");
MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for "
                   "Low Latency Frequency Transition capable processors");
MODULE_LICENSE("GPL");
+2 −0
Original line number Diff line number Diff line
@@ -24,7 +24,9 @@ typedef u64 cputime64_t;

#define cputime64_zero (0ULL)
#define cputime64_add(__a, __b)		((__a) + (__b))
#define cputime64_sub(__a, __b)		((__a) - (__b))
#define cputime64_to_jiffies64(__ct)	(__ct)
#define jiffies64_to_cputime64(__jif)	(__jif)
#define cputime_to_cputime64(__ct)	((u64) __ct)


+2 −0
Original line number Diff line number Diff line
@@ -63,6 +63,8 @@ extern void destroy_workqueue(struct workqueue_struct *wq);

extern int FASTCALL(queue_work(struct workqueue_struct *wq, struct work_struct *work));
extern int FASTCALL(queue_delayed_work(struct workqueue_struct *wq, struct work_struct *work, unsigned long delay));
extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
	struct work_struct *work, unsigned long delay);
extern void FASTCALL(flush_workqueue(struct workqueue_struct *wq));

extern int FASTCALL(schedule_work(struct work_struct *work));
+32 −25
Original line number Diff line number Diff line
@@ -114,6 +114,7 @@ int fastcall queue_work(struct workqueue_struct *wq, struct work_struct *work)
	put_cpu();
	return ret;
}
EXPORT_SYMBOL_GPL(queue_work);

static void delayed_work_timer_fn(unsigned long __data)
{
@@ -147,6 +148,29 @@ int fastcall queue_delayed_work(struct workqueue_struct *wq,
	}
	return ret;
}
EXPORT_SYMBOL_GPL(queue_delayed_work);

int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
			struct work_struct *work, unsigned long delay)
{
	int ret = 0;
	struct timer_list *timer = &work->timer;

	if (!test_and_set_bit(0, &work->pending)) {
		BUG_ON(timer_pending(timer));
		BUG_ON(!list_empty(&work->entry));

		/* This stores wq for the moment, for the timer_fn */
		work->wq_data = wq;
		timer->expires = jiffies + delay;
		timer->data = (unsigned long)work;
		timer->function = delayed_work_timer_fn;
		add_timer_on(timer, cpu);
		ret = 1;
	}
	return ret;
}
EXPORT_SYMBOL_GPL(queue_delayed_work_on);

static void run_workqueue(struct cpu_workqueue_struct *cwq)
{
@@ -281,6 +305,7 @@ void fastcall flush_workqueue(struct workqueue_struct *wq)
		unlock_cpu_hotplug();
	}
}
EXPORT_SYMBOL_GPL(flush_workqueue);

static struct task_struct *create_workqueue_thread(struct workqueue_struct *wq,
						   int cpu)
@@ -358,6 +383,7 @@ struct workqueue_struct *__create_workqueue(const char *name,
	}
	return wq;
}
EXPORT_SYMBOL_GPL(__create_workqueue);

static void cleanup_workqueue_thread(struct workqueue_struct *wq, int cpu)
{
@@ -395,6 +421,7 @@ void destroy_workqueue(struct workqueue_struct *wq)
	free_percpu(wq->cpu_wq);
	kfree(wq);
}
EXPORT_SYMBOL_GPL(destroy_workqueue);

static struct workqueue_struct *keventd_wq;

@@ -402,31 +429,20 @@ int fastcall schedule_work(struct work_struct *work)
{
	return queue_work(keventd_wq, work);
}
EXPORT_SYMBOL(schedule_work);

int fastcall schedule_delayed_work(struct work_struct *work, unsigned long delay)
{
	return queue_delayed_work(keventd_wq, work, delay);
}
EXPORT_SYMBOL(schedule_delayed_work);

int schedule_delayed_work_on(int cpu,
			struct work_struct *work, unsigned long delay)
{
	int ret = 0;
	struct timer_list *timer = &work->timer;

	if (!test_and_set_bit(0, &work->pending)) {
		BUG_ON(timer_pending(timer));
		BUG_ON(!list_empty(&work->entry));
		/* This stores keventd_wq for the moment, for the timer_fn */
		work->wq_data = keventd_wq;
		timer->expires = jiffies + delay;
		timer->data = (unsigned long)work;
		timer->function = delayed_work_timer_fn;
		add_timer_on(timer, cpu);
		ret = 1;
	}
	return ret;
	return queue_delayed_work_on(cpu, keventd_wq, work, delay);
}
EXPORT_SYMBOL(schedule_delayed_work_on);

/**
 * schedule_on_each_cpu - call a function on each online CPU from keventd
@@ -463,6 +479,7 @@ void flush_scheduled_work(void)
{
	flush_workqueue(keventd_wq);
}
EXPORT_SYMBOL(flush_scheduled_work);

/**
 * cancel_rearming_delayed_workqueue - reliably kill off a delayed
@@ -619,13 +636,3 @@ void init_workqueues(void)
	BUG_ON(!keventd_wq);
}
EXPORT_SYMBOL_GPL(__create_workqueue);
EXPORT_SYMBOL_GPL(queue_work);
EXPORT_SYMBOL_GPL(queue_delayed_work);
EXPORT_SYMBOL_GPL(flush_workqueue);
EXPORT_SYMBOL_GPL(destroy_workqueue);

EXPORT_SYMBOL(schedule_work);
EXPORT_SYMBOL(schedule_delayed_work);
EXPORT_SYMBOL(schedule_delayed_work_on);
EXPORT_SYMBOL(flush_scheduled_work);