
Commit 0199114c authored by Fenghua Yu, committed by H. Peter Anvin

x86, hwmon: Package Level Thermal/Power: power limit



The power limit notification feature is documented in the Intel 64 and IA-32
Architectures SDM, Vol. 3A, Section 14.5.6 "Power Limit Notification".

It is first implemented on the Intel Sandy Bridge platform.

This patch handles the notification interrupt. The interrupt handler dumps power
limit information into log_buf, logs the event in the MCE log, and increments the
event counters (core_power_limit and package_power_limit). Upper-level applications
can use the data to monitor system health or to diagnose functionality/performance
issues.
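
As an illustration only (not part of this patch), a minimal userspace sketch that
reads the new per-CPU counters; the sysfs path is assumed from the existing
"thermal_throttle" attribute group this patch extends, and the *_power_limit_count
files are only present when the CPU reports X86_FEATURE_PLN:

/*
 * Illustrative only; not part of this patch.  Reads the per-CPU event
 * counters exposed under the "thermal_throttle" sysfs group.  The path is
 * an assumption based on the standard CPU sysdev location; the
 * *_power_limit_count attributes exist only on CPUs with X86_FEATURE_PLN.
 */
#include <stdio.h>

static unsigned long read_counter(int cpu, const char *name)
{
	char path[128];
	unsigned long val = 0;
	FILE *f;

	snprintf(path, sizeof(path),
		 "/sys/devices/system/cpu/cpu%d/thermal_throttle/%s",
		 cpu, name);
	f = fopen(path, "r");
	if (!f)
		return 0;	/* attribute absent, e.g. no PLN support */
	if (fscanf(f, "%lu", &val) != 1)
		val = 0;
	fclose(f);
	return val;
}

int main(void)
{
	printf("cpu0 core throttle events:       %lu\n",
	       read_counter(0, "core_throttle_count"));
	printf("cpu0 core power limit events:    %lu\n",
	       read_counter(0, "core_power_limit_count"));
	printf("cpu0 package power limit events: %lu\n",
	       read_counter(0, "package_power_limit_count"));
	return 0;
}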

In the future, the event could be handled in a more sophisticated way.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
LKML-Reference: <1280448826-12004-5-git-send-email-fenghua.yu@intel.com>
Reviewed-by: Len Brown <len.brown@intel.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
parent 55d435a2
+129 −54
@@ -34,20 +34,25 @@
 /* How long to wait between reporting thermal events */
 #define CHECK_INTERVAL		(300 * HZ)
 
+#define THERMAL_THROTTLING_EVENT	0
+#define POWER_LIMIT_EVENT		1
+
 /*
- * Current thermal throttling state:
+ * Current thermal event state:
  */
 struct _thermal_state {
-	bool			is_throttled;
-
+	bool			new_event;
+	int			event;
 	u64			next_check;
-	unsigned long		throttle_count;
-	unsigned long		last_throttle_count;
+	unsigned long		count;
+	unsigned long		last_count;
 };
 
 struct thermal_state {
-	struct _thermal_state core;
-	struct _thermal_state package;
+	struct _thermal_state core_throttle;
+	struct _thermal_state core_power_limit;
+	struct _thermal_state package_throttle;
+	struct _thermal_state package_power_limit;
 };
 
 static DEFINE_PER_CPU(struct thermal_state, thermal_state);
@@ -62,9 +67,9 @@ static u32 lvtthmr_init __read_mostly;
 			   therm_throt_sysdev_show_##_name,		\
 				   NULL)				\
 
-#define define_therm_throt_sysdev_show_func(level, name)		\
+#define define_therm_throt_sysdev_show_func(event, name)		\
 									\
-static ssize_t therm_throt_sysdev_show_##level##_##name(		\
+static ssize_t therm_throt_sysdev_show_##event##_##name(		\
 			struct sys_device *dev,				\
 			struct sysdev_attribute *attr,			\
 			char *buf)					\
@@ -75,7 +80,7 @@ static ssize_t therm_throt_sysdev_show_##level##_##name( \
 	preempt_disable();	/* CPU hotplug */			\
 	if (cpu_online(cpu)) {						\
 		ret = sprintf(buf, "%lu\n",				\
-			      per_cpu(thermal_state, cpu).level.name);	\
+			      per_cpu(thermal_state, cpu).event.name);	\
 	} else								\
 		ret = 0;						\
 	preempt_enable();						\
@@ -83,23 +88,32 @@ static ssize_t therm_throt_sysdev_show_##level##_##name( \
 	return ret;							\
 }
 
-define_therm_throt_sysdev_show_func(core, throttle_count);
+define_therm_throt_sysdev_show_func(core_throttle, count);
 define_therm_throt_sysdev_one_ro(core_throttle_count);
 
-define_therm_throt_sysdev_show_func(package, throttle_count);
+define_therm_throt_sysdev_show_func(core_power_limit, count);
+define_therm_throt_sysdev_one_ro(core_power_limit_count);
+
+define_therm_throt_sysdev_show_func(package_throttle, count);
 define_therm_throt_sysdev_one_ro(package_throttle_count);
 
+define_therm_throt_sysdev_show_func(package_power_limit, count);
+define_therm_throt_sysdev_one_ro(package_power_limit_count);
+
 static struct attribute *thermal_throttle_attrs[] = {
 	&attr_core_throttle_count.attr,
 	NULL
 };
 
-static struct attribute_group thermal_throttle_attr_group = {
+static struct attribute_group thermal_attr_group = {
 	.attrs	= thermal_throttle_attrs,
 	.name	= "thermal_throttle"
 };
 #endif /* CONFIG_SYSFS */
 
+#define CORE_LEVEL	0
+#define PACKAGE_LEVEL	1
+
 /***
  * therm_throt_process - Process thermal throttling event from interrupt
  * @curr: Whether the condition is current or not (boolean), since the
@@ -116,49 +130,70 @@ static struct attribute_group thermal_throttle_attr_group = {
  *          1 : Event should be logged further, and a message has been
  *              printed to the syslog.
  */
-#define CORE_LEVEL	0
-#define PACKAGE_LEVEL	1
-static int therm_throt_process(bool is_throttled, int level)
+static int therm_throt_process(bool new_event, int event, int level)
 {
 	struct _thermal_state *state;
-	unsigned int this_cpu;
-	bool was_throttled;
+	unsigned int this_cpu = smp_processor_id();
+	bool old_event;
 	u64 now;
+	struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
 
-	this_cpu = smp_processor_id();
 	now = get_jiffies_64();
-	if (level == CORE_LEVEL)
-		state = &per_cpu(thermal_state, this_cpu).core;
-	else
-		state = &per_cpu(thermal_state, this_cpu).package;
+	if (level == CORE_LEVEL) {
+		if (event == THERMAL_THROTTLING_EVENT)
+			state = &pstate->core_throttle;
+		else if (event == POWER_LIMIT_EVENT)
+			state = &pstate->core_power_limit;
+		else
+			return 0;
+	} else if (level == PACKAGE_LEVEL) {
+		if (event == THERMAL_THROTTLING_EVENT)
+			state = &pstate->package_throttle;
+		else if (event == POWER_LIMIT_EVENT)
+			state = &pstate->package_power_limit;
+		else
+			return 0;
+	} else
+		return 0;
 
-	was_throttled = state->is_throttled;
-	state->is_throttled = is_throttled;
+	old_event = state->new_event;
+	state->new_event = new_event;
 
-	if (is_throttled)
-		state->throttle_count++;
+	if (new_event)
+		state->count++;
 
 	if (time_before64(now, state->next_check) &&
-			state->throttle_count != state->last_throttle_count)
+			state->count != state->last_count)
 		return 0;
 
 	state->next_check = now + CHECK_INTERVAL;
-	state->last_throttle_count = state->throttle_count;
+	state->last_count = state->count;
 
 	/* if we just entered the thermal event */
-	if (is_throttled) {
-		printk(KERN_CRIT "CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n",
-			this_cpu,
-			level == CORE_LEVEL ? "Core" : "Package",
-		      state->throttle_count);
+	if (new_event) {
+		if (event == THERMAL_THROTTLING_EVENT)
+			printk(KERN_CRIT "CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n",
+				this_cpu,
+				level == CORE_LEVEL ? "Core" : "Package",
+				state->count);
+		else
+			printk(KERN_CRIT "CPU%d: %s power limit notification (total events = %lu)\n",
+				this_cpu,
+				level == CORE_LEVEL ? "Core" : "Package",
+				state->count);
 
 		add_taint(TAINT_MACHINE_CHECK);
 		return 1;
 	}
-	if (was_throttled) {
-		printk(KERN_INFO "CPU%d: %s temperature/speed normal\n",
-			this_cpu,
-			level == CORE_LEVEL ? "Core" : "Package");
+	if (old_event) {
+		if (event == THERMAL_THROTTLING_EVENT)
+			printk(KERN_INFO "CPU%d: %s temperature/speed normal\n",
+				this_cpu,
+				level == CORE_LEVEL ? "Core" : "Package");
+		else
+			printk(KERN_INFO "CPU%d: %s power limit normal\n",
+				this_cpu,
+				level == CORE_LEVEL ? "Core" : "Package");
 		return 1;
 	}
 
@@ -172,21 +207,29 @@ static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev)
 	int err;
 	struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
 
-	err = sysfs_create_group(&sys_dev->kobj, &thermal_throttle_attr_group);
+	err = sysfs_create_group(&sys_dev->kobj, &thermal_attr_group);
 	if (err)
 		return err;
 
+	if (cpu_has(c, X86_FEATURE_PLN))
+		err = sysfs_add_file_to_group(&sys_dev->kobj,
+					      &attr_core_power_limit_count.attr,
+					      thermal_attr_group.name);
 	if (cpu_has(c, X86_FEATURE_PTS))
 		err = sysfs_add_file_to_group(&sys_dev->kobj,
 					      &attr_package_throttle_count.attr,
-					      thermal_throttle_attr_group.name);
+					      thermal_attr_group.name);
+		if (cpu_has(c, X86_FEATURE_PLN))
+			err = sysfs_add_file_to_group(&sys_dev->kobj,
+					&attr_package_power_limit_count.attr,
+					thermal_attr_group.name);
 
 	return err;
 }
 
 static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev)
 {
-	sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group);
+	sysfs_remove_group(&sys_dev->kobj, &thermal_attr_group);
 }
 
 /* Mutex protecting device creation against CPU hotplug: */
@@ -257,6 +300,17 @@ device_initcall(thermal_throttle_init_device);
 
 #endif /* CONFIG_SYSFS */
 
+/*
+ * Set up the most two significant bit to notify mce log that this thermal
+ * event type.
+ * This is a temp solution. May be changed in the future with mce log
+ * infrasture.
+ */
+#define CORE_THROTTLED		(0)
+#define CORE_POWER_LIMIT	((__u64)1 << 62)
+#define PACKAGE_THROTTLED	((__u64)2 << 62)
+#define PACKAGE_POWER_LIMIT	((__u64)3 << 62)
+
 /* Thermal transition interrupt handler */
 static void intel_thermal_interrupt(void)
 {
@@ -264,21 +318,31 @@ static void intel_thermal_interrupt(void)
 	struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
 
 	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
+
 	if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
+				THERMAL_THROTTLING_EVENT,
 				CORE_LEVEL) != 0)
-		mce_log_therm_throt_event(msr_val);
+		mce_log_therm_throt_event(CORE_THROTTLED | msr_val);
+
+	if (cpu_has(c, X86_FEATURE_PLN))
+		if (therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
+					POWER_LIMIT_EVENT,
+					CORE_LEVEL) != 0)
+			mce_log_therm_throt_event(CORE_POWER_LIMIT | msr_val);
 
 	if (cpu_has(c, X86_FEATURE_PTS)) {
 		rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
 		if (therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
+					THERMAL_THROTTLING_EVENT,
 					PACKAGE_LEVEL) != 0)
-			/*
-			 * Set up the most significant bit to notify mce log
-			 * that this thermal event is a package level event.
-			 * This is a temp solution. May be changed in the future
-			 * with mce log infrasture.
-			 */
-			mce_log_therm_throt_event(((__u64)1 << 63) | msr_val);
+			mce_log_therm_throt_event(PACKAGE_THROTTLED | msr_val);
+		if (cpu_has(c, X86_FEATURE_PLN))
+			if (therm_throt_process(msr_val &
+					PACKAGE_THERM_STATUS_POWER_LIMIT,
+					POWER_LIMIT_EVENT,
+					PACKAGE_LEVEL) != 0)
+				mce_log_therm_throt_event(PACKAGE_POWER_LIMIT
+							  | msr_val);
 	}
 }
 
@@ -381,11 +445,22 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
 	apic_write(APIC_LVTTHMR, h);
 
 	rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
-	wrmsr(MSR_IA32_THERM_INTERRUPT,
-	      l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);
+	if (cpu_has(c, X86_FEATURE_PLN))
+		wrmsr(MSR_IA32_THERM_INTERRUPT,
+		      l | (THERM_INT_LOW_ENABLE
+			| THERM_INT_HIGH_ENABLE | THERM_INT_PLN_ENABLE), h);
+	else
+		wrmsr(MSR_IA32_THERM_INTERRUPT,
+		      l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);
 
 	if (cpu_has(c, X86_FEATURE_PTS)) {
 		rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
-		wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
-		      l | (PACKAGE_THERM_INT_LOW_ENABLE
-			| PACKAGE_THERM_INT_HIGH_ENABLE), h);
+		if (cpu_has(c, X86_FEATURE_PLN))
+			wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
+			      l | (PACKAGE_THERM_INT_LOW_ENABLE
+				| PACKAGE_THERM_INT_HIGH_ENABLE
+				| PACKAGE_THERM_INT_PLN_ENABLE), h);
+		else
+			wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
+			      l | (PACKAGE_THERM_INT_LOW_ENABLE
+				| PACKAGE_THERM_INT_HIGH_ENABLE), h);
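
For reference, a small decoder sketch showing how a consumer of the MCE log could
recover the event type from the two most significant bits set above; the helper
below is hypothetical, not kernel code, and only mirrors the CORE_*/PACKAGE_*
encoding introduced by this patch:

/*
 * Hypothetical, illustrative decoder; not part of this patch.  Bits 63:62 of
 * the logged value carry the event type, the remaining bits are the raw
 * THERM_STATUS/PACKAGE_THERM_STATUS contents.
 */
#include <stdio.h>

#define EVENT_TYPE_SHIFT	62

static const char *event_type(unsigned long long logged)
{
	switch (logged >> EVENT_TYPE_SHIFT) {	/* two most significant bits */
	case 0:  return "core throttled";
	case 1:  return "core power limit";
	case 2:  return "package throttled";
	default: return "package power limit";
	}
}

int main(void)
{
	/* example: a package power limit event with hypothetical status bits */
	unsigned long long logged = (3ULL << EVENT_TYPE_SHIFT) | 0x400ULL;

	printf("event: %s, raw status: %#llx\n",
	       event_type(logged), logged & ~(3ULL << EVENT_TYPE_SHIFT));
	return 0;
}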