
Commit 991528d7 authored by Venkatesh Pallipadi, committed by Len Brown

ACPI: Processor native C-states using MWAIT

Intel processors starting with the Core Duo support processor-native
C-states using the MWAIT instruction.
Refer: Intel Architecture Software Developer's Manual
http://www.intel.com/design/Pentium4/manuals/253668.htm

Platform firmware exports the support for native C-states to the OS using
the ACPI _PDC and _CST methods.
Refer: Intel Processor Vendor-Specific ACPI: Interface Specification
http://www.intel.com/technology/iapc/acpi/downloads/302223.htm
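(Illustration, not part of the commit message or patch: the OS side of this
handshake is a capability bitmask passed to _PDC.  The two FFH bits below are
the ones this patch adds to pdc_intel.h; the variable name is hypothetical.)

	/* Sketch: _PDC capability bits an OS could advertise so that firmware
	 * lists FIXED_HARDWARE (FFH/MWAIT) entries in _CST. */
	#define ACPI_PDC_C_C1_FFH	(0x0100)	/* OS can enter C1 via FFH/MWAIT */
	#define ACPI_PDC_C_C2C3_FFH	(0x0200)	/* OS can enter C2/C3 via FFH/MWAIT */

	unsigned int pdc_caps = ACPI_PDC_C_C1_FFH | ACPI_PDC_C_C2C3_FFH;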



With processor-native C-states, we use the MWAIT instruction on the processor
to enter the different C-states (C1, C2, C3).  We no longer use the special
I/O ports to enter a C-state, and no SMM mode etc. is required.
Overall this means better C-state support.
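As a rough sketch (not the patch's code; the helper name is made up), entering
a C-state this way boils down to arming MONITOR on a memory location and then
issuing MWAIT with the C-state hint in EAX.  The hint is the value _CST supplies
in the FFH register address: per the SDM's MWAIT hint encoding, bits 7:4 select
the C-state (0 meaning C1) and bits 3:0 the sub-state.

	/* Illustrative helper, not the kernel's implementation. */
	static inline void enter_cstate_mwait(volatile void *flag,
					      unsigned long hint,
					      unsigned long ecx)
	{
		/* MONITOR: EAX = address to watch, ECX/EDX = extensions/hints */
		asm volatile("monitor" : : "a" (flag), "c" (0), "d" (0));
		/* MWAIT: EAX = C-state hint, ECX bit 0 = break on interrupt */
		asm volatile("mwait" : : "a" (hint), "c" (ecx));
	}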

One major advantage of using MWAIT for all C-states is that, combined with the
"treat interrupt as break event" feature of MWAIT, we can now get accurate
timing for the time spent in the C1, C2, ... states.
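For instance (sketch only, mirroring what acpi_processor_idle() does around
acpi_cstate_enter(); pm_timer_read() and enter_cstate_mwait() are hypothetical
stand-ins for the PM-timer inl() and the MWAIT entry above): because the wakeup
interrupt breaks MWAIT before it is serviced, the two timer reads bracket
exactly the time spent idle.

	static unsigned int idle_once_and_measure(volatile void *flag, unsigned long hint)
	{
		unsigned int t1, t2;

		t1 = pm_timer_read();		/* start time, in PM timer ticks */
		/* ECX bit 0: an interrupt wakes us from MWAIT even with IRQs off */
		enter_cstate_mwait(flag, hint, 0x1);
		t2 = pm_timer_read();		/* executes right after wakeup */
		return t2 - t1;			/* accurate C-state residency */
	}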

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Len Brown <len.brown@intel.com>
parent b4bd8c66
+121 −1
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/acpi.h>
+#include <linux/cpu.h>
 
 #include <acpi/processor.h>
 #include <asm/acpi.h>
@@ -41,5 +42,124 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
 		flags->bm_check = 1;
 	}
 }
-
 EXPORT_SYMBOL(acpi_processor_power_init_bm_check);
+
+/* The code below handles cstate entry with monitor-mwait pair on Intel*/
+
+struct cstate_entry_s {
+	struct {
+		unsigned int eax;
+		unsigned int ecx;
+	} states[ACPI_PROCESSOR_MAX_POWER];
+};
+static struct cstate_entry_s *cpu_cstate_entry;	/* per CPU ptr */
+
+static short mwait_supported[ACPI_PROCESSOR_MAX_POWER];
+
+#define MWAIT_SUBSTATE_MASK	(0xf)
+#define MWAIT_SUBSTATE_SIZE	(4)
+
+#define CPUID_MWAIT_LEAF (5)
+#define CPUID5_ECX_EXTENSIONS_SUPPORTED (0x1)
+#define CPUID5_ECX_INTERRUPT_BREAK	(0x2)
+
+#define MWAIT_ECX_INTERRUPT_BREAK	(0x1)
+
+#define NATIVE_CSTATE_BEYOND_HALT	(2)
+
+int acpi_processor_ffh_cstate_probe(unsigned int cpu,
+		struct acpi_processor_cx *cx, struct acpi_power_register *reg)
+{
+	struct cstate_entry_s *percpu_entry;
+	struct cpuinfo_x86 *c = cpu_data + cpu;
+
+	cpumask_t saved_mask;
+	int retval;
+	unsigned int eax, ebx, ecx, edx;
+	unsigned int edx_part;
+	unsigned int cstate_type; /* C-state type and not ACPI C-state type */
+	unsigned int num_cstate_subtype;
+
+	if (!cpu_cstate_entry || c->cpuid_level < CPUID_MWAIT_LEAF )
+		return -1;
+
+	if (reg->bit_offset != NATIVE_CSTATE_BEYOND_HALT)
+		return -1;
+
+	percpu_entry = per_cpu_ptr(cpu_cstate_entry, cpu);
+	percpu_entry->states[cx->index].eax = 0;
+	percpu_entry->states[cx->index].ecx = 0;
+
+	/* Make sure we are running on right CPU */
+	saved_mask = current->cpus_allowed;
+	retval = set_cpus_allowed(current, cpumask_of_cpu(cpu));
+	if (retval)
+		return -1;
+
+	cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
+
+	/* Check whether this particular cx_type (in CST) is supported or not */
+	cstate_type = (cx->address >> MWAIT_SUBSTATE_SIZE) + 1;
+	edx_part = edx >> (cstate_type * MWAIT_SUBSTATE_SIZE);
+	num_cstate_subtype = edx_part & MWAIT_SUBSTATE_MASK;
+
+	retval = 0;
+	if (num_cstate_subtype < (cx->address & MWAIT_SUBSTATE_MASK)) {
+		retval = -1;
+		goto out;
+	}
+
+	/* mwait ecx extensions INTERRUPT_BREAK should be supported for C2/C3 */
+	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
+	    !(ecx & CPUID5_ECX_INTERRUPT_BREAK)) {
+		retval = -1;
+		goto out;
+	}
+	percpu_entry->states[cx->index].ecx = MWAIT_ECX_INTERRUPT_BREAK;
+
+	/* Use the hint in CST */
+	percpu_entry->states[cx->index].eax = cx->address;
+
+	if (!mwait_supported[cstate_type]) {
+		mwait_supported[cstate_type] = 1;
+		printk(KERN_DEBUG "Monitor-Mwait will be used to enter C-%d "
+		       "state\n", cx->type);
+	}
+
+out:
+	set_cpus_allowed(current, saved_mask);
+	return retval;
+}
+EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe);
+
+void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx)
+{
+	unsigned int cpu = smp_processor_id();
+	struct cstate_entry_s *percpu_entry;
+
+	percpu_entry = per_cpu_ptr(cpu_cstate_entry, cpu);
+	mwait_idle_with_hints(percpu_entry->states[cx->index].eax,
+	                      percpu_entry->states[cx->index].ecx);
+}
+EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_enter);
+
+static int __init ffh_cstate_init(void)
+{
+	struct cpuinfo_x86 *c = &boot_cpu_data;
+	if (c->x86_vendor != X86_VENDOR_INTEL)
+		return -1;
+
+	cpu_cstate_entry = alloc_percpu(struct cstate_entry_s);
+	return 0;
+}
+
+static void __exit ffh_cstate_exit(void)
+{
+	if (cpu_cstate_entry) {
+		free_percpu(cpu_cstate_entry);
+		cpu_cstate_entry = NULL;
+	}
+}
+
+arch_initcall(ffh_cstate_init);
+__exitcall(ffh_cstate_exit);
+15 −7
@@ -236,20 +236,28 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
  * We execute MONITOR against need_resched and enter optimized wait state
  * through MWAIT. Whenever someone changes need_resched, we would be woken
  * up from MWAIT (without an IPI).
+ *
+ * New with Core Duo processors, MWAIT can take some hints based on CPU
+ * capability.
  */
-static void mwait_idle(void)
+void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
 {
-	local_irq_enable();
-
-	while (!need_resched()) {
+	if (!need_resched()) {
 		__monitor((void *)&current_thread_info()->flags, 0, 0);
 		smp_mb();
-		if (need_resched())
-			break;
-		__mwait(0, 0);
+		if (!need_resched())
+			__mwait(eax, ecx);
 	}
 }
 
+/* Default MONITOR/MWAIT with no hints, used for default C1 state */
+static void mwait_idle(void)
+{
+	local_irq_enable();
+	while (!need_resched())
+		mwait_idle_with_hints(0, 0);
+}
+
 void __devinit select_idle_routine(const struct cpuinfo_x86 *c)
 {
 	if (cpu_has(c, X86_FEATURE_MWAIT)) {
+15 −7
@@ -238,18 +238,26 @@ void cpu_idle (void)
  * We execute MONITOR against need_resched and enter optimized wait state
  * through MWAIT. Whenever someone changes need_resched, we would be woken
  * up from MWAIT (without an IPI).
+ *
+ * New with Core Duo processors, MWAIT can take some hints based on CPU
+ * capability.
  */
-static void mwait_idle(void)
+void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
 {
-	local_irq_enable();
-
-	while (!need_resched()) {
+	if (!need_resched()) {
 		__monitor((void *)&current_thread_info()->flags, 0, 0);
 		smp_mb();
-		if (need_resched())
-			break;
-		__mwait(0, 0);
+		if (!need_resched())
+			__mwait(eax, ecx);
 	}
 }
 
+/* Default MONITOR/MWAIT with no hints, used for default C1 state */
+static void mwait_idle(void)
+{
+	local_irq_enable();
+	while (!need_resched())
+		mwait_idle_with_hints(0,0);
+}
+
 void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
+63 −38
@@ -219,6 +219,23 @@ static void acpi_safe_halt(void)
 
 static atomic_t c3_cpu_count;
 
+/* Common C-state entry for C2, C3, .. */
+static void acpi_cstate_enter(struct acpi_processor_cx *cstate)
+{
+	if (cstate->space_id == ACPI_CSTATE_FFH) {
+		/* Call into architectural FFH based C-state */
+		acpi_processor_ffh_cstate_enter(cstate);
+	} else {
+		int unused;
+		/* IO port based C-state */
+		inb(cstate->address);
+		/* Dummy wait op - must do something useless after P_LVL2 read
+		   because chipsets cannot guarantee that STPCLK# signal
+		   gets asserted in time to freeze execution properly. */
+		unused = inl(acpi_fadt.xpm_tmr_blk.address);
+	}
+}
+
 static void acpi_processor_idle(void)
 {
 	struct acpi_processor *pr = NULL;
@@ -361,11 +378,7 @@ static void acpi_processor_idle(void)
 		/* Get start time (ticks) */
 		t1 = inl(acpi_fadt.xpm_tmr_blk.address);
 		/* Invoke C2 */
-		inb(cx->address);
-		/* Dummy wait op - must do something useless after P_LVL2 read
-		   because chipsets cannot guarantee that STPCLK# signal
-		   gets asserted in time to freeze execution properly. */
-		t2 = inl(acpi_fadt.xpm_tmr_blk.address);
+		acpi_cstate_enter(cx);
 		/* Get end time (ticks) */
 		t2 = inl(acpi_fadt.xpm_tmr_blk.address);
 
@@ -401,9 +414,7 @@ static void acpi_processor_idle(void)
 		/* Get start time (ticks) */
 		t1 = inl(acpi_fadt.xpm_tmr_blk.address);
 		/* Invoke C3 */
-		inb(cx->address);
-		/* Dummy wait op (see above) */
-		t2 = inl(acpi_fadt.xpm_tmr_blk.address);
+		acpi_cstate_enter(cx);
 		/* Get end time (ticks) */
 		t2 = inl(acpi_fadt.xpm_tmr_blk.address);
 		if (pr->flags.bm_check) {
@@ -628,20 +639,16 @@ static int acpi_processor_get_power_info_fadt(struct acpi_processor *pr)
 	return 0;
 }
 
-static int acpi_processor_get_power_info_default_c1(struct acpi_processor *pr)
+static int acpi_processor_get_power_info_default(struct acpi_processor *pr)
 {
-
-	/* Zero initialize all the C-states info. */
-	memset(pr->power.states, 0, sizeof(pr->power.states));
-
-	/* set the first C-State to C1 */
-	pr->power.states[ACPI_STATE_C1].type = ACPI_STATE_C1;
-
-	/* the C0 state only exists as a filler in our array,
-	 * and all processors need to support C1 */
-	pr->power.states[ACPI_STATE_C0].valid = 1;
-	pr->power.states[ACPI_STATE_C1].valid = 1;
-
+	if (!pr->power.states[ACPI_STATE_C1].valid) {
+		/* set the first C-State to C1 */
+		/* all processors need to support C1 */
+		pr->power.states[ACPI_STATE_C1].type = ACPI_STATE_C1;
+		pr->power.states[ACPI_STATE_C1].valid = 1;
+	}
+	/* the C0 state only exists as a filler in our array */
+	pr->power.states[ACPI_STATE_C0].valid = 1;
 	return 0;
 }
 
@@ -658,12 +665,7 @@ static int acpi_processor_get_power_info_cst(struct acpi_processor *pr)
 	if (nocst)
 		return -ENODEV;
 
-	current_count = 1;
-
-	/* Zero initialize C2 onwards and prepare for fresh CST lookup */
-	for (i = 2; i < ACPI_PROCESSOR_MAX_POWER; i++)
-		memset(&(pr->power.states[i]), 0, 
-				sizeof(struct acpi_processor_cx));
+	current_count = 0;
 
 	status = acpi_evaluate_object(pr->handle, "_CST", NULL, &buffer);
 	if (ACPI_FAILURE(status)) {
@@ -718,22 +720,39 @@ static int acpi_processor_get_power_info_cst(struct acpi_processor *pr)
 		    (reg->space_id != ACPI_ADR_SPACE_FIXED_HARDWARE))
 			continue;
 
-		cx.address = (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) ?
-		    0 : reg->address;
-
 		/* There should be an easy way to extract an integer... */
 		obj = (union acpi_object *)&(element->package.elements[1]);
 		if (obj->type != ACPI_TYPE_INTEGER)
 			continue;
 
 		cx.type = obj->integer.value;
+		/*
+		 * Some buggy BIOSes won't list C1 in _CST -
+		 * Let acpi_processor_get_power_info_default() handle them later
+		 */
+		if (i == 1 && cx.type != ACPI_STATE_C1)
+			current_count++;
 
-		if ((cx.type != ACPI_STATE_C1) &&
-		    (reg->space_id != ACPI_ADR_SPACE_SYSTEM_IO))
-			continue;
+		cx.address = reg->address;
+		cx.index = current_count + 1;
 
-		if ((cx.type < ACPI_STATE_C2) || (cx.type > ACPI_STATE_C3))
-			continue;
+		cx.space_id = ACPI_CSTATE_SYSTEMIO;
+		if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) {
+			if (acpi_processor_ffh_cstate_probe
+					(pr->id, &cx, reg) == 0) {
+				cx.space_id = ACPI_CSTATE_FFH;
+			} else if (cx.type != ACPI_STATE_C1) {
+				/*
+				 * C1 is a special case where FIXED_HARDWARE
+				 * can be handled in non-MWAIT way as well.
+				 * In that case, save this _CST entry info.
+				 * That is, we retain space_id of SYSTEM_IO for
+				 * halt based C1.
+				 * Otherwise, ignore this info and continue.
+				 */
+				continue;
+			}
+		}
 
 		obj = (union acpi_object *)&(element->package.elements[2]);
 		if (obj->type != ACPI_TYPE_INTEGER)
@@ -938,12 +957,18 @@ static int acpi_processor_get_power_info(struct acpi_processor *pr)
 	/* NOTE: the idle thread may not be running while calling
 	 * this function */
 
-	/* Adding C1 state */
-	acpi_processor_get_power_info_default_c1(pr);
+	/* Zero initialize all the C-states info. */
+	memset(pr->power.states, 0, sizeof(pr->power.states));
+
 	result = acpi_processor_get_power_info_cst(pr);
 	if (result == -ENODEV)
 		acpi_processor_get_power_info_fadt(pr);
 
+	if (result)
+		return result;
+
+	acpi_processor_get_power_info_default(pr);
+
 	pr->power.count = acpi_processor_power_verify(pr);
 
 	/*
+6 −3
@@ -13,6 +13,7 @@
 #define ACPI_PDC_SMP_C_SWCOORD		(0x0040)
 #define ACPI_PDC_SMP_T_SWCOORD		(0x0080)
 #define ACPI_PDC_C_C1_FFH		(0x0100)
+#define ACPI_PDC_C_C2C3_FFH		(0x0200)
 
 #define ACPI_PDC_EST_CAPABILITY_SMP	(ACPI_PDC_SMP_C1PT | \
 					 ACPI_PDC_C_C1_HALT | \
@@ -25,6 +26,8 @@
 
 #define ACPI_PDC_C_CAPABILITY_SMP	(ACPI_PDC_SMP_C2C3  | \
 					 ACPI_PDC_SMP_C1PT  | \
-					 ACPI_PDC_C_C1_HALT)
+					 ACPI_PDC_C_C1_HALT | \
+					 ACPI_PDC_C_C1_FFH  | \
+					 ACPI_PDC_C_C2C3_FFH)
 
 #endif				/* __PDC_INTEL_H__ */