Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 5d2bd700 authored by Suresh Siddha's avatar Suresh Siddha Committed by H. Peter Anvin
Browse files

x86, fpu: decouple non-lazy/eager fpu restore from xsave



Decouple non-lazy/eager fpu restore policy from the existence of the xsave
feature. Introduce a synthetic CPUID flag to represent the eagerfpu
policy. "eagerfpu=on" boot parameter will enable the policy.

Requested-by: H. Peter Anvin <hpa@zytor.com>
Requested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Link: http://lkml.kernel.org/r/1347300665-6209-2-git-send-email-suresh.b.siddha@intel.com


Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
parent 304bceda
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -1833,6 +1833,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
			and restore using xsave. The kernel will fallback to
			enabling legacy floating-point and sse state.

	eagerfpu=	[X86]
			on	enable eager fpu restore
			off	disable eager fpu restore

	nohlt		[BUGS=ARM,SH] Tells the kernel that the sleep(SH) or
			wfi(ARM) instruction doesn't work correctly and not to
			use it. This is also useful when using JTAG debugger.
+2 −0
Original line number Diff line number Diff line
@@ -97,6 +97,7 @@
#define X86_FEATURE_EXTD_APICID	(3*32+26) /* has extended APICID (8 bits) */
#define X86_FEATURE_AMD_DCM     (3*32+27) /* multi-node processor */
#define X86_FEATURE_APERFMPERF	(3*32+28) /* APERFMPERF */
#define X86_FEATURE_EAGER_FPU	(3*32+29) /* "eagerfpu" Non lazy FPU restore */

/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
#define X86_FEATURE_XMM3	(4*32+ 0) /* "pni" SSE-3 */
@@ -305,6 +306,7 @@ extern const char * const x86_power_flags[32];
#define cpu_has_perfctr_core	boot_cpu_has(X86_FEATURE_PERFCTR_CORE)
#define cpu_has_cx8		boot_cpu_has(X86_FEATURE_CX8)
#define cpu_has_cx16		boot_cpu_has(X86_FEATURE_CX16)
#define cpu_has_eager_fpu	boot_cpu_has(X86_FEATURE_EAGER_FPU)

#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
# define cpu_has_invlpg		1
+39 −15
Original line number Diff line number Diff line
@@ -38,6 +38,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,

extern unsigned int mxcsr_feature_mask;
extern void fpu_init(void);
extern void eager_fpu_init(void);

DECLARE_PER_CPU(struct task_struct *, fpu_owner_task);

@@ -84,6 +85,11 @@ static inline int is_x32_frame(void)

#define X87_FSW_ES (1 << 7)	/* Exception Summary */

static __always_inline __pure bool use_eager_fpu(void)
{
	return static_cpu_has(X86_FEATURE_EAGER_FPU);
}

static __always_inline __pure bool use_xsaveopt(void)
{
	return static_cpu_has(X86_FEATURE_XSAVEOPT);
@@ -99,6 +105,14 @@ static __always_inline __pure bool use_fxsr(void)
        return static_cpu_has(X86_FEATURE_FXSR);
}

static inline void fx_finit(struct i387_fxsave_struct *fx)
{
	memset(fx, 0, xstate_size);
	fx->cwd = 0x37f;
	if (cpu_has_xmm)
		fx->mxcsr = MXCSR_DEFAULT;
}

extern void __sanitize_i387_state(struct task_struct *);

static inline void sanitize_i387_state(struct task_struct *tsk)
@@ -291,13 +305,13 @@ static inline void __thread_set_has_fpu(struct task_struct *tsk)
static inline void __thread_fpu_end(struct task_struct *tsk)
{
	__thread_clear_has_fpu(tsk);
	if (!use_xsave())
	if (!use_eager_fpu())
		stts();
}

static inline void __thread_fpu_begin(struct task_struct *tsk)
{
	if (!use_xsave())
	if (!use_eager_fpu())
		clts();
	__thread_set_has_fpu(tsk);
}
@@ -327,10 +341,14 @@ static inline void drop_fpu(struct task_struct *tsk)

static inline void drop_init_fpu(struct task_struct *tsk)
{
	if (!use_xsave())
	if (!use_eager_fpu())
		drop_fpu(tsk);
	else
	else {
		if (use_xsave())
			xrstor_state(init_xstate_buf, -1);
		else
			fxrstor_checking(&init_xstate_buf->i387);
	}
}

/*
@@ -370,7 +388,7 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
	 * If the task has used the math, pre-load the FPU on xsave processors
	 * or if the past 5 consecutive context-switches used math.
	 */
	fpu.preload = tsk_used_math(new) && (use_xsave() ||
	fpu.preload = tsk_used_math(new) && (use_eager_fpu() ||
					     new->fpu_counter > 5);
	if (__thread_has_fpu(old)) {
		if (!__save_init_fpu(old))
@@ -383,14 +401,14 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
			new->fpu_counter++;
			__thread_set_has_fpu(new);
			prefetch(new->thread.fpu.state);
		} else if (!use_xsave())
		} else if (!use_eager_fpu())
			stts();
	} else {
		old->fpu_counter = 0;
		old->thread.fpu.last_cpu = ~0;
		if (fpu.preload) {
			new->fpu_counter++;
			if (!use_xsave() && fpu_lazy_restore(new, cpu))
			if (!use_eager_fpu() && fpu_lazy_restore(new, cpu))
				fpu.preload = 0;
			else
				prefetch(new->thread.fpu.state);
@@ -452,6 +470,14 @@ static inline void user_fpu_begin(void)
	preempt_enable();
}

static inline void __save_fpu(struct task_struct *tsk)
{
	if (use_xsave())
		xsave_state(&tsk->thread.fpu.state->xsave, -1);
	else
		fpu_fxsave(&tsk->thread.fpu);
}

/*
 * These disable preemption on their own and are safe
 */
@@ -459,8 +485,8 @@ static inline void save_init_fpu(struct task_struct *tsk)
{
	WARN_ON_ONCE(!__thread_has_fpu(tsk));

	if (use_xsave()) {
		xsave_state(&tsk->thread.fpu.state->xsave, -1);
	if (use_eager_fpu()) {
		__save_fpu(tsk);
		return;
	}

@@ -526,11 +552,9 @@ static inline void fpu_free(struct fpu *fpu)

static inline void fpu_copy(struct task_struct *dst, struct task_struct *src)
{
	if (use_xsave()) {
		struct xsave_struct *xsave = &dst->thread.fpu.state->xsave;

		memset(&xsave->xsave_hdr, 0, sizeof(struct xsave_hdr_struct));
		xsave_state(xsave, -1);
	if (use_eager_fpu()) {
		memset(&dst->thread.fpu.state->xsave, 0, xstate_size);
		__save_fpu(dst);
	} else {
		struct fpu *dfpu = &dst->thread.fpu;
		struct fpu *sfpu = &src->thread.fpu;
+0 −2
Original line number Diff line number Diff line
@@ -1297,7 +1297,6 @@ void __cpuinit cpu_init(void)
	dbg_restore_debug_regs();

	fpu_init();
	xsave_init();

	raw_local_save_flags(kernel_eflags);

@@ -1352,6 +1351,5 @@ void __cpuinit cpu_init(void)
	dbg_restore_debug_regs();

	fpu_init();
	xsave_init();
}
#endif
+8 −17
Original line number Diff line number Diff line
@@ -22,9 +22,8 @@
/*
 * Were we in an interrupt that interrupted kernel mode?
 *
 * For now, on xsave platforms we will return interrupted
 * kernel FPU as not-idle. TBD: As we use non-lazy FPU restore
 * for xsave platforms, ideally we can change the return value
 * For now, with eagerfpu we will return interrupted kernel FPU
 * state as not-idle. TBD: Ideally we can change the return value
 * to something like __thread_has_fpu(current). But we need to
 * be careful of doing __thread_clear_has_fpu() before saving
 * the FPU etc for supporting nested uses etc. For now, take
@@ -38,7 +37,7 @@
 */
static inline bool interrupted_kernel_fpu_idle(void)
{
	if (use_xsave())
	if (use_eager_fpu())
		return 0;

	return !__thread_has_fpu(current) &&
@@ -84,7 +83,7 @@ void kernel_fpu_begin(void)
		__save_init_fpu(me);
		__thread_clear_has_fpu(me);
		/* We do 'stts()' in kernel_fpu_end() */
	} else if (!use_xsave()) {
	} else if (!use_eager_fpu()) {
		this_cpu_write(fpu_owner_task, NULL);
		clts();
	}
@@ -93,7 +92,7 @@ EXPORT_SYMBOL(kernel_fpu_begin);

void kernel_fpu_end(void)
{
	if (use_xsave())
	if (use_eager_fpu())
		math_state_restore();
	else
		stts();
@@ -122,7 +121,6 @@ static void __cpuinit mxcsr_feature_mask_init(void)
{
	unsigned long mask = 0;

	clts();
	if (cpu_has_fxsr) {
		memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct));
		asm volatile("fxsave %0" : : "m" (fx_scratch));
@@ -131,7 +129,6 @@ static void __cpuinit mxcsr_feature_mask_init(void)
			mask = 0x0000ffbf;
	}
	mxcsr_feature_mask &= mask;
	stts();
}

static void __cpuinit init_thread_xstate(void)
@@ -185,9 +182,8 @@ void __cpuinit fpu_init(void)
		init_thread_xstate();

	mxcsr_feature_mask_init();
	/* clean state in init */
	current_thread_info()->status = 0;
	clear_used_math();
	xsave_init();
	eager_fpu_init();
}

void fpu_finit(struct fpu *fpu)
@@ -198,12 +194,7 @@ void fpu_finit(struct fpu *fpu)
	}

	if (cpu_has_fxsr) {
		struct i387_fxsave_struct *fx = &fpu->state->fxsave;

		memset(fx, 0, xstate_size);
		fx->cwd = 0x37f;
		if (cpu_has_xmm)
			fx->mxcsr = MXCSR_DEFAULT;
		fx_finit(&fpu->state->fxsave);
	} else {
		struct i387_fsave_struct *fp = &fpu->state->fsave;
		memset(fp, 0, xstate_size);
Loading