Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 625037cc authored by Linus Torvalds
Browse files
* 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86-64: move clts into batch cpu state updates when preloading fpu
  x86-64: move unlazy_fpu() into lazy cpu state part of context switch
  x86-32: make sure clts is batched during context switch
  x86: split out core __math_state_restore
parents 8fafa0a7 17950c5b
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -26,6 +26,7 @@ extern void fpu_init(void);
extern void mxcsr_feature_mask_init(void);
extern int init_fpu(struct task_struct *child);
extern asmlinkage void math_state_restore(void);
extern void __math_state_restore(void);
extern void init_thread_xstate(void);
extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);

+16 −11
Original line number Diff line number Diff line
@@ -350,14 +350,21 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
				 *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);
	bool preload_fpu;

	/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */

	__unlazy_fpu(prev_p);
	/*
	 * If the task has used fpu the last 5 timeslices, just do a full
	 * restore of the math state immediately to avoid the trap; the
	 * chances of needing FPU soon are obviously high now
	 */
	preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;

	__unlazy_fpu(prev_p);

	/* we're going to use this soon, after a few expensive things */
	if (next_p->fpu_counter > 5)
	if (preload_fpu)
		prefetch(next->xstate);

	/*
@@ -398,6 +405,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
		     task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
		__switch_to_xtra(prev_p, next_p, tss);

	/* If we're going to preload the fpu context, make sure clts
	   is run while we're batching the cpu state updates. */
	if (preload_fpu)
		clts();

	/*
	 * Leave lazy mode, flushing any hypercalls made here.
	 * This must be done before restoring TLS segments so
@@ -407,15 +419,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
	 */
	arch_end_context_switch(next_p);

	/* If the task has used fpu the last 5 timeslices, just do a full
	 * restore of the math state immediately to avoid the trap; the
	 * chances of needing FPU soon are obviously high now
	 *
	 * tsk_used_math() checks prevent calling math_state_restore(),
	 * which can sleep in the case of !tsk_used_math()
	 */
	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
		math_state_restore();
	if (preload_fpu)
		__math_state_restore();

	/*
	 * Restore %gs if needed (which is common)
+21 −12
Original line number Diff line number Diff line
@@ -386,9 +386,17 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);
	unsigned fsindex, gsindex;
	bool preload_fpu;

	/*
	 * If the task has used fpu the last 5 timeslices, just do a full
	 * restore of the math state immediately to avoid the trap; the
	 * chances of needing FPU soon are obviously high now
	 */
	preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;

	/* we're going to use this soon, after a few expensive things */
	if (next_p->fpu_counter > 5)
	if (preload_fpu)
		prefetch(next->xstate);

	/*
@@ -419,6 +427,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)

	load_TLS(next, cpu);

	/* Must be after DS reload */
	unlazy_fpu(prev_p);

	/* Make sure cpu is ready for new context */
	if (preload_fpu)
		clts();

	/*
	 * Leave lazy mode, flushing any hypercalls made here.
	 * This must be done before restoring TLS segments so
@@ -459,9 +474,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
	prev->gsindex = gsindex;

	/* Must be after DS reload */
	unlazy_fpu(prev_p);

	/*
	 * Switch the PDA and FPU contexts.
	 */
@@ -480,15 +492,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
		__switch_to_xtra(prev_p, next_p, tss);

	/* If the task has used fpu the last 5 timeslices, just do a full
	 * restore of the math state immediately to avoid the trap; the
	 * chances of needing FPU soon are obviously high now
	 *
	 * tsk_used_math() checks prevent calling math_state_restore(),
	 * which can sleep in the case of !tsk_used_math()
	/*
	 * Preload the FPU context, now that we've determined that the
	 * task is likely to be using it. 
	 */
	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
		math_state_restore();
	if (preload_fpu)
		__math_state_restore();
	return prev_p;
}

+23 −10
Original line number Diff line number Diff line
@@ -794,6 +794,28 @@ asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
{
}

/*
 * Core of the FPU state restore, used during context switch.
 *
 * The caller is expected to have cleared cr0.TS already, so that the
 * fpu is ready for use by the time we get here.
 */
void __math_state_restore(void)
{
	struct thread_info *ti = current_thread_info();
	struct task_struct *task = ti->task;

	if (unlikely(restore_fpu_checking(task))) {
		/*
		 * Paranoid restore: the state could not be loaded, so
		 * call stts() and send the task a SIGSEGV.
		 */
		stts();
		force_sig(SIGSEGV, task);
	} else {
		/* So we fnsave on switch_to() */
		ti->status |= TS_USEDFPU;
		task->fpu_counter++;
	}
}

/*
 * 'math_state_restore()' saves the current math information in the
 * old math state array, and gets the new ones from the current task
@@ -825,17 +847,8 @@ asmlinkage void math_state_restore(void)
	}

	clts();				/* Allow maths ops (or we recurse) */
	/*
	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
	 */
	if (unlikely(restore_fpu_checking(tsk))) {
		stts();
		force_sig(SIGSEGV, tsk);
		return;
	}

	thread->status |= TS_USEDFPU;	/* So we fnsave on switch_to() */
	tsk->fpu_counter++;
	__math_state_restore();
}
EXPORT_SYMBOL_GPL(math_state_restore);