Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 518bacf5 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 FPU updates from Ingo Molnar:
 "The main changes in this cycle were:

   - do a large round of simplifications after all CPUs do 'eager' FPU
     context switching in v4.9: remove CR0 twiddling, remove leftover
     eager/lazy bts, etc (Andy Lutomirski)

   - more FPU code simplifications: remove struct fpu::counter, clarify
     nomenclature, remove unnecessary arguments/functions and better
     structure the code (Rik van Riel)"

* 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/fpu: Remove clts()
  x86/fpu: Remove stts()
  x86/fpu: Handle #NM without FPU emulation as an error
  x86/fpu, lguest: Remove CR0.TS support
  x86/fpu, kvm: Remove host CR0.TS manipulation
  x86/fpu: Remove irq_ts_save() and irq_ts_restore()
  x86/fpu: Stop saving and restoring CR0.TS in fpu__init_check_bugs()
  x86/fpu: Get rid of two redundant clts() calls
  x86/fpu: Finish excising 'eagerfpu'
  x86/fpu: Split old_fpu & new_fpu handling into separate functions
  x86/fpu: Remove 'cpu' argument from __cpu_invalidate_fpregs_state()
  x86/fpu: Split old & new FPU code paths
  x86/fpu: Remove __fpregs_(de)activate()
  x86/fpu: Rename lazy restore functions to "register state valid"
  x86/fpu, kvm: Remove KVM vcpu->fpu_counter
  x86/fpu: Remove struct fpu::counter
  x86/fpu: Remove use_eager_fpu()
  x86/fpu: Remove the XFEATURE_MASK_EAGER/LAZY distinction
  x86/fpu: Hard-disable lazy FPU mode
  x86/crypto, x86/fpu: Remove X86_FEATURE_EAGER_FPU #ifdef from the crc32c code
parents 535b2f73 064e6a8b
Loading
Loading
Loading
Loading
+0 −6
Original line number Diff line number Diff line
@@ -1079,12 +1079,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
	nopku		[X86] Disable Memory Protection Keys CPU feature found
			in some Intel CPUs.

	eagerfpu=	[X86]
			on	enable eager fpu restore
			off	disable eager fpu restore
			auto	selects the default scheme, which automatically
				enables eagerfpu restore for xsaveopt.

	module.async_probe [KNL]
			Enable asynchronous probe on this module.

+4 −18
Original line number Diff line number Diff line
@@ -48,26 +48,13 @@
#ifdef CONFIG_X86_64
/*
 * use carryless multiply version of crc32c when buffer
 * size is >= 512 (when eager fpu is enabled) or
 * >= 1024 (when eager fpu is disabled) to account
 * size is >= 512 to account
 * for fpu state save/restore overhead.
 */
#define CRC32C_PCL_BREAKEVEN_EAGERFPU	512
#define CRC32C_PCL_BREAKEVEN_NOEAGERFPU	1024
#define CRC32C_PCL_BREAKEVEN	512

asmlinkage unsigned int crc_pcl(const u8 *buffer, int len,
				unsigned int crc_init);
static int crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_EAGERFPU;
#if defined(X86_FEATURE_EAGER_FPU)
#define set_pcl_breakeven_point()					\
do {									\
	if (!use_eager_fpu())						\
		crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_NOEAGERFPU;	\
} while (0)
#else
#define set_pcl_breakeven_point()					\
	(crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_NOEAGERFPU)
#endif
#endif /* CONFIG_X86_64 */

static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length)
@@ -190,7 +177,7 @@ static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data,
	 * use faster PCL version if datasize is large enough to
	 * overcome kernel fpu state save/restore overhead
	 */
	if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) {
	if (len >= CRC32C_PCL_BREAKEVEN && irq_fpu_usable()) {
		kernel_fpu_begin();
		*crcp = crc_pcl(data, len, *crcp);
		kernel_fpu_end();
@@ -202,7 +189,7 @@ static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data,
static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
				u8 *out)
{
	if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) {
	if (len >= CRC32C_PCL_BREAKEVEN && irq_fpu_usable()) {
		kernel_fpu_begin();
		*(__le32 *)out = ~cpu_to_le32(crc_pcl(data, len, *crcp));
		kernel_fpu_end();
@@ -261,7 +248,6 @@ static int __init crc32c_intel_mod_init(void)
		alg.update = crc32c_pcl_intel_update;
		alg.finup = crc32c_pcl_intel_finup;
		alg.digest = crc32c_pcl_intel_digest;
		set_pcl_breakeven_point();
	}
#endif
	return crypto_register_shash(&alg);
+0 −1
Original line number Diff line number Diff line
@@ -104,7 +104,6 @@
#define X86_FEATURE_EXTD_APICID	( 3*32+26) /* has extended APICID (8 bits) */
#define X86_FEATURE_AMD_DCM     ( 3*32+27) /* multi-node processor */
#define X86_FEATURE_APERFMPERF	( 3*32+28) /* APERFMPERF */
#define X86_FEATURE_EAGER_FPU	( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */

/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
+0 −10
Original line number Diff line number Diff line
@@ -26,16 +26,6 @@ extern void kernel_fpu_begin(void);
extern void kernel_fpu_end(void);
extern bool irq_fpu_usable(void);

/*
 * Some instructions like VIA's padlock instructions generate a spurious
 * DNA fault but don't modify SSE registers. And these instructions
 * get used from interrupt context as well. To prevent these kernel instructions
 * in interrupt context interacting wrongly with other user/kernel fpu usage, we
 * should use them only in the context of irq_ts_save/restore()
 */
extern int  irq_ts_save(void);
extern void irq_ts_restore(int TS_state);

/*
 * Query the presence of one or more xfeatures. Works on any legacy CPU as well.
 *
+42 −97
Original line number Diff line number Diff line
@@ -60,11 +60,6 @@ extern u64 fpu__get_supported_xfeatures_mask(void);
/*
 * FPU related CPU feature flag helper routines:
 */
static __always_inline __pure bool use_eager_fpu(void)
{
	return static_cpu_has(X86_FEATURE_EAGER_FPU);
}

static __always_inline __pure bool use_xsaveopt(void)
{
	return static_cpu_has(X86_FEATURE_XSAVEOPT);
@@ -484,42 +479,42 @@ extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size)
DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);

/*
 * Must be run with preemption disabled: this clears the fpu_fpregs_owner_ctx,
 * on this CPU.
 * The in-register FPU state for an FPU context on a CPU is assumed to be
 * valid if the fpu->last_cpu matches the CPU, and the fpu_fpregs_owner_ctx
 * matches the FPU.
 *
 * This will disable any lazy FPU state restore of the current FPU state,
 * but if the current thread owns the FPU, it will still be saved by.
 * If the FPU register state is valid, the kernel can skip restoring the
 * FPU state from memory.
 *
 * Any code that clobbers the FPU registers or updates the in-memory
 * FPU state for a task MUST let the rest of the kernel know that the
 * FPU registers are no longer valid for this task.
 *
 * Either one of these invalidation functions is enough. Invalidate
 * a resource you control: CPU if using the CPU for something else
 * (with preemption disabled), FPU for the current task, or a task that
 * is prevented from running by the current task.
 */
static inline void __cpu_disable_lazy_restore(unsigned int cpu)
{
	per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL;
}

static inline int fpu_want_lazy_restore(struct fpu *fpu, unsigned int cpu)
static inline void __cpu_invalidate_fpregs_state(void)
{
	return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;
	__this_cpu_write(fpu_fpregs_owner_ctx, NULL);
}


/*
 * Wrap lazy FPU TS handling in a 'hw fpregs activation/deactivation'
 * idiom, which is then paired with the sw-flag (fpregs_active) later on:
 */

static inline void __fpregs_activate_hw(void)
static inline void __fpu_invalidate_fpregs_state(struct fpu *fpu)
{
	if (!use_eager_fpu())
		clts();
	fpu->last_cpu = -1;
}

static inline void __fpregs_deactivate_hw(void)
static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu)
{
	if (!use_eager_fpu())
		stts();
	return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;
}

/* Must be paired with an 'stts' (fpregs_deactivate_hw()) after! */
static inline void __fpregs_deactivate(struct fpu *fpu)
/*
 * These generally need preemption protection to work,
 * do try to avoid using these on their own:
 */
static inline void fpregs_deactivate(struct fpu *fpu)
{
	WARN_ON_FPU(!fpu->fpregs_active);

@@ -528,8 +523,7 @@ static inline void __fpregs_deactivate(struct fpu *fpu)
	trace_x86_fpu_regs_deactivated(fpu);
}

/* Must be paired with a 'clts' (fpregs_activate_hw()) before! */
static inline void __fpregs_activate(struct fpu *fpu)
static inline void fpregs_activate(struct fpu *fpu)
{
	WARN_ON_FPU(fpu->fpregs_active);

@@ -553,52 +547,20 @@ static inline int fpregs_active(void)
	return current->thread.fpu.fpregs_active;
}

/*
 * Encapsulate the CR0.TS handling together with the
 * software flag.
 *
 * These generally need preemption protection to work,
 * do try to avoid using these on their own.
 */
static inline void fpregs_activate(struct fpu *fpu)
{
	__fpregs_activate_hw();
	__fpregs_activate(fpu);
}

static inline void fpregs_deactivate(struct fpu *fpu)
{
	__fpregs_deactivate(fpu);
	__fpregs_deactivate_hw();
}

/*
 * FPU state switching for scheduling.
 *
 * This is a two-stage process:
 *
 *  - switch_fpu_prepare() saves the old state and
 *    sets the new state of the CR0.TS bit. This is
 *    done within the context of the old process.
 *  - switch_fpu_prepare() saves the old state.
 *    This is done within the context of the old process.
 *
 *  - switch_fpu_finish() restores the new state as
 *    necessary.
 */
typedef struct { int preload; } fpu_switch_t;

static inline fpu_switch_t
switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
static inline void
switch_fpu_prepare(struct fpu *old_fpu, int cpu)
{
	fpu_switch_t fpu;

	/*
	 * If the task has used the math, pre-load the FPU on xsave processors
	 * or if the past 5 consecutive context-switches used math.
	 */
	fpu.preload = static_cpu_has(X86_FEATURE_FPU) &&
		      new_fpu->fpstate_active &&
		      (use_eager_fpu() || new_fpu->counter > 5);

	if (old_fpu->fpregs_active) {
		if (!copy_fpregs_to_fpstate(old_fpu))
			old_fpu->last_cpu = -1;
@@ -608,29 +570,8 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
		/* But leave fpu_fpregs_owner_ctx! */
		old_fpu->fpregs_active = 0;
		trace_x86_fpu_regs_deactivated(old_fpu);

		/* Don't change CR0.TS if we just switch! */
		if (fpu.preload) {
			new_fpu->counter++;
			__fpregs_activate(new_fpu);
			trace_x86_fpu_regs_activated(new_fpu);
			prefetch(&new_fpu->state);
		} else {
			__fpregs_deactivate_hw();
		}
	} else {
		old_fpu->counter = 0;
	} else
		old_fpu->last_cpu = -1;
		if (fpu.preload) {
			new_fpu->counter++;
			if (fpu_want_lazy_restore(new_fpu, cpu))
				fpu.preload = 0;
			else
				prefetch(&new_fpu->state);
			fpregs_activate(new_fpu);
		}
	}
	return fpu;
}

/*
@@ -638,15 +579,19 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
 */

/*
 * By the time this gets called, we've already cleared CR0.TS and
 * given the process the FPU if we are going to preload the FPU
 * state - all we need to do is to conditionally restore the register
 * state itself.
 * Set up the userspace FPU context for the new task, if the task
 * has used the FPU.
 */
static inline void switch_fpu_finish(struct fpu *new_fpu, fpu_switch_t fpu_switch)
static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu)
{
	if (fpu_switch.preload)
	bool preload = static_cpu_has(X86_FEATURE_FPU) &&
		       new_fpu->fpstate_active;

	if (preload) {
		if (!fpregs_state_valid(new_fpu, cpu))
			copy_kernel_to_fpregs(&new_fpu->state);
		fpregs_activate(new_fpu);
	}
}

/*
Loading