Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d652df0b authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 FPU changes from Ingo Molnar:
 "There are two bigger changes in this tree:

   - Add an [early-use-]safe static_cpu_has() variant and other
     robustness improvements, including the new X86_DEBUG_STATIC_CPU_HAS
     configurable debugging facility, motivated by recent obscure FPU
     code bugs, by Borislav Petkov

   - Reimplement FPU detection code in C and drop the old asm code, by
     Peter Anvin."

* 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86, fpu: Use static_cpu_has_safe before alternatives
  x86: Add a static_cpu_has_safe variant
  x86: Sanity-check static_cpu_has usage
  x86, cpu: Add a synthetic, always true, cpu feature
  x86: Get rid of ->hard_math and all the FPU asm fu
parents 4d6f843a 5f8c4218
Loading
Loading
Loading
Loading
+10 −0
Original line number Diff line number Diff line
@@ -303,4 +303,14 @@ config DEBUG_NMI_SELFTEST

	  If unsure, say N.

config X86_DEBUG_STATIC_CPU_HAS
	bool "Debug alternatives"
	depends on DEBUG_KERNEL
	---help---
	  This option causes additional code to be generated which
	  fails if static_cpu_has() is used before alternatives have
	  run.

	  If unsure, say N.

endmenu
+114 −4
Original line number Diff line number Diff line
@@ -92,7 +92,7 @@
#define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* "" Lfence synchronizes RDTSC */
#define X86_FEATURE_11AP	(3*32+19) /* "" Bad local APIC aka 11AP */
#define X86_FEATURE_NOPL	(3*32+20) /* The NOPL (0F 1F) instructions */
					  /* 21 available, was AMD_C1E */
#define X86_FEATURE_ALWAYS	(3*32+21) /* "" Always-present feature */
#define X86_FEATURE_XTOPOLOGY	(3*32+22) /* cpu topology enum extensions */
#define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */
#define X86_FEATURE_NONSTOP_TSC	(3*32+24) /* TSC does not stop in C states */
@@ -356,15 +356,36 @@ extern const char * const x86_power_flags[32];
#endif /* CONFIG_X86_64 */

#if __GNUC__ >= 4
extern void warn_pre_alternatives(void);
extern bool __static_cpu_has_safe(u16 bit);

/*
 * Static testing of CPU features.  Used the same as boot_cpu_has().
 * These are only valid after alternatives have run, but will statically
 * patch the target code for additional performance.
 *
 */
static __always_inline __pure bool __static_cpu_has(u16 bit)
{
#if __GNUC__ > 4 || __GNUC_MINOR__ >= 5

#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
		/*
		 * Catch too early usage of this before alternatives
		 * have run.
		 */
		asm goto("1: jmp %l[t_warn]\n"
			 "2:\n"
			 ".section .altinstructions,\"a\"\n"
			 " .long 1b - .\n"
			 " .long 0\n"		/* no replacement */
			 " .word %P0\n"		/* 1: do replace */
			 " .byte 2b - 1b\n"	/* source len */
			 " .byte 0\n"		/* replacement len */
			 ".previous\n"
			 /* skipping size check since replacement size = 0 */
			 : : "i" (X86_FEATURE_ALWAYS) : : t_warn);
#endif

		asm goto("1: jmp %l[t_no]\n"
			 "2:\n"
			 ".section .altinstructions,\"a\"\n"
@@ -379,7 +400,13 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
		return true;
	t_no:
		return false;
#else

#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
	t_warn:
		warn_pre_alternatives();
		return false;
#endif
#else /* GCC_VERSION >= 40500 */
		u8 flag;
		/* Open-coded due to __stringify() in ALTERNATIVE() */
		asm volatile("1: movb $0,%0\n"
@@ -411,11 +438,94 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
		__static_cpu_has(bit) :				\
		boot_cpu_has(bit)				\
)

static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
{
#if __GNUC__ > 4 || __GNUC_MINOR__ >= 5
/*
 * We need to spell the jumps to the compiler because, depending on the offset,
 * the replacement jump can be bigger than the original jump, and this we cannot
 * have. Thus, we force the jump to the widest, 4-byte, signed relative
 * offset even though the last would often fit in less bytes.
 */
		asm goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n"
			 "2:\n"
			 ".section .altinstructions,\"a\"\n"
			 " .long 1b - .\n"		/* src offset */
			 " .long 3f - .\n"		/* repl offset */
			 " .word %P1\n"			/* always replace */
			 " .byte 2b - 1b\n"		/* src len */
			 " .byte 4f - 3f\n"		/* repl len */
			 ".previous\n"
			 ".section .altinstr_replacement,\"ax\"\n"
			 "3: .byte 0xe9\n .long %l[t_no] - 2b\n"
			 "4:\n"
			 ".previous\n"
			 ".section .altinstructions,\"a\"\n"
			 " .long 1b - .\n"		/* src offset */
			 " .long 0\n"			/* no replacement */
			 " .word %P0\n"			/* feature bit */
			 " .byte 2b - 1b\n"		/* src len */
			 " .byte 0\n"			/* repl len */
			 ".previous\n"
			 : : "i" (bit), "i" (X86_FEATURE_ALWAYS)
			 : : t_dynamic, t_no);
		return true;
	t_no:
		return false;
	t_dynamic:
		return __static_cpu_has_safe(bit);
#else /* GCC_VERSION >= 40500 */
		u8 flag;
		/* Open-coded due to __stringify() in ALTERNATIVE() */
		asm volatile("1: movb $2,%0\n"
			     "2:\n"
			     ".section .altinstructions,\"a\"\n"
			     " .long 1b - .\n"		/* src offset */
			     " .long 3f - .\n"		/* repl offset */
			     " .word %P2\n"		/* always replace */
			     " .byte 2b - 1b\n"		/* source len */
			     " .byte 4f - 3f\n"		/* replacement len */
			     ".previous\n"
			     ".section .discard,\"aw\",@progbits\n"
			     " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
			     ".previous\n"
			     ".section .altinstr_replacement,\"ax\"\n"
			     "3: movb $0,%0\n"
			     "4:\n"
			     ".previous\n"
			     ".section .altinstructions,\"a\"\n"
			     " .long 1b - .\n"		/* src offset */
			     " .long 5f - .\n"		/* repl offset */
			     " .word %P1\n"		/* feature bit */
			     " .byte 4b - 3b\n"		/* src len */
			     " .byte 6f - 5f\n"		/* repl len */
			     ".previous\n"
			     ".section .discard,\"aw\",@progbits\n"
			     " .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */
			     ".previous\n"
			     ".section .altinstr_replacement,\"ax\"\n"
			     "5: movb $1,%0\n"
			     "6:\n"
			     ".previous\n"
			     : "=qm" (flag)
			     : "i" (bit), "i" (X86_FEATURE_ALWAYS));
		return (flag == 2 ? __static_cpu_has_safe(bit) : flag);
#endif
}

#define static_cpu_has_safe(bit)				\
(								\
	__builtin_constant_p(boot_cpu_has(bit)) ?		\
		boot_cpu_has(bit) :				\
		_static_cpu_has_safe(bit)			\
)
#else
/*
 * gcc 3.x is too stupid to do the static test; fall back to dynamic.
 */
#define static_cpu_has(bit)		boot_cpu_has(bit)
#define static_cpu_has_safe(bit)	boot_cpu_has(bit)
#endif

#define cpu_has_bug(c, bit)	cpu_has(c, (bit))
+1 −3
Original line number Diff line number Diff line
@@ -62,10 +62,8 @@ extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set,
#define xstateregs_active	fpregs_active

#ifdef CONFIG_MATH_EMULATION
# define HAVE_HWFP		(boot_cpu_data.hard_math)
extern void finit_soft_fpu(struct i387_soft_struct *soft);
#else
# define HAVE_HWFP		1
static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
#endif

@@ -345,7 +343,7 @@ static inline void __thread_fpu_end(struct task_struct *tsk)

static inline void __thread_fpu_begin(struct task_struct *tsk)
{
	if (!use_eager_fpu())
	if (!static_cpu_has_safe(X86_FEATURE_EAGER_FPU))
		clts();
	__thread_set_has_fpu(tsk);
}
+2 −1
Original line number Diff line number Diff line
@@ -89,9 +89,9 @@ struct cpuinfo_x86 {
	char			wp_works_ok;	/* It doesn't on 386's */

	/* Problems on some 486Dx4's and old 386's: */
	char			hard_math;
	char			rfu;
	char			pad0;
	char			pad1;
#else
	/* Number of 4K pages in DTLB/ITLB combined(in pages): */
	int			x86_tlbsize;
@@ -164,6 +164,7 @@ extern const struct seq_operations cpuinfo_op;
#define cache_line_size()	(boot_cpu_data.x86_cache_alignment)

extern void cpu_detect(struct cpuinfo_x86 *c);
extern void __cpuinit fpu_detect(struct cpuinfo_x86 *c);

extern void early_cpu_init(void);
extern void identify_boot_cpu(void);
+0 −1
Original line number Diff line number Diff line
@@ -28,7 +28,6 @@ void foo(void)
	OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor);
	OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model);
	OFFSET(CPUINFO_x86_mask, cpuinfo_x86, x86_mask);
	OFFSET(CPUINFO_hard_math, cpuinfo_x86, hard_math);
	OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level);
	OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability);
	OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
Loading