Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit cb84d11e authored by Dave Martin's avatar Dave Martin Committed by Catalin Marinas
Browse files

arm64: neon: Remove support for nested or hardirq kernel-mode NEON



Support for kernel-mode NEON to be nested and/or used in hardirq
context adds significant complexity, and the benefits may be
marginal.  In practice, kernel-mode NEON is not used in hardirq
context, and is rarely used in softirq context (by certain mac80211
drivers).

This patch implements an arm64 may_use_simd() function to allow
clients to check whether kernel-mode NEON is usable in the current
context, and simplifies kernel_neon_{begin,end}() to handle only
saving of the task FPSIMD state (if any).  Without nesting, there
is no other state to save.

The partial fpsimd save/restore functions become redundant as a
result of these changes, so they are removed too.

The save/restore model is changed to operate directly on
task_struct without additional percpu storage.  This simplifies the
code and saves a bit of memory, but means that softirqs must now be
disabled when manipulating the task fpsimd state from task context:
correspondingly, preempt_{en,dis}sable() calls are upgraded to
local_bh_{en,dis}able() as appropriate.  fpsimd_thread_switch()
already runs with hardirqs disabled and so is already protected
from softirqs.

These changes should make it easier to support kernel-mode NEON in
the presence of the Scalable Vector extension in the future.

Signed-off-by: default avatarDave Martin <Dave.Martin@arm.com>
Reviewed-by: default avatarArd Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: default avatarCatalin Marinas <catalin.marinas@arm.com>
parent 4328825d
Loading
Loading
Loading
Loading
+0 −14
Original line number Diff line number Diff line
@@ -41,16 +41,6 @@ struct fpsimd_state {
	unsigned int cpu;
};

/*
 * Struct for stacking the bottom 'n' FP/SIMD registers.
 */
struct fpsimd_partial_state {
	u32		fpsr;
	u32		fpcr;
	u32		num_regs;
	__uint128_t	vregs[32];
};


#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/* Masks for extracting the FPSR and FPCR from the FPSCR */
@@ -77,10 +67,6 @@ extern void fpsimd_update_current_state(struct fpsimd_state *state);

extern void fpsimd_flush_task_state(struct task_struct *target);

extern void fpsimd_save_partial_state(struct fpsimd_partial_state *state,
				      u32 num_regs);
extern void fpsimd_load_partial_state(struct fpsimd_partial_state *state);

/* For use by EFI runtime services calls only */
extern void __efi_fpsimd_begin(void);
extern void __efi_fpsimd_end(void);
+0 −56
Original line number Diff line number Diff line
@@ -75,59 +75,3 @@
	ldr	w\tmpnr, [\state, #16 * 2 + 4]
	fpsimd_restore_fpcr x\tmpnr, \state
.endm

.macro fpsimd_save_partial state, numnr, tmpnr1, tmpnr2
	mrs	x\tmpnr1, fpsr
	str	w\numnr, [\state, #8]
	mrs	x\tmpnr2, fpcr
	stp	w\tmpnr1, w\tmpnr2, [\state]
	adr	x\tmpnr1, 0f
	add	\state, \state, x\numnr, lsl #4
	sub	x\tmpnr1, x\tmpnr1, x\numnr, lsl #1
	br	x\tmpnr1
	stp	q30, q31, [\state, #-16 * 30 - 16]
	stp	q28, q29, [\state, #-16 * 28 - 16]
	stp	q26, q27, [\state, #-16 * 26 - 16]
	stp	q24, q25, [\state, #-16 * 24 - 16]
	stp	q22, q23, [\state, #-16 * 22 - 16]
	stp	q20, q21, [\state, #-16 * 20 - 16]
	stp	q18, q19, [\state, #-16 * 18 - 16]
	stp	q16, q17, [\state, #-16 * 16 - 16]
	stp	q14, q15, [\state, #-16 * 14 - 16]
	stp	q12, q13, [\state, #-16 * 12 - 16]
	stp	q10, q11, [\state, #-16 * 10 - 16]
	stp	q8, q9, [\state, #-16 * 8 - 16]
	stp	q6, q7, [\state, #-16 * 6 - 16]
	stp	q4, q5, [\state, #-16 * 4 - 16]
	stp	q2, q3, [\state, #-16 * 2 - 16]
	stp	q0, q1, [\state, #-16 * 0 - 16]
0:
.endm

.macro fpsimd_restore_partial state, tmpnr1, tmpnr2
	ldp	w\tmpnr1, w\tmpnr2, [\state]
	msr	fpsr, x\tmpnr1
	fpsimd_restore_fpcr x\tmpnr2, x\tmpnr1
	adr	x\tmpnr1, 0f
	ldr	w\tmpnr2, [\state, #8]
	add	\state, \state, x\tmpnr2, lsl #4
	sub	x\tmpnr1, x\tmpnr1, x\tmpnr2, lsl #1
	br	x\tmpnr1
	ldp	q30, q31, [\state, #-16 * 30 - 16]
	ldp	q28, q29, [\state, #-16 * 28 - 16]
	ldp	q26, q27, [\state, #-16 * 26 - 16]
	ldp	q24, q25, [\state, #-16 * 24 - 16]
	ldp	q22, q23, [\state, #-16 * 22 - 16]
	ldp	q20, q21, [\state, #-16 * 20 - 16]
	ldp	q18, q19, [\state, #-16 * 18 - 16]
	ldp	q16, q17, [\state, #-16 * 16 - 16]
	ldp	q14, q15, [\state, #-16 * 14 - 16]
	ldp	q12, q13, [\state, #-16 * 12 - 16]
	ldp	q10, q11, [\state, #-16 * 10 - 16]
	ldp	q8, q9, [\state, #-16 * 8 - 16]
	ldp	q6, q7, [\state, #-16 * 6 - 16]
	ldp	q4, q5, [\state, #-16 * 4 - 16]
	ldp	q2, q3, [\state, #-16 * 2 - 16]
	ldp	q0, q1, [\state, #-16 * 0 - 16]
0:
.endm
+1 −3
Original line number Diff line number Diff line
@@ -16,9 +16,7 @@

#define cpu_has_neon()		system_supports_fpsimd()

#define kernel_neon_begin()	kernel_neon_begin_partial(32)

void kernel_neon_begin_partial(u32 num_regs);
void kernel_neon_begin(void);
void kernel_neon_end(void);

#endif /* ! __ASM_NEON_H */
+32 −1
Original line number Diff line number Diff line
@@ -9,15 +9,46 @@
#ifndef __ASM_SIMD_H
#define __ASM_SIMD_H

#include <linux/compiler.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/types.h>

#ifdef CONFIG_KERNEL_MODE_NEON

DECLARE_PER_CPU(bool, kernel_neon_busy);

/*
 * may_use_simd - whether it is allowable at this time to issue SIMD
 *                instructions or access the SIMD register file
 *
 * Callers must not assume that the result remains true beyond the next
 * preempt_enable() or return from softirq context.
 */
static __must_check inline bool may_use_simd(void)
{
	return true;
	/*
	 * The raw_cpu_read() is racy if called with preemption enabled.
	 * This is not a bug: kernel_neon_busy is only set when
	 * preemption is disabled, so we cannot migrate to another CPU
	 * while it is set, nor can we migrate to a CPU where it is set.
	 * So, if we find it clear on some CPU then we're guaranteed to
	 * find it clear on any CPU we could migrate to.
	 *
	 * If we are in between kernel_neon_begin()...kernel_neon_end(),
	 * the flag will be set, but preemption is also disabled, so we
	 * can't migrate to another CPU and spuriously see it become
	 * false.
	 */
	return !in_irq() && !in_nmi() && !raw_cpu_read(kernel_neon_busy);
}

#else /* ! CONFIG_KERNEL_MODE_NEON */

static __must_check inline bool may_use_simd(void) {
	return false;
}

#endif /* ! CONFIG_KERNEL_MODE_NEON */

#endif
+0 −24
Original line number Diff line number Diff line
@@ -41,27 +41,3 @@ ENTRY(fpsimd_load_state)
	fpsimd_restore x0, 8
	ret
ENDPROC(fpsimd_load_state)

#ifdef CONFIG_KERNEL_MODE_NEON

/*
 * Save the bottom n FP registers.
 *
 * x0 - pointer to struct fpsimd_partial_state
 */
ENTRY(fpsimd_save_partial_state)
	fpsimd_save_partial x0, 1, 8, 9
	ret
ENDPROC(fpsimd_save_partial_state)

/*
 * Load the bottom n FP registers.
 *
 * x0 - pointer to struct fpsimd_partial_state
 */
ENTRY(fpsimd_load_partial_state)
	fpsimd_restore_partial x0, 8, 9
	ret
ENDPROC(fpsimd_load_partial_state)

#endif
Loading