Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 90ec4e1d authored by Will Deacon, committed by Sami Tolvanen
Browse files

BACKPORT: arm64: entry: use ldp/stp instead of push/pop when saving/restoring regs



The push/pop instructions can be suboptimal when saving/restoring large
amounts of data to/from the stack, for example on entry/exit from the
kernel. This is because:

  (1) They act on descending addresses (i.e. the newly decremented sp),
      which may defeat some hardware prefetchers

  (2) They introduce an implicit dependency between each instruction, as
      the sp has to be updated in order to resolve the address of the
      next access.

This patch removes the push/pop instructions from our kernel entry/exit
macros in favour of ldp/stp plus offset.

Signed-off-by: Will Deacon <will.deacon@arm.com>

Bug: 31432001
Change-Id: Iacc638e1c93f6490dd615630901ca3804459be26
(cherry picked from commit 63648dd20fa0780ab6c1e923b5c276d257422cb3)
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
parent 6079041f
Loading
Loading
Loading
Loading
+36 −38
Original line number Original line Diff line number Diff line
@@ -66,25 +66,26 @@
#define BAD_ERROR	3
#define BAD_ERROR	3


	.macro	kernel_entry, el, regsize = 64
	.macro	kernel_entry, el, regsize = 64
	sub	sp, sp, #S_FRAME_SIZE - S_LR	// room for LR, SP, SPSR, ELR
	sub	sp, sp, #S_FRAME_SIZE
	.if	\regsize == 32
	.if	\regsize == 32
	mov	w0, w0				// zero upper 32 bits of x0
	mov	w0, w0				// zero upper 32 bits of x0
	.endif
	.endif
	push	x28, x29
	stp	x0, x1, [sp, #16 * 0]
	push	x26, x27
	stp	x2, x3, [sp, #16 * 1]
	push	x24, x25
	stp	x4, x5, [sp, #16 * 2]
	push	x22, x23
	stp	x6, x7, [sp, #16 * 3]
	push	x20, x21
	stp	x8, x9, [sp, #16 * 4]
	push	x18, x19
	stp	x10, x11, [sp, #16 * 5]
	push	x16, x17
	stp	x12, x13, [sp, #16 * 6]
	push	x14, x15
	stp	x14, x15, [sp, #16 * 7]
	push	x12, x13
	stp	x16, x17, [sp, #16 * 8]
	push	x10, x11
	stp	x18, x19, [sp, #16 * 9]
	push	x8, x9
	stp	x20, x21, [sp, #16 * 10]
	push	x6, x7
	stp	x22, x23, [sp, #16 * 11]
	push	x4, x5
	stp	x24, x25, [sp, #16 * 12]
	push	x2, x3
	stp	x26, x27, [sp, #16 * 13]
	push	x0, x1
	stp	x28, x29, [sp, #16 * 14]

	.if	\el == 0
	.if	\el == 0
	mrs	x21, sp_el0
	mrs	x21, sp_el0
	get_thread_info tsk			// Ensure MDSCR_EL1.SS is clear,
	get_thread_info tsk			// Ensure MDSCR_EL1.SS is clear,
@@ -140,32 +141,29 @@ alternative_else
alternative_endif
alternative_endif
#endif
#endif
	.endif
	.endif
	msr	elr_el1, x21			// set up the return data
	msr	spsr_el1, x22
	.if	\ret
	.if	\ret
	ldr	x1, [sp, #S_X1]			// preserve x0 (syscall return)
	ldr	x1, [sp, #S_X1]			// preserve x0 (syscall return)
	add	sp, sp, S_X2
	.else
	.else
	pop	x0, x1
	ldp	x0, x1, [sp, #16 * 0]
	.endif
	pop	x2, x3				// load the rest of the registers
	pop	x4, x5
	pop	x6, x7
	pop	x8, x9
	msr	elr_el1, x21			// set up the return data
	msr	spsr_el1, x22
	.if	\el == 0
	msr	sp_el0, x23
	.endif
	.endif
	pop	x10, x11
	ldp	x2, x3, [sp, #16 * 1]
	pop	x12, x13
	ldp	x4, x5, [sp, #16 * 2]
	pop	x14, x15
	ldp	x6, x7, [sp, #16 * 3]
	pop	x16, x17
	ldp	x8, x9, [sp, #16 * 4]
	pop	x18, x19
	ldp	x10, x11, [sp, #16 * 5]
	pop	x20, x21
	ldp	x12, x13, [sp, #16 * 6]
	pop	x22, x23
	ldp	x14, x15, [sp, #16 * 7]
	pop	x24, x25
	ldp	x16, x17, [sp, #16 * 8]
	pop	x26, x27
	ldp	x18, x19, [sp, #16 * 9]
	pop	x28, x29
	ldp	x20, x21, [sp, #16 * 10]
	ldr	lr, [sp], #S_FRAME_SIZE - S_LR	// load LR and restore SP
	ldp	x22, x23, [sp, #16 * 11]
	ldp	x24, x25, [sp, #16 * 12]
	ldp	x26, x27, [sp, #16 * 13]
	ldp	x28, x29, [sp, #16 * 14]
	ldr	lr, [sp, #S_LR]
	add	sp, sp, #S_FRAME_SIZE		// restore sp
	eret					// return to kernel
	eret					// return to kernel
	.endm
	.endm