Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Verified Commit f1cb0ea7 authored by Ard Biesheuvel's avatar Ard Biesheuvel Committed by Ahmed Harhash
Browse files

crypto: arm64/aes-modes - get rid of literal load of addend vector

commit ed6ed11830a9ded520db31a6e2b69b6b0a1eb0e2 upstream.

Replace the literal load of the addend vector with a sequence that
performs each add individually. This sequence is only 2 instructions
longer than the original, and 2% faster on Cortex-A53.

This is an improvement by itself, but also works around a Clang issue,
whose integrated assembler does not implement the GNU ARM asm syntax
completely, and does not support the =literal notation for FP registers
(more info at https://bugs.llvm.org/show_bug.cgi?id=38642

)

Cc: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: default avatarArd Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: default avatarNick Desaulniers <ndesaulniers@google.com>
Signed-off-by: default avatarHerbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 4b016b39
Loading
Loading
Loading
Loading
+9 −7
Original line number Diff line number Diff line
@@ -232,17 +232,19 @@ AES_ENTRY(aes_ctr_encrypt)
	bmi		.Lctr1x
	cmn		w6, #4			/* 32 bit overflow? */
	bcs		.Lctr1x
	ldr		q8, =0x30000000200000001	/* addends 1,2,3[,0] */
	dup		v7.4s, w6
	add		w7, w6, #1
	mov		v0.16b, v4.16b
	add		v7.4s, v7.4s, v8.4s
	add		w8, w6, #2
	mov		v1.16b, v4.16b
	rev32		v8.16b, v7.16b
	add		w9, w6, #3
	mov		v2.16b, v4.16b
	rev		w7, w7
	mov		v3.16b, v4.16b
	mov		v1.s[3], v8.s[0]
	mov		v2.s[3], v8.s[1]
	mov		v3.s[3], v8.s[2]
	rev		w8, w8
	mov		v1.s[3], w7
	rev		w9, w9
	mov		v2.s[3], w8
	mov		v3.s[3], w9
	ld1		{v5.16b-v7.16b}, [x20], #48	/* get 3 input blocks */
	bl		aes_encrypt_block4x
	eor		v0.16b, v5.16b, v0.16b