Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 4a97abd4 authored by Ard Biesheuvel's avatar Ard Biesheuvel Committed by Will Deacon
Browse files

arm64/crypto: issue aese/aesmc instructions in pairs



This changes the AES core transform implementations to issue aese/aesmc
(and aesd/aesimc) in pairs. This enables a micro-architectural optimization
in recent Cortex-A5x cores that improves performance by 50-90%.

Measured performance in cycles per byte (Cortex-A57):

                CBC enc         CBC dec         CTR
  before        3.64            1.34            1.32
  after         1.95            0.85            0.93

Note that this results in a ~5% performance decrease for older cores.

Signed-off-by: default avatarArd Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: default avatarWill Deacon <will.deacon@arm.com>
parent b63dbef9
Loading
Loading
Loading
Loading
+6 −6
Original line number Diff line number Diff line
@@ -101,19 +101,19 @@ ENTRY(ce_aes_ccm_final)
0:	mov	v4.16b, v3.16b
1:	ld1	{v5.2d}, [x2], #16		/* load next round key */
	aese	v0.16b, v4.16b
	aese	v1.16b, v4.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v4.16b
	aesmc	v1.16b, v1.16b
2:	ld1	{v3.2d}, [x2], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aese	v1.16b, v5.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v5.16b
	aesmc	v1.16b, v1.16b
3:	ld1	{v4.2d}, [x2], #16		/* load next round key */
	subs	w3, w3, #3
	aese	v0.16b, v3.16b
	aese	v1.16b, v3.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v3.16b
	aesmc	v1.16b, v1.16b
	bpl	1b
	aese	v0.16b, v4.16b
@@ -146,19 +146,19 @@ ENDPROC(ce_aes_ccm_final)
	ld1	{v5.2d}, [x10], #16		/* load 2nd round key */
2:	/* inner loop: 3 rounds, 2x interleaved */
	aese	v0.16b, v4.16b
	aese	v1.16b, v4.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v4.16b
	aesmc	v1.16b, v1.16b
3:	ld1	{v3.2d}, [x10], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aese	v1.16b, v5.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v5.16b
	aesmc	v1.16b, v1.16b
4:	ld1	{v4.2d}, [x10], #16		/* load next round key */
	subs	w7, w7, #3
	aese	v0.16b, v3.16b
	aese	v1.16b, v3.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v3.16b
	aesmc	v1.16b, v1.16b
	ld1	{v5.2d}, [x10], #16		/* load next round key */
	bpl	2b
+3 −7
Original line number Diff line number Diff line
@@ -45,18 +45,14 @@

	.macro		do_enc_Nx, de, mc, k, i0, i1, i2, i3
	aes\de		\i0\().16b, \k\().16b
	.ifnb		\i1
	aes\de		\i1\().16b, \k\().16b
	.ifnb		\i3
	aes\de		\i2\().16b, \k\().16b
	aes\de		\i3\().16b, \k\().16b
	.endif
	.endif
	aes\mc		\i0\().16b, \i0\().16b
	.ifnb		\i1
	aes\de		\i1\().16b, \k\().16b
	aes\mc		\i1\().16b, \i1\().16b
	.ifnb		\i3
	aes\de		\i2\().16b, \k\().16b
	aes\mc		\i2\().16b, \i2\().16b
	aes\de		\i3\().16b, \k\().16b
	aes\mc		\i3\().16b, \i3\().16b
	.endif
	.endif