Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 80dca473 authored by Mathias Krause, committed by Herbert Xu
Browse files

crypto: aesni - fix counter overflow handling in "by8" variant



The "by8" CTR AVX implementation fails to properly handle counter
overflows. That was the reason it got disabled in commit 7da4b29d
("crypto: aesni - disable "by8" AVX CTR optimization").

Fix the overflow handling by incrementing the counter block as a double
quad word, i.e. as a 128 bit value, and testing for overflows afterwards.
We need to use VPTEST to do so as VPADD* does not set the flags itself
and silently drops the carry bit.

As this change adds branches to the hot path, minor performance
regressions might be a side effect. But, OTOH, we now have a conforming
implementation -- the preferable goal.

A tcrypt test on a SandyBridge system (i7-2620M) showed almost identical
numbers for the old and this version with differences within the noise
range. A dm-crypt test with the fixed version gave even slightly better
results for this version. So the performance impact might not be as big
as expected.

Tested-by: Romain Francoise <romain@orebokech.com>
Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Chandramouli Narayanan <mouli@linux.intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent 7a1ae9c0
Loading
Loading
Loading
Loading
+15 −2
Original line number Original line Diff line number Diff line
@@ -108,6 +108,10 @@


byteswap_const:
byteswap_const:
	.octa 0x000102030405060708090A0B0C0D0E0F
	.octa 0x000102030405060708090A0B0C0D0E0F
ddq_low_msk:
	.octa 0x0000000000000000FFFFFFFFFFFFFFFF
ddq_high_add_1:
	.octa 0x00000000000000010000000000000000
ddq_add_1:
ddq_add_1:
	.octa 0x00000000000000000000000000000001
	.octa 0x00000000000000000000000000000001
ddq_add_2:
ddq_add_2:
@@ -169,7 +173,12 @@ ddq_add_8:
	.rept (by - 1)
	.rept (by - 1)
		club DDQ_DATA, i
		club DDQ_DATA, i
		club XDATA, i
		club XDATA, i
		vpaddd	var_ddq_add(%rip), xcounter, var_xdata
		vpaddq	var_ddq_add(%rip), xcounter, var_xdata
		vptest	ddq_low_msk(%rip), var_xdata
		jnz 1f
		vpaddq	ddq_high_add_1(%rip), var_xdata, var_xdata
		vpaddq	ddq_high_add_1(%rip), xcounter, xcounter
		1:
		vpshufb	xbyteswap, var_xdata, var_xdata
		vpshufb	xbyteswap, var_xdata, var_xdata
		.set i, (i +1)
		.set i, (i +1)
	.endr
	.endr
@@ -178,7 +187,11 @@ ddq_add_8:


	vpxor	xkey0, xdata0, xdata0
	vpxor	xkey0, xdata0, xdata0
	club DDQ_DATA, by
	club DDQ_DATA, by
	vpaddd	var_ddq_add(%rip), xcounter, xcounter
	vpaddq	var_ddq_add(%rip), xcounter, xcounter
	vptest	ddq_low_msk(%rip), xcounter
	jnz	1f
	vpaddq	ddq_high_add_1(%rip), xcounter, xcounter
	1:


	.set i, 1
	.set i, 1
	.rept (by - 1)
	.rept (by - 1)