Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e5b954e8 authored by Dave Watson's avatar Dave Watson Committed by Herbert Xu
Browse files

crypto: aesni - Use unaligned loads from gcm_context_data



A regression was reported bisecting to 1476db2d
"Move HashKey computation from stack to gcm_context".  That diff
moved HashKey computation from the stack, which was explicitly aligned
in the asm, to a struct provided from the C code, depending on
AESNI_ALIGN_ATTR for alignment.   It appears some compilers may not
align this struct correctly, resulting in a crash on the movdqa
instruction when attempting to encrypt or decrypt data.

Fix by using unaligned loads for the HashKeys.  On modern
hardware there is no perf difference between the unaligned and
aligned loads.  All other accesses to gcm_context_data already use
unaligned loads.

Reported-by: default avatarMauro Rossi <issor.oruam@gmail.com>
Fixes: 1476db2d ("Move HashKey computation from stack to gcm_context")
Cc: <stable@vger.kernel.org>
Signed-off-by: default avatarDave Watson <davejwatson@fb.com>
Signed-off-by: default avatarHerbert Xu <herbert@gondor.apana.org.au>
parent 65b2c12d
Loading
Loading
Loading
Loading
+33 −33
Original line number Diff line number Diff line
@@ -223,34 +223,34 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff
	pcmpeqd TWOONE(%rip), \TMP2
	pand	POLY(%rip), \TMP2
	pxor	\TMP2, \TMP3
	movdqa	\TMP3, HashKey(%arg2)
	movdqu	\TMP3, HashKey(%arg2)

	movdqa	   \TMP3, \TMP5
	pshufd	   $78, \TMP3, \TMP1
	pxor	   \TMP3, \TMP1
	movdqa	   \TMP1, HashKey_k(%arg2)
	movdqu	   \TMP1, HashKey_k(%arg2)

	GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
# TMP5 = HashKey^2<<1 (mod poly)
	movdqa	   \TMP5, HashKey_2(%arg2)
	movdqu	   \TMP5, HashKey_2(%arg2)
# HashKey_2 = HashKey^2<<1 (mod poly)
	pshufd	   $78, \TMP5, \TMP1
	pxor	   \TMP5, \TMP1
	movdqa	   \TMP1, HashKey_2_k(%arg2)
	movdqu	   \TMP1, HashKey_2_k(%arg2)

	GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
# TMP5 = HashKey^3<<1 (mod poly)
	movdqa	   \TMP5, HashKey_3(%arg2)
	movdqu	   \TMP5, HashKey_3(%arg2)
	pshufd	   $78, \TMP5, \TMP1
	pxor	   \TMP5, \TMP1
	movdqa	   \TMP1, HashKey_3_k(%arg2)
	movdqu	   \TMP1, HashKey_3_k(%arg2)

	GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
# TMP5 = HashKey^3<<1 (mod poly)
	movdqa	   \TMP5, HashKey_4(%arg2)
	movdqu	   \TMP5, HashKey_4(%arg2)
	pshufd	   $78, \TMP5, \TMP1
	pxor	   \TMP5, \TMP1
	movdqa	   \TMP1, HashKey_4_k(%arg2)
	movdqu	   \TMP1, HashKey_4_k(%arg2)
.endm

# GCM_INIT initializes a gcm_context struct to prepare for encoding/decoding.
@@ -271,7 +271,7 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff
	movdqu %xmm0, CurCount(%arg2) # ctx_data.current_counter = iv

	PRECOMPUTE \SUBKEY, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
	movdqa HashKey(%arg2), %xmm13
	movdqu HashKey(%arg2), %xmm13

	CALC_AAD_HASH %xmm13, \AAD, \AADLEN, %xmm0, %xmm1, %xmm2, %xmm3, \
	%xmm4, %xmm5, %xmm6
@@ -997,7 +997,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
	pshufd	  $78, \XMM5, \TMP6
	pxor	  \XMM5, \TMP6
	paddd     ONE(%rip), \XMM0		# INCR CNT
	movdqa	  HashKey_4(%arg2), \TMP5
	movdqu	  HashKey_4(%arg2), \TMP5
	PCLMULQDQ 0x11, \TMP5, \TMP4           # TMP4 = a1*b1
	movdqa    \XMM0, \XMM1
	paddd     ONE(%rip), \XMM0		# INCR CNT
@@ -1016,7 +1016,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
	pxor	  (%arg1), \XMM2
	pxor	  (%arg1), \XMM3
	pxor	  (%arg1), \XMM4
	movdqa	  HashKey_4_k(%arg2), \TMP5
	movdqu	  HashKey_4_k(%arg2), \TMP5
	PCLMULQDQ 0x00, \TMP5, \TMP6           # TMP6 = (a1+a0)*(b1+b0)
	movaps 0x10(%arg1), \TMP1
	AESENC	  \TMP1, \XMM1              # Round 1
@@ -1031,7 +1031,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
	movdqa	  \XMM6, \TMP1
	pshufd	  $78, \XMM6, \TMP2
	pxor	  \XMM6, \TMP2
	movdqa	  HashKey_3(%arg2), \TMP5
	movdqu	  HashKey_3(%arg2), \TMP5
	PCLMULQDQ 0x11, \TMP5, \TMP1           # TMP1 = a1 * b1
	movaps 0x30(%arg1), \TMP3
	AESENC    \TMP3, \XMM1              # Round 3
@@ -1044,7 +1044,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
	AESENC	  \TMP3, \XMM2
	AESENC	  \TMP3, \XMM3
	AESENC	  \TMP3, \XMM4
	movdqa	  HashKey_3_k(%arg2), \TMP5
	movdqu	  HashKey_3_k(%arg2), \TMP5
	PCLMULQDQ 0x00, \TMP5, \TMP2           # TMP2 = (a1+a0)*(b1+b0)
	movaps 0x50(%arg1), \TMP3
	AESENC	  \TMP3, \XMM1              # Round 5
@@ -1058,7 +1058,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
	movdqa	  \XMM7, \TMP1
	pshufd	  $78, \XMM7, \TMP2
	pxor	  \XMM7, \TMP2
	movdqa	  HashKey_2(%arg2), \TMP5
	movdqu	  HashKey_2(%arg2), \TMP5

        # Multiply TMP5 * HashKey using karatsuba

@@ -1074,7 +1074,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
	AESENC	  \TMP3, \XMM2
	AESENC	  \TMP3, \XMM3
	AESENC	  \TMP3, \XMM4
	movdqa	  HashKey_2_k(%arg2), \TMP5
	movdqu	  HashKey_2_k(%arg2), \TMP5
	PCLMULQDQ 0x00, \TMP5, \TMP2           # TMP2 = (a1+a0)*(b1+b0)
	movaps 0x80(%arg1), \TMP3
	AESENC	  \TMP3, \XMM1             # Round 8
@@ -1092,7 +1092,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
	movdqa	  \XMM8, \TMP1
	pshufd	  $78, \XMM8, \TMP2
	pxor	  \XMM8, \TMP2
	movdqa	  HashKey(%arg2), \TMP5
	movdqu	  HashKey(%arg2), \TMP5
	PCLMULQDQ 0x11, \TMP5, \TMP1          # TMP1 = a1*b1
	movaps 0x90(%arg1), \TMP3
	AESENC	  \TMP3, \XMM1            # Round 9
@@ -1121,7 +1121,7 @@ aes_loop_par_enc_done\@:
	AESENCLAST \TMP3, \XMM2
	AESENCLAST \TMP3, \XMM3
	AESENCLAST \TMP3, \XMM4
	movdqa    HashKey_k(%arg2), \TMP5
	movdqu    HashKey_k(%arg2), \TMP5
	PCLMULQDQ 0x00, \TMP5, \TMP2          # TMP2 = (a1+a0)*(b1+b0)
	movdqu	  (%arg4,%r11,1), \TMP3
	pxor	  \TMP3, \XMM1                 # Ciphertext/Plaintext XOR EK
@@ -1205,7 +1205,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
	pshufd	  $78, \XMM5, \TMP6
	pxor	  \XMM5, \TMP6
	paddd     ONE(%rip), \XMM0		# INCR CNT
	movdqa	  HashKey_4(%arg2), \TMP5
	movdqu	  HashKey_4(%arg2), \TMP5
	PCLMULQDQ 0x11, \TMP5, \TMP4           # TMP4 = a1*b1
	movdqa    \XMM0, \XMM1
	paddd     ONE(%rip), \XMM0		# INCR CNT
@@ -1224,7 +1224,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
	pxor	  (%arg1), \XMM2
	pxor	  (%arg1), \XMM3
	pxor	  (%arg1), \XMM4
	movdqa	  HashKey_4_k(%arg2), \TMP5
	movdqu	  HashKey_4_k(%arg2), \TMP5
	PCLMULQDQ 0x00, \TMP5, \TMP6           # TMP6 = (a1+a0)*(b1+b0)
	movaps 0x10(%arg1), \TMP1
	AESENC	  \TMP1, \XMM1              # Round 1
@@ -1239,7 +1239,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
	movdqa	  \XMM6, \TMP1
	pshufd	  $78, \XMM6, \TMP2
	pxor	  \XMM6, \TMP2
	movdqa	  HashKey_3(%arg2), \TMP5
	movdqu	  HashKey_3(%arg2), \TMP5
	PCLMULQDQ 0x11, \TMP5, \TMP1           # TMP1 = a1 * b1
	movaps 0x30(%arg1), \TMP3
	AESENC    \TMP3, \XMM1              # Round 3
@@ -1252,7 +1252,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
	AESENC	  \TMP3, \XMM2
	AESENC	  \TMP3, \XMM3
	AESENC	  \TMP3, \XMM4
	movdqa	  HashKey_3_k(%arg2), \TMP5
	movdqu	  HashKey_3_k(%arg2), \TMP5
	PCLMULQDQ 0x00, \TMP5, \TMP2           # TMP2 = (a1+a0)*(b1+b0)
	movaps 0x50(%arg1), \TMP3
	AESENC	  \TMP3, \XMM1              # Round 5
@@ -1266,7 +1266,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
	movdqa	  \XMM7, \TMP1
	pshufd	  $78, \XMM7, \TMP2
	pxor	  \XMM7, \TMP2
	movdqa	  HashKey_2(%arg2), \TMP5
	movdqu	  HashKey_2(%arg2), \TMP5

        # Multiply TMP5 * HashKey using karatsuba

@@ -1282,7 +1282,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
	AESENC	  \TMP3, \XMM2
	AESENC	  \TMP3, \XMM3
	AESENC	  \TMP3, \XMM4
	movdqa	  HashKey_2_k(%arg2), \TMP5
	movdqu	  HashKey_2_k(%arg2), \TMP5
	PCLMULQDQ 0x00, \TMP5, \TMP2           # TMP2 = (a1+a0)*(b1+b0)
	movaps 0x80(%arg1), \TMP3
	AESENC	  \TMP3, \XMM1             # Round 8
@@ -1300,7 +1300,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
	movdqa	  \XMM8, \TMP1
	pshufd	  $78, \XMM8, \TMP2
	pxor	  \XMM8, \TMP2
	movdqa	  HashKey(%arg2), \TMP5
	movdqu	  HashKey(%arg2), \TMP5
	PCLMULQDQ 0x11, \TMP5, \TMP1          # TMP1 = a1*b1
	movaps 0x90(%arg1), \TMP3
	AESENC	  \TMP3, \XMM1            # Round 9
@@ -1329,7 +1329,7 @@ aes_loop_par_dec_done\@:
	AESENCLAST \TMP3, \XMM2
	AESENCLAST \TMP3, \XMM3
	AESENCLAST \TMP3, \XMM4
	movdqa    HashKey_k(%arg2), \TMP5
	movdqu    HashKey_k(%arg2), \TMP5
	PCLMULQDQ 0x00, \TMP5, \TMP2          # TMP2 = (a1+a0)*(b1+b0)
	movdqu	  (%arg4,%r11,1), \TMP3
	pxor	  \TMP3, \XMM1                 # Ciphertext/Plaintext XOR EK
@@ -1405,10 +1405,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
	movdqa	  \XMM1, \TMP6
	pshufd	  $78, \XMM1, \TMP2
	pxor	  \XMM1, \TMP2
	movdqa	  HashKey_4(%arg2), \TMP5
	movdqu	  HashKey_4(%arg2), \TMP5
	PCLMULQDQ 0x11, \TMP5, \TMP6       # TMP6 = a1*b1
	PCLMULQDQ 0x00, \TMP5, \XMM1       # XMM1 = a0*b0
	movdqa	  HashKey_4_k(%arg2), \TMP4
	movdqu	  HashKey_4_k(%arg2), \TMP4
	PCLMULQDQ 0x00, \TMP4, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
	movdqa	  \XMM1, \XMMDst
	movdqa	  \TMP2, \XMM1              # result in TMP6, XMMDst, XMM1
@@ -1418,10 +1418,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
	movdqa	  \XMM2, \TMP1
	pshufd	  $78, \XMM2, \TMP2
	pxor	  \XMM2, \TMP2
	movdqa	  HashKey_3(%arg2), \TMP5
	movdqu	  HashKey_3(%arg2), \TMP5
	PCLMULQDQ 0x11, \TMP5, \TMP1       # TMP1 = a1*b1
	PCLMULQDQ 0x00, \TMP5, \XMM2       # XMM2 = a0*b0
	movdqa	  HashKey_3_k(%arg2), \TMP4
	movdqu	  HashKey_3_k(%arg2), \TMP4
	PCLMULQDQ 0x00, \TMP4, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
	pxor	  \TMP1, \TMP6
	pxor	  \XMM2, \XMMDst
@@ -1433,10 +1433,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
	movdqa	  \XMM3, \TMP1
	pshufd	  $78, \XMM3, \TMP2
	pxor	  \XMM3, \TMP2
	movdqa	  HashKey_2(%arg2), \TMP5
	movdqu	  HashKey_2(%arg2), \TMP5
	PCLMULQDQ 0x11, \TMP5, \TMP1       # TMP1 = a1*b1
	PCLMULQDQ 0x00, \TMP5, \XMM3       # XMM3 = a0*b0
	movdqa	  HashKey_2_k(%arg2), \TMP4
	movdqu	  HashKey_2_k(%arg2), \TMP4
	PCLMULQDQ 0x00, \TMP4, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
	pxor	  \TMP1, \TMP6
	pxor	  \XMM3, \XMMDst
@@ -1446,10 +1446,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
	movdqa	  \XMM4, \TMP1
	pshufd	  $78, \XMM4, \TMP2
	pxor	  \XMM4, \TMP2
	movdqa	  HashKey(%arg2), \TMP5
	movdqu	  HashKey(%arg2), \TMP5
	PCLMULQDQ 0x11, \TMP5, \TMP1	    # TMP1 = a1*b1
	PCLMULQDQ 0x00, \TMP5, \XMM4       # XMM4 = a0*b0
	movdqa	  HashKey_k(%arg2), \TMP4
	movdqu	  HashKey_k(%arg2), \TMP4
	PCLMULQDQ 0x00, \TMP4, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
	pxor	  \TMP1, \TMP6
	pxor	  \XMM4, \XMMDst