crypto: aesni - Add support for 192 & 256 bit keys to AESNI RFC4106 (e31ac32d) · Commits · e / devices / android_kernel_sony_msm8998

arch/x86/crypto/aesni-intel_asm.S

+177 −166

Original line number	Diff line number	Diff line
		@@ -32,12 +32,23 @@
		#include <linux/linkage.h>
		#include <asm/inst.h>

		/*
		* The following macros are used to move an (un)aligned 16 byte value to/from
		* an XMM register. This can be done for either FP or integer values: for FP use
		* movaps (move aligned packed single), or for integer use movdqa (move double quad
		* aligned). It doesn't make a performance difference which instruction is used
		* since Nehalem (original Core i7) was released. However, the movaps is a byte
		* shorter, so that is the one we'll use for now. (same for unaligned).
		*/
		#define MOVADQ movaps
		#define MOVUDQ movups

		#ifdef __x86_64__

		.data
		.align 16
		.Lgf128mul_x_ble_mask:
		.octa 0x00000000000000010000000000000087

		POLY: .octa 0xC2000000000000000000000000000001
		TWOONE: .octa 0x00000001000000000000000000000001

		@@ -89,6 +100,7 @@ enc: .octa 0x2
		#define arg8 STACK_OFFSET+16(%r14)
		#define arg9 STACK_OFFSET+24(%r14)
		#define arg10 STACK_OFFSET+32(%r14)
		#define keysize 21516(%arg1)
		#endif


		@@ -213,10 +225,12 @@ enc: .octa 0x2

		.macro INITIAL_BLOCKS_DEC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \
		XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
		MOVADQ SHUF_MASK(%rip), %xmm14
		mov arg7, %r10 # %r10 = AAD
		mov arg8, %r12 # %r12 = aadLen
		mov %r12, %r11
		pxor %xmm\i, %xmm\i

		_get_AAD_loop\num_initial_blocks\operation:
		movd (%r10), \TMP1
		pslldq $12, \TMP1
		@@ -225,16 +239,18 @@ _get_AAD_loop\num_initial_blocks\operation:
		add $4, %r10
		sub $4, %r12
		jne _get_AAD_loop\num_initial_blocks\operation

		cmp $16, %r11
		je _get_AAD_loop2_done\num_initial_blocks\operation

		mov $16, %r12
		_get_AAD_loop2\num_initial_blocks\operation:
		psrldq $4, %xmm\i
		sub $4, %r12
		cmp %r11, %r12
		jne _get_AAD_loop2\num_initial_blocks\operation

		_get_AAD_loop2_done\num_initial_blocks\operation:
		movdqa SHUF_MASK(%rip), %xmm14
		PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data

		xor %r11, %r11 # initialise the data pointer offset as zero
		@@ -243,59 +259,34 @@ _get_AAD_loop2_done\num_initial_blocks\operation:

		mov %arg5, %rax # %rax = *Y0
		movdqu (%rax), \XMM0 # XMM0 = Y0
		movdqa SHUF_MASK(%rip), %xmm14
		PSHUFB_XMM %xmm14, \XMM0

		.if (\i == 5) \|\| (\i == 6) \|\| (\i == 7)
		MOVADQ ONE(%RIP),\TMP1
		MOVADQ (%arg1),\TMP2
		.irpc index, \i_seq
		paddd ONE(%rip), \XMM0 # INCR Y0
		paddd \TMP1, \XMM0 # INCR Y0
		movdqa \XMM0, %xmm\index
		movdqa SHUF_MASK(%rip), %xmm14
		PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap

		.endr
		.irpc index, \i_seq
		pxor 16*0(%arg1), %xmm\index
		.endr
		.irpc index, \i_seq
		movaps 0x10(%rdi), \TMP1
		AESENC \TMP1, %xmm\index # Round 1
		.endr
		.irpc index, \i_seq
		movaps 0x20(%arg1), \TMP1
		AESENC \TMP1, %xmm\index # Round 2
		.endr
		.irpc index, \i_seq
		movaps 0x30(%arg1), \TMP1
		AESENC \TMP1, %xmm\index # Round 2
		.endr
		.irpc index, \i_seq
		movaps 0x40(%arg1), \TMP1
		AESENC \TMP1, %xmm\index # Round 2
		.endr
		.irpc index, \i_seq
		movaps 0x50(%arg1), \TMP1
		AESENC \TMP1, %xmm\index # Round 2
		.endr
		.irpc index, \i_seq
		movaps 0x60(%arg1), \TMP1
		AESENC \TMP1, %xmm\index # Round 2
		.endr
		.irpc index, \i_seq
		movaps 0x70(%arg1), \TMP1
		AESENC \TMP1, %xmm\index # Round 2
		.endr
		.irpc index, \i_seq
		movaps 0x80(%arg1), \TMP1
		AESENC \TMP1, %xmm\index # Round 2
		pxor \TMP2, %xmm\index
		.endr
		lea 0x10(%arg1),%r10
		mov keysize,%eax
		shr $2,%eax # 128->4, 192->6, 256->8
		add $5,%eax # 128->9, 192->11, 256->13

		aes_loop_initial_dec\num_initial_blocks:
		MOVADQ (%r10),\TMP1
		.irpc index, \i_seq
		movaps 0x90(%arg1), \TMP1
		AESENC \TMP1, %xmm\index # Round 2
		AESENC \TMP1, %xmm\index
		.endr
		add $16,%r10
		sub $1,%eax
		jnz aes_loop_initial_dec\num_initial_blocks

		MOVADQ (%r10), \TMP1
		.irpc index, \i_seq
		movaps 0xa0(%arg1), \TMP1
		AESENCLAST \TMP1, %xmm\index # Round 10
		AESENCLAST \TMP1, %xmm\index # Last Round
		.endr
		.irpc index, \i_seq
		movdqu (%arg3 , %r11, 1), \TMP1
		@@ -305,9 +296,7 @@ _get_AAD_loop2_done\num_initial_blocks\operation:
		add $16, %r11

		movdqa \TMP1, %xmm\index
		movdqa SHUF_MASK(%rip), %xmm14
		PSHUFB_XMM %xmm14, %xmm\index

		# prepare plaintext/ciphertext for GHASH computation
		.endr
		.endif
		@@ -338,30 +327,28 @@ _get_AAD_loop2_done\num_initial_blocks\operation:
		* Precomputations for HashKey parallel with encryption of first 4 blocks.
		* HashKey_i_k holds XORed values of the low and high parts of the HashKey_i
		*/
		paddd ONE(%rip), \XMM0 # INCR Y0
		movdqa \XMM0, \XMM1
		movdqa SHUF_MASK(%rip), %xmm14
		MOVADQ ONE(%rip), \TMP1
		paddd \TMP1, \XMM0 # INCR Y0
		MOVADQ \XMM0, \XMM1
		PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap

		paddd ONE(%rip), \XMM0 # INCR Y0
		movdqa \XMM0, \XMM2
		movdqa SHUF_MASK(%rip), %xmm14
		paddd \TMP1, \XMM0 # INCR Y0
		MOVADQ \XMM0, \XMM2
		PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap

		paddd ONE(%rip), \XMM0 # INCR Y0
		movdqa \XMM0, \XMM3
		movdqa SHUF_MASK(%rip), %xmm14
		paddd \TMP1, \XMM0 # INCR Y0
		MOVADQ \XMM0, \XMM3
		PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap

		paddd ONE(%rip), \XMM0 # INCR Y0
		movdqa \XMM0, \XMM4
		movdqa SHUF_MASK(%rip), %xmm14
		paddd \TMP1, \XMM0 # INCR Y0
		MOVADQ \XMM0, \XMM4
		PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap

		pxor 16*0(%arg1), \XMM1
		pxor 16*0(%arg1), \XMM2
		pxor 16*0(%arg1), \XMM3
		pxor 16*0(%arg1), \XMM4
		MOVADQ 0(%arg1),\TMP1
		pxor \TMP1, \XMM1
		pxor \TMP1, \XMM2
		pxor \TMP1, \XMM3
		pxor \TMP1, \XMM4
		movdqa \TMP3, \TMP5
		pshufd $78, \TMP3, \TMP1
		pxor \TMP3, \TMP1
		@@ -399,7 +386,23 @@ _get_AAD_loop2_done\num_initial_blocks\operation:
		pshufd $78, \TMP5, \TMP1
		pxor \TMP5, \TMP1
		movdqa \TMP1, HashKey_4_k(%rsp)
		movaps 0xa0(%arg1), \TMP2
		lea 0xa0(%arg1),%r10
		mov keysize,%eax
		shr $2,%eax # 128->4, 192->6, 256->8
		sub $4,%eax # 128->0, 192->2, 256->4
		jz aes_loop_pre_dec_done\num_initial_blocks

		aes_loop_pre_dec\num_initial_blocks:
		MOVADQ (%r10),\TMP2
		.irpc index, 1234
		AESENC \TMP2, %xmm\index
		.endr
		add $16,%r10
		sub $1,%eax
		jnz aes_loop_pre_dec\num_initial_blocks

		aes_loop_pre_dec_done\num_initial_blocks:
		MOVADQ (%r10), \TMP2
		AESENCLAST \TMP2, \XMM1
		AESENCLAST \TMP2, \XMM2
		AESENCLAST \TMP2, \XMM3
		@@ -421,15 +424,11 @@ _get_AAD_loop2_done\num_initial_blocks\operation:
		movdqu \XMM4, 16*3(%arg2 , %r11 , 1)
		movdqa \TMP1, \XMM4
		add $64, %r11
		movdqa SHUF_MASK(%rip), %xmm14
		PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
		pxor \XMMDst, \XMM1
		# combine GHASHed value with the corresponding ciphertext
		movdqa SHUF_MASK(%rip), %xmm14
		PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap
		movdqa SHUF_MASK(%rip), %xmm14
		PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
		movdqa SHUF_MASK(%rip), %xmm14
		PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap

		_initial_blocks_done\num_initial_blocks\operation:
		@@ -451,6 +450,7 @@ _initial_blocks_done\num_initial_blocks\operation:

		.macro INITIAL_BLOCKS_ENC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \
		XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
		MOVADQ SHUF_MASK(%rip), %xmm14
		mov arg7, %r10 # %r10 = AAD
		mov arg8, %r12 # %r12 = aadLen
		mov %r12, %r11
		@@ -472,7 +472,6 @@ _get_AAD_loop2\num_initial_blocks\operation:
		cmp %r11, %r12
		jne _get_AAD_loop2\num_initial_blocks\operation
		_get_AAD_loop2_done\num_initial_blocks\operation:
		movdqa SHUF_MASK(%rip), %xmm14
		PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data

		xor %r11, %r11 # initialise the data pointer offset as zero
		@@ -481,59 +480,35 @@ _get_AAD_loop2_done\num_initial_blocks\operation:

		mov %arg5, %rax # %rax = *Y0
		movdqu (%rax), \XMM0 # XMM0 = Y0
		movdqa SHUF_MASK(%rip), %xmm14
		PSHUFB_XMM %xmm14, \XMM0

		.if (\i == 5) \|\| (\i == 6) \|\| (\i == 7)
		.irpc index, \i_seq
		paddd ONE(%rip), \XMM0 # INCR Y0
		movdqa \XMM0, %xmm\index
		movdqa SHUF_MASK(%rip), %xmm14
		PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap

		.endr
		MOVADQ ONE(%RIP),\TMP1
		MOVADQ 0(%arg1),\TMP2
		.irpc index, \i_seq
		pxor 16*0(%arg1), %xmm\index
		.endr
		.irpc index, \i_seq
		movaps 0x10(%rdi), \TMP1
		AESENC \TMP1, %xmm\index # Round 1
		.endr
		.irpc index, \i_seq
		movaps 0x20(%arg1), \TMP1
		AESENC \TMP1, %xmm\index # Round 2
		.endr
		.irpc index, \i_seq
		movaps 0x30(%arg1), \TMP1
		AESENC \TMP1, %xmm\index # Round 2
		.endr
		.irpc index, \i_seq
		movaps 0x40(%arg1), \TMP1
		AESENC \TMP1, %xmm\index # Round 2
		.endr
		.irpc index, \i_seq
		movaps 0x50(%arg1), \TMP1
		AESENC \TMP1, %xmm\index # Round 2
		.endr
		.irpc index, \i_seq
		movaps 0x60(%arg1), \TMP1
		AESENC \TMP1, %xmm\index # Round 2
		.endr
		.irpc index, \i_seq
		movaps 0x70(%arg1), \TMP1
		AESENC \TMP1, %xmm\index # Round 2
		.endr
		.irpc index, \i_seq
		movaps 0x80(%arg1), \TMP1
		AESENC \TMP1, %xmm\index # Round 2
		paddd \TMP1, \XMM0 # INCR Y0
		MOVADQ \XMM0, %xmm\index
		PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap
		pxor \TMP2, %xmm\index
		.endr
		lea 0x10(%arg1),%r10
		mov keysize,%eax
		shr $2,%eax # 128->4, 192->6, 256->8
		add $5,%eax # 128->9, 192->11, 256->13

		aes_loop_initial_enc\num_initial_blocks:
		MOVADQ (%r10),\TMP1
		.irpc index, \i_seq
		movaps 0x90(%arg1), \TMP1
		AESENC \TMP1, %xmm\index # Round 2
		AESENC \TMP1, %xmm\index
		.endr
		add $16,%r10
		sub $1,%eax
		jnz aes_loop_initial_enc\num_initial_blocks

		MOVADQ (%r10), \TMP1
		.irpc index, \i_seq
		movaps 0xa0(%arg1), \TMP1
		AESENCLAST \TMP1, %xmm\index # Round 10
		AESENCLAST \TMP1, %xmm\index # Last Round
		.endr
		.irpc index, \i_seq
		movdqu (%arg3 , %r11, 1), \TMP1
		@@ -541,8 +516,6 @@ _get_AAD_loop2_done\num_initial_blocks\operation:
		movdqu %xmm\index, (%arg2 , %r11, 1)
		# write back plaintext/ciphertext for num_initial_blocks
		add $16, %r11

		movdqa SHUF_MASK(%rip), %xmm14
		PSHUFB_XMM %xmm14, %xmm\index

		# prepare plaintext/ciphertext for GHASH computation
		@@ -575,30 +548,28 @@ _get_AAD_loop2_done\num_initial_blocks\operation:
		* Precomputations for HashKey parallel with encryption of first 4 blocks.
		* HashKey_i_k holds XORed values of the low and high parts of the HashKey_i
		*/
		paddd ONE(%rip), \XMM0 # INCR Y0
		movdqa \XMM0, \XMM1
		movdqa SHUF_MASK(%rip), %xmm14
		MOVADQ ONE(%RIP),\TMP1
		paddd \TMP1, \XMM0 # INCR Y0
		MOVADQ \XMM0, \XMM1
		PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap

		paddd ONE(%rip), \XMM0 # INCR Y0
		movdqa \XMM0, \XMM2
		movdqa SHUF_MASK(%rip), %xmm14
		paddd \TMP1, \XMM0 # INCR Y0
		MOVADQ \XMM0, \XMM2
		PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap

		paddd ONE(%rip), \XMM0 # INCR Y0
		movdqa \XMM0, \XMM3
		movdqa SHUF_MASK(%rip), %xmm14
		paddd \TMP1, \XMM0 # INCR Y0
		MOVADQ \XMM0, \XMM3
		PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap

		paddd ONE(%rip), \XMM0 # INCR Y0
		movdqa \XMM0, \XMM4
		movdqa SHUF_MASK(%rip), %xmm14
		paddd \TMP1, \XMM0 # INCR Y0
		MOVADQ \XMM0, \XMM4
		PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap

		pxor 16*0(%arg1), \XMM1
		pxor 16*0(%arg1), \XMM2
		pxor 16*0(%arg1), \XMM3
		pxor 16*0(%arg1), \XMM4
		MOVADQ 0(%arg1),\TMP1
		pxor \TMP1, \XMM1
		pxor \TMP1, \XMM2
		pxor \TMP1, \XMM3
		pxor \TMP1, \XMM4
		movdqa \TMP3, \TMP5
		pshufd $78, \TMP3, \TMP1
		pxor \TMP3, \TMP1
		@@ -636,7 +607,23 @@ _get_AAD_loop2_done\num_initial_blocks\operation:
		pshufd $78, \TMP5, \TMP1
		pxor \TMP5, \TMP1
		movdqa \TMP1, HashKey_4_k(%rsp)
		movaps 0xa0(%arg1), \TMP2
		lea 0xa0(%arg1),%r10
		mov keysize,%eax
		shr $2,%eax # 128->4, 192->6, 256->8
		sub $4,%eax # 128->0, 192->2, 256->4
		jz aes_loop_pre_enc_done\num_initial_blocks

		aes_loop_pre_enc\num_initial_blocks:
		MOVADQ (%r10),\TMP2
		.irpc index, 1234
		AESENC \TMP2, %xmm\index
		.endr
		add $16,%r10
		sub $1,%eax
		jnz aes_loop_pre_enc\num_initial_blocks

		aes_loop_pre_enc_done\num_initial_blocks:
		MOVADQ (%r10), \TMP2
		AESENCLAST \TMP2, \XMM1
		AESENCLAST \TMP2, \XMM2
		AESENCLAST \TMP2, \XMM3
		@@ -655,15 +642,11 @@ _get_AAD_loop2_done\num_initial_blocks\operation:
		movdqu \XMM4, 16*3(%arg2 , %r11 , 1)

		add $64, %r11
		movdqa SHUF_MASK(%rip), %xmm14
		PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
		pxor \XMMDst, \XMM1
		# combine GHASHed value with the corresponding ciphertext
		movdqa SHUF_MASK(%rip), %xmm14
		PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap
		movdqa SHUF_MASK(%rip), %xmm14
		PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
		movdqa SHUF_MASK(%rip), %xmm14
		PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap

		_initial_blocks_done\num_initial_blocks\operation:
		@@ -794,7 +777,23 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
		AESENC \TMP3, \XMM3
		AESENC \TMP3, \XMM4
		PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0
		movaps 0xa0(%arg1), \TMP3
		lea 0xa0(%arg1),%r10
		mov keysize,%eax
		shr $2,%eax # 128->4, 192->6, 256->8
		sub $4,%eax # 128->0, 192->2, 256->4
		jz aes_loop_par_enc_done

		aes_loop_par_enc:
		MOVADQ (%r10),\TMP3
		.irpc index, 1234
		AESENC \TMP3, %xmm\index
		.endr
		add $16,%r10
		sub $1,%eax
		jnz aes_loop_par_enc

		aes_loop_par_enc_done:
		MOVADQ (%r10), \TMP3
		AESENCLAST \TMP3, \XMM1 # Round 10
		AESENCLAST \TMP3, \XMM2
		AESENCLAST \TMP3, \XMM3
		@@ -986,8 +985,24 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
		AESENC \TMP3, \XMM3
		AESENC \TMP3, \XMM4
		PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0
		movaps 0xa0(%arg1), \TMP3
		AESENCLAST \TMP3, \XMM1 # Round 10
		lea 0xa0(%arg1),%r10
		mov keysize,%eax
		shr $2,%eax # 128->4, 192->6, 256->8
		sub $4,%eax # 128->0, 192->2, 256->4
		jz aes_loop_par_dec_done

		aes_loop_par_dec:
		MOVADQ (%r10),\TMP3
		.irpc index, 1234
		AESENC \TMP3, %xmm\index
		.endr
		add $16,%r10
		sub $1,%eax
		jnz aes_loop_par_dec

		aes_loop_par_dec_done:
		MOVADQ (%r10), \TMP3
		AESENCLAST \TMP3, \XMM1 # last round
		AESENCLAST \TMP3, \XMM2
		AESENCLAST \TMP3, \XMM3
		AESENCLAST \TMP3, \XMM4
		@@ -1155,33 +1170,29 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
		pxor \TMP6, \XMMDst # reduced result is in XMMDst
		.endm

		/* Encryption of a single block done*/

		/* Encryption of a single block
		* uses eax & r10
		*/

		.macro ENCRYPT_SINGLE_BLOCK XMM0 TMP1

		pxor (%arg1), \XMM0
		movaps 16(%arg1), \TMP1
		AESENC \TMP1, \XMM0
		movaps 32(%arg1), \TMP1
		AESENC \TMP1, \XMM0
		movaps 48(%arg1), \TMP1
		AESENC \TMP1, \XMM0
		movaps 64(%arg1), \TMP1
		AESENC \TMP1, \XMM0
		movaps 80(%arg1), \TMP1
		AESENC \TMP1, \XMM0
		movaps 96(%arg1), \TMP1
		AESENC \TMP1, \XMM0
		movaps 112(%arg1), \TMP1
		AESENC \TMP1, \XMM0
		movaps 128(%arg1), \TMP1
		AESENC \TMP1, \XMM0
		movaps 144(%arg1), \TMP1
		mov keysize,%eax
		shr $2,%eax # 128->4, 192->6, 256->8
		add $5,%eax # 128->9, 192->11, 256->13
		lea 16(%arg1), %r10 # get first expanded key address

		_esb_loop_\@:
		MOVADQ (%r10),\TMP1
		AESENC \TMP1,\XMM0
		movaps 160(%arg1), \TMP1
		add $16,%r10
		sub $1,%eax
		jnz _esb_loop_\@

		MOVADQ (%r10),\TMP1
		AESENCLAST \TMP1,\XMM0
		.endm


		/*****************************************************************************
		* void aesni_gcm_dec(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary.
		* u8 *out, // Plaintext output. Encrypt in-place is allowed.

arch/x86/crypto/aesni-intel_glue.c

+28 −6

Original line number	Diff line number	Diff line
		@@ -43,6 +43,7 @@
		#include <asm/crypto/glue_helper.h>
		#endif


		/* This data is stored at the end of the crypto_tfm struct.
		* It's a type of per "session" data storage location.
		* This needs to be 16 byte aligned.
		@@ -182,7 +183,8 @@ static void aesni_gcm_enc_avx(void ctx, u8 out,
		u8 hash_subkey, const u8 aad, unsigned long aad_len,
		u8 *auth_tag, unsigned long auth_tag_len)
		{
		if (plaintext_len < AVX_GEN2_OPTSIZE) {
		struct crypto_aes_ctx aes_ctx = (struct crypto_aes_ctx)ctx;
		if ((plaintext_len < AVX_GEN2_OPTSIZE) \|\| (aes_ctx-> key_length != AES_KEYSIZE_128)){
		aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad,
		aad_len, auth_tag, auth_tag_len);
		} else {
		@@ -197,7 +199,8 @@ static void aesni_gcm_dec_avx(void ctx, u8 out,
		u8 hash_subkey, const u8 aad, unsigned long aad_len,
		u8 *auth_tag, unsigned long auth_tag_len)
		{
		if (ciphertext_len < AVX_GEN2_OPTSIZE) {
		struct crypto_aes_ctx aes_ctx = (struct crypto_aes_ctx)ctx;
		if ((ciphertext_len < AVX_GEN2_OPTSIZE) \|\| (aes_ctx-> key_length != AES_KEYSIZE_128)) {
		aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey, aad,
		aad_len, auth_tag, auth_tag_len);
		} else {
		@@ -231,7 +234,8 @@ static void aesni_gcm_enc_avx2(void ctx, u8 out,
		u8 hash_subkey, const u8 aad, unsigned long aad_len,
		u8 *auth_tag, unsigned long auth_tag_len)
		{
		if (plaintext_len < AVX_GEN2_OPTSIZE) {
		struct crypto_aes_ctx aes_ctx = (struct crypto_aes_ctx)ctx;
		if ((plaintext_len < AVX_GEN2_OPTSIZE) \|\| (aes_ctx-> key_length != AES_KEYSIZE_128)) {
		aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad,
		aad_len, auth_tag, auth_tag_len);
		} else if (plaintext_len < AVX_GEN4_OPTSIZE) {
		@@ -250,7 +254,8 @@ static void aesni_gcm_dec_avx2(void ctx, u8 out,
		u8 hash_subkey, const u8 aad, unsigned long aad_len,
		u8 *auth_tag, unsigned long auth_tag_len)
		{
		if (ciphertext_len < AVX_GEN2_OPTSIZE) {
		struct crypto_aes_ctx aes_ctx = (struct crypto_aes_ctx)ctx;
		if ((ciphertext_len < AVX_GEN2_OPTSIZE) \|\| (aes_ctx-> key_length != AES_KEYSIZE_128)) {
		aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey,
		aad, aad_len, auth_tag, auth_tag_len);
		} else if (ciphertext_len < AVX_GEN4_OPTSIZE) {
		@@ -902,7 +907,8 @@ static int rfc4106_set_key(struct crypto_aead parent, const u8 key,
		}
		/*Account for 4 byte nonce at the end.*/
		key_len -= 4;
		if (key_len != AES_KEYSIZE_128) {
		if (key_len != AES_KEYSIZE_128 && key_len != AES_KEYSIZE_192 &&
		key_len != AES_KEYSIZE_256) {
		crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
		return -EINVAL;
		}
		@@ -1013,6 +1019,7 @@ static int __driver_rfc4106_encrypt(struct aead_request *req)
		__be32 counter = cpu_to_be32(1);
		struct crypto_aead *tfm = crypto_aead_reqtfm(req);
		struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
		u32 key_len = ctx->aes_key_expanded.key_length;
		void *aes_ctx = &(ctx->aes_key_expanded);
		unsigned long auth_tag_len = crypto_aead_authsize(tfm);
		u8 iv_tab[16+AESNI_ALIGN];
		@@ -1027,6 +1034,13 @@ static int __driver_rfc4106_encrypt(struct aead_request *req)
		/* to 8 or 12 bytes */
		if (unlikely(req->assoclen != 8 && req->assoclen != 12))
		return -EINVAL;
		if (unlikely(auth_tag_len != 8 && auth_tag_len != 12 && auth_tag_len != 16))
		return -EINVAL;
		if (unlikely(key_len != AES_KEYSIZE_128 &&
		key_len != AES_KEYSIZE_192 &&
		key_len != AES_KEYSIZE_256))
		return -EINVAL;

		/* IV below built */
		for (i = 0; i < 4; i++)
		*(iv+i) = ctx->nonce[i];
		@@ -1091,6 +1105,7 @@ static int __driver_rfc4106_decrypt(struct aead_request *req)
		int retval = 0;
		struct crypto_aead *tfm = crypto_aead_reqtfm(req);
		struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
		u32 key_len = ctx->aes_key_expanded.key_length;
		void *aes_ctx = &(ctx->aes_key_expanded);
		unsigned long auth_tag_len = crypto_aead_authsize(tfm);
		u8 iv_and_authTag[32+AESNI_ALIGN];
		@@ -1104,6 +1119,13 @@ static int __driver_rfc4106_decrypt(struct aead_request *req)
		if (unlikely((req->cryptlen < auth_tag_len) \|\|
		(req->assoclen != 8 && req->assoclen != 12)))
		return -EINVAL;
		if (unlikely(auth_tag_len != 8 && auth_tag_len != 12 && auth_tag_len != 16))
		return -EINVAL;
		if (unlikely(key_len != AES_KEYSIZE_128 &&
		key_len != AES_KEYSIZE_192 &&
		key_len != AES_KEYSIZE_256))
		return -EINVAL;

		/* Assuming we are supporting rfc4106 64-bit extended */
		/* sequence numbers. We need to have the AAD length */
		/* equal to 8 or 12 bytes */