
Commit 27352c45 authored by Sabrina Dubroca, committed by Herbert Xu

crypto: aesni - make AVX2 AES-GCM work with any aadlen



This is the first step toward making the aesni AES-GCM implementation
generic. The current code was written for rfc4106, so it handles only
certain sizes of associated data.

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent 0120af77
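
For orientation, here is the shape of the AAD walk the patch moves to, as a minimal C sketch: whole 16-byte blocks are folded straight into GHASH, and only the sub-16-byte tail needs special handling. The ghash_mul() helper is a hypothetical stand-in for the GHASH_MUL_AVX2 macro (the vpshufb byte-reflection is assumed to happen inside it); this is not the kernel's actual C interface.

/* ghash_aad.c - sketch of the generic AAD walk (shape only).
 * ghash_mul() is a hypothetical stand-in for GHASH_MUL_AVX2. */
#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* assumed: multiply the accumulator by H in GF(2^128) */
void ghash_mul(uint8_t acc[16], const uint8_t h[16]);

void ghash_aad(uint8_t acc[16], const uint8_t h[16],
	       const uint8_t *aad, size_t aadlen)
{
	uint8_t block[16];
	size_t i;

	/* _get_AAD_blocks: fold every whole 16-byte block into the hash */
	while (aadlen >= 16) {
		for (i = 0; i < 16; i++)
			acc[i] ^= aad[i];
		ghash_mul(acc, h);
		aad += 16;
		aadlen -= 16;
	}

	/* tail: zero-pad the last partial block, as GCM specifies */
	if (aadlen) {
		memset(block, 0, sizeof(block));
		memcpy(block, aad, aadlen);
		for (i = 0; i < 16; i++)
			acc[i] ^= block[i];
		ghash_mul(acc, h);
	}
}

Nothing in this structure depends on aadlen being one of the rfc4106 sizes; the assembly below implements the same split in _get_AAD_blocks and the _get_AAD_rest* labels.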
+58 −27
@@ -1702,6 +1702,7 @@ ENDPROC(aesni_gcm_dec_avx_gen2)
 
 .macro INITIAL_BLOCKS_AVX2 num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC VER
 	i = (8-\num_initial_blocks)
+	j = 0
 	setreg
 
 	mov     arg6, %r10                       # r10 = AAD
@@ -1710,33 +1711,64 @@ ENDPROC(aesni_gcm_dec_avx_gen2)
 
 	mov     %r12, %r11
 
+	vpxor   reg_j, reg_j, reg_j
 	vpxor   reg_i, reg_i, reg_i
-_get_AAD_loop\@:
-        vmovd   (%r10), \T1
-        vpslldq $12, \T1, \T1
-        vpsrldq $4, reg_i, reg_i
-        vpxor   \T1, reg_i, reg_i
-
-        add     $4, %r10
-        sub     $4, %r12
-        jg      _get_AAD_loop\@
-
-
 	cmp     $16, %r11
-        je      _get_AAD_loop2_done\@
-        mov     $16, %r12
-
-_get_AAD_loop2\@:
-        vpsrldq $4, reg_i, reg_i
-        sub     $4, %r12
-        cmp     %r11, %r12
-        jg      _get_AAD_loop2\@
+	jl      _get_AAD_rest8\@
+_get_AAD_blocks\@:
+	vmovdqu (%r10), reg_i
+	vpshufb SHUF_MASK(%rip), reg_i, reg_i
+	vpxor   reg_i, reg_j, reg_j
+	GHASH_MUL_AVX2      reg_j, \T2, \T1, \T3, \T4, \T5, \T6
+	add     $16, %r10
+	sub     $16, %r12
+	sub     $16, %r11
+	cmp     $16, %r11
+	jge     _get_AAD_blocks\@
+	vmovdqu reg_j, reg_i
+	cmp     $0, %r11
+	je      _get_AAD_done\@
 
-_get_AAD_loop2_done\@:
+	vpxor   reg_i, reg_i, reg_i
 
-        #byte-reflect the AAD data
+	/* read the last <16B of AAD. since we have at least 4B of
+	data right after the AAD (the ICV, and maybe some CT), we can
+	read 4B/8B blocks safely, and then get rid of the extra stuff */
+_get_AAD_rest8\@:
+	cmp     $4, %r11
+	jle     _get_AAD_rest4\@
+	movq    (%r10), \T1
+	add     $8, %r10
+	sub     $8, %r11
+	vpslldq $8, \T1, \T1
+	vpsrldq $8, reg_i, reg_i
+	vpxor   \T1, reg_i, reg_i
+	jmp     _get_AAD_rest8\@
+_get_AAD_rest4\@:
+	cmp     $0, %r11
+	jle     _get_AAD_rest0\@
+	mov     (%r10), %eax
+	movq    %rax, \T1
+	add     $4, %r10
+	sub     $4, %r11
+	vpslldq $12, \T1, \T1
+	vpsrldq $4, reg_i, reg_i
+	vpxor   \T1, reg_i, reg_i
+_get_AAD_rest0\@:
+	/* finalize: shift out the extra bytes we read, and align
+	left. since pslldq can only shift by an immediate, we use
+	vpshufb and an array of shuffle masks */
+	movq    %r12, %r11
+	salq    $4, %r11
+	movdqu  aad_shift_arr(%r11), \T1
+	vpshufb \T1, reg_i, reg_i
+_get_AAD_rest_final\@:
 	vpshufb SHUF_MASK(%rip), reg_i, reg_i
+	vpxor   reg_j, reg_i, reg_i
+	GHASH_MUL_AVX2      reg_i, \T2, \T1, \T3, \T4, \T5, \T6
 
+_get_AAD_done\@:
 	# initialize the data pointer offset as zero
 	xor     %r11, %r11
 
@@ -1811,7 +1843,6 @@ _get_AAD_loop2_done\@:
 	i = (8-\num_initial_blocks)
 	j = (9-\num_initial_blocks)
 	setreg
-        GHASH_MUL_AVX2       reg_i, \T2, \T1, \T3, \T4, \T5, \T6
 
 .rep \num_initial_blocks
         vpxor    reg_i, reg_j, reg_j
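
Two tricks in the new tail handling deserve a closer look. First, the comment at _get_AAD_rest8: the last 1..15 bytes of AAD are consumed with 8- and 4-byte loads, which may read up to 3 bytes past the end of the AAD, and that is only safe because at least 4 bytes of data (the ICV, possibly preceded by ciphertext) always follow the AAD in memory. A C sketch of that walk, assuming the same layout guarantee and emulating the xmm accumulator as a plain 16-byte array:

/* aad_tail.c - sketch of _get_AAD_rest8/_get_AAD_rest4: consume the
 * last rem (1..15) AAD bytes with 8- and 4-byte loads. May read up
 * to 3 bytes past the AAD; safe only under the layout guarantee
 * stated in the comment above. */
#include <stdint.h>
#include <string.h>

/* emulate vpsrldq: shift the 16-byte "register" down by n bytes */
static void srldq(uint8_t r[16], int n)
{
	memmove(r, r + n, 16 - n);
	memset(r + 16 - n, 0, n);
}

/* returns the number of bytes actually read */
int read_aad_tail(uint8_t r[16], const uint8_t *p, int rem)
{
	int left = rem;
	const uint8_t *start = p;

	memset(r, 0, 16);
	while (left > 4) {		/* _get_AAD_rest8 */
		srldq(r, 8);		/* vpsrldq $8, reg_i */
		memcpy(r + 8, p, 8);	/* movq; vpslldq $8; vpxor */
		p += 8;
		left -= 8;
	}
	if (left > 0) {			/* _get_AAD_rest4 */
		srldq(r, 4);		/* vpsrldq $4, reg_i */
		memcpy(r + 12, p, 4);	/* mov; vpslldq $12; vpxor */
		p += 4;
	}
	return (int)(p - start);
}

Note that the number of bytes read is rem rounded up to a multiple of 4, which fixes where the valid bytes land in the register and is what makes the final fix-up table-driven.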
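Second, the fix-up at _get_AAD_rest0: the over-read bytes have to be shifted out and the valid bytes left-aligned, but pslldq only accepts an immediate shift count, so the assembly instead applies one vpshufb with a precomputed 16-byte mask per remainder length, loaded from aad_shift_arr at index rem * 16. A sketch of the same idea; pshufb() emulates the instruction, and make_shift_mask() is a hypothetical generator standing in for the precomputed table, not the kernel's code:

/* aad_fixup.c - sketch of _get_AAD_rest0: drop the over-read bytes
 * and left-align the rem valid AAD bytes. */
#include <stdint.h>

/* emulate pshufb: dst[i] = mask[i] & 0x80 ? 0 : src[mask[i] & 15] */
void pshufb(uint8_t dst[16], const uint8_t src[16],
	    const uint8_t mask[16])
{
	for (int i = 0; i < 16; i++)
		dst[i] = (mask[i] & 0x80) ? 0 : src[mask[i] & 0x0f];
}

/* The tail loop reads rem rounded up to a multiple of 4 bytes, so
 * the rem valid bytes sit at offset 16 - nread in the register.
 * Select them into bytes 0..rem-1 and zero everything above. */
void make_shift_mask(uint8_t mask[16], int rem)
{
	int off = 16 - 4 * ((rem + 3) / 4);

	for (int i = 0; i < 16; i++)
		mask[i] = (i < rem) ? (uint8_t)(off + i) : 0x80;
}

The result is the zero-padded final block; the code then byte-reflects it with SHUF_MASK, XORs it with the running hash in reg_j, and folds it in with one last GHASH_MUL_AVX2.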