Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 1ecdd37e authored by Junaid Shahid, committed by Herbert Xu
Browse files

crypto: aesni - Fix out-of-bounds access of the AAD buffer in generic-gcm-aesni



The aesni_gcm_enc/dec functions can access memory after the end of
the AAD buffer if the AAD length is not a multiple of 4 bytes.
It didn't matter with rfc4106-gcm-aesni as in that case the AAD was
always followed by the 8 byte IV, but that is no longer the case with
generic-gcm-aesni. This can potentially result in accessing a page that
is not mapped and thus causing the machine to crash. This patch fixes
that by reading the last <16 byte block of the AAD byte-by-byte and
optionally via an 8-byte load if the block was at least 8 bytes.

Fixes: 0487ccac ("crypto: aesni - make non-AVX AES-GCM work with any aadlen")
Cc: <stable@vger.kernel.org>
Signed-off-by: Junaid Shahid <junaids@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent b20209c9
Loading
Loading
Loading
Loading
+12 −100
Original line number Diff line number Diff line
@@ -89,30 +89,6 @@ SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100
ALL_F:      .octa 0xffffffffffffffffffffffffffffffff
            .octa 0x00000000000000000000000000000000

/*
 * aad_shift_arr: table of 17 PSHUFB shuffle masks, 16 bytes each
 * (17 * 16 = 272 bytes, matching the .size directive below).
 *
 * The AAD tail-handling code scales the leftover AAD byte count by 16
 * (salq $4) and uses it as a byte offset into this table, then applies
 * the selected mask with PSHUFB to the partially assembled block. Per
 * the comment at that call site, the purpose is to shift out the extra
 * bytes that were over-read and left-align the valid ones, because
 * pslldq only accepts an immediate shift count.
 *
 * Within each mask, a 0xff selector byte has its top bit set, which makes
 * PSHUFB write zero to that destination byte; the remaining selector
 * bytes name the source byte to move into the low end of the register.
 * The entry at offset 0 is all 0xff (result is all zero), and the last
 * entry is the identity permutation.
 */
.section .rodata
.align 16
.type aad_shift_arr, @object
.size aad_shift_arr, 272
aad_shift_arr:
        .octa     0xffffffffffffffffffffffffffffffff
        .octa     0xffffffffffffffffffffffffffffff0C
        .octa     0xffffffffffffffffffffffffffff0D0C
        .octa     0xffffffffffffffffffffffffff0E0D0C
        .octa     0xffffffffffffffffffffffff0F0E0D0C
        .octa     0xffffffffffffffffffffff0C0B0A0908
        .octa     0xffffffffffffffffffff0D0C0B0A0908
        .octa     0xffffffffffffffffff0E0D0C0B0A0908
        .octa     0xffffffffffffffff0F0E0D0C0B0A0908
        .octa     0xffffffffffffff0C0B0A090807060504
        .octa     0xffffffffffff0D0C0B0A090807060504
        .octa     0xffffffffff0E0D0C0B0A090807060504
        .octa     0xffffffff0F0E0D0C0B0A090807060504
        .octa     0xffffff0C0B0A09080706050403020100
        .octa     0xffff0D0C0B0A09080706050403020100
        .octa     0xff0E0D0C0B0A09080706050403020100
        .octa     0x0F0E0D0C0B0A09080706050403020100


.text


@@ -303,62 +279,30 @@ _done_read_partial_block_\@:
XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
        MOVADQ     SHUF_MASK(%rip), %xmm14
	mov	   arg7, %r10           # %r10 = AAD
	mov	   arg8, %r12           # %r12 = aadLen
	mov	   %r12, %r11
	mov	   arg8, %r11           # %r11 = aadLen
	pxor	   %xmm\i, %xmm\i
	pxor       \XMM2, \XMM2

	cmp	   $16, %r11
	jl	   _get_AAD_rest8\num_initial_blocks\operation
	jl	   _get_AAD_rest\num_initial_blocks\operation
_get_AAD_blocks\num_initial_blocks\operation:
	movdqu	   (%r10), %xmm\i
	PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data
	pxor	   %xmm\i, \XMM2
	GHASH_MUL  \XMM2, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
	add	   $16, %r10
	sub	   $16, %r12
	sub	   $16, %r11
	cmp	   $16, %r11
	jge	   _get_AAD_blocks\num_initial_blocks\operation

	movdqu	   \XMM2, %xmm\i

	/* read the last <16B of AAD */
_get_AAD_rest\num_initial_blocks\operation:
	cmp	   $0, %r11
	je	   _get_AAD_done\num_initial_blocks\operation

	pxor	   %xmm\i,%xmm\i

	/* read the last <16B of AAD. since we have at least 4B of
	data right after the AAD (the ICV, and maybe some CT), we can
	read 4B/8B blocks safely, and then get rid of the extra stuff */
_get_AAD_rest8\num_initial_blocks\operation:
	cmp	   $4, %r11
	jle	   _get_AAD_rest4\num_initial_blocks\operation
	movq	   (%r10), \TMP1
	add	   $8, %r10
	sub	   $8, %r11
	pslldq	   $8, \TMP1
	psrldq	   $8, %xmm\i
	pxor	   \TMP1, %xmm\i
	jmp	   _get_AAD_rest8\num_initial_blocks\operation
_get_AAD_rest4\num_initial_blocks\operation:
	cmp	   $0, %r11
	jle	   _get_AAD_rest0\num_initial_blocks\operation
	mov	   (%r10), %eax
	movq	   %rax, \TMP1
	add	   $4, %r10
	sub	   $4, %r10
	pslldq	   $12, \TMP1
	psrldq	   $4, %xmm\i
	pxor	   \TMP1, %xmm\i
_get_AAD_rest0\num_initial_blocks\operation:
	/* finalize: shift out the extra bytes we read, and align
	left. since pslldq can only shift by an immediate, we use
	vpshufb and an array of shuffle masks */
	movq	   %r12, %r11
	salq	   $4, %r11
	movdqu	   aad_shift_arr(%r11), \TMP1
	PSHUFB_XMM \TMP1, %xmm\i
_get_AAD_rest_final\num_initial_blocks\operation:
	READ_PARTIAL_BLOCK %r10, %r11, \TMP1, %xmm\i
	PSHUFB_XMM   %xmm14, %xmm\i # byte-reflect the AAD data
	pxor	   \XMM2, %xmm\i
	GHASH_MUL  %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
@@ -562,62 +506,30 @@ _initial_blocks_done\num_initial_blocks\operation:
XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
        MOVADQ     SHUF_MASK(%rip), %xmm14
	mov	   arg7, %r10           # %r10 = AAD
	mov	   arg8, %r12           # %r12 = aadLen
	mov	   %r12, %r11
	mov	   arg8, %r11           # %r11 = aadLen
	pxor	   %xmm\i, %xmm\i
	pxor	   \XMM2, \XMM2

	cmp	   $16, %r11
	jl	   _get_AAD_rest8\num_initial_blocks\operation
	jl	   _get_AAD_rest\num_initial_blocks\operation
_get_AAD_blocks\num_initial_blocks\operation:
	movdqu	   (%r10), %xmm\i
	PSHUFB_XMM   %xmm14, %xmm\i # byte-reflect the AAD data
	pxor	   %xmm\i, \XMM2
	GHASH_MUL  \XMM2, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
	add	   $16, %r10
	sub	   $16, %r12
	sub	   $16, %r11
	cmp	   $16, %r11
	jge	   _get_AAD_blocks\num_initial_blocks\operation

	movdqu	   \XMM2, %xmm\i

	/* read the last <16B of AAD */
_get_AAD_rest\num_initial_blocks\operation:
	cmp	   $0, %r11
	je	   _get_AAD_done\num_initial_blocks\operation

	pxor	   %xmm\i,%xmm\i

	/* read the last <16B of AAD. since we have at least 4B of
	data right after the AAD (the ICV, and maybe some PT), we can
	read 4B/8B blocks safely, and then get rid of the extra stuff */
_get_AAD_rest8\num_initial_blocks\operation:
	cmp	   $4, %r11
	jle	   _get_AAD_rest4\num_initial_blocks\operation
	movq	   (%r10), \TMP1
	add	   $8, %r10
	sub	   $8, %r11
	pslldq	   $8, \TMP1
	psrldq	   $8, %xmm\i
	pxor	   \TMP1, %xmm\i
	jmp	   _get_AAD_rest8\num_initial_blocks\operation
_get_AAD_rest4\num_initial_blocks\operation:
	cmp	   $0, %r11
	jle	   _get_AAD_rest0\num_initial_blocks\operation
	mov	   (%r10), %eax
	movq	   %rax, \TMP1
	add	   $4, %r10
	sub	   $4, %r10
	pslldq	   $12, \TMP1
	psrldq	   $4, %xmm\i
	pxor	   \TMP1, %xmm\i
_get_AAD_rest0\num_initial_blocks\operation:
	/* finalize: shift out the extra bytes we read, and align
	left. since pslldq can only shift by an immediate, we use
	vpshufb and an array of shuffle masks */
	movq	   %r12, %r11
	salq	   $4, %r11
	movdqu	   aad_shift_arr(%r11), \TMP1
	PSHUFB_XMM \TMP1, %xmm\i
_get_AAD_rest_final\num_initial_blocks\operation:
	READ_PARTIAL_BLOCK %r10, %r11, \TMP1, %xmm\i
	PSHUFB_XMM   %xmm14, %xmm\i # byte-reflect the AAD data
	pxor	   \XMM2, %xmm\i
	GHASH_MUL  %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1