
Commit 8861249c authored by megha.dey@linux.intel.com, committed by Herbert Xu

crypto: x86/sha1 - Fix reads beyond the number of blocks passed

It was reported that the sha1 AVX2 function (sha1_transform_avx2) reads
ahead beyond its intended data, causing a crash if the next block is
beyond a page boundary:
http://marc.info/?l=linux-crypto-vger&m=149373371023377

This patch makes sure that there is no overflow for any buffer length.
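In concrete terms, the assembly below drops the BUFFER_END/K_BASE sentinel
scheme and keeps a block counter (BLOCKS_CTR) instead; the buffer pointers
are only advanced when enough blocks remain, via the new ADD_IF_GE macro.
A minimal C sketch of that guard, using illustrative names that are not
taken from the kernel source:

#include <stddef.h>
#include <stdint.h>

/*
 * Sketch of the ADD_IF_GE idea: advance a block pointer by 'step' bytes
 * only if at least 'min_blocks' blocks are still left to process, so the
 * code never forms a pointer past the caller-supplied data.
 */
static const uint8_t *advance_if_ge(const uint8_t *ptr, size_t blocks_left,
				    size_t min_blocks, size_t step)
{
	return (blocks_left >= min_blocks) ? ptr + step : ptr;
}

/*
 * Roughly what "ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 2, 64" expresses: the
 * second pipeline pointer moves to the next 64-byte block only when at
 * least two blocks were passed in.
 */
static const uint8_t *setup_second_ptr(const uint8_t *buf, size_t blocks_ctr)
{
	return advance_if_ge(buf, blocks_ctr, 2, 64);
}

The assembly performs the same comparison branch-free with cmp/cmovge
inside ADD_IF_GE.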

It passes the tests written by Jan Stancek that revealed this problem:
https://github.com/jstancek/sha1-avx2-crash
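One way to exercise the problematic case is to place the input so that the
bytes immediately after it are unmapped, so any read-ahead faults instead
of silently returning garbage. A rough userspace sketch of such a buffer
setup (an illustration of the idea, not necessarily how the linked tests
are implemented):

#include <stddef.h>
#include <sys/mman.h>
#include <unistd.h>

/*
 * Map two pages, make the second one inaccessible, and place a message of
 * 'len' bytes (assumed to fit in one page) so that it ends exactly at the
 * guard page. Reading even one byte past the message now faults.
 */
static unsigned char *guarded_buffer(size_t len)
{
	long page = sysconf(_SC_PAGESIZE);
	unsigned char *p;

	p = mmap(NULL, 2 * page, PROT_READ | PROT_WRITE,
		 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED || mprotect(p + page, page, PROT_NONE))
		return NULL;
	return p + page - len;
}

Hashing such a buffer through the kernel's SHA-1 code (for example via
AF_ALG) hits exactly the "next block is beyond a page boundary" situation
described above.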



I have re-enabled sha1-avx2 by reverting commit b82ce244.

Cc: <stable@vger.kernel.org>
Fixes: b82ce244 ("crypto: sha1-ssse3 - Disable avx2")
Originally-by: Ilya Albrekht <ilya.albrekht@intel.com>
Tested-by: Jan Stancek <jstancek@redhat.com>
Signed-off-by: Megha Dey <megha.dey@linux.intel.com>
Reported-by: Jan Stancek <jstancek@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent 28389575
arch/x86/crypto/sha1_avx2_x86_64_asm.S  +36 −31
@@ -117,11 +117,10 @@
	.set T1, REG_T1
.endm

-#define K_BASE		%r8
#define HASH_PTR	%r9
+#define BLOCKS_CTR	%r8
#define BUFFER_PTR	%r10
#define BUFFER_PTR2	%r13
-#define BUFFER_END	%r11

#define PRECALC_BUF	%r14
#define WK_BUF		%r15
@@ -205,14 +204,14 @@
		 * blended AVX2 and ALU instruction scheduling
		 * 1 vector iteration per 8 rounds
		 */
-		vmovdqu ((i * 2) + PRECALC_OFFSET)(BUFFER_PTR), W_TMP
+		vmovdqu (i * 2)(BUFFER_PTR), W_TMP
	.elseif ((i & 7) == 1)
-		vinsertf128 $1, (((i-1) * 2)+PRECALC_OFFSET)(BUFFER_PTR2),\
+		vinsertf128 $1, ((i-1) * 2)(BUFFER_PTR2),\
			 WY_TMP, WY_TMP
	.elseif ((i & 7) == 2)
		vpshufb YMM_SHUFB_BSWAP, WY_TMP, WY
	.elseif ((i & 7) == 4)
-		vpaddd  K_XMM(K_BASE), WY, WY_TMP
+		vpaddd  K_XMM + K_XMM_AR(%rip), WY, WY_TMP
	.elseif ((i & 7) == 7)
		vmovdqu  WY_TMP, PRECALC_WK(i&~7)

@@ -255,7 +254,7 @@
		vpxor	WY, WY_TMP, WY_TMP
	.elseif ((i & 7) == 7)
		vpxor	WY_TMP2, WY_TMP, WY
-		vpaddd	K_XMM(K_BASE), WY, WY_TMP
+		vpaddd  K_XMM + K_XMM_AR(%rip), WY, WY_TMP
		vmovdqu	WY_TMP, PRECALC_WK(i&~7)

		PRECALC_ROTATE_WY
@@ -291,7 +290,7 @@
		vpsrld	$30, WY, WY
		vpor	WY, WY_TMP, WY
	.elseif ((i & 7) == 7)
-		vpaddd	K_XMM(K_BASE), WY, WY_TMP
+		vpaddd  K_XMM + K_XMM_AR(%rip), WY, WY_TMP
		vmovdqu	WY_TMP, PRECALC_WK(i&~7)

		PRECALC_ROTATE_WY
@@ -446,6 +445,16 @@

.endm

+/* Add constant only if (%2 > %3) condition met (uses RTA as temp)
+ * %1 + %2 >= %3 ? %4 : 0
+ */
+.macro ADD_IF_GE a, b, c, d
+	mov     \a, RTA
+	add     $\d, RTA
+	cmp     $\c, \b
+	cmovge  RTA, \a
+.endm

/*
 * macro implements 80 rounds of SHA-1, for multiple blocks with s/w pipelining
 */
@@ -463,13 +472,16 @@
	lea	(2*4*80+32)(%rsp), WK_BUF

	# Precalc WK for first 2 blocks
-	PRECALC_OFFSET = 0
+	ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 2, 64
	.set i, 0
	.rept    160
		PRECALC i
		.set i, i + 1
	.endr
-	PRECALC_OFFSET = 128

+	/* Go to next block if needed */
+	ADD_IF_GE BUFFER_PTR, BLOCKS_CTR, 3, 128
+	ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 4, 128
	xchg	WK_BUF, PRECALC_BUF

	.align 32
@@ -479,8 +491,8 @@ _loop:
	 * we use K_BASE value as a signal of a last block,
	 * it is set below by: cmovae BUFFER_PTR, K_BASE
	 */
-	cmp	K_BASE, BUFFER_PTR
-	jne	_begin
+	test BLOCKS_CTR, BLOCKS_CTR
+	jnz _begin
	.align 32
	jmp	_end
	.align 32
@@ -512,10 +524,10 @@ _loop0:
		.set j, j+2
	.endr

-	add	$(2*64), BUFFER_PTR       /* move to next odd-64-byte block */
-	cmp	BUFFER_END, BUFFER_PTR    /* is current block the last one? */
-	cmovae	K_BASE, BUFFER_PTR	/* signal the last iteration smartly */

+	/* Update Counter */
+	sub $1, BLOCKS_CTR
+	/* Move to the next block only if needed*/
+	ADD_IF_GE BUFFER_PTR, BLOCKS_CTR, 4, 128
	/*
	 * rounds
	 * 60,62,64,66,68
@@ -532,8 +544,8 @@ _loop0:
	UPDATE_HASH	12(HASH_PTR), D
	UPDATE_HASH	16(HASH_PTR), E

-	cmp	K_BASE, BUFFER_PTR	/* is current block the last one? */
-	je	_loop
+	test	BLOCKS_CTR, BLOCKS_CTR
+	jz	_loop

	mov	TB, B

@@ -575,10 +587,10 @@ _loop2:
		.set j, j+2
	.endr

-	add	$(2*64), BUFFER_PTR2      /* move to next even-64-byte block */

-	cmp	BUFFER_END, BUFFER_PTR2   /* is current block the last one */
-	cmovae	K_BASE, BUFFER_PTR       /* signal the last iteration smartly */
+	/* update counter */
+	sub     $1, BLOCKS_CTR
+	/* Move to the next block only if needed*/
+	ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 4, 128

	jmp	_loop3
_loop3:
@@ -641,19 +653,12 @@ _loop3:

	avx2_zeroupper

-	lea	K_XMM_AR(%rip), K_BASE

	/* Setup initial values */
	mov	CTX, HASH_PTR
	mov	BUF, BUFFER_PTR
-	lea	64(BUF), BUFFER_PTR2

-	shl	$6, CNT			/* mul by 64 */
-	add	BUF, CNT
-	add	$64, CNT
-	mov	CNT, BUFFER_END

-	cmp	BUFFER_END, BUFFER_PTR2
-	cmovae	K_BASE, BUFFER_PTR2
+	mov	BUF, BUFFER_PTR2
+	mov	CNT, BLOCKS_CTR

	xmm_mov	BSWAP_SHUFB_CTL(%rip), YMM_SHUFB_BSWAP

arch/x86/crypto/sha1_ssse3_glue.c  +1 −1
@@ -201,7 +201,7 @@ asmlinkage void sha1_transform_avx2(u32 *digest, const char *data,

static bool avx2_usable(void)
{
-	if (false && avx_usable() && boot_cpu_has(X86_FEATURE_AVX2)
+	if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2)
		&& boot_cpu_has(X86_FEATURE_BMI1)
		&& boot_cpu_has(X86_FEATURE_BMI2))
		return true;