Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d3ce3b18 authored by Linus Torvalds
Browse files
Pull crypto fix from Herbert Xu:
 "Fix a bug in the implementation of the x86 accelerated version of
  poly1305"

* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6:
  crypto: x86/poly1305 - fix overflow during partial reduction
parents 95ea5529 678cce40
Loading
Loading
Loading
Loading
+10 −4
Original line number Diff line number Diff line
@@ -323,6 +323,12 @@ ENTRY(poly1305_4block_avx2)
	vpaddq		t2,t1,t1
	vmovq		t1x,d4

	# Now do a partial reduction mod (2^130)-5, carrying h0 -> h1 -> h2 ->
	# h3 -> h4 -> h0 -> h1 to get h0,h2,h3,h4 < 2^26 and h1 < 2^26 + a small
	# amount.  Careful: we must not assume the carry bits 'd0 >> 26',
	# 'd1 >> 26', 'd2 >> 26', 'd3 >> 26', and '(d4 >> 26) * 5' fit in 32-bit
	# integers.  It's true in a single-block implementation, but not here.

	# d1 += d0 >> 26
	mov		d0,%rax
	shr		$26,%rax
@@ -361,16 +367,16 @@ ENTRY(poly1305_4block_avx2)
	# h0 += (d4 >> 26) * 5
	mov		d4,%rax
	shr		$26,%rax
-	lea		(%eax,%eax,4),%eax
-	add		%eax,%ebx
+	lea		(%rax,%rax,4),%rax
+	add		%rax,%rbx
	# h4 = d4 & 0x3ffffff
	mov		d4,%rax
	and		$0x3ffffff,%eax
	mov		%eax,h4

	# h1 += h0 >> 26
-	mov		%ebx,%eax
-	shr		$26,%eax
+	mov		%rbx,%rax
+	shr		$26,%rax
	add		%eax,h1
	# h0 = h0 & 0x3ffffff
	andl		$0x3ffffff,%ebx
+14 −8
Original line number Diff line number Diff line
@@ -253,16 +253,16 @@ ENTRY(poly1305_block_sse2)
	# h0 += (d4 >> 26) * 5
	mov		d4,%rax
	shr		$26,%rax
-	lea		(%eax,%eax,4),%eax
-	add		%eax,%ebx
+	lea		(%rax,%rax,4),%rax
+	add		%rax,%rbx
	# h4 = d4 & 0x3ffffff
	mov		d4,%rax
	and		$0x3ffffff,%eax
	mov		%eax,h4

	# h1 += h0 >> 26
-	mov		%ebx,%eax
-	shr		$26,%eax
+	mov		%rbx,%rax
+	shr		$26,%rax
	add		%eax,h1
	# h0 = h0 & 0x3ffffff
	andl		$0x3ffffff,%ebx
@@ -524,6 +524,12 @@ ENTRY(poly1305_2block_sse2)
	paddq		t2,t1
	movq		t1,d4

	# Now do a partial reduction mod (2^130)-5, carrying h0 -> h1 -> h2 ->
	# h3 -> h4 -> h0 -> h1 to get h0,h2,h3,h4 < 2^26 and h1 < 2^26 + a small
	# amount.  Careful: we must not assume the carry bits 'd0 >> 26',
	# 'd1 >> 26', 'd2 >> 26', 'd3 >> 26', and '(d4 >> 26) * 5' fit in 32-bit
	# integers.  It's true in a single-block implementation, but not here.

	# d1 += d0 >> 26
	mov		d0,%rax
	shr		$26,%rax
@@ -562,16 +568,16 @@ ENTRY(poly1305_2block_sse2)
	# h0 += (d4 >> 26) * 5
	mov		d4,%rax
	shr		$26,%rax
-	lea		(%eax,%eax,4),%eax
-	add		%eax,%ebx
+	lea		(%rax,%rax,4),%rax
+	add		%rax,%rbx
	# h4 = d4 & 0x3ffffff
	mov		d4,%rax
	and		$0x3ffffff,%eax
	mov		%eax,h4

	# h1 += h0 >> 26
-	mov		%ebx,%eax
-	shr		$26,%eax
+	mov		%rbx,%rax
+	shr		$26,%rax
	add		%eax,h1
	# h0 = h0 & 0x3ffffff
	andl		$0x3ffffff,%ebx
+43 −1
Original line number Diff line number Diff line
@@ -5634,7 +5634,49 @@ static const struct hash_testvec poly1305_tv_template[] = {
		.psize		= 80,
		.digest		= "\x13\x00\x00\x00\x00\x00\x00\x00"
				  "\x00\x00\x00\x00\x00\x00\x00\x00",
-	},
+	}, { /* Regression test for overflow in AVX2 implementation */
		.plaintext	= "\xff\xff\xff\xff\xff\xff\xff\xff"
				  "\xff\xff\xff\xff\xff\xff\xff\xff"
				  "\xff\xff\xff\xff\xff\xff\xff\xff"
				  "\xff\xff\xff\xff\xff\xff\xff\xff"
				  "\xff\xff\xff\xff\xff\xff\xff\xff"
				  "\xff\xff\xff\xff\xff\xff\xff\xff"
				  "\xff\xff\xff\xff\xff\xff\xff\xff"
				  "\xff\xff\xff\xff\xff\xff\xff\xff"
				  "\xff\xff\xff\xff\xff\xff\xff\xff"
				  "\xff\xff\xff\xff\xff\xff\xff\xff"
				  "\xff\xff\xff\xff\xff\xff\xff\xff"
				  "\xff\xff\xff\xff\xff\xff\xff\xff"
				  "\xff\xff\xff\xff\xff\xff\xff\xff"
				  "\xff\xff\xff\xff\xff\xff\xff\xff"
				  "\xff\xff\xff\xff\xff\xff\xff\xff"
				  "\xff\xff\xff\xff\xff\xff\xff\xff"
				  "\xff\xff\xff\xff\xff\xff\xff\xff"
				  "\xff\xff\xff\xff\xff\xff\xff\xff"
				  "\xff\xff\xff\xff\xff\xff\xff\xff"
				  "\xff\xff\xff\xff\xff\xff\xff\xff"
				  "\xff\xff\xff\xff\xff\xff\xff\xff"
				  "\xff\xff\xff\xff\xff\xff\xff\xff"
				  "\xff\xff\xff\xff\xff\xff\xff\xff"
				  "\xff\xff\xff\xff\xff\xff\xff\xff"
				  "\xff\xff\xff\xff\xff\xff\xff\xff"
				  "\xff\xff\xff\xff\xff\xff\xff\xff"
				  "\xff\xff\xff\xff\xff\xff\xff\xff"
				  "\xff\xff\xff\xff\xff\xff\xff\xff"
				  "\xff\xff\xff\xff\xff\xff\xff\xff"
				  "\xff\xff\xff\xff\xff\xff\xff\xff"
				  "\xff\xff\xff\xff\xff\xff\xff\xff"
				  "\xff\xff\xff\xff\xff\xff\xff\xff"
				  "\xff\xff\xff\xff\xff\xff\xff\xff"
				  "\xff\xff\xff\xff\xff\xff\xff\xff"
				  "\xff\xff\xff\xff\xff\xff\xff\xff"
				  "\xff\xff\xff\xff\xff\xff\xff\xff"
				  "\xff\xff\xff\xff\xff\xff\xff\xff"
				  "\xff\xff\xff\xff",
		.psize		= 300,
		.digest		= "\xfb\x5e\x96\xd8\x61\xd5\xc7\xc8"
				  "\x78\xe5\x87\xcc\x2d\x5a\x22\xe1",
	}
};
/* NHPoly1305 test vectors from https://github.com/google/adiantum */