Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b7b73cd5 authored by Eric Biggers, committed by Herbert Xu
Browse files

crypto: x86/salsa20 - remove x86 salsa20 implementations



The x86 assembly implementations of Salsa20 use the frame base pointer
register (%ebp or %rbp), which breaks frame pointer convention and
breaks stack traces when unwinding from an interrupt in the crypto code.
Recent (v4.10+) kernels will warn about this, e.g.

WARNING: kernel stack regs at 00000000a8291e69 in syzkaller047086:4677 has bad 'bp' value 000000001077994c
[...]

But after looking into it, I believe there's very little reason to still
retain the x86 Salsa20 code.  First, these are *not* vectorized
(SSE2/SSSE3/AVX2) implementations, which would be needed to get anywhere
close to the best Salsa20 performance on any remotely modern x86
processor; they're just regular x86 assembly.  Second, it's still
unclear whether anyone is actually using the kernel's Salsa20 at all,
especially given that now ChaCha20 is supported too, and with much more
efficient SSSE3 and AVX2 implementations.  Finally, in benchmarks I did
on both Intel and AMD processors with both gcc 8.1.0 and gcc 4.9.4, the
x86_64 salsa20-asm is actually slightly *slower* than salsa20-generic
(~3% slower on Skylake, ~10% slower on Zen), while the i686 salsa20-asm
is only slightly faster than salsa20-generic (~15% faster on Skylake,
~20% faster on Zen).  The gcc version made little difference.

So, the x86_64 salsa20-asm is pretty clearly useless.  That leaves just
the i686 salsa20-asm, which based on my tests provides a 15-20% speed
boost.  But that's without updating the code to not use %ebp.  And given
the maintenance cost, the small speed difference vs. salsa20-generic,
the fact that few people still use i686 kernels, the doubt that anyone
is even using the kernel's Salsa20 at all, and the fact that a SSE2
implementation would almost certainly be much faster on any remotely
modern x86 processor yet no one has cared enough to add one yet, I don't
think it's worthwhile to keep.

Thus, just remove both the x86_64 and i686 salsa20-asm implementations.

Reported-by: syzbot <syzbot+ffa3a158337bbc01ff09@syzkaller.appspotmail.com>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent 0b3a830b
Loading
Loading
Loading
Loading
+0 −4
Original line number Diff line number Diff line
@@ -15,7 +15,6 @@ obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o

obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o

obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
@@ -24,7 +23,6 @@ obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
obj-$(CONFIG_CRYPTO_CHACHA20_X86_64) += chacha20-x86_64.o
obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o
obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
@@ -71,7 +69,6 @@ endif

aes-i586-y := aes-i586-asm_32.o aes_glue.o
twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o

aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
@@ -80,7 +77,6 @@ camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
chacha20-x86_64-y := chacha20-ssse3-x86_64.o chacha20_glue.o
serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o

+0 −938
Original line number Diff line number Diff line
# Derived from:
#	salsa20_pm.s version 20051229
#	D. J. Bernstein
#	Public domain.

#include <linux/linkage.h>

.text

# enter salsa20_encrypt_bytes
ENTRY(salsa20_encrypt_bytes)
	mov	%esp,%eax
	and	$31,%eax
	add	$256,%eax
	sub	%eax,%esp
	# eax_stack = eax
	movl	%eax,80(%esp)
	# ebx_stack = ebx
	movl	%ebx,84(%esp)
	# esi_stack = esi
	movl	%esi,88(%esp)
	# edi_stack = edi
	movl	%edi,92(%esp)
	# ebp_stack = ebp
	movl	%ebp,96(%esp)
	# x = arg1
	movl	4(%esp,%eax),%edx
	# m = arg2
	movl	8(%esp,%eax),%esi
	# out = arg3
	movl	12(%esp,%eax),%edi
	# bytes = arg4
	movl	16(%esp,%eax),%ebx
	# bytes -= 0
	sub	$0,%ebx
	# goto done if unsigned<=
	jbe	._done
._start:
	# in0 = *(uint32 *) (x + 0)
	movl	0(%edx),%eax
	# in1 = *(uint32 *) (x + 4)
	movl	4(%edx),%ecx
	# in2 = *(uint32 *) (x + 8)
	movl	8(%edx),%ebp
	# j0 = in0
	movl	%eax,164(%esp)
	# in3 = *(uint32 *) (x + 12)
	movl	12(%edx),%eax
	# j1 = in1
	movl	%ecx,168(%esp)
	# in4 = *(uint32 *) (x + 16)
	movl	16(%edx),%ecx
	# j2 = in2
	movl	%ebp,172(%esp)
	# in5 = *(uint32 *) (x + 20)
	movl	20(%edx),%ebp
	# j3 = in3
	movl	%eax,176(%esp)
	# in6 = *(uint32 *) (x + 24)
	movl	24(%edx),%eax
	# j4 = in4
	movl	%ecx,180(%esp)
	# in7 = *(uint32 *) (x + 28)
	movl	28(%edx),%ecx
	# j5 = in5
	movl	%ebp,184(%esp)
	# in8 = *(uint32 *) (x + 32)
	movl	32(%edx),%ebp
	# j6 = in6
	movl	%eax,188(%esp)
	# in9 = *(uint32 *) (x + 36)
	movl	36(%edx),%eax
	# j7 = in7
	movl	%ecx,192(%esp)
	# in10 = *(uint32 *) (x + 40)
	movl	40(%edx),%ecx
	# j8 = in8
	movl	%ebp,196(%esp)
	# in11 = *(uint32 *) (x + 44)
	movl	44(%edx),%ebp
	# j9 = in9
	movl	%eax,200(%esp)
	# in12 = *(uint32 *) (x + 48)
	movl	48(%edx),%eax
	# j10 = in10
	movl	%ecx,204(%esp)
	# in13 = *(uint32 *) (x + 52)
	movl	52(%edx),%ecx
	# j11 = in11
	movl	%ebp,208(%esp)
	# in14 = *(uint32 *) (x + 56)
	movl	56(%edx),%ebp
	# j12 = in12
	movl	%eax,212(%esp)
	# in15 = *(uint32 *) (x + 60)
	movl	60(%edx),%eax
	# j13 = in13
	movl	%ecx,216(%esp)
	# j14 = in14
	movl	%ebp,220(%esp)
	# j15 = in15
	movl	%eax,224(%esp)
	# x_backup = x
	movl	%edx,64(%esp)
._bytesatleast1:
	#   bytes - 64
	cmp	$64,%ebx
	#   goto nocopy if unsigned>=
	jae	._nocopy
	#     ctarget = out
	movl	%edi,228(%esp)
	#     out = &tmp
	leal	0(%esp),%edi
	#     i = bytes
	mov	%ebx,%ecx
	#     while (i) { *out++ = *m++; --i }
	rep	movsb
	#     out = &tmp
	leal	0(%esp),%edi
	#     m = &tmp
	leal	0(%esp),%esi
._nocopy:
	#   out_backup = out
	movl	%edi,72(%esp)
	#   m_backup = m
	movl	%esi,68(%esp)
	#   bytes_backup = bytes
	movl	%ebx,76(%esp)
	#   in0 = j0
	movl	164(%esp),%eax
	#   in1 = j1
	movl	168(%esp),%ecx
	#   in2 = j2
	movl	172(%esp),%edx
	#   in3 = j3
	movl	176(%esp),%ebx
	#   x0 = in0
	movl	%eax,100(%esp)
	#   x1 = in1
	movl	%ecx,104(%esp)
	#   x2 = in2
	movl	%edx,108(%esp)
	#   x3 = in3
	movl	%ebx,112(%esp)
	#   in4 = j4
	movl	180(%esp),%eax
	#   in5 = j5
	movl	184(%esp),%ecx
	#   in6 = j6
	movl	188(%esp),%edx
	#   in7 = j7
	movl	192(%esp),%ebx
	#   x4 = in4
	movl	%eax,116(%esp)
	#   x5 = in5
	movl	%ecx,120(%esp)
	#   x6 = in6
	movl	%edx,124(%esp)
	#   x7 = in7
	movl	%ebx,128(%esp)
	#   in8 = j8
	movl	196(%esp),%eax
	#   in9 = j9
	movl	200(%esp),%ecx
	#   in10 = j10
	movl	204(%esp),%edx
	#   in11 = j11
	movl	208(%esp),%ebx
	#   x8 = in8
	movl	%eax,132(%esp)
	#   x9 = in9
	movl	%ecx,136(%esp)
	#   x10 = in10
	movl	%edx,140(%esp)
	#   x11 = in11
	movl	%ebx,144(%esp)
	#   in12 = j12
	movl	212(%esp),%eax
	#   in13 = j13
	movl	216(%esp),%ecx
	#   in14 = j14
	movl	220(%esp),%edx
	#   in15 = j15
	movl	224(%esp),%ebx
	#   x12 = in12
	movl	%eax,148(%esp)
	#   x13 = in13
	movl	%ecx,152(%esp)
	#   x14 = in14
	movl	%edx,156(%esp)
	#   x15 = in15
	movl	%ebx,160(%esp)
	#   i = 20
	mov	$20,%ebp
	# p = x0
	movl	100(%esp),%eax
	# s = x5
	movl	120(%esp),%ecx
	# t = x10
	movl	140(%esp),%edx
	# w = x15
	movl	160(%esp),%ebx
._mainloop:
	# x0 = p
	movl	%eax,100(%esp)
	# 				x10 = t
	movl	%edx,140(%esp)
	# p += x12
	addl	148(%esp),%eax
	# 		x5 = s
	movl	%ecx,120(%esp)
	# 				t += x6
	addl	124(%esp),%edx
	# 						x15 = w
	movl	%ebx,160(%esp)
	# 		r = x1
	movl	104(%esp),%esi
	# 		r += s
	add	%ecx,%esi
	# 						v = x11
	movl	144(%esp),%edi
	# 						v += w
	add	%ebx,%edi
	# p <<<= 7
	rol	$7,%eax
	# p ^= x4
	xorl	116(%esp),%eax
	# 				t <<<= 7
	rol	$7,%edx
	# 				t ^= x14
	xorl	156(%esp),%edx
	# 		r <<<= 7
	rol	$7,%esi
	# 		r ^= x9
	xorl	136(%esp),%esi
	# 						v <<<= 7
	rol	$7,%edi
	# 						v ^= x3
	xorl	112(%esp),%edi
	# x4 = p
	movl	%eax,116(%esp)
	# 				x14 = t
	movl	%edx,156(%esp)
	# p += x0
	addl	100(%esp),%eax
	# 		x9 = r
	movl	%esi,136(%esp)
	# 				t += x10
	addl	140(%esp),%edx
	# 						x3 = v
	movl	%edi,112(%esp)
	# p <<<= 9
	rol	$9,%eax
	# p ^= x8
	xorl	132(%esp),%eax
	# 				t <<<= 9
	rol	$9,%edx
	# 				t ^= x2
	xorl	108(%esp),%edx
	# 		s += r
	add	%esi,%ecx
	# 		s <<<= 9
	rol	$9,%ecx
	# 		s ^= x13
	xorl	152(%esp),%ecx
	# 						w += v
	add	%edi,%ebx
	# 						w <<<= 9
	rol	$9,%ebx
	# 						w ^= x7
	xorl	128(%esp),%ebx
	# x8 = p
	movl	%eax,132(%esp)
	# 				x2 = t
	movl	%edx,108(%esp)
	# p += x4
	addl	116(%esp),%eax
	# 		x13 = s
	movl	%ecx,152(%esp)
	# 				t += x14
	addl	156(%esp),%edx
	# 						x7 = w
	movl	%ebx,128(%esp)
	# p <<<= 13
	rol	$13,%eax
	# p ^= x12
	xorl	148(%esp),%eax
	# 				t <<<= 13
	rol	$13,%edx
	# 				t ^= x6
	xorl	124(%esp),%edx
	# 		r += s
	add	%ecx,%esi
	# 		r <<<= 13
	rol	$13,%esi
	# 		r ^= x1
	xorl	104(%esp),%esi
	# 						v += w
	add	%ebx,%edi
	# 						v <<<= 13
	rol	$13,%edi
	# 						v ^= x11
	xorl	144(%esp),%edi
	# x12 = p
	movl	%eax,148(%esp)
	# 				x6 = t
	movl	%edx,124(%esp)
	# p += x8
	addl	132(%esp),%eax
	# 		x1 = r
	movl	%esi,104(%esp)
	# 				t += x2
	addl	108(%esp),%edx
	# 						x11 = v
	movl	%edi,144(%esp)
	# p <<<= 18
	rol	$18,%eax
	# p ^= x0
	xorl	100(%esp),%eax
	# 				t <<<= 18
	rol	$18,%edx
	# 				t ^= x10
	xorl	140(%esp),%edx
	# 		s += r
	add	%esi,%ecx
	# 		s <<<= 18
	rol	$18,%ecx
	# 		s ^= x5
	xorl	120(%esp),%ecx
	# 						w += v
	add	%edi,%ebx
	# 						w <<<= 18
	rol	$18,%ebx
	# 						w ^= x15
	xorl	160(%esp),%ebx
	# x0 = p
	movl	%eax,100(%esp)
	# 				x10 = t
	movl	%edx,140(%esp)
	# p += x3
	addl	112(%esp),%eax
	# p <<<= 7
	rol	$7,%eax
	# 		x5 = s
	movl	%ecx,120(%esp)
	# 				t += x9
	addl	136(%esp),%edx
	# 						x15 = w
	movl	%ebx,160(%esp)
	# 		r = x4
	movl	116(%esp),%esi
	# 		r += s
	add	%ecx,%esi
	# 						v = x14
	movl	156(%esp),%edi
	# 						v += w
	add	%ebx,%edi
	# p ^= x1
	xorl	104(%esp),%eax
	# 				t <<<= 7
	rol	$7,%edx
	# 				t ^= x11
	xorl	144(%esp),%edx
	# 		r <<<= 7
	rol	$7,%esi
	# 		r ^= x6
	xorl	124(%esp),%esi
	# 						v <<<= 7
	rol	$7,%edi
	# 						v ^= x12
	xorl	148(%esp),%edi
	# x1 = p
	movl	%eax,104(%esp)
	# 				x11 = t
	movl	%edx,144(%esp)
	# p += x0
	addl	100(%esp),%eax
	# 		x6 = r
	movl	%esi,124(%esp)
	# 				t += x10
	addl	140(%esp),%edx
	# 						x12 = v
	movl	%edi,148(%esp)
	# p <<<= 9
	rol	$9,%eax
	# p ^= x2
	xorl	108(%esp),%eax
	# 				t <<<= 9
	rol	$9,%edx
	# 				t ^= x8
	xorl	132(%esp),%edx
	# 		s += r
	add	%esi,%ecx
	# 		s <<<= 9
	rol	$9,%ecx
	# 		s ^= x7
	xorl	128(%esp),%ecx
	# 						w += v
	add	%edi,%ebx
	# 						w <<<= 9
	rol	$9,%ebx
	# 						w ^= x13
	xorl	152(%esp),%ebx
	# x2 = p
	movl	%eax,108(%esp)
	# 				x8 = t
	movl	%edx,132(%esp)
	# p += x1
	addl	104(%esp),%eax
	# 		x7 = s
	movl	%ecx,128(%esp)
	# 				t += x11
	addl	144(%esp),%edx
	# 						x13 = w
	movl	%ebx,152(%esp)
	# p <<<= 13
	rol	$13,%eax
	# p ^= x3
	xorl	112(%esp),%eax
	# 				t <<<= 13
	rol	$13,%edx
	# 				t ^= x9
	xorl	136(%esp),%edx
	# 		r += s
	add	%ecx,%esi
	# 		r <<<= 13
	rol	$13,%esi
	# 		r ^= x4
	xorl	116(%esp),%esi
	# 						v += w
	add	%ebx,%edi
	# 						v <<<= 13
	rol	$13,%edi
	# 						v ^= x14
	xorl	156(%esp),%edi
	# x3 = p
	movl	%eax,112(%esp)
	# 				x9 = t
	movl	%edx,136(%esp)
	# p += x2
	addl	108(%esp),%eax
	# 		x4 = r
	movl	%esi,116(%esp)
	# 				t += x8
	addl	132(%esp),%edx
	# 						x14 = v
	movl	%edi,156(%esp)
	# p <<<= 18
	rol	$18,%eax
	# p ^= x0
	xorl	100(%esp),%eax
	# 				t <<<= 18
	rol	$18,%edx
	# 				t ^= x10
	xorl	140(%esp),%edx
	# 		s += r
	add	%esi,%ecx
	# 		s <<<= 18
	rol	$18,%ecx
	# 		s ^= x5
	xorl	120(%esp),%ecx
	# 						w += v
	add	%edi,%ebx
	# 						w <<<= 18
	rol	$18,%ebx
	# 						w ^= x15
	xorl	160(%esp),%ebx
	# x0 = p
	movl	%eax,100(%esp)
	# 				x10 = t
	movl	%edx,140(%esp)
	# p += x12
	addl	148(%esp),%eax
	# 		x5 = s
	movl	%ecx,120(%esp)
	# 				t += x6
	addl	124(%esp),%edx
	# 						x15 = w
	movl	%ebx,160(%esp)
	# 		r = x1
	movl	104(%esp),%esi
	# 		r += s
	add	%ecx,%esi
	# 						v = x11
	movl	144(%esp),%edi
	# 						v += w
	add	%ebx,%edi
	# p <<<= 7
	rol	$7,%eax
	# p ^= x4
	xorl	116(%esp),%eax
	# 				t <<<= 7
	rol	$7,%edx
	# 				t ^= x14
	xorl	156(%esp),%edx
	# 		r <<<= 7
	rol	$7,%esi
	# 		r ^= x9
	xorl	136(%esp),%esi
	# 						v <<<= 7
	rol	$7,%edi
	# 						v ^= x3
	xorl	112(%esp),%edi
	# x4 = p
	movl	%eax,116(%esp)
	# 				x14 = t
	movl	%edx,156(%esp)
	# p += x0
	addl	100(%esp),%eax
	# 		x9 = r
	movl	%esi,136(%esp)
	# 				t += x10
	addl	140(%esp),%edx
	# 						x3 = v
	movl	%edi,112(%esp)
	# p <<<= 9
	rol	$9,%eax
	# p ^= x8
	xorl	132(%esp),%eax
	# 				t <<<= 9
	rol	$9,%edx
	# 				t ^= x2
	xorl	108(%esp),%edx
	# 		s += r
	add	%esi,%ecx
	# 		s <<<= 9
	rol	$9,%ecx
	# 		s ^= x13
	xorl	152(%esp),%ecx
	# 						w += v
	add	%edi,%ebx
	# 						w <<<= 9
	rol	$9,%ebx
	# 						w ^= x7
	xorl	128(%esp),%ebx
	# x8 = p
	movl	%eax,132(%esp)
	# 				x2 = t
	movl	%edx,108(%esp)
	# p += x4
	addl	116(%esp),%eax
	# 		x13 = s
	movl	%ecx,152(%esp)
	# 				t += x14
	addl	156(%esp),%edx
	# 						x7 = w
	movl	%ebx,128(%esp)
	# p <<<= 13
	rol	$13,%eax
	# p ^= x12
	xorl	148(%esp),%eax
	# 				t <<<= 13
	rol	$13,%edx
	# 				t ^= x6
	xorl	124(%esp),%edx
	# 		r += s
	add	%ecx,%esi
	# 		r <<<= 13
	rol	$13,%esi
	# 		r ^= x1
	xorl	104(%esp),%esi
	# 						v += w
	add	%ebx,%edi
	# 						v <<<= 13
	rol	$13,%edi
	# 						v ^= x11
	xorl	144(%esp),%edi
	# x12 = p
	movl	%eax,148(%esp)
	# 				x6 = t
	movl	%edx,124(%esp)
	# p += x8
	addl	132(%esp),%eax
	# 		x1 = r
	movl	%esi,104(%esp)
	# 				t += x2
	addl	108(%esp),%edx
	# 						x11 = v
	movl	%edi,144(%esp)
	# p <<<= 18
	rol	$18,%eax
	# p ^= x0
	xorl	100(%esp),%eax
	# 				t <<<= 18
	rol	$18,%edx
	# 				t ^= x10
	xorl	140(%esp),%edx
	# 		s += r
	add	%esi,%ecx
	# 		s <<<= 18
	rol	$18,%ecx
	# 		s ^= x5
	xorl	120(%esp),%ecx
	# 						w += v
	add	%edi,%ebx
	# 						w <<<= 18
	rol	$18,%ebx
	# 						w ^= x15
	xorl	160(%esp),%ebx
	# x0 = p
	movl	%eax,100(%esp)
	# 				x10 = t
	movl	%edx,140(%esp)
	# p += x3
	addl	112(%esp),%eax
	# p <<<= 7
	rol	$7,%eax
	# 		x5 = s
	movl	%ecx,120(%esp)
	# 				t += x9
	addl	136(%esp),%edx
	# 						x15 = w
	movl	%ebx,160(%esp)
	# 		r = x4
	movl	116(%esp),%esi
	# 		r += s
	add	%ecx,%esi
	# 						v = x14
	movl	156(%esp),%edi
	# 						v += w
	add	%ebx,%edi
	# p ^= x1
	xorl	104(%esp),%eax
	# 				t <<<= 7
	rol	$7,%edx
	# 				t ^= x11
	xorl	144(%esp),%edx
	# 		r <<<= 7
	rol	$7,%esi
	# 		r ^= x6
	xorl	124(%esp),%esi
	# 						v <<<= 7
	rol	$7,%edi
	# 						v ^= x12
	xorl	148(%esp),%edi
	# x1 = p
	movl	%eax,104(%esp)
	# 				x11 = t
	movl	%edx,144(%esp)
	# p += x0
	addl	100(%esp),%eax
	# 		x6 = r
	movl	%esi,124(%esp)
	# 				t += x10
	addl	140(%esp),%edx
	# 						x12 = v
	movl	%edi,148(%esp)
	# p <<<= 9
	rol	$9,%eax
	# p ^= x2
	xorl	108(%esp),%eax
	# 				t <<<= 9
	rol	$9,%edx
	# 				t ^= x8
	xorl	132(%esp),%edx
	# 		s += r
	add	%esi,%ecx
	# 		s <<<= 9
	rol	$9,%ecx
	# 		s ^= x7
	xorl	128(%esp),%ecx
	# 						w += v
	add	%edi,%ebx
	# 						w <<<= 9
	rol	$9,%ebx
	# 						w ^= x13
	xorl	152(%esp),%ebx
	# x2 = p
	movl	%eax,108(%esp)
	# 				x8 = t
	movl	%edx,132(%esp)
	# p += x1
	addl	104(%esp),%eax
	# 		x7 = s
	movl	%ecx,128(%esp)
	# 				t += x11
	addl	144(%esp),%edx
	# 						x13 = w
	movl	%ebx,152(%esp)
	# p <<<= 13
	rol	$13,%eax
	# p ^= x3
	xorl	112(%esp),%eax
	# 				t <<<= 13
	rol	$13,%edx
	# 				t ^= x9
	xorl	136(%esp),%edx
	# 		r += s
	add	%ecx,%esi
	# 		r <<<= 13
	rol	$13,%esi
	# 		r ^= x4
	xorl	116(%esp),%esi
	# 						v += w
	add	%ebx,%edi
	# 						v <<<= 13
	rol	$13,%edi
	# 						v ^= x14
	xorl	156(%esp),%edi
	# x3 = p
	movl	%eax,112(%esp)
	# 				x9 = t
	movl	%edx,136(%esp)
	# p += x2
	addl	108(%esp),%eax
	# 		x4 = r
	movl	%esi,116(%esp)
	# 				t += x8
	addl	132(%esp),%edx
	# 						x14 = v
	movl	%edi,156(%esp)
	# p <<<= 18
	rol	$18,%eax
	# p ^= x0
	xorl	100(%esp),%eax
	# 				t <<<= 18
	rol	$18,%edx
	# 				t ^= x10
	xorl	140(%esp),%edx
	# 		s += r
	add	%esi,%ecx
	# 		s <<<= 18
	rol	$18,%ecx
	# 		s ^= x5
	xorl	120(%esp),%ecx
	# 						w += v
	add	%edi,%ebx
	# 						w <<<= 18
	rol	$18,%ebx
	# 						w ^= x15
	xorl	160(%esp),%ebx
	# i -= 4
	sub	$4,%ebp
	# goto mainloop if unsigned >
	ja	._mainloop
	# x0 = p
	movl	%eax,100(%esp)
	# x5 = s
	movl	%ecx,120(%esp)
	# x10 = t
	movl	%edx,140(%esp)
	# x15 = w
	movl	%ebx,160(%esp)
	#   out = out_backup
	movl	72(%esp),%edi
	#   m = m_backup
	movl	68(%esp),%esi
	#   in0 = x0
	movl	100(%esp),%eax
	#   in1 = x1
	movl	104(%esp),%ecx
	#   in0 += j0
	addl	164(%esp),%eax
	#   in1 += j1
	addl	168(%esp),%ecx
	#   in0 ^= *(uint32 *) (m + 0)
	xorl	0(%esi),%eax
	#   in1 ^= *(uint32 *) (m + 4)
	xorl	4(%esi),%ecx
	#   *(uint32 *) (out + 0) = in0
	movl	%eax,0(%edi)
	#   *(uint32 *) (out + 4) = in1
	movl	%ecx,4(%edi)
	#   in2 = x2
	movl	108(%esp),%eax
	#   in3 = x3
	movl	112(%esp),%ecx
	#   in2 += j2
	addl	172(%esp),%eax
	#   in3 += j3
	addl	176(%esp),%ecx
	#   in2 ^= *(uint32 *) (m + 8)
	xorl	8(%esi),%eax
	#   in3 ^= *(uint32 *) (m + 12)
	xorl	12(%esi),%ecx
	#   *(uint32 *) (out + 8) = in2
	movl	%eax,8(%edi)
	#   *(uint32 *) (out + 12) = in3
	movl	%ecx,12(%edi)
	#   in4 = x4
	movl	116(%esp),%eax
	#   in5 = x5
	movl	120(%esp),%ecx
	#   in4 += j4
	addl	180(%esp),%eax
	#   in5 += j5
	addl	184(%esp),%ecx
	#   in4 ^= *(uint32 *) (m + 16)
	xorl	16(%esi),%eax
	#   in5 ^= *(uint32 *) (m + 20)
	xorl	20(%esi),%ecx
	#   *(uint32 *) (out + 16) = in4
	movl	%eax,16(%edi)
	#   *(uint32 *) (out + 20) = in5
	movl	%ecx,20(%edi)
	#   in6 = x6
	movl	124(%esp),%eax
	#   in7 = x7
	movl	128(%esp),%ecx
	#   in6 += j6
	addl	188(%esp),%eax
	#   in7 += j7
	addl	192(%esp),%ecx
	#   in6 ^= *(uint32 *) (m + 24)
	xorl	24(%esi),%eax
	#   in7 ^= *(uint32 *) (m + 28)
	xorl	28(%esi),%ecx
	#   *(uint32 *) (out + 24) = in6
	movl	%eax,24(%edi)
	#   *(uint32 *) (out + 28) = in7
	movl	%ecx,28(%edi)
	#   in8 = x8
	movl	132(%esp),%eax
	#   in9 = x9
	movl	136(%esp),%ecx
	#   in8 += j8
	addl	196(%esp),%eax
	#   in9 += j9
	addl	200(%esp),%ecx
	#   in8 ^= *(uint32 *) (m + 32)
	xorl	32(%esi),%eax
	#   in9 ^= *(uint32 *) (m + 36)
	xorl	36(%esi),%ecx
	#   *(uint32 *) (out + 32) = in8
	movl	%eax,32(%edi)
	#   *(uint32 *) (out + 36) = in9
	movl	%ecx,36(%edi)
	#   in10 = x10
	movl	140(%esp),%eax
	#   in11 = x11
	movl	144(%esp),%ecx
	#   in10 += j10
	addl	204(%esp),%eax
	#   in11 += j11
	addl	208(%esp),%ecx
	#   in10 ^= *(uint32 *) (m + 40)
	xorl	40(%esi),%eax
	#   in11 ^= *(uint32 *) (m + 44)
	xorl	44(%esi),%ecx
	#   *(uint32 *) (out + 40) = in10
	movl	%eax,40(%edi)
	#   *(uint32 *) (out + 44) = in11
	movl	%ecx,44(%edi)
	#   in12 = x12
	movl	148(%esp),%eax
	#   in13 = x13
	movl	152(%esp),%ecx
	#   in12 += j12
	addl	212(%esp),%eax
	#   in13 += j13
	addl	216(%esp),%ecx
	#   in12 ^= *(uint32 *) (m + 48)
	xorl	48(%esi),%eax
	#   in13 ^= *(uint32 *) (m + 52)
	xorl	52(%esi),%ecx
	#   *(uint32 *) (out + 48) = in12
	movl	%eax,48(%edi)
	#   *(uint32 *) (out + 52) = in13
	movl	%ecx,52(%edi)
	#   in14 = x14
	movl	156(%esp),%eax
	#   in15 = x15
	movl	160(%esp),%ecx
	#   in14 += j14
	addl	220(%esp),%eax
	#   in15 += j15
	addl	224(%esp),%ecx
	#   in14 ^= *(uint32 *) (m + 56)
	xorl	56(%esi),%eax
	#   in15 ^= *(uint32 *) (m + 60)
	xorl	60(%esi),%ecx
	#   *(uint32 *) (out + 56) = in14
	movl	%eax,56(%edi)
	#   *(uint32 *) (out + 60) = in15
	movl	%ecx,60(%edi)
	#   bytes = bytes_backup
	movl	76(%esp),%ebx
	#   in8 = j8
	movl	196(%esp),%eax
	#   in9 = j9
	movl	200(%esp),%ecx
	#   in8 += 1
	add	$1,%eax
	#   in9 += 0 + carry
	adc	$0,%ecx
	#   j8 = in8
	movl	%eax,196(%esp)
	#   j9 = in9
	movl	%ecx,200(%esp)
	#   bytes - 64
	cmp	$64,%ebx
	#   goto bytesatleast65 if unsigned>
	ja	._bytesatleast65
	#     goto bytesatleast64 if unsigned>=
	jae	._bytesatleast64
	#       m = out
	mov	%edi,%esi
	#       out = ctarget
	movl	228(%esp),%edi
	#       i = bytes
	mov	%ebx,%ecx
	#       while (i) { *out++ = *m++; --i }
	rep	movsb
._bytesatleast64:
	#     x = x_backup
	movl	64(%esp),%eax
	#     in8 = j8
	movl	196(%esp),%ecx
	#     in9 = j9
	movl	200(%esp),%edx
	#     *(uint32 *) (x + 32) = in8
	movl	%ecx,32(%eax)
	#     *(uint32 *) (x + 36) = in9
	movl	%edx,36(%eax)
._done:
	#     eax = eax_stack
	movl	80(%esp),%eax
	#     ebx = ebx_stack
	movl	84(%esp),%ebx
	#     esi = esi_stack
	movl	88(%esp),%esi
	#     edi = edi_stack
	movl	92(%esp),%edi
	#     ebp = ebp_stack
	movl	96(%esp),%ebp
	#     leave
	add	%eax,%esp
	ret
._bytesatleast65:
	#   bytes -= 64
	sub	$64,%ebx
	#   out += 64
	add	$64,%edi
	#   m += 64
	add	$64,%esi
	# goto bytesatleast1
	jmp	._bytesatleast1
ENDPROC(salsa20_encrypt_bytes)
+0 −805

File deleted.

Preview size limit exceeded, changes collapsed.

arch/x86/crypto/salsa20_glue.c

deleted 100644 → 0
+0 −91
Original line number Diff line number Diff line
/*
 * Glue code for optimized assembly version of  Salsa20.
 *
 * Copyright (c) 2007 Tan Swee Heng <thesweeheng@gmail.com>
 *
 * The assembly codes are public domain assembly codes written by Daniel. J.
 * Bernstein <djb@cr.yp.to>. The codes are modified to include indentation
 * and to remove extraneous comments and functions that are not needed.
 * - i586 version, renamed as salsa20-i586-asm_32.S
 *   available from <http://cr.yp.to/snuffle/salsa20/x86-pm/salsa20.s>
 * - x86-64 version, renamed as salsa20-x86_64-asm_64.S
 *   available from <http://cr.yp.to/snuffle/salsa20/amd64-3/salsa20.s>
 *
 * Also modified to set up the initial state using the generic C code rather
 * than in assembly.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 */

#include <asm/unaligned.h>
#include <crypto/internal/skcipher.h>
#include <crypto/salsa20.h>
#include <linux/module.h>

asmlinkage void salsa20_encrypt_bytes(u32 state[16], const u8 *src, u8 *dst,
				      u32 bytes);

static int salsa20_asm_crypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	const struct salsa20_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	u32 state[16];
	int err;

	err = skcipher_walk_virt(&walk, req, true);

	crypto_salsa20_init(state, ctx, walk.iv);

	while (walk.nbytes > 0) {
		unsigned int nbytes = walk.nbytes;

		if (nbytes < walk.total)
			nbytes = round_down(nbytes, walk.stride);

		salsa20_encrypt_bytes(state, walk.src.virt.addr,
				      walk.dst.virt.addr, nbytes);
		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
	}

	return err;
}

static struct skcipher_alg alg = {
	.base.cra_name		= "salsa20",
	.base.cra_driver_name	= "salsa20-asm",
	.base.cra_priority	= 200,
	.base.cra_blocksize	= 1,
	.base.cra_ctxsize	= sizeof(struct salsa20_ctx),
	.base.cra_module	= THIS_MODULE,

	.min_keysize		= SALSA20_MIN_KEY_SIZE,
	.max_keysize		= SALSA20_MAX_KEY_SIZE,
	.ivsize			= SALSA20_IV_SIZE,
	.chunksize		= SALSA20_BLOCK_SIZE,
	.setkey			= crypto_salsa20_setkey,
	.encrypt		= salsa20_asm_crypt,
	.decrypt		= salsa20_asm_crypt,
};

static int __init init(void)
{
	return crypto_register_skcipher(&alg);
}

static void __exit fini(void)
{
	crypto_unregister_skcipher(&alg);
}

module_init(init);
module_exit(fini);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION ("Salsa20 stream cipher algorithm (optimized assembly version)");
MODULE_ALIAS_CRYPTO("salsa20");
MODULE_ALIAS_CRYPTO("salsa20-asm");
+0 −28
Original line number Diff line number Diff line
@@ -1436,34 +1436,6 @@ config CRYPTO_SALSA20
	  The Salsa20 stream cipher algorithm is designed by Daniel J.
	  Bernstein <djb@cr.yp.to>. See <http://cr.yp.to/snuffle.html>

config CRYPTO_SALSA20_586
	tristate "Salsa20 stream cipher algorithm (i586)"
	depends on (X86 || UML_X86) && !64BIT
	select CRYPTO_BLKCIPHER
	select CRYPTO_SALSA20
	help
	  Salsa20 stream cipher algorithm.

	  Salsa20 is a stream cipher submitted to eSTREAM, the ECRYPT
	  Stream Cipher Project. See <http://www.ecrypt.eu.org/stream/>

	  The Salsa20 stream cipher algorithm is designed by Daniel J.
	  Bernstein <djb@cr.yp.to>. See <http://cr.yp.to/snuffle.html>

config CRYPTO_SALSA20_X86_64
	tristate "Salsa20 stream cipher algorithm (x86_64)"
	depends on (X86 || UML_X86) && 64BIT
	select CRYPTO_BLKCIPHER
	select CRYPTO_SALSA20
	help
	  Salsa20 stream cipher algorithm.

	  Salsa20 is a stream cipher submitted to eSTREAM, the ECRYPT
	  Stream Cipher Project. See <http://www.ecrypt.eu.org/stream/>

	  The Salsa20 stream cipher algorithm is designed by Daniel J.
	  Bernstein <djb@cr.yp.to>. See <http://cr.yp.to/snuffle.html>

config CRYPTO_CHACHA20
	tristate "ChaCha20 cipher algorithm"
	select CRYPTO_BLKCIPHER