Commit 638591cd authored by Dave Martin, committed by Russell King

ARM: 7626/1: arm/crypto: Make asm SHA-1 and AES code Thumb-2 compatible



This patch fixes aes-armv4.S and sha1-armv4-large.S to work
natively in Thumb.  This allows ARM/Thumb interworking workarounds
to be removed.

I also take the opportunity to convert some explicit assembler
directives for exported functions to the standard
ENTRY()/ENDPROC().
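
(For reference: ENTRY() and ENDPROC() are the standard annotations
from <linux/linkage.h>.  As a simplified sketch -- the real
definitions add arch-specific alignment and symbol annotations --
they expand roughly to:

	#define ENTRY(name)	.globl name; ALIGN; name:
	#define ENDPROC(name)	.type name, %function; .size name, .-name

which is why the explicit .global/.type/.size directives disappear
in the diffs below.)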

For the code itself:

  * In sha1_block_data_order, use of TEQ with sp is deprecated in
    ARMv7 and not supported in Thumb.  For the branches back to
    .L_00_15 and .L_40_59, the TEQ is converted to a CMP, under the
    assumption that clobbering the C flag here will not cause
    incorrect behaviour.

    For the first branch back to .L_20_39_or_60_79 the C flag is
    important, so sp is moved temporarily into another register so
    that TEQ can be used for the comparison (see the flags sketch
    after this list).

  * In the AES code, most forms of register-indexed addressing with
    shifts and rotates are not permitted for loads and stores in
    Thumb, so the address calculation is done using a separate
    instruction for the Thumb case (see the ARM()/THUMB() sketch
    after this list).
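
To make the flag reasoning concrete, here is a minimal sketch of the
two situations, with the instruction sequences taken from the patched
sha1-armv4-large.S (full context in the diff below):

	@ Only the Z flag is tested afterwards, so CMP can stand in
	@ for TEQ even though it also clobbers C and V:
	cmp	r14,sp			@ sets Z when r14 == sp
	bne	.L_00_15

	@ A later BCS still consumes C; TEQ with plain register
	@ operands leaves C untouched, so sp is staged through r11 to
	@ keep TEQ legal in Thumb:
 ARM(	teq	r14,sp		)
 THUMB(	mov	r11,sp		)
 THUMB(	teq	r14,r11		)
	bne	.L_20_39_or_60_79
	bcs	.L_done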
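
The Thumb-side split relies on the ARM()/THUMB() macros from
<asm/unified.h>, each of which emits its argument only when
assembling for the corresponding instruction set.  A minimal sketch,
taken from the AES decrypt path below:

 ARM(	ldrb	r1,[r10,r1,lsr#24]  )	@ ARM: scaled register offset is fine
 THUMB(	add	r1,r10,r1,lsr#24    )	@ Thumb-2: compute the address first,
 THUMB(	ldrb	r1,[r1]		    )	@ then do a plain byte load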

The resulting code is unlikely to be optimally scheduled, but it
should not have a large impact given the overall size of the code.
I haven't run any benchmarks.

Signed-off-by: Dave Martin <dave.martin@linaro.org>
Tested-by: David McCullough <ucdevel@gmail.com> (ARM only)
Acked-by: David McCullough <ucdevel@gmail.com>
Acked-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
parent 9931faca

aes-armv4.S  +20 −44

@@ -34,8 +34,9 @@
 @ A little glue here to select the correct code below for the ARM CPU
 @ that is being targetted.
 
+#include <linux/linkage.h>
+
 .text
-.code	32
 
 .type	AES_Te,%object
 .align	5
@@ -145,10 +146,8 @@ AES_Te:
 
 @ void AES_encrypt(const unsigned char *in, unsigned char *out,
 @ 		 const AES_KEY *key) {
-.global AES_encrypt
-.type   AES_encrypt,%function
 .align	5
-AES_encrypt:
+ENTRY(AES_encrypt)
 	sub	r3,pc,#8		@ AES_encrypt
 	stmdb   sp!,{r1,r4-r12,lr}
 	mov	r12,r0		@ inp
@@ -239,15 +238,8 @@ AES_encrypt:
 	strb	r6,[r12,#14]
 	strb	r3,[r12,#15]
 #endif
-#if __ARM_ARCH__>=5
 	ldmia	sp!,{r4-r12,pc}
-#else
-	ldmia   sp!,{r4-r12,lr}
-	tst	lr,#1
-	moveq	pc,lr			@ be binary compatible with V4, yet
-	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
-#endif
-.size	AES_encrypt,.-AES_encrypt
+ENDPROC(AES_encrypt)
 
 .type   _armv4_AES_encrypt,%function
 .align	2
@@ -386,10 +378,8 @@ _armv4_AES_encrypt:
 	ldr	pc,[sp],#4		@ pop and return
 .size	_armv4_AES_encrypt,.-_armv4_AES_encrypt
 
-.global private_AES_set_encrypt_key
-.type   private_AES_set_encrypt_key,%function
 .align	5
-private_AES_set_encrypt_key:
+ENTRY(private_AES_set_encrypt_key)
 _armv4_AES_set_encrypt_key:
 	sub	r3,pc,#8		@ AES_set_encrypt_key
 	teq	r0,#0
@@ -658,15 +648,11 @@ _armv4_AES_set_encrypt_key:
 
 .Ldone:	mov	r0,#0
 	ldmia   sp!,{r4-r12,lr}
-.Labrt:	tst	lr,#1
-	moveq	pc,lr			@ be binary compatible with V4, yet
-	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
-.size	private_AES_set_encrypt_key,.-private_AES_set_encrypt_key
+.Labrt:	mov	pc,lr
+ENDPROC(private_AES_set_encrypt_key)
 
-.global private_AES_set_decrypt_key
-.type   private_AES_set_decrypt_key,%function
 .align	5
-private_AES_set_decrypt_key:
+ENTRY(private_AES_set_decrypt_key)
 	str	lr,[sp,#-4]!            @ push lr
 #if 0
 	@ kernel does both of these in setkey so optimise this bit out by
@@ -748,15 +734,8 @@ private_AES_set_decrypt_key:
 	bne	.Lmix
 
 	mov	r0,#0
-#if __ARM_ARCH__>=5
 	ldmia	sp!,{r4-r12,pc}
-#else
-	ldmia   sp!,{r4-r12,lr}
-	tst	lr,#1
-	moveq	pc,lr			@ be binary compatible with V4, yet
-	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
-#endif
-.size	private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
+ENDPROC(private_AES_set_decrypt_key)
 
 .type	AES_Td,%object
 .align	5
@@ -862,10 +841,8 @@ AES_Td:
 
 @ void AES_decrypt(const unsigned char *in, unsigned char *out,
 @ 		 const AES_KEY *key) {
-.global AES_decrypt
-.type   AES_decrypt,%function
 .align	5
-AES_decrypt:
+ENTRY(AES_decrypt)
 	sub	r3,pc,#8		@ AES_decrypt
 	stmdb   sp!,{r1,r4-r12,lr}
 	mov	r12,r0		@ inp
@@ -956,15 +933,8 @@ AES_decrypt:
 	strb	r6,[r12,#14]
 	strb	r3,[r12,#15]
 #endif
-#if __ARM_ARCH__>=5
 	ldmia	sp!,{r4-r12,pc}
-#else
-	ldmia   sp!,{r4-r12,lr}
-	tst	lr,#1
-	moveq	pc,lr			@ be binary compatible with V4, yet
-	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
-#endif
-.size	AES_decrypt,.-AES_decrypt
+ENDPROC(AES_decrypt)
 
 .type   _armv4_AES_decrypt,%function
 .align	2
@@ -1064,7 +1034,9 @@ _armv4_AES_decrypt:
 	and	r9,lr,r1,lsr#8
 
 	ldrb	r7,[r10,r7]		@ Td4[s1>>0]
-	ldrb	r1,[r10,r1,lsr#24]	@ Td4[s1>>24]
+ ARM(	ldrb	r1,[r10,r1,lsr#24]  )	@ Td4[s1>>24]
+ THUMB(	add	r1,r10,r1,lsr#24    )	@ Td4[s1>>24]
+ THUMB(	ldrb	r1,[r1]		    )
 	ldrb	r8,[r10,r8]		@ Td4[s1>>16]
 	eor	r0,r7,r0,lsl#24
 	ldrb	r9,[r10,r9]		@ Td4[s1>>8]
@@ -1077,7 +1049,9 @@ _armv4_AES_decrypt:
 	ldrb	r8,[r10,r8]		@ Td4[s2>>0]
 	and	r9,lr,r2,lsr#16
 
-	ldrb	r2,[r10,r2,lsr#24]	@ Td4[s2>>24]
+ ARM(	ldrb	r2,[r10,r2,lsr#24]  )	@ Td4[s2>>24]
+ THUMB(	add	r2,r10,r2,lsr#24    )	@ Td4[s2>>24]
+ THUMB(	ldrb	r2,[r2]		    )
 	eor	r0,r0,r7,lsl#8
 	ldrb	r9,[r10,r9]		@ Td4[s2>>16]
 	eor	r1,r8,r1,lsl#16
@@ -1090,7 +1064,9 @@ _armv4_AES_decrypt:
 	and	r9,lr,r3		@ i2
 
 	ldrb	r9,[r10,r9]		@ Td4[s3>>0]
-	ldrb	r3,[r10,r3,lsr#24]	@ Td4[s3>>24]
+ ARM(	ldrb	r3,[r10,r3,lsr#24]  )	@ Td4[s3>>24]
+ THUMB(	add	r3,r10,r3,lsr#24    )	@ Td4[s3>>24]
+ THUMB(	ldrb	r3,[r3]		    )
 	eor	r0,r0,r7,lsl#16
 	ldr	r7,[r11,#0]
 	eor	r1,r1,r8,lsl#8

sha1-armv4-large.S  +9 −15

@@ -51,13 +51,12 @@
 @ Profiler-assisted and platform-specific optimization resulted in 10%
 @ improvement on Cortex A8 core and 12.2 cycles per byte.
 
-.text
+#include <linux/linkage.h>
 
-.global	sha1_block_data_order
-.type	sha1_block_data_order,%function
+.text
 
 .align	2
-sha1_block_data_order:
+ENTRY(sha1_block_data_order)
 	stmdb	sp!,{r4-r12,lr}
 	add	r2,r1,r2,lsl#6	@ r2 to point at the end of r1
 	ldmia	r0,{r3,r4,r5,r6,r7}
@@ -194,7 +193,7 @@ sha1_block_data_order:
 	eor	r10,r10,r7,ror#2		@ F_00_19(B,C,D)
 	str	r9,[r14,#-4]!
 	add	r3,r3,r10			@ E+=F_00_19(B,C,D)
-	teq	r14,sp
+	cmp	r14,sp
 	bne	.L_00_15		@ [((11+4)*5+2)*3]
 #if __ARM_ARCH__<7
 	ldrb	r10,[r1,#2]
@@ -374,7 +373,9 @@ sha1_block_data_order:
 						@ F_xx_xx
 	add	r3,r3,r9			@ E+=X[i]
 	add	r3,r3,r10			@ E+=F_20_39(B,C,D)
-	teq	r14,sp			@ preserve carry
+ ARM(	teq	r14,sp		)	@ preserve carry
+ THUMB(	mov	r11,sp		)
+ THUMB(	teq	r14,r11		)	@ preserve carry
 	bne	.L_20_39_or_60_79	@ [+((12+3)*5+2)*4]
 	bcs	.L_done			@ [+((12+3)*5+2)*4], spare 300 bytes
 
@@ -466,7 +467,7 @@ sha1_block_data_order:
 	add	r3,r3,r9			@ E+=X[i]
 	add	r3,r3,r10			@ E+=F_40_59(B,C,D)
 	add	r3,r3,r11,ror#2
-	teq	r14,sp
+	cmp	r14,sp
 	bne	.L_40_59		@ [+((12+5)*5+2)*4]
 
 	ldr	r8,.LK_60_79
@@ -485,19 +486,12 @@ sha1_block_data_order:
 	teq	r1,r2
 	bne	.Lloop			@ [+18], total 1307
 
-#if __ARM_ARCH__>=5
 	ldmia	sp!,{r4-r12,pc}
-#else
-	ldmia	sp!,{r4-r12,lr}
-	tst	lr,#1
-	moveq	pc,lr			@ be binary compatible with V4, yet
-	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
-#endif
 .align	2
 .LK_00_19:	.word	0x5a827999
 .LK_20_39:	.word	0x6ed9eba1
 .LK_40_59:	.word	0x8f1bbcdc
 .LK_60_79:	.word	0xca62c1d6
-.size	sha1_block_data_order,.-sha1_block_data_order
+ENDPROC(sha1_block_data_order)
 .asciz	"SHA1 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
 .align	2