Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 3465893d authored by Herbert Xu's avatar Herbert Xu
Browse files
Merge crypto-2.6 to pick up NEON yield revert.
parents d6e43798 f10dc56c
Loading
Loading
Loading
Loading
+55 −95
Original line number Original line Diff line number Diff line
@@ -19,33 +19,24 @@
	 *			     u32 *macp, u8 const rk[], u32 rounds);
	 *			     u32 *macp, u8 const rk[], u32 rounds);
	 */
	 */
ENTRY(ce_aes_ccm_auth_data)
ENTRY(ce_aes_ccm_auth_data)
	frame_push	7
	ldr	w8, [x3]			/* leftover from prev round? */

	mov	x19, x0
	mov	x20, x1
	mov	x21, x2
	mov	x22, x3
	mov	x23, x4
	mov	x24, x5

	ldr	w25, [x22]			/* leftover from prev round? */
	ld1	{v0.16b}, [x0]			/* load mac */
	ld1	{v0.16b}, [x0]			/* load mac */
	cbz	w25, 1f
	cbz	w8, 1f
	sub	w25, w25, #16
	sub	w8, w8, #16
	eor	v1.16b, v1.16b, v1.16b
	eor	v1.16b, v1.16b, v1.16b
0:	ldrb	w7, [x20], #1			/* get 1 byte of input */
0:	ldrb	w7, [x1], #1			/* get 1 byte of input */
	subs	w21, w21, #1
	subs	w2, w2, #1
	add	w25, w25, #1
	add	w8, w8, #1
	ins	v1.b[0], w7
	ins	v1.b[0], w7
	ext	v1.16b, v1.16b, v1.16b, #1	/* rotate in the input bytes */
	ext	v1.16b, v1.16b, v1.16b, #1	/* rotate in the input bytes */
	beq	8f				/* out of input? */
	beq	8f				/* out of input? */
	cbnz	w25, 0b
	cbnz	w8, 0b
	eor	v0.16b, v0.16b, v1.16b
	eor	v0.16b, v0.16b, v1.16b
1:	ld1	{v3.4s}, [x23]			/* load first round key */
1:	ld1	{v3.4s}, [x4]			/* load first round key */
	prfm	pldl1strm, [x20]
	prfm	pldl1strm, [x1]
	cmp	w24, #12			/* which key size? */
	cmp	w5, #12				/* which key size? */
	add	x6, x23, #16
	add	x6, x4, #16
	sub	w7, w24, #2			/* modified # of rounds */
	sub	w7, w5, #2			/* modified # of rounds */
	bmi	2f
	bmi	2f
	bne	5f
	bne	5f
	mov	v5.16b, v3.16b
	mov	v5.16b, v3.16b
@@ -64,43 +55,33 @@ ENTRY(ce_aes_ccm_auth_data)
	ld1	{v5.4s}, [x6], #16		/* load next round key */
	ld1	{v5.4s}, [x6], #16		/* load next round key */
	bpl	3b
	bpl	3b
	aese	v0.16b, v4.16b
	aese	v0.16b, v4.16b
	subs	w21, w21, #16			/* last data? */
	subs	w2, w2, #16			/* last data? */
	eor	v0.16b, v0.16b, v5.16b		/* final round */
	eor	v0.16b, v0.16b, v5.16b		/* final round */
	bmi	6f
	bmi	6f
	ld1	{v1.16b}, [x20], #16		/* load next input block */
	ld1	{v1.16b}, [x1], #16		/* load next input block */
	eor	v0.16b, v0.16b, v1.16b		/* xor with mac */
	eor	v0.16b, v0.16b, v1.16b		/* xor with mac */
	beq	6f
	bne	1b

6:	st1	{v0.16b}, [x0]			/* store mac */
	if_will_cond_yield_neon
	st1	{v0.16b}, [x19]			/* store mac */
	do_cond_yield_neon
	ld1	{v0.16b}, [x19]			/* reload mac */
	endif_yield_neon

	b	1b
6:	st1	{v0.16b}, [x19]			/* store mac */
	beq	10f
	beq	10f
	adds	w21, w21, #16
	adds	w2, w2, #16
	beq	10f
	beq	10f
	mov	w25, w21
	mov	w8, w2
7:	ldrb	w7, [x20], #1
7:	ldrb	w7, [x1], #1
	umov	w6, v0.b[0]
	umov	w6, v0.b[0]
	eor	w6, w6, w7
	eor	w6, w6, w7
	strb	w6, [x19], #1
	strb	w6, [x0], #1
	subs	w21, w21, #1
	subs	w2, w2, #1
	beq	10f
	beq	10f
	ext	v0.16b, v0.16b, v0.16b, #1	/* rotate out the mac bytes */
	ext	v0.16b, v0.16b, v0.16b, #1	/* rotate out the mac bytes */
	b	7b
	b	7b
8:	mov	w7, w25
8:	mov	w7, w8
	add	w25, w25, #16
	add	w8, w8, #16
9:	ext	v1.16b, v1.16b, v1.16b, #1
9:	ext	v1.16b, v1.16b, v1.16b, #1
	adds	w7, w7, #1
	adds	w7, w7, #1
	bne	9b
	bne	9b
	eor	v0.16b, v0.16b, v1.16b
	eor	v0.16b, v0.16b, v1.16b
	st1	{v0.16b}, [x19]
	st1	{v0.16b}, [x0]
10:	str	w25, [x22]
10:	str	w8, [x3]

	frame_pop
	ret
	ret
ENDPROC(ce_aes_ccm_auth_data)
ENDPROC(ce_aes_ccm_auth_data)


@@ -145,29 +126,19 @@ ENTRY(ce_aes_ccm_final)
ENDPROC(ce_aes_ccm_final)
ENDPROC(ce_aes_ccm_final)


	.macro	aes_ccm_do_crypt,enc
	.macro	aes_ccm_do_crypt,enc
	frame_push	8
	ldr	x8, [x6, #8]			/* load lower ctr */

	ld1	{v0.16b}, [x5]			/* load mac */
	mov	x19, x0
CPU_LE(	rev	x8, x8			)	/* keep swabbed ctr in reg */
	mov	x20, x1
	mov	x21, x2
	mov	x22, x3
	mov	x23, x4
	mov	x24, x5
	mov	x25, x6

	ldr	x26, [x25, #8]			/* load lower ctr */
	ld1	{v0.16b}, [x24]			/* load mac */
CPU_LE(	rev	x26, x26		)	/* keep swabbed ctr in reg */
0:	/* outer loop */
0:	/* outer loop */
	ld1	{v1.8b}, [x25]			/* load upper ctr */
	ld1	{v1.8b}, [x6]			/* load upper ctr */
	prfm	pldl1strm, [x20]
	prfm	pldl1strm, [x1]
	add	x26, x26, #1
	add	x8, x8, #1
	rev	x9, x26
	rev	x9, x8
	cmp	w23, #12			/* which key size? */
	cmp	w4, #12				/* which key size? */
	sub	w7, w23, #2			/* get modified # of rounds */
	sub	w7, w4, #2			/* get modified # of rounds */
	ins	v1.d[1], x9			/* no carry in lower ctr */
	ins	v1.d[1], x9			/* no carry in lower ctr */
	ld1	{v3.4s}, [x22]			/* load first round key */
	ld1	{v3.4s}, [x3]			/* load first round key */
	add	x10, x22, #16
	add	x10, x3, #16
	bmi	1f
	bmi	1f
	bne	4f
	bne	4f
	mov	v5.16b, v3.16b
	mov	v5.16b, v3.16b
@@ -194,9 +165,9 @@ CPU_LE( rev x26, x26 ) /* keep swabbed ctr in reg */
	bpl	2b
	bpl	2b
	aese	v0.16b, v4.16b
	aese	v0.16b, v4.16b
	aese	v1.16b, v4.16b
	aese	v1.16b, v4.16b
	subs	w21, w21, #16
	subs	w2, w2, #16
	bmi	7f				/* partial block? */
	bmi	6f				/* partial block? */
	ld1	{v2.16b}, [x20], #16		/* load next input block */
	ld1	{v2.16b}, [x1], #16		/* load next input block */
	.if	\enc == 1
	.if	\enc == 1
	eor	v2.16b, v2.16b, v5.16b		/* final round enc+mac */
	eor	v2.16b, v2.16b, v5.16b		/* final round enc+mac */
	eor	v1.16b, v1.16b, v2.16b		/* xor with crypted ctr */
	eor	v1.16b, v1.16b, v2.16b		/* xor with crypted ctr */
@@ -205,29 +176,18 @@ CPU_LE( rev x26, x26 ) /* keep swabbed ctr in reg */
	eor	v1.16b, v2.16b, v5.16b		/* final round enc */
	eor	v1.16b, v2.16b, v5.16b		/* final round enc */
	.endif
	.endif
	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */
	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */
	st1	{v1.16b}, [x19], #16		/* write output block */
	st1	{v1.16b}, [x0], #16		/* write output block */
	beq	5f
	bne	0b

CPU_LE(	rev	x8, x8			)
	if_will_cond_yield_neon
	st1	{v0.16b}, [x5]			/* store mac */
	st1	{v0.16b}, [x24]			/* store mac */
	str	x8, [x6, #8]			/* store lsb end of ctr (BE) */
	do_cond_yield_neon
5:	ret
	ld1	{v0.16b}, [x24]			/* reload mac */

	endif_yield_neon
6:	eor	v0.16b, v0.16b, v5.16b		/* final round mac */

	b	0b
5:
CPU_LE(	rev	x26, x26			)
	st1	{v0.16b}, [x24]			/* store mac */
	str	x26, [x25, #8]			/* store lsb end of ctr (BE) */

6:	frame_pop
	ret

7:	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
	eor	v1.16b, v1.16b, v5.16b		/* final round enc */
	eor	v1.16b, v1.16b, v5.16b		/* final round enc */
	st1	{v0.16b}, [x24]			/* store mac */
	st1	{v0.16b}, [x5]			/* store mac */
	add	w21, w21, #16			/* process partial tail block */
	add	w2, w2, #16			/* process partial tail block */
8:	ldrb	w9, [x20], #1			/* get 1 byte of input */
7:	ldrb	w9, [x1], #1			/* get 1 byte of input */
	umov	w6, v1.b[0]			/* get top crypted ctr byte */
	umov	w6, v1.b[0]			/* get top crypted ctr byte */
	umov	w7, v0.b[0]			/* get top mac byte */
	umov	w7, v0.b[0]			/* get top mac byte */
	.if	\enc == 1
	.if	\enc == 1
@@ -237,13 +197,13 @@ CPU_LE( rev x26, x26 )
	eor	w9, w9, w6
	eor	w9, w9, w6
	eor	w7, w7, w9
	eor	w7, w7, w9
	.endif
	.endif
	strb	w9, [x19], #1			/* store out byte */
	strb	w9, [x0], #1			/* store out byte */
	strb	w7, [x24], #1			/* store mac byte */
	strb	w7, [x5], #1			/* store mac byte */
	subs	w21, w21, #1
	subs	w2, w2, #1
	beq	6b
	beq	5b
	ext	v0.16b, v0.16b, v0.16b, #1	/* shift out mac byte */
	ext	v0.16b, v0.16b, v0.16b, #1	/* shift out mac byte */
	ext	v1.16b, v1.16b, v1.16b, #1	/* shift out ctr byte */
	ext	v1.16b, v1.16b, v1.16b, #1	/* shift out ctr byte */
	b	8b
	b	7b
	.endm
	.endm


	/*
	/*
+25 −51
Original line number Original line Diff line number Diff line
@@ -322,55 +322,41 @@ ENDPROC(pmull_ghash_update_p8)
	.endm
	.endm


	.macro		pmull_gcm_do_crypt, enc
	.macro		pmull_gcm_do_crypt, enc
	frame_push	10
	ld1		{SHASH.2d}, [x4]
	ld1		{XL.2d}, [x1]
	ldr		x8, [x5, #8]			// load lower counter


	mov		x19, x0
	load_round_keys	w7, x6
	mov		x20, x1
	mov		x21, x2
	mov		x22, x3
	mov		x23, x4
	mov		x24, x5
	mov		x25, x6
	mov		x26, x7
	.if		\enc == 1
	ldr		x27, [sp, #96]			// first stacked arg
	.endif

	ldr		x28, [x24, #8]			// load lower counter
CPU_LE(	rev		x28, x28	)

0:	mov		x0, x25
	load_round_keys	w26, x0
	ld1		{SHASH.2d}, [x23]
	ld1		{XL.2d}, [x20]


	movi		MASK.16b, #0xe1
	movi		MASK.16b, #0xe1
	ext		SHASH2.16b, SHASH.16b, SHASH.16b, #8
	ext		SHASH2.16b, SHASH.16b, SHASH.16b, #8
CPU_LE(	rev		x8, x8		)
	shl		MASK.2d, MASK.2d, #57
	shl		MASK.2d, MASK.2d, #57
	eor		SHASH2.16b, SHASH2.16b, SHASH.16b
	eor		SHASH2.16b, SHASH2.16b, SHASH.16b


	.if		\enc == 1
	.if		\enc == 1
	ld1		{KS.16b}, [x27]
	ldr		x10, [sp]
	ld1		{KS.16b}, [x10]
	.endif
	.endif


1:	ld1		{CTR.8b}, [x24]			// load upper counter
0:	ld1		{CTR.8b}, [x5]			// load upper counter
	ld1		{INP.16b}, [x22], #16
	ld1		{INP.16b}, [x3], #16
	rev		x9, x28
	rev		x9, x8
	add		x28, x28, #1
	add		x8, x8, #1
	sub		w19, w19, #1
	sub		w0, w0, #1
	ins		CTR.d[1], x9			// set lower counter
	ins		CTR.d[1], x9			// set lower counter


	.if		\enc == 1
	.if		\enc == 1
	eor		INP.16b, INP.16b, KS.16b	// encrypt input
	eor		INP.16b, INP.16b, KS.16b	// encrypt input
	st1		{INP.16b}, [x21], #16
	st1		{INP.16b}, [x2], #16
	.endif
	.endif


	rev64		T1.16b, INP.16b
	rev64		T1.16b, INP.16b


	cmp		w26, #12
	cmp		w7, #12
	b.ge		4f				// AES-192/256?
	b.ge		2f				// AES-192/256?


2:	enc_round	CTR, v21
1:	enc_round	CTR, v21


	ext		T2.16b, XL.16b, XL.16b, #8
	ext		T2.16b, XL.16b, XL.16b, #8
	ext		IN1.16b, T1.16b, T1.16b, #8
	ext		IN1.16b, T1.16b, T1.16b, #8
@@ -425,39 +411,27 @@ CPU_LE( rev x28, x28 )


	.if		\enc == 0
	.if		\enc == 0
	eor		INP.16b, INP.16b, KS.16b
	eor		INP.16b, INP.16b, KS.16b
	st1		{INP.16b}, [x21], #16
	st1		{INP.16b}, [x2], #16
	.endif
	.endif


	cbz		w19, 3f
	cbnz		w0, 0b


	if_will_cond_yield_neon
CPU_LE(	rev		x8, x8		)
	st1		{XL.2d}, [x20]
	st1		{XL.2d}, [x1]
	.if		\enc == 1
	str		x8, [x5, #8]			// store lower counter
	st1		{KS.16b}, [x27]
	.endif
	do_cond_yield_neon
	b		0b
	endif_yield_neon


	b		1b

3:	st1		{XL.2d}, [x20]
	.if		\enc == 1
	.if		\enc == 1
	st1		{KS.16b}, [x27]
	st1		{KS.16b}, [x10]
	.endif
	.endif


CPU_LE(	rev		x28, x28	)
	str		x28, [x24, #8]			// store lower counter

	frame_pop
	ret
	ret


4:	b.eq		5f				// AES-192?
2:	b.eq		3f				// AES-192?
	enc_round	CTR, v17
	enc_round	CTR, v17
	enc_round	CTR, v18
	enc_round	CTR, v18
5:	enc_round	CTR, v19
3:	enc_round	CTR, v19
	enc_round	CTR, v20
	enc_round	CTR, v20
	b		2b
	b		1b
	.endm
	.endm


	/*
	/*
+6 −2
Original line number Original line Diff line number Diff line
@@ -266,6 +266,8 @@ static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key,
		return;
		return;
	}
	}


	count -= initial;

	if (initial)
	if (initial)
		asm volatile (".byte 0xf3,0x0f,0xa7,0xc8"	/* rep xcryptecb */
		asm volatile (".byte 0xf3,0x0f,0xa7,0xc8"	/* rep xcryptecb */
			      : "+S"(input), "+D"(output)
			      : "+S"(input), "+D"(output)
@@ -273,7 +275,7 @@ static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key,


	asm volatile (".byte 0xf3,0x0f,0xa7,0xc8"	/* rep xcryptecb */
	asm volatile (".byte 0xf3,0x0f,0xa7,0xc8"	/* rep xcryptecb */
		      : "+S"(input), "+D"(output)
		      : "+S"(input), "+D"(output)
		      : "d"(control_word), "b"(key), "c"(count - initial));
		      : "d"(control_word), "b"(key), "c"(count));
}
}


static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key,
static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key,
@@ -284,6 +286,8 @@ static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key,
	if (count < cbc_fetch_blocks)
	if (count < cbc_fetch_blocks)
		return cbc_crypt(input, output, key, iv, control_word, count);
		return cbc_crypt(input, output, key, iv, control_word, count);


	count -= initial;

	if (initial)
	if (initial)
		asm volatile (".byte 0xf3,0x0f,0xa7,0xd0"	/* rep xcryptcbc */
		asm volatile (".byte 0xf3,0x0f,0xa7,0xd0"	/* rep xcryptcbc */
			      : "+S" (input), "+D" (output), "+a" (iv)
			      : "+S" (input), "+D" (output), "+a" (iv)
@@ -291,7 +295,7 @@ static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key,


	asm volatile (".byte 0xf3,0x0f,0xa7,0xd0"	/* rep xcryptcbc */
	asm volatile (".byte 0xf3,0x0f,0xa7,0xd0"	/* rep xcryptcbc */
		      : "+S" (input), "+D" (output), "+a" (iv)
		      : "+S" (input), "+D" (output), "+a" (iv)
		      : "d" (control_word), "b" (key), "c" (count-initial));
		      : "d" (control_word), "b" (key), "c" (count));
	return iv;
	return iv;
}
}