
Commit 7c50136a authored by Ard Biesheuvel, committed by Herbert Xu

crypto: arm64/aes-ghash - yield NEON after every block of input



Avoid excessive scheduling delays under a preemptible kernel by
yielding the NEON after every block of input.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent 20ab6332
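
The glue-code half of the change (arch/arm64/crypto/ghash-ce-glue.c below) supports this by switching to a non-atomic scatterwalk and taking/releasing the NEON unit around each chunk of the walk rather than around the whole request. A minimal sketch of that pattern in kernel C is shown here; walk_with_per_chunk_neon() and do_blocks() are hypothetical names standing in for the real gcm_encrypt()/pmull_gcm_encrypt() pair, so this is an illustration of the idea, not the code added by the commit.

#include <linux/string.h>
#include <linux/types.h>
#include <asm/neon.h>
#include <crypto/aes.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/skcipher.h>

/* Hypothetical per-chunk worker standing in for the PMULL/AES-CE routine. */
static void do_blocks(int blocks, u8 *dst, const u8 *src)
{
	memcpy(dst, src, blocks * AES_BLOCK_SIZE);	/* placeholder work */
}

static int walk_with_per_chunk_neon(struct aead_request *req)
{
	struct skcipher_walk walk;
	int err;

	/* 'false': the walk may sleep, since no NEON section is open here */
	err = skcipher_walk_aead_encrypt(&walk, req, false);

	while (walk.nbytes >= AES_BLOCK_SIZE) {
		int blocks = walk.nbytes / AES_BLOCK_SIZE;

		kernel_neon_begin();	/* hold the NEON unit for this chunk only */
		do_blocks(blocks, walk.dst.virt.addr, walk.src.virt.addr);
		kernel_neon_end();	/* a pending reschedule can happen here */

		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
	}
	return err;
}

Within each chunk the assembly itself may still run for many blocks, which is why the .S changes below also add an explicit yield point after every block.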
arch/arm64/crypto/ghash-ce-core.S  +80 −33
@@ -213,22 +213,31 @@
 	.endm

 	.macro		__pmull_ghash, pn
-	ld1		{SHASH.2d}, [x3]
-	ld1		{XL.2d}, [x1]
+	frame_push	5
+
+	mov		x19, x0
+	mov		x20, x1
+	mov		x21, x2
+	mov		x22, x3
+	mov		x23, x4
+
+0:	ld1		{SHASH.2d}, [x22]
+	ld1		{XL.2d}, [x20]
 	ext		SHASH2.16b, SHASH.16b, SHASH.16b, #8
 	eor		SHASH2.16b, SHASH2.16b, SHASH.16b

 	__pmull_pre_\pn

 	/* do the head block first, if supplied */
-	cbz		x4, 0f
-	ld1		{T1.2d}, [x4]
-	b		1f
+	cbz		x23, 1f
+	ld1		{T1.2d}, [x23]
+	mov		x23, xzr
+	b		2f

-0:	ld1		{T1.2d}, [x2], #16
-	sub		w0, w0, #1
+1:	ld1		{T1.2d}, [x21], #16
+	sub		w19, w19, #1

-1:	/* multiply XL by SHASH in GF(2^128) */
+2:	/* multiply XL by SHASH in GF(2^128) */
 CPU_LE(	rev64		T1.16b, T1.16b	)

 	ext		T2.16b, XL.16b, XL.16b, #8
@@ -250,9 +259,18 @@ CPU_LE( rev64 T1.16b, T1.16b )
 	eor		T2.16b, T2.16b, XH.16b
 	eor		XL.16b, XL.16b, T2.16b

-	cbnz		w0, 0b
+	cbz		w19, 3f
+
+	if_will_cond_yield_neon
+	st1		{XL.2d}, [x20]
+	do_cond_yield_neon
+	b		0b
+	endif_yield_neon
+
+	b		1b

-	st1		{XL.2d}, [x1]
+3:	st1		{XL.2d}, [x20]
+	frame_pop
 	ret
 	.endm

@@ -304,38 +322,55 @@ ENDPROC(pmull_ghash_update_p8)
 	.endm

 	.macro		pmull_gcm_do_crypt, enc
-	ld1		{SHASH.2d}, [x4]
-	ld1		{XL.2d}, [x1]
-	ldr		x8, [x5, #8]			// load lower counter
+	frame_push	10
+
+	mov		x19, x0
+	mov		x20, x1
+	mov		x21, x2
+	mov		x22, x3
+	mov		x23, x4
+	mov		x24, x5
+	mov		x25, x6
+	mov		x26, x7
+	.if		\enc == 1
+	ldr		x27, [sp, #96]			// first stacked arg
+	.endif
+
+	ldr		x28, [x24, #8]			// load lower counter
+CPU_LE(	rev		x28, x28	)
+
+0:	mov		x0, x25
+	load_round_keys	w26, x0
+	ld1		{SHASH.2d}, [x23]
+	ld1		{XL.2d}, [x20]

 	movi		MASK.16b, #0xe1
 	ext		SHASH2.16b, SHASH.16b, SHASH.16b, #8
-CPU_LE(	rev		x8, x8		)
 	shl		MASK.2d, MASK.2d, #57
 	eor		SHASH2.16b, SHASH2.16b, SHASH.16b

 	.if		\enc == 1
-	ld1		{KS.16b}, [x7]
+	ld1		{KS.16b}, [x27]
 	.endif

-0:	ld1		{CTR.8b}, [x5]			// load upper counter
-	ld1		{INP.16b}, [x3], #16
-	rev		x9, x8
-	add		x8, x8, #1
-	sub		w0, w0, #1
+1:	ld1		{CTR.8b}, [x24]			// load upper counter
+	ld1		{INP.16b}, [x22], #16
+	rev		x9, x28
+	add		x28, x28, #1
+	sub		w19, w19, #1
 	ins		CTR.d[1], x9			// set lower counter

 	.if		\enc == 1
 	eor		INP.16b, INP.16b, KS.16b	// encrypt input
-	st1		{INP.16b}, [x2], #16
+	st1		{INP.16b}, [x21], #16
 	.endif

 	rev64		T1.16b, INP.16b

-	cmp		w6, #12
-	b.ge		2f				// AES-192/256?
+	cmp		w26, #12
+	b.ge		4f				// AES-192/256?

-1:	enc_round	CTR, v21
+2:	enc_round	CTR, v21

 	ext		T2.16b, XL.16b, XL.16b, #8
 	ext		IN1.16b, T1.16b, T1.16b, #8
@@ -390,27 +425,39 @@ CPU_LE( rev x8, x8 )

 	.if		\enc == 0
 	eor		INP.16b, INP.16b, KS.16b
-	st1		{INP.16b}, [x2], #16
+	st1		{INP.16b}, [x21], #16
 	.endif

-	cbnz		w0, 0b
+	cbz		w19, 3f

-CPU_LE(	rev		x8, x8		)
-	st1		{XL.2d}, [x1]
-	str		x8, [x5, #8]			// store lower counter
+	if_will_cond_yield_neon
+	st1		{XL.2d}, [x20]
+	.if		\enc == 1
+	st1		{KS.16b}, [x27]
+	.endif
+	do_cond_yield_neon
+	b		0b
+	endif_yield_neon
+
+	b		1b

+3:	st1		{XL.2d}, [x20]
 	.if		\enc == 1
-	st1		{KS.16b}, [x7]
+	st1		{KS.16b}, [x27]
 	.endif

+CPU_LE(	rev		x28, x28	)
+	str		x28, [x24, #8]			// store lower counter
+
+	frame_pop
 	ret

-2:	b.eq		3f				// AES-192?
+4:	b.eq		5f				// AES-192?
 	enc_round	CTR, v17
 	enc_round	CTR, v18
-3:	enc_round	CTR, v19
+5:	enc_round	CTR, v19
 	enc_round	CTR, v20
-	b		1b
+	b		2b
 	.endm

 	/*
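
The assembly changes above implement the per-block yield with the if_will_cond_yield_neon / do_cond_yield_neon / endif_yield_neon macros: the live GHASH/CTR state is stored back to memory, the NEON unit is released if a reschedule is pending, and the code branches back to label 0:, which reloads everything. A rough C-level analogue is sketched below; it is illustrative only, one_block() is a placeholder, and the real check-and-yield is done by the assembler macros rather than by need_resched()/cond_resched() in C.

#include <asm/neon.h>
#include <linux/sched.h>

/*
 * Illustrative only: all persistent state is written back to memory before
 * the NEON unit is released, so it is safe to stop and resume mid-request.
 */
static void process_with_yield(int blocks, void (*one_block)(int idx))
{
	int i = 0;

	kernel_neon_begin();
	while (blocks--) {
		one_block(i++);			/* one 16-byte block of work */

		if (blocks && need_resched()) {
			kernel_neon_end();	/* state already saved to memory */
			cond_resched();		/* let the scheduler run */
			kernel_neon_begin();	/* resume and reload the state */
		}
	}
	kernel_neon_end();
}

The key property is that nothing lives only in NEON registers across the yield point: frame_push/frame_pop plus the st1/ld1 of XL, KS and the counter make the interruption safe.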
arch/arm64/crypto/ghash-ce-glue.c  +17 −11
@@ -63,11 +63,12 @@ static void (*pmull_ghash_update)(int blocks, u64 dg[], const char *src,

 asmlinkage void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[],
 				  const u8 src[], struct ghash_key const *k,
-				  u8 ctr[], int rounds, u8 ks[]);
+				  u8 ctr[], u32 const rk[], int rounds,
+				  u8 ks[]);

 asmlinkage void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[],
 				  const u8 src[], struct ghash_key const *k,
-				  u8 ctr[], int rounds);
+				  u8 ctr[], u32 const rk[], int rounds);

 asmlinkage void pmull_gcm_encrypt_block(u8 dst[], u8 const src[],
 					u32 const rk[], int rounds);
@@ -368,26 +369,29 @@ static int gcm_encrypt(struct aead_request *req)
 		pmull_gcm_encrypt_block(ks, iv, NULL,
 					num_rounds(&ctx->aes_key));
 		put_unaligned_be32(3, iv + GCM_IV_SIZE);
+		kernel_neon_end();

-		err = skcipher_walk_aead_encrypt(&walk, req, true);
+		err = skcipher_walk_aead_encrypt(&walk, req, false);

 		while (walk.nbytes >= AES_BLOCK_SIZE) {
 			int blocks = walk.nbytes / AES_BLOCK_SIZE;

+			kernel_neon_begin();
 			pmull_gcm_encrypt(blocks, dg, walk.dst.virt.addr,
 					  walk.src.virt.addr, &ctx->ghash_key,
-					  iv, num_rounds(&ctx->aes_key), ks);
+					  iv, ctx->aes_key.key_enc,
+					  num_rounds(&ctx->aes_key), ks);
+			kernel_neon_end();

 			err = skcipher_walk_done(&walk,
 						 walk.nbytes % AES_BLOCK_SIZE);
 		}
-		kernel_neon_end();
 	} else {
 		__aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv,
 				    num_rounds(&ctx->aes_key));
 		put_unaligned_be32(2, iv + GCM_IV_SIZE);

-		err = skcipher_walk_aead_encrypt(&walk, req, true);
+		err = skcipher_walk_aead_encrypt(&walk, req, false);

 		while (walk.nbytes >= AES_BLOCK_SIZE) {
 			int blocks = walk.nbytes / AES_BLOCK_SIZE;
@@ -467,15 +471,19 @@ static int gcm_decrypt(struct aead_request *req)
 		pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc,
 					num_rounds(&ctx->aes_key));
 		put_unaligned_be32(2, iv + GCM_IV_SIZE);
+		kernel_neon_end();

-		err = skcipher_walk_aead_decrypt(&walk, req, true);
+		err = skcipher_walk_aead_decrypt(&walk, req, false);

 		while (walk.nbytes >= AES_BLOCK_SIZE) {
 			int blocks = walk.nbytes / AES_BLOCK_SIZE;

+			kernel_neon_begin();
 			pmull_gcm_decrypt(blocks, dg, walk.dst.virt.addr,
 					  walk.src.virt.addr, &ctx->ghash_key,
-					  iv, num_rounds(&ctx->aes_key));
+					  iv, ctx->aes_key.key_enc,
+					  num_rounds(&ctx->aes_key));
+			kernel_neon_end();

 			err = skcipher_walk_done(&walk,
 						 walk.nbytes % AES_BLOCK_SIZE);
@@ -483,14 +491,12 @@ static int gcm_decrypt(struct aead_request *req)
 		if (walk.nbytes)
 			pmull_gcm_encrypt_block(iv, iv, NULL,
 						num_rounds(&ctx->aes_key));
-
-		kernel_neon_end();
 	} else {
 		__aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv,
 				    num_rounds(&ctx->aes_key));
 		put_unaligned_be32(2, iv + GCM_IV_SIZE);

-		err = skcipher_walk_aead_decrypt(&walk, req, true);
+		err = skcipher_walk_aead_decrypt(&walk, req, false);

 		while (walk.nbytes >= AES_BLOCK_SIZE) {
 			int blocks = walk.nbytes / AES_BLOCK_SIZE;