Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 8e492eff authored by Ard Biesheuvel, committed by Herbert Xu
Browse files

crypto: arm64/ghash-ce - replace NEON yield check with block limit



Checking the TIF_NEED_RESCHED flag is disproportionately costly on cores
with fast crypto instructions and comparatively slow memory accesses.

On algorithms such as GHASH, which executes at ~1 cycle per byte on
cores that implement support for 64 bit polynomial multiplication,
there is really no need to check the TIF_NEED_RESCHED flag particularly
often, and so we can remove the NEON yield check from the assembler
routines.

However, unlike the AEAD or skcipher APIs, the shash/ahash APIs take
arbitrary input lengths, and so there needs to be some sanity check
to ensure that we don't hog the CPU for excessive amounts of time.

So let's simply cap the maximum input size that is processed in one go
to 64 KB.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent 8418cf54
Loading
Loading
Loading
Loading
+11 −28
Original line number Diff line number Diff line
@@ -213,31 +213,23 @@
	.endm

	.macro		__pmull_ghash, pn
	frame_push	5

	mov		x19, x0
	mov		x20, x1
	mov		x21, x2
	mov		x22, x3
	mov		x23, x4

0:	ld1		{SHASH.2d}, [x22]
	ld1		{XL.2d}, [x20]
	ld1		{SHASH.2d}, [x3]
	ld1		{XL.2d}, [x1]
	ext		SHASH2.16b, SHASH.16b, SHASH.16b, #8
	eor		SHASH2.16b, SHASH2.16b, SHASH.16b

	__pmull_pre_\pn

	/* do the head block first, if supplied */
	cbz		x23, 1f
	ld1		{T1.2d}, [x23]
	mov		x23, xzr
	b		2f
	cbz		x4, 0f
	ld1		{T1.2d}, [x4]
	mov		x4, xzr
	b		1f

1:	ld1		{T1.2d}, [x21], #16
	sub		w19, w19, #1
0:	ld1		{T1.2d}, [x2], #16
	sub		w0, w0, #1

2:	/* multiply XL by SHASH in GF(2^128) */
1:	/* multiply XL by SHASH in GF(2^128) */
CPU_LE(	rev64		T1.16b, T1.16b	)

	ext		T2.16b, XL.16b, XL.16b, #8
@@ -259,18 +251,9 @@ CPU_LE( rev64 T1.16b, T1.16b )
	eor		T2.16b, T2.16b, XH.16b
	eor		XL.16b, XL.16b, T2.16b

	cbz		w19, 3f

	if_will_cond_yield_neon
	st1		{XL.2d}, [x20]
	do_cond_yield_neon
	b		0b
	endif_yield_neon

	b		1b
	cbnz		w0, 0b

3:	st1		{XL.2d}, [x20]
	frame_pop
	st1		{XL.2d}, [x1]
	ret
	.endm

+12 −4
Original line number Diff line number Diff line
@@ -113,6 +113,9 @@ static void ghash_do_update(int blocks, u64 dg[], const char *src,
	}
}

/*
 * Avoid hogging the CPU for too long: upper bound on the number of GHASH
 * blocks (64 KB worth of input) passed to ghash_do_update() in one call.
 * Longer inputs are processed in several chunks of at most this size.
 */
#define MAX_BLOCKS	(SZ_64K / GHASH_BLOCK_SIZE)

static int ghash_update(struct shash_desc *desc, const u8 *src,
			unsigned int len)
{
@@ -136,11 +139,16 @@ static int ghash_update(struct shash_desc *desc, const u8 *src,
		blocks = len / GHASH_BLOCK_SIZE;
		len %= GHASH_BLOCK_SIZE;

		ghash_do_update(blocks, ctx->digest, src, key,
		do {
			int chunk = min(blocks, MAX_BLOCKS);

			ghash_do_update(chunk, ctx->digest, src, key,
					partial ? ctx->buf : NULL);

		src += blocks * GHASH_BLOCK_SIZE;
			blocks -= chunk;
			src += chunk * GHASH_BLOCK_SIZE;
			partial = 0;
		} while (unlikely(blocks > 0));
	}
	if (len)
		memcpy(ctx->buf + partial, src, len);