Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 792cfa1b authored by Jason A. Donenfeld's avatar Jason A. Donenfeld Committed by Greg Kroah-Hartman
Browse files

UPSTREAM: crypto: arch/lib - limit simd usage to 4k chunks



The initial Zinc patchset, after some mailing list discussion, contained
code to ensure that kernel_fpu_enable would not be kept on for more than
a 4k chunk, since it disables preemption. The choice of 4k isn't totally
scientific, but it's not a bad guess either, and it's what's used in
both the x86 poly1305, blake2s, and nhpoly1305 code already (in the form
of PAGE_SIZE, which this commit corrects to be explicitly 4k for the
former two).

Ard did some back of the envelope calculations and found that
at 5 cycles/byte (overestimate) on a 1ghz processor (pretty slow), 4k
means we have a maximum preemption disabling of 20us, which Sebastian
confirmed was probably a good limit.

Unfortunately the chunking appears to have been left out of the final
patchset that added the glue code. So, this commit adds it back in.

Fixes: 84e03fa39fbe ("crypto: x86/chacha - expose SIMD ChaCha routine as library function")
Fixes: b3aad5bad26a ("crypto: arm64/chacha - expose arm64 ChaCha routine as library function")
Fixes: a44a3430d71b ("crypto: arm/chacha - expose ARM ChaCha routine as library function")
Fixes: d7d7b8535662 ("crypto: x86/poly1305 - wire up faster implementations for kernel")
Fixes: f569ca164751 ("crypto: arm64/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON implementation")
Fixes: a6b803b3ddc7 ("crypto: arm/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON implementation")
Fixes: ed0356eda153 ("crypto: blake2s - x86_64 SIMD implementation")
Cc: Eric Biggers <ebiggers@google.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: stable@vger.kernel.org
Signed-off-by: default avatarJason A. Donenfeld <Jason@zx2c4.com>
Reviewed-by: default avatarArd Biesheuvel <ardb@kernel.org>
Signed-off-by: default avatarHerbert Xu <herbert@gondor.apana.org.au>
(cherry picked from commit 706024a52c614b478b63f7728d202532ce6591a9)
Bug: 152722841
Signed-off-by: default avatarJason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@google.com>
Change-Id: I7550ac1bf01e6d311af9b9521ebbd16833e1aed0
parent 81be0f94
Loading
Loading
Loading
Loading
+11 −3
Original line number Diff line number Diff line
@@ -90,9 +90,17 @@ void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
		return;
	}

	do {
		unsigned int todo = min_t(unsigned int, bytes, SZ_4K);

		kernel_neon_begin();
	chacha_doneon(state, dst, src, bytes, nrounds);
		chacha_doneon(state, dst, src, todo, nrounds);
		kernel_neon_end();

		bytes -= todo;
		src += todo;
		dst += todo;
	} while (bytes);
}
EXPORT_SYMBOL(chacha_crypt_arch);

+11 −4
Original line number Diff line number Diff line
@@ -159,13 +159,20 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
		unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);

		if (static_branch_likely(&have_neon) && do_neon) {
			do {
				unsigned int todo = min_t(unsigned int, len, SZ_4K);

				kernel_neon_begin();
			poly1305_blocks_neon(&dctx->h, src, len, 1);
				poly1305_blocks_neon(&dctx->h, src, todo, 1);
				kernel_neon_end();

				len -= todo;
				src += todo;
			} while (len);
		} else {
			poly1305_blocks_arm(&dctx->h, src, len, 1);
		}
			src += len;
		}
		nbytes %= POLY1305_BLOCK_SIZE;
	}

+11 −3
Original line number Diff line number Diff line
@@ -86,9 +86,17 @@ void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
	    !may_use_simd())
		return chacha_crypt_generic(state, dst, src, bytes, nrounds);

	do {
		unsigned int todo = min_t(unsigned int, bytes, SZ_4K);

		kernel_neon_begin();
	chacha_doneon(state, dst, src, bytes, nrounds);
		chacha_doneon(state, dst, src, todo, nrounds);
		kernel_neon_end();

		bytes -= todo;
		src += todo;
		dst += todo;
	} while (bytes);
}
EXPORT_SYMBOL(chacha_crypt_arch);

+11 −4
Original line number Diff line number Diff line
@@ -142,13 +142,20 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
		unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);

		if (static_branch_likely(&have_neon) && may_use_simd()) {
			do {
				unsigned int todo = min_t(unsigned int, len, SZ_4K);

				kernel_neon_begin();
			poly1305_blocks_neon(&dctx->h, src, len, 1);
				poly1305_blocks_neon(&dctx->h, src, todo, 1);
				kernel_neon_end();

				len -= todo;
				src += todo;
			} while (len);
		} else {
			poly1305_blocks(&dctx->h, src, len, 1);
		}
			src += len;
		}
		nbytes %= POLY1305_BLOCK_SIZE;
	}

+4 −6
Original line number Diff line number Diff line
@@ -31,16 +31,16 @@ void blake2s_compress_arch(struct blake2s_state *state,
			   const u32 inc)
{
	/* SIMD disables preemption, so relax after processing each page. */
	BUILD_BUG_ON(PAGE_SIZE / BLAKE2S_BLOCK_SIZE < 8);
	BUILD_BUG_ON(SZ_4K / BLAKE2S_BLOCK_SIZE < 8);

	if (!static_branch_likely(&blake2s_use_ssse3) || !may_use_simd()) {
		blake2s_compress_generic(state, block, nblocks, inc);
		return;
	}

	for (;;) {
	do {
		const size_t blocks = min_t(size_t, nblocks,
					    PAGE_SIZE / BLAKE2S_BLOCK_SIZE);
					    SZ_4K / BLAKE2S_BLOCK_SIZE);

		kernel_fpu_begin();
		if (IS_ENABLED(CONFIG_AS_AVX512) &&
@@ -51,10 +51,8 @@ void blake2s_compress_arch(struct blake2s_state *state,
		kernel_fpu_end();

		nblocks -= blocks;
		if (!nblocks)
			break;
		block += blocks * BLAKE2S_BLOCK_SIZE;
	}
	} while (nblocks);
}
EXPORT_SYMBOL(blake2s_compress_arch);

Loading