UPSTREAM: crypto: arch/lib - limit simd usage to 4k chunks (792cfa1b) · Commits · e / devices / android_kernel_oneplus_sm7250

arch/arm/crypto/chacha-glue.c

+11 −3

Original line number	Diff line number	Diff line
		@@ -90,9 +90,17 @@ void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
		return;
		}

		do {
		unsigned int todo = min_t(unsigned int, bytes, SZ_4K);

		kernel_neon_begin();
		chacha_doneon(state, dst, src, bytes, nrounds);
		chacha_doneon(state, dst, src, todo, nrounds);
		kernel_neon_end();

		bytes -= todo;
		src += todo;
		dst += todo;
		} while (bytes);
		}
		EXPORT_SYMBOL(chacha_crypt_arch);

+11 −4

Original line number	Diff line number	Diff line
		@@ -159,13 +159,20 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
		unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);

		if (static_branch_likely(&have_neon) && do_neon) {
		do {
		unsigned int todo = min_t(unsigned int, len, SZ_4K);

		kernel_neon_begin();
		poly1305_blocks_neon(&dctx->h, src, len, 1);
		poly1305_blocks_neon(&dctx->h, src, todo, 1);
		kernel_neon_end();

		len -= todo;
		src += todo;
		} while (len);
		} else {
		poly1305_blocks_arm(&dctx->h, src, len, 1);
		}
		src += len;
		}
		nbytes %= POLY1305_BLOCK_SIZE;
		}

+11 −3

Original line number	Diff line number	Diff line
		@@ -86,9 +86,17 @@ void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
		!may_use_simd())
		return chacha_crypt_generic(state, dst, src, bytes, nrounds);

		do {
		unsigned int todo = min_t(unsigned int, bytes, SZ_4K);

		kernel_neon_begin();
		chacha_doneon(state, dst, src, bytes, nrounds);
		chacha_doneon(state, dst, src, todo, nrounds);
		kernel_neon_end();

		bytes -= todo;
		src += todo;
		dst += todo;
		} while (bytes);
		}
		EXPORT_SYMBOL(chacha_crypt_arch);

+11 −4

Original line number	Diff line number	Diff line
		@@ -142,13 +142,20 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
		unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);

		if (static_branch_likely(&have_neon) && may_use_simd()) {
		do {
		unsigned int todo = min_t(unsigned int, len, SZ_4K);

		kernel_neon_begin();
		poly1305_blocks_neon(&dctx->h, src, len, 1);
		poly1305_blocks_neon(&dctx->h, src, todo, 1);
		kernel_neon_end();

		len -= todo;
		src += todo;
		} while (len);
		} else {
		poly1305_blocks(&dctx->h, src, len, 1);
		}
		src += len;
		}
		nbytes %= POLY1305_BLOCK_SIZE;
		}

+4 −6

Original line number	Diff line number	Diff line
		@@ -31,16 +31,16 @@ void blake2s_compress_arch(struct blake2s_state *state,
		const u32 inc)
		{
		/* SIMD disables preemption, so relax after processing each page. */
		BUILD_BUG_ON(PAGE_SIZE / BLAKE2S_BLOCK_SIZE < 8);
		BUILD_BUG_ON(SZ_4K / BLAKE2S_BLOCK_SIZE < 8);

		if (!static_branch_likely(&blake2s_use_ssse3) || !may_use_simd()) {
		blake2s_compress_generic(state, block, nblocks, inc);
		return;
		}

		for (;;) {
		do {
		const size_t blocks = min_t(size_t, nblocks,
		PAGE_SIZE / BLAKE2S_BLOCK_SIZE);
		SZ_4K / BLAKE2S_BLOCK_SIZE);

		kernel_fpu_begin();
		if (IS_ENABLED(CONFIG_AS_AVX512) &&
		@@ -51,10 +51,8 @@ void blake2s_compress_arch(struct blake2s_state *state,
		kernel_fpu_end();

		nblocks -= blocks;
		if (!nblocks)
		break;
		block += blocks * BLAKE2S_BLOCK_SIZE;
		}
		} while (nblocks);
		}
		EXPORT_SYMBOL(blake2s_compress_arch);