crypto: arm64/nhpoly1305 - add NEON-accelerated NHPoly1305 (a00fa0c8) · Commits · e / devices / android_kernel_fairphone_FP5

arch/arm64/crypto/Kconfig

+5 −0

Original line number	Diff line number	Diff line
		@@ -106,6 +106,11 @@ config CRYPTO_CHACHA20_NEON
		select CRYPTO_BLKCIPHER
		select CRYPTO_CHACHA20

		# NEON-accelerated NHPoly1305 (used by Adiantum). Needs kernel-mode NEON and
		# pulls in the generic CRYPTO_NHPOLY1305 code via 'select'.
		config CRYPTO_NHPOLY1305_NEON
		tristate "NHPoly1305 hash function using NEON instructions (for Adiantum)"
		depends on KERNEL_MODE_NEON
		select CRYPTO_NHPOLY1305

		config CRYPTO_AES_ARM64_BS
		tristate "AES in ECB/CBC/CTR/XTS modes using bit-sliced NEON algorithm"
		depends on KERNEL_MODE_NEON

arch/arm64/crypto/Makefile

+3 −0

Original line number	Diff line number	Diff line
		@@ -53,6 +53,9 @@ sha512-arm64-y := sha512-glue.o sha512-core.o
		obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
		chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o

		# NHPoly1305 NEON module: the NH assembly core plus the C glue that registers
		# the shash algorithm.
		obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o
		nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o

		obj-$(CONFIG_CRYPTO_AES_ARM64) += aes-arm64.o
		aes-arm64-y := aes-cipher-core.o aes-cipher-glue.o

arch/arm64/crypto/nh-neon-core.S

0 → 100644

+103 −0

Original line number	Diff line number	Diff line
		/* SPDX-License-Identifier: GPL-2.0 */
		/*
		* NH - ε-almost-universal hash function, ARM64 NEON accelerated version
		*
		* Copyright 2018 Google LLC
		*
		* Author: Eric Biggers <ebiggers@google.com>
		*/

		#include <linux/linkage.h>

		// Arguments of nh_neon(), in the order of its C prototype (x0-x3 carry the
		// first four integer/pointer arguments under the AArch64 calling convention).
		KEY .req x0
		MESSAGE .req x1
		MESSAGE_LEN .req x2
		HASH .req x3

		// One 2 x 64-bit accumulator vector per NH pass.
		PASS0_SUMS .req v0
		PASS1_SUMS .req v1
		PASS2_SUMS .req v2
		PASS3_SUMS .req v3
		// Four consecutive 16-byte key strides; callers of _nh_stride rotate these so
		// that each stride loaded from memory is reused by later invocations.
		K0 .req v4
		K1 .req v5
		K2 .req v6
		K3 .req v7
		// Scratch vectors for _nh_stride: T0-T3 hold message+key sums (T3 also holds
		// the freshly loaded message stride), T4-T7 hold the upper halves of T0-T3.
		T0 .req v8
		T1 .req v9
		T2 .req v10
		T3 .req v11
		T4 .req v12
		T5 .req v13
		T6 .req v14
		T7 .req v15

		// _nh_stride k0, k1, k2, k3
		//
		// Process one 16-byte stride of the message for all four passes.
		//   \k0, \k1, \k2 - vectors that already hold the key strides for passes 0-2
		//   \k3           - vector that receives the next key stride, loaded here
		// Post-increments both MESSAGE and KEY by 16 bytes and uses T0-T7 as scratch.
		.macro _nh_stride k0, k1, k2, k3

		// Load next message stride
		ld1 {T3.16b}, [MESSAGE], #16

		// Load next key stride
		ld1 {\k3\().4s}, [KEY], #16

		// Add message words to key words
		// (T3 is the message itself, so it must be overwritten last.)
		add T0.4s, T3.4s, \k0\().4s
		add T1.4s, T3.4s, \k1\().4s
		add T2.4s, T3.4s, \k2\().4s
		add T3.4s, T3.4s, \k3\().4s

		// Multiply 32x32 => 64 and accumulate
		// Copy the upper two 32-bit sums of each Tn into the low half of T(n+4) so
		// that umlal multiplies word 0 by word 2 and word 1 by word 3, adding the
		// two 64-bit products into the corresponding pass accumulator lanes.
		mov T4.d[0], T0.d[1]
		mov T5.d[0], T1.d[1]
		mov T6.d[0], T2.d[1]
		mov T7.d[0], T3.d[1]
		umlal PASS0_SUMS.2d, T0.2s, T4.2s
		umlal PASS1_SUMS.2d, T1.2s, T5.2s
		umlal PASS2_SUMS.2d, T2.2s, T6.2s
		umlal PASS3_SUMS.2d, T3.2s, T7.2s
		.endm

		/*
		* void nh_neon(const u32 *key, const u8 *message, size_t message_len,
		* u8 hash[NH_HASH_BYTES])
		*
		* Computes the four NH pass sums of 'message' under 'key' and stores them
		* as four 64-bit values (32 bytes) at 'hash'.
		*
		* It's guaranteed that message_len % 16 == 0.
		*/
		ENTRY(nh_neon)

		// Preload the first three key strides (48 bytes) and zero the accumulators.
		// The fourth key stride is loaded by the first _nh_stride invocation.
		ld1 {K0.4s,K1.4s}, [KEY], #32
		movi PASS0_SUMS.2d, #0
		movi PASS1_SUMS.2d, #0
		ld1 {K2.4s}, [KEY], #16
		movi PASS2_SUMS.2d, #0
		movi PASS3_SUMS.2d, #0

		// Main loop: 64 message bytes (four strides) per iteration. The key
		// registers are rotated so they are back in K0..K2 order after each pass
		// through the loop body.
		subs MESSAGE_LEN, MESSAGE_LEN, #64
		blt .Lloop4_done
		.Lloop4:
		_nh_stride K0, K1, K2, K3
		_nh_stride K1, K2, K3, K0
		_nh_stride K2, K3, K0, K1
		_nh_stride K3, K0, K1, K2
		subs MESSAGE_LEN, MESSAGE_LEN, #64
		bge .Lloop4

		.Lloop4_done:
		// MESSAGE_LEN is negative here (the last subs went below zero); its low six
		// bits still equal the number of bytes left, which is 0, 16, 32 or 48 given
		// the multiple-of-16 guarantee.
		ands MESSAGE_LEN, MESSAGE_LEN, #63
		beq .Ldone
		_nh_stride K0, K1, K2, K3

		subs MESSAGE_LEN, MESSAGE_LEN, #16
		beq .Ldone
		_nh_stride K1, K2, K3, K0

		subs MESSAGE_LEN, MESSAGE_LEN, #16
		beq .Ldone
		// At most 48 bytes can remain, so this third tail stride is the last one.
		_nh_stride K2, K3, K0, K1

		.Ldone:
		// Sum the accumulators for each pass, then store the sums to 'hash'
		// addp folds the two 64-bit lanes of each accumulator into one value, so
		// T0 = {pass0, pass1} and T1 = {pass2, pass3}.
		addp T0.2d, PASS0_SUMS.2d, PASS1_SUMS.2d
		addp T1.2d, PASS2_SUMS.2d, PASS3_SUMS.2d
		st1 {T0.16b,T1.16b}, [HASH]
		ret
		ENDPROC(nh_neon)

arch/arm64/crypto/nhpoly1305-neon-glue.c

0 → 100644

+77 −0

Original line number	Diff line number	Diff line
		// SPDX-License-Identifier: GPL-2.0
		/*
		* NHPoly1305 - ε-almost-∆-universal hash function for Adiantum
		* (ARM64 NEON accelerated version)
		*
		* Copyright 2018 Google LLC
		*/

		#include <asm/neon.h>
		#include <asm/simd.h>
		#include <crypto/internal/hash.h>
		#include <crypto/nhpoly1305.h>
		#include <linux/module.h>

		/*
		 * NH core, implemented in assembly (nh-neon-core.S).
		 *
		 * @key:         NH key words; read through a pointer by the assembly
		 * @message:     input bytes to hash
		 * @message_len: input length in bytes; the assembly requires a multiple of 16
		 * @hash:        output buffer of NH_HASH_BYTES bytes
		 *
		 * 'key' and 'message' must be pointers: the assembly uses both as load
		 * addresses.
		 */
		asmlinkage void nh_neon(const u32 *key, const u8 *message, size_t message_len,
					u8 hash[NH_HASH_BYTES]);

		/* wrapper to avoid indirect call to assembly, which doesn't work with CFI */
		static void _nh_neon(const u32 *key, const u8 *message, size_t message_len,
				     __le64 hash[NH_NUM_PASSES])
		{
			/* Same buffer, viewed as raw bytes for the assembly routine. */
			nh_neon(key, message, message_len, (u8 *)hash);
		}

		/*
		 * shash ->update() hook: absorb @srclen bytes from @src into @desc.
		 *
		 * Inputs shorter than 64 bytes, and calls made where SIMD may not be used,
		 * are handed to the generic crypto_nhpoly1305_update().  Otherwise the data
		 * is fed to the NEON NH routine in chunks of at most PAGE_SIZE bytes, each
		 * chunk inside its own kernel_neon_begin()/kernel_neon_end() section.
		 *
		 * Returns the generic routine's result on the fallback path, 0 otherwise.
		 */
		static int nhpoly1305_neon_update(struct shash_desc *desc,
						  const u8 *src, unsigned int srclen)
		{
			if (srclen < 64 || !may_use_simd())
				return crypto_nhpoly1305_update(desc, src, srclen);

			do {
				/*
				 * NOTE(review): the PAGE_SIZE cap presumably bounds how long a
				 * single kernel-mode NEON section runs - confirm.
				 */
				unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE);

				kernel_neon_begin();
				crypto_nhpoly1305_update_helper(desc, src, n, _nh_neon);
				kernel_neon_end();
				src += n;
				srclen -= n;
			} while (srclen);
			return 0;
		}

		static struct shash_alg nhpoly1305_alg = {
		.base.cra_name = "nhpoly1305",
		.base.cra_driver_name = "nhpoly1305-neon",
		.base.cra_priority = 200,
		.base.cra_ctxsize = sizeof(struct nhpoly1305_key),
		.base.cra_module = THIS_MODULE,
		.digestsize = POLY1305_DIGEST_SIZE,
		.init = crypto_nhpoly1305_init,
		.update = nhpoly1305_neon_update,
		.final = crypto_nhpoly1305_final,
		.setkey = crypto_nhpoly1305_setkey,
		.descsize = sizeof(struct nhpoly1305_state),
		};

		/*
		 * Module load: register the NEON NHPoly1305 shash when the CPU advertises
		 * ASIMD; otherwise refuse to load with -ENODEV.
		 */
		static int __init nhpoly1305_mod_init(void)
		{
			if (elf_hwcap & HWCAP_ASIMD)
				return crypto_register_shash(&nhpoly1305_alg);

			return -ENODEV;
		}

		/* Module unload: unregister the shash that nhpoly1305_mod_init() registered. */
		static void __exit nhpoly1305_mod_exit(void)
		{
		crypto_unregister_shash(&nhpoly1305_alg);
		}

		/* Hook the register/unregister routines into module load and unload. */
		module_init(nhpoly1305_mod_init);
		module_exit(nhpoly1305_mod_exit);

		MODULE_DESCRIPTION("NHPoly1305 ε-almost-∆-universal hash function (NEON-accelerated)");
		MODULE_LICENSE("GPL v2");
		MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
		/* Aliases matching this driver's cra_name and cra_driver_name. */
		MODULE_ALIAS_CRYPTO("nhpoly1305");
		MODULE_ALIAS_CRYPTO("nhpoly1305-neon");