Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a00fa0c8, authored by Eric Biggers, committed by Herbert Xu
Browse files

crypto: arm64/nhpoly1305 - add NEON-accelerated NHPoly1305



Add an ARM64 NEON implementation of NHPoly1305, an ε-almost-∆-universal
hash function used in the Adiantum encryption mode.  For now, only the
NH portion is actually NEON-accelerated; the Poly1305 part is less
performance-critical so is just implemented in C.

Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> # big-endian
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent 88d905e2
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -106,6 +106,11 @@ config CRYPTO_CHACHA20_NEON
	select CRYPTO_BLKCIPHER
	select CRYPTO_CHACHA20

# NEON build of the NHPoly1305 hash (used by Adiantum).  Needs kernel-mode
# NEON support and pulls in CRYPTO_NHPOLY1305 via 'select'.
config CRYPTO_NHPOLY1305_NEON
	tristate "NHPoly1305 hash function using NEON instructions (for Adiantum)"
	depends on KERNEL_MODE_NEON
	select CRYPTO_NHPOLY1305

config CRYPTO_AES_ARM64_BS
	tristate "AES in ECB/CBC/CTR/XTS modes using bit-sliced NEON algorithm"
	depends on KERNEL_MODE_NEON
+3 −0
Original line number Diff line number Diff line
@@ -53,6 +53,9 @@ sha512-arm64-y := sha512-glue.o sha512-core.o
obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o

# nhpoly1305-neon is a composite object: nh-neon-core.o plus
# nhpoly1305-neon-glue.o, built when CONFIG_CRYPTO_NHPOLY1305_NEON is set.
obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o
nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o

obj-$(CONFIG_CRYPTO_AES_ARM64) += aes-arm64.o
aes-arm64-y := aes-cipher-core.o aes-cipher-glue.o

+103 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * NH - ε-almost-universal hash function, ARM64 NEON accelerated version
 *
 * Copyright 2018 Google LLC
 *
 * Author: Eric Biggers <ebiggers@google.com>
 */

#include <linux/linkage.h>

	// Function arguments, in the order of the nh_neon() C prototype
	KEY		.req	x0
	MESSAGE		.req	x1
	MESSAGE_LEN	.req	x2
	HASH		.req	x3

	// One accumulator per NH pass; each holds two 64-bit partial sums
	PASS0_SUMS	.req	v0
	PASS1_SUMS	.req	v1
	PASS2_SUMS	.req	v2
	PASS3_SUMS	.req	v3
	// Four key strides (16 bytes each) kept live at any one time
	K0		.req	v4
	K1		.req	v5
	K2		.req	v6
	K3		.req	v7
	// Scratch registers for _nh_stride.
	// NOTE(review): T0-T7 are v8-v15, whose low 64 bits are callee-saved
	// under AAPCS64, and nothing in this file saves/restores them.
	// Presumably this relies on the caller invoking nh_neon() only between
	// kernel_neon_begin() and kernel_neon_end() -- confirm.
	T0		.req	v8
	T1		.req	v9
	T2		.req	v10
	T3		.req	v11
	T4		.req	v12
	T5		.req	v13
	T6		.req	v14
	T7		.req	v15

/*
 * _nh_stride k0, k1, k2, k3
 *
 * Process one 16-byte message stride for all four passes.
 *
 * \k0, \k1 and \k2 must already hold key strides; \k3 is loaded here from
 * [KEY].  Pass N uses key register \kN.  Both MESSAGE and KEY are
 * post-incremented by 16.  Clobbers T0-T7 and \k3, and adds into
 * PASS0_SUMS-PASS3_SUMS.
 */
.macro _nh_stride	k0, k1, k2, k3

	// Load next message stride
	ld1		{T3.16b}, [MESSAGE], #16

	// Load next key stride
	ld1		{\k3\().4s}, [KEY], #16

	// Add message words to key words
	// (T3 holds the message words, so it is overwritten last.)
	add		T0.4s, T3.4s, \k0\().4s
	add		T1.4s, T3.4s, \k1\().4s
	add		T2.4s, T3.4s, \k2\().4s
	add		T3.4s, T3.4s, \k3\().4s

	// Multiply 32x32 => 64 and accumulate
	// Copy the upper two 32-bit words of each sum into the low half of a
	// second register so that umlal (which reads the low .2s halves)
	// multiplies word 0 by word 2 and word 1 by word 3.
	mov		T4.d[0], T0.d[1]
	mov		T5.d[0], T1.d[1]
	mov		T6.d[0], T2.d[1]
	mov		T7.d[0], T3.d[1]
	umlal		PASS0_SUMS.2d, T0.2s, T4.2s
	umlal		PASS1_SUMS.2d, T1.2s, T5.2s
	umlal		PASS2_SUMS.2d, T2.2s, T6.2s
	umlal		PASS3_SUMS.2d, T3.2s, T7.2s
.endm

/*
 * void nh_neon(const u32 *key, const u8 *message, size_t message_len,
 *		u8 hash[NH_HASH_BYTES])
 *
 * Runs the four NH passes over 'message' using 'key' and stores the four
 * 64-bit pass sums (32 bytes in total) to 'hash'.
 *
 * It's guaranteed that message_len % 16 == 0.
 *
 * The argument registers KEY, MESSAGE and MESSAGE_LEN are modified as the
 * input is consumed.
 */
ENTRY(nh_neon)

	// Preload the first three key strides (48 bytes) into K0-K2 and zero
	// the four accumulators.  K3 is loaded by the first _nh_stride.
	ld1		{K0.4s,K1.4s}, [KEY], #32
	  movi		PASS0_SUMS.2d, #0
	  movi		PASS1_SUMS.2d, #0
	ld1		{K2.4s}, [KEY], #16
	  movi		PASS2_SUMS.2d, #0
	  movi		PASS3_SUMS.2d, #0

	// Main loop: four strides (64 bytes) per iteration.  MESSAGE_LEN is
	// kept biased by -64, so the flags from 'subs' say whether another
	// full 64-byte group is available.
	subs		MESSAGE_LEN, MESSAGE_LEN, #64
	blt		.Lloop4_done
.Lloop4:
	// The key register roles rotate by one on every stride; after four
	// strides they are back to the starting assignment.
	_nh_stride	K0, K1, K2, K3
	_nh_stride	K1, K2, K3, K0
	_nh_stride	K2, K3, K0, K1
	_nh_stride	K3, K0, K1, K2
	subs		MESSAGE_LEN, MESSAGE_LEN, #64
	bge		.Lloop4

.Lloop4_done:
	// MESSAGE_LEN is negative here; its low 6 bits are the number of
	// leftover bytes (0, 16, 32 or 48, as the length is a multiple of 16).
	ands		MESSAGE_LEN, MESSAGE_LEN, #63
	beq		.Ldone
	_nh_stride	K0, K1, K2, K3

	// Up to two more tail strides, continuing the same key rotation.
	subs		MESSAGE_LEN, MESSAGE_LEN, #16
	beq		.Ldone
	_nh_stride	K1, K2, K3, K0

	subs		MESSAGE_LEN, MESSAGE_LEN, #16
	beq		.Ldone
	_nh_stride	K2, K3, K0, K1

.Ldone:
	// Sum the accumulators for each pass, then store the sums to 'hash'
	// (addp folds each accumulator's two lanes: T0 = {pass0, pass1},
	// T1 = {pass2, pass3}.)
	addp		T0.2d, PASS0_SUMS.2d, PASS1_SUMS.2d
	addp		T1.2d, PASS2_SUMS.2d, PASS3_SUMS.2d
	st1		{T0.16b,T1.16b}, [HASH]
	ret
ENDPROC(nh_neon)
+77 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
/*
 * NHPoly1305 - ε-almost-∆-universal hash function for Adiantum
 * (ARM64 NEON accelerated version)
 *
 * Copyright 2018 Google LLC
 */

#include <asm/neon.h>
#include <asm/simd.h>
#include <crypto/internal/hash.h>
#include <crypto/nhpoly1305.h>
#include <linux/module.h>

/*
 * NH routine implemented in assembly (ENTRY(nh_neon)).  Its header comment
 * states that message_len is guaranteed to be a multiple of 16.
 */
asmlinkage void nh_neon(const u32 *key, const u8 *message, size_t message_len,
			u8 hash[NH_HASH_BYTES]);

/*
 * C trampoline around the assembly routine.  The update helper is handed a
 * function pointer, and an indirect call straight into assembly doesn't work
 * with CFI, so the pointer refers to this C function instead.
 */
static void _nh_neon(const u32 *key, const u8 *message, size_t message_len,
		     __le64 hash[NH_NUM_PASSES])
{
	u8 *out = (u8 *)hash;

	nh_neon(key, message, message_len, out);
}

/*
 * shash .update handler.
 *
 * Inputs shorter than 64 bytes, and calls made while SIMD may not be used,
 * are forwarded to the generic crypto_nhpoly1305_update().  Everything else
 * is fed to the NEON NH routine in pieces of at most PAGE_SIZE bytes, each
 * piece wrapped in its own kernel_neon_begin()/kernel_neon_end() pair
 * (presumably to keep every NEON section short -- confirm).
 *
 * Returns the generic routine's result on the fallback path, otherwise 0.
 */
static int nhpoly1305_neon_update(struct shash_desc *desc,
				  const u8 *src, unsigned int srclen)
{
	if (!(srclen >= 64 && may_use_simd()))
		return crypto_nhpoly1305_update(desc, src, srclen);

	/* srclen is at least 64 here, so the loop body runs at least once. */
	while (srclen != 0) {
		const unsigned int chunk = min_t(unsigned int, srclen,
						 PAGE_SIZE);

		kernel_neon_begin();
		crypto_nhpoly1305_update_helper(desc, src, chunk, _nh_neon);
		kernel_neon_end();

		srclen -= chunk;
		src += chunk;
	}

	return 0;
}

static struct shash_alg nhpoly1305_alg = {
	.base.cra_name		= "nhpoly1305",
	.base.cra_driver_name	= "nhpoly1305-neon",
	.base.cra_priority	= 200,
	.base.cra_ctxsize	= sizeof(struct nhpoly1305_key),
	.base.cra_module	= THIS_MODULE,
	.digestsize		= POLY1305_DIGEST_SIZE,
	.init			= crypto_nhpoly1305_init,
	.update			= nhpoly1305_neon_update,
	.final			= crypto_nhpoly1305_final,
	.setkey			= crypto_nhpoly1305_setkey,
	.descsize		= sizeof(struct nhpoly1305_state),
};

/*
 * Module init: register the shash only when the CPU reports the ASIMD
 * hwcap; otherwise fail the load with -ENODEV.
 */
static int __init nhpoly1305_mod_init(void)
{
	if (elf_hwcap & HWCAP_ASIMD)
		return crypto_register_shash(&nhpoly1305_alg);

	return -ENODEV;
}

/* Module exit: unregister the shash that nhpoly1305_mod_init() registered. */
static void __exit nhpoly1305_mod_exit(void)
{
	crypto_unregister_shash(&nhpoly1305_alg);
}

/* Hook the init/exit routines into module load and unload. */
module_init(nhpoly1305_mod_init);
module_exit(nhpoly1305_mod_exit);

MODULE_DESCRIPTION("NHPoly1305 ε-almost-∆-universal hash function (NEON-accelerated)");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
/*
 * Crypto aliases for both the algorithm name and the driver name used in
 * nhpoly1305_alg (presumably so a request for either name can load this
 * module -- confirm).
 */
MODULE_ALIAS_CRYPTO("nhpoly1305");
MODULE_ALIAS_CRYPTO("nhpoly1305-neon");