Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 15d5910e authored by Ard Biesheuvel's avatar Ard Biesheuvel Committed by Herbert Xu
Browse files

crypto: arm64/sha3 - new v8.2 Crypto Extensions implementation



Implement the various flavours of SHA3 using the new optional
EOR3/RAX1/XAR/BCAX instructions introduced by ARMv8.2.

Tested-by: default avatarSteve Capper <steve.capper@arm.com>
Signed-off-by: default avatarArd Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: default avatarHerbert Xu <herbert@gondor.apana.org.au>
parent d60031dd
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -35,6 +35,12 @@ config CRYPTO_SHA512_ARM64_CE
	select CRYPTO_HASH
	select CRYPTO_SHA512_ARM64

config CRYPTO_SHA3_ARM64
	tristate "SHA3 digest algorithm (ARMv8.2 Crypto Extensions)"
	depends on KERNEL_MODE_NEON
	select CRYPTO_HASH
	select CRYPTO_SHA3

config CRYPTO_GHASH_ARM64_CE
	tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions"
	depends on KERNEL_MODE_NEON
+3 −0
Original line number Diff line number Diff line
@@ -17,6 +17,9 @@ sha2-ce-y := sha2-ce-glue.o sha2-ce-core.o
obj-$(CONFIG_CRYPTO_SHA512_ARM64_CE) += sha512-ce.o
sha512-ce-y := sha512-ce-glue.o sha512-ce-core.o

obj-$(CONFIG_CRYPTO_SHA3_ARM64) += sha3-ce.o
sha3-ce-y := sha3-ce-glue.o sha3-ce-core.o

obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o
ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o

+210 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * sha3-ce-core.S - core SHA-3 transform using v8.2 Crypto Extensions
 *
 * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.irp	b,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
	.set	.Lv\b\().2d, \b
	.set	.Lv\b\().16b, \b
	.endr

	/*
	 * ARMv8.2 Crypto Extensions instructions
	 */
	.macro	eor3, rd, rn, rm, ra
	.inst	0xce000000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
	.endm

	.macro	rax1, rd, rn, rm
	.inst	0xce608c00 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
	.endm

	.macro	bcax, rd, rn, rm, ra
	.inst	0xce200000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
	.endm

	.macro	xar, rd, rn, rm, imm6
	.inst	0xce800000 | .L\rd | (.L\rn << 5) | ((\imm6) << 10) | (.L\rm << 16)
	.endm

	/*
	 * sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size)
	 */
	.text
ENTRY(sha3_ce_transform)
	/* load state */
	add	x8, x0, #32
	ld1	{ v0.1d- v3.1d}, [x0]
	ld1	{ v4.1d- v7.1d}, [x8], #32
	ld1	{ v8.1d-v11.1d}, [x8], #32
	ld1	{v12.1d-v15.1d}, [x8], #32
	ld1	{v16.1d-v19.1d}, [x8], #32
	ld1	{v20.1d-v23.1d}, [x8], #32
	ld1	{v24.1d}, [x8]

0:	sub	w2, w2, #1
	mov	w8, #24
	adr_l	x9, .Lsha3_rcon

	/* load input */
	ld1	{v25.8b-v28.8b}, [x1], #32
	ld1	{v29.8b-v31.8b}, [x1], #24
	eor	v0.8b, v0.8b, v25.8b
	eor	v1.8b, v1.8b, v26.8b
	eor	v2.8b, v2.8b, v27.8b
	eor	v3.8b, v3.8b, v28.8b
	eor	v4.8b, v4.8b, v29.8b
	eor	v5.8b, v5.8b, v30.8b
	eor	v6.8b, v6.8b, v31.8b

	tbnz	x3, #6, 2f		// SHA3-512

	ld1	{v25.8b-v28.8b}, [x1], #32
	ld1	{v29.8b-v30.8b}, [x1], #16
	eor	 v7.8b,  v7.8b, v25.8b
	eor	 v8.8b,  v8.8b, v26.8b
	eor	 v9.8b,  v9.8b, v27.8b
	eor	v10.8b, v10.8b, v28.8b
	eor	v11.8b, v11.8b, v29.8b
	eor	v12.8b, v12.8b, v30.8b

	tbnz	x3, #4, 1f		// SHA3-384 or SHA3-224

	// SHA3-256
	ld1	{v25.8b-v28.8b}, [x1], #32
	eor	v13.8b, v13.8b, v25.8b
	eor	v14.8b, v14.8b, v26.8b
	eor	v15.8b, v15.8b, v27.8b
	eor	v16.8b, v16.8b, v28.8b
	b	3f

1:	tbz	x3, #2, 3f		// bit 2 cleared? SHA-384

	// SHA3-224
	ld1	{v25.8b-v28.8b}, [x1], #32
	ld1	{v29.8b}, [x1], #8
	eor	v13.8b, v13.8b, v25.8b
	eor	v14.8b, v14.8b, v26.8b
	eor	v15.8b, v15.8b, v27.8b
	eor	v16.8b, v16.8b, v28.8b
	eor	v17.8b, v17.8b, v29.8b
	b	3f

	// SHA3-512
2:	ld1	{v25.8b-v26.8b}, [x1], #16
	eor	 v7.8b,  v7.8b, v25.8b
	eor	 v8.8b,  v8.8b, v26.8b

3:	sub	w8, w8, #1

	eor3	v29.16b,  v4.16b,  v9.16b, v14.16b
	eor3	v26.16b,  v1.16b,  v6.16b, v11.16b
	eor3	v28.16b,  v3.16b,  v8.16b, v13.16b
	eor3	v25.16b,  v0.16b,  v5.16b, v10.16b
	eor3	v27.16b,  v2.16b,  v7.16b, v12.16b
	eor3	v29.16b, v29.16b, v19.16b, v24.16b
	eor3	v26.16b, v26.16b, v16.16b, v21.16b
	eor3	v28.16b, v28.16b, v18.16b, v23.16b
	eor3	v25.16b, v25.16b, v15.16b, v20.16b
	eor3	v27.16b, v27.16b, v17.16b, v22.16b

	rax1	v30.2d, v29.2d, v26.2d	// bc[0]
	rax1	v26.2d, v26.2d, v28.2d	// bc[2]
	rax1	v28.2d, v28.2d, v25.2d	// bc[4]
	rax1	v25.2d, v25.2d, v27.2d	// bc[1]
	rax1	v27.2d, v27.2d, v29.2d	// bc[3]

	eor	 v0.16b,  v0.16b, v30.16b
	xar	 v29.2d,   v1.2d,  v25.2d, (64 - 1)
	xar	  v1.2d,   v6.2d,  v25.2d, (64 - 44)
	xar	  v6.2d,   v9.2d,  v28.2d, (64 - 20)
	xar	  v9.2d,  v22.2d,  v26.2d, (64 - 61)
	xar	 v22.2d,  v14.2d,  v28.2d, (64 - 39)
	xar	 v14.2d,  v20.2d,  v30.2d, (64 - 18)
	xar	 v31.2d,   v2.2d,  v26.2d, (64 - 62)
	xar	  v2.2d,  v12.2d,  v26.2d, (64 - 43)
	xar	 v12.2d,  v13.2d,  v27.2d, (64 - 25)
	xar	 v13.2d,  v19.2d,  v28.2d, (64 - 8)
	xar	 v19.2d,  v23.2d,  v27.2d, (64 - 56)
	xar	 v23.2d,  v15.2d,  v30.2d, (64 - 41)
	xar	 v15.2d,   v4.2d,  v28.2d, (64 - 27)
	xar	 v28.2d,  v24.2d,  v28.2d, (64 - 14)
	xar	 v24.2d,  v21.2d,  v25.2d, (64 - 2)
	xar	  v8.2d,   v8.2d,  v27.2d, (64 - 55)
	xar	  v4.2d,  v16.2d,  v25.2d, (64 - 45)
	xar	 v16.2d,   v5.2d,  v30.2d, (64 - 36)
	xar	  v5.2d,   v3.2d,  v27.2d, (64 - 28)
	xar	 v27.2d,  v18.2d,  v27.2d, (64 - 21)
	xar	  v3.2d,  v17.2d,  v26.2d, (64 - 15)
	xar	 v25.2d,  v11.2d,  v25.2d, (64 - 10)
	xar	 v26.2d,   v7.2d,  v26.2d, (64 - 6)
	xar	 v30.2d,  v10.2d,  v30.2d, (64 - 3)

	bcax	v20.16b, v31.16b, v22.16b,  v8.16b
	bcax	v21.16b,  v8.16b, v23.16b, v22.16b
	bcax	v22.16b, v22.16b, v24.16b, v23.16b
	bcax	v23.16b, v23.16b, v31.16b, v24.16b
	bcax	v24.16b, v24.16b,  v8.16b, v31.16b

	ld1r	{v31.2d}, [x9], #8

	bcax	v17.16b, v25.16b, v19.16b,  v3.16b
	bcax	v18.16b,  v3.16b, v15.16b, v19.16b
	bcax	v19.16b, v19.16b, v16.16b, v15.16b
	bcax	v15.16b, v15.16b, v25.16b, v16.16b
	bcax	v16.16b, v16.16b,  v3.16b, v25.16b

	bcax	v10.16b, v29.16b, v12.16b, v26.16b
	bcax	v11.16b, v26.16b, v13.16b, v12.16b
	bcax	v12.16b, v12.16b, v14.16b, v13.16b
	bcax	v13.16b, v13.16b, v29.16b, v14.16b
	bcax	v14.16b, v14.16b, v26.16b, v29.16b

	bcax	 v7.16b, v30.16b,  v9.16b,  v4.16b
	bcax	 v8.16b,  v4.16b,  v5.16b,  v9.16b
	bcax	 v9.16b,  v9.16b,  v6.16b,  v5.16b
	bcax	 v5.16b,  v5.16b, v30.16b,  v6.16b
	bcax	 v6.16b,  v6.16b,  v4.16b, v30.16b

	bcax	 v3.16b, v27.16b,  v0.16b, v28.16b
	bcax	 v4.16b, v28.16b,  v1.16b,  v0.16b
	bcax	 v0.16b,  v0.16b,  v2.16b,  v1.16b
	bcax	 v1.16b,  v1.16b, v27.16b,  v2.16b
	bcax	 v2.16b,  v2.16b, v28.16b, v27.16b

	eor	 v0.16b,  v0.16b, v31.16b

	cbnz	w8, 3b
	cbnz	w2, 0b

	/* save state */
	st1	{ v0.1d- v3.1d}, [x0], #32
	st1	{ v4.1d- v7.1d}, [x0], #32
	st1	{ v8.1d-v11.1d}, [x0], #32
	st1	{v12.1d-v15.1d}, [x0], #32
	st1	{v16.1d-v19.1d}, [x0], #32
	st1	{v20.1d-v23.1d}, [x0], #32
	st1	{v24.1d}, [x0]
	ret
ENDPROC(sha3_ce_transform)

	.section	".rodata", "a"
	.align		8
.Lsha3_rcon:
	.quad	0x0000000000000001, 0x0000000000008082, 0x800000000000808a
	.quad	0x8000000080008000, 0x000000000000808b, 0x0000000080000001
	.quad	0x8000000080008081, 0x8000000000008009, 0x000000000000008a
	.quad	0x0000000000000088, 0x0000000080008009, 0x000000008000000a
	.quad	0x000000008000808b, 0x800000000000008b, 0x8000000000008089
	.quad	0x8000000000008003, 0x8000000000008002, 0x8000000000000080
	.quad	0x000000000000800a, 0x800000008000000a, 0x8000000080008081
	.quad	0x8000000000008080, 0x0000000080000001, 0x8000000080008008
+161 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * sha3-ce-glue.c - core SHA-3 transform using v8.2 Crypto Extensions
 *
 * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
#include <asm/unaligned.h>
#include <crypto/internal/hash.h>
#include <crypto/sha3.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/module.h>

MODULE_DESCRIPTION("SHA3 secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");

asmlinkage void sha3_ce_transform(u64 *st, const u8 *data, int blocks,
				  int md_len);

static int sha3_update(struct shash_desc *desc, const u8 *data,
		       unsigned int len)
{
	struct sha3_state *sctx = shash_desc_ctx(desc);
	unsigned int digest_size = crypto_shash_digestsize(desc->tfm);

	if (!may_use_simd())
		return crypto_sha3_update(desc, data, len);

	if ((sctx->partial + len) >= sctx->rsiz) {
		int blocks;

		if (sctx->partial) {
			int p = sctx->rsiz - sctx->partial;

			memcpy(sctx->buf + sctx->partial, data, p);
			kernel_neon_begin();
			sha3_ce_transform(sctx->st, sctx->buf, 1, digest_size);
			kernel_neon_end();

			data += p;
			len -= p;
			sctx->partial = 0;
		}

		blocks = len / sctx->rsiz;
		len %= sctx->rsiz;

		if (blocks) {
			kernel_neon_begin();
			sha3_ce_transform(sctx->st, data, blocks, digest_size);
			kernel_neon_end();
			data += blocks * sctx->rsiz;
		}
	}

	if (len) {
		memcpy(sctx->buf + sctx->partial, data, len);
		sctx->partial += len;
	}
	return 0;
}

static int sha3_final(struct shash_desc *desc, u8 *out)
{
	struct sha3_state *sctx = shash_desc_ctx(desc);
	unsigned int digest_size = crypto_shash_digestsize(desc->tfm);
	__le64 *digest = (__le64 *)out;
	int i;

	if (!may_use_simd())
		return crypto_sha3_final(desc, out);

	sctx->buf[sctx->partial++] = 0x06;
	memset(sctx->buf + sctx->partial, 0, sctx->rsiz - sctx->partial);
	sctx->buf[sctx->rsiz - 1] |= 0x80;

	kernel_neon_begin();
	sha3_ce_transform(sctx->st, sctx->buf, 1, digest_size);
	kernel_neon_end();

	for (i = 0; i < digest_size / 8; i++)
		put_unaligned_le64(sctx->st[i], digest++);

	if (digest_size & 4)
		put_unaligned_le32(sctx->st[i], (__le32 *)digest);

	*sctx = (struct sha3_state){};
	return 0;
}

static struct shash_alg algs[] = { {
	.digestsize		= SHA3_224_DIGEST_SIZE,
	.init			= crypto_sha3_init,
	.update			= sha3_update,
	.final			= sha3_final,
	.descsize		= sizeof(struct sha3_state),
	.base.cra_name		= "sha3-224",
	.base.cra_driver_name	= "sha3-224-ce",
	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
	.base.cra_blocksize	= SHA3_224_BLOCK_SIZE,
	.base.cra_module	= THIS_MODULE,
	.base.cra_priority	= 200,
}, {
	.digestsize		= SHA3_256_DIGEST_SIZE,
	.init			= crypto_sha3_init,
	.update			= sha3_update,
	.final			= sha3_final,
	.descsize		= sizeof(struct sha3_state),
	.base.cra_name		= "sha3-256",
	.base.cra_driver_name	= "sha3-256-ce",
	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
	.base.cra_blocksize	= SHA3_256_BLOCK_SIZE,
	.base.cra_module	= THIS_MODULE,
	.base.cra_priority	= 200,
}, {
	.digestsize		= SHA3_384_DIGEST_SIZE,
	.init			= crypto_sha3_init,
	.update			= sha3_update,
	.final			= sha3_final,
	.descsize		= sizeof(struct sha3_state),
	.base.cra_name		= "sha3-384",
	.base.cra_driver_name	= "sha3-384-ce",
	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
	.base.cra_blocksize	= SHA3_384_BLOCK_SIZE,
	.base.cra_module	= THIS_MODULE,
	.base.cra_priority	= 200,
}, {
	.digestsize		= SHA3_512_DIGEST_SIZE,
	.init			= crypto_sha3_init,
	.update			= sha3_update,
	.final			= sha3_final,
	.descsize		= sizeof(struct sha3_state),
	.base.cra_name		= "sha3-512",
	.base.cra_driver_name	= "sha3-512-ce",
	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
	.base.cra_blocksize	= SHA3_512_BLOCK_SIZE,
	.base.cra_module	= THIS_MODULE,
	.base.cra_priority	= 200,
} };

static int __init sha3_neon_mod_init(void)
{
	return crypto_register_shashes(algs, ARRAY_SIZE(algs));
}

static void __exit sha3_neon_mod_fini(void)
{
	crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
}

module_cpu_feature_match(SHA3, sha3_neon_mod_init);
module_exit(sha3_neon_mod_fini);