Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 0b8e72bd authored by Eric Biggers
Browse files

BACKPORT, FROMGIT: crypto: arm/chacha20 - add XChaCha20 support



Add an XChaCha20 implementation that is hooked up to the ARM NEON
implementation of ChaCha20.  This is needed for use in the Adiantum
encryption mode; see the generic code patch,
"crypto: chacha20-generic - add XChaCha20 support", for more details.

We also update the NEON code to support HChaCha20 on one block, so we
can use that in XChaCha20 rather than calling the generic HChaCha20.
This required factoring the permutation out into its own macro.

Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

(cherry picked from commit d97a94309d764ed907d4281da6246f5d935166f8
 https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git master)

Conflicts:
	arch/arm/crypto/chacha20-neon-glue.c

(backported from skcipher to blkcipher API)

Bug: 112008522
Test: As series, see Ic61c13b53facfd2173065be715a7ee5f3af8760b
Change-Id: I84c3a019e22598f8f8eb25e7a0fefbc79c9660c9
Signed-off-by: Eric Biggers <ebiggers@google.com>
parent d31aa621
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -112,7 +112,7 @@ config CRYPTO_GHASH_ARM_CE
	  that is part of the ARMv8 Crypto Extensions

config CRYPTO_CHACHA20_NEON
	tristate "NEON accelerated ChaCha20 symmetric cipher"
	tristate "NEON accelerated ChaCha20 stream cipher algorithms"
	depends on KERNEL_MODE_NEON
	select CRYPTO_BLKCIPHER
	select CRYPTO_CHACHA20
+48 −22
Original line number Diff line number Diff line
@@ -52,27 +52,16 @@
	.fpu		neon
	.align		5

ENTRY(chacha20_block_xor_neon)
	// r0: Input state matrix, s
	// r1: 1 data block output, o
	// r2: 1 data block input, i

	//
	// This function encrypts one ChaCha20 block by loading the state matrix
	// in four NEON registers. It performs matrix operation on four words in
	// parallel, but requires shuffling to rearrange the words after each
	// round.
	//

	// x0..3 = s0..3
	add		ip, r0, #0x20
	vld1.32		{q0-q1}, [r0]
	vld1.32		{q2-q3}, [ip]

	vmov		q8, q0
	vmov		q9, q1
	vmov		q10, q2
	vmov		q11, q3
/*
 * chacha20_permute - permute one block
 *
 * Permute one 64-byte block where the state matrix is stored in the four NEON
 * registers q0-q3.  It performs matrix operations on four words in parallel,
 * but requires shuffling to rearrange the words after each round.
 *
 * Clobbers: r3, ip, q4-q5
 */
chacha20_permute:

	adr		ip, .Lrol8_table
	mov		r3, #10
@@ -142,6 +131,27 @@ ENTRY(chacha20_block_xor_neon)
	subs		r3, r3, #1
	bne		.Ldoubleround

	bx		lr
ENDPROC(chacha20_permute)

ENTRY(chacha20_block_xor_neon)
	// r0: Input state matrix, s
	// r1: 1 data block output, o
	// r2: 1 data block input, i
	push		{lr}

	// x0..3 = s0..3
	add		ip, r0, #0x20
	vld1.32		{q0-q1}, [r0]
	vld1.32		{q2-q3}, [ip]

	vmov		q8, q0
	vmov		q9, q1
	vmov		q10, q2
	vmov		q11, q3

	bl		chacha20_permute

	add		ip, r2, #0x20
	vld1.8		{q4-q5}, [r2]
	vld1.8		{q6-q7}, [ip]
@@ -166,9 +176,25 @@ ENTRY(chacha20_block_xor_neon)
	vst1.8		{q0-q1}, [r1]
	vst1.8		{q2-q3}, [ip]

	bx		lr
	pop		{pc}
ENDPROC(chacha20_block_xor_neon)

ENTRY(hchacha20_block_neon)
	// r0: Input state matrix, s
	// r1: output (8 32-bit words)
	//
	// Loads the state into q0-q3, runs chacha20_permute on it, and stores
	// only q0 and q3 (the first and last four words of the permuted state)
	// to the output buffer.  Unlike chacha20_block_xor_neon, no copy of the
	// input state is kept and nothing is added back after the permutation.
	//
	// Both r0 and r1 are advanced by the post-increment addressing below;
	// chacha20_permute additionally clobbers r3, ip and q4-q5.

	// 'bl' below overwrites lr, so save the return address first.
	push		{lr}

	// x0..3 = s0..3; the writeback on the first load moves r0 on to the
	// second half of the state.
	vld1.32		{q0-q1}, [r0]!
	vld1.32		{q2-q3}, [r0]

	bl		chacha20_permute

	// out[0..3] = q0, out[4..7] = q3
	vst1.32		{q0}, [r1]!
	vst1.32		{q3}, [r1]

	// Return by popping the saved lr straight into pc.
	pop		{pc}
ENDPROC(hchacha20_block_neon)

	.align		4
.Lctrinc:	.word	0, 1, 2, 3
.Lrol8_table:	.byte	3, 0, 1, 2, 7, 4, 5, 6
+91 −29
Original line number Diff line number Diff line
/*
 * ChaCha20 256-bit cipher algorithm, RFC7539, ARM NEON functions
 * ChaCha20 (RFC7539) and XChaCha20 stream ciphers, NEON accelerated
 *
 * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
 *
@@ -30,6 +30,7 @@

asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src);
asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src);
asmlinkage void hchacha20_block_neon(const u32 *state, u32 *out);

static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
			    unsigned int bytes)
@@ -57,20 +58,20 @@ static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
	}
}

static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst,
			 struct scatterlist *src, unsigned int nbytes)
static int chacha20_neon_stream_xor(struct blkcipher_desc *desc,
				    struct scatterlist *dst,
				    struct scatterlist *src,
				    unsigned int nbytes,
				    struct chacha_ctx *ctx, u8 *iv)
{
	struct blkcipher_walk walk;
	u32 state[16];
	int err;

	if (nbytes <= CHACHA_BLOCK_SIZE || !may_use_simd())
		return crypto_chacha_crypt(desc, dst, src, nbytes);

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt_block(desc, &walk, CHACHA_BLOCK_SIZE);

	crypto_chacha_init(state, crypto_blkcipher_ctx(desc->tfm), walk.iv);
	crypto_chacha_init(state, ctx, iv);

	while (walk.nbytes >= CHACHA_BLOCK_SIZE) {
		kernel_neon_begin();
@@ -88,11 +89,47 @@ static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst,
		kernel_neon_end();
		err = blkcipher_walk_done(desc, &walk, 0);
	}

	return err;
}

static struct crypto_alg alg = {
/*
 * blkcipher encrypt/decrypt handler for the NEON ChaCha20 algorithm.
 *
 * Requests longer than one ChaCha block are handed to
 * chacha20_neon_stream_xor() with the transform's own context and the IV
 * taken from desc->info, provided SIMD may be used in the current context.
 * Everything else is delegated to crypto_chacha_crypt().
 *
 * Returns whatever the selected implementation returns.
 */
static int chacha20_neon(struct blkcipher_desc *desc, struct scatterlist *dst,
			 struct scatterlist *src, unsigned int nbytes)
{
	/* Take the NEON path only for > 1 block and when SIMD is allowed. */
	if (nbytes > CHACHA_BLOCK_SIZE && may_use_simd())
		return chacha20_neon_stream_xor(desc, dst, src, nbytes,
						crypto_blkcipher_ctx(desc->tfm),
						desc->info);

	return crypto_chacha_crypt(desc, dst, src, nbytes);
}

static int xchacha20_neon(struct blkcipher_desc *desc, struct scatterlist *dst,
			  struct scatterlist *src, unsigned int nbytes)
{
	struct chacha_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	u8 *iv = desc->info;
	struct chacha_ctx subctx;
	u32 state[16];
	u8 real_iv[16];

	if (nbytes <= CHACHA_BLOCK_SIZE || !may_use_simd())
		return crypto_xchacha_crypt(desc, dst, src, nbytes);

	crypto_chacha_init(state, ctx, iv);

	kernel_neon_begin();
	hchacha20_block_neon(state, subctx.key);
	kernel_neon_end();

	memcpy(&real_iv[0], iv + 24, 8);
	memcpy(&real_iv[8], iv + 16, 8);
	return chacha20_neon_stream_xor(desc, dst, src, nbytes, &subctx,
					real_iv);
}

static struct crypto_alg algs[] = {
	{
		.cra_name		= "chacha20",
		.cra_driver_name	= "chacha20-neon",
		.cra_priority		= 300,
@@ -109,8 +146,30 @@ static struct crypto_alg alg = {
				.ivsize		= CHACHA_IV_SIZE,
				.geniv		= "seqiv",
				.setkey		= crypto_chacha20_setkey,
			.encrypt	= chacha20_simd,
			.decrypt	= chacha20_simd,
				.encrypt	= chacha20_neon,
				.decrypt	= chacha20_neon,
			},
		},
	}, {
		.cra_name		= "xchacha20",
		.cra_driver_name	= "xchacha20-neon",
		.cra_priority		= 300,
		.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
		.cra_blocksize		= 1,
		.cra_type		= &crypto_blkcipher_type,
		.cra_ctxsize		= sizeof(struct chacha_ctx),
		.cra_alignmask		= sizeof(u32) - 1,
		.cra_module		= THIS_MODULE,
		.cra_u			= {
			.blkcipher = {
				.min_keysize	= CHACHA_KEY_SIZE,
				.max_keysize	= CHACHA_KEY_SIZE,
				.ivsize		= XCHACHA_IV_SIZE,
				.geniv		= "seqiv",
				.setkey		= crypto_chacha20_setkey,
				.encrypt	= xchacha20_neon,
				.decrypt	= xchacha20_neon,
			},
		},
	},
};
@@ -120,12 +179,12 @@ static int __init chacha20_simd_mod_init(void)
	if (!(elf_hwcap & HWCAP_NEON))
		return -ENODEV;

	return crypto_register_alg(&alg);
	return crypto_register_algs(algs, ARRAY_SIZE(algs));
}

static void __exit chacha20_simd_mod_fini(void)
{
	crypto_unregister_alg(&alg);
	crypto_unregister_algs(algs, ARRAY_SIZE(algs));
}

module_init(chacha20_simd_mod_init);
@@ -134,3 +193,6 @@ module_exit(chacha20_simd_mod_fini);
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("chacha20");
MODULE_ALIAS_CRYPTO("chacha20-neon");
MODULE_ALIAS_CRYPTO("xchacha20");
MODULE_ALIAS_CRYPTO("xchacha20-neon");