Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit cc7cf991, authored by Eric Biggers, committed by Herbert Xu
Browse files

crypto: arm64/chacha20 - add XChaCha20 support



Add an XChaCha20 implementation that is hooked up to the ARM64 NEON
implementation of ChaCha20.  This can be used by Adiantum.

A NEON implementation of single-block HChaCha20 is also added so that
XChaCha20 can use it rather than the generic implementation.  This
required refactoring the ChaCha20 permutation into its own function.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent a00fa0c8
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -101,7 +101,7 @@ config CRYPTO_AES_ARM64_NEON_BLK
	select CRYPTO_SIMD

config CRYPTO_CHACHA20_NEON
	tristate "NEON accelerated ChaCha20 symmetric cipher"
	tristate "ChaCha20 and XChaCha20 stream ciphers using NEON instructions"
	depends on KERNEL_MODE_NEON
	select CRYPTO_BLKCIPHER
	select CRYPTO_CHACHA20
+48 −17
Original line number Diff line number Diff line
@@ -23,25 +23,20 @@
	.text
	.align		6

ENTRY(chacha20_block_xor_neon)
	// x0: Input state matrix, s
	// x1: 1 data block output, o
	// x2: 1 data block input, i

	//
	// This function encrypts one ChaCha20 block by loading the state matrix
	// in four NEON registers. It performs matrix operation on four words in
	// parallel, but requires shuffling to rearrange the words after each
	// round.
	//

	// x0..3 = s0..3
	adr		x3, ROT8
	ld1		{v0.4s-v3.4s}, [x0]
	ld1		{v8.4s-v11.4s}, [x0]
	ld1		{v12.4s}, [x3]
/*
 * chacha20_permute - permute one block
 *
 * Permute one 64-byte block where the state matrix is stored in the four NEON
 * registers v0-v3.  It performs matrix operations on four words in parallel,
 * but requires shuffling to rearrange the words after each round.
 *
 * Clobbers: x3, x10, v4, v12
 */
chacha20_permute:

	mov		x3, #10
	adr		x10, ROT8
	ld1		{v12.4s}, [x10]

.Ldoubleround:
	// x0 += x1, x3 = rotl32(x3 ^ x0, 16)
@@ -105,6 +100,23 @@ ENTRY(chacha20_block_xor_neon)
	subs		x3, x3, #1
	b.ne		.Ldoubleround

	ret
ENDPROC(chacha20_permute)

ENTRY(chacha20_block_xor_neon)
	// x0: Input state matrix, s
	// x1: 1 data block output, o
	// x2: 1 data block input, i

	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	// x0..3 = s0..3
	ld1		{v0.4s-v3.4s}, [x0]
	ld1		{v8.4s-v11.4s}, [x0]

	bl		chacha20_permute

	ld1		{v4.16b-v7.16b}, [x2]

	// o0 = i0 ^ (x0 + s0)
@@ -125,9 +137,28 @@ ENTRY(chacha20_block_xor_neon)

	st1		{v0.16b-v3.16b}, [x1]

	ldp		x29, x30, [sp], #16
	ret
ENDPROC(chacha20_block_xor_neon)

/*
 * hchacha20_block_neon - single-block HChaCha20 using NEON
 *
 * C prototype: void hchacha20_block_neon(const u32 *state, u32 *out);
 *
 * Loads the 64-byte state matrix into v0-v3, runs chacha20_permute on it,
 * and writes rows 0 and 3 of the permuted matrix (v0 and v3) to the output
 * buffer.  Unlike chacha20_block_xor_neon, the original state is not added
 * back in and no data is XORed.
 *
 * Clobbers: x1 (advanced by the post-indexed store), v0-v3, plus whatever
 * chacha20_permute clobbers (documented there as x3, x10, v4, v12).
 */
ENTRY(hchacha20_block_neon)
	// x0: Input state matrix, s
	// x1: output (8 32-bit words)

	// Non-leaf function: the bl below overwrites x30, so save the
	// frame pointer / link register pair and set up a frame record.
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	// v0..v3 = the four 4-word rows of the state matrix
	ld1		{v0.4s-v3.4s}, [x0]

	// Permute the state held in v0-v3 in place.
	bl		chacha20_permute

	// out[0..3] = row 0; x1 is post-incremented by 16 bytes
	st1		{v0.16b}, [x1], #16
	// out[4..7] = row 3
	st1		{v3.16b}, [x1]

	// Restore the frame pointer / link register and return.
	ldp		x29, x30, [sp], #16
	ret
ENDPROC(hchacha20_block_neon)

	.align		6
ENTRY(chacha20_4block_xor_neon)
	// x0: Input state matrix, s
+76 −25
Original line number Diff line number Diff line
@@ -30,6 +30,7 @@

asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src);
asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src);
asmlinkage void hchacha20_block_neon(const u32 *state, u32 *out);

static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
			    unsigned int bytes)
@@ -65,20 +66,16 @@ static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
	kernel_neon_end();
}

static int chacha20_neon(struct skcipher_request *req)
static int chacha20_neon_stream_xor(struct skcipher_request *req,
				    struct chacha_ctx *ctx, u8 *iv)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	u32 state[16];
	int err;

	if (!may_use_simd() || req->cryptlen <= CHACHA_BLOCK_SIZE)
		return crypto_chacha_crypt(req);

	err = skcipher_walk_virt(&walk, req, false);

	crypto_chacha_init(state, ctx, walk.iv);
	crypto_chacha_init(state, ctx, iv);

	while (walk.nbytes > 0) {
		unsigned int nbytes = walk.nbytes;
@@ -94,7 +91,41 @@ static int chacha20_neon(struct skcipher_request *req)
	return err;
}

static struct skcipher_alg alg = {
/*
 * ChaCha20 skcipher handler, installed as both .encrypt and .decrypt.
 *
 * Requests longer than one ChaCha block are processed by the NEON stream
 * routine when SIMD may be used; every other request is handed to
 * crypto_chacha_crypt().
 *
 * Returns the result of whichever implementation handled the request.
 */
static int chacha20_neon(struct skcipher_request *req)
{
	struct chacha_ctx *ctx;

	ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));

	/* The length is tested first; may_use_simd() only runs if needed. */
	if (req->cryptlen > CHACHA_BLOCK_SIZE && may_use_simd())
		return chacha20_neon_stream_xor(req, ctx, req->iv);

	return crypto_chacha_crypt(req);
}

/*
 * XChaCha20 skcipher handler, installed as both .encrypt and .decrypt.
 *
 * Requests of at most one ChaCha block, or requests made when SIMD may not
 * be used, are handed to crypto_xchacha_crypt().  Otherwise a subkey is
 * computed with the NEON HChaCha20 routine and the data is processed by the
 * NEON ChaCha20 stream routine using that subkey and a 16-byte IV rebuilt
 * from the tail of the request IV.
 *
 * Returns the result of whichever implementation handled the request.
 */
static int xchacha20_neon(struct skcipher_request *req)
{
	struct chacha_ctx *ctx;
	struct chacha_ctx subctx;
	u8 real_iv[16];
	u32 state[16];

	ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));

	if (req->cryptlen <= CHACHA_BLOCK_SIZE || !may_use_simd())
		return crypto_xchacha_crypt(req);

	/* Build the HChaCha20 input state from the user key and request IV. */
	crypto_chacha_init(state, ctx, req->iv);

	/* NEON registers may only be touched inside this begin/end pair. */
	kernel_neon_begin();
	hchacha20_block_neon(state, subctx.key);
	kernel_neon_end();

	/*
	 * IV for the inner ChaCha20 pass: request IV bytes 24..31 followed by
	 * bytes 16..23.
	 * NOTE(review): presumably the stream position followed by the
	 * remaining nonce bytes - confirm against crypto_chacha_init().
	 */
	memcpy(real_iv, req->iv + 24, 8);
	memcpy(real_iv + 8, req->iv + 16, 8);

	return chacha20_neon_stream_xor(req, &subctx, real_iv);
}

static struct skcipher_alg algs[] = {
	{
		.base.cra_name		= "chacha20",
		.base.cra_driver_name	= "chacha20-neon",
		.base.cra_priority	= 300,
@@ -110,6 +141,23 @@ static struct skcipher_alg alg = {
		.setkey			= crypto_chacha20_setkey,
		.encrypt		= chacha20_neon,
		.decrypt		= chacha20_neon,
	}, {
		.base.cra_name		= "xchacha20",
		.base.cra_driver_name	= "xchacha20-neon",
		.base.cra_priority	= 300,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
		.base.cra_module	= THIS_MODULE,

		.min_keysize		= CHACHA_KEY_SIZE,
		.max_keysize		= CHACHA_KEY_SIZE,
		.ivsize			= XCHACHA_IV_SIZE,
		.chunksize		= CHACHA_BLOCK_SIZE,
		.walksize		= 4 * CHACHA_BLOCK_SIZE,
		.setkey			= crypto_chacha20_setkey,
		.encrypt		= xchacha20_neon,
		.decrypt		= xchacha20_neon,
	}
};

static int __init chacha20_simd_mod_init(void)
@@ -117,12 +165,12 @@ static int __init chacha20_simd_mod_init(void)
	if (!(elf_hwcap & HWCAP_ASIMD))
		return -ENODEV;

	return crypto_register_skcipher(&alg);
	return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
}

static void __exit chacha20_simd_mod_fini(void)
{
	crypto_unregister_skcipher(&alg);
	crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
}

module_init(chacha20_simd_mod_init);
@@ -131,3 +179,6 @@ module_exit(chacha20_simd_mod_fini);
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("chacha20");
MODULE_ALIAS_CRYPTO("chacha20-neon");
MODULE_ALIAS_CRYPTO("xchacha20");
MODULE_ALIAS_CRYPTO("xchacha20-neon");