Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b7912e0f authored by Ard Biesheuvel's avatar Ard Biesheuvel Committed by Eric Biggers
Browse files

BACKPORT: crypto: arm/aes - replace scalar AES cipher



This replaces the scalar AES cipher that originates in the OpenSSL project
with a new implementation that is ~15% (*) faster (on modern cores), and
reuses the lookup tables and the key schedule generation routines from the
generic C implementation (which is usually compiled in anyway due to
networking and other subsystems depending on it).

Note that the bit sliced NEON code for AES still depends on the scalar cipher
that this patch replaces, so it is not removed entirely yet.

* On Cortex-A57, the performance increases from 17.0 to 14.9 cycles per byte
  for 128-bit keys.

Signed-off-by: default avatarArd Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: default avatarHerbert Xu <herbert@gondor.apana.org.au>

(cherry picked from commit 81edb42629758bacdf813dd5e4542ae26e3ad73a)

Conflicts:
	arch/arm/crypto/Kconfig

Bug: 112008522
Test: As series, see Ic61c13b53facfd2173065be715a7ee5f3af8760b
Change-Id: Iac145d586b01c5c18f3cb5979bf6035232d4dbf2
Signed-off-by: default avatarEric Biggers <ebiggers@google.com>
parent 4f8baaba
Loading
Loading
Loading
Loading
+1 −19
Original line number Diff line number Diff line
@@ -62,34 +62,16 @@ config CRYPTO_SHA512_ARM
	  using optimized ARM assembler and NEON, when available.

config CRYPTO_AES_ARM
	tristate "AES cipher algorithms (ARM-asm)"
	depends on ARM
	tristate "Scalar AES cipher for ARM"
	select CRYPTO_ALGAPI
	select CRYPTO_AES
	help
	  Use optimized AES assembler routines for ARM platforms.

	  AES cipher algorithms (FIPS-197). AES uses the Rijndael
	  algorithm.

	  Rijndael appears to be consistently a very good performer in
	  both hardware and software across a wide range of computing
	  environments regardless of its use in feedback or non-feedback
	  modes. Its key setup time is excellent, and its key agility is
	  good. Rijndael's very low memory requirements make it very well
	  suited for restricted-space environments, in which it also
	  demonstrates excellent performance. Rijndael's operations are
	  among the easiest to defend against power and timing attacks.

	  The AES specifies three key sizes: 128, 192 and 256 bits

	  See <http://csrc.nist.gov/encryption/aes/> for more information.

config CRYPTO_AES_ARM_BS
	tristate "Bit sliced AES using NEON instructions"
	depends on KERNEL_MODE_NEON
	select CRYPTO_ALGAPI
	select CRYPTO_AES_ARM
	select CRYPTO_ABLK_HELPER
	help
	  Use a faster and more secure NEON based implementation of AES in CBC,
+2 −2
Original line number Diff line number Diff line
@@ -25,8 +25,8 @@ $(warning $(ce-obj-y) $(ce-obj-m))
endif
endif

aes-arm-y	:= aes-armv4.o aes_glue.o
aes-arm-bs-y	:= aesbs-core.o aesbs-glue.o
aes-arm-y	:= aes-cipher-core.o aes-cipher-glue.o
aes-arm-bs-y	:= aes-armv4.o aesbs-core.o aesbs-glue.o
sha1-arm-y	:= sha1-armv4-large.o sha1_glue.o
sha1-arm-neon-y	:= sha1-armv7-neon.o sha1_neon_glue.o
sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o
+179 −0
Original line number Diff line number Diff line
/*
 * Scalar AES core transform
 *
 * Copyright (C) 2017 Linaro Ltd.
 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>

	.text
	.align		5

	rk		.req	r0
	rounds		.req	r1
	in		.req	r2
	out		.req	r3
	tt		.req	ip

	t0		.req	lr
	t1		.req	r2
	t2		.req	r3

	.macro		__select, out, in, idx
	.if		__LINUX_ARM_ARCH__ < 7
	and		\out, \in, #0xff << (8 * \idx)
	.else
	ubfx		\out, \in, #(8 * \idx), #8
	.endif
	.endm

	.macro		__load, out, in, idx
	.if		__LINUX_ARM_ARCH__ < 7 && \idx > 0
	ldr		\out, [tt, \in, lsr #(8 * \idx) - 2]
	.else
	ldr		\out, [tt, \in, lsl #2]
	.endif
	.endm

	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc
	__select	\out0, \in0, 0
	__select	t0, \in1, 1
	__load		\out0, \out0, 0
	__load		t0, t0, 1

	.if		\enc
	__select	\out1, \in1, 0
	__select	t1, \in2, 1
	.else
	__select	\out1, \in3, 0
	__select	t1, \in0, 1
	.endif
	__load		\out1, \out1, 0
	__select	t2, \in2, 2
	__load		t1, t1, 1
	__load		t2, t2, 2

	eor		\out0, \out0, t0, ror #24

	__select	t0, \in3, 3
	.if		\enc
	__select	\t3, \in3, 2
	__select	\t4, \in0, 3
	.else
	__select	\t3, \in1, 2
	__select	\t4, \in2, 3
	.endif
	__load		\t3, \t3, 2
	__load		t0, t0, 3
	__load		\t4, \t4, 3

	eor		\out1, \out1, t1, ror #24
	eor		\out0, \out0, t2, ror #16
	ldm		rk!, {t1, t2}
	eor		\out1, \out1, \t3, ror #16
	eor		\out0, \out0, t0, ror #8
	eor		\out1, \out1, \t4, ror #8
	eor		\out0, \out0, t1
	eor		\out1, \out1, t2
	.endm

	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3
	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1
	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1
	.endm

	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3
	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0
	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0
	.endm

	.macro		__rev, out, in
	.if		__LINUX_ARM_ARCH__ < 6
	lsl		t0, \in, #24
	and		t1, \in, #0xff00
	and		t2, \in, #0xff0000
	orr		\out, t0, \in, lsr #24
	orr		\out, \out, t1, lsl #8
	orr		\out, \out, t2, lsr #8
	.else
	rev		\out, \in
	.endif
	.endm

	.macro		__adrl, out, sym, c
	.if		__LINUX_ARM_ARCH__ < 7
	ldr\c		\out, =\sym
	.else
	movw\c		\out, #:lower16:\sym
	movt\c		\out, #:upper16:\sym
	.endif
	.endm

	.macro		do_crypt, round, ttab, ltab
	push		{r3-r11, lr}

	ldr		r4, [in]
	ldr		r5, [in, #4]
	ldr		r6, [in, #8]
	ldr		r7, [in, #12]

	ldm		rk!, {r8-r11}

#ifdef CONFIG_CPU_BIG_ENDIAN
	__rev		r4, r4
	__rev		r5, r5
	__rev		r6, r6
	__rev		r7, r7
#endif

	eor		r4, r4, r8
	eor		r5, r5, r9
	eor		r6, r6, r10
	eor		r7, r7, r11

	__adrl		tt, \ttab

	tst		rounds, #2
	bne		1f

0:	\round		r8, r9, r10, r11, r4, r5, r6, r7
	\round		r4, r5, r6, r7, r8, r9, r10, r11

1:	subs		rounds, rounds, #4
	\round		r8, r9, r10, r11, r4, r5, r6, r7
	__adrl		tt, \ltab, ls
	\round		r4, r5, r6, r7, r8, r9, r10, r11
	bhi		0b

#ifdef CONFIG_CPU_BIG_ENDIAN
	__rev		r4, r4
	__rev		r5, r5
	__rev		r6, r6
	__rev		r7, r7
#endif

	ldr		out, [sp]

	str		r4, [out]
	str		r5, [out, #4]
	str		r6, [out, #8]
	str		r7, [out, #12]

	pop		{r3-r11, pc}

	.align		3
	.ltorg
	.endm

ENTRY(__aes_arm_encrypt)
	do_crypt	fround, crypto_ft_tab, crypto_fl_tab
ENDPROC(__aes_arm_encrypt)

ENTRY(__aes_arm_decrypt)
	do_crypt	iround, crypto_it_tab, crypto_il_tab
ENDPROC(__aes_arm_decrypt)
+74 −0
Original line number Diff line number Diff line
/*
 * Scalar AES core transform
 *
 * Copyright (C) 2017 Linaro Ltd.
 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <crypto/aes.h>
#include <linux/crypto.h>
#include <linux/module.h>

asmlinkage void __aes_arm_encrypt(u32 *rk, int rounds, const u8 *in, u8 *out);
EXPORT_SYMBOL(__aes_arm_encrypt);

asmlinkage void __aes_arm_decrypt(u32 *rk, int rounds, const u8 *in, u8 *out);
EXPORT_SYMBOL(__aes_arm_decrypt);

static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
	int rounds = 6 + ctx->key_length / 4;

	__aes_arm_encrypt(ctx->key_enc, rounds, in, out);
}

static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
	int rounds = 6 + ctx->key_length / 4;

	__aes_arm_decrypt(ctx->key_dec, rounds, in, out);
}

static struct crypto_alg aes_alg = {
	.cra_name			= "aes",
	.cra_driver_name		= "aes-arm",
	.cra_priority			= 200,
	.cra_flags			= CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize			= AES_BLOCK_SIZE,
	.cra_ctxsize			= sizeof(struct crypto_aes_ctx),
	.cra_module			= THIS_MODULE,

	.cra_cipher.cia_min_keysize	= AES_MIN_KEY_SIZE,
	.cra_cipher.cia_max_keysize	= AES_MAX_KEY_SIZE,
	.cra_cipher.cia_setkey		= crypto_aes_set_key,
	.cra_cipher.cia_encrypt		= aes_encrypt,
	.cra_cipher.cia_decrypt		= aes_decrypt,

#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
	.cra_alignmask			= 3,
#endif
};

static int __init aes_init(void)
{
	return crypto_register_alg(&aes_alg);
}

static void __exit aes_fini(void)
{
	crypto_unregister_alg(&aes_alg);
}

module_init(aes_init);
module_exit(aes_fini);

MODULE_DESCRIPTION("Scalar AES cipher for ARM");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("aes");

arch/arm/crypto/aes_glue.c

deleted100644 → 0
+0 −98
Original line number Diff line number Diff line
/*
 * Glue Code for the asm optimized version of the AES Cipher Algorithm
 */

#include <linux/module.h>
#include <linux/crypto.h>
#include <crypto/aes.h>

#include "aes_glue.h"

EXPORT_SYMBOL(AES_encrypt);
EXPORT_SYMBOL(AES_decrypt);
EXPORT_SYMBOL(private_AES_set_encrypt_key);
EXPORT_SYMBOL(private_AES_set_decrypt_key);

static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
	struct AES_CTX *ctx = crypto_tfm_ctx(tfm);
	AES_encrypt(src, dst, &ctx->enc_key);
}

static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
	struct AES_CTX *ctx = crypto_tfm_ctx(tfm);
	AES_decrypt(src, dst, &ctx->dec_key);
}

static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
		unsigned int key_len)
{
	struct AES_CTX *ctx = crypto_tfm_ctx(tfm);

	switch (key_len) {
	case AES_KEYSIZE_128:
		key_len = 128;
		break;
	case AES_KEYSIZE_192:
		key_len = 192;
		break;
	case AES_KEYSIZE_256:
		key_len = 256;
		break;
	default:
		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
		return -EINVAL;
	}

	if (private_AES_set_encrypt_key(in_key, key_len, &ctx->enc_key) == -1) {
		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
		return -EINVAL;
	}
	/* private_AES_set_decrypt_key expects an encryption key as input */
	ctx->dec_key = ctx->enc_key;
	if (private_AES_set_decrypt_key(in_key, key_len, &ctx->dec_key) == -1) {
		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
		return -EINVAL;
	}
	return 0;
}

static struct crypto_alg aes_alg = {
	.cra_name		= "aes",
	.cra_driver_name	= "aes-asm",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize		= AES_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct AES_CTX),
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(aes_alg.cra_list),
	.cra_u	= {
		.cipher	= {
			.cia_min_keysize	= AES_MIN_KEY_SIZE,
			.cia_max_keysize	= AES_MAX_KEY_SIZE,
			.cia_setkey		= aes_set_key,
			.cia_encrypt		= aes_encrypt,
			.cia_decrypt		= aes_decrypt
		}
	}
};

static int __init aes_init(void)
{
	return crypto_register_alg(&aes_alg);
}

static void __exit aes_fini(void)
{
	crypto_unregister_alg(&aes_alg);
}

module_init(aes_init);
module_exit(aes_fini);

MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm (ASM)");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("aes");
MODULE_ALIAS_CRYPTO("aes-asm");
MODULE_AUTHOR("David McCullough <ucdevel@gmail.com>");