crypto: arm/aes - add some hardening against cache-timing attacks (913a3aa0) · Commits · e / devices / android_kernel_fairphone_FP5

arch/arm/crypto/Kconfig

+9 −0

Original line number	Diff line number	Diff line
		@@ -69,6 +69,15 @@ config CRYPTO_AES_ARM
		help
		Use optimized AES assembler routines for ARM platforms.

		On ARM processors without the Crypto Extensions, this is the
		fastest AES implementation for single blocks. For multiple
		blocks, the NEON bit-sliced implementation is usually faster.

		This implementation may be vulnerable to cache-timing attacks,
		since it uses lookup tables. However, as countermeasures it
		disables IRQs and preloads the tables; it is hoped this makes
		such attacks very difficult.

		config CRYPTO_AES_ARM_BS
		tristate "Bit sliced AES using NEON instructions"
		depends on KERNEL_MODE_NEON

arch/arm/crypto/aes-cipher-core.S

+52 −10

Original line number	Diff line number	Diff line
		@@ -10,6 +10,7 @@
		*/

		#include <linux/linkage.h>
		#include <asm/assembler.h>
		#include <asm/cache.h>

		.text
		@@ -41,7 +42,7 @@
		.endif
		.endm

		.macro __hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op
		.macro __hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
		__select \out0, \in0, 0
		__select t0, \in1, 1
		__load \out0, \out0, 0, \sz, \op
		@@ -73,6 +74,14 @@
		__load t0, t0, 3, \sz, \op
		__load \t4, \t4, 3, \sz, \op

		.ifnb \oldcpsr
		/*
		* This is the final round and we're done with all data-dependent table
		* lookups, so we can safely re-enable interrupts.
		*/
		restore_irqs \oldcpsr
		.endif

		eor \out1, \out1, t1, ror #24
		eor \out0, \out0, t2, ror #16
		ldm rk!, {t1, t2}
		@@ -83,14 +92,14 @@
		eor \out1, \out1, t2
		.endm

		.macro fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
		.macro fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
		__hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
		__hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op
		__hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr
		.endm

		.macro iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
		.macro iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
		__hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
		__hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op
		__hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
		.endm

		.macro __rev, out, in
		@@ -118,13 +127,14 @@
		.macro do_crypt, round, ttab, ltab, bsz
		push {r3-r11, lr}

		// Load keys first, to reduce latency in case they're not cached yet.
		ldm rk!, {r8-r11}

		ldr r4, [in]
		ldr r5, [in, #4]
		ldr r6, [in, #8]
		ldr r7, [in, #12]

		ldm rk!, {r8-r11}

		#ifdef CONFIG_CPU_BIG_ENDIAN
		__rev r4, r4
		__rev r5, r5
		@@ -138,6 +148,25 @@
		eor r7, r7, r11

		__adrl ttab, \ttab
		/*
		* Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into
		* L1 cache by loading one word from every 32 bytes of the table, which
		* assumes a cacheline size of at least 32 bytes. This is a hardening
		* measure intended to make cache-timing attacks more difficult. They
		* may not be fully prevented, however; see the paper
		* https://cr.yp.to/antiforgery/cachetiming-20050414.pdf
		* ("Cache-timing attacks on AES") for a discussion of the many
		* difficulties involved in writing truly constant-time AES software.
		*/
		save_and_disable_irqs t0
		.set i, 0
		.rept 1024 / 128
		ldr r8, [ttab, #i + 0]
		ldr r9, [ttab, #i + 32]
		ldr r10, [ttab, #i + 64]
		ldr r11, [ttab, #i + 96]
		.set i, i + 128
		.endr
		push {t0} // oldcpsr

		tst rounds, #2
		bne 1f
		@@ -151,8 +180,21 @@
		\round r4, r5, r6, r7, r8, r9, r10, r11
		b 0b

		2: __adrl ttab, \ltab
		\round r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b
		2: .ifb \ltab
		add ttab, ttab, #1
		.else
		__adrl ttab, \ltab
		// Prefetch inverse S-box for final round; see explanation above
		.set i, 0
		.rept 256 / 64
		ldr t0, [ttab, #i + 0]
		ldr t1, [ttab, #i + 32]
		.set i, i + 64
		.endr
		.endif

		pop {rounds} // oldcpsr
		\round r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds

		#ifdef CONFIG_CPU_BIG_ENDIAN
		__rev r4, r4
		@@ -175,7 +217,7 @@
		.endm

		ENTRY(__aes_arm_encrypt)
		do_crypt fround, crypto_ft_tab, crypto_ft_tab + 1, 2
		do_crypt fround, crypto_ft_tab,, 2
		ENDPROC(__aes_arm_encrypt)

		.align 5

crypto/aes_generic.c

+5 −4

Original line number	Diff line number	Diff line
		@@ -63,7 +63,8 @@ static inline u8 byte(const u32 x, const unsigned n)

		static const u32 rco_tab[10] = { 1, 2, 4, 8, 16, 32, 64, 128, 27, 54 };

		__visible const u32 crypto_ft_tab[4][256] = {
		/* cacheline-aligned to facilitate prefetching into cache */
		__visible const u32 crypto_ft_tab[4][256] __cacheline_aligned = {
		{
		0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6,
		0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591,
		@@ -327,7 +328,7 @@ __visible const u32 crypto_ft_tab[4][256] = {
		}
		};

		__visible const u32 crypto_fl_tab[4][256] = {
		__visible const u32 crypto_fl_tab[4][256] __cacheline_aligned = {
		{
		0x00000063, 0x0000007c, 0x00000077, 0x0000007b,
		0x000000f2, 0x0000006b, 0x0000006f, 0x000000c5,
		@@ -591,7 +592,7 @@ __visible const u32 crypto_fl_tab[4][256] = {
		}
		};

		__visible const u32 crypto_it_tab[4][256] = {
		__visible const u32 crypto_it_tab[4][256] __cacheline_aligned = {
		{
		0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a,
		0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b,
		@@ -855,7 +856,7 @@ __visible const u32 crypto_it_tab[4][256] = {
		}
		};

		__visible const u32 crypto_il_tab[4][256] = {
		__visible const u32 crypto_il_tab[4][256] __cacheline_aligned = {
		{
		0x00000052, 0x00000009, 0x0000006a, 0x000000d5,
		0x00000030, 0x00000036, 0x000000a5, 0x00000038,