Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 07eb54d3 authored by Ard Biesheuvel, committed by Herbert Xu
Browse files

crypto: arm64/sha1-ce - move SHA-1 ARMv8 implementation to base layer



This removes all the boilerplate from the existing implementation,
and replaces it with calls into the base layer.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent 9205b949
Loading
Loading
Loading
Loading
+15 −18
Original line number Original line Diff line number Diff line
@@ -66,8 +66,8 @@
	.word		0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6
	.word		0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6


	/*
	/*
	 * void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
	 * void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
	 * 			  u8 *head, long bytes)
	 *			  int blocks)
	 */
	 */
ENTRY(sha1_ce_transform)
ENTRY(sha1_ce_transform)
	/* load round constants */
	/* load round constants */
@@ -78,25 +78,22 @@ ENTRY(sha1_ce_transform)
	ld1r		{k3.4s}, [x6]
	ld1r		{k3.4s}, [x6]


	/* load state */
	/* load state */
	ldr		dga, [x2]
	ldr		dga, [x0]
	ldr		dgb, [x2, #16]
	ldr		dgb, [x0, #16]


	/* load partial state (if supplied) */
	/* load sha1_ce_state::finalize */
	cbz		x3, 0f
	ldr		w4, [x0, #:lo12:sha1_ce_offsetof_finalize]
	ld1		{v8.4s-v11.4s}, [x3]
	b		1f


	/* load input */
	/* load input */
0:	ld1		{v8.4s-v11.4s}, [x1], #64
0:	ld1		{v8.4s-v11.4s}, [x1], #64
	sub		w0, w0, #1
	sub		w2, w2, #1


1:
CPU_LE(	rev32		v8.16b, v8.16b		)
CPU_LE(	rev32		v8.16b, v8.16b		)
CPU_LE(	rev32		v9.16b, v9.16b		)
CPU_LE(	rev32		v9.16b, v9.16b		)
CPU_LE(	rev32		v10.16b, v10.16b	)
CPU_LE(	rev32		v10.16b, v10.16b	)
CPU_LE(	rev32		v11.16b, v11.16b	)
CPU_LE(	rev32		v11.16b, v11.16b	)


2:	add		t0.4s, v8.4s, k0.4s
1:	add		t0.4s, v8.4s, k0.4s
	mov		dg0v.16b, dgav.16b
	mov		dg0v.16b, dgav.16b


	add_update	c, ev, k0,  8,  9, 10, 11, dgb
	add_update	c, ev, k0,  8,  9, 10, 11, dgb
@@ -127,15 +124,15 @@ CPU_LE( rev32 v11.16b, v11.16b )
	add		dgbv.2s, dgbv.2s, dg1v.2s
	add		dgbv.2s, dgbv.2s, dg1v.2s
	add		dgav.4s, dgav.4s, dg0v.4s
	add		dgav.4s, dgav.4s, dg0v.4s


	cbnz		w0, 0b
	cbnz		w2, 0b


	/*
	/*
	 * Final block: add padding and total bit count.
	 * Final block: add padding and total bit count.
	 * Skip if we have no total byte count in x4. In that case, the input
	 * Skip if the input size was not a round multiple of the block size,
	 * size was not a round multiple of the block size, and the padding is
	 * the padding is handled by the C code in that case.
	 * handled by the C code.
	 */
	 */
	cbz		x4, 3f
	cbz		x4, 3f
	ldr		x4, [x0, #:lo12:sha1_ce_offsetof_count]
	movi		v9.2d, #0
	movi		v9.2d, #0
	mov		x8, #0x80000000
	mov		x8, #0x80000000
	movi		v10.2d, #0
	movi		v10.2d, #0
@@ -144,10 +141,10 @@ CPU_LE( rev32 v11.16b, v11.16b )
	mov		x4, #0
	mov		x4, #0
	mov		v11.d[0], xzr
	mov		v11.d[0], xzr
	mov		v11.d[1], x7
	mov		v11.d[1], x7
	b		2b
	b		1b


	/* store new state */
	/* store new state */
3:	str		dga, [x2]
3:	str		dga, [x0]
	str		dgb, [x2, #16]
	str		dgb, [x0, #16]
	ret
	ret
ENDPROC(sha1_ce_transform)
ENDPROC(sha1_ce_transform)
+44 −107
Original line number Original line Diff line number Diff line
@@ -12,144 +12,81 @@
#include <asm/unaligned.h>
#include <asm/unaligned.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/hash.h>
#include <crypto/sha.h>
#include <crypto/sha.h>
#include <crypto/sha1_base.h>
#include <linux/cpufeature.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/crypto.h>
#include <linux/module.h>
#include <linux/module.h>


#define ASM_EXPORT(sym, val) \
	asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val));

MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_LICENSE("GPL v2");


asmlinkage void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
struct sha1_ce_state {
				  u8 *head, long bytes);
	struct sha1_state	sst;

	u32			finalize;
static int sha1_init(struct shash_desc *desc)
{
	struct sha1_state *sctx = shash_desc_ctx(desc);

	*sctx = (struct sha1_state){
		.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
};
};
	return 0;
}


static int sha1_update(struct shash_desc *desc, const u8 *data,
asmlinkage void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
				  int blocks);

static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
			  unsigned int len)
			  unsigned int len)
{
{
	struct sha1_state *sctx = shash_desc_ctx(desc);
	struct sha1_ce_state *sctx = shash_desc_ctx(desc);
	unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;

	sctx->count += len;

	if ((partial + len) >= SHA1_BLOCK_SIZE) {
		int blocks;

		if (partial) {
			int p = SHA1_BLOCK_SIZE - partial;

			memcpy(sctx->buffer + partial, data, p);
			data += p;
			len -= p;
		}

		blocks = len / SHA1_BLOCK_SIZE;
		len %= SHA1_BLOCK_SIZE;


	sctx->finalize = 0;
	kernel_neon_begin_partial(16);
	kernel_neon_begin_partial(16);
		sha1_ce_transform(blocks, data, sctx->state,
	sha1_base_do_update(desc, data, len,
				  partial ? sctx->buffer : NULL, 0);
			    (sha1_block_fn *)sha1_ce_transform);
	kernel_neon_end();
	kernel_neon_end();


		data += blocks * SHA1_BLOCK_SIZE;
		partial = 0;
	}
	if (len)
		memcpy(sctx->buffer + partial, data, len);
	return 0;
	return 0;
}
}


static int sha1_final(struct shash_desc *desc, u8 *out)
static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
{
	static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };

	struct sha1_state *sctx = shash_desc_ctx(desc);
	__be64 bits = cpu_to_be64(sctx->count << 3);
	__be32 *dst = (__be32 *)out;
	int i;

	u32 padlen = SHA1_BLOCK_SIZE
		     - ((sctx->count + sizeof(bits)) % SHA1_BLOCK_SIZE);

	sha1_update(desc, padding, padlen);
	sha1_update(desc, (const u8 *)&bits, sizeof(bits));

	for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
		put_unaligned_be32(sctx->state[i], dst++);

	*sctx = (struct sha1_state){};
	return 0;
}

static int sha1_finup(struct shash_desc *desc, const u8 *data,
			 unsigned int len, u8 *out)
			 unsigned int len, u8 *out)
{
{
	struct sha1_state *sctx = shash_desc_ctx(desc);
	struct sha1_ce_state *sctx = shash_desc_ctx(desc);
	__be32 *dst = (__be32 *)out;
	bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE);
	int blocks;

	int i;
	ASM_EXPORT(sha1_ce_offsetof_count,

		   offsetof(struct sha1_ce_state, sst.count));
	if (sctx->count || !len || (len % SHA1_BLOCK_SIZE)) {
	ASM_EXPORT(sha1_ce_offsetof_finalize,
		sha1_update(desc, data, len);
		   offsetof(struct sha1_ce_state, finalize));
		return sha1_final(desc, out);
	}


	/*
	/*
	 * Use a fast path if the input is a multiple of 64 bytes. In
	 * Allow the asm code to perform the finalization if there is no
	 * this case, there is no need to copy data around, and we can
	 * partial data and the input is a round multiple of the block size.
	 * perform the entire digest calculation in a single invocation
	 * of sha1_ce_transform()
	 */
	 */
	blocks = len / SHA1_BLOCK_SIZE;
	sctx->finalize = finalize;


	kernel_neon_begin_partial(16);
	kernel_neon_begin_partial(16);
	sha1_ce_transform(blocks, data, sctx->state, NULL, len);
	sha1_base_do_update(desc, data, len,
			    (sha1_block_fn *)sha1_ce_transform);
	if (!finalize)
		sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
	kernel_neon_end();
	kernel_neon_end();

	return sha1_base_finish(desc, out);
	for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
		put_unaligned_be32(sctx->state[i], dst++);

	*sctx = (struct sha1_state){};
	return 0;
}
}


static int sha1_export(struct shash_desc *desc, void *out)
static int sha1_ce_final(struct shash_desc *desc, u8 *out)
{
{
	struct sha1_state *sctx = shash_desc_ctx(desc);
	kernel_neon_begin_partial(16);
	struct sha1_state *dst = out;
	sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);

	kernel_neon_end();
	*dst = *sctx;
	return sha1_base_finish(desc, out);
	return 0;
}

static int sha1_import(struct shash_desc *desc, const void *in)
{
	struct sha1_state *sctx = shash_desc_ctx(desc);
	struct sha1_state const *src = in;

	*sctx = *src;
	return 0;
}
}


static struct shash_alg alg = {
static struct shash_alg alg = {
	.init			= sha1_init,
	.init			= sha1_base_init,
	.update			= sha1_update,
	.update			= sha1_ce_update,
	.final			= sha1_final,
	.final			= sha1_ce_final,
	.finup			= sha1_finup,
	.finup			= sha1_ce_finup,
	.export			= sha1_export,
	.descsize		= sizeof(struct sha1_ce_state),
	.import			= sha1_import,
	.descsize		= sizeof(struct sha1_state),
	.digestsize		= SHA1_DIGEST_SIZE,
	.digestsize		= SHA1_DIGEST_SIZE,
	.statesize		= sizeof(struct sha1_state),
	.base			= {
	.base			= {
		.cra_name		= "sha1",
		.cra_name		= "sha1",
		.cra_driver_name	= "sha1-ce",
		.cra_driver_name	= "sha1-ce",