Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 323a6bf1 authored by Michael Ellerman, committed by Benjamin Herrenschmidt
Browse files

powerpc: Add a powerpc implementation of SHA-1

This patch adds a crypto driver which provides a powerpc accelerated
implementation of SHA-1, accelerated in that it is written in asm.

Original patch by Paul, minor fixups for upstream by moi.

Lightly tested on 64-bit with the test program here:

 http://michael.ellerman.id.au/files/junkcode/sha1test.c



Seems to work, and is "not slower" than the generic version.

Needs testing on 32-bit.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
parent 5c49985c
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -143,6 +143,7 @@ core-y += arch/powerpc/kernel/ \
				   arch/powerpc/sysdev/ \
				   arch/powerpc/platforms/ \
				   arch/powerpc/math-emu/ \
				   arch/powerpc/crypto/ \
				   arch/powerpc/net/
core-$(CONFIG_XMON)		+= arch/powerpc/xmon/
core-$(CONFIG_KVM) 		+= arch/powerpc/kvm/
+9 −0
Original line number Diff line number Diff line
#
# powerpc/crypto/Makefile
#
# Arch-specific CryptoAPI modules.
#

# Build sha1-powerpc.o when CONFIG_CRYPTO_SHA1_PPC is set (the config symbol
# is a tristate, so the value is either y or m).
obj-$(CONFIG_CRYPTO_SHA1_PPC) += sha1-powerpc.o

# sha1-powerpc.o is assembled from two parts: the assembly transform
# (sha1-powerpc-asm.o) and the C glue code (sha1.o).
sha1-powerpc-y := sha1-powerpc-asm.o sha1.o
+179 −0
Original line number Diff line number Diff line
/*
 * SHA-1 implementation for PowerPC.
 *
 * Copyright (C) 2005 Paul Mackerras <paulus@samba.org>
 */

#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>

/*
 * Rotating register assignment for the SHA-1 working variables.
 *
 * We roll the registers for T, A, B, C, D, E around on each
 * iteration; T on iteration t is A on iteration t+1, and so on.
 * We use registers 7 - 12 for this.
 *
 * Each macro evaluates to a bare register number in the range 7..12.
 * The additive offsets differ by one between neighbouring macros, so
 * RA(t+1) == RT(t), RB(t+1) == RA(t), ... and RT(t+1) == RE(t).  The
 * value computed into RT(t) therefore becomes "A" for the next round
 * without any register-to-register moves.
 */
#define RT(t)	((((t)+5)%6)+7)
#define RA(t)	((((t)+4)%6)+7)
#define RB(t)	((((t)+3)%6)+7)
#define RC(t)	((((t)+2)%6)+7)
#define RD(t)	((((t)+1)%6)+7)
#define RE(t)	((((t)+0)%6)+7)

/*
 * We use registers 16 - 31 for the W values.
 *
 * W(t) maps message-schedule index t onto a 16-register circular window:
 * W(t) and W(t+16) name the same register.
 */
#define W(t)	(((t)%16)+16)

/*
 * Load the 32-bit word at byte offset 4*t from the address in r4 into the
 * register W(t).  r4 is presumably the input block pointer (the second
 * argument of the C prototype) -- confirm against the calling convention.
 */
#define LOADW(t)				\
	lwz	W(t),(t)*4(r4)

/*
 * One round with f = (B & C) | (~B & D), combined with a look-ahead load.
 *
 * Computes RT(t) = rotl(A, 5) + f + E + r15 + W(t), rotates B left by 30
 * in place, and loads the word at byte offset 4*(t+4) from r4 into W(t+4).
 * r15 must hold the round constant on entry.
 * Scratch registers: r0, r6, r14.
 */
#define STEPD0_LOAD(t)				\
	andc	r0,RD(t),RB(t);		\
	and	r6,RB(t),RC(t);		\
	rotlwi	RT(t),RA(t),5;			\
	or	r6,r6,r0;			\
	add	r0,RE(t),r15;			\
	add	RT(t),RT(t),r6;		\
	add	r14,r0,W(t);			\
	lwz	W((t)+4),((t)+4)*4(r4);	\
	rotlwi	RB(t),RB(t),30;			\
	add	RT(t),RT(t),r14

/*
 * One round with f = (B & C) | (~B & D), combined with a message-schedule
 * update.
 *
 * Computes RT(t) = rotl(A, 5) + f + E + r15 + W(t) and rotates B left by
 * 30 in place.  Interleaved with that, it derives
 *   W(t+4) = rotl(W(t+1) ^ W(t-4) ^ W(t-12) ^ W(t-10), 1).
 * W(t+4-16) is the same register as W(t+4), so the window entry is
 * overwritten in place.
 * r15 must hold the round constant on entry.
 * Scratch registers: r0, r5, r6.
 */
#define STEPD0_UPDATE(t)			\
	and	r6,RB(t),RC(t);		\
	andc	r0,RD(t),RB(t);		\
	rotlwi	RT(t),RA(t),5;			\
	rotlwi	RB(t),RB(t),30;			\
	or	r6,r6,r0;			\
	add	r0,RE(t),r15;			\
	xor	r5,W((t)+4-3),W((t)+4-8);		\
	add	RT(t),RT(t),r6;		\
	xor	W((t)+4),W((t)+4-16),W((t)+4-14);	\
	add	r0,r0,W(t);			\
	xor	W((t)+4),W((t)+4),r5;			\
	add	RT(t),RT(t),r0;		\
	rotlwi	W((t)+4),W((t)+4),1

/*
 * One round with f = B ^ C ^ D and no message-schedule work.
 *
 * Computes RT(t) = rotl(A, 5) + f + E + r15 + W(t) and rotates B left by
 * 30 in place.  Unlike the _UPDATE variants it neither loads nor derives
 * any W value.
 * r15 must hold the round constant on entry.
 * Scratch registers: r0, r6.
 */
#define STEPD1(t)				\
	xor	r6,RB(t),RC(t);		\
	rotlwi	RT(t),RA(t),5;			\
	rotlwi	RB(t),RB(t),30;			\
	xor	r6,r6,RD(t);			\
	add	r0,RE(t),r15;			\
	add	RT(t),RT(t),r6;		\
	add	r0,r0,W(t);			\
	add	RT(t),RT(t),r0

/*
 * One round with f = B ^ C ^ D, combined with a message-schedule update.
 *
 * Computes RT(t) = rotl(A, 5) + f + E + r15 + W(t) and rotates B left by
 * 30 in place.  Interleaved with that, it derives
 *   W(t+4) = rotl(W(t+1) ^ W(t-4) ^ W(t-12) ^ W(t-10), 1),
 * overwriting the register shared by W(t+4-16) and W(t+4).
 * r15 must hold the round constant on entry.
 * Scratch registers: r0, r5, r6.
 */
#define STEPD1_UPDATE(t)				\
	xor	r6,RB(t),RC(t);		\
	rotlwi	RT(t),RA(t),5;			\
	rotlwi	RB(t),RB(t),30;			\
	xor	r6,r6,RD(t);			\
	add	r0,RE(t),r15;			\
	xor	r5,W((t)+4-3),W((t)+4-8);		\
	add	RT(t),RT(t),r6;		\
	xor	W((t)+4),W((t)+4-16),W((t)+4-14);	\
	add	r0,r0,W(t);			\
	xor	W((t)+4),W((t)+4),r5;			\
	add	RT(t),RT(t),r0;		\
	rotlwi	W((t)+4),W((t)+4),1

/*
 * One round with f = (B & C) | (B & D) | (C & D), combined with a
 * message-schedule update.
 *
 * Computes RT(t) = rotl(A, 5) + f + E + r15 + W(t) and rotates B left by
 * 30 in place.  Interleaved with that, it derives
 *   W(t+4) = rotl(W(t+1) ^ W(t-4) ^ W(t-12) ^ W(t-10), 1),
 * overwriting the register shared by W(t+4-16) and W(t+4).
 * r15 must hold the round constant on entry.
 * Scratch registers: r0, r5, r6.
 */
#define STEPD2_UPDATE(t)			\
	and	r6,RB(t),RC(t);		\
	and	r0,RB(t),RD(t);		\
	rotlwi	RT(t),RA(t),5;			\
	or	r6,r6,r0;			\
	rotlwi	RB(t),RB(t),30;			\
	and	r0,RC(t),RD(t);		\
	xor	r5,W((t)+4-3),W((t)+4-8);	\
	or	r6,r6,r0;			\
	xor	W((t)+4),W((t)+4-16),W((t)+4-14);	\
	add	r0,RE(t),r15;			\
	add	RT(t),RT(t),r6;		\
	add	r0,r0,W(t);			\
	xor	W((t)+4),W((t)+4),r5;		\
	add	RT(t),RT(t),r0;		\
	rotlwi	W((t)+4),W((t)+4),1

/*
 * Four consecutive STEPD0_LOAD rounds, t .. t+3.  As a side effect the
 * input words t+4 .. t+7 are loaded into their W registers.
 */
#define STEP0LD4(t)				\
	STEPD0_LOAD(t);				\
	STEPD0_LOAD((t)+1);			\
	STEPD0_LOAD((t)+2);			\
	STEPD0_LOAD((t)+3)

/*
 * Four consecutive rounds, t .. t+3, using the STEP<fn>_UPDATE macro.
 * fn is pasted into the macro name, so it must be one of D0, D1 or D2.
 */
#define STEPUP4(t, fn)				\
	STEP##fn##_UPDATE(t);			\
	STEP##fn##_UPDATE((t)+1);		\
	STEP##fn##_UPDATE((t)+2);		\
	STEP##fn##_UPDATE((t)+3)

/*
 * Twenty consecutive rounds, t .. t+19, built from five STEPUP4 groups
 * that all use the same STEP<fn>_UPDATE macro.
 */
#define STEPUP20(t, fn)				\
	STEPUP4(t, fn);				\
	STEPUP4((t)+4, fn);			\
	STEPUP4((t)+8, fn);			\
	STEPUP4((t)+12, fn);			\
	STEPUP4((t)+16, fn)

/*
 * powerpc_sha_transform: run the 80 SHA-1 rounds over a single block.
 *
 * r3 points at five 32-bit state words (A..E at byte offsets 0..16); they
 * are read on entry and replaced by state + round output on exit.
 * r4 points at the 16 32-bit input words of the block.
 *
 * NOTE(review): the C glue code declares a third "temp" argument; nothing
 * here reads it, and r5 (where it would presumably arrive) is used purely
 * as scratch by the *_UPDATE macros.
 * NOTE(review): input words are fetched with lwz, i.e. in native byte
 * order, so this presumably relies on a big-endian CPU -- confirm.
 * NOTE(review): confirm that STACKFRAMESIZE is large enough for the slots
 * written by SAVE_8GPRS/SAVE_10GPRS.
 */
_GLOBAL(powerpc_sha_transform)
	/* Push a stack frame and save r14-r31 (presumably 8 from r14, 10 from r22) */
	PPC_STLU r1,-STACKFRAMESIZE(r1)
	SAVE_8GPRS(14, r1)
	SAVE_10GPRS(22, r1)

	/* Load up A - E */
	lwz	RA(0),0(r3)	/* A */
	lwz	RB(0),4(r3)	/* B */
	lwz	RC(0),8(r3)	/* C */
	lwz	RD(0),12(r3)	/* D */
	lwz	RE(0),16(r3)	/* E */

	/* Preload the first four input words; the rest are loaded by STEP0LD4 */
	LOADW(0)
	LOADW(1)
	LOADW(2)
	LOADW(3)

	/* Rounds 0-11 load words 4-15 as they go; rounds 12-19 start the schedule */
	lis	r15,0x5a82	/* K0-19 */
	ori	r15,r15,0x7999
	STEP0LD4(0)
	STEP0LD4(4)
	STEP0LD4(8)
	STEPUP4(12, D0)
	STEPUP4(16, D0)

	lis	r15,0x6ed9	/* K20-39 */
	ori	r15,r15,0xeba1
	STEPUP20(20, D1)

	lis	r15,0x8f1b	/* K40-59 */
	ori	r15,r15,0xbcdc
	STEPUP20(40, D2)

	/*
	 * Rounds 60-75 still extend the schedule (up to W(79)); rounds 76-79
	 * use STEPD1, which does not.  The reloads of the old state into
	 * r16-r20 are interleaved with those last four rounds; r16-r20 are
	 * W registers that the remaining rounds no longer read.
	 */
	lis	r15,0xca62	/* K60-79 */
	ori	r15,r15,0xc1d6
	STEPUP4(60, D1)
	STEPUP4(64, D1)
	STEPUP4(68, D1)
	STEPUP4(72, D1)
	lwz	r20,16(r3)
	STEPD1(76)
	lwz	r19,12(r3)
	STEPD1(77)
	lwz	r18,8(r3)
	STEPD1(78)
	lwz	r17,4(r3)
	STEPD1(79)

	/*
	 * Add the round output to the old state.  The rotation has moved
	 * A..E to different registers than at t == 0, so each sum is written
	 * to its t == 0 register only after that register's round-80 value has
	 * been consumed.  E is summed into r20 first and moved into RE(0) last,
	 * because RE(0) is the register that holds RA(80).
	 */
	lwz	r16,0(r3)
	add	r20,RE(80),r20
	add	RD(0),RD(80),r19
	add	RC(0),RC(80),r18
	add	RB(0),RB(80),r17
	add	RA(0),RA(80),r16
	mr	RE(0),r20
	/* Write the updated state back through r3 */
	stw	RA(0),0(r3)
	stw	RB(0),4(r3)
	stw	RC(0),8(r3)
	stw	RD(0),12(r3)
	stw	RE(0),16(r3)

	/* Restore the saved registers, pop the frame and return */
	REST_8GPRS(14, r1)
	REST_10GPRS(22, r1)
	addi	r1,r1,STACKFRAMESIZE
	blr
+157 −0
Original line number Diff line number Diff line
/*
 * Cryptographic API.
 *
 * powerpc implementation of the SHA1 Secure Hash Algorithm.
 *
 * Derived from cryptoapi implementation, adapted for in-place
 * scatterlist interface.
 *
 * Derived from "crypto/sha1.c"
 * Copyright (c) Alan Smithee.
 * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
 * Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 */
#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
#include <asm/byteorder.h>

extern void powerpc_sha_transform(u32 *state, const u8 *src, u32 *temp);

static int sha1_init(struct shash_desc *desc)
{
	struct sha1_state *sctx = shash_desc_ctx(desc);

	*sctx = (struct sha1_state){
		.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
	};

	return 0;
}

/*
 * Feed @len bytes from @data into the running SHA-1 computation.
 *
 * Input is accumulated in sctx->buffer until a full 64-byte block is
 * available.  Every complete block is passed to powerpc_sha_transform();
 * whatever is left over stays buffered for the next call.
 * Always returns 0.
 */
static int sha1_update(struct shash_desc *desc, const u8 *data,
			unsigned int len)
{
	struct sha1_state *sctx = shash_desc_ctx(desc);
	unsigned int fill = sctx->count & 0x3f;	/* bytes already buffered */
	unsigned int used = 0;			/* bytes of @data consumed */

	sctx->count += len;

	if (fill + len >= 64) {
		u32 temp[SHA_WORKSPACE_WORDS];

		if (fill) {
			/* Top up the buffered partial block and hash it first. */
			used = 64 - fill;
			memcpy(sctx->buffer + fill, data, used);
			powerpc_sha_transform(sctx->state, sctx->buffer, temp);
		}

		/* Hash the remaining whole blocks straight out of @data. */
		while (used + 63 < len) {
			powerpc_sha_transform(sctx->state, data + used, temp);
			used += 64;
		}

		memset(temp, 0, sizeof(temp));
		fill = 0;
	}

	/* Keep the tail (possibly empty) for the next call. */
	memcpy(sctx->buffer + fill, data + used, len - used);

	return 0;
}


/*
 * Finish the hash: append the 0x80/zero padding and the 64-bit big-endian
 * bit length, write the five state words to @out in big-endian order, then
 * clear the context.  Always returns 0.
 */
static int sha1_final(struct shash_desc *desc, u8 *out)
{
	static const u8 padding[64] = { 0x80, };
	struct sha1_state *sctx = shash_desc_ctx(desc);
	__be32 *digest = (__be32 *)out;
	/* Capture the message length before the padding updates change it. */
	__be64 bits = cpu_to_be64(sctx->count << 3);
	u32 index = sctx->count & 0x3f;
	u32 padlen;
	u32 i;

	/* Pad so that exactly 8 bytes remain in the final block. */
	if (index < 56)
		padlen = 56 - index;
	else
		padlen = (64 + 56) - index;
	sha1_update(desc, padding, padlen);

	/* The length field completes the final block. */
	sha1_update(desc, (const u8 *)&bits, sizeof(bits));

	for (i = 0; i < 5; i++)
		digest[i] = cpu_to_be32(sctx->state[i]);

	/* Do not leave hash state behind in the descriptor. */
	memset(sctx, 0, sizeof(*sctx));

	return 0;
}

/*
 * Copy the complete struct sha1_state of @desc into the caller's buffer
 * @out.  Always returns 0.
 */
static int sha1_export(struct shash_desc *desc, void *out)
{
	memcpy(out, shash_desc_ctx(desc), sizeof(struct sha1_state));

	return 0;
}

/*
 * Overwrite the struct sha1_state of @desc with the one stored at @in.
 * Always returns 0.
 */
static int sha1_import(struct shash_desc *desc, const void *in)
{
	memcpy(shash_desc_ctx(desc), in, sizeof(struct sha1_state));

	return 0;
}

static struct shash_alg alg = {
	.digestsize	=	SHA1_DIGEST_SIZE,
	.init		=	sha1_init,
	.update		=	sha1_update,
	.final		=	sha1_final,
	.export		=	sha1_export,
	.import		=	sha1_import,
	.descsize	=	sizeof(struct sha1_state),
	.statesize	=	sizeof(struct sha1_state),
	.base		=	{
		.cra_name	=	"sha1",
		.cra_driver_name=	"sha1-powerpc",
		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
		.cra_blocksize	=	SHA1_BLOCK_SIZE,
		.cra_module	=	THIS_MODULE,
	}
};

/* Module load: register the shash and pass its result code back. */
static int __init sha1_powerpc_mod_init(void)
{
	int err = crypto_register_shash(&alg);

	return err;
}

/* Module unload: remove the shash registered at load time. */
static void __exit sha1_powerpc_mod_fini(void)
{
	crypto_unregister_shash(&alg);
}

module_init(sha1_powerpc_mod_init);
module_exit(sha1_powerpc_mod_fini);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm");

/*
 * Alias the algorithm name as well as the driver name.  "sha1-powerpc" on
 * its own only repeats the module's name; the "sha1" alias is what lets a
 * module request for the algorithm name resolve to this module.
 */
MODULE_ALIAS("sha1");
MODULE_ALIAS("sha1-powerpc");
+7 −0
Original line number Diff line number Diff line
@@ -479,6 +479,13 @@ config CRYPTO_SHA1_ARM
	  SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
	  using optimized ARM assembler.

config CRYPTO_SHA1_PPC
	tristate "SHA1 digest algorithm (powerpc)"
	depends on PPC
	select CRYPTO_HASH
	help
	  SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
	  using powerpc assembler. No special hardware is required.

config CRYPTO_SHA256
	tristate "SHA224 and SHA256 digest algorithm"
	select CRYPTO_HASH