Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 2676b89e authored by Christophe Leroy's avatar Christophe Leroy Committed by Michael Ellerman
Browse files

powerpc/lib: optimise PPC32 memcmp



Currently, memcmp() compares two chunks of memory
byte by byte.

This patch optimises the comparison by comparing word by word.

In the same way as commit 15c2d45d ("powerpc: Add 64bit
optimised memcmp"), this patch moves memcmp() into a dedicated
file named memcmp_32.S.

A small benchmark performed on an 8xx, comparing two chunks
of 512 bytes 100000 times, gives:

Before : 5852274 TB ticks
After:   1488638 TB ticks

This is almost 4 times faster

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
parent f36bbf21
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -26,14 +26,14 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \
			       memcpy_power7.o

obj64-y	+= copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
	   memcpy_64.o memcmp_64.o pmem.o
	   memcpy_64.o pmem.o

obj64-$(CONFIG_SMP)	+= locks.o
obj64-$(CONFIG_ALTIVEC)	+= vmx-helper.o
obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o

obj-y			+= checksum_$(BITS).o checksum_wrappers.o \
			   string_$(BITS).o
			   string_$(BITS).o memcmp_$(BITS).o

obj-y			+= sstep.o ldstfp.o quad.o
obj64-y			+= quad.o
+45 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0 */

/*
 * memcmp for PowerPC32
 *
 * Copyright (C) 1996 Paul Mackerras.
 *
 */

#include <asm/ppc_asm.h>
#include <asm/export.h>

	.text

/*
 * int memcmp(const void *s1, const void *s2, size_t len)
 *
 * In:   r3 = s1, r4 = s2, r5 = len in bytes (unsigned)
 * Out:  r3 = 0 when the buffers match over len bytes;
 *	 1 / -1 when a differing 32-bit word is found (unsigned compare
 *	 of the s1 word against the s2 word);
 *	 otherwise the difference (s1 value minus s2 value) of the
 *	 trailing halfword or byte that was compared last.
 * Uses: r0, r6, r7, ctr, cr0, cr1
 *
 * NOTE(review): comparing whole words/halfwords yields the same ordering
 * as a byte-wise compare only when loads are big-endian - presumably
 * always the case for PPC32 builds; confirm.
 */
_GLOBAL(memcmp)
	/*
	 * Logical shift, not srawi.: len is unsigned, and an arithmetic
	 * shift would sign-extend a length >= 2GB into a bogus word count.
	 */
	srwi.	r7, r5, 2		/* Divide len by 4 */
	mr	r6, r3			/* r6 = s1; r3 is reused for data/result */
	beq-	3f			/* No full word; r7 is already 0 here */
	mtctr	r7
	li	r7, 0			/* r7 = byte offset into both buffers */
1:	lwzx	r3, r6, r7
	lwzx	r0, r4, r7
	addi	r7, r7, 4
	cmplw	cr0, r3, r0
	bdnzt	eq, 1b			/* Loop while words remain and they match */
	bne	5f			/* Loop was left on a mismatch */
3:	andi.	r3, r5, 3		/* r3 = number of trailing bytes (0..3) */
	beqlr				/* None left: return 0 */
	cmplwi	cr1, r3, 2
	blt-	cr1, 4f			/* Only one byte left */
	lhzx	r3, r6, r7
	lhzx	r0, r4, r7
	addi	r7, r7, 2
	subf.	r3, r0, r3		/* r3 = s1 halfword - s2 halfword */
	beqlr	cr1			/* Exactly two bytes were left: done */
	bnelr				/* Three left but halfwords differ: done */
4:	lbzx	r3, r6, r7
	lbzx	r0, r4, r7
	subf.	r3, r0, r3		/* r3 = s1 byte - s2 byte */
	blr
5:	li	r3, 1			/* li leaves cr0 from cmplw untouched */
	bgtlr				/* s1 word > s2 word (unsigned) */
	li	r3, -1
	blr
EXPORT_SYMBOL(memcmp)
+0 −17
Original line number Diff line number Diff line
@@ -54,23 +54,6 @@ _GLOBAL(strncmp)
	blr
EXPORT_SYMBOL(strncmp)

#ifdef CONFIG_PPC32
/*
 * Byte-by-byte memcmp.
 *
 * In:   r3 = first buffer, r4 = second buffer, r5 = length in bytes
 * Out:  r3 = 0 if the length is 0 or all bytes match, otherwise the
 *	 difference (first-buffer byte minus second-buffer byte) at the
 *	 first mismatch.
 * Uses: r0, r4, r6, ctr, cr0
 */
_GLOBAL(memcmp)
	/* PPC_LCMPI is defined outside this hunk; presumably compares r5 with 0 */
	PPC_LCMPI 0,r5,0
	beq-	2f		/* Zero length: return 0 */
	mtctr	r5		/* One loop iteration per byte */
	/* Pre-decrement both pointers: lbzu below loads from 1(rX) and updates rX */
	addi	r6,r3,-1
	addi	r4,r4,-1
1:	lbzu	r3,1(r6)
	lbzu	r0,1(r4)
	subf.	r3,r0,r3	/* r3 = byte1 - byte2, result recorded in cr0 */
	bdnzt	2,1b		/* CR bit 2 is cr0[eq]: loop while bytes remain and match */
	blr
2:	li	r3,0
	blr
EXPORT_SYMBOL(memcmp)
#endif

_GLOBAL(memchr)
	PPC_LCMPI 0,r5,0
	beq-	2f