Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 373e098e authored by Christophe Leroy, committed by Michael Ellerman
Browse files

powerpc/32: Optimise __csum_partial()



Improve __csum_partial by interleaving loads and adds.

On an 8xx, it brings neither improvement nor degradation.
On an 83xx, it brings a 25% improvement.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Reviewed-by: Segher Boessenkool <segher@kernel.crashing.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
parent 1128bb78
Loading
Loading
Loading
Loading
+11 −2
Original line number Diff line number Diff line
@@ -47,16 +47,25 @@ _GLOBAL(__csum_partial)
	bdnz	2b
21:	srwi.	r6,r4,4		/* # blocks of 4 words to do */
	beq	3f
	lwz	r0,4(r3)
	mtctr	r6
22:	lwz	r0,4(r3)
	lwz	r6,8(r3)
	adde	r5,r5,r0
	lwz	r7,12(r3)
	adde	r5,r5,r6
	lwzu	r8,16(r3)
	adde	r5,r5,r7
	bdz	23f
22:	lwz	r0,4(r3)
	adde	r5,r5,r8
	lwz	r6,8(r3)
	adde	r5,r5,r0
	lwz	r7,12(r3)
	adde	r5,r5,r6
	lwzu	r8,16(r3)
	adde	r5,r5,r7
	adde	r5,r5,r8
	bdnz	22b
23:	adde	r5,r5,r8
3:	andi.	r0,r4,2
	beq+	4f
	lhz	r0,4(r3)