Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 04efc3be authored by Heiko Carstens, committed by Martin Schwidefsky
Browse files

[S390] convert/optimize csum_fold() to C



In the meantime gcc generates better code than the old inline
assemblies do. Original inline assembly results in:

lr	%r1,%r2
sr	%r3,%r3
lr	%r2,%r1
srdl	%r2,16
alr	%r2,%r3
alr	%r1,%r2
srl	%r1,16
xilf	%r1,65535
llghr	%r2,%r1
br	%r14

Out of the C code gcc generates this:

rll	%r1,%r2,16
ar	%r1,%r2
srl	%r1,16
xilf	%r1,65535
llghr	%r2,%r1
br	%r14

In addition we don't have any static register allocations anymore and
gcc is free to shuffle instructions around for better pipeline usage.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
parent 05e7ff7d
Loading
Loading
Loading
Loading
+4 −21
Original line number Diff line number Diff line
@@ -78,28 +78,11 @@ csum_partial_copy_nocheck (const void *src, void *dst, int len, __wsum sum)
 */
static inline __sum16 csum_fold(__wsum sum)
{
	/*
	 * NOTE(review): this hunk looks like a diff rendering whose +/- markers
	 * were lost, so removed and added lines are interleaved. Going by the
	 * commit message and the "+4 -21" line count, the two inline-assembly
	 * variants and the first return are presumably the removed code, and
	 * the csum declaration plus the three C statements at the end are the
	 * replacement -- confirm against the real commit.
	 *
	 * The C version folds the 32-bit sum into 16 bits: the high and low
	 * halfwords are added, the carry out of that addition is added back
	 * in, and the complement of the result is returned. Per their inline
	 * comments, the assembly variants compute the same H+L+C value.
	 */
#ifndef __s390x__
	register_pair rp;
	u32 csum = (__force u32) sum;

	asm volatile(
		"	slr	%N1,%N1\n"	/* %0 = H L */
		"	lr	%1,%0\n"	/* %0 = H L, %1 = H L 0 0 */
		"	srdl	%1,16\n"	/* %0 = H L, %1 = 0 H L 0 */
		"	alr	%1,%N1\n"	/* %0 = H L, %1 = L H L 0 */
		"	alr	%0,%1\n"	/* %0 = H+L+C L+H */
		"	srl	%0,16\n"	/* %0 = H+L+C */
		: "+&d" (sum), "=d" (rp) : : "cc");
#else /* __s390x__ */
	asm volatile(
		"	sr	3,3\n"		/* %0 = H*65536 + L */
		"	lr	2,%0\n"		/* %0 = H L, 2/3 = H L / 0 0 */
		"	srdl	2,16\n"		/* %0 = H L, 2/3 = 0 H / L 0 */
		"	alr	2,3\n"		/* %0 = H L, 2/3 = L H / L 0 */
		"	alr	%0,2\n"		/* %0 = H+L+C L+H */
		"	srl	%0,16\n"	/* %0 = H+L+C */
		: "+&d" (sum) : : "cc", "2", "3");
#endif /* __s390x__ */
	return (__force __sum16) ~sum;
	/*
	 * (csum >> 16) + (csum << 16) is csum with its halfwords swapped.
	 * Adding that to csum leaves H+L plus the carry from the lower
	 * halfword's L+H in the upper halfword.
	 */
	csum += (csum >> 16) + (csum << 16);
	/* Move the folded upper halfword down into the low 16 bits. */
	csum >>= 16;
	/* Return the complement of the folded sum as the 16-bit checksum. */
	return (__force __sum16) ~csum;
}

/*