Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 37e08cad, authored by Christophe Leroy, committed by Scott Wood
Browse files

powerpc: inline ip_fast_csum()



In several architectures, ip_fast_csum() is inlined.
There are functions, like ip_send_check(), which do little
more than call ip_fast_csum().
Inlining ip_fast_csum() allows the compiler to optimise better.

Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
[scottwood: whitespace and cast fixes]
Signed-off-by: Scott Wood <oss@buserror.net>
parent 03bc8b0f
Loading
Loading
Loading
Loading
+38 −7
Original line number Diff line number Diff line
@@ -9,16 +9,9 @@
 * 2 of the License, or (at your option) any later version.
 */

/*
 * This is a version of ip_compute_csum() optimized for IP headers,
 * which always checksum on 4 octet boundaries.  ihl is the number
 * of 32-bit words and is always >= 5.
 */
#ifdef CONFIG_GENERIC_CSUM
#include <asm-generic/checksum.h>
#else
extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl);

/*
 * computes the checksum of a memory block at buff, length len,
 * and adds in "sum" (32-bit)
@@ -137,6 +130,44 @@ static inline __wsum csum_add(__wsum csum, __wsum addend)
#endif
}

/*
 * This is a version of ip_compute_csum() optimized for IP headers,
 * which always checksum on 4 octet boundaries.  ihl is the number
 * of 32-bit words and is always >= 5.
 *
 * Returns the 32-bit ones' complement sum of the ihl words starting at
 * iph.  The result is NOT folded down to 16 bits and NOT complemented;
 * that is left to the caller.
 */
static inline __wsum ip_fast_csum_nofold(const void *iph, unsigned int ihl)
{
	const u32 *ptr = (const u32 *)iph + 1;
#ifdef __powerpc64__
	unsigned int i;
	u64 s = *(const u32 *)iph;

	/* Accumulate the remaining ihl - 1 words in a 64-bit register. */
	for (i = 0; i < ihl - 1; i++, ptr++)
		s += *ptr;

	/*
	 * Fold 64 bits down to 32 with end-around carry.  A single
	 * "s += s >> 32" followed by truncation is not enough: the addition
	 * of the two halves can itself carry out of bit 31, and that carry
	 * must be added back in.  The first step yields at most a 33-bit
	 * value; the second step absorbs the remaining carry.
	 */
	s = (s & 0xffffffff) + (s >> 32);
	s = (s & 0xffffffff) + (s >> 32);
	return (__force __wsum)s;
#else
	__wsum sum, tmp;

	/*
	 * %0 = sum, %1 = tmp, %2 = ptr (updated by lwzu),
	 * %3 = loop count (ihl - 2), %4 = word 0, %5 = word 1.
	 */
	asm("mtctr %3;"			/* CTR = number of remaining words */
	    "addc %0,%4,%5;"		/* sum = word0 + word1, sets carry */
	    "1: lwzu %1, 4(%2);"	/* advance ptr by 4, load next word */
	    "adde %0,%0,%1;"		/* sum += word + carry */
	    "bdnz 1b;"			/* loop until CTR reaches zero */
	    "addze %0,%0;"		/* add in the final carry */
	    : "=r" (sum), "=r" (tmp), "+b" (ptr)
	    : "r" (ihl - 2), "r" (*(const u32 *)iph), "r" (*ptr)
	    : "ctr", "xer", "memory");

	return sum;
#endif
}

static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
{
	return csum_fold(ip_fast_csum_nofold(iph, ihl));
}

#endif
#endif /* __KERNEL__ */
#endif
+0 −21
Original line number Diff line number Diff line
@@ -19,27 +19,6 @@

	.text

/*
 * ip_fast_csum(buf, len) -- Optimized for IP header
 * len is in words and is always >= 5.
 */
_GLOBAL(ip_fast_csum)
	/* r3 = buf, r4 = len in 32-bit words; result is returned in r3 */
	/* r0 = word 0 */
	lwz	r0,0(r3)
	/* r5 = word 1; lwzu also advances r3 by 4 */
	lwzu	r5,4(r3)
	/* r4 = len - 2 (words left to add); the '.' form sets cr0 for blelr */
	addic.	r4,r4,-2
	/* r0 = word0 + word1, starting the carry chain */
	addc	r0,r0,r5
	/* loop counter = remaining words */
	mtctr	r4
	/*
	 * Return early if len - 2 <= 0.  Not expected, since len >= 5.
	 * NOTE(review): on this path r3 still holds the advanced buffer
	 * pointer, not a checksum.
	 */
	blelr-
	/* load the next word (pre-incrementing r3) and add it with carry */
1:	lwzu	r4,4(r3)
	adde	r0,r0,r4
	bdnz	1b
	addze	r0,r0		/* add in final carry */
	rlwinm	r3,r0,16,0,31	/* fold two halves together */
	/* upper 16 bits of r3 = high half + low half (+ carry from low add) */
	add	r3,r0,r3
	/* ones' complement of the folded sum */
	not	r3,r3
	/* move the 16-bit result down into the low half of r3 */
	srwi	r3,r3,16
	blr

/*
 * computes the checksum of a memory block at buff, length len,
 * and adds in "sum" (32-bit)
+0 −27
Original line number Diff line number Diff line
@@ -17,33 +17,6 @@
#include <asm/errno.h>
#include <asm/ppc_asm.h>

/*
 * ip_fast_csum(r3=buf, r4=len) -- Optimized for IP header
 * len is in words and is always >= 5.
 *
 * In practice len == 5, but this is not guaranteed.  So this code does not
 * attempt to use doubleword instructions.
 */
_GLOBAL(ip_fast_csum)
	/* r0 = word 0 */
	lwz	r0,0(r3)
	/* r5 = word 1; lwzu also advances r3 by 4 */
	lwzu	r5,4(r3)
	/* r4 = len - 2 (words left to add); the '.' form sets cr0 for blelr */
	addic.	r4,r4,-2
	/* r0 = word0 + word1, starting the carry chain */
	addc	r0,r0,r5
	/* loop counter = remaining words */
	mtctr	r4
	/*
	 * Return early if len - 2 <= 0.  Not expected, since len >= 5.
	 * NOTE(review): on this path r3 still holds the advanced buffer
	 * pointer, not a checksum.
	 */
	blelr-
	/* load the next word (pre-incrementing r3) and add it with carry */
1:	lwzu	r4,4(r3)
	adde	r0,r0,r4
	bdnz	1b
	addze	r0,r0		/* add in final carry */
	/* r4 = r0 rotated by 32 bits, i.e. with its two 32-bit halves swapped */
        rldicl  r4,r0,32,0      /* fold two 32-bit halves together */
	/* upper half of r0 = high + low (+ carry out of the lower-half add) */
        add     r0,r0,r4
	/* keep only that upper half: the 32-bit sum with end-around carry */
        srdi    r0,r0,32
	rlwinm	r3,r0,16,0,31	/* fold two halves together */
	/* upper 16 bits of the low word = high half + low half (+ carry) */
	add	r3,r0,r3
	/* ones' complement of the folded sum */
	not	r3,r3
	/* move the 16-bit result down into the low half of r3 */
	srwi	r3,r3,16
	blr

/*
 * Computes the checksum of a memory block at buff, length len,
 * and adds in "sum" (32-bit).
+0 −1
Original line number Diff line number Diff line
@@ -19,7 +19,6 @@ EXPORT_SYMBOL(strncmp);
/*
 * Checksum routines exported here only when CONFIG_GENERIC_CSUM is not
 * set.
 */
#ifndef CONFIG_GENERIC_CSUM
EXPORT_SYMBOL(csum_partial);
EXPORT_SYMBOL(csum_partial_copy_generic);
EXPORT_SYMBOL(ip_fast_csum);
#endif

EXPORT_SYMBOL(__copy_tofrom_user);