
Commit d4fde568 authored by Paul Mackerras, committed by Michael Ellerman

powerpc/64: Use optimized checksum routines on little-endian



Currently we have optimized hand-coded assembly checksum routines for
big-endian 64-bit systems, but for little-endian we use the generic C
routines. This modifies the optimized routines to work for
little-endian. With this, we no longer need to enable
CONFIG_GENERIC_CSUM. This also fixes a couple of comments in
checksum_64.S so they accurately reflect what the associated instruction
does.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
[mpe: Use the more common __BIG_ENDIAN__]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
parent b492f7e4
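
For context: the Internet checksum is a 16-bit one's-complement sum, but these routines accumulate eight bytes per load and fold the 64-bit total down at the end. A minimal C sketch of that scheme (an illustration, not the kernel's code; fold64 here stands in for the kernel's from64to32 helper) shows why byte order matters: every value entering the sum has to occupy the same byte lanes that the packet bytes occupy in memory.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Fold a 64-bit one's-complement accumulator down to 16 bits. */
static uint16_t fold64(uint64_t x)
{
	x = (x & 0xffffffff) + (x >> 32);	/* 64 -> 33 bits */
	x = (x & 0xffffffff) + (x >> 32);	/* absorb the carry */
	x = (x & 0xffff) + (x >> 16);		/* 32 -> 17 bits */
	x = (x & 0xffff) + (x >> 16);		/* absorb the carry */
	return (uint16_t)x;
}

/* Sum a buffer with native-endian 64-bit loads and end-around
 * carry, the way the assembly loop does. */
static uint16_t csum_sketch(const void *buf, size_t len)
{
	const uint8_t *p = buf;
	uint64_t sum = 0, w;

	while (len >= 8) {
		memcpy(&w, p, 8);	/* byte lanes follow host order */
		sum += w;
		if (sum < w)		/* one's complement: wrap the carry */
			sum++;
		p += 8;
		len -= 8;
	}
	/* trailing sub-8-byte handling omitted; see the checksum_64.S diff */
	return (uint16_t)~fold64(sum);
}

Summed this way, a little-endian machine produces the byte-swap of the big-endian result, which is harmless in itself; what breaks is mixing native integer values such as proto and len into the byte-lane sum without swapping them, and that is exactly what the hunks below address.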
arch/powerpc/Kconfig  +1 −1
@@ -167,7 +167,7 @@ config PPC
 	select HAVE_CC_STACKPROTECTOR
 
 config GENERIC_CSUM
-	def_bool CPU_LITTLE_ENDIAN
+	def_bool n
 
 config EARLY_PRINTK
 	bool
arch/powerpc/include/asm/checksum.h  +4 −0
@@ -70,7 +70,11 @@ static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len,
 
 	s += (__force u32)saddr;
 	s += (__force u32)daddr;
+#ifdef __BIG_ENDIAN__
 	s += proto + len;
+#else
+	s += (proto + len) << 8;
+#endif
 	return (__force __wsum) from64to32(s);
 #else
     __asm__("\n\
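
Why (proto + len) << 8 is enough on little-endian: in one's-complement arithmetic a 16-bit value shifted left by 8 and then folded equals that value byte-swapped, because the high byte wraps around into the low byte. proto and len are native integers rather than packet bytes, so on little-endian they must be moved into the byte lanes the pseudo-header occupies on the wire; one shift of their sum does both swaps at once. A small self-check of the identity (an illustration, not from the patch):

#include <assert.h>
#include <stdint.h>

/* Fold a 32-bit one's-complement sum down to 16 bits. */
static uint16_t fold32(uint32_t x)
{
	x = (x & 0xffff) + (x >> 16);	/* high half wraps into the low */
	x = (x & 0xffff) + (x >> 16);	/* absorb any carry */
	return (uint16_t)x;
}

int main(void)
{
	for (uint32_t x = 0; x <= 0xffff; x++) {
		uint16_t shifted = fold32(x << 8);	/* shift, then fold */
		uint16_t swapped = (uint16_t)((x << 8) | (x >> 8));	/* swab16 */
		assert(shifted == swapped);
	}
	return 0;
}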
arch/powerpc/lib/Makefile  +0 −2
@@ -21,9 +21,7 @@ obj64-y += copypage_64.o copyuser_64.o usercopy_64.o mem_64.o hweight_64.o \
 obj64-$(CONFIG_SMP)	+= locks.o
 obj64-$(CONFIG_ALTIVEC)	+= vmx-helper.o
 
-ifeq ($(CONFIG_GENERIC_CSUM),)
 obj-y			+= checksum_$(BITS).o checksum_wrappers.o
-endif
 
 obj-$(CONFIG_PPC_EMULATE_SSTEP)	+= sstep.o ldstfp.o
 
arch/powerpc/lib/checksum_64.S  +10 −2
@@ -36,7 +36,7 @@ _GLOBAL(__csum_partial)
 	 * work to calculate the correct checksum, we ignore that case
 	 * and take the potential slowdown of unaligned loads.
 	 */
-	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 & 0x3) >> 1 */
+	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 >> 1) & 0x3 */
 	beq	.Lcsum_aligned
 
 	li	r7,4
@@ -168,8 +168,12 @@ _GLOBAL(__csum_partial)
 	beq	.Lcsum_finish
 
 	lbz	r6,0(r3)
+#ifdef __BIG_ENDIAN__
 	sldi	r9,r6,8			/* Pad the byte out to 16 bits */
 	adde	r0,r0,r9
+#else
+	adde	r0,r0,r6
+#endif
 
 .Lcsum_finish:
 	addze	r0,r0			/* add in final carry */
@@ -224,7 +228,7 @@ _GLOBAL(csum_partial_copy_generic)
 	 * If the source and destination are relatively unaligned we only
 	 * align the source. This keeps things simple.
 	 */
-	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 & 0x3) >> 1 */
+	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 >> 1) & 0x3 */
 	beq	.Lcopy_aligned
 
 	li	r9,4
@@ -386,8 +390,12 @@ dstnr; sth r6,0(r4)
 	beq	.Lcopy_finish
 
 srcnr;	lbz	r6,0(r3)
+#ifdef __BIG_ENDIAN__
 	sldi	r9,r6,8			/* Pad the byte out to 16 bits */
 	adde	r0,r0,r9
+#else
+	adde	r0,r0,r6
+#endif
 dstnr;	stb	r6,0(r4)
 
 .Lcopy_finish:
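
Two things the fixed comments and new hunks encode, restated in C for clarity (an illustration, not part of the patch; it ignores the carry chain the adde instructions maintain): rldicl. with a shift of 64-1 rotates right by one and the 64-2 mask keeps only the low two bits, so r6 really is (r3 >> 1) & 0x3, not (r3 & 0x3) >> 1; and the final odd byte of a buffer is the high byte of its 16-bit checksum word on big-endian but the low byte on little-endian.

#include <stdint.h>

/* rldicl. r6,r3,64-1,64-2: rotate left by 63 (i.e. right by 1),
 * then clear all but the low two bits; the record form sets CR0
 * for the following beq. */
static uint64_t halfword_misalignment(uint64_t r3)
{
	return (r3 >> 1) & 0x3;
}

/* Fold the final odd byte of the buffer into the running sum. */
static uint64_t add_trailing_byte(uint64_t sum, uint8_t byte)
{
#ifdef __BIG_ENDIAN__
	/* sldi r9,r6,8; adde r0,r0,r9: the byte is the high half
	 * of its 16-bit checksum word */
	return sum + ((uint64_t)byte << 8);
#else
	/* adde r0,r0,r6: the byte is the low half */
	return sum + byte;
#endif
}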