Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 773ff788 authored by Atsushi Nemoto, committed by Ralf Baechle
Browse files

[MIPS] Optimize flow of csum_partial



Delete dead code at the end of the function and move small_csumcpy
there.  This makes some labels (maybe_end_cruft, small_memcpy,
end_bytes, out) unnecessary and eliminates some branches.

Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
parent 52ffe760
Loading
Loading
Loading
Loading
+54 −75
Original line number Diff line number Diff line
@@ -65,64 +65,6 @@

	.text
	.set	noreorder

/*
 * small_csumcpy: checksum the trailing bytes of the buffer.
 * Unknown src alignment and < 8 bytes to go.
 *
 * Register use visible in this block:
 *   t2  - number of bytes still to process (copied into a1 below)
 *   src - current read pointer (alias defined outside this view)
 *   sum - running checksum (alias defined outside this view)
 *   t7  - non-zero selects the byte swap of the folded result
 *   a2  - partial checksum passed by the caller, added at the very end
 *   t0, t1, v1 - scratch
 *
 * ADDC and PTR_ADDIU are macros defined outside this view; ADDC is
 * presumably an add with end-around carry -- TODO confirm.
 *
 * The instructions indented by one extra space sit in branch delay
 * slots, so they execute whether or not the branch is taken.
 */
small_csumcpy:
	move	a1, t2

	/* Bit 2 of the count set?  Delay slot pre-computes the bit 1 test. */
	andi	t0, a1, 4
	beqz	t0, 1f
	 andi	t0, a1, 2

	/* Still a full word to go: unaligned 32-bit load, add it to sum. */
	ulw	t1, (src)
	PTR_ADDIU	src, 4
	ADDC(sum, t1)

	/*
	 * t1 collects the last (up to three) bytes; start from zero.
	 * t0 still holds the bit 1 test; delay slot pre-computes bit 0.
	 */
1:	move	t1, zero
	beqz	t0, 1f
	 andi	t0, a1, 1

	/* Still a halfword to go: unaligned zero-extended 16-bit load. */
	ulhu	t1, (src)
	PTR_ADDIU	src, 2

	/*
	 * The shift in the delay slot always runs: it moves the halfword
	 * (or the zero from above) into the upper 16 bits of t1.
	 */
1:	beqz	t0, 1f
	 sll	t1, t1, 16

	/* Final odd byte; the nop presumably covers the load delay -- confirm. */
	lbu	t2, (src)
	 nop

	/* On big-endian builds the byte is moved up into bits 8..15. */
#ifdef __MIPSEB__
	sll	t2, t2, 8
#endif
	or	t1, t2

1:	ADDC(sum, t1)

	/*
	 * Fold checksum: add the low halfword (shifted up by 16) into sum,
	 * record the carry out of that add in v1, keep the upper 16 bits
	 * and add the carry back in.
	 */
	sll	v1, sum, 16
	addu	sum, v1
	sltu	v1, sum, v1
	srl	sum, sum, 16
	addu	sum, v1

	/* Odd buffer alignment?  If t7 != 0, swap the two bytes of the result. */
	beqz	t7, 1f
	 nop
	sll	v1, sum, 8
	srl	sum, sum, 8
	or	sum, v1
	andi	sum, 0xffff
1:
	/* With reorder enabled the assembler handles the jr delay slot itself. */
	.set	reorder
	/* Add the passed partial csum.  */
	ADDC(sum, a2)
	jr	ra
	.set	noreorder

/* ------------------------------------------------------------------------- */

	.align	5
LEAF(csum_partial)
	move	sum, zero
@@ -132,7 +74,6 @@ LEAF(csum_partial)
	bnez	t8, small_csumcpy		/* < 8 bytes to copy */
	 move	t2, a1

	beqz	a1, out
	andi	t7, src, 0x1			/* odd buffer? */

hword_align:
@@ -232,7 +173,8 @@ move_32bytes:
	PTR_ADDU	src, src, 0x20

do_end_words:
	beqz	t8, maybe_end_cruft
	beqz	t8, small_csumcpy
	 andi	t2, a1, 0x3
	LONG_SRL	t8, t8, 0x2

end_words:
@@ -242,21 +184,58 @@ end_words:
	bnez	t8, end_words
	 PTR_ADDU	src, src, 0x4

maybe_end_cruft:
	andi	t2, a1, 0x3

small_memcpy:
 j small_csumcpy; move a1, t2		/* XXX ??? */
	beqz	t2, out
/* unknown src alignment and < 8 bytes to go  */
small_csumcpy:
	move	a1, t2

end_bytes:
	lb	t0, (src)
	LONG_SUBU	a1, a1, 0x1
	bnez	a2, end_bytes
	 PTR_ADDU	src, src, 0x1
	andi	t0, a1, 4
	beqz	t0, 1f
	 andi	t0, a1, 2

	/* Still a full word to go  */
	ulw	t1, (src)
	PTR_ADDIU	src, 4
	ADDC(sum, t1)

1:	move	t1, zero
	beqz	t0, 1f
	 andi	t0, a1, 1

	/* Still a halfword to go  */
	ulhu	t1, (src)
	PTR_ADDIU	src, 2

1:	beqz	t0, 1f
	 sll	t1, t1, 16

	lbu	t2, (src)
	 nop

#ifdef __MIPSEB__
	sll	t2, t2, 8
#endif
	or	t1, t2

1:	ADDC(sum, t1)

	/* fold checksum */
	sll	v1, sum, 16
	addu	sum, v1
	sltu	v1, sum, v1
	srl	sum, sum, 16
	addu	sum, v1

out:
	/* odd buffer alignment? */
	beqz	t7, 1f
	 nop
	sll	v1, sum, 8
	srl	sum, sum, 8
	or	sum, v1
	andi	sum, 0xffff
1:
	.set	reorder
	/* Add the passed partial csum.  */
	ADDC(sum, a2)
	jr	ra
	 move	v0, sum
	.set	noreorder
	END(csum_partial)