MN10300: Optimise do_csum() (ab244c1a) · Commits · e / devices / android_kernel_fairphone_FP4

arch/mn10300/lib/do_csum.S

+22 −27

Original line number	Diff line number	Diff line
		@@ -15,21 +15,20 @@

		###############################################################################
		#
		# unsigned int do_csum(const unsigned char *buff, size_t len)
		# unsigned int do_csum(const unsigned char *buff, int len)
		#
		###############################################################################
		.globl do_csum
		.type do_csum,@function
		do_csum:
		movm [d2,d3],(sp)
		mov d0,(12,sp)
		mov d1,(16,sp)
		mov d1,d2 # count
		mov d0,a0 # buff
		mov a0,a1
		clr d1 # accumulator

		cmp +0,d2
		beq do_csum_done # return if zero-length buffer
		ble do_csum_done # check for zero length or negative

		# 4-byte align the buffer pointer
		btst +3,a0
		@@ -41,17 +40,15 @@ do_csum:
		inc a0
		asl +8,d0
		add d0,d1
		addc +0,d1
		add -1,d2
		do_csum_addr_not_odd:

		do_csum_addr_not_odd:
		cmp +2,d2
		bcs do_csum_fewer_than_4
		btst +2,a0
		beq do_csum_now_4b_aligned
		movhu (a0+),d0
		add d0,d1
		addc +0,d1
		add -2,d2
		cmp +4,d2
		bcs do_csum_fewer_than_4
		@@ -66,20 +63,20 @@ do_csum_now_4b_aligned:

		do_csum_loop:
		mov (a0+),d0
		add d0,d1
		mov (a0+),e0
		addc e0,d1
		mov (a0+),e1
		addc e1,d1
		mov (a0+),e3
		add d0,d1
		addc e0,d1
		addc e1,d1
		addc e3,d1
		mov (a0+),d0
		addc d0,d1
		mov (a0+),e0
		addc e0,d1
		mov (a0+),e1
		addc e1,d1
		mov (a0+),e3
		addc d0,d1
		addc e0,d1
		addc e1,d1
		addc e3,d1
		addc +0,d1

		@@ -94,12 +91,12 @@ do_csum_remainder:
		cmp +16,d2
		bcs do_csum_fewer_than_16
		mov (a0+),d0
		add d0,d1
		mov (a0+),e0
		addc e0,d1
		mov (a0+),e1
		addc e1,d1
		mov (a0+),e3
		add d0,d1
		addc e0,d1
		addc e1,d1
		addc e3,d1
		addc +0,d1
		add -16,d2
		@@ -131,9 +128,9 @@ do_csum_fewer_than_4:
		xor_cmp d0,d0,+2,d2
		bcs do_csum_fewer_than_2
		movhu (a0+),d0
		do_csum_fewer_than_2:
		and +1,d2
		beq do_csum_add_last_bit
		do_csum_fewer_than_2:
		movbu (a0),d3
		add d3,d0
		do_csum_add_last_bit:
		@@ -142,21 +139,19 @@ do_csum_add_last_bit:

		do_csum_done:
		# compress the checksum down to 16 bits
		mov +0xffff0000,d2
		and d1,d2
		mov +0xffff0000,d0
		and d1,d0
		asl +16,d1
		add d2,d1,d0
		add d1,d0
		addc +0xffff,d0
		lsr +16,d0

		# flip the halves of the word result if the buffer was oddly aligned
		mov (12,sp),d1
		and +1,d1
		and +1,a1
		beq do_csum_not_oddly_aligned
		swaph d0,d0 # exchange bits 15:8 with 7:0

		do_csum_not_oddly_aligned:
		ret [d2,d3],8

		do_csum_end:
		.size do_csum, do_csum_end-do_csum
		.size do_csum, .-do_csum