CRIS v32: Update lib/checksum.S and lib/checksumcopy.S (41f9412b) · Commits · e / devices / android_kernel_teracube_emerald

arch/cris/arch-v32/lib/checksum.S

+23 −49

Original line number	Diff line number	Diff line
		/*
		* A fast checksum routine using movem
		* Copyright (c) 1998-2001, 2003 Axis Communications AB
		* Copyright (c) 1998-2007 Axis Communications AB
		*
		* csum_partial(const unsigned char * buff, int len, unsigned int sum)
		*/
		@@ -12,30 +12,23 @@ csum_partial:
		;; r11 - length
		;; r12 - checksum

		;; check for breakeven length between movem and normal word looping versions
		;; we also do _NOT_ want to compute a checksum over more than the
		;; actual length when length < 40

		cmpu.w 80,$r11
		blo _word_loop
		nop

		;; need to save the registers we use below in the movem loop
		;; this overhead is why we have a check above for breakeven length
		;; only r0 - r8 have to be saved, the other ones are clobber-able
		;; according to the ABI
		;; Optimized for large packets
		subq 10*4, $r11
		blt _word_loop
		move.d $r11, $acr

		subq 9*4,$sp
		subq 10*4,$r11 ; update length for the first loop
		clearf c
		movem $r8,[$sp]

		;; do a movem checksum

		_mloop: movem [$r10+],$r9 ; read 10 longwords

		;; Loop count without touching the c flag.
		addoq -10*4, $acr, $acr
		;; perform dword checksumming on the 10 longwords

		add.d $r0,$r12
		addc $r0,$r12
		addc $r1,$r12
		addc $r2,$r12
		addc $r3,$r12
		@@ -46,28 +39,19 @@ _mloop: movem [$r10+],$r9 ; read 10 longwords
		addc $r8,$r12
		addc $r9,$r12

		;; fold the carry into the checksum, to avoid having to loop the carry
		;; back into the top
		;; test $acr without trashing carry.
		move.d $acr, $acr
		bpl _mloop
		;; r11 <= acr is not really needed in the mloop; we are just using the
		;; branch delay slot to prepare for what is needed after mloop.
		move.d $acr, $r11

		;; fold the last carry into r12
		addc 0, $r12
		addc 0,$r12 ; do it again, since we might have generated a carry

		subq 10*4,$r11
		bge _mloop
		nop

		addq 10*4,$r11 ; compensate for last loop underflowing length

		movem [$sp+],$r8 ; restore regs

		_word_loop:
		;; only fold if there is anything to fold.

		cmpq 0,$r12
		beq _no_fold

		;; fold 32-bit checksum into a 16-bit checksum, to avoid carries below.
		;; r9 and r13 can be used as temporaries.
		addq 10*4,$r11 ; compensate for last loop underflowing length

		moveq -1,$r9 ; put 0xffff in r9, faster than move.d 0xffff,r9
		lsrq 16,$r9
		@@ -75,31 +59,21 @@ _word_loop:
		move.d $r12,$r13
		lsrq 16,$r13 ; r13 = checksum >> 16
		and.d $r9,$r12 ; checksum = checksum & 0xffff
		add.d $r13,$r12 ; checksum += r13
		move.d $r12,$r13 ; do the same again, maybe we got a carry last add
		lsrq 16,$r13
		and.d $r9,$r12
		add.d $r13,$r12

		_no_fold:
		cmpq 2,$r11
		subq 2,$r11
		blt _no_words
		nop
		add.d $r13,$r12 ; checksum += r13

		;; checksum the rest of the words

		subq 2,$r11

		_wloop: subq 2,$r11
		bge _wloop
		addu.w [$r10+],$r12

		addq 2,$r11

		_no_words:
		addq 2,$r11
		;; see if we have one odd byte more
		cmpq 1,$r11
		beq _do_byte
		bne _do_byte
		nop
		ret
		move.d $r12,$r10

arch/cris/arch-v32/lib/checksumcopy.S

+20 −49

Original line number	Diff line number	Diff line
		/*
		* A fast checksum+copy routine using movem
		* Copyright (c) 1998, 2001, 2003 Axis Communications AB
		* Copyright (c) 1998-2007 Axis Communications AB
		*
		* Authors: Bjorn Wesen
		*
		@@ -16,32 +16,23 @@ csum_partial_copy_nocheck:
		;; r12 - length
		;; r13 - checksum

		;; check for breakeven length between movem and normal word looping versions
		;; we also do _NOT_ want to compute a checksum over more than the
		;; actual length when length < 40

		cmpu.w 80,$r12
		blo _word_loop
		nop

		;; need to save the registers we use below in the movem loop
		;; this overhead is why we have a check above for breakeven length
		;; only r0 - r8 have to be saved, the other ones are clobber-able
		;; according to the ABI
		;; Optimized for large packets
		subq 10*4, $r12
		blt _word_loop
		move.d $r12, $acr

		subq 9*4,$sp
		subq 10*4,$r12 ; update length for the first loop
		clearf c
		movem $r8,[$sp]

		;; do a movem copy and checksum

		1: ;; A failing userspace access (the read) will have this as PC.
		_mloop: movem [$r10+],$r9 ; read 10 longwords
		addoq -10*4, $acr, $acr ; loop counter in latency cycle
		movem $r9,[$r11+] ; write 10 longwords

		;; perform dword checksumming on the 10 longwords

		add.d $r0,$r13
		addc $r0,$r13
		addc $r1,$r13
		addc $r2,$r13
		addc $r3,$r13
		@@ -52,47 +43,30 @@ _mloop: movem [$r10+],$r9 ; read 10 longwords
		addc $r8,$r13
		addc $r9,$r13

		;; fold the carry into the checksum, to avoid having to loop the carry
		;; back into the top
		;; test $acr, without trashing carry.
		move.d $acr, $acr
		bpl _mloop
		;; r12 <= acr is needed after mloop and in the exception handlers.
		move.d $acr, $r12

		;; fold the last carry into r13
		addc 0, $r13
		addc 0,$r13 ; do it again, since we might have generated a carry

		subq 10*4,$r12
		bge _mloop
		nop

		addq 10*4,$r12 ; compensate for last loop underflowing length

		movem [$sp+],$r8 ; restore regs

		_word_loop:
		;; only fold if there is anything to fold.

		cmpq 0,$r13
		beq _no_fold
		addq 10*4,$r12 ; compensate for last loop underflowing length

		;; fold 32-bit checksum into a 16-bit checksum, to avoid carries below
		;; r9 can be used as temporary.

		move.d $r13,$r9
		lsrq 16,$r9 ; r9 = checksum >> 16
		and.d 0xffff,$r13 ; checksum = checksum & 0xffff
		add.d $r9,$r13 ; checksum += r9
		move.d $r13,$r9 ; do the same again, maybe we got a carry last add
		lsrq 16,$r9
		and.d 0xffff,$r13
		add.d $r9,$r13

		_no_fold:
		cmpq 2,$r12
		subq 2, $r12
		blt _no_words
		nop
		add.d $r9,$r13 ; checksum += r9

		;; copy and checksum the rest of the words

		subq 2,$r12

		2: ;; A failing userspace access for the read below will have this as PC.
		_wloop: move.w [$r10+],$r9
		addu.w $r9,$r13
		@@ -100,12 +74,9 @@ _wloop: move.w [$r10+],$r9
		bge _wloop
		move.w $r9,[$r11+]

		addq 2,$r12

		_no_words:
		;; see if we have one odd byte more
		cmpq 1,$r12
		beq _do_byte
		addq 2,$r12
		bne _do_byte
		nop
		ret
		move.d $r13,$r10