Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ca15c8ec authored by Vineet Gupta's avatar Vineet Gupta
Browse files

ARC: Checksum/byteorder/swab routines



TBD: do_csum still needs to be written in asm

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
parent 64e69073
Loading
Loading
Loading
Loading
+18 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#ifndef __ASM_ARC_BYTEORDER_H
#define __ASM_ARC_BYTEORDER_H

#ifdef CONFIG_CPU_BIG_ENDIAN
#include <linux/byteorder/big_endian.h>
#else
#include <linux/byteorder/little_endian.h>
#endif

#endif /* __ASM_ARC_BYTEORDER_H */
+101 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Joern Rennecke  <joern.rennecke@embecosm.com>: Jan 2012
 *  -Insn Scheduling improvements to csum core routines.
 *      = csum_fold( ) largely derived from ARM version.
 *      = ip_fast_csum( ) to have modulo scheduling
 *  -gcc 4.4.x broke networking. Alias analysis needed to be primed.
 *   worked around by adding memory clobber to ip_fast_csum( )
 *
 * vineetg: May 2010
 *  -Rewrote ip_fast_csum( ) and csum_fold( ) with fast inline asm
 */

#ifndef _ASM_ARC_CHECKSUM_H
#define _ASM_ARC_CHECKSUM_H

/*
 *	Fold a 32-bit partial checksum down to its final 16-bit form.
 *
 *	The upper and lower 16-bit halves of the sum are added together, the
 *	carry out of that addition is wrapped back in, and the 16-bit result
 *	is returned inverted.
 *
 *	The work is done entirely in the top half of the word: subtracting
 *	the half-swapped sum from ~s equals ~(s + swapped), whose bits 31..16
 *	hold the inverted (hi + lo + carry) value.
 */
static inline __sum16 csum_fold(__wsum s)
{
	unsigned int swapped = (s >> 16) | (s << 16);	/* rotate by 16 */

	return (~s - swapped) >> 16;
}

/*
 *	This is a version of ip_compute_csum() optimized for IP headers,
 *	which always checksum on 4 octet boundaries.
 *
 *	@iph: start of the header; it is read as consecutive 32-bit words
 *	@ihl: number of 32-bit words to add up (two words are pre-loaded and
 *	      the loop count is derived from ihl - 2)
 *
 *	Returns the 32-bit sum after it has been passed through csum_fold().
 *
 *	NOTE(review): values of ihl below 2, or values for which the computed
 *	loop count is 0, are not obviously handled by the asm - presumably
 *	callers only pass valid IPv4 header lengths (>= 5); confirm.
 */
static inline __sum16
ip_fast_csum(const void *iph, unsigned int ihl)
{
	const void *ptr = iph;
	unsigned int tmp, tmp2, sum;

	/*
	 * Operands: %0 = sum, %1 = tmp, %2 = tmp2, %3 = ptr, %4 = ihl.
	 * Every load is "ld.ab ..., [%3, 4]", i.e. it reads through ptr and
	 * steps ptr on by 4 (presumably post-increment write-back; confirm
	 * against the ARC ISA reference).
	 *
	 * Flow of the sequence:
	 *  - the first two words are loaded into sum and tmp2;
	 *  - lp_count = (ihl - 2) >> 1, and the ".f" on the shift updates the
	 *    flags so that "bcc" can test the bit shifted out (presumably the
	 *    carry flag): when it is set, i.e. (ihl - 2) is odd, one extra
	 *    word is added and the next one loaded before entering the loop;
	 *  - the "lp" loop body (up to label 1) handles two words per
	 *    iteration, always adding the previously loaded word with carry
	 *    while the next one is being loaded;
	 *  - at label 1 the last pending word is added with carry, and the
	 *    final "add.cs" adds 1 if a carry is still outstanding.
	 *
	 * The "memory" clobber is the workaround mentioned in the file header
	 * for the gcc 4.4.x alias analysis problem.
	 */
	__asm__(
	"	ld.ab  %0, [%3, 4]		\n"
	"	ld.ab  %2, [%3, 4]		\n"
	"	sub    %1, %4, 2		\n"
	"	lsr.f  lp_count, %1, 1		\n"
	"	bcc    0f			\n"
	"	add.f  %0, %0, %2		\n"
	"	ld.ab  %2, [%3, 4]		\n"
	"0:	lp     1f			\n"
	"	ld.ab  %1, [%3, 4]		\n"
	"	adc.f  %0, %0, %2		\n"
	"	ld.ab  %2, [%3, 4]		\n"
	"	adc.f  %0, %0, %1		\n"
	"1:	adc.f  %0, %0, %2		\n"
	"	add.cs %0,%0,1			\n"
	: "=&r"(sum), "=r"(tmp), "=&r"(tmp2), "+&r" (ptr)
	: "r"(ihl)
	: "cc", "lp_count", "memory");

	/* Reduce the 32-bit accumulator to the final 16-bit checksum. */
	return csum_fold(sum);
}

/*
 * TCP pseudo Header is 12 bytes:
 * SA [4], DA [4], zeroes [1], Proto[1], TCP Seg(hdr+data) Len [2]
 *
 * Adds the pseudo-header fields to an existing partial checksum and returns
 * the resulting 32-bit sum without folding it to 16 bits.
 *
 * @saddr: source address
 * @daddr: destination address
 * @len:   segment length (header + data)
 * @proto: protocol number
 * @sum:   partial checksum to accumulate into
 */
static inline __wsum
csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len,
		   unsigned short proto, __wsum sum)
{
	/*
	 * %0 = sum (read and written), %1 = saddr, %2 = daddr,
	 * %3 = length term, %4 = protocol term.
	 * Each add feeds its carry into the next one; the trailing
	 * "adc %0, %0, 0" adds in the carry left by the last addition.
	 *
	 * The length operand is passed as-is on big-endian builds and shifted
	 * left by 8 on little-endian builds; proto always goes through
	 * htons().
	 * NOTE(review): presumably this places both values where they would
	 * sit in the network-order pseudo header, up to 16-bit folding -
	 * confirm.
	 */
	__asm__ __volatile__(
	"	add.f %0, %0, %1	\n"
	"	adc.f %0, %0, %2	\n"
	"	adc.f %0, %0, %3	\n"
	"	adc.f %0, %0, %4	\n"
	"	adc   %0, %0, 0		\n"
	: "+&r"(sum)
	: "r"(saddr), "r"(daddr),
#ifdef CONFIG_CPU_BIG_ENDIAN
	  "r"(len),
#else
	  "r"(len << 8),
#endif
	  "r"(htons(proto))
	: "cc");

	return sum;
}

/*
 * Self-referential defines: each name expands to itself, so the only effect
 * is that the name becomes visible to preprocessor tests (#ifdef/#ifndef).
 * NOTE(review): presumably <asm-generic/checksum.h>, included right below,
 * checks for these names to skip its own versions of the routines defined
 * in this header - confirm against that header.
 */
#define csum_fold csum_fold
#define ip_fast_csum ip_fast_csum
#define csum_tcpudp_nofold csum_tcpudp_nofold

#include <asm-generic/checksum.h>

#endif /* _ASM_ARC_CHECKSUM_H */
+98 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * vineetg: May 2011
 *  -Support single cycle endian-swap insn in ARC700 4.10
 *
 * vineetg: June 2009
 *  -Better htonl implementation (5 instead of 9 ALU instructions)
 *  -Hardware assisted single cycle bswap (Use Case of ARC custom instrn)
 */

#ifndef __ASM_ARC_SWAB_H
#define __ASM_ARC_SWAB_H

#include <linux/types.h>

/* Native single cycle endian swap insn */
#ifdef CONFIG_ARC_HAS_SWAPE

/*
 * Byte-swap a 32-bit value with the "swape" instruction.
 *
 * @x: value to swap; evaluated exactly once.
 *
 * The argument is parenthesized and the temporary uses a double-underscore
 * name so that a caller expression such as __arch_swab32(tmp) cannot be
 * captured by the macro's own local (which would read an uninitialized
 * variable).
 */
#define __arch_swab32(x)		\
({					\
	unsigned int __tmp = (x);	\
	__asm__(			\
	"	swape	%0, %1	\n"	\
	: "=r" (__tmp)			\
	: "r" (__tmp));			\
	__tmp;				\
})

#else

/* Several ways of Endian-Swap Emulation for ARC
 * 0: kernel generic
 * 1: ARC optimised "C"
 * 2: ARC Custom instruction
 */
#define ARC_BSWAP_TYPE	1

#if (ARC_BSWAP_TYPE == 1)		/******* Software only ********/

/* The kernel default implementation of htonl is
 *		return  x<<24 | x>>24 |
 *		 (x & (__u32)0x0000ff00UL)<<8 | (x & (__u32)0x00ff0000UL)>>8;
 *
 * This generates 9 instructions on ARC (excluding the ld/st)
 *
 * 8051fd8c:	ld     r3,[r7,20]	; Mem op : Get the value to be swapped
 * 8051fd98:	asl    r5,r3,24		; get  3rd Byte
 * 8051fd9c:	lsr    r2,r3,24		; get  0th Byte
 * 8051fda0:	and    r4,r3,0xff00
 * 8051fda8:	asl    r4,r4,8		; get 1st Byte
 * 8051fdac:	and    r3,r3,0x00ff0000
 * 8051fdb4:	or     r2,r2,r5		; combine 0th and 3rd Bytes
 * 8051fdb8:	lsr    r3,r3,8		; 2nd Byte at correct place in Dst Reg
 * 8051fdbc:	or     r2,r2,r4		; combine 0,3 Bytes with 1st Byte
 * 8051fdc0:	or     r2,r2,r3		; combine 0,3,1 Bytes with 2nd Byte
 * 8051fdc4:	st     r2,[r1,20]	; Mem op : save result back to mem
 *
 * Joern suggested a better "C" algorithm which is great since
 * (1) It is portable to any architecture
 * (2) At the same time it takes advantage of ARC ISA (rotate insns)
 */

/*
 * Software byte swap of a 32-bit value using two rotates and a masked merge.
 *
 * @x: value to swap; evaluated exactly once.
 *
 * With the input bytes named B3 B2 B1 B0 (most significant first):
 *   __tmp = input rotated left by 8   -> B2 B1 B0 B3
 *   __in  = input rotated right by 8  -> B0 B3 B2 B1
 * The xor/and/xor sequence then takes bytes 2 and 0 from __tmp and bytes
 * 3 and 1 from __in, giving B0 B1 B2 B3.
 *
 * The temporaries are "unsigned int" (as in the other __arch_swab32
 * variants) so that the shift pairs really are 32-bit rotates and the result
 * has no bits above bit 31, whatever the width of "long".
 */
#define __arch_swab32(x)					\
({	unsigned int __in = (x), __tmp;				\
	__tmp = __in << 8 | __in >> 24; /* ror tmp,in,24 */	\
	__in = __in << 24 | __in >> 8; /* ror in,in,8 */	\
	__tmp ^= __in;						\
	__tmp &= 0xff00ff;					\
	__tmp ^ __in;						\
})

#elif (ARC_BSWAP_TYPE == 2)	/* Custom single cycle bswap instruction */

/*
 * Byte-swap a 32-bit value with a custom "bswap" extension instruction.
 *
 * @x: value to swap; evaluated exactly once.
 *
 * The .extInstruction directive is emitted ahead of the bswap itself
 * (presumably so the assembler accepts the mnemonic; the meaning of the
 * directive's operands is not verified here).
 *
 * The argument is parenthesized and the temporary uses a double-underscore
 * name so that a caller expression such as __arch_swab32(tmp) cannot be
 * captured by the macro's own local (which would read an uninitialized
 * variable).
 */
#define __arch_swab32(x)						\
({									\
	unsigned int __tmp = (x);					\
	__asm__(							\
	"	.extInstruction	bswap, 7, 0x00, SUFFIX_NONE, SYNTAX_2OP	\n"\
	"	bswap  %0, %1						\n"\
	: "=r" (__tmp)							\
	: "r" (__tmp));							\
	__tmp;								\
})

#endif /* ARC_BSWAP_TYPE=zzz */

#endif /* CONFIG_ARC_HAS_SWAPE */

/*
 * Define __SWAB_64_THRU_32__ for kernel builds, and for non-kernel builds
 * unless the compiler is in strict ANSI mode.
 * NOTE(review): presumably this asks the generic swab code to build 64-bit
 * swaps out of two 32-bit swaps - confirm against <linux/swab.h>.
 */
#if !defined(__STRICT_ANSI__) || defined(__KERNEL__)
#define __SWAB_64_THRU_32__
#endif

#endif