Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 5210d1e6 authored by Vineet Gupta
Browse files

ARC: String library



Hand optimised asm code for ARC700 pipeline.
Originally written/optimized by Joern Rennecke

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Cc: Joern Rennecke <joern.rennecke@embecosm.com>
parent 6e35fa2d
Loading
Loading
Loading
Loading
+40 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * vineetg: May 2011
 *  -We had half-optimised memset/memcpy, got better versions of those
 *  -Added memcmp, strchr, strcpy, strcmp, strlen
 *
 * Amit Bhor: Codito Technologies 2004
 */

#ifndef _ASM_ARC_STRING_H
#define _ASM_ARC_STRING_H

#include <linux/types.h>

#ifdef __KERNEL__

/*
 * Architecture-provided string and memory routines.
 *
 * Each __HAVE_ARCH_<NAME> macro is defined next to the prototype of the
 * routine it announces (presumably so that the generic string code does
 * not supply its own copy - confirm against the generic headers).
 * memzero() has no such macro; it is only declared here.
 */

/* Memory block routines */
#define __HAVE_ARCH_MEMSET
extern void *memset(void *ptr, int c, __kernel_size_t n);

#define __HAVE_ARCH_MEMCPY
extern void *memcpy(void *dest, const void *src, __kernel_size_t n);

#define __HAVE_ARCH_MEMCMP
extern int memcmp(const void *cs, const void *ct, __kernel_size_t n);

extern void memzero(void *ptr, __kernel_size_t n);

/* NUL-terminated string routines */
#define __HAVE_ARCH_STRCHR
extern char *strchr(const char *s, int c);

#define __HAVE_ARCH_STRCPY
extern char *strcpy(char *dest, const char *src);

#define __HAVE_ARCH_STRCMP
extern int strcmp(const char *cs, const char *ct);

#define __HAVE_ARCH_STRLEN
extern __kernel_size_t strlen(const char *s);

#endif /* __KERNEL__ */
#endif /* _ASM_ARC_STRING_H */

arch/arc/lib/memcmp.S

0 → 100644
+124 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <asm/linkage.h>

/*
 * Register aliases used by memcmp below.  After the prologue r2 holds the
 * byte count n and r3 holds n - 1.  One of the two is recycled as the
 * second data word of the first buffer (WORD2), the other is turned into
 * the shift amount for the final, possibly partial, word (SHIFT).  Which
 * register gets which role depends on endianness.
 */
#ifdef __LITTLE_ENDIAN__
#define WORD2 r2
#define SHIFT r3
#else /* BIG ENDIAN */
#define WORD2 r3
#define SHIFT r2
#endif

/*
 * memcmp: compare two memory areas.
 *
 * In:       r0 = first buffer, r1 = second buffer, r2 = byte count n
 * Out:      r0 = 0 when the first n bytes are equal, otherwise a nonzero
 *                value that is negative when the first differing byte of
 *                the r0 buffer is the smaller one and positive when it is
 *                the larger one
 * Clobbers: r1-r5, r12, lp_count, flags
 *
 * Word-aligned buffers are compared two words (8 bytes) per loop
 * iteration; everything else is compared two bytes per iteration.  Both
 * loops are software pipelined: r4/r5 always hold a pair that has been
 * loaded but not yet compared.  The word path loads whole words, so it may
 * read up to 3 bytes beyond n inside the last aligned word.
 */
ARC_ENTRY memcmp
	; r12 = low two bits of (s1 | s2) moved to bits 31:30, i.e. nonzero
	; exactly when at least one pointer is not word aligned.
	or	r12,r0,r1
	asl_s	r12,r12,30
	sub	r3,r2,1
	; Unsigned n <= r12: taken for n == 0 when both pointers are aligned
	; (r12 == 0), and for any n up to 0x40000000 when one is misaligned.
	brls	r2,r12,.Lbytewise
	ld	r4,[r0,0]
	ld	r5,[r1,0]
	; lp_count = (n - 1) / 8.  The bit shifted out last, bit 2 of n - 1,
	; lands in carry and is consumed after the loop.
	lsr.f	lp_count,r3,3
	; Hardware loop up to .Loop_end, skipped when lp_count is zero.
	lpne	.Loop_end
	ld_s	WORD2,[r0,4]
	ld_s	r12,[r1,4]
	brne	r4,r5,.Leven
	; Pre-load the next even word pair and advance both pointers by 8.
	ld.a	r4,[r0,8]
	ld.a	r5,[r1,8]
	brne	WORD2,r12,.Lodd
.Loop_end:
	; SHIFT becomes a bit count: 8 * (n - 1) on little endian, 8 * n on
	; big endian.  Only its low five bits matter to the shifts below.
	asl_s	SHIFT,SHIFT,3
	; NOTE(review): this appears to test the carry left by lsr.f above
	; (clear when the last word is the r4/r5 pair already loaded), which
	; relies on the instructions in between leaving flags alone - confirm.
	bhs_s	.Last_cmp
	brne	r4,r5,.Leven
	; The last word is the odd one of the pair: fetch it into r4/r5.
	ld	r4,[r0,4]
	ld	r5,[r1,4]
#ifdef __LITTLE_ENDIAN__
	nop_s
	; one more load latency cycle
.Last_cmp:
	; r0 = differing bits, plus a sentinel at bit 0 of the last valid
	; byte so that a difference in bytes beyond n can never be the
	; lowest set bit.
	xor	r0,r4,r5
	bset	r0,r0,SHIFT
	; r1 = ones below the lowest set bit of r0; norm and the mask with 24
	; turn that into 24 - 8 * (index of the byte holding that bit).
	sub_s	r1,r0,1
	bic_s	r1,r1,r0
	norm	r1,r1
	b.d	.Leven_cmp
	and	r1,r1,24	; delay slot
.Leven:
	; Same computation without the sentinel: the words are known to
	; differ, or every byte of them is within n.
	xor	r0,r4,r5
	sub_s	r1,r0,1
	bic_s	r1,r1,r0
	norm	r1,r1
	; slow track insn
	and	r1,r1,24
.Leven_cmp:
	; Move the first differing byte into the top byte of each word (the
	; bytes below it are equal), drop the sign bit and subtract.
	asl	r2,r4,r1
	asl	r12,r5,r1
	lsr_s	r2,r2,1
	lsr_s	r12,r12,1
	j_s.d	[blink]
	sub	r0,r2,r12	; delay slot: return value
	.balign	4
.Lodd:
	; As .Leven, for the odd word pair held in WORD2 (r2) and r12.
	xor	r0,WORD2,r12
	sub_s	r1,r0,1
	bic_s	r1,r1,r0
	norm	r1,r1
	; slow track insn
	and	r1,r1,24
	asl_s	r2,r2,r1
	asl_s	r12,r12,r1
	lsr_s	r2,r2,1
	lsr_s	r12,r12,1
	j_s.d	[blink]
	sub	r0,r2,r12	; delay slot: return value
#else /* BIG ENDIAN */
.Last_cmp:
	; Shift right by -8 * n so that the bytes beyond n fall off the low
	; end of both words.
	neg_s	SHIFT,SHIFT
	lsr	r4,r4,SHIFT
	lsr	r5,r5,SHIFT
	; slow track insn
.Leven:
	; Result is 0 when equal, 1 when r4 > r5 (unsigned), and 1 with bit 31
	; set (negative) when the subtraction borrowed, i.e. r4 < r5.
	sub.f	r0,r4,r5
	mov.ne	r0,1
	j_s.d	[blink]
	bset.cs	r0,r0,31	; delay slot
.Lodd:
	; Only reached with WORD2 != r12, so the result is never 0 here.
	cmp_s	WORD2,r12

	mov_s	r0,1
	j_s.d	[blink]
	bset.cs	r0,r0,31	; delay slot
#endif /* ENDIAN */
	.balign	4
.Lbytewise:
	; Byte-at-a-time variant of the loop above, two bytes per iteration.
	breq	r2,0,.Lnil
	ldb	r4,[r0,0]
	ldb	r5,[r1,0]
	; lp_count = (n - 1) / 2, carry = bit 0 of n - 1.
	lsr.f	lp_count,r3
	lpne	.Lbyte_end
	ldb_s	r3,[r0,1]
	ldb	r12,[r1,1]
	brne	r4,r5,.Lbyte_even
	ldb.a	r4,[r0,2]
	ldb.a	r5,[r1,2]
	brne	r3,r12,.Lbyte_odd
.Lbyte_end:
	; NOTE(review): as in the word path, this appears to rely on the carry
	; from lsr.f surviving the loop; carry clear means the r4/r5 pair
	; already loaded is the last byte - confirm.
	bcc	.Lbyte_even
	brne	r4,r5,.Lbyte_even
	ldb_s	r3,[r0,1]
	ldb_s	r12,[r1,1]
.Lbyte_odd:
	j_s.d	[blink]
	sub	r0,r3,r12	; delay slot: difference of the odd byte pair
.Lbyte_even:
	j_s.d	[blink]
	sub	r0,r4,r5	; delay slot: difference of the even byte pair
.Lnil:
	; n == 0: the areas compare equal.
	j_s.d	[blink]
	mov	r0,0
ARC_EXIT memcmp
+66 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <asm/linkage.h>

/*
 * memcpy: copy a memory area.
 *
 * In:       r0 = destination, r1 = source, r2 = byte count n
 * Out:      r0 = destination (r0 is never written; r5 is the moving
 *                destination pointer)
 * Clobbers: r1-r3, r5, r12, lp_count, flags
 *
 * When both pointers are word aligned the copy runs two words per loop
 * iteration, otherwise two bytes per iteration.  Both loops are software
 * pipelined: r12 always holds the item that has been loaded but not yet
 * stored.  In the word path a trailing partial word is handled by loading
 * the whole source word and merging it into the existing destination word,
 * so up to 3 bytes beyond n are read from the source and the bytes beyond
 * n in the last destination word are read and written back unchanged.
 */
ARC_ENTRY memcpy
	; r3 = low two bits of (dest | src) moved to bits 31:30, i.e. nonzero
	; exactly when at least one pointer is not word aligned.
	or	r3,r0,r1
	asl_s	r3,r3,30
	mov_s	r5,r0
	; Unsigned n <= r3: taken for n == 0 when both pointers are aligned,
	; and for any n up to 0x40000000 when one is misaligned.
	brls.d	r2,r3,.Lcopy_bytewise
	sub.f	r3,r2,1		; delay slot: r3 = n - 1, borrow when n == 0
	ld_s	r12,[r1,0]
	; lp_count = (n - 1) / 8; the flags set here steer lppnz below.
	asr.f	lp_count,r3,3
	; Bit 2 of n - 1 set: the number of words to move is even, so move
	; one word up front and leave an odd number for the paired loop and
	; the final store.
	bbit0.d	r3,2,.Lnox4
	bmsk_s	r2,r2,1		; delay slot: r2 = n & 3, size of a partial tail
	st.ab	r12,[r5,4]
	ld.a	r12,[r1,4]
.Lnox4:
	; Hardware loop up to .Lendloop, two words per iteration.
	lppnz	.Lendloop
	ld_s	r3,[r1,4]
	st.ab	r12,[r5,4]
	ld.a	r12,[r1,8]
	st.ab	r3,[r5,4]
.Lendloop:
	; r12 = last source word.  With no partial tail it is stored whole.
	breq	r2,0,.Last_store
	; Partial tail: fetch the current destination word to merge into.
	ld	r3,[r5,0]
#ifdef __LITTLE_ENDIAN__
	; r2 = 8 * (n & 3) - 1 = index of the highest bit taken from source.
	add3	r2,-1,r2
	; uses long immediate
	; xor / mask / xor: keep bits 0..r2 of the source word and take the
	; bits above from the destination word.
	xor_s	r12,r12,r3
	bmsk	r12,r12,r2
    xor_s	r12,r12,r3
#else /* BIG ENDIAN */
	; r2 = 31 - 8 * (n & 3) = index of the highest bit taken from dest.
	sub3	r2,31,r2
	; uses long immediate
	; xor / mask / xor: keep bits 0..r2 of the destination word and take
	; the bits above from the source word.
        xor_s	r3,r3,r12
        bmsk	r3,r3,r2
        xor_s	r12,r12,r3
#endif /* ENDIAN */
.Last_store:
	j_s.d	[blink]
	st	r12,[r5,0]	; delay slot: final (possibly merged) word

	.balign	4
.Lcopy_bytewise:
	; Carry still holds the borrow of n - 1 from the delay slot above:
	; nothing to copy when n == 0.
	jcs	[blink]
	ldb_s	r12,[r1,0]
	; lp_count = (n - 1) / 2, carry = bit 0 of n - 1.
	lsr.f	lp_count,r3
	; Carry set means n is even: move one byte up front so that an odd
	; number is left for the paired loop and the final store.
	bhs_s	.Lnox1
	stb.ab	r12,[r5,1]
	ldb.a	r12,[r1,1]
.Lnox1:
	; Hardware loop up to .Lendbloop, two bytes per iteration.
	lppnz	.Lendbloop
	ldb_s	r3,[r1,1]
	stb.ab	r12,[r5,1]
	ldb.a	r12,[r1,2]
	stb.ab	r3,[r5,1]
.Lendbloop:
	j_s.d	[blink]
	stb	r12,[r5,0]	; delay slot: final byte
ARC_EXIT memcpy

arch/arc/lib/memset.S

0 → 100644
+59 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <asm/linkage.h>

/* Unaligned requests of at most SMALL bytes are filled by a plain byte loop. */
#define SMALL	7 /* Must be at least 6 to deal with alignment/loop issues.  */

/*
 * memset: fill a memory area with a byte value.
 *
 * In:       r0 = start address, r1 = fill value (only the low byte is
 *                used), r2 = byte count n
 * Out:      r0 = start address (r0 is never written; r4 is the moving
 *                store pointer)
 * Clobbers: r1-r4, r12, lp_count, flags
 *
 * Three cases:
 *  - address and n both multiples of 4: straight word loop;
 *  - otherwise, n <= SMALL: byte loop;
 *  - otherwise: the unaligned head and tail are covered by byte and
 *    halfword stores that may overlap each other and the word loop, and
 *    the aligned middle is filled by the word loop.
 */
ARC_ENTRY memset
	mov_s	r4,r0
	; Z is set when the low two bits of both the address and n are zero.
	or	r12,r0,r2
	bmsk.f	r12,r12,1
	; Replicate the fill byte into both bytes of a halfword.
	extb_s	r1,r1
	asl	r3,r1,8
	beq.d	.Laligned
	or_s	r1,r1,r3	; delay slot
	brls	r2,SMALL,.Ltiny
	; Tail: store the last byte, then the halfword that ends at the last
	; even address within the area.
	add	r3,r2,r0
	stb	r1,[r3,-1]
	bclr_s	r3,r3,0
	stw	r1,[r3,-2]
	; r12 = start & 3.  If nonzero, r2 becomes n - (4 - r12), the bytes
	; left once the start has been rounded up to a word boundary.
	bmsk.f	r12,r0,1
	add_s	r2,r2,r12
	sub.ne	r2,r2,4
	; Head: store the first byte, then a halfword at the next even
	; address, then round the pointer down to a word boundary.
	stb.ab	r1,[r4,1]
	and	r4,r4,-2
	stw.ab	r1,[r4,2]
	and	r4,r4,-4
.Laligned:	; This code address should be aligned for speed.
	; Widen the pattern to a full word; lp_count = remaining bytes / 4.
	asl	r3,r1,16
	lsr.f	lp_count,r2,2
	or_s	r1,r1,r3
	; Hardware loop, one word store per iteration, skipped when the word
	; count is zero.
	lpne	.Loop_end
	st.ab	r1,[r4,4]
.Loop_end:
	j_s	[blink]

	.balign	4
.Ltiny:
	; One byte store per iteration; stb writes the low byte of r1.
	mov.f	lp_count,r2
	lpne	.Ltiny_end
	stb.ab	r1,[r4,1]
.Ltiny_end:
	j_s	[blink]
ARC_EXIT memset

; memzero: zero a memory area by tail-calling memset with a fill value of 0.
;
; memzero: @r0 = mem, @r1 = size_t
; memset:  @r0 = mem, @r1 = char, @r2 = size_t

ARC_ENTRY memzero
    ; adjust bzero args to memset args
    mov r2, r1
    mov r1, 0
    b  memset    ;tail call, so no need to tinker with blink: memset returns straight to our caller
ARC_EXIT memzero
+123 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/* ARC700 has a relatively long pipeline and branch prediction, so we want
   to avoid branches that are hard to predict.  On the other hand, the
   presence of the norm instruction makes it easier to operate on whole
   words branch-free.  */

#include <asm/linkage.h>

/*
 * strchr: find the first occurrence of a character in a NUL-terminated
 * string.
 *
 * In:       r0 = s, r1 = c (only the low 8 bits are used)
 * Out:      r0 = address of the first byte equal to c, or 0 when the
 *                terminating NUL is reached first
 * Clobbers: r1-r7, r12, flags
 *
 * The string is read one aligned word at a time, so bytes before s and
 * after the terminator that share an aligned word with the string are
 * read as well.  Register roles in the scan:
 *   r3 = 0x01010101, r4 = 0x80808080, r5 = c replicated into every byte,
 *   r2 = current word, r6 = r2 ^ r5 (a zero byte marks a match),
 *   r7 = 0x01010101 with the bytes that precede s cleared (first word
 *        only).
 * A word x is tested for zero bytes with (x - ones) & ~x & 0x80808080.
 * Borrows travel towards the more significant bytes, so only the least
 * significant hit is exact.  That is the first byte on little endian; on
 * big endian the candidates have to be filtered further.
 */
ARC_ENTRY strchr
	extb_s	r1,r1
	asl	r5,r1,8
	bmsk	r2,r0,1		; r2 = s & 3, byte offset inside the word
	or	r5,r5,r1
	mov_s	r3,0x01010101
	; NOTE(review): this compares the offset with s itself rather than
	; with 0, so the shortcut is only taken for s < 4.  Word-aligned
	; strings still work through the general path below (offset 0 gives
	; r7 == r3) - confirm whether 0 was intended.
	breq.d	r2,r0,.Laligned
	asl	r4,r5,16	; delay slot
	sub_s	r0,r0,r2	; round s down to a word boundary
	asl	r7,r2,3		; bit offset of s inside that word
	ld_s	r2,[r0]
	; Clear the 0x01 of every byte that precedes s, so that those bytes
	; cannot produce a hit on their own.
#ifdef __LITTLE_ENDIAN__
	asl	r7,r3,r7
#else
	lsr	r7,r3,r7
#endif
	or	r5,r5,r4
	ror	r4,r3		; r4 = 0x80808080
	; Terminator test on the first word.
	sub	r12,r2,r7
	bic_s	r12,r12,r2
	and	r12,r12,r4
	brne.d	r12,0,.Lfound0_ua
	xor	r6,r2,r5	; delay slot
	ld.a	r2,[r0,4]	; pre-load the next word, r0 += 4
	; Match test on the first word.
	sub	r12,r6,r7
	bic	r12,r12,r6
#ifdef __LITTLE_ENDIAN__
	and	r7,r12,r4
	breq	r7,0,.Loop ; For speed, we want this branch to be unaligned.
	b	.Lfound_char ; Likewise this one.
#else
	; Big endian: the bytes that precede s are the MORE significant ones,
	; so a borrow out of a genuine match can flag them as well, and norm
	; would then select an address below s.  Keep the valid-byte mask in
	; r7 and accept a candidate only if it is both inside the string
	; (r7) and a true zero byte of r6 (bit 0 of that byte clear).
	and	r12,r12,r4
	breq	r12,0,.Loop ; For speed, we want this branch to be unaligned.
	lsr_s	r12,r12,7
	bic 	r2,r7,r6
	b.d	.Lfound_char_b
	and_s	r2,r2,r12	; delay slot
#endif
; /* We require this code address to be unaligned for speed...  */
.Laligned:
	ld_s	r2,[r0]
	or	r5,r5,r4
	ror	r4,r3
; /* ... so that this code address is aligned, for itself and ...  */
.Loop:
	; Terminator test on the current word.
	sub	r12,r2,r3
	bic_s	r12,r12,r2
	and	r12,r12,r4
	brne.d	r12,0,.Lfound0
	xor	r6,r2,r5	; delay slot
	ld.a	r2,[r0,4]	; pre-load the next word, r0 += 4
	; Match test on the current word.
	sub	r12,r6,r3
	bic	r12,r12,r6
	and	r7,r12,r4
	breq	r7,0,.Loop /* ... so that this branch is unaligned.  */
	; Found searched-for character.  r0 has already advanced to next word.
#ifdef __LITTLE_ENDIAN__
/* We only need the information about the first matching byte
   (i.e. the least significant matching byte) to be exact,
   hence there is no problem with carry effects.  */
.Lfound_char:
	; r3 = ones below the lowest hit; norm >> 3 gives 3 - byte index.
	sub	r3,r7,1
	bic	r3,r3,r7
	norm	r2,r3
	sub_s	r0,r0,1
	asr_s	r2,r2,3
	j.d	[blink]
	sub_s	r0,r0,r2	; delay slot: r0 = word address + byte index

	.balign	4
.Lfound0_ua:
	mov	r3,r7		; first word: use the mask without pre-s bytes
.Lfound0:
	; The word holds the terminator; r0 still points at this word.
	; r2 = match hits, r12 = terminator hits or match hits.
	sub	r3,r6,r3
	bic	r3,r3,r6
	and	r2,r3,r4
	or_s	r12,r12,r2
	sub_s	r3,r12,1
	bic_s	r3,r3,r12
	norm	r3,r3
	add_s	r0,r0,3
	asr_s	r12,r3,3
	; Move the earliest hit to bit 31 of the match hits: N is set only
	; if that earliest hit is a match for c.
	asl.f	0,r2,r3
	sub_s	r0,r0,r12
	j_s.d	[blink]
	mov.pl	r0,0		; delay slot: terminator came first, return 0
#else /* BIG ENDIAN */
.Lfound_char:
	; Move each hit to bit 0 of its byte and drop the borrow-induced
	; ones, i.e. those whose r6 byte has bit 0 set.
	lsr	r7,r7,7

	bic	r2,r7,r6
.Lfound_char_b:
	; r2 = exact hits at bit 0 of each byte; norm >> 3 = byte index of
	; the most significant, i.e. first, one.
	norm	r2,r2
	sub_s	r0,r0,4
	asr_s	r2,r2,3
	j.d	[blink]
	add_s	r0,r0,r2	; delay slot: r0 = word address + byte index

.Lfound0_ua:
	mov_s	r3,r7		; first word: use the mask without pre-s bytes
.Lfound0:
	; The word holds the terminator; r0 still points at this word.
	; NOTE(review): the sequence below appears to locate the first byte
	; that is either the terminator or a match and to return 0 unless it
	; is a match, mirroring the little-endian .Lfound0 - confirm against
	; the ISA reference before touching it.
	asl_s	r2,r2,7
	or	r7,r6,r4
	bic_s	r12,r12,r2
	sub	r2,r7,r3
	or	r2,r2,r6
	bic	r12,r2,r12
	bic.f	r3,r4,r12
	norm	r3,r3

	add.pl	r3,r3,1
	asr_s	r12,r3,3
	asl.f	0,r2,r3
	add_s	r0,r0,r12
	j_s.d	[blink]
	mov.mi	r0,0		; delay slot
#endif /* ENDIAN */
ARC_EXIT strchr
Loading