Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 26c5e07d authored by Steven J. Hill's avatar Steven J. Hill Committed by Ralf Baechle
Browse files

MIPS: microMIPS: Optimise 'memset' core library function.



Optimise 'memset' to use microMIPS instructions and/or optimisations
for binary size reduction. When the microMIPS ISA is not being used,
the library function compiles to the original binary code.

Signed-off-by: default avatarSteven J. Hill <Steven.Hill@imgtec.com>
parent bce86083
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -296,6 +296,7 @@ symbol = value
#define LONG_SUBU	subu
#define LONG_L		lw
#define LONG_S		sw
#define LONG_SP		swp
#define LONG_SLL	sll
#define LONG_SLLV	sllv
#define LONG_SRL	srl
@@ -318,6 +319,7 @@ symbol = value
#define LONG_SUBU	dsubu
#define LONG_L		ld
#define LONG_S		sd
#define LONG_SP		sdp
#define LONG_SLL	dsll
#define LONG_SLLV	dsllv
#define LONG_SRL	dsrl
+54 −30
Original line number Diff line number Diff line
@@ -5,7 +5,8 @@
 *
 * Copyright (C) 1998, 1999, 2000 by Ralf Baechle
 * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
 * Copyright (C) 2007  Maciej W. Rozycki
 * Copyright (C) 2007 by Maciej W. Rozycki
 * Copyright (C) 2011, 2012 MIPS Technologies, Inc.
 */
#include <asm/asm.h>
#include <asm/asm-offsets.h>
@@ -19,6 +20,20 @@
#define LONG_S_R sdr
#endif

#ifdef CONFIG_CPU_MICROMIPS
#define STORSIZE (LONGSIZE * 2)
#define STORMASK (STORSIZE - 1)
#define FILL64RG t8
#define FILLPTRG t7
#undef  LONG_S
#define LONG_S LONG_SP
#else
#define STORSIZE LONGSIZE
#define STORMASK LONGMASK
#define FILL64RG a1
#define FILLPTRG t0
#endif

#define EX(insn,reg,addr,handler)			\
9:	insn	reg, addr;				\
	.section __ex_table,"a";			\
@@ -26,23 +41,25 @@
	.previous

	.macro	f_fill64 dst, offset, val, fixup
	EX(LONG_S, \val, (\offset +  0 * LONGSIZE)(\dst), \fixup)
	EX(LONG_S, \val, (\offset +  1 * LONGSIZE)(\dst), \fixup)
	EX(LONG_S, \val, (\offset +  2 * LONGSIZE)(\dst), \fixup)
	EX(LONG_S, \val, (\offset +  3 * LONGSIZE)(\dst), \fixup)
	EX(LONG_S, \val, (\offset +  4 * LONGSIZE)(\dst), \fixup)
	EX(LONG_S, \val, (\offset +  5 * LONGSIZE)(\dst), \fixup)
	EX(LONG_S, \val, (\offset +  6 * LONGSIZE)(\dst), \fixup)
	EX(LONG_S, \val, (\offset +  7 * LONGSIZE)(\dst), \fixup)
#if LONGSIZE == 4
	EX(LONG_S, \val, (\offset +  8 * LONGSIZE)(\dst), \fixup)
	EX(LONG_S, \val, (\offset +  9 * LONGSIZE)(\dst), \fixup)
	EX(LONG_S, \val, (\offset + 10 * LONGSIZE)(\dst), \fixup)
	EX(LONG_S, \val, (\offset + 11 * LONGSIZE)(\dst), \fixup)
	EX(LONG_S, \val, (\offset + 12 * LONGSIZE)(\dst), \fixup)
	EX(LONG_S, \val, (\offset + 13 * LONGSIZE)(\dst), \fixup)
	EX(LONG_S, \val, (\offset + 14 * LONGSIZE)(\dst), \fixup)
	EX(LONG_S, \val, (\offset + 15 * LONGSIZE)(\dst), \fixup)
	EX(LONG_S, \val, (\offset +  0 * STORSIZE)(\dst), \fixup)
	EX(LONG_S, \val, (\offset +  1 * STORSIZE)(\dst), \fixup)
	EX(LONG_S, \val, (\offset +  2 * STORSIZE)(\dst), \fixup)
	EX(LONG_S, \val, (\offset +  3 * STORSIZE)(\dst), \fixup)
#if ((defined(CONFIG_CPU_MICROMIPS) && (LONGSIZE == 4)) || !defined(CONFIG_CPU_MICROMIPS))
	EX(LONG_S, \val, (\offset +  4 * STORSIZE)(\dst), \fixup)
	EX(LONG_S, \val, (\offset +  5 * STORSIZE)(\dst), \fixup)
	EX(LONG_S, \val, (\offset +  6 * STORSIZE)(\dst), \fixup)
	EX(LONG_S, \val, (\offset +  7 * STORSIZE)(\dst), \fixup)
#endif
#if (!defined(CONFIG_CPU_MICROMIPS) && (LONGSIZE == 4))
	EX(LONG_S, \val, (\offset +  8 * STORSIZE)(\dst), \fixup)
	EX(LONG_S, \val, (\offset +  9 * STORSIZE)(\dst), \fixup)
	EX(LONG_S, \val, (\offset + 10 * STORSIZE)(\dst), \fixup)
	EX(LONG_S, \val, (\offset + 11 * STORSIZE)(\dst), \fixup)
	EX(LONG_S, \val, (\offset + 12 * STORSIZE)(\dst), \fixup)
	EX(LONG_S, \val, (\offset + 13 * STORSIZE)(\dst), \fixup)
	EX(LONG_S, \val, (\offset + 14 * STORSIZE)(\dst), \fixup)
	EX(LONG_S, \val, (\offset + 15 * STORSIZE)(\dst), \fixup)
#endif
	.endm

@@ -71,16 +88,20 @@ LEAF(memset)
1:

FEXPORT(__bzero)
	sltiu		t0, a2, LONGSIZE	/* very small region? */
	sltiu		t0, a2, STORSIZE	/* very small region? */
	bnez		t0, .Lsmall_memset
	 andi		t0, a0, LONGMASK	/* aligned? */
	 andi		t0, a0, STORMASK	/* aligned? */

#ifdef CONFIG_CPU_MICROMIPS
	move		t8, a1			/* used by 'swp' instruction */
	move		t9, a1
#endif
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
	beqz		t0, 1f
	 PTR_SUBU	t0, LONGSIZE		/* alignment in bytes */
	 PTR_SUBU	t0, STORSIZE		/* alignment in bytes */
#else
	.set		noat
	li		AT, LONGSIZE
	li		AT, STORSIZE
	beqz		t0, 1f
	 PTR_SUBU	t0, AT			/* alignment in bytes */
	.set		at
@@ -99,24 +120,27 @@ FEXPORT(__bzero)
1:	ori		t1, a2, 0x3f		/* # of full blocks */
	xori		t1, 0x3f
	beqz		t1, .Lmemset_partial	/* no block to fill */
	 andi		t0, a2, 0x40-LONGSIZE
	 andi		t0, a2, 0x40-STORSIZE

	PTR_ADDU	t1, a0			/* end address */
	.set		reorder
1:	PTR_ADDIU	a0, 64
	R10KCBARRIER(0(ra))
	f_fill64 a0, -64, a1, .Lfwd_fixup
	f_fill64 a0, -64, FILL64RG, .Lfwd_fixup
	bne		t1, a0, 1b
	.set		noreorder

.Lmemset_partial:
	R10KCBARRIER(0(ra))
	PTR_LA		t1, 2f			/* where to start */
#ifdef CONFIG_CPU_MICROMIPS
	LONG_SRL	t7, t0, 1
#endif
#if LONGSIZE == 4
	PTR_SUBU	t1, t0
	PTR_SUBU	t1, FILLPTRG
#else
	.set		noat
	LONG_SRL		AT, t0, 1
	LONG_SRL	AT, FILLPTRG, 1
	PTR_SUBU	t1, AT
	.set		at
#endif
@@ -126,9 +150,9 @@ FEXPORT(__bzero)
	.set		push
	.set		noreorder
	.set		nomacro
	f_fill64 a0, -64, a1, .Lpartial_fixup	/* ... but first do longs ... */
	f_fill64 a0, -64, FILL64RG, .Lpartial_fixup	/* ... but first do longs ... */
2:	.set		pop
	andi		a2, LONGMASK		/* At most one long to go */
	andi		a2, STORMASK		/* At most one long to go */

	beqz		a2, 1f
	 PTR_ADDU	a0, a2			/* What's left */
@@ -169,7 +193,7 @@ FEXPORT(__bzero)

.Lpartial_fixup:
	PTR_L		t0, TI_TASK($28)
	andi		a2, LONGMASK
	andi		a2, STORMASK
	LONG_L		t0, THREAD_BUADDR(t0)
	LONG_ADDU	a2, t1
	jr		ra
@@ -177,4 +201,4 @@ FEXPORT(__bzero)

.Llast_fixup:
	jr		ra
	 andi		v1, a2, LONGMASK
	 andi		v1, a2, STORMASK