Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit f91a8dcc, authored by Nicolas Pitre, committed by Lennert Buytenhek
Browse files

[ARM] cache align memset and memzero



This is a natural extension following the previous patch.
Non Feroceon based targets are unchanged.

Signed-off-by: Nicolas Pitre <nico@marvell.com>
Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
parent 2239aff6
Loading
Loading
Loading
Loading
+46 −0
Original line number Diff line number Diff line
@@ -39,6 +39,9 @@ ENTRY(memset)
	mov	r3, r1
	cmp	r2, #16
	blt	4f

#if ! CALGN(1)+0

/*
 * We need an extra register for this loop - save the return address and
 * use the LR
@@ -64,6 +67,49 @@ ENTRY(memset)
	stmneia	r0!, {r1, r3, ip, lr}
	ldr	lr, [sp], #4

#else

/*
 * This version aligns the destination pointer in order to write
 * whole cache lines at once.
 *
 * Register use in this path, as far as this hunk shows:
 *   r0             - destination pointer, advanced by every store
 *   r2             - number of bytes still to write
 *   r1, r3         - fill word (r3 is copied from r1 before the size check)
 *   r4-r7, ip, lr  - further copies of the fill word, so that a single
 *                    stm of eight registers writes 32 bytes
 *
 * Assumes r0 is already word aligned and r1 already holds the fill byte
 * replicated into all four bytes; both would be set up outside this
 * hunk - TODO confirm.
 *
 * NOTE(review): the early return inside the loop leaves r0 pointing past
 * the filled area; nothing in this path restores the original pointer.
 * Confirm that callers do not rely on the value left in r0.
 */

	stmfd	sp!, {r4-r7, lr}	@ free r4-r7 and lr for use as fill copies
	mov	r4, r1
	mov	r5, r1
	mov	r6, r1
	mov	r7, r1
	mov	ip, r1
	mov	lr, r1

	cmp	r2, #96			@ align only when more than 96 bytes remain
	tstgt	r0, #31			@ ... and r0 is not yet on a 32-byte boundary
	ble	3f			@ count <= 96 (signed) or aligned: fill directly

	and	ip, r0, #31		@ ip = offset of r0 inside its 32-byte line
	rsb	ip, ip, #32		@ ip = bytes up to the next 32-byte boundary
	sub	r2, r2, ip		@ account for the bytes written just below
	movs	ip, ip, lsl #(32 - 4)	@ C = bit 4 (16), N = bit 3 (8), bit 30 = bit 2 (4)
	stmcsia	r0!, {r4, r5, r6, r7}	@ write 16 bytes if needed
	stmmiia	r0!, {r4, r5}		@ write 8 bytes if needed
	tst	ip, #(1 << 30)		@ are 4 more bytes needed?
	mov	ip, r1			@ ip is a fill copy again; mov keeps the flags
	strne	r1, [r0], #4		@ write 4 bytes if needed

3:	subs	r2, r2, #64		@ take 64 bytes off the count
	stmgeia	r0!, {r1, r3-r7, ip, lr}	@ at least 64 were left: write 32 bytes
	stmgeia	r0!, {r1, r3-r7, ip, lr}	@ ... and 32 more
	bgt	3b			@ more than 64 were left: go round again
	ldmeqfd	sp!, {r4-r7, pc}	@ exactly 64 were left: restore and return

	/*
	 * r2 is negative here, but subtracting multiples of 64 left its low
	 * six bits untouched, so they still give the count modulo 64.
	 */
	tst	r2, #32
	stmneia	r0!, {r1, r3-r7, ip, lr}	@ write 32 bytes
	tst	r2, #16
	stmneia	r0!, {r4-r7}		@ write 16 bytes
	ldmfd	sp!, {r4-r7, lr}	@ restore; the code at 4: tests the lower bits

#endif

4:	tst	r2, #8
	stmneia	r0!, {r1, r3}
	tst	r2, #4
+44 −0
Original line number Diff line number Diff line
@@ -39,6 +39,9 @@ ENTRY(__memzero)
 */
	cmp	r1, #16			@ 1 we can skip this chunk if we
	blt	4f			@ 1 have < 16 bytes

#if ! CALGN(1)+0

/*
 * We need an extra register for this loop - save the return address and
 * use the LR
@@ -64,6 +67,47 @@ ENTRY(__memzero)
	stmneia	r0!, {r2, r3, ip, lr}	@ 4
	ldr	lr, [sp], #4		@ 1

#else

/*
 * This version aligns the destination pointer in order to write
 * whole cache lines at once.
 *
 * Register use in this path, as far as this hunk shows:
 *   r0             - destination pointer, advanced by every store
 *   r1             - number of bytes still to write
 *   r2, r3         - words that get stored
 *   r4-r7, ip, lr  - further copies of r2, so that a single stm of
 *                    eight registers writes 32 bytes
 *
 * Assumes r2 and r3 hold zero and r0 is already word aligned; both
 * would be set up outside this hunk - TODO confirm.
 */

	stmfd	sp!, {r4-r7, lr}	@ free r4-r7 and lr for use as copies of r2
	mov	r4, r2
	mov	r5, r2
	mov	r6, r2
	mov	r7, r2
	mov	ip, r2
	mov	lr, r2

	cmp	r1, #96			@ align only when more than 96 bytes remain
	andgts	ip, r0, #31		@ ip = offset inside the 32-byte line, Z if none
	ble	3f			@ count <= 96 (signed) or aligned: fill directly

	rsb	ip, ip, #32		@ ip = bytes up to the next 32-byte boundary
	sub	r1, r1, ip		@ account for the bytes written just below
	movs	ip, ip, lsl #(32 - 4)	@ C = bit 4 (16), N = bit 3 (8), bit 30 = bit 2 (4)
	stmcsia	r0!, {r4, r5, r6, r7}	@ write 16 bytes if needed
	stmmiia	r0!, {r4, r5}		@ write 8 bytes if needed
	movs	ip, ip, lsl #2		@ C = the 4-byte bit; ip ends up zero when the
					@ distance was a multiple of 4, which the stores
					@ of ip below depend on (given r2 is zero)
	strcs	r2, [r0], #4		@ write 4 bytes if needed

3:	subs	r1, r1, #64		@ take 64 bytes off the count
	stmgeia	r0!, {r2-r7, ip, lr}	@ at least 64 were left: write 32 bytes
	stmgeia	r0!, {r2-r7, ip, lr}	@ ... and 32 more
	bgt	3b			@ more than 64 were left: go round again
	ldmeqfd	sp!, {r4-r7, pc}	@ exactly 64 were left: restore and return

	/*
	 * r1 is negative here, but subtracting multiples of 64 left its low
	 * six bits untouched, so they still give the count modulo 64.
	 */
	tst	r1, #32
	stmneia	r0!, {r2-r7, ip, lr}	@ write 32 bytes
	tst	r1, #16
	stmneia	r0!, {r4-r7}		@ write 16 bytes
	ldmfd	sp!, {r4-r7, lr}	@ restore; the code at 4: tests the lower bits

#endif

4:	tst	r1, #8			@ 1 8 bytes or more?
	stmneia	r0!, {r2, r3}		@ 2
	tst	r1, #4			@ 1 4 bytes or more?