Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 4bcaf195 authored by Hong-Mei Li, committed by Andrea
Browse files

arm64: lib: memory utilities optimization



Optimize memcpy and memmove by prefetching several cache lines ahead of
the copy. The preload method yields a 15% memcpy speed improvement.

Change-Id: I2259b98a33eba0b7466920b3f270f953e609cf13
Signed-off-by: Hong-Mei Li <a21834@motorola.com>
Reviewed-on: http://gerrit.mot.com/740766


SLTApproved: Slta Waiver <sltawvr@motorola.com>
SME-Granted: SME Approvals Granted
Tested-by: Jira Key <jirakey@motorola.com>
Reviewed-by: Zhi-Ming Yuan <a14194@motorola.com>
Submit-Approved: Jira Key <jirakey@motorola.com>
Signed-off-by: Pranav Vashi <neobuddy89@gmail.com>
parent 886cbdc0
Loading
Loading
Loading
Loading
+2 −0
Original line number Original line Diff line number Diff line
@@ -51,6 +51,7 @@ C_h .req x12
D_l	.req	x13
D_l	.req	x13
D_h	.req	x14
D_h	.req	x14


	prfm    pldl1strm, [src, #(1*L1_CACHE_BYTES)]
	mov	dst, dstin
	mov	dst, dstin
	cmp	count, #16
	cmp	count, #16
	/*When memory length is less than 16, the accessed are not aligned.*/
	/*When memory length is less than 16, the accessed are not aligned.*/
@@ -181,6 +182,7 @@ D_h .req x14
	ldp1	C_l, C_h, src, #16
	ldp1	C_l, C_h, src, #16
	stp1	D_l, D_h, dst, #16
	stp1	D_l, D_h, dst, #16
	ldp1	D_l, D_h, src, #16
	ldp1	D_l, D_h, src, #16
	prfm    pldl1strm, [src, #(4*L1_CACHE_BYTES)]
	subs	count, count, #64
	subs	count, count, #64
	b.ge	1b
	b.ge	1b
	stp1	A_l, A_h, dst, #16
	stp1	A_l, A_h, dst, #16
+2 −0
Original line number Original line Diff line number Diff line
@@ -60,6 +60,7 @@ D_h .req x14
	.weak memmove
	.weak memmove
ENTRY(__memmove)
ENTRY(__memmove)
ENTRY(memmove)
ENTRY(memmove)
	prfm    pldl1strm, [src, #L1_CACHE_BYTES]
	cmp	dstin, src
	cmp	dstin, src
	b.lo	__memcpy
	b.lo	__memcpy
	add	tmp1, src, count
	add	tmp1, src, count
@@ -186,6 +187,7 @@ ENTRY(memmove)
	ldp	C_l, C_h, [src, #-48]
	ldp	C_l, C_h, [src, #-48]
	stp	D_l, D_h, [dst, #-64]!
	stp	D_l, D_h, [dst, #-64]!
	ldp	D_l, D_h, [src, #-64]!
	ldp	D_l, D_h, [src, #-64]!
	prfm    pldl1strm, [src, #(4*L1_CACHE_BYTES)]
	subs	count, count, #64
	subs	count, count, #64
	b.ge	1b
	b.ge	1b
	stp	A_l, A_h, [dst, #-16]
	stp	A_l, A_h, [dst, #-16]