Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 112908b6 authored by Danny Lin's avatar Danny Lin Committed by Gagan Malvi
Browse files

arm64: lse: Prefetch operands to speed up atomic operations



On a Kryo 485 CPU (semi-custom Cortex-A76 derivative) in a Snapdragon
855 (SM8150) SoC, switching from traditional LL/SC atomics to LSE
causes LKDTM's ATOMIC_TIMING test to regress by 2x:

LL/SC ATOMIC_TIMING:    34.14s  34.08s
LSE ATOMIC_TIMING:      70.84s  71.06s

Prefetching the target operands fixes the regression and makes LSE
perform better than LL/SC as expected:

LSE+prfm ATOMIC_TIMING: 21.36s  21.21s

"dd if=/dev/zero of=/dev/null count=10000000" also runs faster:
    LL/SC:  3.3 3.2 3.3 s
    LSE:    3.1 3.2 3.2 s
    LSE+p:  2.3 2.3 2.3 s

Commit 0ea366f5 applied the same change
to LL/SC atomics, but it was never ported to LSE.

Signed-off-by: default avatarDanny Lin <danny@kdrag0n.dev>
parent af776024
Loading
Loading
Loading
Loading
+48 −19
Original line number Diff line number Diff line
@@ -32,7 +32,12 @@ static inline void atomic_##op(int i, atomic_t *v) \
	register int w0 asm ("w0") = i;					\
	register atomic_t *x1 asm ("x1") = v;				\
									\
	asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(op),		\
	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
	/* LL/SC */							\
	__LL_SC_ATOMIC(op)						\
	__nops(1),							\
	/* LSE atomics */						\
"	prfm	pstl1strm, %[v]\n"					\
"	" #asm_op "	%w[i], %[v]\n")					\
	: [i] "+r" (w0), [v] "+Q" (v->counter)				\
	: "r" (x1)							\
@@ -54,8 +59,10 @@ static inline int atomic_fetch_##op##name(int i, atomic_t *v) \
									\
	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
	/* LL/SC */							\
	__LL_SC_ATOMIC(fetch_##op##name),				\
	__LL_SC_ATOMIC(fetch_##op##name)				\
	__nops(1),							\
	/* LSE atomics */						\
"	prfm	pstl1strm, %[v]\n"					\
"	" #asm_op #mb "	%w[i], %w[i], %[v]")				\
	: [i] "+r" (w0), [v] "+Q" (v->counter)				\
	: "r" (x1)							\
@@ -87,8 +94,9 @@ static inline int atomic_add_return##name(int i, atomic_t *v) \
	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
	/* LL/SC */							\
	__LL_SC_ATOMIC(add_return##name)				\
	__nops(1),							\
	__nops(2),							\
	/* LSE atomics */						\
	"	prfm	pstl1strm, %[v]\n"				\
	"	ldadd" #mb "	%w[i], w30, %[v]\n"			\
	"	add	%w[i], %w[i], w30")				\
	: [i] "+r" (w0), [v] "+Q" (v->counter)				\
@@ -113,8 +121,9 @@ static inline void atomic_and(int i, atomic_t *v)
	asm volatile(ARM64_LSE_ATOMIC_INSN(
	/* LL/SC */
	__LL_SC_ATOMIC(and)
	__nops(1),
	__nops(2),
	/* LSE atomics */
	"	prfm	pstl1strm, %[v]\n"
	"	mvn	%w[i], %w[i]\n"
	"	stclr	%w[i], %[v]")
	: [i] "+&r" (w0), [v] "+Q" (v->counter)
@@ -131,8 +140,9 @@ static inline int atomic_fetch_and##name(int i, atomic_t *v) \
	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
	/* LL/SC */							\
	__LL_SC_ATOMIC(fetch_and##name)					\
	__nops(1),							\
	__nops(2),							\
	/* LSE atomics */						\
	"	prfm	pstl1strm, %[v]\n"				\
	"	mvn	%w[i], %w[i]\n"					\
	"	ldclr" #mb "	%w[i], %w[i], %[v]")			\
	: [i] "+&r" (w0), [v] "+Q" (v->counter)				\
@@ -157,8 +167,9 @@ static inline void atomic_sub(int i, atomic_t *v)
	asm volatile(ARM64_LSE_ATOMIC_INSN(
	/* LL/SC */
	__LL_SC_ATOMIC(sub)
	__nops(1),
	__nops(2),
	/* LSE atomics */
	"	prfm	pstl1strm, %[v]\n"
	"	neg	%w[i], %w[i]\n"
	"	stadd	%w[i], %[v]")
	: [i] "+&r" (w0), [v] "+Q" (v->counter)
@@ -175,8 +186,9 @@ static inline int atomic_sub_return##name(int i, atomic_t *v) \
	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
	/* LL/SC */							\
	__LL_SC_ATOMIC(sub_return##name)				\
	__nops(2),							\
	__nops(3),							\
	/* LSE atomics */						\
	"	prfm	pstl1strm, %[v]\n"				\
	"	neg	%w[i], %w[i]\n"					\
	"	ldadd" #mb "	%w[i], w30, %[v]\n"			\
	"	add	%w[i], %w[i], w30")				\
@@ -203,8 +215,9 @@ static inline int atomic_fetch_sub##name(int i, atomic_t *v) \
	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
	/* LL/SC */							\
	__LL_SC_ATOMIC(fetch_sub##name)					\
	__nops(1),							\
	__nops(2),							\
	/* LSE atomics */						\
	"	prfm	pstl1strm, %[v]\n"				\
	"	neg	%w[i], %w[i]\n"					\
	"	ldadd" #mb "	%w[i], %w[i], %[v]")			\
	: [i] "+&r" (w0), [v] "+Q" (v->counter)				\
@@ -229,7 +242,12 @@ static inline void atomic64_##op(long i, atomic64_t *v) \
	register long x0 asm ("x0") = i;				\
	register atomic64_t *x1 asm ("x1") = v;				\
									\
	asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(op),	\
	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
	/* LL/SC */							\
	__LL_SC_ATOMIC64(op)						\
	__nops(1),							\
	/* LSE atomics */						\
"	prfm	pstl1strm, %[v]\n"					\
"	" #asm_op "	%[i], %[v]\n")					\
	: [i] "+r" (x0), [v] "+Q" (v->counter)				\
	: "r" (x1)							\
@@ -251,8 +269,10 @@ static inline long atomic64_fetch_##op##name(long i, atomic64_t *v) \
									\
	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
	/* LL/SC */							\
	__LL_SC_ATOMIC64(fetch_##op##name),				\
	__LL_SC_ATOMIC64(fetch_##op##name)				\
	__nops(1),							\
	/* LSE atomics */						\
"	prfm	pstl1strm, %[v]\n"					\
"	" #asm_op #mb "	%[i], %[i], %[v]")				\
	: [i] "+r" (x0), [v] "+Q" (v->counter)				\
	: "r" (x1)							\
@@ -284,8 +304,9 @@ static inline long atomic64_add_return##name(long i, atomic64_t *v) \
	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
	/* LL/SC */							\
	__LL_SC_ATOMIC64(add_return##name)				\
	__nops(1),							\
	__nops(2),							\
	/* LSE atomics */						\
	"	prfm	pstl1strm, %[v]\n"				\
	"	ldadd" #mb "	%[i], x30, %[v]\n"			\
	"	add	%[i], %[i], x30")				\
	: [i] "+r" (x0), [v] "+Q" (v->counter)				\
@@ -310,8 +331,9 @@ static inline void atomic64_and(long i, atomic64_t *v)
	asm volatile(ARM64_LSE_ATOMIC_INSN(
	/* LL/SC */
	__LL_SC_ATOMIC64(and)
	__nops(1),
	__nops(2),
	/* LSE atomics */
	"	prfm	pstl1strm, %[v]\n"
	"	mvn	%[i], %[i]\n"
	"	stclr	%[i], %[v]")
	: [i] "+&r" (x0), [v] "+Q" (v->counter)
@@ -328,8 +350,9 @@ static inline long atomic64_fetch_and##name(long i, atomic64_t *v) \
	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
	/* LL/SC */							\
	__LL_SC_ATOMIC64(fetch_and##name)				\
	__nops(1),							\
	__nops(2),							\
	/* LSE atomics */						\
	"	prfm	pstl1strm, %[v]\n"				\
	"	mvn	%[i], %[i]\n"					\
	"	ldclr" #mb "	%[i], %[i], %[v]")			\
	: [i] "+&r" (x0), [v] "+Q" (v->counter)				\
@@ -354,8 +377,9 @@ static inline void atomic64_sub(long i, atomic64_t *v)
	asm volatile(ARM64_LSE_ATOMIC_INSN(
	/* LL/SC */
	__LL_SC_ATOMIC64(sub)
	__nops(1),
	__nops(2),
	/* LSE atomics */
	"	prfm	pstl1strm, %[v]\n"
	"	neg	%[i], %[i]\n"
	"	stadd	%[i], %[v]")
	: [i] "+&r" (x0), [v] "+Q" (v->counter)
@@ -372,8 +396,9 @@ static inline long atomic64_sub_return##name(long i, atomic64_t *v) \
	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
	/* LL/SC */							\
	__LL_SC_ATOMIC64(sub_return##name)				\
	__nops(2),							\
	__nops(3),							\
	/* LSE atomics */						\
	"	prfm	pstl1strm, %[v]\n"				\
	"	neg	%[i], %[i]\n"					\
	"	ldadd" #mb "	%[i], x30, %[v]\n"			\
	"	add	%[i], %[i], x30")				\
@@ -400,8 +425,9 @@ static inline long atomic64_fetch_sub##name(long i, atomic64_t *v) \
	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
	/* LL/SC */							\
	__LL_SC_ATOMIC64(fetch_sub##name)				\
	__nops(1),							\
	__nops(2),							\
	/* LSE atomics */						\
	"	prfm	pstl1strm, %[v]\n"				\
	"	neg	%[i], %[i]\n"					\
	"	ldadd" #mb "	%[i], %[i], %[v]")			\
	: [i] "+&r" (x0), [v] "+Q" (v->counter)				\
@@ -425,8 +451,9 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
	asm volatile(ARM64_LSE_ATOMIC_INSN(
	/* LL/SC */
	__LL_SC_ATOMIC64(dec_if_positive)
	__nops(6),
	__nops(7),
	/* LSE atomics */
	"	prfm	pstl1strm, %[v]\n"
	"1:	ldr	x30, %[v]\n"
	"	subs	%[ret], x30, #1\n"
	"	b.lt	2f\n"
@@ -458,8 +485,9 @@ static inline unsigned long __cmpxchg_case_##name(volatile void *ptr, \
	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
	/* LL/SC */							\
	__LL_SC_CMPXCHG(name)						\
	__nops(2),							\
	__nops(3),							\
	/* LSE atomics */						\
	"	prfm	pstl1strm, %[v]\n"				\
	"	mov	" #w "30, %" #w "[old]\n"			\
	"	cas" #mb #sz "\t" #w "30, %" #w "[new], %[v]\n"		\
	"	mov	%" #w "[ret], " #w "30")			\
@@ -510,8 +538,9 @@ static inline long __cmpxchg_double##name(unsigned long old1, \
	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
	/* LL/SC */							\
	__LL_SC_CMPXCHG_DBL(name)					\
	__nops(3),							\
	__nops(4),							\
	/* LSE atomics */						\
	"	prfm	pstl1strm, %[v]\n"				\
	"	casp" #mb "\t%[old1], %[old2], %[new1], %[new2], %[v]\n"\
	"	eor	%[old1], %[old1], %[oldval1]\n"			\
	"	eor	%[old2], %[old2], %[oldval2]\n"			\