
Commit 0ea366f5 authored by Will Deacon

arm64: atomics: prefetch the destination word for write prior to stxr

The cost of changing a cacheline from shared to exclusive state can be
significant, especially when this is triggered by an exclusive store,
since it may result in having to retry the transaction.

This patch makes use of prfm to prefetch cachelines for write prior to
ldxr/stxr loops when using the ll/sc atomic routines.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
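
For illustration, below is a minimal standalone sketch of the pattern this patch applies, modelled on the atomic_add() LL/SC loop touched in the diff. The function name example_atomic_add and the bare int * argument are placeholders for this sketch, not the kernel's actual macro-generated routine:

	static inline void example_atomic_add(int i, int *v)
	{
		unsigned long tmp;
		int result;

		asm volatile(
		/* Hint that *v is about to be written, so the cacheline is
		 * fetched in a writable state before the ldxr/stxr pair runs. */
	"	prfm	pstl1strm, %2\n"
	"1:	ldxr	%w0, %2\n"		/* load-exclusive the counter      */
	"	add	%w0, %w0, %w3\n"	/* apply the operation             */
	"	stxr	%w1, %w0, %2\n"		/* store-exclusive; %w1 = status   */
	"	cbnz	%w1, 1b"		/* non-zero status => lost it, retry */
		: "=&r" (result), "=&r" (tmp), "+Q" (*v)
		: "Ir" (i));
	}

Without the prefetch, the ldxr typically brings the line in shared state and the stxr then has to upgrade it to exclusive, which can cause the store-exclusive to fail and the loop to retry; the pstl1strm hint asks for the line in a writable state up front.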
parent a82e6238
+9 −0
@@ -45,6 +45,7 @@ __LL_SC_PREFIX(atomic_##op(int i, atomic_t *v)) \
	int result;							\
									\
	asm volatile("// atomic_" #op "\n"				\
"	prfm	pstl1strm, %2\n"					\
"1:	ldxr	%w0, %2\n"						\
"	" #asm_op "	%w0, %w0, %w3\n"				\
"	stxr	%w1, %w0, %2\n"						\
@@ -62,6 +63,7 @@ __LL_SC_PREFIX(atomic_##op##_return(int i, atomic_t *v)) \
	int result;							\
									\
	asm volatile("// atomic_" #op "_return\n"			\
"	prfm	pstl1strm, %2\n"					\
"1:	ldxr	%w0, %2\n"						\
"	" #asm_op "	%w0, %w0, %w3\n"				\
"	stlxr	%w1, %w0, %2\n"						\
@@ -98,6 +100,7 @@ __LL_SC_PREFIX(atomic_cmpxchg(atomic_t *ptr, int old, int new))
	int oldval;

	asm volatile("// atomic_cmpxchg\n"
"	prfm	pstl1strm, %2\n"
"1:	ldxr	%w1, %2\n"
"	eor	%w0, %w1, %w3\n"
"	cbnz	%w0, 2f\n"
@@ -121,6 +124,7 @@ __LL_SC_PREFIX(atomic64_##op(long i, atomic64_t *v)) \
	unsigned long tmp;						\
									\
	asm volatile("// atomic64_" #op "\n"				\
"	prfm	pstl1strm, %2\n"					\
"1:	ldxr	%0, %2\n"						\
"	" #asm_op "	%0, %0, %3\n"					\
"	stxr	%w1, %0, %2\n"						\
@@ -138,6 +142,7 @@ __LL_SC_PREFIX(atomic64_##op##_return(long i, atomic64_t *v)) \
	unsigned long tmp;						\
									\
	asm volatile("// atomic64_" #op "_return\n"			\
"	prfm	pstl1strm, %2\n"					\
"1:	ldxr	%0, %2\n"						\
"	" #asm_op "	%0, %0, %3\n"					\
"	stlxr	%w1, %0, %2\n"						\
@@ -174,6 +179,7 @@ __LL_SC_PREFIX(atomic64_cmpxchg(atomic64_t *ptr, long old, long new))
	unsigned long res;

	asm volatile("// atomic64_cmpxchg\n"
"	prfm	pstl1strm, %2\n"
"1:	ldxr	%1, %2\n"
"	eor	%0, %1, %3\n"
"	cbnz	%w0, 2f\n"
@@ -196,6 +202,7 @@ __LL_SC_PREFIX(atomic64_dec_if_positive(atomic64_t *v))
	unsigned long tmp;

	asm volatile("// atomic64_dec_if_positive\n"
"	prfm	pstl1strm, %2\n"
"1:	ldxr	%0, %2\n"
"	subs	%0, %0, #1\n"
"	b.mi	2f\n"
@@ -220,6 +227,7 @@ __LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr, \
	unsigned long tmp, oldval;					\
									\
	asm volatile(							\
	"	prfm	pstl1strm, %2\n"				\
	"1:	ldxr" #sz "\t%" #w "[oldval], %[v]\n"			\
	"	eor	%" #w "[tmp], %" #w "[oldval], %" #w "[old]\n"	\
	"	cbnz	%" #w "[tmp], 2f\n"				\
@@ -259,6 +267,7 @@ __LL_SC_PREFIX(__cmpxchg_double##name(unsigned long old1, \
	unsigned long tmp, ret;						\
									\
	asm volatile("// __cmpxchg_double" #name "\n"			\
	"	prfm	pstl1strm, %2\n"				\
	"1:	ldxp	%0, %1, %2\n"					\
	"	eor	%0, %0, %3\n"					\
	"	eor	%1, %1, %4\n"					\
+8 −0
@@ -33,12 +33,14 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
	case 1:
		asm volatile(ARM64_LSE_ATOMIC_INSN(
		/* LL/SC */
		"	prfm	pstl1strm, %2\n"
		"1:	ldxrb	%w0, %2\n"
		"	stlxrb	%w1, %w3, %2\n"
		"	cbnz	%w1, 1b\n"
		"	dmb	ish",
		/* LSE atomics */
		"	nop\n"
		"	nop\n"
		"	swpalb	%w3, %w0, %2\n"
		"	nop\n"
		"	nop")
@@ -49,12 +51,14 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
	case 2:
		asm volatile(ARM64_LSE_ATOMIC_INSN(
		/* LL/SC */
		"	prfm	pstl1strm, %2\n"
		"1:	ldxrh	%w0, %2\n"
		"	stlxrh	%w1, %w3, %2\n"
		"	cbnz	%w1, 1b\n"
		"	dmb	ish",
		/* LSE atomics */
		"	nop\n"
		"	nop\n"
		"	swpalh	%w3, %w0, %2\n"
		"	nop\n"
		"	nop")
@@ -65,12 +69,14 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
	case 4:
		asm volatile(ARM64_LSE_ATOMIC_INSN(
		/* LL/SC */
		"	prfm	pstl1strm, %2\n"
		"1:	ldxr	%w0, %2\n"
		"	stlxr	%w1, %w3, %2\n"
		"	cbnz	%w1, 1b\n"
		"	dmb	ish",
		/* LSE atomics */
		"	nop\n"
		"	nop\n"
		"	swpal	%w3, %w0, %2\n"
		"	nop\n"
		"	nop")
@@ -81,12 +87,14 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
	case 8:
		asm volatile(ARM64_LSE_ATOMIC_INSN(
		/* LL/SC */
		"	prfm	pstl1strm, %2\n"
		"1:	ldxr	%0, %2\n"
		"	stlxr	%w1, %3, %2\n"
		"	cbnz	%w1, 1b\n"
		"	dmb	ish",
		/* LSE atomics */
		"	nop\n"
		"	nop\n"
		"	swpal	%3, %0, %2\n"
		"	nop\n"
		"	nop")
+2 −0
@@ -30,6 +30,7 @@
	asm volatile(							\
	ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN,		\
		    CONFIG_ARM64_PAN)					\
"	prfm	pstl1strm, %2\n"					\
"1:	ldxr	%w1, %2\n"						\
	insn "\n"							\
"2:	stlxr	%w3, %w0, %2\n"						\
@@ -120,6 +121,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
		return -EFAULT;

	asm volatile("// futex_atomic_cmpxchg_inatomic\n"
"	prfm	pstl1strm, %2\n"
"1:	ldxr	%w1, %2\n"
"	sub	%w3, %w1, %w4\n"
"	cbnz	%w3, 3f\n"
+2 −0
@@ -31,6 +31,7 @@ ENTRY( \name )
	eor	w0, w0, w3		// Clear low bits
	mov	x2, #1
	add	x1, x1, x0, lsr #3	// Get word offset
alt_lse "	prfm	pstl1strm, [x1]",	"nop"
	lsl	x3, x2, x3		// Create mask

alt_lse	"1:	ldxr	x2, [x1]",		"\lse	x3, [x1]"
@@ -48,6 +49,7 @@ ENTRY( \name )
	eor	w0, w0, w3		// Clear low bits
	mov	x2, #1
	add	x1, x1, x0, lsr #3	// Get word offset
alt_lse "	prfm	pstl1strm, [x1]",	"nop"
	lsl	x4, x2, x3		// Create mask

alt_lse	"1:	ldxr	x2, [x1]",		"\lse	x4, x2, [x1]"