
Commit c09d6a04 authored by Will Deacon

arm64: atomics: patch in lse instructions when supported by the CPU



On CPUs which support the LSE atomic instructions introduced in ARMv8.1,
it makes sense to use them in preference to ll/sc sequences.

This patch introduces runtime patching of atomic_t and atomic64_t
routines so that the call-site for the out-of-line ll/sc sequences is
patched with an LSE atomic instruction when we detect that
the CPU supports it.
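
As a rough user-space analogy of that selection (not the kernel mechanism, which patches each call site in place via the alternatives framework), the sketch below checks the HWCAP_ATOMICS bit from getauxval() and dispatches to either an exclusive-load/store loop or an LSE stadd. The add_llsc/add_lse helpers are hypothetical, and building the LSE path assumes an arm64 toolchain whose assembler accepts ".arch_extension lse".

/*
 * Hypothetical user-space analogy of the runtime selection (not the
 * kernel's call-site patching): probe the LSE HWCAP once, then pick
 * between an exclusive-load/store loop and an LSE STADD.
 */
#include <stdio.h>
#include <sys/auxv.h>

#ifndef HWCAP_ATOMICS
#define HWCAP_ATOMICS	(1 << 8)	/* LSE atomics bit in AT_HWCAP */
#endif

static void add_llsc(int i, int *p)
{
	int old, fail;

	asm volatile(
	"1:	ldxr	%w0, %2\n"
	"	add	%w0, %w0, %w3\n"
	"	stxr	%w1, %w0, %2\n"
	"	cbnz	%w1, 1b"
	: "=&r" (old), "=&r" (fail), "+Q" (*p)
	: "r" (i));
}

static void add_lse(int i, int *p)
{
	asm volatile(
	".arch_extension lse\n"
	"	stadd	%w1, %0"
	: "+Q" (*p)
	: "r" (i));
}

int main(void)
{
	int counter = 40;

	/* The kernel instead patches each call site once at boot. */
	if (getauxval(AT_HWCAP) & HWCAP_ATOMICS)
		add_lse(2, &counter);
	else
		add_llsc(2, &counter);

	printf("counter = %d\n", counter);
	return 0;
}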

If binutils is not recent enough to assemble the LSE instructions, then
the ll/sc sequences are inlined as though CONFIG_ARM64_LSE_ATOMICS=n.
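
A minimal sketch of that fallback, reusing the CONFIG_ARM64_LSE_ATOMICS/CONFIG_AS_LSE names from the headers below (the ATOMIC_BACKEND string and main() are illustrative only): when the assembler check fails, CONFIG_AS_LSE is never defined and the inlined ll/sc path is selected.

/*
 * Rough sketch of the compile-time fallback. With an old assembler,
 * CONFIG_AS_LSE is never defined, so the inlined ll/sc path is chosen
 * exactly as if CONFIG_ARM64_LSE_ATOMICS were off.
 */
#include <stdio.h>

#if defined(CONFIG_ARM64_LSE_ATOMICS) && defined(CONFIG_AS_LSE)
#define ATOMIC_BACKEND	"out-of-line ll/sc, call sites patched to LSE at boot"
#else
#define ATOMIC_BACKEND	"inlined ll/sc (as though CONFIG_ARM64_LSE_ATOMICS=n)"
#endif

int main(void)
{
	puts("atomics backend: " ATOMIC_BACKEND);
	return 0;
}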

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
parent c0385b24
arch/arm64/Makefile +12 −1
@@ -17,7 +17,18 @@ GZFLAGS :=-9

KBUILD_DEFCONFIG := defconfig

KBUILD_CFLAGS	+= -mgeneral-regs-only
# Check for binutils support for specific extensions
lseinstr := $(call as-instr,.arch_extension lse,-DCONFIG_AS_LSE=1)

ifeq ($(CONFIG_ARM64_LSE_ATOMICS), y)
  ifeq ($(lseinstr),)
$(warning LSE atomics not supported by binutils)
  endif
endif

KBUILD_CFLAGS	+= -mgeneral-regs-only $(lseinstr)
KBUILD_AFLAGS	+= $(lseinstr)

ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
KBUILD_CPPFLAGS	+= -mbig-endian
AS		+= -EB
arch/arm64/include/asm/atomic.h +2 −2
@@ -21,11 +21,11 @@
#define __ASM_ATOMIC_H

#include <linux/compiler.h>
#include <linux/stringify.h>
#include <linux/types.h>

#include <asm/barrier.h>
#include <asm/cmpxchg.h>
#include <asm/lse.h>

#define ATOMIC_INIT(i)	{ (i) }

@@ -33,7 +33,7 @@

#define __ARM64_IN_ATOMIC_IMPL

#ifdef CONFIG_ARM64_LSE_ATOMICS
#if defined(CONFIG_ARM64_LSE_ATOMICS) && defined(CONFIG_AS_LSE)
#include <asm/atomic_lse.h>
#else
#include <asm/atomic_ll_sc.h>
arch/arm64/include/asm/atomic_ll_sc.h +0 −12
@@ -37,18 +37,6 @@
 * (the optimize attribute silently ignores these options).
 */

#ifndef __LL_SC_INLINE
#define __LL_SC_INLINE		static inline
#endif

#ifndef __LL_SC_PREFIX
#define __LL_SC_PREFIX(x)	x
#endif

#ifndef __LL_SC_EXPORT
#define __LL_SC_EXPORT(x)
#endif

#define ATOMIC_OP(op, asm_op)						\
__LL_SC_INLINE void							\
__LL_SC_PREFIX(atomic_##op(int i, atomic_t *v))				\
arch/arm64/include/asm/atomic_lse.h +291 −109
@@ -25,60 +25,129 @@
#error "please don't include this file directly"
#endif

/* Move the ll/sc atomics out-of-line */
#define __LL_SC_INLINE
#define __LL_SC_PREFIX(x)	__ll_sc_##x
#define __LL_SC_EXPORT(x)	EXPORT_SYMBOL(__LL_SC_PREFIX(x))

/* Macros for constructing calls to out-of-line ll/sc atomics */
#define __LL_SC_CALL(op)						\
	"bl\t" __stringify(__LL_SC_PREFIX(atomic_##op)) "\n"
#define __LL_SC_CALL64(op)						\
	"bl\t" __stringify(__LL_SC_PREFIX(atomic64_##op)) "\n"

#define ATOMIC_OP(op, asm_op)						\
static inline void atomic_##op(int i, atomic_t *v)			\
{									\
	register int w0 asm ("w0") = i;					\
	register atomic_t *x1 asm ("x1") = v;				\
									\
	asm volatile(							\
	__LL_SC_CALL(op)						\
	: "+r" (w0), "+Q" (v->counter)					\
	: "r" (x1)							\
	: "x30");							\
}									\

#define ATOMIC_OP_RETURN(op, asm_op)					\
static inline int atomic_##op##_return(int i, atomic_t *v)		\
{									\
	register int w0 asm ("w0") = i;					\
	register atomic_t *x1 asm ("x1") = v;				\
									\
	asm volatile(							\
	__LL_SC_CALL(op##_return)					\
	: "+r" (w0)							\
	: "r" (x1)							\
	: "x30", "memory");						\
									\
	return w0;							\
}

#define ATOMIC_OPS(op, asm_op)						\
	ATOMIC_OP(op, asm_op)						\
	ATOMIC_OP_RETURN(op, asm_op)

ATOMIC_OPS(add, add)
ATOMIC_OPS(sub, sub)

ATOMIC_OP(and, and)
ATOMIC_OP(andnot, bic)
ATOMIC_OP(or, orr)
ATOMIC_OP(xor, eor)

#undef ATOMIC_OPS
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
#define __LL_SC_ATOMIC(op)	__LL_SC_CALL(atomic_##op)

static inline void atomic_andnot(int i, atomic_t *v)
{
	register int w0 asm ("w0") = i;
	register atomic_t *x1 asm ("x1") = v;

	asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(andnot),
	"	stclr	%w[i], %[v]\n")
	: [i] "+r" (w0), [v] "+Q" (v->counter)
	: "r" (x1)
	: "x30");
}

static inline void atomic_or(int i, atomic_t *v)
{
	register int w0 asm ("w0") = i;
	register atomic_t *x1 asm ("x1") = v;

	asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(or),
	"	stset	%w[i], %[v]\n")
	: [i] "+r" (w0), [v] "+Q" (v->counter)
	: "r" (x1)
	: "x30");
}

static inline void atomic_xor(int i, atomic_t *v)
{
	register int w0 asm ("w0") = i;
	register atomic_t *x1 asm ("x1") = v;

	asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(xor),
	"	steor	%w[i], %[v]\n")
	: [i] "+r" (w0), [v] "+Q" (v->counter)
	: "r" (x1)
	: "x30");
}

static inline void atomic_add(int i, atomic_t *v)
{
	register int w0 asm ("w0") = i;
	register atomic_t *x1 asm ("x1") = v;

	asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(add),
	"	stadd	%w[i], %[v]\n")
	: [i] "+r" (w0), [v] "+Q" (v->counter)
	: "r" (x1)
	: "x30");
}

static inline int atomic_add_return(int i, atomic_t *v)
{
	register int w0 asm ("w0") = i;
	register atomic_t *x1 asm ("x1") = v;

	asm volatile(ARM64_LSE_ATOMIC_INSN(
	/* LL/SC */
	"	nop\n"
	__LL_SC_ATOMIC(add_return),
	/* LSE atomics */
	"	ldaddal	%w[i], w30, %[v]\n"
	"	add	%w[i], %w[i], w30")
	: [i] "+r" (w0), [v] "+Q" (v->counter)
	: "r" (x1)
	: "x30", "memory");

	return w0;
}

static inline void atomic_and(int i, atomic_t *v)
{
	register int w0 asm ("w0") = i;
	register atomic_t *x1 asm ("x1") = v;

	asm volatile(ARM64_LSE_ATOMIC_INSN(
	/* LL/SC */
	"	nop\n"
	__LL_SC_ATOMIC(and),
	/* LSE atomics */
	"	mvn	%w[i], %w[i]\n"
	"	stclr	%w[i], %[v]")
	: [i] "+r" (w0), [v] "+Q" (v->counter)
	: "r" (x1)
	: "x30");
}

static inline void atomic_sub(int i, atomic_t *v)
{
	register int w0 asm ("w0") = i;
	register atomic_t *x1 asm ("x1") = v;

	asm volatile(ARM64_LSE_ATOMIC_INSN(
	/* LL/SC */
	"	nop\n"
	__LL_SC_ATOMIC(sub),
	/* LSE atomics */
	"	neg	%w[i], %w[i]\n"
	"	stadd	%w[i], %[v]")
	: [i] "+r" (w0), [v] "+Q" (v->counter)
	: "r" (x1)
	: "x30");
}

static inline int atomic_sub_return(int i, atomic_t *v)
{
	register int w0 asm ("w0") = i;
	register atomic_t *x1 asm ("x1") = v;

	asm volatile(ARM64_LSE_ATOMIC_INSN(
	/* LL/SC */
	"	nop\n"
	__LL_SC_ATOMIC(sub_return)
	"	nop",
	/* LSE atomics */
	"	neg	%w[i], %w[i]\n"
	"	ldaddal	%w[i], w30, %[v]\n"
	"	add	%w[i], %w[i], w30")
	: [i] "+r" (w0), [v] "+Q" (v->counter)
	: "r" (x1)
	: "x30", "memory");

	return w0;
}

static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new)
{
@@ -86,69 +155,164 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new)
	register int w1 asm ("w1") = old;
	register int w2 asm ("w2") = new;

	asm volatile(
	__LL_SC_CALL(cmpxchg)
	: "+r" (x0)
	: "r" (w1), "r" (w2)
	asm volatile(ARM64_LSE_ATOMIC_INSN(
	/* LL/SC */
	"	nop\n"
	__LL_SC_ATOMIC(cmpxchg)
	"	nop",
	/* LSE atomics */
	"	mov	w30, %w[old]\n"
	"	casal	w30, %w[new], %[v]\n"
	"	mov	%w[ret], w30")
	: [ret] "+r" (x0), [v] "+Q" (ptr->counter)
	: [old] "r" (w1), [new] "r" (w2)
	: "x30", "cc", "memory");

	return x0;
}

#define ATOMIC64_OP(op, asm_op)						\
static inline void atomic64_##op(long i, atomic64_t *v)			\
{									\
	register long x0 asm ("x0") = i;				\
	register atomic64_t *x1 asm ("x1") = v;				\
									\
	asm volatile(							\
	__LL_SC_CALL64(op)						\
	: "+r" (x0), "+Q" (v->counter)					\
	: "r" (x1)							\
	: "x30");							\
}									\

#define ATOMIC64_OP_RETURN(op, asm_op)					\
static inline long atomic64_##op##_return(long i, atomic64_t *v)	\
{									\
	register long x0 asm ("x0") = i;				\
	register atomic64_t *x1 asm ("x1") = v;				\
									\
	asm volatile(							\
	__LL_SC_CALL64(op##_return)					\
	: "+r" (x0)							\
	: "r" (x1)							\
	: "x30", "memory");						\
									\
	return x0;							\
}

#define ATOMIC64_OPS(op, asm_op)					\
	ATOMIC64_OP(op, asm_op)						\
	ATOMIC64_OP_RETURN(op, asm_op)

ATOMIC64_OPS(add, add)
ATOMIC64_OPS(sub, sub)

ATOMIC64_OP(and, and)
ATOMIC64_OP(andnot, bic)
ATOMIC64_OP(or, orr)
ATOMIC64_OP(xor, eor)

#undef ATOMIC64_OPS
#undef ATOMIC64_OP_RETURN
#undef ATOMIC64_OP
#undef __LL_SC_ATOMIC

#define __LL_SC_ATOMIC64(op)	__LL_SC_CALL(atomic64_##op)

static inline void atomic64_andnot(long i, atomic64_t *v)
{
	register long x0 asm ("x0") = i;
	register atomic64_t *x1 asm ("x1") = v;

	asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(andnot),
	"	stclr	%[i], %[v]\n")
	: [i] "+r" (x0), [v] "+Q" (v->counter)
	: "r" (x1)
	: "x30");
}

static inline void atomic64_or(long i, atomic64_t *v)
{
	register long x0 asm ("x0") = i;
	register atomic64_t *x1 asm ("x1") = v;

	asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(or),
	"	stset	%[i], %[v]\n")
	: [i] "+r" (x0), [v] "+Q" (v->counter)
	: "r" (x1)
	: "x30");
}

static inline void atomic64_xor(long i, atomic64_t *v)
{
	register long x0 asm ("x0") = i;
	register atomic64_t *x1 asm ("x1") = v;

	asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(xor),
	"	steor	%[i], %[v]\n")
	: [i] "+r" (x0), [v] "+Q" (v->counter)
	: "r" (x1)
	: "x30");
}

static inline void atomic64_add(long i, atomic64_t *v)
{
	register long x0 asm ("x0") = i;
	register atomic64_t *x1 asm ("x1") = v;

	asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(add),
	"	stadd	%[i], %[v]\n")
	: [i] "+r" (x0), [v] "+Q" (v->counter)
	: "r" (x1)
	: "x30");
}

static inline long atomic64_add_return(long i, atomic64_t *v)
{
	register long x0 asm ("x0") = i;
	register atomic64_t *x1 asm ("x1") = v;

	asm volatile(ARM64_LSE_ATOMIC_INSN(
	/* LL/SC */
	"	nop\n"
	__LL_SC_ATOMIC64(add_return),
	/* LSE atomics */
	"	ldaddal	%[i], x30, %[v]\n"
	"	add	%[i], %[i], x30")
	: [i] "+r" (x0), [v] "+Q" (v->counter)
	: "r" (x1)
	: "x30", "memory");

	return x0;
}

static inline void atomic64_and(long i, atomic64_t *v)
{
	register long x0 asm ("x0") = i;
	register atomic64_t *x1 asm ("x1") = v;

	asm volatile(ARM64_LSE_ATOMIC_INSN(
	/* LL/SC */
	"	nop\n"
	__LL_SC_ATOMIC64(and),
	/* LSE atomics */
	"	mvn	%[i], %[i]\n"
	"	stclr	%[i], %[v]")
	: [i] "+r" (x0), [v] "+Q" (v->counter)
	: "r" (x1)
	: "x30");
}

static inline void atomic64_sub(long i, atomic64_t *v)
{
	register long x0 asm ("x0") = i;
	register atomic64_t *x1 asm ("x1") = v;

	asm volatile(ARM64_LSE_ATOMIC_INSN(
	/* LL/SC */
	"	nop\n"
	__LL_SC_ATOMIC64(sub),
	/* LSE atomics */
	"	neg	%[i], %[i]\n"
	"	stadd	%[i], %[v]")
	: [i] "+r" (x0), [v] "+Q" (v->counter)
	: "r" (x1)
	: "x30");
}

static inline long atomic64_sub_return(long i, atomic64_t *v)
{
	register long x0 asm ("x0") = i;
	register atomic64_t *x1 asm ("x1") = v;

	asm volatile(ARM64_LSE_ATOMIC_INSN(
	/* LL/SC */
	"	nop\n"
	__LL_SC_ATOMIC64(sub_return)
	"	nop",
	/* LSE atomics */
	"	neg	%[i], %[i]\n"
	"	ldaddal	%[i], x30, %[v]\n"
	"	add	%[i], %[i], x30")
	: [i] "+r" (x0), [v] "+Q" (v->counter)
	: "r" (x1)
	: "x30", "memory");

	return x0;
}
static inline long atomic64_cmpxchg(atomic64_t *ptr, long old, long new)
{
	register unsigned long x0 asm ("x0") = (unsigned long)ptr;
	register long x1 asm ("x1") = old;
	register long x2 asm ("x2") = new;

	asm volatile(
	__LL_SC_CALL64(cmpxchg)
	: "+r" (x0)
	: "r" (x1), "r" (x2)
	asm volatile(ARM64_LSE_ATOMIC_INSN(
	/* LL/SC */
	"	nop\n"
	__LL_SC_ATOMIC64(cmpxchg)
	"	nop",
	/* LSE atomics */
	"	mov	x30, %[old]\n"
	"	casal	x30, %[new], %[v]\n"
	"	mov	%[ret], x30")
	: [ret] "+r" (x0), [v] "+Q" (ptr->counter)
	: [old] "r" (x1), [new] "r" (x2)
	: "x30", "cc", "memory");

	return x0;
@@ -156,15 +320,33 @@ static inline long atomic64_cmpxchg(atomic64_t *ptr, long old, long new)

static inline long atomic64_dec_if_positive(atomic64_t *v)
{
	register unsigned long x0 asm ("x0") = (unsigned long)v;
	register long x0 asm ("x0") = (long)v;

	asm volatile(
	__LL_SC_CALL64(dec_if_positive)
	: "+r" (x0)
	asm volatile(ARM64_LSE_ATOMIC_INSN(
	/* LL/SC */
	"	nop\n"
	__LL_SC_ATOMIC64(dec_if_positive)
	"	nop\n"
	"	nop\n"
	"	nop\n"
	"	nop\n"
	"	nop",
	/* LSE atomics */
	"1:	ldr	x30, %[v]\n"
	"	subs	%[ret], x30, #1\n"
	"	b.mi	2f\n"
	"	casal	x30, %[ret], %[v]\n"
	"	sub	x30, x30, #1\n"
	"	sub	x30, x30, %[ret]\n"
	"	cbnz	x30, 1b\n"
	"2:")
	: [ret] "+&r" (x0), [v] "+Q" (v->counter)
	:
	: "x30", "cc", "memory");

	return x0;
}

#undef __LL_SC_ATOMIC64

#endif	/* __ASM_ATOMIC_LSE_H */
arch/arm64/include/asm/lse.h +34 −0
#ifndef __ASM_LSE_H
#define __ASM_LSE_H

#if defined(CONFIG_AS_LSE) && defined(CONFIG_ARM64_LSE_ATOMICS)

#include <linux/stringify.h>

#include <asm/alternative.h>
#include <asm/cpufeature.h>

__asm__(".arch_extension	lse");

/* Move the ll/sc atomics out-of-line */
#define __LL_SC_INLINE
#define __LL_SC_PREFIX(x)	__ll_sc_##x
#define __LL_SC_EXPORT(x)	EXPORT_SYMBOL(__LL_SC_PREFIX(x))

/* Macro for constructing calls to out-of-line ll/sc atomics */
#define __LL_SC_CALL(op)	"bl\t" __stringify(__LL_SC_PREFIX(op)) "\n"

/* In-line patching at runtime */
#define ARM64_LSE_ATOMIC_INSN(llsc, lse)				\
	ALTERNATIVE(llsc, lse, ARM64_CPU_FEAT_LSE_ATOMICS)

#else

#define __LL_SC_INLINE		static inline
#define __LL_SC_PREFIX(x)	x
#define __LL_SC_EXPORT(x)

#define ARM64_LSE_ATOMIC_INSN(llsc, lse)	llsc

#endif	/* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */
#endif	/* __ASM_LSE_H */