
Unverified Commit 1c6c1ca3 authored by Peter Zijlstra, committed by Paul Burton

mips/atomic: Fix loongson_llsc_mb() wreckage



The comment describing the loongson_llsc_mb() reorder case doesn't
make any sense whatsoever. Instruction re-ordering is not an SMP
artifact, but rather a CPU-local phenomenon. Clarify the comment by
explaining that these issues cause a coherence failure.

For the branch speculation case: if futex_atomic_cmpxchg_inatomic()
needs a barrier at the bne branch target, then surely the normal
__cmpxchg_asm() implementation does too. We cannot rely on the
barriers from cmpxchg(), because cmpxchg_local() is implemented with
the same macro, and branch prediction and speculation are likewise
CPU local.
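
As an illustration only (a minimal sketch, not the kernel's actual
__cmpxchg_asm() macro; operand constraints, .set/ISA directives are
simplified, and loongson_llsc_mb() is assumed to come from
<asm/barrier.h>), this is where the two barriers land around an LL/SC
compare-and-exchange loop: one before the LL for the reordering case,
and one immediately after the asm block, i.e. at the mismatch-branch
target, for the speculation case:

/*
 * Minimal sketch, for illustration only: not the kernel's
 * __cmpxchg_asm() macro.
 */
static inline unsigned int cmpxchg32_sketch(volatile unsigned int *p,
					    unsigned int old, unsigned int new)
{
	unsigned int ret, tmp;

	loongson_llsc_mb();		/* SYNC before the LL: reordering case */
	__asm__ __volatile__(
	"1:	ll	%0, %2		\n"	/* load-linked current value        */
	"	bne	%0, %3, 2f	\n"	/* mismatch: branch out of the loop */
	"	move	%1, %4		\n"
	"	sc	%1, %2		\n"	/* try to store the new value       */
	"	beqz	%1, 1b		\n"	/* SC failed: retry                 */
	"2:				\n"
	: "=&r" (ret), "=&r" (tmp), "+m" (*p)
	: "r" (old), "r" (new)
	: "memory");
	loongson_llsc_mb();		/* SYNC at the 2: target: speculation case */

	return ret;
}

Since cmpxchg_local() is built from the same loop, the SYNCs have to be
emitted by the loop itself rather than by cmpxchg()'s surrounding
barriers.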

Fixes: e02e07e3 ("MIPS: Loongson: Introduce and use loongson_llsc_mb()")
Cc: Huacai Chen <chenhc@lemote.com>
Cc: Huang Pei <huangpei@loongson.cn>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Paul Burton <paul.burton@mips.com>
parent dfc8d8de
+3 −2
@@ -193,6 +193,7 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
 	if (kernel_uses_llsc) {
 		int temp;
 
+		loongson_llsc_mb();
 		__asm__ __volatile__(
 		"	.set	push					\n"
 		"	.set	"MIPS_ISA_LEVEL"			\n"
@@ -200,12 +201,12 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
 		"	.set	pop					\n"
 		"	subu	%0, %1, %3				\n"
 		"	move	%1, %0					\n"
-		"	bltz	%0, 1f					\n"
+		"	bltz	%0, 2f					\n"
 		"	.set	push					\n"
 		"	.set	"MIPS_ISA_LEVEL"			\n"
 		"	sc	%1, %2					\n"
 		"\t" __scbeqz "	%1, 1b					\n"
-		"1:							\n"
+		"2:							\n"
 		"	.set	pop					\n"
 		: "=&r" (result), "=&r" (temp),
 		  "+" GCC_OFF_SMALL_ASM() (v->counter)
+18 −14
@@ -238,36 +238,40 @@

 /*
  * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load,
- * store or pref) in between an ll & sc can cause the sc instruction to
+ * store or prefetch) in between an LL & SC can cause the SC instruction to
  * erroneously succeed, breaking atomicity. Whilst it's unusual to write code
  * containing such sequences, this bug bites harder than we might otherwise
  * expect due to reordering & speculation:
  *
- * 1) A memory access appearing prior to the ll in program order may actually
- *    be executed after the ll - this is the reordering case.
+ * 1) A memory access appearing prior to the LL in program order may actually
+ *    be executed after the LL - this is the reordering case.
  *
- *    In order to avoid this we need to place a memory barrier (ie. a sync
- *    instruction) prior to every ll instruction, in between it & any earlier
- *    memory access instructions. Many of these cases are already covered by
- *    smp_mb__before_llsc() but for the remaining cases, typically ones in
- *    which multiple CPUs may operate on a memory location but ordering is not
- *    usually guaranteed, we use loongson_llsc_mb() below.
+ *    In order to avoid this we need to place a memory barrier (ie. a SYNC
+ *    instruction) prior to every LL instruction, in between it and any earlier
+ *    memory access instructions.
  *
  *    This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later.
  *
- * 2) If a conditional branch exists between an ll & sc with a target outside
- *    of the ll-sc loop, for example an exit upon value mismatch in cmpxchg()
+ * 2) If a conditional branch exists between an LL & SC with a target outside
+ *    of the LL-SC loop, for example an exit upon value mismatch in cmpxchg()
  *    or similar, then misprediction of the branch may allow speculative
- *    execution of memory accesses from outside of the ll-sc loop.
+ *    execution of memory accesses from outside of the LL-SC loop.
  *
- *    In order to avoid this we need a memory barrier (ie. a sync instruction)
+ *    In order to avoid this we need a memory barrier (ie. a SYNC instruction)
  *    at each affected branch target, for which we also use loongson_llsc_mb()
  *    defined below.
  *
  *    This case affects all current Loongson 3 CPUs.
+ *
+ * The above described cases cause an error in the cache coherence protocol;
+ * such that the Invalidate of a competing LL-SC goes 'missing' and SC
+ * erroneously observes its core still has Exclusive state and lets the SC
+ * proceed.
+ *
+ * Therefore the error only occurs on SMP systems.
  */
 #ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS /* Loongson-3's LLSC workaround */
-#define loongson_llsc_mb()	__asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
+#define loongson_llsc_mb()	__asm__ __volatile__("sync" : : :"memory")
 #else
 #define loongson_llsc_mb()	do { } while (0)
 #endif
+5 −0
@@ -249,6 +249,7 @@ static inline int test_and_set_bit(unsigned long nr,
 		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
 		unsigned long temp;
 
+		loongson_llsc_mb();
 		do {
 			__asm__ __volatile__(
 			"	.set	push				\n"
@@ -305,6 +306,7 @@ static inline int test_and_set_bit_lock(unsigned long nr,
 		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
 		unsigned long temp;
 
+		loongson_llsc_mb();
 		do {
 			__asm__ __volatile__(
 			"	.set	push				\n"
@@ -364,6 +366,7 @@ static inline int test_and_clear_bit(unsigned long nr,
 		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
 		unsigned long temp;
 
+		loongson_llsc_mb();
 		do {
 			__asm__ __volatile__(
 			"	" __LL	"%0, %1 # test_and_clear_bit	\n"
@@ -379,6 +382,7 @@ static inline int test_and_clear_bit(unsigned long nr,
 		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
 		unsigned long temp;
 
+		loongson_llsc_mb();
 		do {
 			__asm__ __volatile__(
 			"	.set	push				\n"
@@ -438,6 +442,7 @@ static inline int test_and_change_bit(unsigned long nr,
 		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
 		unsigned long temp;
 
+		loongson_llsc_mb();
 		do {
 			__asm__ __volatile__(
 			"	.set	push				\n"
+5 −0
@@ -46,6 +46,7 @@ extern unsigned long __xchg_called_with_bad_pointer(void)
 	__typeof(*(m)) __ret;						\
 									\
 	if (kernel_uses_llsc) {						\
+		loongson_llsc_mb();					\
 		__asm__ __volatile__(					\
 		"	.set	push				\n"	\
 		"	.set	noat				\n"	\
@@ -117,6 +118,7 @@ static inline unsigned long __xchg(volatile void *ptr, unsigned long x,
 	__typeof(*(m)) __ret;						\
 									\
 	if (kernel_uses_llsc) {						\
+		loongson_llsc_mb();					\
 		__asm__ __volatile__(					\
 		"	.set	push				\n"	\
 		"	.set	noat				\n"	\
@@ -134,6 +136,7 @@ static inline unsigned long __xchg(volatile void *ptr, unsigned long x,
 		: "=&r" (__ret), "=" GCC_OFF_SMALL_ASM() (*m)		\
 		: GCC_OFF_SMALL_ASM() (*m), "Jr" (old), "Jr" (new)		\
 		: "memory");						\
+		loongson_llsc_mb();					\
 	} else {							\
 		unsigned long __flags;					\
 									\
@@ -229,6 +232,7 @@ static inline unsigned long __cmpxchg64(volatile void *ptr,
 	 */
 	local_irq_save(flags);
 
+	loongson_llsc_mb();
 	asm volatile(
 	"	.set	push				\n"
 	"	.set	" MIPS_ISA_ARCH_LEVEL "		\n"
@@ -274,6 +278,7 @@ static inline unsigned long __cmpxchg64(volatile void *ptr,
 	  "r" (old),
 	  "r" (new)
 	: "memory");
+	loongson_llsc_mb();
 
 	local_irq_restore(flags);
 	return ret;
+1 −0
@@ -132,6 +132,7 @@ static inline int mips_atomic_set(unsigned long addr, unsigned long new)
		  [efault] "i" (-EFAULT)
		: "memory");
	} else if (cpu_has_llsc) {
		loongson_llsc_mb();
		__asm__ __volatile__ (
		"	.set	push					\n"
		"	.set	"MIPS_ISA_ARCH_LEVEL"			\n"