locking/barriers: Replace smp_cond_acquire() with smp_cond_load_acquire() (1f03e8d2) · Commits · e / devices / android_kernel_oneplus_sm8150

include/linux/compiler.h

+19 −6

Original line number	Diff line number	Diff line
		@@ -305,21 +305,34 @@ static __always_inline void __write_once_size(volatile void p, void res, int s
		})

		/**
		* smp_cond_acquire() - Spin wait for cond with ACQUIRE ordering
		* smp_cond_load_acquire() - (Spin) wait for cond with ACQUIRE ordering
		* @ptr: pointer to the variable to wait on
		* @cond: boolean expression to wait for
		*
		* Equivalent to using smp_load_acquire() on the condition variable but employs
		* the control dependency of the wait to reduce the barrier on many platforms.
		*
		* Due to C lacking lambda expressions we load the value of *ptr into a
		* pre-named variable @VAL to be used in @cond.
		*
		* The control dependency provides a LOAD->STORE order, the additional RMB
		* provides LOAD->LOAD order, together they provide LOAD->{LOAD,STORE} order,
		* aka. ACQUIRE.
		*/
		#define smp_cond_acquire(cond) do { \
		while (!(cond)) \
		#ifndef smp_cond_load_acquire
		#define smp_cond_load_acquire(ptr, cond_expr) ({ \
		typeof(ptr) __PTR = (ptr); \
		typeof(*ptr) VAL; \
		for (;;) { \
		VAL = READ_ONCE(*__PTR); \
		if (cond_expr) \
		break; \
		cpu_relax(); \
		} \
		smp_rmb(); /* ctrl + rmb := acquire */ \
		} while (0)
		VAL; \
		})
		#endif

		#endif /* __KERNEL__ */

kernel/locking/qspinlock.c

+6 −6

Original line number	Diff line number	Diff line
		@@ -475,7 +475,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
		* sequentiality; this is because not all clear_pending_set_locked()
		* implementations imply full barriers.
		*/
		smp_cond_acquire(!(atomic_read(&lock->val) & _Q_LOCKED_MASK));
		smp_cond_load_acquire(&lock->val.counter, !(VAL & _Q_LOCKED_MASK));

		/*
		* take ownership and clear the pending bit.
		@@ -562,7 +562,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
		*
		* The PV pv_wait_head_or_lock function, if active, will acquire
		* the lock and return a non-zero value. So we have to skip the
		* smp_cond_acquire() call. As the next PV queue head hasn't been
		* smp_cond_load_acquire() call. As the next PV queue head hasn't been
		* designated yet, there is no way for the locked value to become
		* _Q_SLOW_VAL. So both the set_locked() and the
		* atomic_cmpxchg_relaxed() calls will be safe.
		@@ -573,7 +573,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
		if ((val = pv_wait_head_or_lock(lock, node)))
		goto locked;

		smp_cond_acquire(!((val = atomic_read(&lock->val)) & _Q_LOCKED_PENDING_MASK));
		val = smp_cond_load_acquire(&lock->val.counter, !(VAL & _Q_LOCKED_PENDING_MASK));

		locked:
		/*
		@@ -593,9 +593,9 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
		break;
		}
		/*
		* The smp_cond_acquire() call above has provided the necessary
		* acquire semantics required for locking. At most two
		* iterations of this loop may be ran.
		* The smp_cond_load_acquire() call above has provided the
		* necessary acquire semantics required for locking. At most
		* two iterations of this loop may be ran.
		*/
		old = atomic_cmpxchg_relaxed(&lock->val, val, _Q_LOCKED_VAL);
		if (old == val)

kernel/sched/core.c

+4 −4

Original line number	Diff line number	Diff line
		@@ -1935,7 +1935,7 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
		* chain to provide order. Instead we do:
		*
		* 1) smp_store_release(X->on_cpu, 0)
		* 2) smp_cond_acquire(!X->on_cpu)
		* 2) smp_cond_load_acquire(!X->on_cpu)
		*
		* Example:
		*
		@@ -1946,7 +1946,7 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
		* sched-out X
		* smp_store_release(X->on_cpu, 0);
		*
		* smp_cond_acquire(!X->on_cpu);
		* smp_cond_load_acquire(&X->on_cpu, !VAL);
		* X->state = WAKING
		* set_task_cpu(X,2)
		*
		@@ -1972,7 +1972,7 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
		* This means that any means of doing remote wakeups must order the CPU doing
		* the wakeup against the CPU the task is going to end up running on. This,
		* however, is already required for the regular Program-Order guarantee above,
		* since the waking CPU is the one issueing the ACQUIRE (smp_cond_acquire).
		* since the waking CPU is the one issueing the ACQUIRE (smp_cond_load_acquire).
		*
		*/

		@@ -2045,7 +2045,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
		* This ensures that tasks getting woken will be fully ordered against
		* their previous state and preserve Program Order.
		*/
		smp_cond_acquire(!p->on_cpu);
		smp_cond_load_acquire(&p->on_cpu, !VAL);

		p->sched_contributes_to_load = !!task_contributes_to_load(p);
		p->state = TASK_WAKING;

kernel/sched/sched.h

+1 −1

Original line number	Diff line number	Diff line
		@@ -1113,7 +1113,7 @@ static inline void finish_lock_switch(struct rq rq, struct task_struct prev)
		* In particular, the load of prev->state in finish_task_switch() must
		* happen before this.
		*
		* Pairs with the smp_cond_acquire() in try_to_wake_up().
		* Pairs with the smp_cond_load_acquire() in try_to_wake_up().
		*/
		smp_store_release(&prev->on_cpu, 0);
		#endif

kernel/smp.c

+1 −1

Original line number	Diff line number	Diff line
		@@ -107,7 +107,7 @@ void __init call_function_init(void)
		*/
		static __always_inline void csd_lock_wait(struct call_single_data *csd)
		{
		smp_cond_acquire(!(csd->flags & CSD_FLAG_LOCK));
		smp_cond_load_acquire(&csd->flags, !(VAL & CSD_FLAG_LOCK));
		}

		static __always_inline void csd_lock(struct call_single_data *csd)