
Commit 77e430e3 authored by Will Deacon, committed by Ingo Molnar

locking/qrwlock: Make use of _{acquire|release|relaxed}() atomics



The qrwlock implementation is slightly heavy in its use of memory
barriers, mainly through the use of _cmpxchg() and _return() atomics, which
imply full barrier semantics.

This patch modifies the qrwlock code to use the more relaxed atomic
routines so that we can reduce the unnecessary barrier overhead on
weakly-ordered architectures.
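
To make the distinction concrete, here is a minimal toy sketch of the read-trylock fast path using C11 <stdatomic.h> instead of the kernel's atomic_t API. Everything in it (names, constants) is illustrative only, not code from the patch:

/*
 * Illustrative toy read-trylock, modelled loosely on the qrwlock fast
 * path. This is NOT the kernel code: it uses C11 atomics to show where
 * acquire semantics are enough and a full barrier would be overkill.
 */
#include <stdatomic.h>
#include <stdbool.h>

#define QR_BIAS   0x100u	/* hypothetical per-reader increment */
#define QW_WMASK  0x0ffu	/* hypothetical writer-mode mask */

static _Atomic unsigned int cnts;

static bool toy_read_trylock(void)
{
	/*
	 * Speculatively add a reader. Acquire ordering keeps the
	 * critical section from floating above this RMW, which is all
	 * a lock acquisition needs; a seq_cst (full-barrier) RMW would
	 * also be correct, just costlier on weakly-ordered CPUs.
	 * QR_BIAS does not touch the writer bits, so checking the old
	 * value's mask is equivalent to checking the new one.
	 */
	unsigned int c = atomic_fetch_add_explicit(&cnts, QR_BIAS,
						   memory_order_acquire);
	if (!(c & QW_WMASK))
		return true;	/* no writer: lock acquired */

	/*
	 * A writer is active: back out. The failed attempt never
	 * entered the critical section, so relaxed ordering suffices.
	 */
	atomic_fetch_sub_explicit(&cnts, QR_BIAS, memory_order_relaxed);
	return false;
}

On x86 the acquire and full-barrier forms compile to the same LOCK-prefixed instruction for an RMW, which is why a patch like this is mostly a win for arm64, PowerPC, and other weakly-ordered architectures.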

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman.Long@hp.com
Cc: paulmck@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/1438880084-18856-7-git-send-email-will.deacon@arm.com


Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 2b2a85a4
include/asm-generic/qrwlock.h: +6 −7
@@ -68,7 +68,7 @@ static inline int queued_read_trylock(struct qrwlock *lock)
 
 	cnts = atomic_read(&lock->cnts);
 	if (likely(!(cnts & _QW_WMASK))) {
-		cnts = (u32)atomic_add_return(_QR_BIAS, &lock->cnts);
+		cnts = (u32)atomic_add_return_acquire(_QR_BIAS, &lock->cnts);
 		if (likely(!(cnts & _QW_WMASK)))
 			return 1;
 		atomic_sub(_QR_BIAS, &lock->cnts);
@@ -89,7 +89,7 @@ static inline int queued_write_trylock(struct qrwlock *lock)
 	if (unlikely(cnts))
 		return 0;
 
-	return likely(atomic_cmpxchg(&lock->cnts,
-				     cnts, cnts | _QW_LOCKED) == cnts);
+	return likely(atomic_cmpxchg_acquire(&lock->cnts,
+					     cnts, cnts | _QW_LOCKED) == cnts);
 }
 /**
@@ -100,7 +100,7 @@ static inline void queued_read_lock(struct qrwlock *lock)
 {
 	u32 cnts;
 
-	cnts = atomic_add_return(_QR_BIAS, &lock->cnts);
+	cnts = atomic_add_return_acquire(_QR_BIAS, &lock->cnts);
 	if (likely(!(cnts & _QW_WMASK)))
 		return;
 
@@ -115,7 +115,7 @@ static inline void queued_read_lock(struct qrwlock *lock)
 static inline void queued_write_lock(struct qrwlock *lock)
 {
 	/* Optimize for the unfair lock case where the fair flag is 0. */
-	if (atomic_cmpxchg(&lock->cnts, 0, _QW_LOCKED) == 0)
+	if (atomic_cmpxchg_acquire(&lock->cnts, 0, _QW_LOCKED) == 0)
 		return;
 
 	queued_write_lock_slowpath(lock);
@@ -130,8 +130,7 @@ static inline void queued_read_unlock(struct qrwlock *lock)
 	/*
 	 * Atomically decrement the reader count
 	 */
-	smp_mb__before_atomic();
-	atomic_sub(_QR_BIAS, &lock->cnts);
+	(void)atomic_sub_return_release(_QR_BIAS, &lock->cnts);
 }
 
 /**
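
The last hunk above is the one RELEASE change in the patch: smp_mb__before_atomic() plus a plain atomic_sub() collapses into a single atomic_sub_return_release(). Continuing the toy C11 sketch from above (same caveats: illustrative names, not kernel code), the unlock side pairs with the trylock's acquire like so:

/*
 * Toy counterpart to the queued_read_unlock() change: one RMW with
 * release ordering replaces "full barrier + plain decrement".
 */
static void toy_read_unlock(void)
{
	/*
	 * Release ordering makes every access in the critical section
	 * visible before the decrement is, matching the acquire in
	 * toy_read_trylock().
	 */
	atomic_fetch_sub_explicit(&cnts, QR_BIAS, memory_order_release);
}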
kernel/locking/qrwlock.c: +12 −12
@@ -55,7 +55,7 @@ rspin_until_writer_unlock(struct qrwlock *lock, u32 cnts)
 {
 	while ((cnts & _QW_WMASK) == _QW_LOCKED) {
 		cpu_relax_lowlatency();
-		cnts = smp_load_acquire((u32 *)&lock->cnts);
+		cnts = atomic_read_acquire(&lock->cnts);
 	}
 }
 
@@ -74,8 +74,9 @@ void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts)
 		 * Readers in interrupt context will get the lock immediately
 		 * if the writer is just waiting (not holding the lock yet).
 		 * The rspin_until_writer_unlock() function returns immediately
-		 * in this case. Otherwise, they will spin until the lock
-		 * is available without waiting in the queue.
+		 * in this case. Otherwise, they will spin (with ACQUIRE
+		 * semantics) until the lock is available without waiting in
+		 * the queue.
 		 */
 		rspin_until_writer_unlock(lock, cnts);
 		return;
@@ -88,12 +89,11 @@ void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts)
 	arch_spin_lock(&lock->lock);
 
 	/*
-	 * At the head of the wait queue now, increment the reader count
-	 * and wait until the writer, if it has the lock, has gone away.
-	 * At ths stage, it is not possible for a writer to remain in the
-	 * waiting state (_QW_WAITING). So there won't be any deadlock.
+	 * The ACQUIRE semantics of the following spinning code ensure
+	 * that accesses can't leak upwards out of our subsequent critical
+	 * section in the case that the lock is currently held for write.
 	 */
-	cnts = atomic_add_return(_QR_BIAS, &lock->cnts) - _QR_BIAS;
+	cnts = atomic_add_return_acquire(_QR_BIAS, &lock->cnts) - _QR_BIAS;
 	rspin_until_writer_unlock(lock, cnts);
 
 	/*
@@ -116,7 +116,7 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
 
 	/* Try to acquire the lock directly if no reader is present */
 	if (!atomic_read(&lock->cnts) &&
-	    (atomic_cmpxchg(&lock->cnts, 0, _QW_LOCKED) == 0))
+	    (atomic_cmpxchg_acquire(&lock->cnts, 0, _QW_LOCKED) == 0))
 		goto unlock;
 
 	/*
@@ -127,7 +127,7 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
 		struct __qrwlock *l = (struct __qrwlock *)lock;
 
 		if (!READ_ONCE(l->wmode) &&
-		   (cmpxchg(&l->wmode, 0, _QW_WAITING) == 0))
+		   (cmpxchg_relaxed(&l->wmode, 0, _QW_WAITING) == 0))
 			break;
 
 		cpu_relax_lowlatency();
@@ -137,7 +137,7 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
 	for (;;) {
 		cnts = atomic_read(&lock->cnts);
 		if ((cnts == _QW_WAITING) &&
-		    (atomic_cmpxchg(&lock->cnts, _QW_WAITING,
-				    _QW_LOCKED) == _QW_WAITING))
+		    (atomic_cmpxchg_acquire(&lock->cnts, _QW_WAITING,
+					    _QW_LOCKED) == _QW_WAITING))
 			break;
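
One change above weakens an operation outright rather than merely avoiding a default full barrier: cmpxchg_relaxed() when setting _QW_WAITING. Setting the waiting flag only publishes the writer's intent; no critical section begins there, so the ordering can be deferred to the final acquire cmpxchg that actually takes the lock. A toy C11 rendering of that two-step handoff, reusing the illustrative cnts counter from the earlier sketch (constants again hypothetical, not the patch's code):

#define QW_WAITING 0x001u	/* toy stand-in for _QW_WAITING */
#define QW_LOCKED  0x0ffu	/* toy stand-in for _QW_LOCKED */

static void toy_write_lock_slow(void)
{
	/*
	 * Step 1: advertise a waiting writer. Nothing is protected
	 * yet, so this RMW needs no ordering at all.
	 */
	atomic_fetch_or_explicit(&cnts, QW_WAITING, memory_order_relaxed);

	/*
	 * Step 2: wait for readers to drain, then swap WAITING for
	 * LOCKED. Only this final, successful CMPXCHG needs acquire
	 * semantics: it is what keeps the write-side critical section
	 * from floating above the lock acquisition.
	 */
	for (;;) {
		unsigned int expected = QW_WAITING;	/* reset: CAS updates it on failure */
		if (atomic_compare_exchange_weak_explicit(&cnts, &expected,
				QW_LOCKED, memory_order_acquire,
				memory_order_relaxed))
			break;
	}
}

If the flag-set were itself ordered, every iteration of the spin would pay for a barrier that the single successful lock acquisition already provides.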