Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e274795e authored by Peter Zijlstra's avatar Peter Zijlstra Committed by Ingo Molnar
Browse files

locking/mutex: Fix mutex handoff



While reviewing the ww_mutex patches, I noticed that it was still
possible to (incorrectly) succeed for (incorrect) code like:

	mutex_lock(&a);
	mutex_lock(&a);

This was possible if the second mutex_lock() would block (as expected)
but then receive a spurious wakeup. At that point it would find itself
at the front of the queue, request a handoff and instantly claim
ownership and continue, since owner would point to itself.

Avoid this scenario and simplify the code by introducing a third low
bit to signal handoff pickup. So once we request handoff, unlock
clears the handoff bit and sets the pickup bit along with the new
owner.

This also removes the need for the .handoff argument to
__mutex_trylock(), since that becomes superfluous with PICKUP.

In order to guarantee enough low bits, ensure task_struct alignment is
at least L1_CACHE_BYTES (which seems a good ideal regardless).

Signed-off-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 9d659ae1 ("locking/mutex: Add lock handoff to avoid starvation")
Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parent 52b94129
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -65,7 +65,7 @@ struct mutex {

static inline struct task_struct *__mutex_owner(struct mutex *lock)
{
	return (struct task_struct *)(atomic_long_read(&lock->owner) & ~0x03);
	return (struct task_struct *)(atomic_long_read(&lock->owner) & ~0x07);
}

/*
+4 −2
Original line number Diff line number Diff line
@@ -432,11 +432,13 @@ void __init fork_init(void)
	int i;
#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
#ifndef ARCH_MIN_TASKALIGN
#define ARCH_MIN_TASKALIGN	L1_CACHE_BYTES
#define ARCH_MIN_TASKALIGN	0
#endif
	int align = min_t(int, L1_CACHE_BYTES, ARCH_MIN_TASKALIGN);

	/* create a slab on which task_structs can be allocated */
	task_struct_cachep = kmem_cache_create("task_struct",
			arch_task_struct_size, ARCH_MIN_TASKALIGN,
			arch_task_struct_size, align,
			SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT, NULL);
#endif

+52 −56
Original line number Diff line number Diff line
@@ -50,16 +50,17 @@ EXPORT_SYMBOL(__mutex_init);
/*
 * @owner: contains: 'struct task_struct *' to the current lock owner,
 * NULL means not owned. Since task_struct pointers are aligned at
 * ARCH_MIN_TASKALIGN (which is at least sizeof(void *)), we have low
 * bits to store extra state.
 * at least L1_CACHE_BYTES, we have low bits to store extra state.
 *
 * Bit0 indicates a non-empty waiter list; unlock must issue a wakeup.
 * Bit1 indicates unlock needs to hand the lock to the top-waiter
 * Bit2 indicates handoff has been done and we're waiting for pickup.
 */
#define MUTEX_FLAG_WAITERS	0x01
#define MUTEX_FLAG_HANDOFF	0x02
#define MUTEX_FLAG_PICKUP	0x04

#define MUTEX_FLAGS		0x03
#define MUTEX_FLAGS		0x07

static inline struct task_struct *__owner_task(unsigned long owner)
{
@@ -72,38 +73,29 @@ static inline unsigned long __owner_flags(unsigned long owner)
}

/*
 * Actual trylock that will work on any unlocked state.
 *
 * When setting the owner field, we must preserve the low flag bits.
 *
 * Be careful with @handoff, only set that in a wait-loop (where you set
 * HANDOFF) to avoid recursive lock attempts.
 * Trylock variant that retuns the owning task on failure.
 */
static inline bool __mutex_trylock(struct mutex *lock, const bool handoff)
static inline struct task_struct *__mutex_trylock_or_owner(struct mutex *lock)
{
	unsigned long owner, curr = (unsigned long)current;

	owner = atomic_long_read(&lock->owner);
	for (;;) { /* must loop, can race against a flag */
		unsigned long old, flags = __owner_flags(owner);
		unsigned long task = owner & ~MUTEX_FLAGS;

		if (__owner_task(owner)) {
			if (handoff && unlikely(__owner_task(owner) == current)) {
				/*
				 * Provide ACQUIRE semantics for the lock-handoff.
				 *
				 * We cannot easily use load-acquire here, since
				 * the actual load is a failed cmpxchg, which
				 * doesn't imply any barriers.
				 *
				 * Also, this is a fairly unlikely scenario, and
				 * this contains the cost.
				 */
				smp_mb(); /* ACQUIRE */
				return true;
			}
		if (task) {
			if (likely(task != curr))
				break;

			return false;
			if (likely(!(flags & MUTEX_FLAG_PICKUP)))
				break;

			flags &= ~MUTEX_FLAG_PICKUP;
		} else {
#ifdef CONFIG_DEBUG_MUTEXES
			DEBUG_LOCKS_WARN_ON(flags & MUTEX_FLAG_PICKUP);
#endif
		}

		/*
@@ -111,15 +103,24 @@ static inline bool __mutex_trylock(struct mutex *lock, const bool handoff)
		 * past the point where we acquire it. This would be possible
		 * if we (accidentally) set the bit on an unlocked mutex.
		 */
		if (handoff)
		flags &= ~MUTEX_FLAG_HANDOFF;

		old = atomic_long_cmpxchg_acquire(&lock->owner, owner, curr | flags);
		if (old == owner)
			return true;
			return NULL;

		owner = old;
	}

	return __owner_task(owner);
}

/*
 * Actual trylock that will work on any unlocked state.
 */
static inline bool __mutex_trylock(struct mutex *lock)
{
	return !__mutex_trylock_or_owner(lock);
}

#ifndef CONFIG_DEBUG_LOCK_ALLOC
@@ -171,9 +172,9 @@ static inline bool __mutex_waiter_is_first(struct mutex *lock, struct mutex_wait

/*
 * Give up ownership to a specific task, when @task = NULL, this is equivalent
 * to a regular unlock. Clears HANDOFF, preserves WAITERS. Provides RELEASE
 * semantics like a regular unlock, the __mutex_trylock() provides matching
 * ACQUIRE semantics for the handoff.
 * to a regular unlock. Sets PICKUP on a handoff, clears HANDOF, preserves
 * WAITERS. Provides RELEASE semantics like a regular unlock, the
 * __mutex_trylock() provides a matching ACQUIRE semantics for the handoff.
 */
static void __mutex_handoff(struct mutex *lock, struct task_struct *task)
{
@@ -184,10 +185,13 @@ static void __mutex_handoff(struct mutex *lock, struct task_struct *task)

#ifdef CONFIG_DEBUG_MUTEXES
		DEBUG_LOCKS_WARN_ON(__owner_task(owner) != current);
		DEBUG_LOCKS_WARN_ON(owner & MUTEX_FLAG_PICKUP);
#endif

		new = (owner & MUTEX_FLAG_WAITERS);
		new |= (unsigned long)task;
		if (task)
			new |= MUTEX_FLAG_PICKUP;

		old = atomic_long_cmpxchg_release(&lock->owner, owner, new);
		if (old == owner)
@@ -435,8 +439,6 @@ static bool mutex_optimistic_spin(struct mutex *lock,
				  struct ww_acquire_ctx *ww_ctx,
				  const bool use_ww_ctx, const bool waiter)
{
	struct task_struct *task = current;

	if (!waiter) {
		/*
		 * The purpose of the mutex_can_spin_on_owner() function is
@@ -476,24 +478,17 @@ static bool mutex_optimistic_spin(struct mutex *lock,
				goto fail_unlock;
		}

		/* Try to acquire the mutex... */
		owner = __mutex_trylock_or_owner(lock);
		if (!owner)
			break;

		/*
		 * If there's an owner, wait for it to either
		 * There's an owner, wait for it to either
		 * release the lock or go to sleep.
		 */
		owner = __mutex_owner(lock);
		if (owner) {
			if (waiter && owner == task) {
				smp_mb(); /* ACQUIRE */
				break;
			}

		if (!mutex_spin_on_owner(lock, owner))
			goto fail_unlock;
		}

		/* Try to acquire the mutex if it is unlocked. */
		if (__mutex_trylock(lock, waiter))
			break;

		/*
		 * The cpu_relax() call is a compiler barrier which forces
@@ -637,7 +632,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
	preempt_disable();
	mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);

	if (__mutex_trylock(lock, false) ||
	if (__mutex_trylock(lock) ||
	    mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, false)) {
		/* got the lock, yay! */
		lock_acquired(&lock->dep_map, ip);
@@ -651,7 +646,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
	/*
	 * After waiting to acquire the wait_lock, try again.
	 */
	if (__mutex_trylock(lock, false))
	if (__mutex_trylock(lock))
		goto skip_wait;

	debug_mutex_lock_common(lock, &waiter);
@@ -674,7 +669,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
		 * before testing the error conditions to make sure we pick up
		 * the handoff.
		 */
		if (__mutex_trylock(lock, first))
		if (__mutex_trylock(lock))
			goto acquired;

		/*
@@ -707,8 +702,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
		 * state back to RUNNING and fall through the next schedule(),
		 * or we must see its unlock and acquire.
		 */
		if ((first && mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, true)) ||
		     __mutex_trylock(lock, first))
		if (__mutex_trylock(lock) ||
		    (first && mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, true)))
			break;

		spin_lock_mutex(&lock->wait_lock, flags);
@@ -865,6 +860,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne

#ifdef CONFIG_DEBUG_MUTEXES
		DEBUG_LOCKS_WARN_ON(__owner_task(owner) != current);
		DEBUG_LOCKS_WARN_ON(owner & MUTEX_FLAG_PICKUP);
#endif

		if (owner & MUTEX_FLAG_HANDOFF)
@@ -1003,7 +999,7 @@ __ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock,
 */
int __sched mutex_trylock(struct mutex *lock)
{
	bool locked = __mutex_trylock(lock, false);
	bool locked = __mutex_trylock(lock);

	if (locked)
		mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);