
Commit a8ddac7e authored by Nick Piggin, committed by Linus Torvalds

mutex: speed up generic mutex implementations



- atomic operations which both modify the variable and return something imply
  full smp memory barriers before and after the memory operations involved
  (failing atomic_cmpxchg, atomic_add_unless, etc. don't imply a barrier because
  they don't modify the target). See Documentation/atomic_ops.txt.
  So remove extra barriers and branches.

- All architectures support atomic_cmpxchg. This has no relation to
  __HAVE_ARCH_CMPXCHG. We can just take the atomic_cmpxchg path unconditionally
  (see the sketch after this list).
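
To make the two points above concrete, a minimal kernel-style sketch follows. The my_fastpath_* helpers are hypothetical illustrations in the spirit of the patched headers, and the include lines are indicative rather than taken from this commit: value-returning atomic ops such as atomic_dec_return() and a successful atomic_cmpxchg() already act as full barriers, so the fastpath needs neither an explicit smp_mb() nor a __HAVE_ARCH_CMPXCHG fallback.

#include <linux/compiler.h>	/* likely()/unlikely() */
#include <linux/atomic.h>	/* atomic_t, atomic_dec_return(), atomic_cmpxchg() */

/* Hypothetical fastpath helpers in the style of the patched headers. */
static inline void my_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
{
	/*
	 * atomic_dec_return() modifies *count and returns a value, so it
	 * implies a full smp memory barrier before and after; no extra
	 * smp_mb() or else-branch is needed on the uncontended path.
	 */
	if (unlikely(atomic_dec_return(count) < 0))
		fail_fn(count);		/* contended: take the slowpath */
}

static inline int my_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
{
	/*
	 * atomic_cmpxchg() exists on every architecture, so no
	 * __HAVE_ARCH_CMPXCHG fallback is required.  A successful cmpxchg
	 * implies a full barrier; a failed one does not, which is harmless
	 * because a failed trylock acquires nothing to order against.
	 */
	if (likely(atomic_cmpxchg(count, 1, 0) == 1))
		return 1;
	return 0;
}

The same reasoning is what lets the patch below drop the explicit barriers and branches from both generic implementations.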

This reduces a simple single-threaded fastpath lock+unlock test from 590 cycles
to 203 cycles on a ppc970 system.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 5a439c56
include/asm-generic/mutex-dec.h: +2 −24
@@ -22,8 +22,6 @@ __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
 {
 	if (unlikely(atomic_dec_return(count) < 0))
 		fail_fn(count);
-	else
-		smp_mb();
 }
 
 /**
@@ -41,11 +39,8 @@ __mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
 {
 	if (unlikely(atomic_dec_return(count) < 0))
 		return fail_fn(count);
-	else {
-		smp_mb();
-		return 0;
-	}
+	return 0;
 }
 
 /**
  *  __mutex_fastpath_unlock - try to promote the count from 0 to 1
@@ -63,7 +58,6 @@ __mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
 static inline void
 __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
 {
-	smp_mb();
 	if (unlikely(atomic_inc_return(count) <= 0))
 		fail_fn(count);
 }
@@ -88,25 +82,9 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
 static inline int
 __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
 {
-	/*
-	 * We have two variants here. The cmpxchg based one is the best one
-	 * because it never induce a false contention state.  It is included
-	 * here because architectures using the inc/dec algorithms over the
-	 * xchg ones are much more likely to support cmpxchg natively.
-	 *
-	 * If not we fall back to the spinlock based variant - that is
-	 * just as efficient (and simpler) as a 'destructive' probing of
-	 * the mutex state would be.
-	 */
-#ifdef __HAVE_ARCH_CMPXCHG
-	if (likely(atomic_cmpxchg(count, 1, 0) == 1)) {
-		smp_mb();
+	if (likely(atomic_cmpxchg(count, 1, 0) == 1))
 		return 1;
-	}
 	return 0;
-#else
-	return fail_fn(count);
-#endif
 }
 
 #endif
include/asm-generic/mutex-xchg.h: +1 −8
@@ -27,8 +27,6 @@ __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
 {
 	if (unlikely(atomic_xchg(count, 0) != 1))
 		fail_fn(count);
-	else
-		smp_mb();
 }
 
 /**
@@ -46,11 +44,8 @@ __mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
 {
 	if (unlikely(atomic_xchg(count, 0) != 1))
 		return fail_fn(count);
-	else {
-		smp_mb();
-		return 0;
-	}
+	return 0;
 }
 
 /**
  *  __mutex_fastpath_unlock - try to promote the mutex from 0 to 1
@@ -67,7 +62,6 @@ __mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
 static inline void
 __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
 {
-	smp_mb();
 	if (unlikely(atomic_xchg(count, 1) != 0))
 		fail_fn(count);
 }
@@ -110,7 +104,6 @@ __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
 		if (prev < 0)
 			prev = 0;
 	}
-	smp_mb();
 
 	return prev;
 }
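
For context on how these helpers are consumed, here is a rough sketch of the caller side, loosely modeled on kernel/mutex.c of that era; the my_* functions and empty slowpath stubs are hypothetical stand-ins, and the debug/lockdep variants are omitted.

#include <linux/kernel.h>	/* might_sleep() */
#include <linux/mutex.h>	/* struct mutex */
#include <asm/mutex.h>		/* selects one of the generic fastpath headers */

/*
 * Hypothetical stand-ins for kernel/mutex.c's static slowpath routines;
 * the real ones queue the caller on the wait list or wake a waiter under
 * the mutex's wait_lock.  Bodies omitted here.
 */
static void my_lock_slowpath(atomic_t *count) { }
static void my_unlock_slowpath(atomic_t *count) { }

/* count: 1 = unlocked, 0 = locked, negative = locked with waiters. */
void my_mutex_lock(struct mutex *lock)
{
	might_sleep();
	/*
	 * The uncontended case is a single atomic_dec_return() (or
	 * atomic_xchg()), whose implied full barrier provides the acquire
	 * ordering; only contention falls back to the slowpath.
	 */
	__mutex_fastpath_lock(&lock->count, my_lock_slowpath);
}

void my_mutex_unlock(struct mutex *lock)
{
	/*
	 * Likewise, the implied barrier of atomic_inc_return() or
	 * atomic_xchg() orders the critical section before the release;
	 * the slowpath only runs when waiters need waking.
	 */
	__mutex_fastpath_unlock(&lock->count, my_unlock_slowpath);
}

With the patch applied, the common uncontended calls above reduce to a single atomic read-modify-write with no separate barrier or branch, which is where the reported drop from 590 to 203 cycles comes from.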