Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 500c2e1f authored by David Daney's avatar David Daney Committed by Ralf Baechle
Browse files

MIPS: Optimize spinlocks.



The current locking mechanism uses a ll/sc sequence to release a
spinlock.  This is slower than a wmb() followed by a store to unlock.

The branching forward to .subsection 2 on sc failure slows down the
contended case.  So we get rid of that part too.

Since we are now working on naturally aligned u16 values, we can get
rid of a masking operation as the LHU already does the right thing.
The ANDI are reversed for better scheduling on multi-issue CPUs

On a 12 CPU 750MHz Octeon cn5750 this patch improves ipv4 UDP packet
forwarding rates from 3.58*10^6 PPS to 3.99*10^6 PPS, or about 11%.

Signed-off-by: default avatarDavid Daney <ddaney@caviumnetworks.com>
To: linux-mips@linux-mips.org
Patchwork: http://patchwork.linux-mips.org/patch/937/


Signed-off-by: default avatarRalf Baechle <ralf@linux-mips.org>
parent e275ed5e
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -168,8 +168,14 @@

#ifdef CONFIG_CPU_CAVIUM_OCTEON
#define smp_mb__before_llsc() smp_wmb()
/* Cause previous writes to become visible on all CPUs as soon as possible */
#define nudge_writes() __asm__ __volatile__(".set push\n\t"		\
					    ".set arch=octeon\n\t"	\
					    "syncw\n\t"			\
					    ".set pop" : : : "memory")
#else
#define smp_mb__before_llsc() smp_llsc_mb()
#define nudge_writes() mb()
#endif

#endif /* __ASM_BARRIER_H */
+43 −75
Original line number Diff line number Diff line
@@ -36,9 +36,9 @@

static inline int arch_spin_is_locked(arch_spinlock_t *lock)
{
	unsigned int counters = ACCESS_ONCE(lock->lock);
	u32 counters = ACCESS_ONCE(lock->lock);

	return ((counters >> 14) ^ counters) & 0x1fff;
	return ((counters >> 16) ^ counters) & 0xffff;
}

#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
@@ -47,9 +47,9 @@ static inline int arch_spin_is_locked(arch_spinlock_t *lock)

static inline int arch_spin_is_contended(arch_spinlock_t *lock)
{
	unsigned int counters = ACCESS_ONCE(lock->lock);
	u32 counters = ACCESS_ONCE(lock->lock);

	return (((counters >> 14) - counters) & 0x1fff) > 1;
	return (((counters >> 16) - counters) & 0xffff) > 1;
}
#define arch_spin_is_contended	arch_spin_is_contended

@@ -57,6 +57,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
{
	int my_ticket;
	int tmp;
	int inc = 0x10000;

	if (R10000_LLSC_WAR) {
		__asm__ __volatile__ (
@@ -64,25 +65,24 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
		"	.set noreorder					\n"
		"							\n"
		"1:	ll	%[ticket], %[ticket_ptr]		\n"
		"	addiu	%[my_ticket], %[ticket], 0x4000		\n"
		"	addu	%[my_ticket], %[ticket], %[inc]		\n"
		"	sc	%[my_ticket], %[ticket_ptr]		\n"
		"	beqzl	%[my_ticket], 1b			\n"
		"	 nop						\n"
		"	srl	%[my_ticket], %[ticket], 14		\n"
		"	andi	%[my_ticket], %[my_ticket], 0x1fff	\n"
		"	andi	%[ticket], %[ticket], 0x1fff		\n"
		"	srl	%[my_ticket], %[ticket], 16		\n"
		"	andi	%[ticket], %[ticket], 0xffff		\n"
		"	andi	%[my_ticket], %[my_ticket], 0xffff	\n"
		"	bne	%[ticket], %[my_ticket], 4f		\n"
		"	 subu	%[ticket], %[my_ticket], %[ticket]	\n"
		"2:							\n"
		"	.subsection 2					\n"
		"4:	andi	%[ticket], %[ticket], 0x1fff		\n"
		"4:	andi	%[ticket], %[ticket], 0xffff		\n"
		"	sll	%[ticket], 5				\n"
		"							\n"
		"6:	bnez	%[ticket], 6b				\n"
		"	 subu	%[ticket], 1				\n"
		"							\n"
		"	lw	%[ticket], %[ticket_ptr]		\n"
		"	andi	%[ticket], %[ticket], 0x1fff		\n"
		"	lhu	%[ticket], %[serving_now_ptr]		\n"
		"	beq	%[ticket], %[my_ticket], 2b		\n"
		"	 subu	%[ticket], %[my_ticket], %[ticket]	\n"
		"	b	4b					\n"
@@ -90,36 +90,33 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
		"	.previous					\n"
		"	.set pop					\n"
		: [ticket_ptr] "+m" (lock->lock),
		  [serving_now_ptr] "+m" (lock->h.serving_now),
		  [ticket] "=&r" (tmp),
		  [my_ticket] "=&r" (my_ticket));
		  [my_ticket] "=&r" (my_ticket)
		: [inc] "r" (inc));
	} else {
		__asm__ __volatile__ (
		"	.set push		# arch_spin_lock	\n"
		"	.set noreorder					\n"
		"							\n"
		"	ll	%[ticket], %[ticket_ptr]		\n"
		"1:	addiu	%[my_ticket], %[ticket], 0x4000		\n"
		"1:	ll	%[ticket], %[ticket_ptr]		\n"
		"	addu	%[my_ticket], %[ticket], %[inc]		\n"
		"	sc	%[my_ticket], %[ticket_ptr]		\n"
		"	beqz	%[my_ticket], 3f			\n"
		"	 nop						\n"
		"	srl	%[my_ticket], %[ticket], 14		\n"
		"	andi	%[my_ticket], %[my_ticket], 0x1fff	\n"
		"	andi	%[ticket], %[ticket], 0x1fff		\n"
		"	beqz	%[my_ticket], 1b			\n"
		"	 srl	%[my_ticket], %[ticket], 16		\n"
		"	andi	%[ticket], %[ticket], 0xffff		\n"
		"	andi	%[my_ticket], %[my_ticket], 0xffff	\n"
		"	bne	%[ticket], %[my_ticket], 4f		\n"
		"	 subu	%[ticket], %[my_ticket], %[ticket]	\n"
		"2:							\n"
		"	.subsection 2					\n"
		"3:	b	1b					\n"
		"	 ll	%[ticket], %[ticket_ptr]		\n"
		"							\n"
		"4:	andi	%[ticket], %[ticket], 0x1fff		\n"
		"	sll	%[ticket], 5				\n"
		"							\n"
		"6:	bnez	%[ticket], 6b				\n"
		"	 subu	%[ticket], 1				\n"
		"							\n"
		"	lw	%[ticket], %[ticket_ptr]		\n"
		"	andi	%[ticket], %[ticket], 0x1fff		\n"
		"	lhu	%[ticket], %[serving_now_ptr]		\n"
		"	beq	%[ticket], %[my_ticket], 2b		\n"
		"	 subu	%[ticket], %[my_ticket], %[ticket]	\n"
		"	b	4b					\n"
@@ -127,8 +124,10 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
		"	.previous					\n"
		"	.set pop					\n"
		: [ticket_ptr] "+m" (lock->lock),
		  [serving_now_ptr] "+m" (lock->h.serving_now),
		  [ticket] "=&r" (tmp),
		  [my_ticket] "=&r" (my_ticket));
		  [my_ticket] "=&r" (my_ticket)
		: [inc] "r" (inc));
	}

	smp_llsc_mb();
@@ -136,47 +135,16 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)

static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
	int tmp;

	smp_mb__before_llsc();

	if (R10000_LLSC_WAR) {
		__asm__ __volatile__ (
		"				# arch_spin_unlock	\n"
		"1:	ll	%[ticket], %[ticket_ptr]		\n"
		"	addiu	%[ticket], %[ticket], 1			\n"
		"	ori	%[ticket], %[ticket], 0x2000		\n"
		"	xori	%[ticket], %[ticket], 0x2000		\n"
		"	sc	%[ticket], %[ticket_ptr]		\n"
		"	beqzl	%[ticket], 1b				\n"
		: [ticket_ptr] "+m" (lock->lock),
		  [ticket] "=&r" (tmp));
	} else {
		__asm__ __volatile__ (
		"	.set push		# arch_spin_unlock	\n"
		"	.set noreorder					\n"
		"							\n"
		"	ll	%[ticket], %[ticket_ptr]		\n"
		"1:	addiu	%[ticket], %[ticket], 1			\n"
		"	ori	%[ticket], %[ticket], 0x2000		\n"
		"	xori	%[ticket], %[ticket], 0x2000		\n"
		"	sc	%[ticket], %[ticket_ptr]		\n"
		"	beqz	%[ticket], 2f				\n"
		"	 nop						\n"
		"							\n"
		"	.subsection 2					\n"
		"2:	b	1b					\n"
		"	 ll	%[ticket], %[ticket_ptr]		\n"
		"	.previous					\n"
		"	.set pop					\n"
		: [ticket_ptr] "+m" (lock->lock),
		  [ticket] "=&r" (tmp));
	}
	unsigned int serving_now = lock->h.serving_now + 1;
	wmb();
	lock->h.serving_now = (u16)serving_now;
	nudge_writes();
}

static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock)
{
	int tmp, tmp2, tmp3;
	int inc = 0x10000;

	if (R10000_LLSC_WAR) {
		__asm__ __volatile__ (
@@ -184,11 +152,11 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock)
		"	.set noreorder					\n"
		"							\n"
		"1:	ll	%[ticket], %[ticket_ptr]		\n"
		"	srl	%[my_ticket], %[ticket], 14		\n"
		"	andi	%[my_ticket], %[my_ticket], 0x1fff	\n"
		"	andi	%[now_serving], %[ticket], 0x1fff	\n"
		"	srl	%[my_ticket], %[ticket], 16		\n"
		"	andi	%[my_ticket], %[my_ticket], 0xffff	\n"
		"	andi	%[now_serving], %[ticket], 0xffff	\n"
		"	bne	%[my_ticket], %[now_serving], 3f	\n"
		"	 addiu	%[ticket], %[ticket], 0x4000		\n"
		"	 addu	%[ticket], %[ticket], %[inc]		\n"
		"	sc	%[ticket], %[ticket_ptr]		\n"
		"	beqzl	%[ticket], 1b				\n"
		"	 li	%[ticket], 1				\n"
@@ -201,33 +169,33 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock)
		: [ticket_ptr] "+m" (lock->lock),
		  [ticket] "=&r" (tmp),
		  [my_ticket] "=&r" (tmp2),
		  [now_serving] "=&r" (tmp3));
		  [now_serving] "=&r" (tmp3)
		: [inc] "r" (inc));
	} else {
		__asm__ __volatile__ (
		"	.set push		# arch_spin_trylock	\n"
		"	.set noreorder					\n"
		"							\n"
		"	ll	%[ticket], %[ticket_ptr]		\n"
		"1:	srl	%[my_ticket], %[ticket], 14		\n"
		"	andi	%[my_ticket], %[my_ticket], 0x1fff	\n"
		"	andi	%[now_serving], %[ticket], 0x1fff	\n"
		"1:	ll	%[ticket], %[ticket_ptr]		\n"
		"	srl	%[my_ticket], %[ticket], 16		\n"
		"	andi	%[my_ticket], %[my_ticket], 0xffff	\n"
		"	andi	%[now_serving], %[ticket], 0xffff	\n"
		"	bne	%[my_ticket], %[now_serving], 3f	\n"
		"	 addiu	%[ticket], %[ticket], 0x4000		\n"
		"	 addu	%[ticket], %[ticket], %[inc]		\n"
		"	sc	%[ticket], %[ticket_ptr]		\n"
		"	beqz	%[ticket], 4f				\n"
		"	beqz	%[ticket], 1b				\n"
		"	 li	%[ticket], 1				\n"
		"2:							\n"
		"	.subsection 2					\n"
		"3:	b	2b					\n"
		"	 li	%[ticket], 0				\n"
		"4:	b	1b					\n"
		"	 ll	%[ticket], %[ticket_ptr]		\n"
		"	.previous					\n"
		"	.set pop					\n"
		: [ticket_ptr] "+m" (lock->lock),
		  [ticket] "=&r" (tmp),
		  [my_ticket] "=&r" (tmp2),
		  [now_serving] "=&r" (tmp3));
		  [now_serving] "=&r" (tmp3)
		: [inc] "r" (inc));
	}

	smp_llsc_mb();
+18 −6
Original line number Diff line number Diff line
@@ -5,16 +5,28 @@
# error "please don't include this file directly"
#endif

typedef struct {
#include <linux/types.h>

#include <asm/byteorder.h>

typedef union {
	/*
	 * bits  0..13: serving_now
	 * bits 14    : junk data
	 * bits 15..28: ticket
	 * bits  0..15 : serving_now
	 * bits 16..31 : ticket
	 */
	unsigned int lock;
	u32 lock;
	struct {
#ifdef __BIG_ENDIAN
		u16 ticket;
		u16 serving_now;
#else
		u16 serving_now;
		u16 ticket;
#endif
	} h;
} arch_spinlock_t;

#define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 }
#define __ARCH_SPIN_LOCK_UNLOCKED	{ .lock = 0 }

typedef struct {
	volatile unsigned int lock;