
Commit bafaecd1 authored by Linus Torvalds, committed by H. Peter Anvin

x86-64: support native xadd rwsem implementation



This one is much faster than the spinlock-based fallback rwsem code,
with certain artificial benchmarks having shown 300%+ improvement on
threaded page faults etc.

Again, note the 32767-thread limit here. So this really does need that
whole "make rwsem_count_t be 64-bit and fix the BIAS values to match"
extension on top of it, but that is conceptually a totally independent
issue.
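
For context, the 32767 limit comes from the current count layout, where
the active-holder count lives in the low 16 bits of a 32-bit word. A
rough sketch of that layout, and of what the 64-bit extension alluded
to above might look like (illustrative only, not the kernel's literal
asm/rwsem.h definitions):

	/* current 32-bit count: active holders in the low 16 bits
	 * (signed, hence the 32767 limit), waiter bias above that */
	#define RWSEM_ACTIVE_BIAS_32	0x00000001
	#define RWSEM_ACTIVE_MASK_32	0x0000ffff
	#define RWSEM_WAITING_BIAS_32	(-0x00010000)

	/* hypothetical 64-bit layout: widen the active field to 32 bits
	 * and move the waiter bias up to match */
	#define RWSEM_ACTIVE_BIAS_64	0x00000001L
	#define RWSEM_ACTIVE_MASK_64	0xffffffffL
	#define RWSEM_WAITING_BIAS_64	(-0x100000000L)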

NOT TESTED! The original patch that this all was based on was tested by
KAMEZAWA Hiroyuki, but maybe I screwed up something when I created the
cleaned-up series, so caveat emptor..

Also note that it _may_ be a good idea to mark some more registers as
clobbered in the x86-64 inline asms instead of saving/restoring them in
the stubs. They are inline functions, but they are only used in places
where there are not a lot of live registers _anyway_, so clobbering
%r8-%r11 in the asm, for example, wouldn't make the fast-path code any
worse, and would make the slow-path code smaller.

(Not that the slow-path really matters to that degree. Saving a few
unnecessary registers is the _least_ of our problems when we hit the slow
path. The instruction/cycle counting really only matters in the fast
path).
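
As a rough illustration of what that would mean (a sketch in kernel
style, not the actual asm/rwsem.h code), the read-lock fast path could
list those registers in its clobber list, and the call_rwsem_* stubs
added below could then drop the corresponding pushes/pops:

	struct rw_semaphore {
		int count;
		/* wait_lock, wait_list etc. elided */
	};

	static inline void down_read_fastpath(struct rw_semaphore *sem)
	{
		asm volatile("lock; incl %0\n\t"	/* add the read bias */
			     "jns 1f\n\t"		/* no writer/waiters: done */
			     "call call_rwsem_down_read_failed\n"
			     "1:"
			     : "+m" (sem->count)
			     : "a" (sem)		/* stub expects sem in %rax */
			     : "memory", "cc",
			       /* the extra clobbers suggested above: */
			       "r8", "r9", "r10", "r11");
	}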

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <alpine.LFD.2.00.1001121810410.17145@localhost.localdomain>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
parent 5d0b7235
+1 −1
@@ -319,7 +319,7 @@ config X86_L1_CACHE_SHIFT

config X86_XADD
	def_bool y
-	depends on X86_32 && !M386
+	depends on X86_64 || !M386

config X86_PPRO_FENCE
	bool "PentiumPro memory ordering errata workaround"
+1 −0
@@ -39,4 +39,5 @@ else
        lib-y += thunk_64.o clear_page_64.o copy_page_64.o
        lib-y += memmove_64.o memset_64.o
        lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o
+	lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o
endif
+81 −0
/*
 * x86-64 rwsem wrappers
 *
 * This interfaces the inline asm code to the slow-path
 * C routines. We need to save the call-clobbered regs
 * that the asm does not mark as clobbered, and move the
 * argument from %rax to %rdi.
 *
 * NOTE! We don't need to save %rax, because the functions
 * will always return the semaphore pointer in %rax (which
 * is also the input argument to these helpers)
 *
 * The following can clobber %rdx because the asm clobbers it:
 *   call_rwsem_down_write_failed
 *   call_rwsem_wake
 * but %rdi, %rsi, %rcx, %r8-r11 always need saving.
 */

#include <linux/linkage.h>
#include <asm/rwlock.h>
#include <asm/alternative-asm.h>
#include <asm/frame.h>
#include <asm/dwarf2.h>

#define save_common_regs \
	pushq %rdi; \
	pushq %rsi; \
	pushq %rcx; \
	pushq %r8; \
	pushq %r9; \
	pushq %r10; \
	pushq %r11

#define restore_common_regs \
	popq %r11; \
	popq %r10; \
	popq %r9; \
	popq %r8; \
	popq %rcx; \
	popq %rsi; \
	popq %rdi

/* Fix up special calling conventions */
ENTRY(call_rwsem_down_read_failed)
	save_common_regs
	pushq %rdx
	movq %rax,%rdi
	call rwsem_down_read_failed
	popq %rdx
	restore_common_regs
	ret
	ENDPROC(call_rwsem_down_read_failed)

ENTRY(call_rwsem_down_write_failed)
	save_common_regs
	movq %rax,%rdi
	call rwsem_down_write_failed
	restore_common_regs
	ret
	ENDPROC(call_rwsem_down_write_failed)

ENTRY(call_rwsem_wake)
	decw %dx    /* do nothing if still outstanding active readers */
	jnz 1f
	save_common_regs
	movq %rax,%rdi
	call rwsem_wake
	restore_common_regs
1:	ret
	ENDPROC(call_rwsem_wake)

/* Fix up special calling conventions */
ENTRY(call_rwsem_downgrade_wake)
	save_common_regs
	pushq %rdx
	movq %rax,%rdi
	call rwsem_downgrade_wake
	popq %rdx
	restore_common_regs
	ret
	ENDPROC(call_rwsem_downgrade_wake)
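
For completeness, a sketch of how the fast path on the caller side hands
things to these stubs (modelled on the asm/rwsem.h style, not copied
from it): the semaphore pointer goes in %rax, and the read-unlock path
keeps the pre-xadd count in %rdx so that call_rwsem_wake's "decw %dx"
can check whether any active readers remain:

	struct rw_semaphore {
		int count;
		/* wait_lock, wait_list etc. elided */
	};

	static inline void up_read_fastpath(struct rw_semaphore *sem)
	{
		int tmp = -1;	/* -RWSEM_ACTIVE_READ_BIAS */
		asm volatile("lock; xadd %1,%0\n\t"	/* %1 := old count */
			     "jns 1f\n\t"		/* new count >= 0: nobody to wake */
			     "call call_rwsem_wake\n"	/* old count is in %dx */
			     "1:"
			     : "+m" (sem->count), "+d" (tmp)
			     : "a" (sem)		/* stub expects sem in %rax */
			     : "memory", "cc");
	}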