Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 101068c1 authored by Fenghua Yu's avatar Fenghua Yu Committed by H. Peter Anvin
Browse files

x86, mem: memcpy_64.S: Optimize memcpy by enhanced REP MOVSB/STOSB



Support memcpy() with enhanced REP MOVSB. On processors that support
enhanced REP MOVSB, the alternative memcpy() implementation using it
overrides both the original unrolled function and the fast-string
(rep movsq) variant.

Signed-off-by: default avatarFenghua Yu <fenghua.yu@intel.com>
Link: http://lkml.kernel.org/r/1305671358-14478-8-git-send-email-fenghua.yu@intel.com


Signed-off-by: default avatarH. Peter Anvin <hpa@linux.intel.com>
parent 4307bec9
Loading
Loading
Loading
Loading
+32 −13
Original line number Original line Diff line number Diff line
@@ -4,6 +4,7 @@


#include <asm/cpufeature.h>
#include <asm/cpufeature.h>
#include <asm/dwarf2.h>
#include <asm/dwarf2.h>
#include <asm/alternative-asm.h>


/*
/*
 * memcpy - Copy a memory block.
 * memcpy - Copy a memory block.
@@ -37,6 +38,23 @@
.Lmemcpy_e:
.Lmemcpy_e:
	.previous
	.previous


/*
 * memcpy_c_e() - enhanced fast string memcpy. This is faster and simpler than
 * memcpy_c. Use memcpy_c_e when possible.
 *
 * This gets patched over the unrolled variant (below) via the
 * alternative instructions framework:
 *
 * Inputs (kernel memcpy convention): rdi = dest, rsi = src, rdx = len.
 * Returns dest in rax, like C memcpy().
 */
	.section .altinstr_replacement, "ax", @progbits
.Lmemcpy_c_e:
	movq %rdi, %rax		/* return value: original dest pointer */

	movl %edx, %ecx		/* length -> rcx for rep; NOTE(review): only the
				 * low 32 bits of rdx are used — matches the
				 * existing memcpy_c variant, confirm callers
				 * never pass >4G */
	rep movsb		/* copy rcx bytes (rsi) -> (rdi); ERMSB makes
				 * this fast for all sizes/alignments */
	ret
.Lmemcpy_e_e:			/* end label: replacement length for patching */
	.previous

ENTRY(__memcpy)
ENTRY(__memcpy)
ENTRY(memcpy)
ENTRY(memcpy)
	CFI_STARTPROC
	CFI_STARTPROC
@@ -171,21 +189,22 @@ ENDPROC(memcpy)
ENDPROC(__memcpy)
ENDPROC(__memcpy)


	/*
	/*
	 * Some CPUs run faster using the string copy instructions.
	 * Some CPUs are adding enhanced REP MOVSB/STOSB feature
	 * It is also a lot simpler. Use this when possible:
	 * If the feature is supported, memcpy_c_e() is the first choice.
	 */
	 * If enhanced rep movsb copy is not available, use fast string copy

	 * memcpy_c() when possible. This is faster and code is simpler than
	.section .altinstructions, "a"
	 * original memcpy().
	.align 8
	 * Otherwise, original memcpy() is used.
	.quad memcpy
	 * In .altinstructions section, ERMS feature is placed after REP_GOOD
	.quad .Lmemcpy_c
	 * feature to implement the right patch order.
	.word X86_FEATURE_REP_GOOD
	 *

	/*
	 * Replace only beginning, memcpy is used to apply alternatives,
	 * Replace only beginning, memcpy is used to apply alternatives,
	 * so it is silly to overwrite itself with nops - reboot is the
	 * so it is silly to overwrite itself with nops - reboot is the
	 * only outcome...
	 * only outcome...
	 */
	 */
	.byte .Lmemcpy_e - .Lmemcpy_c
	.section .altinstructions, "a"
	.byte .Lmemcpy_e - .Lmemcpy_c
	altinstruction_entry memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\
			     .Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c
	altinstruction_entry memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \
			     .Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e
	.previous
	.previous