
Commit 2f19e06a authored by Fenghua Yu, committed by H. Peter Anvin

x86, mem: memset_64.S: Optimize memset by enhanced REP MOVSB/STOSB



Support memset() with enhanced rep stosb. On processors supporting enhanced
REP MOVSB/STOSB, the alternative memset_c_e function using enhanced rep stosb
overrides the fast string alternative memset_c and the original function.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Link: http://lkml.kernel.org/r/1305671358-14478-10-git-send-email-fenghua.yu@intel.com


Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
parent 057e05c1
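
The new .Lmemset_c_e body in the diff below is just rep stosb bracketed by saving and restoring the destination pointer. As a rough, hypothetical user-space rendering of the same idea (GNU C inline assembly; memset_erms and the test driver are invented for this sketch and are not part of the kernel patch; the count here stays a full size_t rather than the 32-bit register move the kernel body uses):

#include <stddef.h>
#include <stdio.h>

/* Hypothetical equivalent of the .Lmemset_c_e replacement: fill n bytes
 * at dst with the byte c using rep stosb, returning the original dst. */
static void *memset_erms(void *dst, int c, size_t n)
{
	void *d = dst;                   /* rep stosb advances RDI, so save dst */
	__asm__ __volatile__("rep stosb" /* store AL to [RDI], RCX times */
			     : "+D" (d), "+c" (n)
			     : "a" (c)
			     : "memory");
	return dst;                      /* like movq %r9,%rax in the patch */
}

int main(void)
{
	char buf[16];
	memset_erms(buf, 'x', sizeof(buf) - 1);
	buf[sizeof(buf) - 1] = '\0';
	puts(buf);                       /* prints fifteen 'x' characters */
	return 0;
}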
+42 −12
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -2,9 +2,13 @@
 
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
+#include <asm/alternative-asm.h>
 
 /*
- * ISO C memset - set a memory block to a byte value.
+ * ISO C memset - set a memory block to a byte value. This function uses fast
+ * string to get better performance than the original function. The code is
+ * simpler and shorter than the original function as well.
  *
  * rdi   destination
  * rsi   value (char)
@@ -31,6 +35,28 @@
 .Lmemset_e:
 	.previous
 
+/*
+ * ISO C memset - set a memory block to a byte value. This function uses
+ * enhanced rep stosb to override the fast string function.
+ * The code is simpler and shorter than the fast string function as well.
+ *
+ * rdi   destination
+ * rsi   value (char)
+ * rdx   count (bytes)
+ *
+ * rax   original destination
+ */
+	.section .altinstr_replacement, "ax", @progbits
+.Lmemset_c_e:
+	movq %rdi,%r9
+	movb %sil,%al
+	movl %edx,%ecx
+	rep stosb
+	movq %r9,%rax
+	ret
+.Lmemset_e_e:
+	.previous
+
 ENTRY(memset)
 ENTRY(__memset)
 	CFI_STARTPROC
@@ -112,16 +138,20 @@ ENTRY(__memset)
 ENDPROC(memset)
 ENDPROC(__memset)
 
-	/* Some CPUs run faster using the string instructions.
-	   It is also a lot simpler. Use this when possible */
-
-#include <asm/cpufeature.h>
-
+	/* Some CPUs support enhanced REP MOVSB/STOSB feature.
+	 * It is recommended to use this when possible.
+	 *
+	 * If enhanced REP MOVSB/STOSB feature is not available, use fast string
+	 * instructions.
+	 *
+	 * Otherwise, use original memset function.
+	 *
+	 * In .altinstructions section, ERMS feature is placed after REP_GOOD
+	 * feature to implement the right patch order.
+	 */
 	.section .altinstructions,"a"
-	.align 8
-	.quad memset
-	.quad .Lmemset_c
-	.word X86_FEATURE_REP_GOOD
-	.byte .Lfinal - memset
-	.byte .Lmemset_e - .Lmemset_c
+	altinstruction_entry memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\
+			     .Lfinal-memset,.Lmemset_e-.Lmemset_c
+	altinstruction_entry memset,.Lmemset_c_e,X86_FEATURE_ERMS, \
+			     .Lfinal-memset,.Lmemset_e_e-.Lmemset_c_e
 	.previous
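
For reference, X86_FEATURE_ERMS corresponds to the documented CPUID bit CPUID.(EAX=7,ECX=0):EBX[9]. Below is a hypothetical user-space sketch of the same preference order the alternatives machinery applies at patch time (ERMS first, otherwise fast string or the original function); cpu_has_erms is an invented helper, not part of this patch:

#include <stdio.h>
#include <cpuid.h>

/* Invented helper: query CPUID.(EAX=7,ECX=0):EBX bit 9, the ERMS flag. */
static int cpu_has_erms(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
		return 0;               /* CPUID leaf 7 not supported */
	return (ebx >> 9) & 1;
}

int main(void)
{
	/* Same preference order as the .altinstructions entries above:
	 * the ERMS variant wins when available, otherwise fall back. */
	if (cpu_has_erms())
		puts("ERMS: patch in memset_c_e (rep stosb)");
	else
		puts("no ERMS: keep fast-string or original memset");
	return 0;
}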