
Commit e0bc8d17 authored by Borislav Petkov

x86/lib/memcpy_64.S: Convert memcpy to ALTERNATIVE_2 macro



Make REP_GOOD variant the default after alternatives have run.

Signed-off-by: Borislav Petkov <bp@suse.de>
parent a77600cd
arch/x86/lib/memcpy_64.S  +21 −47
@@ -1,11 +1,19 @@
 /* Copyright 2002 Andi Kleen */
 
 #include <linux/linkage.h>
-
 #include <asm/cpufeature.h>
 #include <asm/dwarf2.h>
 #include <asm/alternative-asm.h>
 
+/*
+ * We build a jump to memcpy_orig by default which gets NOPped out on
+ * the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which
+ * have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs
+ * to a jmp to memcpy_erms which does the REP; MOVSB mem copy.
+ */
+
+.weak memcpy
+
 /*
  * memcpy - Copy a memory block.
  *
@@ -17,15 +25,11 @@
  * Output:
  * rax original destination
  */
+ENTRY(__memcpy)
+ENTRY(memcpy)
+	ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
+		      "jmp memcpy_erms", X86_FEATURE_ERMS
 
-/*
- * memcpy_c() - fast string ops (REP MOVSQ) based variant.
- *
- * This gets patched over the unrolled variant (below) via the
- * alternative instructions framework:
- */
-	.section .altinstr_replacement, "ax", @progbits
-.Lmemcpy_c:
 	movq %rdi, %rax
 	movq %rdx, %rcx
 	shrq $3, %rcx
@@ -34,29 +38,21 @@
 	movl %edx, %ecx
 	rep movsb
 	ret
-.Lmemcpy_e:
-	.previous
+ENDPROC(memcpy)
+ENDPROC(__memcpy)
 
 /*
- * memcpy_c_e() - enhanced fast string memcpy. This is faster and simpler than
- * memcpy_c. Use memcpy_c_e when possible.
- *
- * This gets patched over the unrolled variant (below) via the
- * alternative instructions framework:
+ * memcpy_erms() - enhanced fast string memcpy. This is faster and
+ * simpler than memcpy. Use memcpy_erms when possible.
  */
-	.section .altinstr_replacement, "ax", @progbits
-.Lmemcpy_c_e:
+ENTRY(memcpy_erms)
 	movq %rdi, %rax
 	movq %rdx, %rcx
 	rep movsb
 	ret
-.Lmemcpy_e_e:
-	.previous
-
-.weak memcpy
+ENDPROC(memcpy_erms)
 
-ENTRY(__memcpy)
-ENTRY(memcpy)
+ENTRY(memcpy_orig)
 	CFI_STARTPROC
 	movq %rdi, %rax
 
@@ -183,26 +179,4 @@ ENTRY(memcpy)
 .Lend:
 	retq
 	CFI_ENDPROC
-ENDPROC(memcpy)
-ENDPROC(__memcpy)
-
-	/*
-	 * Some CPUs are adding enhanced REP MOVSB/STOSB feature
-	 * If the feature is supported, memcpy_c_e() is the first choice.
-	 * If enhanced rep movsb copy is not available, use fast string copy
-	 * memcpy_c() when possible. This is faster and code is simpler than
-	 * original memcpy().
-	 * Otherwise, original memcpy() is used.
-	 * In .altinstructions section, ERMS feature is placed after REG_GOOD
-	 * feature to implement the right patch order.
-	 *
-	 * Replace only beginning, memcpy is used to apply alternatives,
-	 * so it is silly to overwrite itself with nops - reboot is the
-	 * only outcome...
-	 */
-	.section .altinstructions, "a"
-	altinstruction_entry __memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\
-			     .Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c,0
-	altinstruction_entry __memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \
-			     .Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e,0
-	.previous
+ENDPROC(memcpy_orig)
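
Note on the mechanism: the ALTERNATIVE_2 line replaces the old explicit .altinstructions records. The REP MOVSQ body is now the in-line default; at boot, REP_GOOD CPUs get the leading "jmp memcpy_orig" NOPped out and fall through to it, ERMS CPUs get that jmp retargeted to memcpy_erms, and everything else keeps the jmp to the unrolled memcpy_orig. As a rough userspace analogue of that selection, here is a minimal C sketch. It is an illustration only, not kernel code: copy_generic(), copy_erms() and cpu_has_erms() are made-up names, a function pointer resolved once stands in for patching the instruction stream, and because REP_GOOD is a kernel-synthesized flag with no single CPUID bit, the sketch only distinguishes ERMS from everything else.

/*
 * Userspace sketch of the choice the ALTERNATIVE_2 line encodes.
 * Assumptions: GCC or Clang on x86-64; the helper names below are
 * illustrative, not kernel interfaces.
 */
#include <cpuid.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

/* Stand-in for the non-ERMS variants (the REP MOVSQ body / memcpy_orig). */
static void *copy_generic(void *dst, const void *src, size_t len)
{
	return memcpy(dst, src, len);
}

/* ERMS-style variant: one REP MOVSB does the whole copy, like memcpy_erms. */
static void *copy_erms(void *dst, const void *src, size_t len)
{
	void *ret = dst;

	asm volatile("rep movsb"
		     : "+D" (dst), "+S" (src), "+c" (len)
		     : : "memory");
	return ret;
}

/* ERMS is reported in CPUID.(EAX=7,ECX=0):EBX bit 9. */
static int cpu_has_erms(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
		return 0;
	return (ebx >> 9) & 1;
}

/* Resolved once at startup, the way alternatives are applied once at boot. */
static void *(*copy_fn)(void *dst, const void *src, size_t len);

int main(void)
{
	char src[32] = "alternatives demo", dst[32];

	copy_fn = cpu_has_erms() ? copy_erms : copy_generic;
	copy_fn(dst, src, sizeof(src));
	printf("%s (erms=%d)\n", dst, cpu_has_erms());
	return 0;
}

Built with something like "gcc -O2 dispatch.c", this picks the REP MOVSB path on ERMS hardware once at startup, much as the kernel picks its memcpy variant once when alternatives run, except the kernel avoids even the indirect call by rewriting the jmp in place.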