Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 090a3f61 authored by Borislav Petkov
Browse files

x86/lib/copy_page_64.S: Use generic ALTERNATIVE macro



... instead of the semi-version with the spelled out sections.

What is more, make the REP_GOOD version be the default copy_page()
version as the majority of the relevant x86 CPUs do set
X86_FEATURE_REP_GOOD. Thus, copy_page gets compiled to:

  ffffffff8130af80 <copy_page>:
  ffffffff8130af80:       e9 0b 00 00 00          jmpq   ffffffff8130af90 <copy_page_regs>
  ffffffff8130af85:       b9 00 02 00 00          mov    $0x200,%ecx
  ffffffff8130af8a:       f3 48 a5                rep movsq %ds:(%rsi),%es:(%rdi)
  ffffffff8130af8d:       c3                      retq
  ffffffff8130af8e:       66 90                   xchg   %ax,%ax

  ffffffff8130af90 <copy_page_regs>:
  ...

and after the alternatives have run, the JMP to the old, unrolled
version gets NOPed out:

  ffffffff8130af80 <copy_page>:
  ffffffff8130af80:  66 66 90		xchg   %ax,%ax
  ffffffff8130af83:  66 90		xchg   %ax,%ax
  ffffffff8130af85:  b9 00 02 00 00	mov    $0x200,%ecx
  ffffffff8130af8a:  f3 48 a5		rep movsq %ds:(%rsi),%es:(%rdi)
  ffffffff8130af8d:  c3			retq

On modern uarches, those NOPs are cheaper than the unconditional JMP
that was used previously.

Signed-off-by: Borislav Petkov <bp@suse.de>
parent 4fd4b6e5
Loading
Loading
Loading
Loading
+12 −25
Original line number Diff line number Diff line
@@ -2,23 +2,26 @@

#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>

/*
 * Some CPUs run faster using the string copy instructions (sane microcode).
 * It is also a lot simpler. Use this when possible. But, don't use streaming
 * copy unless the CPU indicates X86_FEATURE_REP_GOOD. Could vary the
 * prefetch distance based on SMP/UP.
 */
	ALIGN
copy_page_rep:
ENTRY(copy_page)
	CFI_STARTPROC
	ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
	movl	$4096/8, %ecx
	rep	movsq
	ret
	CFI_ENDPROC
ENDPROC(copy_page_rep)

/*
 *  Don't use streaming copy unless the CPU indicates X86_FEATURE_REP_GOOD.
 *  Could vary the prefetch distance based on SMP/UP.
*/
ENDPROC(copy_page)

ENTRY(copy_page)
ENTRY(copy_page_regs)
	CFI_STARTPROC
	subq	$2*8,	%rsp
	CFI_ADJUST_CFA_OFFSET 2*8
@@ -90,21 +93,5 @@ ENTRY(copy_page)
	addq	$2*8, %rsp
	CFI_ADJUST_CFA_OFFSET -2*8
	ret
.Lcopy_page_end:
	CFI_ENDPROC
ENDPROC(copy_page)

	/* Some CPUs run faster using the string copy instructions.
	   It is also a lot simpler. Use this when possible */

#include <asm/cpufeature.h>

	.section .altinstr_replacement,"ax"
1:	.byte 0xeb					/* jmp <disp8> */
	.byte (copy_page_rep - copy_page) - (2f - 1b)	/* offset */
2:
	.previous
	.section .altinstructions,"a"
	altinstruction_entry copy_page, 1b, X86_FEATURE_REP_GOOD,	\
		.Lcopy_page_end-copy_page, 2b-1b, 0
	.previous
ENDPROC(copy_page_regs)