
Commit f3b6eaf0 authored by Ingo Molnar

x86: memcpy, clean up



Impact: cleanup

Make this file more readable by bringing it more in line
with the usual kernel style.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent dd1ef4ec
+81 −55
/* Copyright 2002 Andi Kleen */

#include <linux/linkage.h>
#include <asm/dwarf2.h>

#include <asm/cpufeature.h>
#include <asm/dwarf2.h>

/*
 * memcpy - Copy a memory block.
@@ -16,10 +17,17 @@
 * rax original destination
 */

/*
 * memcpy_c() - fast string ops (REP MOVSQ) based variant.
 *
 * Calls to this get patched into the kernel image via the
 * alternative instructions framework:
 */
	ALIGN
memcpy_c:
	CFI_STARTPROC
	movq %rdi, %rax

	movl %edx, %ecx
	shrl $3, %ecx
	andl $7, %edx
@@ -33,42 +41,51 @@ ENDPROC(memcpy_c)
ENTRY(__memcpy)
ENTRY(memcpy)
	CFI_STARTPROC
	movq %rdi,%rax

	/*
	 * Put the number of full 64-byte blocks into %ecx.
	 * Tail portion is handled at the end:
	 */
	movq %rdi, %rax
	movl %edx, %ecx
	shrl   $6, %ecx
	jz .Lhandle_tail

	.p2align 4
.Lloop_64:
	/*
	 * We decrement the loop index here - and the zero-flag is
	 * checked at the end of the loop (instructions inbetween do
	 * not change the zero flag):
	 */
	decl %ecx

	movq (%rsi),%r11
	movq 8(%rsi),%r8

	movq %r11,(%rdi)
	/*
	 * Move in blocks of 4x16 bytes:
	 */
	movq 0*8(%rsi),		%r11
	movq 1*8(%rsi),		%r8
	movq %r11,		0*8(%rdi)
	movq %r8,		1*8(%rdi)

	movq 2*8(%rsi),		%r9
	movq 3*8(%rsi),		%r10

	movq %r9,		2*8(%rdi)
	movq %r10,		3*8(%rdi)

	movq 4*8(%rsi),		%r11
	movq 5*8(%rsi),		%r8

	movq %r11,		4*8(%rdi)
	movq %r8,		5*8(%rdi)

	movq 6*8(%rsi),		%r9
	movq 7*8(%rsi),		%r10

	movq %r9,		6*8(%rdi)
	movq %r10,		7*8(%rdi)

	leaq 64(%rsi), %rsi
	leaq 64(%rdi), %rdi

	jnz  .Lloop_64

.Lhandle_tail:
@@ -76,6 +93,7 @@ ENTRY(memcpy)
	andl  $63, %ecx
	shrl   $3, %ecx
	jz   .Lhandle_7

	.p2align 4
.Lloop_8:
	decl %ecx
@@ -88,7 +106,8 @@ ENTRY(memcpy)
.Lhandle_7:
	movl %edx, %ecx
	andl $7, %ecx
	jz .Lende
	jz .Lend

	.p2align 4
.Lloop_1:
	movb (%rsi), %r8b
@@ -98,27 +117,34 @@ ENTRY(memcpy)
	decl %ecx
	jnz .Lloop_1

.Lende:
.Lend:
	ret
	CFI_ENDPROC
ENDPROC(memcpy)
ENDPROC(__memcpy)

	/* Some CPUs run faster using the string copy instructions.
	   It is also a lot simpler. Use this when possible */
	/*
	 * Some CPUs run faster using the string copy instructions.
	 * It is also a lot simpler. Use this when possible:
	 */

	.section .altinstr_replacement, "ax"
1:	.byte 0xeb				/* jmp <disp8> */
	.byte (memcpy_c - memcpy) - (2f - 1b)	/* offset */
2:
	.previous

	.section .altinstructions, "a"
	.align 8
	.quad memcpy
	.quad 1b
	.byte X86_FEATURE_REP_GOOD
	/* Replace only beginning, memcpy is used to apply alternatives, so it
	 * is silly to overwrite itself with nops - reboot is only outcome... */

	/*
	 * Replace only beginning, memcpy is used to apply alternatives,
	 * so it is silly to overwrite itself with nops - reboot is the
	 * only outcome...
	 */
	.byte 2b - 1b
	.byte 2b - 1b
	.previous
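
To make the open-coded path easier to follow, here is a small C sketch of the size decomposition the assembly above performs: shrl $6 extracts the number of full 64-byte blocks, andl $63 followed by shrl $3 extracts the remaining 8-byte words, and andl $7 leaves the trailing bytes. This is purely an illustration of that arithmetic, not kernel code; memcpy_split_sketch is an invented name for the example.

#include <stdio.h>
#include <string.h>
#include <stddef.h>

/*
 * Illustrative sketch only (not kernel code): model how the open-coded
 * memcpy above splits a copy of 'len' bytes into 64-byte blocks, 8-byte
 * words and single bytes (.Lloop_64, .Lloop_8, .Lloop_1).
 */
static void *memcpy_split_sketch(void *dst, const void *src, size_t len)
{
	unsigned char *d = dst;
	const unsigned char *s = src;
	size_t blocks = len >> 6;        /* shrl $6, %ecx: full 64-byte blocks  */
	size_t words  = (len & 63) >> 3; /* andl $63; shrl $3: remaining qwords */
	size_t bytes  = len & 7;         /* andl $7, %ecx: trailing bytes       */

	while (blocks--) {               /* .Lloop_64: 4x16 bytes per pass      */
		memcpy(d, s, 64);
		d += 64;
		s += 64;
	}
	while (words--) {                /* .Lloop_8                            */
		memcpy(d, s, 8);
		d += 8;
		s += 8;
	}
	while (bytes--)                  /* .Lloop_1                            */
		*d++ = *s++;

	return dst;                      /* like %rax: the original destination */
}

int main(void)
{
	char src[100], dst[100];

	for (int i = 0; i < 100; i++)
		src[i] = (char)i;

	memcpy_split_sketch(dst, src, sizeof(src));
	printf("copies match: %d\n", memcmp(dst, src, sizeof(dst)) == 0);
	return 0;
}

A 100-byte copy, for instance, resolves to one 64-byte block, four 8-byte words and four trailing bytes, which is exactly how the three loops above would consume it.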
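
The .altinstructions entry above is the data the alternative-instructions framework reads at boot time. Below is a hedged C sketch of that record and of the patching step; the kernel's actual type is struct alt_instr in <asm/alternative.h>, whose exact layout may differ, and the names alt_record_sketch, apply_alternative_sketch and always_rep_good are invented for illustration.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/*
 * Hedged sketch of the record described by the .altinstructions entry
 * above (.quad memcpy; .quad 1b; .byte X86_FEATURE_REP_GOOD;
 * .byte 2b - 1b; .byte 2b - 1b).  Field names and layout here are
 * illustrative, not copied from the kernel sources.
 */
struct alt_record_sketch {
	void    *instr;          /* .quad memcpy: code to be patched         */
	void    *replacement;    /* .quad 1b: replacement bytes (short jmp)  */
	uint8_t  cpuid_bit;      /* .byte X86_FEATURE_REP_GOOD               */
	uint8_t  instrlen;       /* .byte 2b - 1b: bytes to patch over       */
	uint8_t  replacementlen; /* .byte 2b - 1b: length of the replacement */
};

/*
 * Conceptual boot-time patching: if the CPU advertises the feature, the
 * replacement (a 2-byte "jmp memcpy_c") is copied over the start of
 * memcpy.  cpu_has() stands in for the real feature test.
 */
static void apply_alternative_sketch(const struct alt_record_sketch *a,
				     int (*cpu_has)(uint8_t bit))
{
	if (cpu_has(a->cpuid_bit))
		memcpy(a->instr, a->replacement, a->replacementlen);
}

static int always_rep_good(uint8_t bit)
{
	(void)bit;
	return 1;                /* pretend X86_FEATURE_REP_GOOD is set */
}

int main(void)
{
	uint8_t original[4]    = { 0x90, 0x90, 0x90, 0x90 }; /* placeholder code */
	uint8_t replacement[2] = { 0xeb, 0x10 };              /* jmp <disp8>      */
	struct alt_record_sketch rec = {
		original, replacement,
		0, /* the real feature bit value comes from <asm/cpufeature.h> */
		sizeof(replacement), sizeof(replacement)
	};

	apply_alternative_sketch(&rec, always_rep_good);
	printf("patched: %02x %02x\n", original[0], original[1]);
	return 0;
}

This also mirrors the design note in the comment above: because memcpy itself runs while alternatives are being applied, only the first 2b - 1b bytes of it are recorded for patching, so the framework overwrites just the entry point with the short jump rather than NOP-filling the whole function.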