
Commit 269833bd authored by Ma Ling, committed by Ingo Molnar

x86/asm: Clean up copy_page_*() comments and code



Modern CPUs use fast-string instructions to accelerate copy
performance by combining data into 128-bit chunks.

Modify the comments and coding style to match.
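
The fast-string path referred to above is the rep movsq sequence kept in
copy_page_rep below. A minimal stand-alone sketch of that path (a
hypothetical user-space copy_page_demo, not the kernel's entry point, with
the CFI annotations dropped) would be:

	.text
	.globl	copy_page_demo
	.type	copy_page_demo, @function
copy_page_demo:			/* void copy_page_demo(void *to, const void *from) */
	movl	$4096/8, %ecx	/* one 4 KiB page = 512 quadwords */
	rep	movsq		/* fast-string copy: %rcx quadwords from (%rsi) to (%rdi) */
	ret
	.size	copy_page_demo, .-copy_page_demo

Under the SysV AMD64 ABI the destination arrives in %rdi and the source in
%rsi, which is exactly the register pair movsq consumes, so the routine
needs no register shuffling before the copy.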

Signed-off-by: Ma Ling <ling.ma@intel.com>
Cc: iant@google.com
Link: http://lkml.kernel.org/r/1350503565-19167-1-git-send-email-ling.ma@intel.com


[ Cleaned up the clean up. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 0e9e3e30
arch/x86/lib/copy_page_64.S: +59 −61
@@ -5,18 +5,18 @@
 #include <asm/alternative-asm.h>
 
 	ALIGN
-copy_page_c:
+copy_page_rep:
 	CFI_STARTPROC
 	movl	$4096/8, %ecx
 	rep	movsq
 	ret
 	CFI_ENDPROC
-ENDPROC(copy_page_c)
+ENDPROC(copy_page_rep)
 
-/* Don't use streaming store because it's better when the target
-   ends up in cache. */
-	
-/* Could vary the prefetch distance based on SMP/UP */
+/*
+ *  Don't use streaming copy unless the CPU indicates X86_FEATURE_REP_GOOD.
+ *  Could vary the prefetch distance based on SMP/UP.
+ */
 
 ENTRY(copy_page)
 	CFI_STARTPROC
@@ -31,26 +31,25 @@ ENTRY(copy_page)
 	.p2align 4
 .Loop64:
 	dec	%rcx
-
-	movq        (%rsi), %rax
-	movq      8 (%rsi), %rbx
-	movq     16 (%rsi), %rdx
-	movq     24 (%rsi), %r8
-	movq     32 (%rsi), %r9
-	movq     40 (%rsi), %r10
-	movq     48 (%rsi), %r11
-	movq     56 (%rsi), %r12
+	movq	0x8*0(%rsi), %rax
+	movq	0x8*1(%rsi), %rbx
+	movq	0x8*2(%rsi), %rdx
+	movq	0x8*3(%rsi), %r8
+	movq	0x8*4(%rsi), %r9
+	movq	0x8*5(%rsi), %r10
+	movq	0x8*6(%rsi), %r11
+	movq	0x8*7(%rsi), %r12
 
 	prefetcht0 5*64(%rsi)
 
-	movq     %rax,    (%rdi)
-	movq     %rbx,  8 (%rdi)
-	movq     %rdx, 16 (%rdi)
-	movq     %r8,  24 (%rdi)
-	movq     %r9,  32 (%rdi)
-	movq     %r10, 40 (%rdi)
-	movq     %r11, 48 (%rdi)
-	movq     %r12, 56 (%rdi)
+	movq	%rax, 0x8*0(%rdi)
+	movq	%rbx, 0x8*1(%rdi)
+	movq	%rdx, 0x8*2(%rdi)
+	movq	%r8,  0x8*3(%rdi)
+	movq	%r9,  0x8*4(%rdi)
+	movq	%r10, 0x8*5(%rdi)
+	movq	%r11, 0x8*6(%rdi)
+	movq	%r12, 0x8*7(%rdi)
 
 	leaq	64 (%rsi), %rsi
 	leaq	64 (%rdi), %rdi
@@ -62,27 +61,26 @@ ENTRY(copy_page)
 .Loop2:
 	decl	%ecx
-
-	movq        (%rsi), %rax
-	movq      8 (%rsi), %rbx
-	movq     16 (%rsi), %rdx
-	movq     24 (%rsi), %r8
-	movq     32 (%rsi), %r9
-	movq     40 (%rsi), %r10
-	movq     48 (%rsi), %r11
-	movq     56 (%rsi), %r12
-
-	movq     %rax,    (%rdi)
-	movq     %rbx,  8 (%rdi)
-	movq     %rdx, 16 (%rdi)
-	movq     %r8,  24 (%rdi)
-	movq     %r9,  32 (%rdi)
-	movq     %r10, 40 (%rdi)
-	movq     %r11, 48 (%rdi)
-	movq     %r12, 56 (%rdi)
+	movq	0x8*0(%rsi), %rax
+	movq	0x8*1(%rsi), %rbx
+	movq	0x8*2(%rsi), %rdx
+	movq	0x8*3(%rsi), %r8
+	movq	0x8*4(%rsi), %r9
+	movq	0x8*5(%rsi), %r10
+	movq	0x8*6(%rsi), %r11
+	movq	0x8*7(%rsi), %r12
+
+	movq	%rax, 0x8*0(%rdi)
+	movq	%rbx, 0x8*1(%rdi)
+	movq	%rdx, 0x8*2(%rdi)
+	movq	%r8,  0x8*3(%rdi)
+	movq	%r9,  0x8*4(%rdi)
+	movq	%r10, 0x8*5(%rdi)
+	movq	%r11, 0x8*6(%rdi)
+	movq	%r12, 0x8*7(%rdi)
 
 	leaq	64(%rdi), %rdi
 	leaq	64(%rsi), %rsi
 
 	jnz	.Loop2
 
 	movq	(%rsp), %rbx
@@ -103,7 +101,7 @@ ENDPROC(copy_page)
 
 	.section .altinstr_replacement,"ax"
 1:	.byte 0xeb					/* jmp <disp8> */
-	.byte (copy_page_c - copy_page) - (2f - 1b)	/* offset */
+	.byte (copy_page_rep - copy_page) - (2f - 1b)	/* offset */
 2:
 	.previous
 	.section .altinstructions,"a"
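
On the last hunk: the two bytes emitted into .altinstr_replacement encode a
short jmp (opcode 0xeb plus a signed 8-bit displacement). When the CPU
advertises X86_FEATURE_REP_GOOD, the boot-time alternatives patching copies
them over the start of copy_page, so calls land in copy_page_rep instead of
the unrolled loop. The displacement is taken relative to the end of the jmp,
which is why the replacement's own length, (2f - 1b), is subtracted. A
minimal stand-alone sketch of the same label arithmetic (a hypothetical
jmp_demo, not kernel code) would be:

	.text
	.globl	jmp_demo
	.type	jmp_demo, @function
jmp_demo:
1:	.byte	0xeb				/* jmp <disp8> opcode */
	.byte	(target - jmp_demo) - (2f - 1b)	/* disp8 is relative to the end of the 2-byte jmp */
2:	nop					/* skipped: the jmp lands on target */
target:
	ret
	.size	jmp_demo, .-jmp_demo

The assembler resolves the second byte to 1 here, so the hand-encoded jmp
hops over the nop and jmp_demo simply returns.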