arch/x86/lib/memcpy_64.S (+21 −47)

 /* Copyright 2002 Andi Kleen */

 #include <linux/linkage.h>
 #include <asm/cpufeature.h>
 #include <asm/dwarf2.h>
 #include <asm/alternative-asm.h>

+/*
+ * We build a jump to memcpy_orig by default which gets NOPped out on
+ * the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which
+ * have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs
+ * to a jmp to memcpy_erms which does the REP; MOVSB mem copy.
+ */
+
+.weak memcpy
+
 /*
  * memcpy - Copy a memory block.
  *
@@ -17,15 +25,11 @@
  * Output:
  * rax original destination
  */
+ENTRY(__memcpy)
+ENTRY(memcpy)
+	ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
+		      "jmp memcpy_erms", X86_FEATURE_ERMS

-/*
- * memcpy_c() - fast string ops (REP MOVSQ) based variant.
- *
- * This gets patched over the unrolled variant (below) via the
- * alternative instructions framework:
- */
-	.section .altinstr_replacement, "ax", @progbits
-.Lmemcpy_c:
 	movq %rdi, %rax
 	movq %rdx, %rcx
 	shrq $3, %rcx
@@ -34,29 +38,21 @@
 	movl %edx, %ecx
 	rep movsb
 	ret
-.Lmemcpy_e:
-	.previous
+ENDPROC(memcpy)
+ENDPROC(__memcpy)

 /*
- * memcpy_c_e() - enhanced fast string memcpy. This is faster and simpler than
- * memcpy_c. Use memcpy_c_e when possible.
- *
- * This gets patched over the unrolled variant (below) via the
- * alternative instructions framework:
+ * memcpy_erms() - enhanced fast string memcpy. This is faster and
+ * simpler than memcpy. Use memcpy_erms when possible.
  */
-	.section .altinstr_replacement, "ax", @progbits
-.Lmemcpy_c_e:
+ENTRY(memcpy_erms)
 	movq %rdi, %rax
 	movq %rdx, %rcx
 	rep movsb
 	ret
-.Lmemcpy_e_e:
-	.previous
+ENDPROC(memcpy_erms)

-.weak memcpy
-
-ENTRY(__memcpy)
-ENTRY(memcpy)
+ENTRY(memcpy_orig)
 	CFI_STARTPROC
 	movq %rdi, %rax

@@ -183,26 +179,4 @@ ENTRY(memcpy)
 .Lend:
 	retq
 	CFI_ENDPROC
-ENDPROC(memcpy)
-ENDPROC(__memcpy)
-
-/*
- * Some CPUs are adding enhanced REP MOVSB/STOSB feature
- * If the feature is supported, memcpy_c_e() is the first choice.
- * If enhanced rep movsb copy is not available, use fast string copy
- * memcpy_c() when possible. This is faster and code is simpler than
- * original memcpy().
- * Otherwise, original memcpy() is used.
- * In .altinstructions section, ERMS feature is placed after REG_GOOD
- * feature to implement the right patch order.
- *
- * Replace only beginning, memcpy is used to apply alternatives,
- * so it is silly to overwrite itself with nops - reboot is the
- * only outcome...
- */
-	.section .altinstructions, "a"
-	altinstruction_entry __memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\
-			     .Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c,0
-	altinstruction_entry __memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \
-			     .Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e,0
-	.previous
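The diff replaces the old open-coded .altinstr_replacement/.altinstructions scheme with the ALTERNATIVE_2 macro: the default bytes at memcpy's entry are a jump to memcpy_orig, CPUs with REP_GOOD get that jump NOPped out so execution falls through to the REP MOVSQ body, and CPUs with ERMS get it patched into a jump to memcpy_erms. Below is a minimal, hedged userspace C sketch of that selection policy only. Every name in it (cpu_has_erms, cpu_has_rep_good, the copy_* helpers, pick_copy_impl) is hypothetical and not kernel API; the kernel does not branch per call but rewrites the entry jump exactly once when alternatives are applied at boot.

/*
 * Sketch of the ALTERNATIVE_2 selection order: prefer ERMS, then
 * REP_GOOD, then the unrolled fallback.  All helpers are hypothetical
 * stand-ins; the copy bodies just defer to libc memcpy here.
 */
#include <stddef.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical stand-ins for the X86_FEATURE_ERMS / X86_FEATURE_REP_GOOD tests. */
static int cpu_has_erms(void)     { return 1; }
static int cpu_has_rep_good(void) { return 1; }

/* Stand-ins for the three copy bodies in the .S file. */
static void *copy_erms(void *d, const void *s, size_t n)      { return memcpy(d, s, n); } /* memcpy_erms: rep movsb */
static void *copy_rep_movsq(void *d, const void *s, size_t n) { return memcpy(d, s, n); } /* inline body: rep movsq + movsb tail */
static void *copy_unrolled(void *d, const void *s, size_t n)  { return memcpy(d, s, n); } /* memcpy_orig: unrolled loop */

static void *(*copy_impl)(void *, const void *, size_t);

/* Chosen once, the way the alternatives patching rewrites the entry jump once at boot. */
static void pick_copy_impl(void)
{
	if (cpu_has_erms())
		copy_impl = copy_erms;
	else if (cpu_has_rep_good())
		copy_impl = copy_rep_movsq;
	else
		copy_impl = copy_unrolled;
}

int main(void)
{
	char src[] = "ALTERNATIVE_2", dst[sizeof(src)];

	pick_copy_impl();
	copy_impl(dst, src, sizeof(src));
	puts(dst);
	return 0;
}

The function pointer is only an analogy: after patching, the real memcpy entry carries either a direct jmp or NOPs, so the dispatch costs nothing per call.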