Loading arch/arm64/lib/copy_from_user.S +33 −3 Original line number Diff line number Diff line Loading @@ -15,6 +15,7 @@ */ #include <linux/linkage.h> #include <asm/assembler.h> /* * Copy from user space to a kernel buffer (alignment handled by the hardware) Loading @@ -27,10 +28,39 @@ * x0 - bytes not copied */ ENTRY(__copy_from_user) #include "copy_template.S" add x4, x1, x2 // upper user buffer boundary subs x2, x2, #8 b.mi 2f 1: USER(9f, ldr x3, [x1], #8 ) subs x2, x2, #8 str x3, [x0], #8 b.pl 1b 2: adds x2, x2, #4 b.mi 3f USER(9f, ldr w3, [x1], #4 ) sub x2, x2, #4 str w3, [x0], #4 3: adds x2, x2, #2 b.mi 4f USER(9f, ldrh w3, [x1], #2 ) sub x2, x2, #2 strh w3, [x0], #2 4: adds x2, x2, #1 b.mi 5f USER(9f, ldrb w3, [x1] ) strb w3, [x0] 5: mov x0, #0 ret ENDPROC(__copy_from_user) .section .fixup,"ax" .align 2 copy_abort_table 9: sub x2, x4, x1 mov x3, x2 10: strb wzr, [x0], #1 // zero remaining buffer space subs x3, x3, #1 b.ne 10b mov x0, x2 // bytes not copied ret .previous arch/arm64/lib/copy_template.Sdeleted 100644 → 0 +0 −278 Original line number Diff line number Diff line /* * Copyright (c) 2013, Applied Micro Circuits Corporation * Copyright (c) 2012-2013, Linaro Limited * * Author: Feng Kan <fkan@apm.com> * Author: Philipp Tomsich <philipp.tomsich@theobroma-systems.com> * * The code is adopted from the memcpy routine by Linaro Limited. * * This file is free software: you may copy, redistribute and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation, either version 2 of the License, or (at your * option) any later version. * * This file is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * * This file incorporates work covered by the following copyright and * permission notice: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * 1 Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2 Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3 Neither the name of the Linaro nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include <asm/assembler.h> dstin .req x0 src .req x1 count .req x2 tmp1 .req x3 tmp1w .req w3 tmp2 .req x4 tmp2w .req w4 tmp3 .req x5 tmp3w .req w5 dst .req x6 A_l .req x7 A_h .req x8 B_l .req x9 B_h .req x10 C_l .req x11 C_h .req x12 D_l .req x13 D_h .req x14 mov dst, dstin cmp count, #64 b.ge .Lcpy_not_short cmp count, #15 b.le .Ltail15tiny /* * Deal with small copies quickly by dropping straight into the * exit block. */ .Ltail63: /* * Copy up to 48 bytes of data. At this point we only need the * bottom 6 bits of count to be accurate. */ ands tmp1, count, #0x30 b.eq .Ltail15 add dst, dst, tmp1 add src, src, tmp1 cmp tmp1w, #0x20 b.eq 1f b.lt 2f USER(8f, ldp A_l, A_h, [src, #-48]) USER(8f, stp A_l, A_h, [dst, #-48]) 1: USER(8f, ldp A_l, A_h, [src, #-32]) USER(8f, stp A_l, A_h, [dst, #-32]) 2: USER(8f, ldp A_l, A_h, [src, #-16]) USER(8f, stp A_l, A_h, [dst, #-16]) .Ltail15: ands count, count, #15 beq 1f add src, src, count USER(9f, ldp A_l, A_h, [src, #-16]) add dst, dst, count USER(9f, stp A_l, A_h, [dst, #-16]) 1: b .Lsuccess .Ltail15tiny: /* * Copy up to 15 bytes of data. Does not assume additional data * being copied. */ tbz count, #3, 1f USER(10f, ldr tmp1, [src], #8) USER(10f, str tmp1, [dst], #8) 1: tbz count, #2, 1f USER(10f, ldr tmp1w, [src], #4) USER(10f, str tmp1w, [dst], #4) 1: tbz count, #1, 1f USER(10f, ldrh tmp1w, [src], #2) USER(10f, strh tmp1w, [dst], #2) 1: tbz count, #0, 1f USER(10f, ldrb tmp1w, [src]) USER(10f, strb tmp1w, [dst]) 1: b .Lsuccess .Lcpy_not_short: /* * We don't much care about the alignment of DST, but we want SRC * to be 128-bit (16 byte) aligned so that we don't cross cache line * boundaries on both loads and stores. */ neg tmp2, src ands tmp2, tmp2, #15 /* Bytes to reach alignment. */ b.eq 2f sub count, count, tmp2 /* * Copy more data than needed; it's faster than jumping * around copying sub-Quadword quantities. We know that * it can't overrun. */ USER(11f, ldp A_l, A_h, [src]) add src, src, tmp2 USER(11f, stp A_l, A_h, [dst]) add dst, dst, tmp2 /* There may be less than 63 bytes to go now. */ cmp count, #63 b.le .Ltail63 2: subs count, count, #128 b.ge .Lcpy_body_large /* * Less than 128 bytes to copy, so handle 64 here and then jump * to the tail. */ USER(12f, ldp A_l, A_h, [src]) USER(12f, ldp B_l, B_h, [src, #16]) USER(12f, ldp C_l, C_h, [src, #32]) USER(12f, ldp D_l, D_h, [src, #48]) USER(12f, stp A_l, A_h, [dst]) USER(12f, stp B_l, B_h, [dst, #16]) USER(12f, stp C_l, C_h, [dst, #32]) USER(12f, stp D_l, D_h, [dst, #48]) tst count, #0x3f add src, src, #64 add dst, dst, #64 b.ne .Ltail63 b .Lsuccess /* * Critical loop. Start at a new cache line boundary. Assuming * 64 bytes per line this ensures the entire loop is in one line. */ .p2align 6 .Lcpy_body_large: /* There are at least 128 bytes to copy. */ USER(12f, ldp A_l, A_h, [src, #0]) sub dst, dst, #16 /* Pre-bias. */ USER(13f, ldp B_l, B_h, [src, #16]) USER(13f, ldp C_l, C_h, [src, #32]) USER(13f, ldp D_l, D_h, [src, #48]!) /* src += 64 - Pre-bias. */ 1: USER(13f, stp A_l, A_h, [dst, #16]) USER(13f, ldp A_l, A_h, [src, #16]) USER(13f, stp B_l, B_h, [dst, #32]) USER(13f, ldp B_l, B_h, [src, #32]) USER(13f, stp C_l, C_h, [dst, #48]) USER(13f, ldp C_l, C_h, [src, #48]) USER(13f, stp D_l, D_h, [dst, #64]!) USER(13f, ldp D_l, D_h, [src, #64]!) subs count, count, #64 b.ge 1b USER(14f, stp A_l, A_h, [dst, #16]) USER(14f, stp B_l, B_h, [dst, #32]) USER(14f, stp C_l, C_h, [dst, #48]) USER(14f, stp D_l, D_h, [dst, #64]) add src, src, #16 add dst, dst, #64 + 16 tst count, #0x3f b.ne .Ltail63 .Lsuccess: /* Nothing left to copy */ mov x0, #0 ret .macro copy_abort_table 8: /* * Count bytes remain * dst points to (dst + tmp1) */ mov x0, count sub dst, dst, tmp1 b .Lfinalize 9: /* * 16 bytes remain * dst is accurate */ mov x0, #16 b .Lfinalize 10: /* * count is accurate * dst is accurate */ mov x0, count b .Lfinalize 11: /* *(count + tmp2) bytes remain * dst points to the start of the remaining bytes */ add x0, count, tmp2 b .Lfinalize 12: /* * (count + 128) bytes remain * dst is accurate */ add x0, count, #128 b .Lfinalize 13: /* * (count + 128) bytes remain * dst is pre-biased to (dst + 16) */ add x0, count, #128 sub dst, dst, #16 b .Lfinalize 14: /* * count is accurate * dst is pre-biased to (dst + 16) */ mov x0, count sub dst, dst, #16 /* fall-through */ .Lfinalize: /* * Zeroize remaining destination-buffer */ mov count, x0 20: /* Zero remaining buffer space */ strb wzr, [dst], #1 subs count, count, #1 b.ne 20b ret .endm arch/arm64/lib/copy_to_user.S +28 −3 Original line number Diff line number Diff line Loading @@ -15,6 +15,7 @@ */ #include <linux/linkage.h> #include <asm/assembler.h> /* * Copy to user space from a kernel buffer (alignment handled by the hardware) Loading @@ -27,10 +28,34 @@ * x0 - bytes not copied */ ENTRY(__copy_to_user) #include "copy_template.S" add x4, x0, x2 // upper user buffer boundary subs x2, x2, #8 b.mi 2f 1: ldr x3, [x1], #8 subs x2, x2, #8 USER(9f, str x3, [x0], #8 ) b.pl 1b 2: adds x2, x2, #4 b.mi 3f ldr w3, [x1], #4 sub x2, x2, #4 USER(9f, str w3, [x0], #4 ) 3: adds x2, x2, #2 b.mi 4f ldrh w3, [x1], #2 sub x2, x2, #2 USER(9f, strh w3, [x0], #2 ) 4: adds x2, x2, #1 b.mi 5f ldrb w3, [x1] USER(9f, strb w3, [x0] ) 5: mov x0, #0 ret ENDPROC(__copy_to_user) .section .fixup,"ax" .align 2 copy_abort_table 9: sub x0, x4, x0 // bytes not copied ret .previous Loading
arch/arm64/lib/copy_from_user.S +33 −3 Original line number Diff line number Diff line Loading @@ -15,6 +15,7 @@ */ #include <linux/linkage.h> #include <asm/assembler.h> /* * Copy from user space to a kernel buffer (alignment handled by the hardware) Loading @@ -27,10 +28,39 @@ * x0 - bytes not copied */ ENTRY(__copy_from_user) #include "copy_template.S" add x4, x1, x2 // upper user buffer boundary subs x2, x2, #8 b.mi 2f 1: USER(9f, ldr x3, [x1], #8 ) subs x2, x2, #8 str x3, [x0], #8 b.pl 1b 2: adds x2, x2, #4 b.mi 3f USER(9f, ldr w3, [x1], #4 ) sub x2, x2, #4 str w3, [x0], #4 3: adds x2, x2, #2 b.mi 4f USER(9f, ldrh w3, [x1], #2 ) sub x2, x2, #2 strh w3, [x0], #2 4: adds x2, x2, #1 b.mi 5f USER(9f, ldrb w3, [x1] ) strb w3, [x0] 5: mov x0, #0 ret ENDPROC(__copy_from_user) .section .fixup,"ax" .align 2 copy_abort_table 9: sub x2, x4, x1 mov x3, x2 10: strb wzr, [x0], #1 // zero remaining buffer space subs x3, x3, #1 b.ne 10b mov x0, x2 // bytes not copied ret .previous
arch/arm64/lib/copy_template.Sdeleted 100644 → 0 +0 −278 Original line number Diff line number Diff line /* * Copyright (c) 2013, Applied Micro Circuits Corporation * Copyright (c) 2012-2013, Linaro Limited * * Author: Feng Kan <fkan@apm.com> * Author: Philipp Tomsich <philipp.tomsich@theobroma-systems.com> * * The code is adopted from the memcpy routine by Linaro Limited. * * This file is free software: you may copy, redistribute and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation, either version 2 of the License, or (at your * option) any later version. * * This file is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * * This file incorporates work covered by the following copyright and * permission notice: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * 1 Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2 Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3 Neither the name of the Linaro nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include <asm/assembler.h> dstin .req x0 src .req x1 count .req x2 tmp1 .req x3 tmp1w .req w3 tmp2 .req x4 tmp2w .req w4 tmp3 .req x5 tmp3w .req w5 dst .req x6 A_l .req x7 A_h .req x8 B_l .req x9 B_h .req x10 C_l .req x11 C_h .req x12 D_l .req x13 D_h .req x14 mov dst, dstin cmp count, #64 b.ge .Lcpy_not_short cmp count, #15 b.le .Ltail15tiny /* * Deal with small copies quickly by dropping straight into the * exit block. */ .Ltail63: /* * Copy up to 48 bytes of data. At this point we only need the * bottom 6 bits of count to be accurate. */ ands tmp1, count, #0x30 b.eq .Ltail15 add dst, dst, tmp1 add src, src, tmp1 cmp tmp1w, #0x20 b.eq 1f b.lt 2f USER(8f, ldp A_l, A_h, [src, #-48]) USER(8f, stp A_l, A_h, [dst, #-48]) 1: USER(8f, ldp A_l, A_h, [src, #-32]) USER(8f, stp A_l, A_h, [dst, #-32]) 2: USER(8f, ldp A_l, A_h, [src, #-16]) USER(8f, stp A_l, A_h, [dst, #-16]) .Ltail15: ands count, count, #15 beq 1f add src, src, count USER(9f, ldp A_l, A_h, [src, #-16]) add dst, dst, count USER(9f, stp A_l, A_h, [dst, #-16]) 1: b .Lsuccess .Ltail15tiny: /* * Copy up to 15 bytes of data. Does not assume additional data * being copied. */ tbz count, #3, 1f USER(10f, ldr tmp1, [src], #8) USER(10f, str tmp1, [dst], #8) 1: tbz count, #2, 1f USER(10f, ldr tmp1w, [src], #4) USER(10f, str tmp1w, [dst], #4) 1: tbz count, #1, 1f USER(10f, ldrh tmp1w, [src], #2) USER(10f, strh tmp1w, [dst], #2) 1: tbz count, #0, 1f USER(10f, ldrb tmp1w, [src]) USER(10f, strb tmp1w, [dst]) 1: b .Lsuccess .Lcpy_not_short: /* * We don't much care about the alignment of DST, but we want SRC * to be 128-bit (16 byte) aligned so that we don't cross cache line * boundaries on both loads and stores. */ neg tmp2, src ands tmp2, tmp2, #15 /* Bytes to reach alignment. */ b.eq 2f sub count, count, tmp2 /* * Copy more data than needed; it's faster than jumping * around copying sub-Quadword quantities. We know that * it can't overrun. */ USER(11f, ldp A_l, A_h, [src]) add src, src, tmp2 USER(11f, stp A_l, A_h, [dst]) add dst, dst, tmp2 /* There may be less than 63 bytes to go now. */ cmp count, #63 b.le .Ltail63 2: subs count, count, #128 b.ge .Lcpy_body_large /* * Less than 128 bytes to copy, so handle 64 here and then jump * to the tail. */ USER(12f, ldp A_l, A_h, [src]) USER(12f, ldp B_l, B_h, [src, #16]) USER(12f, ldp C_l, C_h, [src, #32]) USER(12f, ldp D_l, D_h, [src, #48]) USER(12f, stp A_l, A_h, [dst]) USER(12f, stp B_l, B_h, [dst, #16]) USER(12f, stp C_l, C_h, [dst, #32]) USER(12f, stp D_l, D_h, [dst, #48]) tst count, #0x3f add src, src, #64 add dst, dst, #64 b.ne .Ltail63 b .Lsuccess /* * Critical loop. Start at a new cache line boundary. Assuming * 64 bytes per line this ensures the entire loop is in one line. */ .p2align 6 .Lcpy_body_large: /* There are at least 128 bytes to copy. */ USER(12f, ldp A_l, A_h, [src, #0]) sub dst, dst, #16 /* Pre-bias. */ USER(13f, ldp B_l, B_h, [src, #16]) USER(13f, ldp C_l, C_h, [src, #32]) USER(13f, ldp D_l, D_h, [src, #48]!) /* src += 64 - Pre-bias. */ 1: USER(13f, stp A_l, A_h, [dst, #16]) USER(13f, ldp A_l, A_h, [src, #16]) USER(13f, stp B_l, B_h, [dst, #32]) USER(13f, ldp B_l, B_h, [src, #32]) USER(13f, stp C_l, C_h, [dst, #48]) USER(13f, ldp C_l, C_h, [src, #48]) USER(13f, stp D_l, D_h, [dst, #64]!) USER(13f, ldp D_l, D_h, [src, #64]!) subs count, count, #64 b.ge 1b USER(14f, stp A_l, A_h, [dst, #16]) USER(14f, stp B_l, B_h, [dst, #32]) USER(14f, stp C_l, C_h, [dst, #48]) USER(14f, stp D_l, D_h, [dst, #64]) add src, src, #16 add dst, dst, #64 + 16 tst count, #0x3f b.ne .Ltail63 .Lsuccess: /* Nothing left to copy */ mov x0, #0 ret .macro copy_abort_table 8: /* * Count bytes remain * dst points to (dst + tmp1) */ mov x0, count sub dst, dst, tmp1 b .Lfinalize 9: /* * 16 bytes remain * dst is accurate */ mov x0, #16 b .Lfinalize 10: /* * count is accurate * dst is accurate */ mov x0, count b .Lfinalize 11: /* *(count + tmp2) bytes remain * dst points to the start of the remaining bytes */ add x0, count, tmp2 b .Lfinalize 12: /* * (count + 128) bytes remain * dst is accurate */ add x0, count, #128 b .Lfinalize 13: /* * (count + 128) bytes remain * dst is pre-biased to (dst + 16) */ add x0, count, #128 sub dst, dst, #16 b .Lfinalize 14: /* * count is accurate * dst is pre-biased to (dst + 16) */ mov x0, count sub dst, dst, #16 /* fall-through */ .Lfinalize: /* * Zeroize remaining destination-buffer */ mov count, x0 20: /* Zero remaining buffer space */ strb wzr, [dst], #1 subs count, count, #1 b.ne 20b ret .endm
arch/arm64/lib/copy_to_user.S +28 −3 Original line number Diff line number Diff line Loading @@ -15,6 +15,7 @@ */ #include <linux/linkage.h> #include <asm/assembler.h> /* * Copy to user space from a kernel buffer (alignment handled by the hardware) Loading @@ -27,10 +28,34 @@ * x0 - bytes not copied */ ENTRY(__copy_to_user) #include "copy_template.S" add x4, x0, x2 // upper user buffer boundary subs x2, x2, #8 b.mi 2f 1: ldr x3, [x1], #8 subs x2, x2, #8 USER(9f, str x3, [x0], #8 ) b.pl 1b 2: adds x2, x2, #4 b.mi 3f ldr w3, [x1], #4 sub x2, x2, #4 USER(9f, str w3, [x0], #4 ) 3: adds x2, x2, #2 b.mi 4f ldrh w3, [x1], #2 sub x2, x2, #2 USER(9f, strh w3, [x0], #2 ) 4: adds x2, x2, #1 b.mi 5f ldrb w3, [x1] USER(9f, strb w3, [x0] ) 5: mov x0, #0 ret ENDPROC(__copy_to_user) .section .fixup,"ax" .align 2 copy_abort_table 9: sub x0, x4, x0 // bytes not copied ret .previous