arch/arm64/crypto/aes-ce-ccm-core.S +55 −95

@@ -19,33 +19,24 @@
  * u32 *macp, u8 const rk[], u32 rounds);
  */
 ENTRY(ce_aes_ccm_auth_data)
-	frame_push	7
-
-	mov	x19, x0
-	mov	x20, x1
-	mov	x21, x2
-	mov	x22, x3
-	mov	x23, x4
-	mov	x24, x5
-
-	ldr	w25, [x22]			/* leftover from prev round? */
+	ldr	w8, [x3]			/* leftover from prev round? */
 	ld1	{v0.16b}, [x0]			/* load mac */
-	cbz	w25, 1f
-	sub	w25, w25, #16
+	cbz	w8, 1f
+	sub	w8, w8, #16
 	eor	v1.16b, v1.16b, v1.16b
-0:	ldrb	w7, [x20], #1			/* get 1 byte of input */
-	subs	w21, w21, #1
-	add	w25, w25, #1
+0:	ldrb	w7, [x1], #1			/* get 1 byte of input */
+	subs	w2, w2, #1
+	add	w8, w8, #1
 	ins	v1.b[0], w7
 	ext	v1.16b, v1.16b, v1.16b, #1	/* rotate in the input bytes */
 	beq	8f				/* out of input? */
-	cbnz	w25, 0b
+	cbnz	w8, 0b
 	eor	v0.16b, v0.16b, v1.16b
-1:	ld1	{v3.4s}, [x23]			/* load first round key */
-	prfm	pldl1strm, [x20]
-	cmp	w24, #12			/* which key size? */
-	add	x6, x23, #16
-	sub	w7, w24, #2			/* modified # of rounds */
+1:	ld1	{v3.4s}, [x4]			/* load first round key */
+	prfm	pldl1strm, [x1]
+	cmp	w5, #12				/* which key size? */
+	add	x6, x4, #16
+	sub	w7, w5, #2			/* modified # of rounds */
 	bmi	2f
 	bne	5f
 	mov	v5.16b, v3.16b
@@ -64,43 +55,33 @@ ENTRY(ce_aes_ccm_auth_data)
 	ld1	{v5.4s}, [x6], #16		/* load next round key */
 	bpl	3b
 	aese	v0.16b, v4.16b
-	subs	w21, w21, #16			/* last data? */
+	subs	w2, w2, #16			/* last data? */
 	eor	v0.16b, v0.16b, v5.16b		/* final round */
 	bmi	6f
-	ld1	{v1.16b}, [x20], #16		/* load next input block */
+	ld1	{v1.16b}, [x1], #16		/* load next input block */
 	eor	v0.16b, v0.16b, v1.16b		/* xor with mac */
-	beq	6f
-
-	if_will_cond_yield_neon
-	st1	{v0.16b}, [x19]			/* store mac */
-	do_cond_yield_neon
-	ld1	{v0.16b}, [x19]			/* reload mac */
-	endif_yield_neon
-
-	b	1b
-6:	st1	{v0.16b}, [x19]			/* store mac */
+	bne	1b
+6:	st1	{v0.16b}, [x0]			/* store mac */
 	beq	10f
-	adds	w21, w21, #16
+	adds	w2, w2, #16
 	beq	10f
-	mov	w25, w21
-7:	ldrb	w7, [x20], #1
+	mov	w8, w2
+7:	ldrb	w7, [x1], #1
 	umov	w6, v0.b[0]
 	eor	w6, w6, w7
-	strb	w6, [x19], #1
-	subs	w21, w21, #1
+	strb	w6, [x0], #1
+	subs	w2, w2, #1
 	beq	10f
 	ext	v0.16b, v0.16b, v0.16b, #1	/* rotate out the mac bytes */
 	b	7b
-8:	mov	w7, w25
-	add	w25, w25, #16
+8:	mov	w7, w8
+	add	w8, w8, #16
 9:	ext	v1.16b, v1.16b, v1.16b, #1
 	adds	w7, w7, #1
 	bne	9b
 	eor	v0.16b, v0.16b, v1.16b
-	st1	{v0.16b}, [x19]
-10:	str	w25, [x22]
-
-	frame_pop
+	st1	{v0.16b}, [x0]
+10:	str	w8, [x3]
 	ret
 ENDPROC(ce_aes_ccm_auth_data)
@@ -145,29 +126,19 @@ ENTRY(ce_aes_ccm_final)
 ENDPROC(ce_aes_ccm_final)
 
 	.macro	aes_ccm_do_crypt,enc
-	frame_push	8
-
-	mov	x19, x0
-	mov	x20, x1
-	mov	x21, x2
-	mov	x22, x3
-	mov	x23, x4
-	mov	x24, x5
-	mov	x25, x6
-
-	ldr	x26, [x25, #8]			/* load lower ctr */
-	ld1	{v0.16b}, [x24]			/* load mac */
-CPU_LE(	rev	x26, x26	)		/* keep swabbed ctr in reg */
+	ldr	x8, [x6, #8]			/* load lower ctr */
+	ld1	{v0.16b}, [x5]			/* load mac */
+CPU_LE(	rev	x8, x8	)			/* keep swabbed ctr in reg */
 0:	/* outer loop */
-	ld1	{v1.8b}, [x25]			/* load upper ctr */
-	prfm	pldl1strm, [x20]
-	add	x26, x26, #1
-	rev	x9, x26
-	cmp	w23, #12			/* which key size? */
-	sub	w7, w23, #2			/* get modified # of rounds */
+	ld1	{v1.8b}, [x6]			/* load upper ctr */
+	prfm	pldl1strm, [x1]
+	add	x8, x8, #1
+	rev	x9, x8
+	cmp	w4, #12				/* which key size? */
+	sub	w7, w4, #2			/* get modified # of rounds */
 	ins	v1.d[1], x9			/* no carry in lower ctr */
-	ld1	{v3.4s}, [x22]			/* load first round key */
-	add	x10, x22, #16
+	ld1	{v3.4s}, [x3]			/* load first round key */
+	add	x10, x3, #16
 	bmi	1f
 	bne	4f
 	mov	v5.16b, v3.16b
@@ -194,9 +165,9 @@ CPU_LE(	rev	x26, x26	)
 	bpl	2b
 	aese	v0.16b, v4.16b
 	aese	v1.16b, v4.16b
-	subs	w21, w21, #16
-	bmi	7f				/* partial block? */
-	ld1	{v2.16b}, [x20], #16		/* load next input block */
+	subs	w2, w2, #16
+	bmi	6f				/* partial block? */
+	ld1	{v2.16b}, [x1], #16		/* load next input block */
 	.if	\enc == 1
 	eor	v2.16b, v2.16b, v5.16b		/* final round enc+mac */
 	eor	v1.16b, v1.16b, v2.16b		/* xor with crypted ctr */
@@ -205,29 +176,18 @@ CPU_LE(	rev	x26, x26	)
 	eor	v1.16b, v2.16b, v5.16b		/* final round enc */
 	.endif
 	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */
-	st1	{v1.16b}, [x19], #16		/* write output block */
-	beq	5f
-
-	if_will_cond_yield_neon
-	st1	{v0.16b}, [x24]			/* store mac */
-	do_cond_yield_neon
-	ld1	{v0.16b}, [x24]			/* reload mac */
-	endif_yield_neon
-
-	b	0b
-5:
-CPU_LE(	rev	x26, x26	)
-	st1	{v0.16b}, [x24]			/* store mac */
-	str	x26, [x25, #8]			/* store lsb end of ctr (BE) */
-6:	frame_pop
-	ret
-
-7:	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
+	st1	{v1.16b}, [x0], #16		/* write output block */
+	bne	0b
+CPU_LE(	rev	x8, x8	)
+	st1	{v0.16b}, [x5]			/* store mac */
+	str	x8, [x6, #8]			/* store lsb end of ctr (BE) */
+5:	ret
+
+6:	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
 	eor	v1.16b, v1.16b, v5.16b		/* final round enc */
-	st1	{v0.16b}, [x24]			/* store mac */
-	add	w21, w21, #16			/* process partial tail block */
-8:	ldrb	w9, [x20], #1			/* get 1 byte of input */
+	st1	{v0.16b}, [x5]			/* store mac */
+	add	w2, w2, #16			/* process partial tail block */
+7:	ldrb	w9, [x1], #1			/* get 1 byte of input */
 	umov	w6, v1.b[0]			/* get top crypted ctr byte */
 	umov	w7, v0.b[0]			/* get top mac byte */
 	.if	\enc == 1
@@ -237,13 +197,13 @@ CPU_LE(	rev	x26, x26	)
 	eor	w9, w9, w6
 	eor	w7, w7, w9
 	.endif
-	strb	w9, [x19], #1			/* store out byte */
-	strb	w7, [x24], #1			/* store mac byte */
-	subs	w21, w21, #1
-	beq	6b
+	strb	w9, [x0], #1			/* store out byte */
+	strb	w7, [x5], #1			/* store mac byte */
+	subs	w2, w2, #1
+	beq	5b
 	ext	v0.16b, v0.16b, v0.16b, #1	/* shift out mac byte */
 	ext	v1.16b, v1.16b, v1.16b, #1	/* shift out ctr byte */
-	b	8b
+	b	7b
 .endm
 
 /*
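The hunks above drop the frame_push/frame_pop and if_will_cond_yield_neon scaffolding, so ce_aes_ccm_auth_data works on its arguments directly in x0-x8 instead of parking them in callee-saved x19-x28. For orientation, here is a much-simplified C sketch of the bookkeeping the routine performs; it models only the CBC-MAC accumulation and the *macp leftover count, not the partial-tail handling, and aes_encrypt_block() is a hypothetical stand-in for the aese/aesmc round sequence.

	#include <stdint.h>
	#include <stddef.h>

	/* Hypothetical placeholder: the real code runs the full AES round
	 * sequence (aese/aesmc per round, plus the final eor) over blk. */
	static void aes_encrypt_block(uint8_t blk[16], const void *rk, int rounds)
	{
		(void)blk; (void)rk; (void)rounds;
	}

	/* Sketch of ce_aes_ccm_auth_data's control flow: fold input bytes
	 * into the running CBC-MAC, encrypting the MAC each time a 16-byte
	 * block boundary is crossed; *macp carries the partial-block byte
	 * count between calls. */
	static void ccm_auth_sketch(uint8_t mac[16], const uint8_t *in,
				    size_t len, uint32_t *macp,
				    const void *rk, int rounds)
	{
		uint32_t p = *macp;

		while (len--) {
			mac[p] ^= *in++;	/* xor one byte into the MAC */
			if (++p == 16) {	/* block complete? */
				aes_encrypt_block(mac, rk, rounds);
				p = 0;
			}
		}
		*macp = p;			/* leftover for the next call */
	}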
arch/arm64/crypto/ghash-ce-core.S +25 −51

@@ -322,55 +322,41 @@ ENDPROC(pmull_ghash_update_p8)
 	.endm
 
 	.macro		pmull_gcm_do_crypt, enc
-	frame_push	10
-
-	mov		x19, x0
-	mov		x20, x1
-	mov		x21, x2
-	mov		x22, x3
-	mov		x23, x4
-	mov		x24, x5
-	mov		x25, x6
-	mov		x26, x7
-	.if		\enc == 1
-	ldr		x27, [sp, #96]			// first stacked arg
-	.endif
-
-	ldr		x28, [x24, #8]			// load lower counter
-CPU_LE(	rev		x28, x28	)
-
-0:	mov		x0, x25
-	load_round_keys	w26, x0
-	ld1		{SHASH.2d}, [x23]
-	ld1		{XL.2d}, [x20]
+	ld1		{SHASH.2d}, [x4]
+	ld1		{XL.2d}, [x1]
+	ldr		x8, [x5, #8]			// load lower counter
+
+	load_round_keys	w7, x6
 
 	movi		MASK.16b, #0xe1
 	ext		SHASH2.16b, SHASH.16b, SHASH.16b, #8
+CPU_LE(	rev		x8, x8		)
 	shl		MASK.2d, MASK.2d, #57
 	eor		SHASH2.16b, SHASH2.16b, SHASH.16b
 
 	.if		\enc == 1
-	ld1		{KS.16b}, [x27]
+	ldr		x10, [sp]
+	ld1		{KS.16b}, [x10]
 	.endif
 
-1:	ld1		{CTR.8b}, [x24]			// load upper counter
-	ld1		{INP.16b}, [x22], #16
-	rev		x9, x28
-	add		x28, x28, #1
-	sub		w19, w19, #1
+0:	ld1		{CTR.8b}, [x5]			// load upper counter
+	ld1		{INP.16b}, [x3], #16
+	rev		x9, x8
+	add		x8, x8, #1
+	sub		w0, w0, #1
 	ins		CTR.d[1], x9			// set lower counter
 
 	.if		\enc == 1
 	eor		INP.16b, INP.16b, KS.16b	// encrypt input
-	st1		{INP.16b}, [x21], #16
+	st1		{INP.16b}, [x2], #16
 	.endif
 
 	rev64		T1.16b, INP.16b
 
-	cmp		w26, #12
-	b.ge		4f				// AES-192/256?
+	cmp		w7, #12
+	b.ge		2f				// AES-192/256?
 
-2:	enc_round	CTR, v21
+1:	enc_round	CTR, v21
 
 	ext		T2.16b, XL.16b, XL.16b, #8
 	ext		IN1.16b, T1.16b, T1.16b, #8
@@ -425,39 +411,27 @@ CPU_LE(	rev		x28, x28	)
 
 	.if		\enc == 0
 	eor		INP.16b, INP.16b, KS.16b
-	st1		{INP.16b}, [x21], #16
+	st1		{INP.16b}, [x2], #16
 	.endif
 
-	cbz		w19, 3f
-
-	if_will_cond_yield_neon
-	st1		{XL.2d}, [x20]
-	.if		\enc == 1
-	st1		{KS.16b}, [x27]
-	.endif
-	do_cond_yield_neon
-	b		0b
-	endif_yield_neon
+	cbnz		w0, 0b
 
-	b		1b
-
-3:	st1		{XL.2d}, [x20]
+CPU_LE(	rev		x8, x8		)
+	st1		{XL.2d}, [x1]
+	str		x8, [x5, #8]			// store lower counter
 
 	.if		\enc == 1
-	st1		{KS.16b}, [x27]
+	st1		{KS.16b}, [x10]
 	.endif
 
-CPU_LE(	rev		x28, x28	)
-	str		x28, [x24, #8]			// store lower counter
-
-	frame_pop
 	ret
 
-4:	b.eq		5f				// AES-192?
+2:	b.eq		3f				// AES-192?
 	enc_round	CTR, v17
 	enc_round	CTR, v18
-5:	enc_round	CTR, v19
+3:	enc_round	CTR, v19
 	enc_round	CTR, v20
-	b		2b
+	b		1b
 .endm
 
 /*
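The GCM helper follows the same pattern: with the yield logic gone, pmull_gcm_do_crypt needs no stack frame, so the first stacked argument now lives at [sp] rather than [sp, #96]. The lower counter half is kept CPU-endian in x8 across the loop ("keep swabbed ctr in reg") and swabbed only on load and store. A minimal C sketch of just that counter handling, with the AES and GHASH work elided:

	#include <stdint.h>
	#include <string.h>

	/* Sketch of the counter bookkeeping in pmull_gcm_do_crypt: ctr_blk
	 * holds the big-endian counter block; *lo mirrors its low 64 bits as
	 * a native integer. As in the assembly, a carry out of the low
	 * 64 bits is deliberately not propagated into the upper half. */
	static void gcm_ctr_sketch(uint8_t ctr_blk[16], uint64_t *lo,
				   unsigned int blocks)
	{
		while (blocks--) {
			/* add x8, x8, #1; rev x9, x8 */
			uint64_t be_lo = __builtin_bswap64(++*lo);

			/* ins CTR.d[1], x9: splice the swabbed low half in */
			memcpy(ctr_blk + 8, &be_lo, 8);

			/* ... encrypt ctr_blk, xor the keystream with the
			 * input, and update the GHASH state (elided) ... */
		}
	}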
drivers/crypto/padlock-aes.c +6 −2

@@ -266,6 +266,8 @@ static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key,
 		return;
 	}
 
+	count -= initial;
+
 	if (initial)
 		asm volatile (".byte 0xf3,0x0f,0xa7,0xc8"	/* rep xcryptecb */
 			      : "+S"(input), "+D"(output)
@@ -273,7 +275,7 @@ static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key,
 	asm volatile (".byte 0xf3,0x0f,0xa7,0xc8"	/* rep xcryptecb */
 		      : "+S"(input), "+D"(output)
-		      : "d"(control_word), "b"(key), "c"(count - initial));
+		      : "d"(control_word), "b"(key), "c"(count));
 }
 
 static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key,
@@ -284,6 +286,8 @@ static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key,
 	if (count < cbc_fetch_blocks)
 		return cbc_crypt(input, output, key, iv, control_word, count);
 
+	count -= initial;
+
 	if (initial)
 		asm volatile (".byte 0xf3,0x0f,0xa7,0xd0"	/* rep xcryptcbc */
 			      : "+S" (input), "+D" (output), "+a" (iv)
@@ -291,7 +295,7 @@ static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key,
 	asm volatile (".byte 0xf3,0x0f,0xa7,0xd0"	/* rep xcryptcbc */
 		      : "+S" (input), "+D" (output), "+a" (iv)
-		      : "d" (control_word), "b" (key), "c" (count));
+		      : "d" (control_word), "b" (key), "c" (count));
 	return iv;
 }
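The padlock-aes.c change is subtler than it looks: the "rep xcrypt" instructions count %ecx down to zero behind the compiler's back, and the asm statements only pass the block count as the input operand "c". Computing count - initial after the first asm statement therefore risks being evaluated from state the asm has already clobbered; subtracting up front sidesteps that. A hedged sketch of the fixed shape, with do_xcrypt() as a hypothetical stand-in for one inline-asm invocation:

	#include <stddef.h>
	#include <stdint.h>

	typedef uint8_t u8;
	typedef uint32_t u32;

	/* Hypothetical stand-in for one "rep xcrypt" asm statement: the
	 * hardware processes nblocks 16-byte blocks, advancing the input
	 * and output cursors (the "+S"/"+D" operands in the real driver). */
	static void do_xcrypt(const u8 **in, u8 **out, void *key, u32 cword,
			      size_t nblocks)
	{
		(void)key; (void)cword;
		*in += nblocks * 16;
		*out += nblocks * 16;
	}

	/* Fixed shape of padlock_xcrypt_ecb: the remainder is computed
	 * before the first xcrypt, never after an asm statement that may
	 * have clobbered the registers it would be derived from. */
	static void xcrypt_split(const u8 *in, u8 *out, void *key, u32 cword,
				 size_t count, size_t initial)
	{
		count -= initial;	/* remainder, computed up front */

		if (initial)		/* Nano prefetch workaround chunk */
			do_xcrypt(&in, &out, key, cword, initial);

		do_xcrypt(&in, &out, key, cword, count);
	}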