diff options
Diffstat (limited to 'arch/arm64/crypto/ghash-ce-core.S')
-rw-r--r-- | arch/arm64/crypto/ghash-ce-core.S | 175 |
1 files changed, 175 insertions, 0 deletions
/*
 * AES-GCM helpers for arch/arm64/crypto/ghash-ce-core.S; in the file they
 * come directly after ENDPROC(pmull_ghash_update).
 *
 * Syntax: GNU as, AArch64.
 *
 * The names SHASH, SHASH2, XL, XM, XH, T1, T2, IN1 and MASK, as well as the
 * CPU_LE(), ENTRY() and ENDPROC() macros, are used below but are not defined
 * in this part of the file; presumably they are register aliases / helpers
 * declared earlier.
 *
 * Fixed register usage in this section:
 *   v8  (KS)   keystream block
 *   v9  (CTR)  counter block being encrypted
 *   v10 (INP)  current input block
 *   v17-v31    AES round keys (filled by load_round_keys)
 *
 * NOTE(review): v8-v10 are callee-saved (low 64 bits) under AAPCS64 and are
 * not saved or restored here - presumably the callers run this code in a
 * context where the whole NEON state is preserved; confirm.
 */
	KS		.req	v8
	CTR		.req	v9
	INP		.req	v10

	/*
	 * load_round_keys rounds, rk
	 *
	 * Read the round keys at \rk into the vector registers so that the
	 * key schedule always ends in v31:
	 *   \rounds  < 12 : v21-v31
	 *   \rounds == 12 : v19-v31
	 *   \rounds  > 12 : v17-v31
	 *
	 * Clobbers the condition flags; \rk is advanced by the post-indexed
	 * loads.
	 */
	.macro		load_round_keys, rounds, rk
	cmp		\rounds, #12
	b.lo		2222f				// 128 bits
	b.eq		1111f				// 192 bits
	ld1		{v17.4s-v18.4s}, [\rk], #32
1111:	ld1		{v19.4s-v20.4s}, [\rk], #32
2222:	ld1		{v21.4s-v24.4s}, [\rk], #64
	ld1		{v25.4s-v28.4s}, [\rk], #64
	ld1		{v29.4s-v31.4s}, [\rk]
	.endm

	/*
	 * enc_round state, key
	 *
	 * A single AESE + AESMC step on \state using round key \key.
	 */
	.macro		enc_round, state, key
	aese		\state\().16b, \key\().16b
	aesmc		\state\().16b, \state\().16b
	.endm

	/*
	 * enc_block state, rounds
	 *
	 * Encrypt \state in place with the round keys held in v17-v31.
	 * The same \rounds thresholds as in load_round_keys select where the
	 * round sequence is entered (v17, v19 or v21). The last round is a
	 * bare AESE with v30 followed by an XOR with v31.
	 *
	 * Clobbers the condition flags.
	 */
	.macro		enc_block, state, rounds
	cmp		\rounds, #12
	b.lo		2222f				// 128 bits
	b.eq		1111f				// 192 bits
	.irp		key, v17, v18
	enc_round	\state, \key
	.endr
1111:	.irp		key, v19, v20
	enc_round	\state, \key
	.endr
2222:	.irp		key, v21, v22, v23, v24, v25, v26, v27, v28, v29
	enc_round	\state, \key
	.endr
	aese		\state\().16b, v30.16b
	eor		\state\().16b, \state\().16b, v31.16b
	.endm

	/*
	 * pmull_gcm_do_crypt enc
	 *
	 * Shared body of pmull_gcm_encrypt (\enc == 1) and pmull_gcm_decrypt
	 * (\enc == 0). Each loop iteration handles one 16-byte block and
	 * interleaves the AES rounds on CTR with the PMULL based digest
	 * update, so the instruction order below is deliberate.
	 *
	 * Register arguments, as used by the code:
	 *   w0  number of blocks; decremented before it is tested, so a count
	 *       of zero is not handled here
	 *   x1  digest: loaded into XL on entry, written back on exit
	 *   x2  destination, post-incremented by 16 per block
	 *   x3  source, post-incremented by 16 per block
	 *   x4  hash key, loaded into SHASH
	 *   x5  counter block: bytes 0-7 are re-read every iteration, bytes
	 *       8-15 are loaded once into x8 and written back on exit
	 *   w6  number of AES rounds (compared against 12)
	 *   x7  (\enc == 1 only) keystream block: loaded into KS on entry,
	 *       written back on exit
	 *
	 * v17-v31 are not loaded here and must already contain the round
	 * keys. Scratch: x8, x9 and the condition flags.
	 *
	 * NOTE(review): the lower counter half is incremented as a full
	 * 64-bit value; presumably the caller guarantees its low 32 bits
	 * never wrap - confirm.
	 */
	.macro		pmull_gcm_do_crypt, enc
	ld1		{SHASH.2d}, [x4]
	ld1		{XL.2d}, [x1]
	ldr		x8, [x5, #8]			// load lower counter

	movi		MASK.16b, #0xe1
	ext		SHASH2.16b, SHASH.16b, SHASH.16b, #8
CPU_LE(	rev		x8, x8		)
	shl		MASK.2d, MASK.2d, #57
	eor		SHASH2.16b, SHASH2.16b, SHASH.16b	// SHASH halves XORed together

	.if		\enc == 1
	ld1		{KS.16b}, [x7]			// keystream for the first block
	.endif

	/* ---- per-block loop ---- */
0:	ld1		{CTR.8b}, [x5]			// load upper counter
	ld1		{INP.16b}, [x3], #16
	rev		x9, x8				// byte-reversed copy of the counter
	add		x8, x8, #1			// counter for the next block
	sub		w0, w0, #1			// one block fewer to go
	ins		CTR.d[1], x9			// set lower counter

	.if		\enc == 1
	/* KS was produced before this block: XOR and store right away */
	eor		INP.16b, INP.16b, KS.16b	// encrypt input
	st1		{INP.16b}, [x2], #16
	.endif

	/* T1 = the block that goes into the digest (post-XOR when \enc == 1) */
	rev64		T1.16b, INP.16b

	cmp		w6, #12
	b.ge		2f				// AES-192/256?

	/* common rounds (v21-v31), interleaved with the digest update */
1:	enc_round	CTR, v21

	ext		T2.16b, XL.16b, XL.16b, #8
	ext		IN1.16b, T1.16b, T1.16b, #8

	enc_round	CTR, v22

	eor		T1.16b, T1.16b, T2.16b
	eor		XL.16b, XL.16b, IN1.16b

	enc_round	CTR, v23

	pmull2		XH.1q, SHASH.2d, XL.2d		// a1 * b1
	eor		T1.16b, T1.16b, XL.16b

	enc_round	CTR, v24

	pmull		XL.1q, SHASH.1d, XL.1d		// a0 * b0
	pmull		XM.1q, SHASH2.1d, T1.1d		// (a1 + a0)(b1 + b0)

	enc_round	CTR, v25

	ext		T1.16b, XL.16b, XH.16b, #8
	eor		T2.16b, XL.16b, XH.16b
	eor		XM.16b, XM.16b, T1.16b

	enc_round	CTR, v26

	eor		XM.16b, XM.16b, T2.16b
	pmull		T2.1q, XL.1d, MASK.1d

	enc_round	CTR, v27

	mov		XH.d[0], XM.d[1]
	mov		XM.d[1], XL.d[0]

	enc_round	CTR, v28

	eor		XL.16b, XM.16b, T2.16b

	enc_round	CTR, v29

	ext		T2.16b, XL.16b, XL.16b, #8

	aese		CTR.16b, v30.16b		// last round: no aesmc

	pmull		XL.1q, XL.1d, MASK.1d
	eor		T2.16b, T2.16b, XH.16b

	eor		KS.16b, CTR.16b, v31.16b	// KS = encrypted counter block

	eor		XL.16b, XL.16b, T2.16b		// updated digest

	.if		\enc == 0
	/* the block was hashed as loaded; XOR with the fresh KS and store */
	eor		INP.16b, INP.16b, KS.16b
	st1		{INP.16b}, [x2], #16
	.endif

	cbnz		w0, 0b

	/* ---- write back digest, counter and (\enc == 1) keystream ---- */
CPU_LE(	rev		x8, x8		)
	st1		{XL.2d}, [x1]
	str		x8, [x5, #8]			// store lower counter

	.if		\enc == 1
	st1		{KS.16b}, [x7]			// KS computed by the last iteration
	.endif

	ret

	/*
	 * Out-of-line extra rounds for the longer key schedules. The flags
	 * still come from "cmp w6, #12" above, since nothing in between
	 * modifies them.
	 */
2:	b.eq		3f				// AES-192?
	.irp		key, v17, v18
	enc_round	CTR, \key
	.endr
3:	.irp		key, v19, v20
	enc_round	CTR, \key
	.endr
	b		1b
	.endm

	/*
	 * void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[], const u8 src[],
	 *			  struct ghash_key const *k, u8 ctr[],
	 *			  int rounds, u8 ks[])
	 *
	 * Expands pmull_gcm_do_crypt with \enc == 1; see the macro for the
	 * register usage.
	 */
ENTRY(pmull_gcm_encrypt)
	pmull_gcm_do_crypt	1
ENDPROC(pmull_gcm_encrypt)

	/*
	 * void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[], const u8 src[],
	 *			  struct ghash_key const *k, u8 ctr[],
	 *			  int rounds)
	 *
	 * Expands pmull_gcm_do_crypt with \enc == 0; x7 is not used.
	 */
ENTRY(pmull_gcm_decrypt)
	pmull_gcm_do_crypt	0
ENDPROC(pmull_gcm_decrypt)

	/*
	 * void pmull_gcm_encrypt_block(u8 dst[], u8 src[], u8 rk[], int rounds)
	 *
	 * Encrypt the 16-byte block at src (x1) and store the result at dst
	 * (x0). If rk (x2) is non-zero the round keys are first loaded from
	 * it into v17-v31; if it is zero the current contents of those
	 * registers are used as they are.
	 *
	 * Clobbers v0, x2 (when the keys are loaded) and the condition flags.
	 */
ENTRY(pmull_gcm_encrypt_block)
	cbz		x2, 0f				// no key pointer: keep v17-v31
	load_round_keys	w3, x2
0:	ld1		{v0.16b}, [x1]
	enc_block	v0, w3
	st1		{v0.16b}, [x0]
	ret
ENDPROC(pmull_gcm_encrypt_block)