From fdd2389457b209a9723c3be818fcf301f35db906 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 26 Mar 2014 20:53:05 +0100 Subject: arm64/crypto: GHASH secure hash using ARMv8 Crypto Extensions This is a port to ARMv8 (Crypto Extensions) of the Intel implementation of the GHASH Secure Hash (used in the Galois/Counter chaining mode). It relies on the optional PMULL/PMULL2 instruction (polynomial multiply long, what Intel call carry-less multiply). Signed-off-by: Ard Biesheuvel Acked-by: Herbert Xu --- arch/arm64/crypto/ghash-ce-core.S | 95 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 arch/arm64/crypto/ghash-ce-core.S (limited to 'arch/arm64/crypto/ghash-ce-core.S') diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S new file mode 100644 index 0000000..b9e6eaf41 --- /dev/null +++ b/arch/arm64/crypto/ghash-ce-core.S @@ -0,0 +1,95 @@ +/* + * Accelerated GHASH implementation with ARMv8 PMULL instructions. + * + * Copyright (C) 2014 Linaro Ltd. + * + * Based on arch/x86/crypto/ghash-pmullni-intel_asm.S + * + * Copyright (c) 2009 Intel Corp. + * Author: Huang Ying + * Vinodh Gopal + * Erdinc Ozturk + * Deniz Karakoyunlu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. 
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+	/*
+	 * NEON register aliases.  IN1 and T1 share v2: the input block is not
+	 * read again after it has been XORed into DATA, so the same register
+	 * is reused as a temporary for the rest of the iteration.
+	 */
+	DATA	.req	v0	// running digest, low half of the product
+	SHASH	.req	v1	// 16 bytes loaded from the key pointer
+	IN1	.req	v2	// current input block
+	T1	.req	v2	// high half of the 256-bit product
+	T2	.req	v3	// scratch
+	T3	.req	v4	// scratch
+	VZR	.req	v5	// all-zeroes vector
+
+	.text
+	.arch		armv8-a+crypto
+
+	/*
+	 * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
+	 *			   struct ghash_key const *k, const char *head)
+	 *
+	 * Fold 16-byte blocks into the digest at dg[].  For every block:
+	 *	dg = (dg ^ block) * H, reduced in GF(2^128)
+	 * where H is the 16 bytes loaded from k.
+	 *
+	 * w0 (blocks): number of 16-byte blocks to read from src
+	 * x1 (dg):     16-byte digest, read on entry, written back on exit
+	 * x2 (src):    input blocks, consumed 16 bytes at a time
+	 * x3 (k):      key; only its first 16 bytes are read here
+	 * x4 (head):   optional extra block processed before src, or NULL;
+	 *              it is not counted in blocks
+	 *
+	 * The block counter is decremented before it is tested, so when head
+	 * is NULL the caller must pass blocks >= 1.  blocks == 0 with a NULL
+	 * head would wrap w0 and keep reading from src.
+	 *
+	 * Uses v0-v5 as scratch registers.
+	 */
+ENTRY(pmull_ghash_update)
+	ld1		{DATA.16b}, [x1]		// current digest
+	ld1		{SHASH.16b}, [x3]		// hash key
+	eor		VZR.16b, VZR.16b, VZR.16b	// VZR = 0
+
+	/* do the head block first, if supplied */
+	cbz		x4, 0f
+	ld1		{IN1.2d}, [x4]
+	b		1f			// skip the src load and count
+
+0:	ld1		{IN1.2d}, [x2], #16	// next block, src += 16
+	sub		w0, w0, #1
+
+	/*
+	 * Swap the two 64-bit halves of the block.  CPU_LE() presumably emits
+	 * its argument on little-endian builds only (confirm against
+	 * asm/assembler.h); there, rev64 also reverses the bytes within each
+	 * half, which together amounts to a full 16-byte reversal.
+	 */
+1:	ext		IN1.16b, IN1.16b, IN1.16b, #8
+CPU_LE(	rev64		IN1.16b, IN1.16b	)
+	eor		DATA.16b, DATA.16b, IN1.16b	// fold block into digest
+
+	/*
+	 * multiply DATA by SHASH in GF(2^128)
+	 *
+	 * Karatsuba: with DATA = a1:a0 and SHASH = b1:b0, three 64x64
+	 * carry-less multiplies are enough.  T2 and T3 first receive
+	 * (a1 + a0) and (b1 + b0) in both lanes.
+	 */
+	ext		T2.16b, DATA.16b, DATA.16b, #8
+	ext		T3.16b, SHASH.16b, SHASH.16b, #8
+	eor		T2.16b, T2.16b, DATA.16b
+	eor		T3.16b, T3.16b, SHASH.16b
+
+	pmull2		T1.1q, SHASH.2d, DATA.2d	// a1 * b1
+	pmull		DATA.1q, SHASH.1d, DATA.1d	// a0 * b0
+	pmull		T2.1q, T2.1d, T3.1d		// (a1 + a0)(b1 + b0)
+	eor		T2.16b, T2.16b, T1.16b		// (a0 * b1) + (a1 * b0)
+	eor		T2.16b, T2.16b, DATA.16b
+
+	/* add the middle term at bit offset 64 of the 256-bit product */
+	ext		T3.16b, VZR.16b, T2.16b, #8	// T3 = T2 << 64
+	ext		T2.16b, T2.16b, VZR.16b, #8	// T2 = T2 >> 64
+	eor		DATA.16b, DATA.16b, T3.16b
+	eor		T1.16b, T1.16b, T2.16b	// <T1:DATA> is result of
+						// carry-less multiplication
+
+	/*
+	 * first phase of the reduction
+	 *
+	 * Per 64-bit lane: T3 = DATA << 63 ^ DATA << 62 ^ DATA << 57.
+	 * T3 is then added into <T1:DATA> at bit offset 64.
+	 */
+	shl		T3.2d, DATA.2d, #1
+	eor		T3.16b, T3.16b, DATA.16b
+	shl		T3.2d, T3.2d, #5
+	eor		T3.16b, T3.16b, DATA.16b
+	shl		T3.2d, T3.2d, #57
+	ext		T2.16b, VZR.16b, T3.16b, #8	// T2 = T3 << 64
+	ext		T3.16b, T3.16b, VZR.16b, #8	// T3 = T3 >> 64
+	eor		DATA.16b, DATA.16b, T2.16b
+	eor		T1.16b, T1.16b, T3.16b
+
+	/*
+	 * second phase of the reduction
+	 *
+	 * Per 64-bit lane: T2 = DATA >> 7 ^ DATA >> 2 ^ DATA >> 1.
+	 * The reduced 128-bit result ends up in DATA.
+	 */
+	ushr		T2.2d, DATA.2d, #5
+	eor		T2.16b, T2.16b, DATA.16b
+	ushr		T2.2d, T2.2d, #1
+	eor		T2.16b, T2.16b, DATA.16b
+	ushr		T2.2d, T2.2d, #1
+	eor		T1.16b, T1.16b, T2.16b
+	eor		DATA.16b, DATA.16b, T1.16b
+
+	cbnz		w0, 0b			// more blocks left in src?
+
+	st1		{DATA.16b}, [x1]	// write the digest back
+	ret
+ENDPROC(pmull_ghash_update)
-- cgit v1.1